syn/
buffer.rs

1//! A stably addressed token buffer supporting efficient traversal based on a
2//! cheaply copyable cursor.
3
4// This module is heavily commented as it contains most of the unsafe code in
5// Syn, and caution should be used when editing it. The public-facing interface
6// is 100% safe but the implementation is fragile internally.
7
8use crate::ext::TokenStreamExt as _;
9use crate::Lifetime;
10use proc_macro2::extra::DelimSpan;
11use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
12use std::cmp::Ordering;
13use std::marker::PhantomData;
14use std::ptr;
15
16/// Internal type which is used instead of `TokenTree` to represent a token tree
17/// within a `TokenBuffer`.
18enum Entry {
19    // Mimicking types from proc-macro.
20    // Group entries contain the offset to the matching End entry.
21    Group(Group, usize),
22    Ident(Ident),
23    Punct(Punct),
24    Literal(Literal),
25    // End entries contain the offset (negative) to the start of the buffer, and
26    // offset (negative) to the matching Group entry.
27    End(isize, isize),
28}
29
30/// A buffer that can be efficiently traversed multiple times, unlike
31/// `TokenStream` which requires a deep copy in order to traverse more than
32/// once.
33pub struct TokenBuffer {
34    // NOTE: Do not implement clone on this - while the current design could be
35    // cloned, other designs which could be desirable may not be cloneable.
36    entries: Box<[Entry]>,
37}
38
39impl TokenBuffer {
40    fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
41        for tt in stream {
42            match tt {
43                TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
44                TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
45                TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
46                TokenTree::Group(group) => {
47                    let group_start_index = entries.len();
48                    entries.push(Entry::End(0, 0)); // we replace this below
49                    Self::recursive_new(entries, group.stream());
50                    let group_end_index = entries.len();
51                    let group_offset = group_end_index - group_start_index;
52                    entries.push(Entry::End(
53                        -(group_end_index as isize),
54                        -(group_offset as isize),
55                    ));
56                    entries[group_start_index] = Entry::Group(group, group_offset);
57                }
58            }
59        }
60    }
61
62    /// Creates a `TokenBuffer` containing all the tokens from the input
63    /// `proc_macro::TokenStream`.
64    #[cfg(feature = "proc-macro")]
65    #[cfg_attr(docsrs, doc(cfg(feature = "proc-macro")))]
66    pub fn new(stream: proc_macro::TokenStream) -> Self {
67        Self::new2(stream.into())
68    }
69
70    /// Creates a `TokenBuffer` containing all the tokens from the input
71    /// `proc_macro2::TokenStream`.
72    pub fn new2(stream: TokenStream) -> Self {
73        let mut entries = Vec::new();
74        Self::recursive_new(&mut entries, stream);
75        entries.push(Entry::End(-(entries.len() as isize), 0));
76        Self {
77            entries: entries.into_boxed_slice(),
78        }
79    }
80
81    /// Creates a cursor referencing the first token in the buffer and able to
82    /// traverse until the end of the buffer.
83    pub fn begin(&self) -> Cursor {
84        let ptr = self.entries.as_ptr();
85        unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
86    }
87}
88
89/// A cheaply copyable cursor into a `TokenBuffer`.
90///
91/// This cursor holds a shared reference into the immutable data which is used
92/// internally to represent a `TokenStream`, and can be efficiently manipulated
93/// and copied around.
94///
95/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
96/// object and get a cursor to its first token with `begin()`.
97pub struct Cursor<'a> {
98    // The current entry which the `Cursor` is pointing at.
99    ptr: *const Entry,
100    // This is the only `Entry::End` object which this cursor is allowed to
101    // point at. All other `End` objects are skipped over in `Cursor::create`.
102    scope: *const Entry,
103    // Cursor is covariant in 'a. This field ensures that our pointers are still
104    // valid.
105    marker: PhantomData<&'a Entry>,
106}
107
108impl<'a> Cursor<'a> {
109    /// Creates a cursor referencing a static empty TokenStream.
110    pub fn empty() -> Self {
111        // It's safe in this situation for us to put an `Entry` object in global
112        // storage, despite it not actually being safe to send across threads
113        // (`Ident` is a reference into a thread-local table). This is because
114        // this entry never includes a `Ident` object.
115        //
116        // This wrapper struct allows us to break the rules and put a `Sync`
117        // object in global storage.
118        struct UnsafeSyncEntry(Entry);
119        unsafe impl Sync for UnsafeSyncEntry {}
120        static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0, 0));
121
122        Cursor {
123            ptr: &EMPTY_ENTRY.0,
124            scope: &EMPTY_ENTRY.0,
125            marker: PhantomData,
126        }
127    }
128
129    /// This create method intelligently exits non-explicitly-entered
130    /// `None`-delimited scopes when the cursor reaches the end of them,
131    /// allowing for them to be treated transparently.
132    unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
133        // NOTE: If we're looking at a `End`, we want to advance the cursor
134        // past it, unless `ptr == scope`, which means that we're at the edge of
135        // our cursor's scope. We should only have `ptr != scope` at the exit
136        // from None-delimited groups entered with `ignore_none`.
137        while let Entry::End(..) = unsafe { &*ptr } {
138            if ptr::eq(ptr, scope) {
139                break;
140            }
141            ptr = unsafe { ptr.add(1) };
142        }
143
144        Cursor {
145            ptr,
146            scope,
147            marker: PhantomData,
148        }
149    }
150
151    /// Get the current entry.
152    fn entry(self) -> &'a Entry {
153        unsafe { &*self.ptr }
154    }
155
156    /// Bump the cursor to point at the next token after the current one. This
157    /// is undefined behavior if the cursor is currently looking at an
158    /// `Entry::End`.
159    ///
160    /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
161    /// point at the first token in the group (with the same scope end).
162    unsafe fn bump_ignore_group(self) -> Cursor<'a> {
163        unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
164    }
165
166    /// While the cursor is looking at a `None`-delimited group, move it to look
167    /// at the first token inside instead. If the group is empty, this will move
168    /// the cursor past the `None`-delimited group.
169    ///
170    /// WARNING: This mutates its argument.
171    fn ignore_none(&mut self) {
172        while let Entry::Group(group, _) = self.entry() {
173            if group.delimiter() == Delimiter::None {
174                unsafe { *self = self.bump_ignore_group() };
175            } else {
176                break;
177            }
178        }
179    }
180
181    /// Checks whether the cursor is currently pointing at the end of its valid
182    /// scope.
183    pub fn eof(self) -> bool {
184        // We're at eof if we're at the end of our scope.
185        ptr::eq(self.ptr, self.scope)
186    }
187
188    /// If the cursor is pointing at a `Ident`, returns it along with a cursor
189    /// pointing at the next `TokenTree`.
190    pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
191        self.ignore_none();
192        match self.entry() {
193            Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
194            _ => None,
195        }
196    }
197
198    /// If the cursor is pointing at a `Punct`, returns it along with a cursor
199    /// pointing at the next `TokenTree`.
200    pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
201        self.ignore_none();
202        match self.entry() {
203            Entry::Punct(punct) if punct.as_char() != '\'' => {
204                Some((punct.clone(), unsafe { self.bump_ignore_group() }))
205            }
206            _ => None,
207        }
208    }
209
210    /// If the cursor is pointing at a `Literal`, return it along with a cursor
211    /// pointing at the next `TokenTree`.
212    pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
213        self.ignore_none();
214        match self.entry() {
215            Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
216            _ => None,
217        }
218    }
219
220    /// If the cursor is pointing at a `Lifetime`, returns it along with a
221    /// cursor pointing at the next `TokenTree`.
222    pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
223        self.ignore_none();
224        match self.entry() {
225            Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
226                let next = unsafe { self.bump_ignore_group() };
227                let (ident, rest) = next.ident()?;
228                let lifetime = Lifetime {
229                    apostrophe: punct.span(),
230                    ident,
231                };
232                Some((lifetime, rest))
233            }
234            _ => None,
235        }
236    }
237
238    /// If the cursor is pointing at a `Group` with the given delimiter, returns
239    /// a cursor into that group and one pointing to the next `TokenTree`.
240    pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
241        // If we're not trying to enter a none-delimited group, we want to
242        // ignore them. We have to make sure to _not_ ignore them when we want
243        // to enter them, of course. For obvious reasons.
244        if delim != Delimiter::None {
245            self.ignore_none();
246        }
247
248        if let Entry::Group(group, end_offset) = self.entry() {
249            if group.delimiter() == delim {
250                let span = group.delim_span();
251                let end_of_group = unsafe { self.ptr.add(*end_offset) };
252                let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
253                let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
254                return Some((inside_of_group, span, after_group));
255            }
256        }
257
258        None
259    }
260
261    /// If the cursor is pointing at a `Group`, returns a cursor into the group
262    /// and one pointing to the next `TokenTree`.
263    pub fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
264        if let Entry::Group(group, end_offset) = self.entry() {
265            let delimiter = group.delimiter();
266            let span = group.delim_span();
267            let end_of_group = unsafe { self.ptr.add(*end_offset) };
268            let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
269            let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
270            return Some((inside_of_group, delimiter, span, after_group));
271        }
272
273        None
274    }
275
276    pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
277        if let Entry::Group(group, end_offset) = self.entry() {
278            let end_of_group = unsafe { self.ptr.add(*end_offset) };
279            let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
280            return Some((group.clone(), after_group));
281        }
282
283        None
284    }
285
286    /// Copies all remaining tokens visible from this cursor into a
287    /// `TokenStream`.
288    pub fn token_stream(self) -> TokenStream {
289        let mut tokens = TokenStream::new();
290        let mut cursor = self;
291        while let Some((tt, rest)) = cursor.token_tree() {
292            tokens.append(tt);
293            cursor = rest;
294        }
295        tokens
296    }
297
298    /// If the cursor is pointing at a `TokenTree`, returns it along with a
299    /// cursor pointing at the next `TokenTree`.
300    ///
301    /// Returns `None` if the cursor has reached the end of its stream.
302    ///
303    /// This method does not treat `None`-delimited groups as transparent, and
304    /// will return a `Group(None, ..)` if the cursor is looking at one.
305    pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
306        let (tree, len) = match self.entry() {
307            Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
308            Entry::Literal(literal) => (literal.clone().into(), 1),
309            Entry::Ident(ident) => (ident.clone().into(), 1),
310            Entry::Punct(punct) => (punct.clone().into(), 1),
311            Entry::End(..) => return None,
312        };
313
314        let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
315        Some((tree, rest))
316    }
317
318    /// Returns the `Span` of the current token, or `Span::call_site()` if this
319    /// cursor points to eof.
320    pub fn span(mut self) -> Span {
321        match self.entry() {
322            Entry::Group(group, _) => group.span(),
323            Entry::Literal(literal) => literal.span(),
324            Entry::Ident(ident) => ident.span(),
325            Entry::Punct(punct) => punct.span(),
326            Entry::End(_, offset) => {
327                self.ptr = unsafe { self.ptr.offset(*offset) };
328                if let Entry::Group(group, _) = self.entry() {
329                    group.span_close()
330                } else {
331                    Span::call_site()
332                }
333            }
334        }
335    }
336
337    /// Returns the `Span` of the token immediately prior to the position of
338    /// this cursor, or of the current token if there is no previous one.
339    #[cfg(any(feature = "full", feature = "derive"))]
340    pub(crate) fn prev_span(mut self) -> Span {
341        if start_of_buffer(self) < self.ptr {
342            self.ptr = unsafe { self.ptr.offset(-1) };
343        }
344        self.span()
345    }
346
347    /// Skip over the next token that is not a None-delimited group, without
348    /// cloning it. Returns `None` if this cursor points to eof.
349    ///
350    /// This method treats `'lifetimes` as a single token.
351    pub(crate) fn skip(mut self) -> Option<Cursor<'a>> {
352        self.ignore_none();
353
354        let len = match self.entry() {
355            Entry::End(..) => return None,
356
357            // Treat lifetimes as a single tt for the purposes of 'skip'.
358            Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
359                match unsafe { &*self.ptr.add(1) } {
360                    Entry::Ident(_) => 2,
361                    _ => 1,
362                }
363            }
364
365            Entry::Group(_, end_offset) => *end_offset,
366            _ => 1,
367        };
368
369        Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
370    }
371
372    pub(crate) fn scope_delimiter(self) -> Delimiter {
373        match unsafe { &*self.scope } {
374            Entry::End(_, offset) => match unsafe { &*self.scope.offset(*offset) } {
375                Entry::Group(group, _) => group.delimiter(),
376                _ => Delimiter::None,
377            },
378            _ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
379        }
380    }
381}
382
383impl<'a> Copy for Cursor<'a> {}
384
385impl<'a> Clone for Cursor<'a> {
386    fn clone(&self) -> Self {
387        *self
388    }
389}
390
391impl<'a> Eq for Cursor<'a> {}
392
393impl<'a> PartialEq for Cursor<'a> {
394    fn eq(&self, other: &Self) -> bool {
395        ptr::eq(self.ptr, other.ptr)
396    }
397}
398
399impl<'a> PartialOrd for Cursor<'a> {
400    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
401        if same_buffer(*self, *other) {
402            Some(cmp_assuming_same_buffer(*self, *other))
403        } else {
404            None
405        }
406    }
407}
408
409pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
410    ptr::eq(a.scope, b.scope)
411}
412
413pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
414    ptr::eq(start_of_buffer(a), start_of_buffer(b))
415}
416
417fn start_of_buffer(cursor: Cursor) -> *const Entry {
418    unsafe {
419        match &*cursor.scope {
420            Entry::End(offset, _) => cursor.scope.offset(*offset),
421            _ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
422        }
423    }
424}
425
426pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
427    a.ptr.cmp(&b.ptr)
428}
429
430pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
431    match cursor.entry() {
432        Entry::Group(group, _) => group.span_open(),
433        _ => cursor.span(),
434    }
435}