syn/
buffer.rs

1//! A stably addressed token buffer supporting efficient traversal based on a
2//! cheaply copyable cursor.
3
4// This module is heavily commented as it contains most of the unsafe code in
5// Syn, and caution should be used when editing it. The public-facing interface
6// is 100% safe but the implementation is fragile internally.
7
8use crate::ext::TokenStreamExt as _;
9use crate::Lifetime;
10use alloc::boxed::Box;
11use alloc::vec::Vec;
12use core::cmp::Ordering;
13use core::marker::PhantomData;
14use core::ptr;
15use proc_macro2::extra::DelimSpan;
16use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
17
18/// Internal type which is used instead of `TokenTree` to represent a token tree
19/// within a `TokenBuffer`.
20enum Entry {
21    // Mimicking types from proc-macro.
22    // Group entries contain the offset to the matching End entry.
23    Group(Group, usize),
24    Ident(Ident),
25    Punct(Punct),
26    Literal(Literal),
27    // End entries contain the offset (negative) to the start of the buffer, and
28    // offset (negative) to the matching Group entry.
29    End(isize, isize),
30}
31
32/// A buffer that can be efficiently traversed multiple times, unlike
33/// `TokenStream` which requires a deep copy in order to traverse more than
34/// once.
35pub struct TokenBuffer {
36    // NOTE: Do not implement clone on this - while the current design could be
37    // cloned, other designs which could be desirable may not be cloneable.
38    entries: Box<[Entry]>,
39}
40
41impl TokenBuffer {
42    fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
43        for tt in stream {
44            match tt {
45                TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
46                TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
47                TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
48                TokenTree::Group(group) => {
49                    let group_start_index = entries.len();
50                    entries.push(Entry::End(0, 0)); // we replace this below
51                    Self::recursive_new(entries, group.stream());
52                    let group_end_index = entries.len();
53                    let group_offset = group_end_index - group_start_index;
54                    entries.push(Entry::End(
55                        -(group_end_index as isize),
56                        -(group_offset as isize),
57                    ));
58                    entries[group_start_index] = Entry::Group(group, group_offset);
59                }
60            }
61        }
62    }
63
64    /// Creates a `TokenBuffer` containing all the tokens from the input
65    /// `proc_macro::TokenStream`.
66    #[cfg(feature = "proc-macro")]
67    #[cfg_attr(docsrs, doc(cfg(feature = "proc-macro")))]
68    pub fn new(stream: proc_macro::TokenStream) -> Self {
69        Self::new2(stream.into())
70    }
71
72    /// Creates a `TokenBuffer` containing all the tokens from the input
73    /// `proc_macro2::TokenStream`.
74    pub fn new2(stream: TokenStream) -> Self {
75        let mut entries = Vec::new();
76        Self::recursive_new(&mut entries, stream);
77        entries.push(Entry::End(-(entries.len() as isize), 0));
78        Self {
79            entries: entries.into_boxed_slice(),
80        }
81    }
82
83    /// Creates a cursor referencing the first token in the buffer and able to
84    /// traverse until the end of the buffer.
85    pub fn begin(&self) -> Cursor {
86        let ptr = self.entries.as_ptr();
87        unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
88    }
89}
90
91/// A cheaply copyable cursor into a `TokenBuffer`.
92///
93/// This cursor holds a shared reference into the immutable data which is used
94/// internally to represent a `TokenStream`, and can be efficiently manipulated
95/// and copied around.
96///
97/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
98/// object and get a cursor to its first token with `begin()`.
99pub struct Cursor<'a> {
100    // The current entry which the `Cursor` is pointing at.
101    ptr: *const Entry,
102    // This is the only `Entry::End` object which this cursor is allowed to
103    // point at. All other `End` objects are skipped over in `Cursor::create`.
104    scope: *const Entry,
105    // Cursor is covariant in 'a. This field ensures that our pointers are still
106    // valid.
107    marker: PhantomData<&'a Entry>,
108}
109
110impl<'a> Cursor<'a> {
111    /// Creates a cursor referencing a static empty TokenStream.
112    pub fn empty() -> Self {
113        // It's safe in this situation for us to put an `Entry` object in global
114        // storage, despite it not actually being safe to send across threads
115        // (`Ident` is a reference into a thread-local table). This is because
116        // this entry never includes a `Ident` object.
117        //
118        // This wrapper struct allows us to break the rules and put a `Sync`
119        // object in global storage.
120        struct UnsafeSyncEntry(Entry);
121        unsafe impl Sync for UnsafeSyncEntry {}
122        static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0, 0));
123
124        Cursor {
125            ptr: &EMPTY_ENTRY.0,
126            scope: &EMPTY_ENTRY.0,
127            marker: PhantomData,
128        }
129    }
130
131    /// This create method intelligently exits non-explicitly-entered
132    /// `None`-delimited scopes when the cursor reaches the end of them,
133    /// allowing for them to be treated transparently.
134    unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
135        // NOTE: If we're looking at a `End`, we want to advance the cursor
136        // past it, unless `ptr == scope`, which means that we're at the edge of
137        // our cursor's scope. We should only have `ptr != scope` at the exit
138        // from None-delimited groups entered with `ignore_none`.
139        while let Entry::End(..) = unsafe { &*ptr } {
140            if ptr::eq(ptr, scope) {
141                break;
142            }
143            ptr = unsafe { ptr.add(1) };
144        }
145
146        Cursor {
147            ptr,
148            scope,
149            marker: PhantomData,
150        }
151    }
152
153    /// Get the current entry.
154    fn entry(self) -> &'a Entry {
155        unsafe { &*self.ptr }
156    }
157
158    /// Bump the cursor to point at the next token after the current one. This
159    /// is undefined behavior if the cursor is currently looking at an
160    /// `Entry::End`.
161    ///
162    /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
163    /// point at the first token in the group (with the same scope end).
164    unsafe fn bump_ignore_group(self) -> Cursor<'a> {
165        unsafe { Cursor::create(self.ptr.add(1), self.scope) }
166    }
167
168    /// While the cursor is looking at a `None`-delimited group, move it to look
169    /// at the first token inside instead. If the group is empty, this will move
170    /// the cursor past the `None`-delimited group.
171    ///
172    /// WARNING: This mutates its argument.
173    fn ignore_none(&mut self) {
174        while let Entry::Group(group, _) = self.entry() {
175            if group.delimiter() == Delimiter::None {
176                unsafe { *self = self.bump_ignore_group() };
177            } else {
178                break;
179            }
180        }
181    }
182
183    /// Checks whether the cursor is currently pointing at the end of its valid
184    /// scope.
185    pub fn eof(self) -> bool {
186        // We're at eof if we're at the end of our scope.
187        ptr::eq(self.ptr, self.scope)
188    }
189
190    /// If the cursor is pointing at a `Ident`, returns it along with a cursor
191    /// pointing at the next `TokenTree`.
192    pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
193        self.ignore_none();
194        match self.entry() {
195            Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
196            _ => None,
197        }
198    }
199
200    /// If the cursor is pointing at a `Punct`, returns it along with a cursor
201    /// pointing at the next `TokenTree`.
202    pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
203        self.ignore_none();
204        match self.entry() {
205            Entry::Punct(punct) if punct.as_char() != '\'' => {
206                Some((punct.clone(), unsafe { self.bump_ignore_group() }))
207            }
208            _ => None,
209        }
210    }
211
212    /// If the cursor is pointing at a `Literal`, return it along with a cursor
213    /// pointing at the next `TokenTree`.
214    pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
215        self.ignore_none();
216        match self.entry() {
217            Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
218            _ => None,
219        }
220    }
221
222    /// If the cursor is pointing at a `Lifetime`, returns it along with a
223    /// cursor pointing at the next `TokenTree`.
224    pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
225        self.ignore_none();
226        match self.entry() {
227            Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
228                let next = unsafe { self.bump_ignore_group() };
229                let (ident, rest) = next.ident()?;
230                let lifetime = Lifetime {
231                    apostrophe: punct.span(),
232                    ident,
233                };
234                Some((lifetime, rest))
235            }
236            _ => None,
237        }
238    }
239
240    /// If the cursor is pointing at a `Group` with the given delimiter, returns
241    /// a cursor into that group and one pointing to the next `TokenTree`.
242    pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
243        // If we're not trying to enter a none-delimited group, we want to
244        // ignore them. We have to make sure to _not_ ignore them when we want
245        // to enter them, of course. For obvious reasons.
246        if delim != Delimiter::None {
247            self.ignore_none();
248        }
249
250        if let Entry::Group(group, end_offset) = self.entry() {
251            if group.delimiter() == delim {
252                let span = group.delim_span();
253                let end_of_group = unsafe { self.ptr.add(*end_offset) };
254                let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
255                let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
256                return Some((inside_of_group, span, after_group));
257            }
258        }
259
260        None
261    }
262
263    /// If the cursor is pointing at a `Group`, returns a cursor into the group
264    /// and one pointing to the next `TokenTree`.
265    pub fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
266        if let Entry::Group(group, end_offset) = self.entry() {
267            let delimiter = group.delimiter();
268            let span = group.delim_span();
269            let end_of_group = unsafe { self.ptr.add(*end_offset) };
270            let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
271            let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
272            return Some((inside_of_group, delimiter, span, after_group));
273        }
274
275        None
276    }
277
278    pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
279        if let Entry::Group(group, end_offset) = self.entry() {
280            let end_of_group = unsafe { self.ptr.add(*end_offset) };
281            let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
282            return Some((group.clone(), after_group));
283        }
284
285        None
286    }
287
288    /// Copies all remaining tokens visible from this cursor into a
289    /// `TokenStream`.
290    pub fn token_stream(self) -> TokenStream {
291        let mut tokens = TokenStream::new();
292        let mut cursor = self;
293        while let Some((tt, rest)) = cursor.token_tree() {
294            tokens.append(tt);
295            cursor = rest;
296        }
297        tokens
298    }
299
300    /// If the cursor is pointing at a `TokenTree`, returns it along with a
301    /// cursor pointing at the next `TokenTree`.
302    ///
303    /// Returns `None` if the cursor has reached the end of its stream.
304    ///
305    /// This method does not treat `None`-delimited groups as transparent, and
306    /// will return a `Group(None, ..)` if the cursor is looking at one.
307    pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
308        let (tree, len) = match self.entry() {
309            Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
310            Entry::Literal(literal) => (literal.clone().into(), 1),
311            Entry::Ident(ident) => (ident.clone().into(), 1),
312            Entry::Punct(punct) => (punct.clone().into(), 1),
313            Entry::End(..) => return None,
314        };
315
316        let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
317        Some((tree, rest))
318    }
319
320    /// Returns the `Span` of the current token, or `Span::call_site()` if this
321    /// cursor points to eof.
322    pub fn span(mut self) -> Span {
323        match self.entry() {
324            Entry::Group(group, _) => group.span(),
325            Entry::Literal(literal) => literal.span(),
326            Entry::Ident(ident) => ident.span(),
327            Entry::Punct(punct) => punct.span(),
328            Entry::End(_, offset) => {
329                self.ptr = unsafe { self.ptr.offset(*offset) };
330                if let Entry::Group(group, _) = self.entry() {
331                    group.span_close()
332                } else {
333                    Span::call_site()
334                }
335            }
336        }
337    }
338
339    /// Returns the `Span` of the token immediately prior to the position of
340    /// this cursor, or of the current token if there is no previous one.
341    #[cfg(any(feature = "full", feature = "derive"))]
342    pub(crate) fn prev_span(mut self) -> Span {
343        if start_of_buffer(self) < self.ptr {
344            self.ptr = unsafe { self.ptr.sub(1) };
345        }
346        self.span()
347    }
348
349    /// Skip over the next token that is not a None-delimited group, without
350    /// cloning it. Returns `None` if this cursor points to eof.
351    ///
352    /// This method treats `'lifetimes` as a single token.
353    pub(crate) fn skip(mut self) -> Option<Cursor<'a>> {
354        self.ignore_none();
355
356        let len = match self.entry() {
357            Entry::End(..) => return None,
358
359            // Treat lifetimes as a single tt for the purposes of 'skip'.
360            Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
361                match unsafe { &*self.ptr.add(1) } {
362                    Entry::Ident(_) => 2,
363                    _ => 1,
364                }
365            }
366
367            Entry::Group(_, end_offset) => *end_offset,
368            _ => 1,
369        };
370
371        Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
372    }
373
374    pub(crate) fn scope_delimiter(self) -> Delimiter {
375        match unsafe { &*self.scope } {
376            Entry::End(_, offset) => match unsafe { &*self.scope.offset(*offset) } {
377                Entry::Group(group, _) => group.delimiter(),
378                _ => Delimiter::None,
379            },
380            _ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
381        }
382    }
383}
384
385impl<'a> Copy for Cursor<'a> {}
386
387impl<'a> Clone for Cursor<'a> {
388    fn clone(&self) -> Self {
389        *self
390    }
391}
392
393impl<'a> Eq for Cursor<'a> {}
394
395impl<'a> PartialEq for Cursor<'a> {
396    fn eq(&self, other: &Self) -> bool {
397        ptr::eq(self.ptr, other.ptr)
398    }
399}
400
401impl<'a> PartialOrd for Cursor<'a> {
402    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
403        if same_buffer(*self, *other) {
404            Some(cmp_assuming_same_buffer(*self, *other))
405        } else {
406            None
407        }
408    }
409}
410
411pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
412    ptr::eq(a.scope, b.scope)
413}
414
415pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
416    ptr::eq(start_of_buffer(a), start_of_buffer(b))
417}
418
419fn start_of_buffer(cursor: Cursor) -> *const Entry {
420    unsafe {
421        match &*cursor.scope {
422            Entry::End(offset, _) => cursor.scope.offset(*offset),
423            _ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
424        }
425    }
426}
427
428pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
429    a.ptr.cmp(&b.ptr)
430}
431
432pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
433    match cursor.entry() {
434        Entry::Group(group, _) => group.span_open(),
435        _ => cursor.span(),
436    }
437}