syn/discouraged.rs
1//! Extensions to the parsing API with niche applicability.
2
3use crate::buffer::Cursor;
4use crate::error::Result;
5use crate::parse::{inner_unexpected, ParseBuffer, Unexpected};
6use proc_macro2::extra::DelimSpan;
7use proc_macro2::Delimiter;
8use std::cell::Cell;
9use std::mem;
10use std::rc::Rc;
11
/// Speculative-parsing extensions for the `ParseStream` API.
pub trait Speculative {
    /// Moves this parse stream forward to the position reached by a forked
    /// parse stream.
    ///
    /// `advance_to` is the inverse of [`ParseStream::fork`]: fork the stream,
    /// parse speculatively on the fork, and then "commit" that work by
    /// advancing the original stream to wherever the fork ended up.
    ///
    /// Prefer not to do this when an alternative exists, because it limits
    /// the ability to generate useful errors. Even so, it is often the only
    /// way to parse syntax shaped like `A* B*` where `A` and `B` are
    /// arbitrary. The difficulty is that a fork failing to parse an `A` gives
    /// no way to distinguish between a syntax error in something the user
    /// intended as an `A`, and the `A`s simply having ended so that parsing of
    /// `B`s should begin. Use with care.
    ///
    /// Note as well that when `A` is a subset of `B`, `A* B*` can be handled
    /// by parsing `B*` and then splitting the leading members of `A` off the
    /// repetition, which sidesteps the downsides of speculative parsing
    /// entirely.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// It has been suggested that the colons of the turbofish syntax
    /// `path::to::<T>` could be made optional, so that `path::to<T>` is
    /// accepted in expression position. Concretely, [RFC 2544] proposes that
    /// [`PathSegment`] parsing always attempt to consume a following `<` token
    /// as the beginning of generic arguments, and rewind to the `<` when that
    /// attempt fails (for instance because the token is really a less-than
    /// operator).
    ///
    /// That is precisely the "fork, try, commit" pattern which
    /// [`ParseStream::fork`] discourages. Using `advance_to` means the
    /// speculatively parsed content does not have to be parsed a second time.
    ///
    /// Within syn, the behavior change only requires swapping out the `Parse`
    /// implementation of `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// Error presentation is where this style of speculative parsing hurts
    /// most. The error reported is the one from the "fallback" parse, even in
    /// cases where the lookahead was the "correct" interpretation. Sticking
    /// with the turbofish example, consider this unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// Parsed as generic arguments, it lets us report
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// With the speculative parsing shown above, however, the failure to parse
    /// generic arguments makes the parser fall back to treating `<` as a
    /// less-than operator. It then tries to read `&'a` as the start of a
    /// labelled loop, which produces this far less helpful error:
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// Heuristics can soften the problem (two examples: report the parse
    /// errors of both forks, or report the one from the fork that consumed
    /// more tokens). But when the grammar is under your control, keeping it
    /// parseable as LL(3), without the LL(*) speculative parsing that this
    /// method makes possible, makes displaying reasonable errors much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// The work done by this method is cheap and of fixed size; it does not
    /// depend on the distance between the positions of the two streams.
    ///
    /// # Panics
    ///
    /// The stream passed as the argument of `advance_to` must have been
    /// produced by forking `self`. Advancing to any other stream will cause a
    /// panic.
    fn advance_to(&self, fork: &Self);
}
166
167impl<'a> Speculative for ParseBuffer<'a> {
168    fn advance_to(&self, fork: &Self) {
169        if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
170            panic!("fork was not derived from the advancing parse stream");
171        }
172
173        let (self_unexp, self_sp) = inner_unexpected(self);
174        let (fork_unexp, fork_sp) = inner_unexpected(fork);
175        if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
176            match (fork_sp, self_sp) {
177                // Unexpected set on the fork, but not on `self`, copy it over.
178                (Some((span, delimiter)), None) => {
179                    self_unexp.set(Unexpected::Some(span, delimiter));
180                }
181                // Unexpected unset. Use chain to propagate errors from fork.
182                (None, None) => {
183                    fork_unexp.set(Unexpected::Chain(self_unexp));
184
185                    // Ensure toplevel 'unexpected' tokens from the fork don't
186                    // propagate up the chain by replacing the root `unexpected`
187                    // pointer, only 'unexpected' tokens from existing group
188                    // parsers should propagate.
189                    fork.unexpected
190                        .set(Some(Rc::new(Cell::new(Unexpected::None))));
191                }
192                // Unexpected has been set on `self`. No changes needed.
193                (_, Some(_)) => {}
194            }
195        }
196
197        // See comment on `cell` in the struct definition.
198        self.cell
199            .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
200    }
201}
202
203/// Extensions to the `ParseStream` API to support manipulating invisible
204/// delimiters the same as if they were visible.
205pub trait AnyDelimiter {
206    /// Returns the delimiter, the span of the delimiter token, and the nested
207    /// contents for further parsing.
208    fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>;
209}
210
211impl<'a> AnyDelimiter for ParseBuffer<'a> {
212    fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> {
213        self.step(|cursor| {
214            if let Some((content, delimiter, span, rest)) = cursor.any_group() {
215                let scope = span.close();
216                let nested = crate::parse::advance_step_cursor(cursor, content);
217                let unexpected = crate::parse::get_unexpected(self);
218                let content = crate::parse::new_parse_buffer(scope, nested, unexpected);
219                Ok(((delimiter, span, content), rest))
220            } else {
221                Err(cursor.error("expected any delimiter"))
222            }
223        })
224    }
225}