time/format_description/parse/
ast.rs

1//! AST for parsing format descriptions.
2
3use alloc::boxed::Box;
4use alloc::string::String;
5use alloc::vec::Vec;
6use core::iter;
7
8use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused};
9use crate::internal_macros::bug;
10
11/// One part of a complete format description.
12pub(super) enum Item<'a> {
13    /// A literal string, formatted and parsed as-is.
14    ///
15    /// This should never be present inside a nested format description.
16    Literal(Spanned<&'a [u8]>),
17    /// A sequence of brackets. The first acts as the escape character.
18    ///
19    /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`.
20    EscapedBracket {
21        /// The first bracket.
22        _first: Unused<Location>,
23        /// The second bracket.
24        _second: Unused<Location>,
25    },
26    /// Part of a type, along with its modifiers.
27    Component {
28        /// Where the opening bracket was in the format string.
29        _opening_bracket: Unused<Location>,
30        /// Whitespace between the opening bracket and name.
31        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
32        /// The name of the component.
33        name: Spanned<&'a [u8]>,
34        /// The modifiers for the component.
35        modifiers: Box<[Modifier<'a>]>,
36        /// Whitespace between the modifiers and closing bracket.
37        _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
38        /// Where the closing bracket was in the format string.
39        _closing_bracket: Unused<Location>,
40    },
41    /// An optional sequence of items.
42    Optional {
43        /// Where the opening bracket was in the format string.
44        opening_bracket: Location,
45        /// Whitespace between the opening bracket and "optional".
46        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
47        /// The "optional" keyword.
48        _optional_kw: Unused<Spanned<&'a [u8]>>,
49        /// Whitespace between the "optional" keyword and the opening bracket.
50        _whitespace: Unused<Spanned<&'a [u8]>>,
51        /// The items within the optional sequence.
52        nested_format_description: NestedFormatDescription<'a>,
53        /// Where the closing bracket was in the format string.
54        closing_bracket: Location,
55    },
56    /// The first matching parse of a sequence of items.
57    First {
58        /// Where the opening bracket was in the format string.
59        opening_bracket: Location,
60        /// Whitespace between the opening bracket and "first".
61        _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
62        /// The "first" keyword.
63        _first_kw: Unused<Spanned<&'a [u8]>>,
64        /// Whitespace between the "first" keyword and the opening bracket.
65        _whitespace: Unused<Spanned<&'a [u8]>>,
66        /// The sequences of items to try.
67        nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
68        /// Where the closing bracket was in the format string.
69        closing_bracket: Location,
70    },
71}
72
73/// A format description that is nested within another format description.
74pub(super) struct NestedFormatDescription<'a> {
75    /// Where the opening bracket was in the format string.
76    pub(super) _opening_bracket: Unused<Location>,
77    /// The items within the nested format description.
78    pub(super) items: Box<[Item<'a>]>,
79    /// Where the closing bracket was in the format string.
80    pub(super) _closing_bracket: Unused<Location>,
81    /// Whitespace between the closing bracket and the next item.
82    pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
83}
84
85/// A modifier for a component.
86pub(super) struct Modifier<'a> {
87    /// Whitespace preceding the modifier.
88    pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
89    /// The key of the modifier.
90    pub(super) key: Spanned<&'a [u8]>,
91    /// Where the colon of the modifier was in the format string.
92    pub(super) _colon: Unused<Location>,
93    /// The value of the modifier.
94    pub(super) value: Spanned<&'a [u8]>,
95}
96
97/// Parse the provided tokens into an AST.
98pub(super) fn parse<
99    'item: 'iter,
100    'iter,
101    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
102    const VERSION: usize,
103>(
104    tokens: &'iter mut lexer::Lexed<I>,
105) -> impl Iterator<Item = Result<Item<'item>, Error>> + 'iter {
106    validate_version!(VERSION);
107    parse_inner::<_, false, VERSION>(tokens)
108}
109
110/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
111/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
112fn parse_inner<
113    'item,
114    I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
115    const NESTED: bool,
116    const VERSION: usize,
117>(
118    tokens: &mut lexer::Lexed<I>,
119) -> impl Iterator<Item = Result<Item<'item>, Error>> + '_ {
120    validate_version!(VERSION);
121    iter::from_fn(move || {
122        if NESTED && tokens.peek_closing_bracket().is_some() {
123            return None;
124        }
125
126        let next = match tokens.next()? {
127            Ok(token) => token,
128            Err(err) => return Some(Err(err)),
129        };
130
131        Some(match next {
132            lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => {
133                bug!("literal should not be present in nested description")
134            }
135            lexer::Token::Literal(value) => Ok(Item::Literal(value)),
136            lexer::Token::Bracket {
137                kind: lexer::BracketKind::Opening,
138                location,
139            } => {
140                if version!(..=1) {
141                    if let Some(second_location) = tokens.next_if_opening_bracket() {
142                        Ok(Item::EscapedBracket {
143                            _first: unused(location),
144                            _second: unused(second_location),
145                        })
146                    } else {
147                        parse_component::<_, VERSION>(location, tokens)
148                    }
149                } else {
150                    parse_component::<_, VERSION>(location, tokens)
151                }
152            }
153            lexer::Token::Bracket {
154                kind: lexer::BracketKind::Closing,
155                location: _,
156            } if NESTED => {
157                bug!("closing bracket should be caught by the `if` statement")
158            }
159            lexer::Token::Bracket {
160                kind: lexer::BracketKind::Closing,
161                location: _,
162            } => {
163                bug!("closing bracket should have been consumed by `parse_component`")
164            }
165            lexer::Token::ComponentPart {
166                kind: _, // whitespace is significant in nested components
167                value,
168            } if NESTED => Ok(Item::Literal(value)),
169            lexer::Token::ComponentPart { kind: _, value: _ } => {
170                bug!("component part should have been consumed by `parse_component`")
171            }
172        })
173    })
174}
175
176/// Parse a component. This assumes that the opening bracket has already been consumed.
177fn parse_component<
178    'a,
179    I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
180    const VERSION: usize,
181>(
182    opening_bracket: Location,
183    tokens: &mut lexer::Lexed<I>,
184) -> Result<Item<'a>, Error> {
185    validate_version!(VERSION);
186    let leading_whitespace = tokens.next_if_whitespace();
187
188    let Some(name) = tokens.next_if_not_whitespace() else {
189        let span = match leading_whitespace {
190            Some(Spanned { value: _, span }) => span,
191            None => opening_bracket.to_self(),
192        };
193        return Err(Error {
194            _inner: unused(span.error("expected component name")),
195            public: crate::error::InvalidFormatDescription::MissingComponentName {
196                index: span.start.byte as _,
197            },
198        });
199    };
200
201    if *name == b"optional" {
202        let Some(whitespace) = tokens.next_if_whitespace() else {
203            return Err(Error {
204                _inner: unused(name.span.error("expected whitespace after `optional`")),
205                public: crate::error::InvalidFormatDescription::Expected {
206                    what: "whitespace after `optional`",
207                    index: name.span.end.byte as _,
208                },
209            });
210        };
211
212        let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?;
213
214        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
215            return Err(Error {
216                _inner: unused(opening_bracket.error("unclosed bracket")),
217                public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
218                    index: opening_bracket.byte as _,
219                },
220            });
221        };
222
223        return Ok(Item::Optional {
224            opening_bracket,
225            _leading_whitespace: unused(leading_whitespace),
226            _optional_kw: unused(name),
227            _whitespace: unused(whitespace),
228            nested_format_description: nested,
229            closing_bracket,
230        });
231    }
232
233    if *name == b"first" {
234        let Some(whitespace) = tokens.next_if_whitespace() else {
235            return Err(Error {
236                _inner: unused(name.span.error("expected whitespace after `first`")),
237                public: crate::error::InvalidFormatDescription::Expected {
238                    what: "whitespace after `first`",
239                    index: name.span.end.byte as _,
240                },
241            });
242        };
243
244        let mut nested_format_descriptions = Vec::new();
245        while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) {
246            nested_format_descriptions.push(description);
247        }
248
249        let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
250            return Err(Error {
251                _inner: unused(opening_bracket.error("unclosed bracket")),
252                public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
253                    index: opening_bracket.byte as _,
254                },
255            });
256        };
257
258        return Ok(Item::First {
259            opening_bracket,
260            _leading_whitespace: unused(leading_whitespace),
261            _first_kw: unused(name),
262            _whitespace: unused(whitespace),
263            nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
264            closing_bracket,
265        });
266    }
267
268    let mut modifiers = Vec::new();
269    let trailing_whitespace = loop {
270        let Some(whitespace) = tokens.next_if_whitespace() else {
271            break None;
272        };
273
274        // This is not necessary for proper parsing, but provides a much better error when a nested
275        // description is used where it's not allowed.
276        if let Some(location) = tokens.next_if_opening_bracket() {
277            return Err(Error {
278                _inner: unused(
279                    location
280                        .to_self()
281                        .error("modifier must be of the form `key:value`"),
282                ),
283                public: crate::error::InvalidFormatDescription::InvalidModifier {
284                    value: String::from("["),
285                    index: location.byte as _,
286                },
287            });
288        }
289
290        let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else {
291            break Some(whitespace);
292        };
293
294        let Some(colon_index) = value.iter().position(|&b| b == b':') else {
295            return Err(Error {
296                _inner: unused(span.error("modifier must be of the form `key:value`")),
297                public: crate::error::InvalidFormatDescription::InvalidModifier {
298                    value: String::from_utf8_lossy(value).into_owned(),
299                    index: span.start.byte as _,
300                },
301            });
302        };
303        let key = &value[..colon_index];
304        let value = &value[colon_index + 1..];
305
306        if key.is_empty() {
307            return Err(Error {
308                _inner: unused(span.shrink_to_start().error("expected modifier key")),
309                public: crate::error::InvalidFormatDescription::InvalidModifier {
310                    value: String::new(),
311                    index: span.start.byte as _,
312                },
313            });
314        }
315        if value.is_empty() {
316            return Err(Error {
317                _inner: unused(span.shrink_to_end().error("expected modifier value")),
318                public: crate::error::InvalidFormatDescription::InvalidModifier {
319                    value: String::new(),
320                    index: span.shrink_to_end().start.byte as _,
321                },
322            });
323        }
324
325        modifiers.push(Modifier {
326            _leading_whitespace: unused(whitespace),
327            key: key.spanned(span.shrink_to_before(colon_index as _)),
328            _colon: unused(span.start.offset(colon_index as _)),
329            value: value.spanned(span.shrink_to_after(colon_index as _)),
330        });
331    };
332
333    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
334        return Err(Error {
335            _inner: unused(opening_bracket.error("unclosed bracket")),
336            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
337                index: opening_bracket.byte as _,
338            },
339        });
340    };
341
342    Ok(Item::Component {
343        _opening_bracket: unused(opening_bracket),
344        _leading_whitespace: unused(leading_whitespace),
345        name,
346        modifiers: modifiers.into_boxed_slice(),
347        _trailing_whitespace: unused(trailing_whitespace),
348        _closing_bracket: unused(closing_bracket),
349    })
350}
351
352/// Parse a nested format description. The location provided is the the most recent one consumed.
353fn parse_nested<'a, I: Iterator<Item = Result<lexer::Token<'a>, Error>>, const VERSION: usize>(
354    last_location: Location,
355    tokens: &mut lexer::Lexed<I>,
356) -> Result<NestedFormatDescription<'a>, Error> {
357    validate_version!(VERSION);
358    let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
359        return Err(Error {
360            _inner: unused(last_location.error("expected opening bracket")),
361            public: crate::error::InvalidFormatDescription::Expected {
362                what: "opening bracket",
363                index: last_location.byte as _,
364            },
365        });
366    };
367    let items = parse_inner::<_, true, VERSION>(tokens).collect::<Result<_, _>>()?;
368    let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
369        return Err(Error {
370            _inner: unused(opening_bracket.error("unclosed bracket")),
371            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
372                index: opening_bracket.byte as _,
373            },
374        });
375    };
376    let trailing_whitespace = tokens.next_if_whitespace();
377
378    Ok(NestedFormatDescription {
379        _opening_bracket: unused(opening_bracket),
380        items,
381        _closing_bracket: unused(closing_bracket),
382        _trailing_whitespace: unused(trailing_whitespace),
383    })
384}