icu_locid/parser/
langid.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5pub use super::errors::ParserError;
6use crate::extensions::unicode::{Attribute, Key, Value};
7use crate::extensions::ExtensionType;
8use crate::parser::SubtagIterator;
9use crate::shortvec::ShortBoxSlice;
10use crate::LanguageIdentifier;
11use crate::{extensions, subtags};
12use tinystr::TinyAsciiStr;
13
/// Selects how strictly the parsers in this module treat the subtags that follow
/// the language subtag.
#[derive(PartialEq, Clone, Copy)]
pub enum ParserMode {
    /// Every remaining subtag must parse as a script, region or variant; subtags of
    /// length 1 receive no special treatment and an unrecognized subtag is an error.
    LanguageIdentifier,
    /// A subtag of length 1 ends the language identifier portion. The const parser in
    /// this module additionally attempts to parse a Unicode extension in this mode.
    Locale,
    /// A subtag of length 1, or any subtag that is not a valid script, region or
    /// variant for the current position, ends parsing without producing an error.
    Partial,
}
20
/// Tracks which kinds of subtag the parser may still accept after the language subtag.
#[derive(PartialEq, Clone, Copy)]
enum ParserPosition {
    /// Initial state: a script, a region or a variant may come next.
    Script,
    /// A script has been consumed: a region or a variant may come next.
    Region,
    /// A region or a variant has been consumed: only variants may come next.
    Variant,
}
27
28pub fn parse_language_identifier_from_iter(
29    iter: &mut SubtagIterator,
30    mode: ParserMode,
31) -> Result<LanguageIdentifier, ParserError> {
32    let mut script = None;
33    let mut region = None;
34    let mut variants = ShortBoxSlice::new();
35
36    let language = if let Some(subtag) = iter.next() {
37        subtags::Language::try_from_bytes(subtag)?
38    } else {
39        return Err(ParserError::InvalidLanguage);
40    };
41
42    let mut position = ParserPosition::Script;
43
44    while let Some(subtag) = iter.peek() {
45        if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 {
46            break;
47        }
48
49        if position == ParserPosition::Script {
50            if let Ok(s) = subtags::Script::try_from_bytes(subtag) {
51                script = Some(s);
52                position = ParserPosition::Region;
53            } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
54                region = Some(s);
55                position = ParserPosition::Variant;
56            } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
57                if let Err(idx) = variants.binary_search(&v) {
58                    variants.insert(idx, v);
59                }
60                position = ParserPosition::Variant;
61            } else if mode == ParserMode::Partial {
62                break;
63            } else {
64                return Err(ParserError::InvalidSubtag);
65            }
66        } else if position == ParserPosition::Region {
67            if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
68                region = Some(s);
69                position = ParserPosition::Variant;
70            } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
71                if let Err(idx) = variants.binary_search(&v) {
72                    variants.insert(idx, v);
73                }
74                position = ParserPosition::Variant;
75            } else if mode == ParserMode::Partial {
76                break;
77            } else {
78                return Err(ParserError::InvalidSubtag);
79            }
80        } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
81            if let Err(idx) = variants.binary_search(&v) {
82                variants.insert(idx, v);
83            } else {
84                return Err(ParserError::InvalidSubtag);
85            }
86        } else if mode == ParserMode::Partial {
87            break;
88        } else {
89            return Err(ParserError::InvalidSubtag);
90        }
91        iter.next();
92    }
93
94    Ok(LanguageIdentifier {
95        language,
96        script,
97        region,
98        variants: subtags::Variants::from_short_slice_unchecked(variants),
99    })
100}
101
102pub fn parse_language_identifier(
103    t: &[u8],
104    mode: ParserMode,
105) -> Result<LanguageIdentifier, ParserError> {
106    let mut iter = SubtagIterator::new(t);
107    parse_language_identifier_from_iter(&mut iter, mode)
108}
109
/// Const-context parser for a locale with at most one variant and, in
/// [`ParserMode::Locale`], at most one Unicode extension keyword.
///
/// Returns `(language, script, region, variant, keyword)`, where `keyword` is the
/// parsed key paired with its optional single type subtag. `keyword` is only ever
/// populated when `mode` is [`ParserMode::Locale`].
///
/// # Errors
///
/// * [`ParserError::InvalidLanguage`] if `iter` yields no subtag.
/// * The error reported by the language, extension type, key or value parsers when
///   the corresponding subtag is rejected.
/// * [`ParserError::InvalidSubtag`] if a subtag is not acceptable at the current
///   position (unless `mode` is [`ParserMode::Partial`], which stops parsing instead),
///   or if the input needs something this function does not support: a second
///   variant, a Unicode extension attribute, more than one key, more than one type
///   subtag, or any extension other than the Unicode extension.
#[allow(clippy::type_complexity)]
pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
    mut iter: SubtagIterator,
    mode: ParserMode,
) -> Result<
    (
        subtags::Language,
        Option<subtags::Script>,
        Option<subtags::Region>,
        Option<subtags::Variant>,
        Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
    ),
    ParserError,
> {
    let language;
    let mut script = None;
    let mut region = None;
    let mut variant = None;
    let mut keyword = None;

    // The first subtag is mandatory and must be a language. `next_manual` returns the
    // advanced iterator together with the `(start, end)` bounds of the subtag.
    if let (i, Some((start, end))) = iter.next_manual() {
        iter = i;
        match subtags::Language::try_from_bytes_manual_slice(iter.slice, start, end) {
            Ok(l) => language = l,
            Err(e) => return Err(e),
        }
    } else {
        return Err(ParserError::InvalidLanguage);
    }

    let mut position = ParserPosition::Script;

    // Subtags are peeked first and only consumed (at the bottom of the loop) once
    // they have been accepted.
    while let Some((start, end)) = iter.peek_manual() {
        // Outside of `LanguageIdentifier` mode, a subtag of length 1 ends the language
        // identifier portion (presumably an extension singleton — confirm against callers).
        if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 {
            break;
        }

        if matches!(position, ParserPosition::Script) {
            // Nothing consumed yet: try script, then region, then variant.
            if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(iter.slice, start, end) {
                script = Some(s);
                position = ParserPosition::Region;
            } else if let Ok(r) =
                subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end)
            {
                region = Some(r);
                position = ParserPosition::Variant;
            } else if let Ok(v) =
                subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
            {
                // We cannot handle multiple variants in a const context
                debug_assert!(variant.is_none());
                variant = Some(v);
                position = ParserPosition::Variant;
            } else if matches!(mode, ParserMode::Partial) {
                break;
            } else {
                return Err(ParserError::InvalidSubtag);
            }
        } else if matches!(position, ParserPosition::Region) {
            // A script was consumed: try region, then variant.
            if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) {
                region = Some(s);
                position = ParserPosition::Variant;
            } else if let Ok(v) =
                subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
            {
                // We cannot handle multiple variants in a const context
                debug_assert!(variant.is_none());
                variant = Some(v);
                position = ParserPosition::Variant;
            } else if matches!(mode, ParserMode::Partial) {
                break;
            } else {
                return Err(ParserError::InvalidSubtag);
            }
        } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
        {
            // Only variants are acceptable from here on.
            debug_assert!(matches!(position, ParserPosition::Variant));
            if variant.is_some() {
                // We cannot handle multiple variants in a const context
                return Err(ParserError::InvalidSubtag);
            }
            variant = Some(v);
        } else if matches!(mode, ParserMode::Partial) {
            break;
        } else {
            return Err(ParserError::InvalidSubtag);
        }

        // Consume the subtag that was just accepted.
        iter = iter.next_manual().0;
    }

    // Extensions are only examined in `Locale` mode.
    if matches!(mode, ParserMode::Locale) {
        if let Some((start, end)) = iter.peek_manual() {
            match ExtensionType::try_from_bytes_manual_slice(iter.slice, start, end) {
                Ok(ExtensionType::Unicode) => {
                    // Consume the subtag that identified the Unicode extension.
                    iter = iter.next_manual().0;
                    if let Some((start, end)) = iter.peek_manual() {
                        if Attribute::try_from_bytes_manual_slice(iter.slice, start, end).is_ok() {
                            // We cannot handle Attributes in a const context
                            return Err(ParserError::InvalidSubtag);
                        }
                    }

                    let mut key = None;
                    let mut current_type = None;

                    while let Some((start, end)) = iter.peek_manual() {
                        let slen = end - start;
                        // A subtag of length 2 is treated as a key.
                        if slen == 2 {
                            if key.is_some() {
                                // We cannot handle more than one Key in a const context
                                return Err(ParserError::InvalidSubtag);
                            }
                            match Key::try_from_bytes_manual_slice(iter.slice, start, end) {
                                Ok(k) => key = Some(k),
                                Err(e) => return Err(e),
                            };
                        } else if key.is_some() {
                            // Any other subtag after the key is parsed as a value subtag;
                            // an `Ok(None)` result is skipped without being stored.
                            match Value::parse_subtag_from_bytes_manual_slice(
                                iter.slice, start, end,
                            ) {
                                Ok(Some(t)) => {
                                    if current_type.is_some() {
                                        // We cannot handle more than one type in a const context
                                        return Err(ParserError::InvalidSubtag);
                                    }
                                    current_type = Some(t);
                                }
                                Ok(None) => {}
                                Err(e) => return Err(e),
                            }
                        } else {
                            // Not a key and no key seen yet: stop, leaving the subtag unconsumed.
                            break;
                        }
                        iter = iter.next_manual().0
                    }
                    if let Some(k) = key {
                        keyword = Some((k, current_type));
                    }
                }
                // We cannot handle Transform, Private, Other extensions in a const context
                Ok(_) => return Err(ParserError::InvalidSubtag),
                Err(e) => return Err(e),
            }
        }
    }

    Ok((language, script, region, variant, keyword))
}
259
260#[allow(clippy::type_complexity)]
261pub const fn parse_language_identifier_with_single_variant(
262    t: &[u8],
263    mode: ParserMode,
264) -> Result<
265    (
266        subtags::Language,
267        Option<subtags::Script>,
268        Option<subtags::Region>,
269        Option<subtags::Variant>,
270    ),
271    ParserError,
272> {
273    let iter = SubtagIterator::new(t);
274    match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
275        Ok((l, s, r, v, _)) => Ok((l, s, r, v)),
276        Err(e) => Err(e),
277    }
278}