icu_locale_core/
data.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::extensions::unicode as unicode_ext;
6use crate::subtags::{Language, Region, Script, Subtag, Variant};
7#[cfg(feature = "alloc")]
8use crate::ParseError;
9use crate::{LanguageIdentifier, Locale};
10use core::cmp::Ordering;
11use core::default::Default;
12use core::fmt;
13use core::hash::Hash;
14#[cfg(feature = "alloc")]
15use core::str::FromStr;
16
17/// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
18///
19/// [`DataLocale`] contains less functionality than [`Locale`] but more than
20/// [`LanguageIdentifier`] for better size and performance while still meeting
21/// the needs of the ICU4X data pipeline.
22///
23/// You can create a [`DataLocale`] from a borrowed [`Locale`], which is more
24/// efficient than cloning the [`Locale`], but less efficient than converting an owned
25/// [`Locale`]:
26///
27/// ```
28/// use icu_locale_core::locale;
29/// use icu_provider::DataLocale;
30///
31/// let locale1 = locale!("en-u-ca-buddhist");
32/// let data_locale = DataLocale::from(&locale1);
33/// ```
34///
35/// [`DataLocale`] only supports `-u-sd` keywords, to reflect the current state of CLDR data
36/// lookup and fallback. This may change in the future.
37///
38/// ```
39/// use icu_locale_core::{locale, Locale};
40/// use icu_provider::DataLocale;
41///
42/// let locale = "hi-IN-t-en-h0-hybrid-u-attr-ca-buddhist-sd-inas"
43///     .parse::<Locale>()
44///     .unwrap();
45///
46/// assert_eq!(
47///     DataLocale::from(locale),
48///     DataLocale::from(locale!("hi-IN-u-sd-inas"))
49/// );
50/// ```
51#[derive(#[automatically_derived]
impl ::core::clone::Clone for DataLocale {
    #[inline]
    fn clone(&self) -> DataLocale {
        let _: ::core::clone::AssertParamIsClone<Language>;
        let _: ::core::clone::AssertParamIsClone<Option<Script>>;
        let _: ::core::clone::AssertParamIsClone<Option<Region>>;
        let _: ::core::clone::AssertParamIsClone<Option<Variant>>;
        let _: ::core::clone::AssertParamIsClone<Option<Subtag>>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for DataLocale { }Copy, #[automatically_derived]
impl ::core::cmp::PartialEq for DataLocale {
    #[inline]
    fn eq(&self, other: &DataLocale) -> bool {
        self.language == other.language && self.script == other.script &&
                    self.region == other.region && self.variant == other.variant
            && self.subdivision == other.subdivision
    }
}PartialEq, #[automatically_derived]
impl ::core::hash::Hash for DataLocale {
    #[inline]
    fn hash<__H: ::core::hash::Hasher>(&self, state: &mut __H) -> () {
        ::core::hash::Hash::hash(&self.language, state);
        ::core::hash::Hash::hash(&self.script, state);
        ::core::hash::Hash::hash(&self.region, state);
        ::core::hash::Hash::hash(&self.variant, state);
        ::core::hash::Hash::hash(&self.subdivision, state)
    }
}Hash, #[automatically_derived]
impl ::core::cmp::Eq for DataLocale {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _: ::core::cmp::AssertParamIsEq<Language>;
        let _: ::core::cmp::AssertParamIsEq<Option<Script>>;
        let _: ::core::cmp::AssertParamIsEq<Option<Region>>;
        let _: ::core::cmp::AssertParamIsEq<Option<Variant>>;
        let _: ::core::cmp::AssertParamIsEq<Option<Subtag>>;
    }
}Eq)]
52#[non_exhaustive]
53pub struct DataLocale {
54    /// Language subtag
55    pub language: Language,
56    /// Script subtag
57    pub script: Option<Script>,
58    /// Region subtag
59    pub region: Option<Region>,
60    /// Variant subtag
61    pub variant: Option<Variant>,
62    /// Subivision (-u-sd-) subtag
63    pub subdivision: Option<Subtag>,
64}
65
66impl Default for DataLocale {
67    fn default() -> Self {
68        Self {
69            language: Language::UNKNOWN,
70            script: None,
71            region: None,
72            variant: None,
73            subdivision: None,
74        }
75    }
76}
77
78impl DataLocale {
79    /// `const` version of `Default::default`
80    pub const fn default() -> Self {
81        DataLocale {
82            language: Language::UNKNOWN,
83            script: None,
84            region: None,
85            variant: None,
86            subdivision: None,
87        }
88    }
89}
90
91impl Default for &DataLocale {
92    fn default() -> Self {
93        static DEFAULT: DataLocale = DataLocale::default();
94        &DEFAULT
95    }
96}
97
98impl fmt::Debug for DataLocale {
99    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100        f.write_fmt(format_args!("DataLocale{{{0}}}", self))write!(f, "DataLocale{{{self}}}")
101    }
102}
103
104impl writeable::Writeable for DataLocale {
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W)
        -> core::fmt::Result {
        let mut initial = true;
        self.for_each_subtag_str(&mut |subtag|
                    {
                        if initial {
                            initial = false;
                        } else { sink.write_char('-')?; }
                        sink.write_str(subtag)
                    })
    }
    #[inline]
    fn writeable_length_hint(&self) -> writeable::LengthHint {
        let mut result = writeable::LengthHint::exact(0);
        let mut initial = true;
        self.for_each_subtag_str::<core::convert::Infallible,
                _>(&mut |subtag|
                        {
                            if initial { initial = false; } else { result += 1; }
                            result += subtag.len();
                            Ok(())
                        }).expect("infallible");
        result
    }
    fn writeable_borrow(&self) -> Option<&str> {
        let selff = self;
        if selff.script.is_none() && selff.region.is_none() &&
                    selff.variant.is_none() && selff.subdivision.is_none() {
            Some(selff.language.as_str())
        } else { None }
    }
}
/// This trait is implemented for compatibility with [`fmt!`](alloc::fmt).
/// To create a string, [`Writeable::write_to_string`] is usually more efficient.
impl core::fmt::Display for DataLocale {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        ::writeable::Writeable::write_to(&self, f)
    }
}impl_writeable_for_each_subtag_str_no_test!(DataLocale, selff, selff.script.is_none() && selff.region.is_none() && selff.variant.is_none() && selff.subdivision.is_none() => Some(selff.language.as_str()));
105
106impl From<LanguageIdentifier> for DataLocale {
107    fn from(langid: LanguageIdentifier) -> Self {
108        Self::from(&langid)
109    }
110}
111
112impl From<Locale> for DataLocale {
113    fn from(locale: Locale) -> Self {
114        Self::from(&locale)
115    }
116}
117
118impl From<&LanguageIdentifier> for DataLocale {
119    fn from(langid: &LanguageIdentifier) -> Self {
120        Self {
121            language: langid.language,
122            script: langid.script,
123            region: langid.region,
124            variant: langid.variants.iter().copied().next(),
125            subdivision: None,
126        }
127    }
128}
129
130impl From<&Locale> for DataLocale {
131    fn from(locale: &Locale) -> Self {
132        let mut r = Self::from(&locale.id);
133
134        r.subdivision = locale
135            .extensions
136            .unicode
137            .keywords
138            .get(&const {
        use crate::extensions::unicode::Key;
        match Key::try_from_utf8("sd".as_bytes()) {
            Ok(r) => r,
            _ => {
                ::core::panicking::panic_fmt(format_args!("Invalid extensions::unicode::Key: sd"));
            }
        }
    }unicode_ext::key!("sd"))
139            .and_then(|v| v.as_single_subtag().copied());
140        r
141    }
142}
143
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// Parsing a string is equivalent to parsing its UTF-8 bytes.
#[cfg(feature = "alloc")]
impl FromStr for DataLocale {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        DataLocale::try_from_utf8(s.as_bytes())
    }
}
153
154impl DataLocale {
155    #[inline]
156    /// Parses a [`DataLocale`].
157    ///
158    /// ✨ *Enabled with the `alloc` Cargo feature.*
159    #[cfg(feature = "alloc")]
160    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
161        Self::try_from_utf8(s.as_bytes())
162    }
163
164    /// Parses a [`DataLocale`] from a UTF-8 byte slice.
165    ///
166    /// ✨ *Enabled with the `alloc` Cargo feature.*
167    #[cfg(feature = "alloc")]
168    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
169        let locale = Locale::try_from_utf8(code_units)?;
170        if locale.id.variants.len() > 1
171            || !locale.extensions.transform.is_empty()
172            || !locale.extensions.private.is_empty()
173            || !locale.extensions.other.is_empty()
174            || !locale.extensions.unicode.attributes.is_empty()
175        {
176            return Err(ParseError::InvalidExtension);
177        }
178
179        let unicode_extensions_count = locale.extensions.unicode.keywords.iter().count();
180
181        if unicode_extensions_count != 0
182            && (unicode_extensions_count != 1
183                || !locale
184                    .extensions
185                    .unicode
186                    .keywords
187                    .contains_key(&unicode_ext::key!("sd")))
188        {
189            return Err(ParseError::InvalidExtension);
190        }
191
192        Ok(locale.into())
193    }
194
195    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
196    where
197        F: FnMut(&str) -> Result<(), E>,
198    {
199        f(self.language.as_str())?;
200        if let Some(ref script) = self.script {
201            f(script.as_str())?;
202        }
203        if let Some(ref region) = self.region {
204            f(region.as_str())?;
205        }
206        if let Some(ref single_variant) = self.variant {
207            f(single_variant.as_str())?;
208        }
209        if let Some(ref subdivision) = self.subdivision {
210            f("u")?;
211            f("sd")?;
212            f(subdivision.as_str())?;
213        }
214        Ok(())
215    }
216
217    fn as_tuple(
218        &self,
219    ) -> (
220        Language,
221        Option<Script>,
222        Option<Region>,
223        Option<Variant>,
224        Option<Subtag>,
225    ) {
226        (
227            self.language,
228            self.script,
229            self.region,
230            self.variant,
231            self.subdivision,
232        )
233    }
234
235    /// Returns an ordering suitable for use in [`BTreeSet`].
236    ///
237    /// [`BTreeSet`]: alloc::collections::BTreeSet
238    pub fn total_cmp(&self, other: &Self) -> Ordering {
239        self.as_tuple().cmp(&other.as_tuple())
240    }
241
242    /// Compare this [`DataLocale`] with BCP-47 bytes.
243    ///
244    /// The return value is equivalent to what would happen if you first converted this
245    /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
246    ///
247    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
248    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// use core::cmp::Ordering;
254    /// use icu_provider::DataLocale;
255    ///
256    /// let bcp47_strings: &[&str] = &[
257    ///     "ca",
258    ///     "ca-ES",
259    ///     "ca-ES-u-sd-esct",
260    ///     "ca-ES-valencia",
261    ///     "cat",
262    ///     "pl-Latn-PL",
263    ///     "und",
264    ///     "und-fonipa",
265    ///     "zh",
266    /// ];
267    ///
268    /// for ab in bcp47_strings.windows(2) {
269    ///     let a = ab[0];
270    ///     let b = ab[1];
271    ///     assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
272    ///     let a_loc: DataLocale = a.parse().unwrap();
273    ///     assert_eq!(
274    ///         a_loc.strict_cmp(a.as_bytes()),
275    ///         Ordering::Equal,
276    ///         "strict_cmp: {} == {}",
277    ///         a_loc,
278    ///         a
279    ///     );
280    ///     assert_eq!(
281    ///         a_loc.strict_cmp(b.as_bytes()),
282    ///         Ordering::Less,
283    ///         "strict_cmp: {} < {}",
284    ///         a_loc,
285    ///         b
286    ///     );
287    ///     let b_loc: DataLocale = b.parse().unwrap();
288    ///     assert_eq!(
289    ///         b_loc.strict_cmp(b.as_bytes()),
290    ///         Ordering::Equal,
291    ///         "strict_cmp: {} == {}",
292    ///         b_loc,
293    ///         b
294    ///     );
295    ///     assert_eq!(
296    ///         b_loc.strict_cmp(a.as_bytes()),
297    ///         Ordering::Greater,
298    ///         "strict_cmp: {} > {}",
299    ///         b_loc,
300    ///         a
301    ///     );
302    /// }
303    /// ```
304    ///
305    /// Comparison against invalid strings:
306    ///
307    /// ```
308    /// use icu_provider::DataLocale;
309    ///
310    /// let invalid_strings: &[&str] = &[
311    ///     // Less than "ca-ES"
312    ///     "CA",
313    ///     "ar-x-gbp-FOO",
314    ///     // Greater than "ca-AR"
315    ///     "ca_ES",
316    ///     "ca-ES-x-gbp-FOO",
317    /// ];
318    ///
319    /// let data_locale = "ca-ES".parse::<DataLocale>().unwrap();
320    ///
321    /// for s in invalid_strings.iter() {
322    ///     let expected_ordering = "ca-AR".cmp(s);
323    ///     let actual_ordering = data_locale.strict_cmp(s.as_bytes());
324    ///     assert_eq!(expected_ordering, actual_ordering, "{}", s);
325    /// }
326    /// ```
327    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
328        writeable::cmp_utf8(self, other)
329    }
330
331    /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
332    ///
333    /// # Examples
334    ///
335    /// ```
336    /// use icu_provider::DataLocale;
337    ///
338    /// assert!("und".parse::<DataLocale>().unwrap().is_unknown());
339    /// assert!(!"de-u-sd-denw".parse::<DataLocale>().unwrap().is_unknown());
340    /// assert!(!"und-ES".parse::<DataLocale>().unwrap().is_unknown());
341    /// ```
342    pub fn is_unknown(&self) -> bool {
343        self.language.is_unknown()
344            && self.script.is_none()
345            && self.region.is_none()
346            && self.variant.is_none()
347            && self.subdivision.is_none()
348    }
349
350    /// Converts this `DataLocale` into a [`Locale`].
351    pub fn into_locale(self) -> Locale {
352        Locale {
353            id: LanguageIdentifier {
354                language: self.language,
355                script: self.script,
356                region: self.region,
357                variants: self
358                    .variant
359                    .map(crate::subtags::Variants::from_variant)
360                    .unwrap_or_default(),
361            },
362            extensions: {
363                let mut extensions = crate::extensions::Extensions::default();
364                if let Some(sd) = self.subdivision {
365                    extensions.unicode = unicode_ext::Unicode {
366                        keywords: unicode_ext::Keywords::new_single(
367                            const {
        use crate::extensions::unicode::Key;
        match Key::try_from_utf8("sd".as_bytes()) {
            Ok(r) => r,
            _ => {
                ::core::panicking::panic_fmt(format_args!("Invalid extensions::unicode::Key: sd"));
            }
        }
    }unicode_ext::key!("sd"),
368                            unicode_ext::Value::from_subtag(Some(sd)),
369                        ),
370                        ..Default::default()
371                    }
372                }
373                extensions
374            },
375        }
376    }
377}
378
/// Parsing a valid data locale string and writing it back yields the expected string.
#[test]
fn test_data_locale_to_string() {
    // (input, expected serialization)
    const CASES: [(&str, &str); 3] = [
        ("und", "und"),
        ("und-u-sd-sdd", "und-u-sd-sdd"),
        ("en-ZA-u-sd-zaa", "en-ZA-u-sd-zaa"),
    ];

    for (input, expected) in CASES {
        let locale: DataLocale = input.parse().unwrap();
        writeable::assert_writeable_eq!(locale, expected);
    }
}
404
/// Checks which inputs parse as a `DataLocale`, and that accepted inputs round-trip.
#[test]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    let cases = [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: false,
        },
        TestCase {
            input: "en-ZA-u-sd-zaa",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
    ];

    for cas in cases {
        let parsed = DataLocale::from_str(cas.input);
        let data_locale = match parsed {
            Ok(l) if cas.success => l,
            Ok(_) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            Err(_) if cas.success => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
            // Expected failure: nothing further to check for this case.
            Err(_) => continue,
        };
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}
445}