Skip to main content

icu_locale_core/
data.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::extensions::unicode as unicode_ext;
6use crate::preferences::{extensions::unicode::keywords::RegionalSubdivision, LocalePreferences};
7use crate::subtags::{Language, Region, Script, Subtag, Variant};
8#[cfg(feature = "alloc")]
9use crate::ParseError;
10use crate::{LanguageIdentifier, Locale};
11use core::cmp::Ordering;
12use core::default::Default;
13use core::fmt;
14use core::hash::Hash;
15#[cfg(feature = "alloc")]
16use core::str::FromStr;
17
18/// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
19///
20/// [`DataLocale`] contains less functionality than [`Locale`] but more than
21/// [`LanguageIdentifier`] for better size and performance while still meeting
22/// the needs of the ICU4X data pipeline.
23///
24/// In general, you should not need to construct one of these directly. If you do,
25/// even though there is a direct `From<Locale>` conversion, you should
26/// convert through the [`LocalePreferences`] type:
27///
28/// ```
29/// use icu_locale_core::locale;
30/// use icu_locale_core::preferences::LocalePreferences;
31/// use icu_provider::DataLocale;
32/// use writeable::assert_writeable_eq;
33///
34/// // Locale: American English with British user preferences
35/// let locale = locale!("en-US-u-rg-gbzzzz");
36///
37/// // For language-priority fallback, the region override is ignored
38/// let data_locale =
39///     LocalePreferences::from(&locale).to_data_locale_language_priority();
40/// assert_writeable_eq!(data_locale, "en-US");
41///
42/// // The direct conversion implicitly uses language-priority fallback
43/// // (which is incorrect for some use cases).
44/// assert_eq!(data_locale, DataLocale::from(&locale));
45///
46/// // For region-priority fallback, the region override is applied
47/// let data_locale =
48///     LocalePreferences::from(&locale).to_data_locale_region_priority();
49/// assert_writeable_eq!(data_locale, "en-GB");
50/// ```
51///
52/// [`DataLocale`] only supports `-u-sd` keywords, to reflect the current state of CLDR data
53/// lookup and fallback. This may change in the future.
54///
55/// ```
56/// use icu_locale_core::{locale, Locale};
57/// use icu_provider::DataLocale;
58///
59/// let locale = "hi-IN-t-en-h0-hybrid-u-attr-ca-buddhist-sd-inas"
60///     .parse::<Locale>()
61///     .unwrap();
62///
63/// assert_eq!(
64///     DataLocale::from(locale),
65///     DataLocale::from(locale!("hi-IN-u-sd-inas"))
66/// );
67/// ```
68///
69/// [`LocalePreferences`]: crate::preferences::LocalePreferences
70#[derive(#[automatically_derived]
impl ::core::clone::Clone for DataLocale {
    #[inline]
    fn clone(&self) -> DataLocale {
        let _: ::core::clone::AssertParamIsClone<Language>;
        let _: ::core::clone::AssertParamIsClone<Option<Script>>;
        let _: ::core::clone::AssertParamIsClone<Option<Region>>;
        let _: ::core::clone::AssertParamIsClone<Option<Variant>>;
        let _: ::core::clone::AssertParamIsClone<Option<Subtag>>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for DataLocale { }Copy)]
71#[non_exhaustive]
72pub struct DataLocale {
73    /// Language subtag
74    pub language: Language,
75    /// Script subtag
76    pub script: Option<Script>,
77    /// Region subtag
78    pub region: Option<Region>,
79    /// Variant subtag
80    pub variant: Option<Variant>,
81    /// Subivision (-u-sd-) subtag
82    // TODO(3.0): Use `SubdivisionSuffix` type
83    pub subdivision: Option<Subtag>,
84}
85
86impl PartialEq for DataLocale {
87    fn eq(&self, other: &Self) -> bool {
88        self.as_tuple() == other.as_tuple()
89    }
90}
91
92impl Eq for DataLocale {}
93
94impl Hash for DataLocale {
95    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
96        self.as_tuple().hash(state);
97    }
98}
99
100impl Default for DataLocale {
101    fn default() -> Self {
102        Self {
103            language: Language::UNKNOWN,
104            script: None,
105            region: None,
106            variant: None,
107            subdivision: None,
108        }
109    }
110}
111
112impl DataLocale {
113    /// `const` version of `Default::default`
114    pub const fn default() -> Self {
115        DataLocale {
116            language: Language::UNKNOWN,
117            script: None,
118            region: None,
119            variant: None,
120            subdivision: None,
121        }
122    }
123}
124
125impl Default for &DataLocale {
126    fn default() -> Self {
127        static DEFAULT: DataLocale = DataLocale::default();
128        &DEFAULT
129    }
130}
131
132impl fmt::Debug for DataLocale {
133    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
134        f.write_fmt(format_args!("DataLocale{{{0}}}", self))write!(f, "DataLocale{{{self}}}")
135    }
136}
137
138impl writeable::Writeable for DataLocale {
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W)
        -> core::fmt::Result {
        let mut initial = true;
        self.for_each_subtag_str(&mut |subtag|
                    {
                        if initial {
                            initial = false;
                        } else { sink.write_char('-')?; }
                        sink.write_str(subtag)
                    })
    }
    #[inline]
    fn writeable_length_hint(&self) -> writeable::LengthHint {
        let mut result = writeable::LengthHint::exact(0);
        let mut initial = true;
        self.for_each_subtag_str::<core::convert::Infallible,
                _>(&mut |subtag|
                        {
                            if initial { initial = false; } else { result += 1; }
                            result += subtag.len();
                            Ok(())
                        }).expect("infallible");
        result
    }
    fn writeable_borrow(&self) -> Option<&str> {
        let selff = self;
        if selff.script.is_none() && selff.region.is_none() &&
                    selff.variant.is_none() && selff.subdivision.is_none() {
            Some(selff.language.as_str())
        } else { None }
    }
}
/// This trait is implemented for compatibility with [`fmt!`](alloc::fmt).
/// To create a string, [`Writeable::write_to_string`] is usually more efficient.
impl core::fmt::Display for DataLocale {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        ::writeable::Writeable::write_to(&self, f)
    }
}impl_writeable_for_each_subtag_str_no_test!(DataLocale, selff, selff.script.is_none() && selff.region.is_none() && selff.variant.is_none() && selff.subdivision.is_none() => Some(selff.language.as_str()));
139
140impl From<LanguageIdentifier> for DataLocale {
141    fn from(langid: LanguageIdentifier) -> Self {
142        Self::from(&langid)
143    }
144}
145
146impl From<Locale> for DataLocale {
147    fn from(locale: Locale) -> Self {
148        Self::from(&locale)
149    }
150}
151
152impl From<&LanguageIdentifier> for DataLocale {
153    fn from(langid: &LanguageIdentifier) -> Self {
154        Self {
155            language: langid.language,
156            script: langid.script,
157            region: langid.region,
158            variant: langid.variants.iter().copied().next(),
159            subdivision: None,
160        }
161    }
162}
163
164impl From<&Locale> for DataLocale {
165    fn from(locale: &Locale) -> Self {
166        LocalePreferences::from(locale).to_data_locale_language_priority()
167    }
168}
169
/// Parses a [`DataLocale`] from a string, with the same rules as
/// [`DataLocale::try_from_utf8`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for DataLocale {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Same work as `try_from_str`, which only forwards the string's bytes.
        Self::try_from_utf8(s.as_bytes())
    }
}
179
180impl DataLocale {
181    #[inline]
182    /// Parses a [`DataLocale`].
183    ///
184    /// ✨ *Enabled with the `alloc` Cargo feature.*
185    #[cfg(feature = "alloc")]
186    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
187        Self::try_from_utf8(s.as_bytes())
188    }
189
190    /// Parses a [`DataLocale`] from a UTF-8 byte slice.
191    ///
192    /// ✨ *Enabled with the `alloc` Cargo feature.*
193    #[cfg(feature = "alloc")]
194    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
195        let locale = Locale::try_from_utf8(code_units)?;
196        if locale.id.variants.len() > 1
197            || !locale.extensions.transform.is_empty()
198            || !locale.extensions.private.is_empty()
199            || !locale.extensions.other.is_empty()
200            || !locale.extensions.unicode.attributes.is_empty()
201        {
202            return Err(ParseError::InvalidExtension);
203        }
204
205        let unicode_extensions_count = locale.extensions.unicode.keywords.iter().count();
206
207        if unicode_extensions_count != 0
208            && (unicode_extensions_count != 1
209                || !locale
210                    .extensions
211                    .unicode
212                    .keywords
213                    .contains_key(&RegionalSubdivision::UNICODE_EXTENSION_KEY))
214        {
215            return Err(ParseError::InvalidExtension);
216        }
217
218        Ok(locale.into())
219    }
220
221    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
222    where
223        F: FnMut(&str) -> Result<(), E>,
224    {
225        f(self.language.as_str())?;
226        if let Some(ref script) = self.script {
227            f(script.as_str())?;
228        }
229        if let Some(ref region) = self.region {
230            f(region.as_str())?;
231        }
232        if let Some(ref single_variant) = self.variant {
233            f(single_variant.as_str())?;
234        }
235        if let Some(extensions) = self.extensions() {
236            extensions.for_each_subtag_str(f)?;
237        }
238        Ok(())
239    }
240
241    fn region_and_subdivision(&self) -> Option<unicode_ext::SubdivisionId> {
242        self.subdivision
243            .and_then(|s| unicode_ext::SubdivisionId::try_from_str(s.as_str()).ok())
244            .or_else(|| {
245                self.region.map(|region| unicode_ext::SubdivisionId {
246                    region,
247                    suffix: unicode_ext::SubdivisionSuffix::UNKNOWN,
248                })
249            })
250    }
251
252    fn as_tuple(
253        &self,
254    ) -> (
255        Language,
256        Option<Script>,
257        Option<unicode_ext::SubdivisionId>,
258        Option<Variant>,
259    ) {
260        (
261            self.language,
262            self.script,
263            self.region_and_subdivision(),
264            self.variant,
265        )
266    }
267
268    pub(crate) const fn from_parts(
269        language: Language,
270        script: Option<Script>,
271        region: Option<unicode_ext::SubdivisionId>,
272        variant: Option<Variant>,
273    ) -> Self {
274        Self {
275            language,
276            script,
277            region: if let Some(r) = region {
278                Some(r.region)
279            } else {
280                None
281            },
282            variant,
283            subdivision: if let Some(r) = region {
284                Some(r.into_subtag())
285            } else {
286                None
287            },
288        }
289    }
290
291    /// Returns an ordering suitable for use in [`BTreeSet`].
292    ///
293    /// [`BTreeSet`]: alloc::collections::BTreeSet
294    pub fn total_cmp(&self, other: &Self) -> Ordering {
295        self.as_tuple().cmp(&other.as_tuple())
296    }
297
298    /// Compare this [`DataLocale`] with BCP-47 bytes.
299    ///
300    /// The return value is equivalent to what would happen if you first converted this
301    /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
302    ///
303    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
304    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
305    ///
306    /// # Examples
307    ///
308    /// ```
309    /// use core::cmp::Ordering;
310    /// use icu_provider::DataLocale;
311    ///
312    /// let bcp47_strings: &[&str] = &[
313    ///     "ca",
314    ///     "ca-ES",
315    ///     "ca-ES-u-sd-esct",
316    ///     "ca-ES-valencia",
317    ///     "cat",
318    ///     "pl-Latn-PL",
319    ///     "und",
320    ///     "und-fonipa",
321    ///     "zh",
322    /// ];
323    ///
324    /// for ab in bcp47_strings.windows(2) {
325    ///     let a = ab[0];
326    ///     let b = ab[1];
327    ///     assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
328    ///     let a_loc: DataLocale = a.parse().unwrap();
329    ///     assert_eq!(
330    ///         a_loc.strict_cmp(a.as_bytes()),
331    ///         Ordering::Equal,
332    ///         "strict_cmp: {} == {}",
333    ///         a_loc,
334    ///         a
335    ///     );
336    ///     assert_eq!(
337    ///         a_loc.strict_cmp(b.as_bytes()),
338    ///         Ordering::Less,
339    ///         "strict_cmp: {} < {}",
340    ///         a_loc,
341    ///         b
342    ///     );
343    ///     let b_loc: DataLocale = b.parse().unwrap();
344    ///     assert_eq!(
345    ///         b_loc.strict_cmp(b.as_bytes()),
346    ///         Ordering::Equal,
347    ///         "strict_cmp: {} == {}",
348    ///         b_loc,
349    ///         b
350    ///     );
351    ///     assert_eq!(
352    ///         b_loc.strict_cmp(a.as_bytes()),
353    ///         Ordering::Greater,
354    ///         "strict_cmp: {} > {}",
355    ///         b_loc,
356    ///         a
357    ///     );
358    /// }
359    /// ```
360    ///
361    /// Comparison against invalid strings:
362    ///
363    /// ```
364    /// use icu_provider::DataLocale;
365    ///
366    /// let invalid_strings: &[&str] = &[
367    ///     // Less than "ca-ES"
368    ///     "CA",
369    ///     "ar-x-gbp-FOO",
370    ///     // Greater than "ca-AR"
371    ///     "ca_ES",
372    ///     "ca-ES-x-gbp-FOO",
373    /// ];
374    ///
375    /// let data_locale = "ca-ES".parse::<DataLocale>().unwrap();
376    ///
377    /// for s in invalid_strings.iter() {
378    ///     let expected_ordering = "ca-AR".cmp(s);
379    ///     let actual_ordering = data_locale.strict_cmp(s.as_bytes());
380    ///     assert_eq!(expected_ordering, actual_ordering, "{}", s);
381    /// }
382    /// ```
383    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
384        writeable::cmp_utf8(self, other)
385    }
386
387    /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
388    ///
389    /// # Examples
390    ///
391    /// ```
392    /// use icu_provider::DataLocale;
393    ///
394    /// assert!("und".parse::<DataLocale>().unwrap().is_unknown());
395    /// assert!(!"de-u-sd-denw".parse::<DataLocale>().unwrap().is_unknown());
396    /// assert!(!"und-ES".parse::<DataLocale>().unwrap().is_unknown());
397    /// ```
398    pub fn is_unknown(&self) -> bool {
399        self.language.is_unknown()
400            && self.script.is_none()
401            && self.region.is_none()
402            && self.variant.is_none()
403            && self.subdivision.is_none()
404    }
405
406    /// Converts this `DataLocale` into a [`Locale`].
407    pub fn into_locale(self) -> Locale {
408        Locale {
409            id: LanguageIdentifier {
410                language: self.language,
411                script: self.script,
412                region: self.region,
413                variants: self
414                    .variant
415                    .map(crate::subtags::Variants::from_variant)
416                    .unwrap_or_default(),
417            },
418            extensions: self.extensions().unwrap_or_default(),
419        }
420    }
421
422    fn extensions(&self) -> Option<crate::extensions::Extensions> {
423        Some(crate::extensions::Extensions {
424            unicode: unicode_ext::Unicode {
425                keywords: unicode_ext::Keywords::new_single(
426                    RegionalSubdivision::UNICODE_EXTENSION_KEY,
427                    RegionalSubdivision(
428                        self.region_and_subdivision()
429                            .filter(|sd| !sd.suffix.is_unknown())?,
430                    )
431                    .into(),
432                ),
433                ..Default::default()
434            },
435            ..Default::default()
436        })
437    }
438}
439
// Parses each input as a `DataLocale` and checks its written form.
// Gated on `alloc` because `str::parse::<DataLocale>()` relies on the `FromStr` impl,
// which only exists with that feature.
#[test]
#[cfg(feature = "alloc")]
fn test_data_locale_to_string() {
    struct TestCase {
        pub locale: &'static str,
        pub expected: &'static str,
    }

    for cas in [
        TestCase {
            locale: "und",
            expected: "und",
        },
        TestCase {
            locale: "und-u-sd-sdd",
            expected: "und-SD-u-sd-sdd",
        },
        TestCase {
            locale: "en-ZA-u-sd-zaa",
            expected: "en-ZA-u-sd-zaa",
        },
        TestCase {
            locale: "en-ZA-u-sd-sdd",
            expected: "en-ZA",
        },
    ] {
        let locale = cas.locale.parse::<DataLocale>().unwrap();
        writeable::assert_writeable_eq!(locale, cas.expected);
    }
}
469
// Checks which inputs parse as a `DataLocale`, and that the ones that do write back
// out unchanged.
// Gated on `alloc` because both the `FromStr` impl and the `FromStr` import used here
// only exist with that feature.
#[test]
#[cfg(feature = "alloc")]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    for cas in [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: false,
        },
        TestCase {
            input: "en-ZA-u-sd-zaa",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
    ] {
        let data_locale = match (DataLocale::from_str(cas.input), cas.success) {
            (Ok(l), true) => l,
            // Expected failure: nothing further to check for this case.
            (Err(_), false) => {
                continue;
            }
            (Ok(_), false) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            (Err(_), true) => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
        };
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}
510}