Skip to main content

icu_locale_core/extensions/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide a mechanism to extend the [`LanguageIdentifier`] with
6//! additional bits of information - a combination of a [`LanguageIdentifier`] and [`Extensions`]
7//! is called [`Locale`].
8//!
9//! There are four types of extensions:
10//!
11//!  * [`Unicode Extensions`] - marked as `u`.
12//!  * [`Transform Extensions`] - marked as `t`.
13//!  * [`Private Use Extensions`] - marked as `x`.
//!  * [`Other Extensions`] - marked as any `a-z` except for `u`, `t` and `x`.
15//!
//! One can think of extensions as a bag of extra information on top of the four basic [`subtags`].
17//!
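//! Notice: `Other` extensions are handled purely syntactically: they are parsed (with the
//! `alloc` feature), kept in [`Extensions::other`], and written back ordered by singleton.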
19//!
20//! # Examples
21//!
22//! ```
23//! use icu::locale::extensions::unicode::{Key, Value};
24//! use icu::locale::Locale;
25//!
26//! let loc: Locale = "en-US-u-ca-buddhist-t-en-us-h0-hybrid-x-foo"
27//!     .parse()
28//!     .expect("Failed to parse.");
29//!
30//! assert_eq!(loc.id.language, "en".parse().unwrap());
31//! assert_eq!(loc.id.script, None);
32//! assert_eq!(loc.id.region, Some("US".parse().unwrap()));
33//! assert_eq!(loc.id.variants.len(), 0);
34//!
35//! let key: Key = "ca".parse().expect("Parsing key failed.");
36//! let value: Value = "buddhist".parse().expect("Parsing value failed.");
37//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
38//! ```
39//!
40//! # Syntactic vs Semantic Extension Handling
41//!
42//! This module is useful when you need to work with Locale extensions at a syntactic level,
43//! perhaps for parsing or generating locale identifiers that include any syntactically valid
44//! extensions.
45//! For handling and validating known CLDR values with semantic meaning, see the
46//! [`crate::preferences::extensions`] module.
47//!
48//! [`LanguageIdentifier`]: super::LanguageIdentifier
49//! [`Locale`]: super::Locale
50//! [`subtags`]: super::subtags
51//! [`Other Extensions`]: other
52//! [`Private Use Extensions`]: private
53//! [`Transform Extensions`]: transform
54//! [`Unicode Extensions`]: unicode
55pub mod other;
56pub mod private;
57pub mod transform;
58pub mod unicode;
59
60use core::cmp::Ordering;
61
62use other::Other;
63use private::{Private, PRIVATE_EXT_CHAR};
64use transform::{Transform, TRANSFORM_EXT_CHAR};
65use unicode::{Unicode, UNICODE_EXT_CHAR};
66
67#[cfg(feature = "alloc")]
68use alloc::vec::Vec;
69
70use crate::parser::ParseError;
71#[cfg(feature = "alloc")]
72use crate::parser::SubtagIterator;
73use crate::subtags;
74
/// Defines the type of extension.
///
/// The derived ordering follows declaration order (`Transform`, `Unicode`,
/// `Private`, then `Other`), with `Other` values further ordered by their byte.
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
#[non_exhaustive]
pub enum ExtensionType {
    /// Transform Extension Type marked as `t`.
    Transform,
    /// Unicode Extension Type marked as `u`.
    Unicode,
    /// Private Extension Type marked as `x`.
    Private,
    /// All other extension types.
    ///
    /// The payload is the singleton byte; `ExtensionType::try_from_byte` stores it
    /// as a lowercase ASCII letter.
    Other(u8),
}
88
89impl ExtensionType {
90    #[allow(dead_code)]
91    pub(crate) const fn try_from_byte_slice(key: &[u8]) -> Result<Self, ParseError> {
92        if let [b] = key {
93            Self::try_from_byte(*b)
94        } else {
95            Err(ParseError::InvalidExtension)
96        }
97    }
98
99    pub(crate) const fn try_from_byte(key: u8) -> Result<Self, ParseError> {
100        let key = key.to_ascii_lowercase();
101        match key as char {
102            UNICODE_EXT_CHAR => Ok(Self::Unicode),
103            TRANSFORM_EXT_CHAR => Ok(Self::Transform),
104            PRIVATE_EXT_CHAR => Ok(Self::Private),
105            'a'..='z' => Ok(Self::Other(key)),
106            _ => Err(ParseError::InvalidExtension),
107        }
108    }
109
110    pub(crate) const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
111        let &[first] = code_units else {
112            return Err(ParseError::InvalidExtension);
113        };
114
115        Self::try_from_byte(first)
116    }
117}
118
119/// A map of extensions associated with a given [`Locale`](crate::Locale).
120#[derive(#[automatically_derived]
impl ::core::fmt::Debug for Extensions {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field4_finish(f, "Extensions",
            "unicode", &self.unicode, "transform", &self.transform, "private",
            &self.private, "other", &&self.other)
    }
}Debug, #[automatically_derived]
impl ::core::default::Default for Extensions {
    #[inline]
    fn default() -> Extensions {
        Extensions {
            unicode: ::core::default::Default::default(),
            transform: ::core::default::Default::default(),
            private: ::core::default::Default::default(),
            other: ::core::default::Default::default(),
        }
    }
}Default, #[automatically_derived]
impl ::core::cmp::PartialEq for Extensions {
    #[inline]
    fn eq(&self, other: &Extensions) -> bool {
        self.unicode == other.unicode && self.transform == other.transform &&
                self.private == other.private && self.other == other.other
    }
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for Extensions {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<Unicode>;
        let _: ::core::cmp::AssertParamIsEq<Transform>;
        let _: ::core::cmp::AssertParamIsEq<Private>;
        let _: ::core::cmp::AssertParamIsEq<&'static [Other]>;
    }
}Eq, #[automatically_derived]
impl ::core::clone::Clone for Extensions {
    #[inline]
    fn clone(&self) -> Extensions {
        Extensions {
            unicode: ::core::clone::Clone::clone(&self.unicode),
            transform: ::core::clone::Clone::clone(&self.transform),
            private: ::core::clone::Clone::clone(&self.private),
            other: ::core::clone::Clone::clone(&self.other),
        }
    }
}Clone, #[automatically_derived]
impl ::core::hash::Hash for Extensions {
    #[inline]
    fn hash<__H: ::core::hash::Hasher>(&self, state: &mut __H) {
        ::core::hash::Hash::hash(&self.unicode, state);
        ::core::hash::Hash::hash(&self.transform, state);
        ::core::hash::Hash::hash(&self.private, state);
        ::core::hash::Hash::hash(&self.other, state)
    }
}Hash)]
121#[non_exhaustive]
122pub struct Extensions {
123    /// A representation of the data for a Unicode extension, when present in the locale identifier.
124    pub unicode: Unicode,
125    /// A representation of the data for a transform extension, when present in the locale identifier.
126    pub transform: Transform,
127    /// A representation of the data for a private-use extension, when present in the locale identifier.
128    pub private: Private,
129    /// A sequence of any other extensions that are present in the locale identifier but are not formally
130    /// [defined](https://unicode.org/reports/tr35/) and represented explicitly as [`Unicode`], [`Transform`],
131    /// and [`Private`] are.
132    #[cfg(feature = "alloc")]
133    pub other: Vec<Other>,
134    /// A sequence of any other extensions that are present in the locale identifier but are not formally
135    /// [defined](https://unicode.org/reports/tr35/) and represented explicitly as [`Unicode`], [`Transform`],
136    /// and [`Private`] are.
137    #[cfg(not(feature = "alloc"))]
138    pub other: &'static [Other],
139}
140
141impl Extensions {
142    /// Returns a new empty map of extensions. Same as [`default()`](Default::default()), but is `const`.
143    ///
144    /// # Examples
145    ///
146    /// ```
147    /// use icu::locale::extensions::Extensions;
148    ///
149    /// assert_eq!(Extensions::new(), Extensions::default());
150    /// ```
151    #[inline]
152    pub const fn new() -> Self {
153        Self {
154            unicode: Unicode::new(),
155            transform: Transform::new(),
156            private: Private::new(),
157            #[cfg(feature = "alloc")]
158            other: Vec::new(),
159            #[cfg(not(feature = "alloc"))]
160            other: &[],
161        }
162    }
163
164    /// Function to create a new map of extensions containing exactly one unicode extension, callable in `const`
165    /// context.
166    #[inline]
167    pub const fn from_unicode(unicode: Unicode) -> Self {
168        Self {
169            unicode,
170            transform: Transform::new(),
171            private: Private::new(),
172            #[cfg(feature = "alloc")]
173            other: Vec::new(),
174            #[cfg(not(feature = "alloc"))]
175            other: &[],
176        }
177    }
178
179    /// Returns whether there are no extensions present.
180    ///
181    /// # Examples
182    ///
183    /// ```
184    /// use icu::locale::Locale;
185    ///
186    /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
187    ///
188    /// assert!(!loc.extensions.is_empty());
189    /// ```
190    pub fn is_empty(&self) -> bool {
191        self.unicode.is_empty()
192            && self.transform.is_empty()
193            && self.private.is_empty()
194            && self.other.is_empty()
195    }
196
197    #[expect(clippy::type_complexity)]
198    #[cfg_attr(not(feature = "alloc"), expect(clippy::needless_borrow))]
199    pub(crate) fn as_tuple(
200        &self,
201    ) -> (
202        (&unicode::Attributes, &unicode::Keywords),
203        (
204            Option<(
205                subtags::Language,
206                Option<subtags::Script>,
207                Option<subtags::Region>,
208                &subtags::Variants,
209            )>,
210            &transform::Fields,
211        ),
212        &Private,
213        &[Other],
214    ) {
215        (
216            self.unicode.as_tuple(),
217            self.transform.as_tuple(),
218            &self.private,
219            &self.other,
220        )
221    }
222
223    /// Returns an ordering suitable for use in [`BTreeSet`].
224    ///
225    /// The ordering may or may not be equivalent to string ordering, and it
226    /// may or may not be stable across ICU4X releases.
227    ///
228    /// [`BTreeSet`]: alloc::collections::BTreeSet
229    pub fn total_cmp(&self, other: &Self) -> Ordering {
230        self.as_tuple().cmp(&other.as_tuple())
231    }
232
233    /// Retains the specified extension types, clearing all others.
234    ///
235    /// ✨ *Enabled with the `alloc` Cargo feature.*
236    ///
237    /// # Examples
238    ///
239    /// ```
240    /// use icu::locale::extensions::ExtensionType;
241    /// use icu::locale::Locale;
242    ///
243    /// let loc: Locale =
244    ///     "und-a-hello-t-mul-u-world-z-zzz-x-extra".parse().unwrap();
245    ///
246    /// let mut only_unicode = loc.clone();
247    /// only_unicode
248    ///     .extensions
249    ///     .retain_by_type(|t| t == ExtensionType::Unicode);
250    /// assert_eq!(only_unicode, "und-u-world".parse().unwrap());
251    ///
252    /// let mut only_t_z = loc.clone();
253    /// only_t_z.extensions.retain_by_type(|t| {
254    ///     t == ExtensionType::Transform || t == ExtensionType::Other(b'z')
255    /// });
256    /// assert_eq!(only_t_z, "und-t-mul-z-zzz".parse().unwrap());
257    /// ```
258    #[cfg(feature = "alloc")]
259    pub fn retain_by_type<F>(&mut self, mut predicate: F)
260    where
261        F: FnMut(ExtensionType) -> bool,
262    {
263        if !predicate(ExtensionType::Unicode) {
264            self.unicode.clear();
265        }
266        if !predicate(ExtensionType::Transform) {
267            self.transform.clear();
268        }
269        if !predicate(ExtensionType::Private) {
270            self.private.clear();
271        }
272        #[cfg(feature = "alloc")]
273        self.other
274            .retain(|o| predicate(ExtensionType::Other(o.get_ext_byte())));
275    }
276
277    #[cfg(feature = "alloc")]
278    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
279        let mut unicode = None;
280        let mut transform = None;
281        let mut private = None;
282        let mut other = Vec::new();
283
284        while let Some(subtag) = iter.next() {
285            if subtag.is_empty() {
286                return Err(ParseError::InvalidExtension);
287            }
288
289            let &[subtag] = subtag else {
290                return Err(ParseError::InvalidExtension);
291            };
292
293            match ExtensionType::try_from_byte(subtag) {
294                Ok(ExtensionType::Unicode) => {
295                    if unicode.is_some() {
296                        return Err(ParseError::DuplicatedExtension);
297                    }
298                    unicode = Some(Unicode::try_from_iter(iter)?);
299                }
300                Ok(ExtensionType::Transform) => {
301                    if transform.is_some() {
302                        return Err(ParseError::DuplicatedExtension);
303                    }
304                    transform = Some(Transform::try_from_iter(iter)?);
305                }
306                Ok(ExtensionType::Private) => {
307                    if private.is_some() {
308                        return Err(ParseError::DuplicatedExtension);
309                    }
310                    private = Some(Private::try_from_iter(iter)?);
311                }
312                Ok(ExtensionType::Other(ext)) => {
313                    if other.iter().any(|o: &Other| o.get_ext_byte() == ext) {
314                        return Err(ParseError::DuplicatedExtension);
315                    }
316                    let parsed = Other::try_from_iter(ext, iter)?;
317                    if let Err(idx) = other.binary_search(&parsed) {
318                        other.insert(idx, parsed);
319                    } else {
320                        return Err(ParseError::InvalidExtension);
321                    }
322                }
323                _ => return Err(ParseError::InvalidExtension),
324            }
325        }
326
327        Ok(Self {
328            unicode: unicode.unwrap_or_default(),
329            transform: transform.unwrap_or_default(),
330            private: private.unwrap_or_default(),
331            other,
332        })
333    }
334
335    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
336    where
337        F: FnMut(&str) -> Result<(), E>,
338    {
339        let mut wrote_tu = false;
340        // Alphabetic by singleton
341        self.other.iter().try_for_each(|other| {
342            if other.get_ext() > TRANSFORM_EXT_CHAR && !wrote_tu {
343                // Since 't' and 'u' are next to each other in alphabetical
344                // order, write both now.
345                self.transform.for_each_subtag_str(f, true)?;
346                self.unicode.for_each_subtag_str(f, true)?;
347                wrote_tu = true;
348            }
349            other.for_each_subtag_str(f, true)?;
350            Ok(())
351        })?;
352
353        if !wrote_tu {
354            self.transform.for_each_subtag_str(f, true)?;
355            self.unicode.for_each_subtag_str(f, true)?;
356        }
357
358        // Private must be written last, since it allows single character
359        // keys. Extensions must also be written in alphabetical order,
360        // which would seem to imply that other extensions `y` and `z` are
361        // invalid, but this is not specified.
362        self.private.for_each_subtag_str(f, true)?;
363        Ok(())
364    }
365}
366
367impl writeable::Writeable for Extensions {
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W)
        -> core::fmt::Result {
        let mut initial = true;
        self.for_each_subtag_str(&mut |subtag|
                    {
                        if initial {
                            initial = false;
                        } else { sink.write_char('-')?; }
                        sink.write_str(subtag)
                    })
    }
    #[inline]
    fn writeable_length_hint(&self) -> writeable::LengthHint {
        let mut result = writeable::LengthHint::exact(0);
        let mut initial = true;
        self.for_each_subtag_str::<core::convert::Infallible,
                _>(&mut |subtag|
                        {
                            if initial { initial = false; } else { result += 1; }
                            result += subtag.len();
                            Ok(())
                        }).expect("infallible");
        result
    }
}
/// This trait is implemented for compatibility with [`fmt!`](alloc::fmt).
/// To create a string, [`Writeable::write_to_string`] is usually more efficient.
impl core::fmt::Display for Extensions {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        ::writeable::Writeable::write_to(&self, f)
    }
}impl_writeable_for_each_subtag_str_no_test!(Extensions);
368
/// Checks that the extensions of a parsed locale are written back as the expected
/// `-`-joined subtag string, for each extension kind and for a mix of all of them.
#[test]
fn test_writeable() {
    use crate::Locale;
    use writeable::assert_writeable_eq;

    // Parses a full locale identifier and keeps only its extensions.
    let extensions_of = |tag: &str| tag.parse::<Locale>().unwrap().extensions;

    assert_writeable_eq!(Extensions::new(), "");
    assert_writeable_eq!(extensions_of("my-t-my-d0-zawgyi"), "t-my-d0-zawgyi");
    assert_writeable_eq!(
        extensions_of("ar-SA-u-ca-islamic-civil"),
        "u-ca-islamic-civil",
    );
    assert_writeable_eq!(extensions_of("en-001-x-foo-bar"), "x-foo-bar");
    assert_writeable_eq!(extensions_of("und-t-m0-true"), "t-m0-true");
    assert_writeable_eq!(
        extensions_of("und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo"),
        "a-foo-t-foo-u-foo-w-foo-z-foo-x-foo",
    );
}