icu_locale_core/extensions/transform/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Transform Extensions provide information on content transformations in a given locale.
6//!
7//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
8//! optional [`LanguageIdentifier`].
9//!
10//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
11//!
12//! # Examples
13//!
14//! ```
15//! use icu::locale::extensions::transform::{Fields, Key, Transform, Value};
16//! use icu::locale::{LanguageIdentifier, Locale};
17//!
18//! let mut loc: Locale =
19//!     "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
20//!
21//! let lang: LanguageIdentifier =
22//!     "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
23//!
24//! let key: Key = "h0".parse().expect("Parsing key failed.");
25//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
26//!
27//! assert_eq!(loc.extensions.transform.lang, Some(lang));
28//! assert!(loc.extensions.transform.fields.contains_key(&key));
29//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
30//!
31//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
32//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38#[cfg(feature = "alloc")]
39use core::str::FromStr;
40
41pub use fields::Fields;
42#[doc(inline)]
43pub use key::{key, Key};
44pub use value::Value;
45
46#[cfg(feature = "alloc")]
47use super::ExtensionType;
48#[cfg(feature = "alloc")]
49use crate::parser::SubtagIterator;
50#[cfg(feature = "alloc")]
51use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode};
52#[cfg(feature = "alloc")]
53use crate::shortvec::ShortBoxSlice;
54use crate::subtags;
55#[cfg(feature = "alloc")]
56use crate::subtags::Language;
57use crate::LanguageIdentifier;
58#[cfg(feature = "alloc")]
59use litemap::LiteMap;
60
/// Singleton character that introduces the transform extension when it is written out.
pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
/// String form of the transform extension singleton, for callbacks that take `&str` subtags.
pub(crate) const TRANSFORM_EXT_STR: &str = "t";
63
64/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
65/// Identifier`] specification.
66///
67/// Transform extension carries information about source language or script of
68/// transformed content, including content that has been transliterated, transcribed,
69/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
70///
71/// # Examples
72///
73/// ```
74/// use icu::locale::extensions::transform::{Key, Value};
75/// use icu::locale::{LanguageIdentifier, Locale};
76///
77/// let mut loc: Locale =
78///     "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
79///
80/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
81///
82/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
83/// let key: Key = "h0".parse().expect("Parsing key failed.");
84/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
85/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
86/// ```
87/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
88/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
89/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
90#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::clone::Clone for Transform {
    #[inline]
    fn clone(&self) -> Transform {
        Transform {
            lang: ::core::clone::Clone::clone(&self.lang),
            fields: ::core::clone::Clone::clone(&self.fields),
        }
    }
}Clone, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::PartialEq for Transform {
    #[inline]
    fn eq(&self, other: &Transform) -> bool {
        self.lang == other.lang && self.fields == other.fields
    }
}PartialEq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::Eq for Transform {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _: ::core::cmp::AssertParamIsEq<Option<LanguageIdentifier>>;
        let _: ::core::cmp::AssertParamIsEq<Fields>;
    }
}Eq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::fmt::Debug for Transform {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f, "Transform",
            "lang", &self.lang, "fields", &&self.fields)
    }
}Debug, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::default::Default for Transform {
    #[inline]
    fn default() -> Transform {
        Transform {
            lang: ::core::default::Default::default(),
            fields: ::core::default::Default::default(),
        }
    }
}Default, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::hash::Hash for Transform {
    #[inline]
    fn hash<__H: ::core::hash::Hasher>(&self, state: &mut __H) -> () {
        ::core::hash::Hash::hash(&self.lang, state);
        ::core::hash::Hash::hash(&self.fields, state)
    }
}Hash)]
91#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
92pub struct Transform {
93    /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
94    pub lang: Option<LanguageIdentifier>,
95    /// The key-value pairs present in this locale extension, with each extension key subtag
96    /// associated to its provided value subtag.
97    pub fields: Fields,
98}
99
100impl Transform {
101    /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// use icu::locale::extensions::transform::Transform;
107    ///
108    /// assert_eq!(Transform::new(), Transform::default());
109    /// ```
110    #[inline]
111    pub const fn new() -> Self {
112        Self {
113            lang: None,
114            fields: Fields::new(),
115        }
116    }
117
118    /// A constructor which takes a str slice, parses it and
119    /// produces a well-formed [`Transform`].
120    ///
121    /// ✨ *Enabled with the `alloc` Cargo feature.*
122    #[inline]
123    #[cfg(feature = "alloc")]
124    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
125        Self::try_from_utf8(s.as_bytes())
126    }
127
128    /// See [`Self::try_from_str`]
129    ///
130    /// ✨ *Enabled with the `alloc` Cargo feature.*
131    #[cfg(feature = "alloc")]
132    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
133        let mut iter = SubtagIterator::new(code_units);
134
135        let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
136        if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? {
137            return Self::try_from_iter(&mut iter);
138        }
139
140        Err(ParseError::InvalidExtension)
141    }
142
143    /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use icu::locale::Locale;
149    ///
150    /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
151    ///
152    /// assert!(!loc.extensions.transform.is_empty());
153    /// ```
154    pub fn is_empty(&self) -> bool {
155        self.lang.is_none() && self.fields.is_empty()
156    }
157
158    /// Clears the transform extension, effectively removing it from the locale.
159    ///
160    /// # Examples
161    ///
162    /// ```
163    /// use icu::locale::Locale;
164    ///
165    /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
166    /// loc.extensions.transform.clear();
167    /// assert_eq!(loc, "en-US".parse().unwrap());
168    /// ```
169    pub fn clear(&mut self) {
170        self.lang = None;
171        self.fields.clear();
172    }
173
174    #[expect(clippy::type_complexity)]
175    pub(crate) fn as_tuple(
176        &self,
177    ) -> (
178        Option<(
179            subtags::Language,
180            Option<subtags::Script>,
181            Option<subtags::Region>,
182            &subtags::Variants,
183        )>,
184        &Fields,
185    ) {
186        (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
187    }
188
189    /// Returns an ordering suitable for use in [`BTreeSet`].
190    ///
191    /// The ordering may or may not be equivalent to string ordering, and it
192    /// may or may not be stable across ICU4X releases.
193    ///
194    /// [`BTreeSet`]: alloc::collections::BTreeSet
195    pub fn total_cmp(&self, other: &Self) -> Ordering {
196        self.as_tuple().cmp(&other.as_tuple())
197    }
198
199    #[cfg(feature = "alloc")]
200    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
201        let mut tlang = None;
202        let mut tfields = LiteMap::new();
203
204        if let Some(subtag) = iter.peek() {
205            if Language::try_from_utf8(subtag).is_ok() {
206                tlang = Some(parse_language_identifier_from_iter(
207                    iter,
208                    ParserMode::Partial,
209                )?);
210            }
211        }
212
213        let mut current_tkey = None;
214        let mut current_tvalue = ShortBoxSlice::new();
215        let mut has_current_tvalue = false;
216
217        while let Some(subtag) = iter.peek() {
218            if let Some(tkey) = current_tkey {
219                if let Ok(val) = Value::parse_subtag(subtag) {
220                    has_current_tvalue = true;
221                    if let Some(val) = val {
222                        current_tvalue.push(val);
223                    }
224                } else {
225                    if !has_current_tvalue {
226                        return Err(ParseError::InvalidExtension);
227                    }
228                    tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
229                    current_tkey = None;
230                    current_tvalue = ShortBoxSlice::new();
231                    has_current_tvalue = false;
232                    continue;
233                }
234            } else if let Ok(tkey) = Key::try_from_utf8(subtag) {
235                current_tkey = Some(tkey);
236            } else {
237                break;
238            }
239
240            iter.next();
241        }
242
243        if let Some(tkey) = current_tkey {
244            if !has_current_tvalue {
245                return Err(ParseError::InvalidExtension);
246            }
247            tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
248        }
249
250        if tlang.is_none() && tfields.is_empty() {
251            Err(ParseError::InvalidExtension)
252        } else {
253            Ok(Self {
254                lang: tlang,
255                fields: tfields.into(),
256            })
257        }
258    }
259
260    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
261    where
262        F: FnMut(&str) -> Result<(), E>,
263    {
264        if self.is_empty() {
265            return Ok(());
266        }
267        if with_ext {
268            f(TRANSFORM_EXT_STR)?;
269        }
270        if let Some(lang) = &self.lang {
271            lang.for_each_subtag_str_lowercased(f)?;
272        }
273        self.fields.for_each_subtag_str(f)
274    }
275}
276
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for Transform {
    type Err = ParseError;

    /// Parses `s` as a transform extension, like [`Transform::try_from_str`].
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Transform::try_from_utf8(s.as_bytes())
    }
}
287
288/// This trait is implemented for compatibility with [`fmt!`](alloc::fmt).
/// To create a string, [`Writeable::write_to_string`] is usually more efficient.
impl core::fmt::Display for Transform {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        ::writeable::Writeable::write_to(&self, f)
    }
}writeable::impl_display_with_writeable!(Transform, #[cfg(feature = "alloc")]);
289
290impl writeable::Writeable for Transform {
291    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
292        if self.is_empty() {
293            return Ok(());
294        }
295        sink.write_char(TRANSFORM_EXT_CHAR)?;
296        if let Some(lang) = &self.lang {
297            sink.write_char('-')?;
298            lang.write_lowercased_to(sink)?;
299        }
300        if !self.fields.is_empty() {
301            sink.write_char('-')?;
302            writeable::Writeable::write_to(&self.fields, sink)?;
303        }
304        Ok(())
305    }
306
307    fn writeable_length_hint(&self) -> writeable::LengthHint {
308        if self.is_empty() {
309            return writeable::LengthHint::exact(0);
310        }
311        let mut result = writeable::LengthHint::exact(1);
312        if let Some(lang) = &self.lang {
313            result += writeable::Writeable::writeable_length_hint(lang) + 1;
314        }
315        if !self.fields.is_empty() {
316            result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
317        }
318        result
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed extension round-trips through `parse` and `to_string`; a bare
    /// singleton with no content is rejected.
    #[test]
    fn test_transform_extension_fromstr() {
        let parsed = "t-en-us-h0-hybrid"
            .parse::<Transform>()
            .expect("Failed to parse Transform");
        assert_eq!(parsed.to_string(), "t-en-us-h0-hybrid");

        let bare_singleton = "t".parse::<Transform>();
        assert!(bare_singleton.is_err());
    }
}