// Path: icu_locale_core/extensions/transform/mod.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Transform Extensions provide information on content transformations in a given locale.
//!
//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
//! optional [`LanguageIdentifier`].
//!
//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
//!
//! # Examples
//!
//! ```
//! use icu::locale::extensions::transform::{Fields, Key, Transform, Value};
//! use icu::locale::{LanguageIdentifier, Locale};
//!
//! let mut loc: Locale =
//!     "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
//!
//! let lang: LanguageIdentifier =
//!     "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
//!
//! let key: Key = "h0".parse().expect("Parsing key failed.");
//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
//!
//! assert_eq!(loc.extensions.transform.lang, Some(lang));
//! assert!(loc.extensions.transform.fields.contains_key(&key));
//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
//!
//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38#[cfg(feature = "alloc")]
39use core::str::FromStr;
40
41pub use fields::Fields;
42#[doc(inline)]
43pub use key::{key, Key};
44pub use value::Value;
45
46#[cfg(feature = "alloc")]
47use super::ExtensionType;
48#[cfg(feature = "alloc")]
49use crate::parser::SubtagIterator;
50#[cfg(feature = "alloc")]
51use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode};
52#[cfg(feature = "alloc")]
53use crate::shortvec::ShortBoxSlice;
54use crate::subtags;
55use crate::LanguageIdentifier;
56#[cfg(feature = "alloc")]
57use litemap::LiteMap;
58
59pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
60pub(crate) const TRANSFORM_EXT_STR: &str = "t";
61
62/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
63/// Identifier`] specification.
64///
65/// Transform extension carries information about source language or script of
66/// transformed content, including content that has been transliterated, transcribed,
67/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
68///
69/// # Examples
70///
71/// ```
72/// use icu::locale::extensions::transform::{Key, Value};
73/// use icu::locale::{LanguageIdentifier, Locale};
74///
75/// let mut loc: Locale =
76///     "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
77///
78/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
79///
80/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
81/// let key: Key = "h0".parse().expect("Parsing key failed.");
82/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
83/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
84/// ```
85/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
86/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
87/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
88#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::clone::Clone for Transform {
    #[inline]
    fn clone(&self) -> Transform {
        Transform {
            lang: ::core::clone::Clone::clone(&self.lang),
            fields: ::core::clone::Clone::clone(&self.fields),
        }
    }
}Clone, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::PartialEq for Transform {
    #[inline]
    fn eq(&self, other: &Transform) -> bool {
        self.lang == other.lang && self.fields == other.fields
    }
}PartialEq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::Eq for Transform {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<Option<LanguageIdentifier>>;
        let _: ::core::cmp::AssertParamIsEq<Fields>;
    }
}Eq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::fmt::Debug for Transform {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f, "Transform",
            "lang", &self.lang, "fields", &&self.fields)
    }
}Debug, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::default::Default for Transform {
    #[inline]
    fn default() -> Transform {
        Transform {
            lang: ::core::default::Default::default(),
            fields: ::core::default::Default::default(),
        }
    }
}Default, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::hash::Hash for Transform {
    #[inline]
    fn hash<__H: ::core::hash::Hasher>(&self, state: &mut __H) {
        ::core::hash::Hash::hash(&self.lang, state);
        ::core::hash::Hash::hash(&self.fields, state)
    }
}Hash)]
89#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
90pub struct Transform {
91    /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
92    pub lang: Option<LanguageIdentifier>,
93    /// The key-value pairs present in this locale extension, with each extension key subtag
94    /// associated to its provided value subtag.
95    pub fields: Fields,
96}
97
98impl Transform {
99    /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
100    ///
101    /// # Examples
102    ///
103    /// ```
104    /// use icu::locale::extensions::transform::Transform;
105    ///
106    /// assert_eq!(Transform::new(), Transform::default());
107    /// ```
108    #[inline]
109    pub const fn new() -> Self {
110        Self {
111            lang: None,
112            fields: Fields::new(),
113        }
114    }
115
116    /// A constructor which takes a str slice, parses it and
117    /// produces a well-formed [`Transform`].
118    ///
119    /// ✨ *Enabled with the `alloc` Cargo feature.*
120    #[inline]
121    #[cfg(feature = "alloc")]
122    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
123        Self::try_from_utf8(s.as_bytes())
124    }
125
126    /// See [`Self::try_from_str`]
127    ///
128    /// ✨ *Enabled with the `alloc` Cargo feature.*
129    #[cfg(feature = "alloc")]
130    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
131        let mut iter = SubtagIterator::new(code_units);
132
133        let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
134        if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? {
135            return Self::try_from_iter(&mut iter);
136        }
137
138        Err(ParseError::InvalidExtension)
139    }
140
141    /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
142    ///
143    /// # Examples
144    ///
145    /// ```
146    /// use icu::locale::Locale;
147    ///
148    /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
149    ///
150    /// assert!(!loc.extensions.transform.is_empty());
151    /// ```
152    pub fn is_empty(&self) -> bool {
153        self.lang.is_none() && self.fields.is_empty()
154    }
155
156    /// Clears the transform extension, effectively removing it from the locale.
157    ///
158    /// # Examples
159    ///
160    /// ```
161    /// use icu::locale::Locale;
162    ///
163    /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
164    /// loc.extensions.transform.clear();
165    /// assert_eq!(loc, "en-US".parse().unwrap());
166    /// ```
167    pub fn clear(&mut self) {
168        self.lang = None;
169        self.fields.clear();
170    }
171
172    #[expect(clippy::type_complexity)]
173    pub(crate) fn as_tuple(
174        &self,
175    ) -> (
176        Option<(
177            subtags::Language,
178            Option<subtags::Script>,
179            Option<subtags::Region>,
180            &subtags::Variants,
181        )>,
182        &Fields,
183    ) {
184        (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
185    }
186
187    /// Returns an ordering suitable for use in [`BTreeSet`].
188    ///
189    /// The ordering may or may not be equivalent to string ordering, and it
190    /// may or may not be stable across ICU4X releases.
191    ///
192    /// [`BTreeSet`]: alloc::collections::BTreeSet
193    pub fn total_cmp(&self, other: &Self) -> Ordering {
194        self.as_tuple().cmp(&other.as_tuple())
195    }
196
197    #[cfg(feature = "alloc")]
198    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
199        let mut tlang = None;
200        let mut tfields = LiteMap::new();
201
202        if let Some(subtag) = iter.peek() {
203            if subtags::Language::try_from_utf8(subtag).is_ok() {
204                tlang = Some(parse_language_identifier_from_iter(
205                    iter,
206                    ParserMode::Partial,
207                )?);
208            }
209        }
210
211        let mut current_tkey = None;
212        let mut current_tvalue = ShortBoxSlice::new();
213        let mut has_current_tvalue = false;
214
215        while let Some(subtag) = iter.peek() {
216            if let Some(tkey) = current_tkey {
217                if let Ok(val) = Value::parse_subtag(subtag) {
218                    has_current_tvalue = true;
219                    if let Some(val) = val {
220                        current_tvalue.push(val);
221                    }
222                } else {
223                    if !has_current_tvalue {
224                        return Err(ParseError::InvalidExtension);
225                    }
226                    tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
227                    current_tkey = None;
228                    current_tvalue = ShortBoxSlice::new();
229                    has_current_tvalue = false;
230                    continue;
231                }
232            } else if let Ok(tkey) = Key::try_from_utf8(subtag) {
233                current_tkey = Some(tkey);
234            } else {
235                break;
236            }
237
238            iter.next();
239        }
240
241        if let Some(tkey) = current_tkey {
242            if !has_current_tvalue {
243                return Err(ParseError::InvalidExtension);
244            }
245            tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
246        }
247
248        if tlang.is_none() && tfields.is_empty() {
249            Err(ParseError::InvalidExtension)
250        } else {
251            Ok(Self {
252                lang: tlang,
253                fields: tfields.into(),
254            })
255        }
256    }
257
258    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
259    where
260        F: FnMut(&str) -> Result<(), E>,
261    {
262        if self.is_empty() {
263            return Ok(());
264        }
265        if with_ext {
266            f(TRANSFORM_EXT_STR)?;
267        }
268        if let Some(lang) = &self.lang {
269            lang.for_each_subtag_str_lowercased(f)?;
270        }
271        self.fields.for_each_subtag_str(f)
272    }
273}
274
/// Parses a [`Transform`] by delegating to [`Transform::try_from_str`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for Transform {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_str(s)
    }
}

286/// This trait is implemented for compatibility with [`fmt!`](alloc::fmt).
/// To create a string, [`Writeable::write_to_string`] is usually more efficient.
impl core::fmt::Display for Transform {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        ::writeable::Writeable::write_to(&self, f)
    }
}writeable::impl_display_with_writeable!(Transform, #[cfg(feature = "alloc")]);
287
288impl writeable::Writeable for Transform {
289    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
290        if self.is_empty() {
291            return Ok(());
292        }
293        sink.write_char(TRANSFORM_EXT_CHAR)?;
294        if let Some(lang) = &self.lang {
295            sink.write_char('-')?;
296            lang.write_lowercased_to(sink)?;
297        }
298        if !self.fields.is_empty() {
299            sink.write_char('-')?;
300            writeable::Writeable::write_to(&self.fields, sink)?;
301        }
302        Ok(())
303    }
304
305    fn writeable_length_hint(&self) -> writeable::LengthHint {
306        if self.is_empty() {
307            return writeable::LengthHint::exact(0);
308        }
309        let mut result = writeable::LengthHint::exact(1);
310        if let Some(lang) = &self.lang {
311            result += writeable::Writeable::writeable_length_hint(lang) + 1;
312        }
313        if !self.fields.is_empty() {
314            result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
315        }
316        result
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// A full extension string round-trips through parsing and formatting,
    /// and a bare singleton with no tlang or fields is rejected.
    #[test]
    fn test_transform_extension_fromstr() {
        let te: Transform = "t-en-us-h0-hybrid"
            .parse()
            .expect("Failed to parse Transform");
        assert_eq!(te.to_string(), "t-en-us-h0-hybrid");

        let te: Result<Transform, _> = "t".parse();
        assert!(te.is_err());
    }
}