icu_locid/extensions/transform/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Transform Extensions provide information on content transformations in a given locale.
6//!
7//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
8//! optional [`LanguageIdentifier`].
9//!
10//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
11//!
12//! # Examples
13//!
14//! ```
15//! use icu::locid::extensions::transform::{Fields, Key, Transform, Value};
16//! use icu::locid::{LanguageIdentifier, Locale};
17//!
18//! let mut loc: Locale =
19//!     "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
20//!
21//! let lang: LanguageIdentifier =
22//!     "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
23//!
24//! let key: Key = "h0".parse().expect("Parsing key failed.");
25//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
26//!
27//! assert_eq!(loc.extensions.transform.lang, Some(lang));
28//! assert!(loc.extensions.transform.fields.contains_key(&key));
29//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
30//!
31//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
32//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38
39pub use fields::Fields;
40#[doc(inline)]
41pub use key::{key, Key};
42pub use value::Value;
43
44use crate::parser::SubtagIterator;
45use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode};
46use crate::shortvec::ShortBoxSlice;
47use crate::subtags::{self, Language};
48use crate::LanguageIdentifier;
49use litemap::LiteMap;
50
51/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
52/// Identifier`] specification.
53///
54/// Transform extension carries information about source language or script of
55/// transformed content, including content that has been transliterated, transcribed,
56/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
57///
58/// # Examples
59///
60/// ```
61/// use icu::locid::extensions::transform::{Key, Value};
62/// use icu::locid::{LanguageIdentifier, Locale};
63///
64/// let mut loc: Locale =
65///     "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
66///
67/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
68///
69/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
70/// let key: Key = "h0".parse().expect("Parsing key failed.");
71/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
72/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
73/// ```
74/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
75/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
76/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
77#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
78#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
79pub struct Transform {
80    /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
81    pub lang: Option<LanguageIdentifier>,
82    /// The key-value pairs present in this locale extension, with each extension key subtag
83    /// associated to its provided value subtag.
84    pub fields: Fields,
85}
86
87impl Transform {
88    /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// use icu::locid::extensions::transform::Transform;
94    ///
95    /// assert_eq!(Transform::new(), Transform::default());
96    /// ```
97    #[inline]
98    pub const fn new() -> Self {
99        Self {
100            lang: None,
101            fields: Fields::new(),
102        }
103    }
104
105    /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// use icu::locid::Locale;
111    ///
112    /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
113    ///
114    /// assert!(!loc.extensions.transform.is_empty());
115    /// ```
116    pub fn is_empty(&self) -> bool {
117        self.lang.is_none() && self.fields.is_empty()
118    }
119
120    /// Clears the transform extension, effectively removing it from the locale.
121    ///
122    /// # Examples
123    ///
124    /// ```
125    /// use icu::locid::Locale;
126    ///
127    /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
128    /// loc.extensions.transform.clear();
129    /// assert_eq!(loc, "en-US".parse().unwrap());
130    /// ```
131    pub fn clear(&mut self) {
132        self.lang = None;
133        self.fields.clear();
134    }
135
136    #[allow(clippy::type_complexity)]
137    pub(crate) fn as_tuple(
138        &self,
139    ) -> (
140        Option<(
141            subtags::Language,
142            Option<subtags::Script>,
143            Option<subtags::Region>,
144            &subtags::Variants,
145        )>,
146        &Fields,
147    ) {
148        (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
149    }
150
151    /// Returns an ordering suitable for use in [`BTreeSet`].
152    ///
153    /// The ordering may or may not be equivalent to string ordering, and it
154    /// may or may not be stable across ICU4X releases.
155    ///
156    /// [`BTreeSet`]: alloc::collections::BTreeSet
157    pub fn total_cmp(&self, other: &Self) -> Ordering {
158        self.as_tuple().cmp(&other.as_tuple())
159    }
160
161    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
162        let mut tlang = None;
163        let mut tfields = LiteMap::new();
164
165        if let Some(subtag) = iter.peek() {
166            if Language::try_from_bytes(subtag).is_ok() {
167                tlang = Some(parse_language_identifier_from_iter(
168                    iter,
169                    ParserMode::Partial,
170                )?);
171            }
172        }
173
174        let mut current_tkey = None;
175        let mut current_tvalue = ShortBoxSlice::new();
176        let mut has_current_tvalue = false;
177
178        while let Some(subtag) = iter.peek() {
179            if let Some(tkey) = current_tkey {
180                if let Ok(val) = Value::parse_subtag(subtag) {
181                    has_current_tvalue = true;
182                    if let Some(val) = val {
183                        current_tvalue.push(val);
184                    }
185                } else {
186                    if !has_current_tvalue {
187                        return Err(ParserError::InvalidExtension);
188                    }
189                    tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
190                    current_tkey = None;
191                    current_tvalue = ShortBoxSlice::new();
192                    has_current_tvalue = false;
193                    continue;
194                }
195            } else if let Ok(tkey) = Key::try_from_bytes(subtag) {
196                current_tkey = Some(tkey);
197            } else {
198                break;
199            }
200
201            iter.next();
202        }
203
204        if let Some(tkey) = current_tkey {
205            if !has_current_tvalue {
206                return Err(ParserError::InvalidExtension);
207            }
208            tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
209        }
210
211        Ok(Self {
212            lang: tlang,
213            fields: tfields.into(),
214        })
215    }
216
217    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
218    where
219        F: FnMut(&str) -> Result<(), E>,
220    {
221        if self.is_empty() {
222            return Ok(());
223        }
224        f("t")?;
225        if let Some(lang) = &self.lang {
226            lang.for_each_subtag_str_lowercased(f)?;
227        }
228        self.fields.for_each_subtag_str(f)
229    }
230}
231
232writeable::impl_display_with_writeable!(Transform);
233
234impl writeable::Writeable for Transform {
235    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
236        if self.is_empty() {
237            return Ok(());
238        }
239        sink.write_str("t")?;
240        if let Some(lang) = &self.lang {
241            sink.write_char('-')?;
242            lang.write_lowercased_to(sink)?;
243        }
244        if !self.fields.is_empty() {
245            sink.write_char('-')?;
246            writeable::Writeable::write_to(&self.fields, sink)?;
247        }
248        Ok(())
249    }
250
251    fn writeable_length_hint(&self) -> writeable::LengthHint {
252        if self.is_empty() {
253            return writeable::LengthHint::exact(0);
254        }
255        let mut result = writeable::LengthHint::exact(1);
256        if let Some(lang) = &self.lang {
257            result += writeable::Writeable::writeable_length_hint(lang) + 1;
258        }
259        if !self.fields.is_empty() {
260            result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
261        }
262        result
263    }
264}