icu_locid/extensions/transform/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Transform Extensions provide information on content transformations in a given locale.
6//!
7//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
8//! optional [`LanguageIdentifier`].
9//!
10//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
11//!
12//! # Examples
13//!
14//! ```
15//! use icu::locid::extensions::transform::{Fields, Key, Transform, Value};
16//! use icu::locid::{LanguageIdentifier, Locale};
17//!
18//! let mut loc: Locale =
19//! "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
20//!
21//! let lang: LanguageIdentifier =
22//! "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
23//!
24//! let key: Key = "h0".parse().expect("Parsing key failed.");
25//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
26//!
27//! assert_eq!(loc.extensions.transform.lang, Some(lang));
28//! assert!(loc.extensions.transform.fields.contains_key(&key));
29//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
30//!
31//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
32//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38
39pub use fields::Fields;
40#[doc(inline)]
41pub use key::{key, Key};
42pub use value::Value;
43
44use crate::parser::SubtagIterator;
45use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode};
46use crate::shortvec::ShortBoxSlice;
47use crate::subtags::{self, Language};
48use crate::LanguageIdentifier;
49use litemap::LiteMap;
50
51/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
52/// Identifier`] specification.
53///
54/// Transform extension carries information about source language or script of
55/// transformed content, including content that has been transliterated, transcribed,
56/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
57///
58/// # Examples
59///
60/// ```
61/// use icu::locid::extensions::transform::{Key, Value};
62/// use icu::locid::{LanguageIdentifier, Locale};
63///
64/// let mut loc: Locale =
65/// "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
66///
67/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
68///
69/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
70/// let key: Key = "h0".parse().expect("Parsing key failed.");
71/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
72/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
73/// ```
74/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
75/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
76/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
77#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
78#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
79pub struct Transform {
80 /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
81 pub lang: Option<LanguageIdentifier>,
82 /// The key-value pairs present in this locale extension, with each extension key subtag
83 /// associated to its provided value subtag.
84 pub fields: Fields,
85}
86
87impl Transform {
88 /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
89 ///
90 /// # Examples
91 ///
92 /// ```
93 /// use icu::locid::extensions::transform::Transform;
94 ///
95 /// assert_eq!(Transform::new(), Transform::default());
96 /// ```
97 #[inline]
98 pub const fn new() -> Self {
99 Self {
100 lang: None,
101 fields: Fields::new(),
102 }
103 }
104
105 /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
106 ///
107 /// # Examples
108 ///
109 /// ```
110 /// use icu::locid::Locale;
111 ///
112 /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
113 ///
114 /// assert!(!loc.extensions.transform.is_empty());
115 /// ```
116 pub fn is_empty(&self) -> bool {
117 self.lang.is_none() && self.fields.is_empty()
118 }
119
120 /// Clears the transform extension, effectively removing it from the locale.
121 ///
122 /// # Examples
123 ///
124 /// ```
125 /// use icu::locid::Locale;
126 ///
127 /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
128 /// loc.extensions.transform.clear();
129 /// assert_eq!(loc, "en-US".parse().unwrap());
130 /// ```
131 pub fn clear(&mut self) {
132 self.lang = None;
133 self.fields.clear();
134 }
135
136 #[allow(clippy::type_complexity)]
137 pub(crate) fn as_tuple(
138 &self,
139 ) -> (
140 Option<(
141 subtags::Language,
142 Option<subtags::Script>,
143 Option<subtags::Region>,
144 &subtags::Variants,
145 )>,
146 &Fields,
147 ) {
148 (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
149 }
150
151 /// Returns an ordering suitable for use in [`BTreeSet`].
152 ///
153 /// The ordering may or may not be equivalent to string ordering, and it
154 /// may or may not be stable across ICU4X releases.
155 ///
156 /// [`BTreeSet`]: alloc::collections::BTreeSet
157 pub fn total_cmp(&self, other: &Self) -> Ordering {
158 self.as_tuple().cmp(&other.as_tuple())
159 }
160
161 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
162 let mut tlang = None;
163 let mut tfields = LiteMap::new();
164
165 if let Some(subtag) = iter.peek() {
166 if Language::try_from_bytes(subtag).is_ok() {
167 tlang = Some(parse_language_identifier_from_iter(
168 iter,
169 ParserMode::Partial,
170 )?);
171 }
172 }
173
174 let mut current_tkey = None;
175 let mut current_tvalue = ShortBoxSlice::new();
176 let mut has_current_tvalue = false;
177
178 while let Some(subtag) = iter.peek() {
179 if let Some(tkey) = current_tkey {
180 if let Ok(val) = Value::parse_subtag(subtag) {
181 has_current_tvalue = true;
182 if let Some(val) = val {
183 current_tvalue.push(val);
184 }
185 } else {
186 if !has_current_tvalue {
187 return Err(ParserError::InvalidExtension);
188 }
189 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
190 current_tkey = None;
191 current_tvalue = ShortBoxSlice::new();
192 has_current_tvalue = false;
193 continue;
194 }
195 } else if let Ok(tkey) = Key::try_from_bytes(subtag) {
196 current_tkey = Some(tkey);
197 } else {
198 break;
199 }
200
201 iter.next();
202 }
203
204 if let Some(tkey) = current_tkey {
205 if !has_current_tvalue {
206 return Err(ParserError::InvalidExtension);
207 }
208 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
209 }
210
211 Ok(Self {
212 lang: tlang,
213 fields: tfields.into(),
214 })
215 }
216
217 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
218 where
219 F: FnMut(&str) -> Result<(), E>,
220 {
221 if self.is_empty() {
222 return Ok(());
223 }
224 f("t")?;
225 if let Some(lang) = &self.lang {
226 lang.for_each_subtag_str_lowercased(f)?;
227 }
228 self.fields.for_each_subtag_str(f)
229 }
230}
231
232writeable::impl_display_with_writeable!(Transform);
233
234impl writeable::Writeable for Transform {
235 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
236 if self.is_empty() {
237 return Ok(());
238 }
239 sink.write_str("t")?;
240 if let Some(lang) = &self.lang {
241 sink.write_char('-')?;
242 lang.write_lowercased_to(sink)?;
243 }
244 if !self.fields.is_empty() {
245 sink.write_char('-')?;
246 writeable::Writeable::write_to(&self.fields, sink)?;
247 }
248 Ok(())
249 }
250
251 fn writeable_length_hint(&self) -> writeable::LengthHint {
252 if self.is_empty() {
253 return writeable::LengthHint::exact(0);
254 }
255 let mut result = writeable::LengthHint::exact(1);
256 if let Some(lang) = &self.lang {
257 result += writeable::Writeable::writeable_length_hint(lang) + 1;
258 }
259 if !self.fields.is_empty() {
260 result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
261 }
262 result
263 }
264}