icu_locale_core/extensions/unicode/keywords.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use core::borrow::Borrow;
6use core::cmp::Ordering;
7#[cfg(feature = "alloc")]
8use core::iter::FromIterator;
9#[cfg(feature = "alloc")]
10use core::str::FromStr;
11use litemap::LiteMap;
12
13use super::Key;
14use super::Value;
15#[cfg(feature = "alloc")]
16use crate::parser::ParseError;
17#[cfg(feature = "alloc")]
18use crate::parser::SubtagIterator;
19use crate::shortvec::ShortBoxSlice;
20
/// A list of [`Key`]-[`Value`] pairs representing functional information
/// about a locale's internationalization preferences.
///
/// Here are examples of fields used in Unicode:
/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
///
/// You can find the full list in the [`Unicode BCP 47 U Extension`] section of LDML.
///
/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
///
/// # Examples
///
/// Manually build up a [`Keywords`] object:
///
/// ```
/// use icu::locale::extensions::unicode::{key, value, Keywords};
///
/// let keywords = [(key!("hc"), value!("h23"))]
///     .into_iter()
///     .collect::<Keywords>();
///
/// assert_eq!(&keywords.to_string(), "hc-h23");
/// ```
///
/// Access a [`Keywords`] object from a [`Locale`]:
///
/// ```
/// use icu::locale::{
///     extensions::unicode::{key, value},
///     Locale,
/// };
///
/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
///
/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("hc")),
///     Some(&value!("h23"))
/// );
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("kc")),
///     Some(&value!("true"))
/// );
///
/// // Note that the `true` value of `kc` does not appear in the written form.
/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
/// ```
///
/// [`Locale`]: crate::Locale
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
pub struct Keywords(LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>);
73
74impl Keywords {
75 /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
76 ///
77 /// # Examples
78 ///
79 /// ```
80 /// use icu::locale::extensions::unicode::Keywords;
81 ///
82 /// assert_eq!(Keywords::new(), Keywords::default());
83 /// ```
84 #[inline]
85 pub const fn new() -> Self {
86 Self(LiteMap::new())
87 }
88
89 /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
90 #[inline]
91 pub const fn new_single(key: Key, value: Value) -> Self {
92 Self(LiteMap::from_sorted_store_unchecked(
93 ShortBoxSlice::new_single((key, value)),
94 ))
95 }
96
/// Parses a string slice into a well-formed [`Keywords`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Errors
///
/// Returns a [`ParseError`] if the input cannot be parsed.
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::Keywords;
///
/// let keywords = Keywords::try_from_str("hc-h12").expect("valid keywords");
///
/// assert_eq!(keywords.to_string(), "hc-h12");
/// ```
#[inline]
#[cfg(feature = "alloc")]
pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
    let code_units = s.as_bytes();
    Self::try_from_utf8(code_units)
}
106
/// Byte-slice counterpart of [`Self::try_from_str`]: splits `code_units`
/// into subtags and parses them into a [`Keywords`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
    Self::try_from_iter(&mut SubtagIterator::new(code_units))
}
115
116 /// Returns `true` if there are no keywords.
117 ///
118 /// # Examples
119 ///
120 /// ```
121 /// use icu::locale::locale;
122 /// use icu::locale::Locale;
123 ///
124 /// let loc1 = Locale::try_from_str("und-t-h0-hybrid").unwrap();
125 /// let loc2 = locale!("und-u-ca-buddhist");
126 ///
127 /// assert!(loc1.extensions.unicode.keywords.is_empty());
128 /// assert!(!loc2.extensions.unicode.keywords.is_empty());
129 /// ```
130 pub fn is_empty(&self) -> bool {
131 self.0.is_empty()
132 }
133
134 /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
135 ///
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use icu::locale::extensions::unicode::{key, value, Keywords};
141 ///
142 /// let keywords = [(key!("ca"), value!("gregory"))]
143 /// .into_iter()
144 /// .collect::<Keywords>();
145 ///
146 /// assert!(&keywords.contains_key(&key!("ca")));
147 /// ```
148 pub fn contains_key<Q>(&self, key: &Q) -> bool
149 where
150 Key: Borrow<Q>,
151 Q: Ord,
152 {
153 self.0.contains_key(key)
154 }
155
156 /// Returns a reference to the [`Value`] corresponding to the [`Key`].
157 ///
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use icu::locale::extensions::unicode::{key, value, Keywords};
163 ///
164 /// let keywords = [(key!("ca"), value!("buddhist"))]
165 /// .into_iter()
166 /// .collect::<Keywords>();
167 ///
168 /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("buddhist")));
169 /// ```
170 pub fn get<Q>(&self, key: &Q) -> Option<&Value>
171 where
172 Key: Borrow<Q>,
173 Q: Ord,
174 {
175 self.0.get(key)
176 }
177
/// Looks up the [`Value`] stored for the given [`Key`] and hands out a
/// mutable reference to it.
///
/// Returns `None` if the key is not present.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::{key, value, Keywords};
///
/// let mut keywords = [(key!("ca"), value!("buddhist"))]
///     .into_iter()
///     .collect::<Keywords>();
///
/// if let Some(value) = keywords.get_mut(&key!("ca")) {
///     *value = value!("gregory");
/// }
/// assert_eq!(keywords.get(&key!("ca")), Some(&value!("gregory")));
/// ```
#[cfg(feature = "alloc")]
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
where
    Key: Borrow<Q>,
    Q: Ord,
{
    let Self(map) = self;
    map.get_mut(key)
}
206
/// Stores `value` under `key`, handing back the value that was previously
/// stored for that key, if there was one.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::{key, value};
/// use icu::locale::Locale;
///
/// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
///     .parse()
///     .expect("valid BCP-47 identifier");
/// let old_value = loc
///     .extensions
///     .unicode
///     .keywords
///     .set(key!("ca"), value!("japanese"));
///
/// assert_eq!(old_value, Some(value!("buddhist")));
/// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
/// ```
#[cfg(feature = "alloc")]
pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
    let Self(map) = self;
    map.insert(key, value)
}
233
/// Deletes the given keyword, handing back its value if it was present.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::key;
/// use icu::locale::Locale;
///
/// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
///     .parse()
///     .expect("valid BCP-47 identifier");
/// loc.extensions.unicode.keywords.remove(key!("ca"));
/// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
/// ```
#[cfg(feature = "alloc")]
pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
    // `key` may be passed by value or by reference; either way it is viewed as a `&Key`.
    let target: &Key = key.borrow();
    self.0.remove(target)
}
254
255 /// Clears all Unicode extension keywords, leaving Unicode attributes.
256 ///
257 /// Returns the old Unicode extension keywords.
258 ///
259 /// # Examples
260 ///
261 /// ```
262 /// use icu::locale::Locale;
263 ///
264 /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
265 /// loc.extensions.unicode.keywords.clear();
266 /// assert_eq!(loc, "und-u-hello".parse().unwrap());
267 /// ```
268 pub fn clear(&mut self) -> Self {
269 core::mem::take(self)
270 }
271
/// Keeps only the keywords whose [`Key`] is accepted by `predicate`;
/// all other keywords are removed.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::key;
/// use icu::locale::Locale;
///
/// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
///
/// loc.extensions
///     .unicode
///     .keywords
///     .retain_by_key(|&k| k == key!("hc"));
/// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
///
/// loc.extensions
///     .unicode
///     .keywords
///     .retain_by_key(|&k| k == key!("ms"));
/// assert_eq!(loc, Locale::UNKNOWN);
/// ```
#[cfg(feature = "alloc")]
pub fn retain_by_key<F>(&mut self, mut predicate: F)
where
    F: FnMut(&Key) -> bool,
{
    let Self(map) = self;
    // Only the key takes part in the decision; the value is ignored.
    map.retain(|candidate, _value| predicate(candidate));
}
303
304 /// Compare this [`Keywords`] with BCP-47 bytes.
305 ///
306 /// The return value is equivalent to what would happen if you first converted this
307 /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
308 ///
309 /// This function is case-sensitive and results in a *total order*, so it is appropriate for
310 /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
311 ///
312 /// # Examples
313 ///
314 /// ```
315 /// use icu::locale::Locale;
316 /// use std::cmp::Ordering;
317 ///
318 /// let bcp47_strings: &[&str] =
319 /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
320 ///
321 /// for ab in bcp47_strings.windows(2) {
322 /// let a = ab[0];
323 /// let b = ab[1];
324 /// assert!(a.cmp(b) == Ordering::Less);
325 /// let a_kwds = format!("und-u-{}", a)
326 /// .parse::<Locale>()
327 /// .unwrap()
328 /// .extensions
329 /// .unicode
330 /// .keywords;
331 /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
332 /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
333 /// }
334 /// ```
335 pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
336 writeable::cmp_utf8(self, other)
337 }
338
/// Parses keywords from a subtag iterator.
///
/// Subtags are peeked one at a time and only consumed (`iter.next()`) once
/// they have been accepted as part of a keyword, so the first subtag that
/// does not belong to the keywords is left in `iter` for the caller.
///
/// # Errors
///
/// The only error path visible here is a two-byte subtag that
/// [`Key::try_from_utf8`] rejects. A value subtag that fails to parse does
/// not produce an error; it ends parsing instead.
#[cfg(feature = "alloc")]
pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
    let mut keywords = LiteMap::new();

    // The key currently being assembled, and the value subtags collected for it so far.
    let mut current_keyword = None;
    let mut current_value = ShortBoxSlice::new();

    while let Some(subtag) = iter.peek() {
        let slen = subtag.len();
        if slen == 2 {
            // A two-byte subtag starts a new key: first store the previous key (if any)
            // together with everything collected for it, then start over with an empty value.
            if let Some(kw) = current_keyword.take() {
                // NOTE(review): the result of `try_insert` is discarded. Presumably it
                // declines to overwrite an existing entry, so the first occurrence of a
                // repeated key wins — confirm against `LiteMap::try_insert`.
                keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
                current_value = ShortBoxSlice::new();
            }
            current_keyword = Some(Key::try_from_utf8(subtag)?);
        } else if current_keyword.is_some() {
            match Value::parse_subtag_from_utf8(subtag) {
                Ok(Some(t)) => current_value.push(t),
                // The subtag is accepted and consumed but adds nothing to the value
                // (presumably the implicit `true` subtag — confirm in
                // `Value::parse_subtag_from_utf8`).
                Ok(None) => {}
                // Not a valid value subtag: stop here without consuming it.
                Err(_) => break,
            }
        } else {
            // A non-key subtag before any key has been seen is not part of the keywords.
            break;
        }
        // Consume the subtag that was peeked and accepted above.
        iter.next();
    }

    // Store the last key; a key without any value subtags gets an empty value.
    if let Some(kw) = current_keyword.take() {
        keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
    }

    Ok(keywords.into())
}
372
373 /// Produce an ordered iterator over key-value pairs
374 pub fn iter(&self) -> impl Iterator<Item = (&Key, &Value)> {
375 self.0.iter()
376 }
377
378 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
379 where
380 F: FnMut(&str) -> Result<(), E>,
381 {
382 for (k, v) in self.0.iter() {
383 f(k.as_str())?;
384 v.for_each_subtag_str(f)?;
385 }
386 Ok(())
387 }
388
/// Merges all key-value pairs of `other` into this `Keywords`.
///
/// When a key is present in both lists, the value from `other` replaces
/// the existing one.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::Keywords;
///
/// let mut kw: Keywords = "ab-cd-ca-buddhist".parse().unwrap();
/// let kw2: Keywords = "ca-gregory-hc-h12".parse().unwrap();
///
/// kw.extend_from_keywords(kw2);
///
/// assert_eq!(kw, "ab-cd-ca-gregory-hc-h12".parse().unwrap());
/// ```
#[cfg(feature = "alloc")]
pub fn extend_from_keywords(&mut self, other: Keywords) {
    let Keywords(incoming) = other;
    incoming.into_iter().for_each(|(key, value)| {
        // The previous value (if any) is intentionally dropped.
        self.0.insert(key, value);
    });
}
409
/// Test-only constructor from a vector of pairs.
///
/// It exists as a separate method to help with type inference in helpers.rs.
#[cfg(test)]
pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
    Self::from_iter(v)
}
415}
416
417impl From<LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>> for Keywords {
418 fn from(map: LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>) -> Self {
419 Self(map)
420 }
421}
422
/// Collects an iterator of key-value pairs into a [`Keywords`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromIterator<(Key, Value)> for Keywords {
    fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
        // The target map type is fixed by the field of `Keywords`.
        Self(iter.into_iter().collect())
    }
}
430
/// Parses a [`Keywords`] from a string, so that `str::parse` can be used.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for Keywords {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Keywords::try_from_utf8(s.as_bytes())
    }
}
441
// NOTE(review): presumably this generates the `Writeable`/`Display` implementations
// that `to_string()` and `writeable::cmp_utf8` rely on for `Keywords`; the string
// arguments look like sample data for checks generated by the macro — confirm
// against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
443
#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing through `str::parse` and writing the result back out must
    /// reproduce the input.
    #[test]
    fn test_keywords_fromstr() {
        let parsed = "hc-h12"
            .parse::<Keywords>()
            .expect("Failed to parse Keywords");
        let rendered = parsed.to_string();
        assert_eq!(rendered, "hc-h12");
    }
}
453}