icu_locid_transform/fallback/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Tools for locale fallback, enabling arbitrary input locales to be mapped into the nearest
6//! locale with data.
7
8use crate::provider::*;
9use icu_locid::extensions::unicode::Value;
10use icu_locid::subtags::Variants;
11use icu_provider::prelude::*;
12
13#[doc(inline)]
14pub use icu_provider::fallback::*;
15
16mod algorithms;
17
/// Implements the algorithm defined in *[UTS #35: Locale Inheritance and Matching]*.
///
/// Note that this implementation performs some additional steps compared to the *UTS #35*
/// algorithm. See *[the design doc]* for a detailed description and [#2243](
/// https://github.com/unicode-org/icu4x/issues/2243) to track alignment with *UTS #35*.
///
/// If running fallback in a loop, use [`DataLocale::is_und()`] to break from the loop.
///
/// This is the owning form of the fallbacker: it holds its data as [`DataPayload`]s.
/// Call [`LocaleFallbacker::as_borrowed()`] to obtain a [`LocaleFallbackerBorrowed`] view
/// over the same data.
///
/// # Examples
///
/// ```
/// use icu::locid::locale;
/// use icu::locid_transform::fallback::LocaleFallbacker;
///
/// // Set up a LocaleFallbacker with data.
/// let fallbacker = LocaleFallbacker::new();
///
/// // Create a LocaleFallbackerIterator with a default configuration.
/// // By default, uses language priority with no additional extension keywords.
/// let mut fallback_iterator = fallbacker
///     .for_config(Default::default())
///     .fallback_for(locale!("hi-Latn-IN").into());
///
/// // Run the algorithm and check the results.
/// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("en-IN").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("en-001").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("en").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("und").into());
/// ```
///
/// [UTS #35: Locale Inheritance and Matching]: https://www.unicode.org/reports/tr35/#Locale_Inheritance
/// [the design doc]: https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit
/// [language identifier]: icu::locid::LanguageIdentifier
#[doc(hidden)]
#[derive(Debug, Clone, PartialEq)]
pub struct LocaleFallbacker {
    /// Payload for the likely-subtags fallback data.
    likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>,
    /// Payload for the parent-locales fallback data.
    parents: DataPayload<LocaleFallbackParentsV1Marker>,
    /// Payload for the collation-specific fallback supplement.
    ///
    /// `None` when the supplement was not available at construction time (for example,
    /// the provider reported the key as missing, or the fallbacker was built without data).
    collation_supplement: Option<DataPayload<CollationFallbackSupplementV1Marker>>,
}
65
/// Borrowed version of [`LocaleFallbacker`].
///
/// Holds plain references to the fallback data structs instead of data payloads, which
/// makes this type `Copy`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocaleFallbackerBorrowed<'a> {
    /// Borrowed likely-subtags fallback data.
    likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
    /// Borrowed parent-locales fallback data.
    parents: &'a LocaleFallbackParentsV1<'a>,
    /// Borrowed collation fallback supplement, if one is available.
    collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
}
73
/// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`].
///
/// Unlike [`LocaleFallbackerBorrowed`], the supplement stored here has already been
/// selected according to the configuration.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LocaleFallbackerWithConfig<'a> {
    /// Borrowed likely-subtags fallback data.
    likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
    /// Borrowed parent-locales fallback data.
    parents: &'a LocaleFallbackParentsV1<'a>,
    /// The supplement chosen for `config`; `None` when the configuration requests no
    /// supplement or the requested one is unavailable.
    supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
    /// The configuration this fallbacker was created with.
    config: LocaleFallbackConfig,
}
82
/// Inner iteration type. Does not own the item under fallback.
///
/// Carries the data references and configuration copied from a
/// [`LocaleFallbackerWithConfig`], plus per-iteration scratch state.
#[derive(Debug)]
struct LocaleFallbackIteratorInner<'a> {
    /// Borrowed likely-subtags fallback data.
    likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
    /// Borrowed parent-locales fallback data.
    parents: &'a LocaleFallbackParentsV1<'a>,
    /// Supplement selected for `config`, if any.
    supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
    /// The configuration driving this fallback run.
    config: LocaleFallbackConfig,
    // The three `backup_*` fields start out as `None` when the iterator is created.
    // NOTE(review): presumably the stepping logic in the `algorithms` module stashes
    // subtags/keywords here that it temporarily strips from the locale — confirm there.
    backup_extension: Option<Value>,
    backup_subdivision: Option<Value>,
    backup_variants: Option<Variants>,
}
94
/// Iteration type for locale fallback operations.
///
/// Because the `Iterator` trait does not allow items to borrow from the iterator, this class does
/// not implement that trait. Instead, use `.step()` and `.get()`.
#[derive(Debug)]
pub struct LocaleFallbackIterator<'a, 'b> {
    /// The locale currently under fallback; owned by the iterator.
    current: DataLocale,
    /// Data references, configuration and scratch state used to advance `current`.
    inner: LocaleFallbackIteratorInner<'a>,
    /// Marker for the `'b` lifetime parameter, which no stored field borrows for.
    phantom: core::marker::PhantomData<&'b ()>,
}
105
106impl LocaleFallbacker {
107    /// Creates a [`LocaleFallbacker`] with compiled fallback data (likely subtags and parent locales).
108    ///
109    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
110    ///
111    /// [📚 Help choosing a constructor](icu_provider::constructors)
112    #[cfg(feature = "compiled_data")]
113    #[allow(clippy::new_ret_no_self)] // keeping constructors together
114    pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> {
115        let tickstatic = LocaleFallbackerBorrowed {
116            likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1,
117            parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1,
118            collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1),
119        };
120        // Safety: we're transmuting down from LocaleFallbackerBorrowed<'static> to LocaleFallbackerBorrowed<'a>
121        // ZeroMaps use associated types in a way that confuse the compiler which gives up and marks them
122        // as invariant. However, they are covariant, and in non-const code this covariance can be safely triggered
123        // using Yokeable::transform. In const code we must transmute. In the long run we should
124        // be able to `transform()` in const code, and also we will have hopefully improved map polymorphism (#3128)
125        unsafe { core::mem::transmute(tickstatic) }
126    }
127
128    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError,
129        #[cfg(skip)]
130        functions: [
131            new,
132            try_new_with_any_provider,
133            try_new_with_buffer_provider,
134            try_new_unstable,
135            Self
136    ]);
137
138    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
139    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
140    where
141        P: DataProvider<LocaleFallbackLikelySubtagsV1Marker>
142            + DataProvider<LocaleFallbackParentsV1Marker>
143            + DataProvider<CollationFallbackSupplementV1Marker>
144            + ?Sized,
145    {
146        let likely_subtags = provider.load(Default::default())?.take_payload()?;
147        let parents = provider.load(Default::default())?.take_payload()?;
148        let collation_supplement = match DataProvider::<CollationFallbackSupplementV1Marker>::load(
149            provider,
150            Default::default(),
151        ) {
152            Ok(response) => Some(response.take_payload()?),
153            // It is expected that not all keys are present
154            Err(DataError {
155                kind: DataErrorKind::MissingDataKey,
156                ..
157            }) => None,
158            Err(e) => return Err(e),
159        };
160        Ok(LocaleFallbacker {
161            likely_subtags,
162            parents,
163            collation_supplement,
164        })
165    }
166
167    /// Creates a [`LocaleFallbacker`] without fallback data. Using this constructor may result in
168    /// surprising behavior, especially in multi-script languages.
169    pub fn new_without_data() -> Self {
170        LocaleFallbacker {
171            likely_subtags: DataPayload::from_owned(Default::default()),
172            parents: DataPayload::from_owned(Default::default()),
173            collation_supplement: None,
174        }
175    }
176
177    /// Associates a configuration with this fallbacker.
178    #[inline]
179    pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig {
180        self.as_borrowed().for_config(config)
181    }
182
183    /// Derives a configuration from a [`DataKey`] and associates it
184    /// with this fallbacker.
185    #[inline]
186    #[doc(hidden)] // will be removed in 2.0
187    pub fn for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig {
188        self.for_config(data_key.fallback_config())
189    }
190
191    /// Creates a borrowed version of this fallbacker for performance.
192    pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed {
193        LocaleFallbackerBorrowed {
194            likely_subtags: self.likely_subtags.get(),
195            parents: self.parents.get(),
196            collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()),
197        }
198    }
199}
200
201impl<'a> LocaleFallbackerBorrowed<'a> {
202    /// Associates a configuration with this fallbacker.
203    #[inline]
204    pub const fn for_config(self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig<'a> {
205        LocaleFallbackerWithConfig {
206            likely_subtags: self.likely_subtags,
207            parents: self.parents,
208            supplement: match config.fallback_supplement {
209                Some(LocaleFallbackSupplement::Collation) => self.collation_supplement,
210                _ => None,
211            },
212            config,
213        }
214    }
215}
216
217impl LocaleFallbackerBorrowed<'static> {
218    /// Cheaply converts a [`LocaleFallbackerBorrowed<'static>`] into a [`LocaleFallbacker`].
219    ///
220    /// Note: Due to branching and indirection, using [`LocaleFallbacker`] might inhibit some
221    /// compile-time optimizations that are possible with [`LocaleFallbackerBorrowed`].
222    pub const fn static_to_owned(self) -> LocaleFallbacker {
223        LocaleFallbacker {
224            likely_subtags: DataPayload::from_static_ref(self.likely_subtags),
225            parents: DataPayload::from_static_ref(self.parents),
226            collation_supplement: match self.collation_supplement {
227                None => None,
228                Some(x) => Some(DataPayload::from_static_ref(x)),
229            },
230        }
231    }
232}
233
234impl<'a> LocaleFallbackerWithConfig<'a> {
235    /// Creates an iterator based on a [`DataLocale`].
236    ///
237    /// If you have a [`Locale`](icu_locid::Locale), call `.into()` to get a [`DataLocale`].
238    ///
239    /// When first initialized, the locale is normalized according to the fallback algorithm.
240    pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> {
241        self.normalize(&mut locale);
242        LocaleFallbackIterator {
243            current: locale,
244            inner: LocaleFallbackIteratorInner {
245                likely_subtags: self.likely_subtags,
246                parents: self.parents,
247                supplement: self.supplement,
248                config: self.config,
249                backup_extension: None,
250                backup_subdivision: None,
251                backup_variants: None,
252            },
253            phantom: core::marker::PhantomData,
254        }
255    }
256}
257
258impl LocaleFallbackIterator<'_, '_> {
259    /// Borrows the current [`DataLocale`] under fallback.
260    pub fn get(&self) -> &DataLocale {
261        &self.current
262    }
263
264    /// Takes the current [`DataLocale`] under fallback.
265    pub fn take(self) -> DataLocale {
266        self.current
267    }
268
269    /// Performs one step of the locale fallback algorithm.
270    ///
271    /// The fallback is completed once the inner [`DataLocale`] becomes `und`.
272    pub fn step(&mut self) -> &mut Self {
273        self.inner.step(&mut self.current);
274        self
275    }
276}