icu_locale_core/
langid.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use core::cmp::Ordering;
6#[cfg(feature = "alloc")]
7use core::str::FromStr;
8
9use crate::parser;
10use crate::subtags;
11use crate::ParseError;
12#[cfg(feature = "alloc")]
13use alloc::borrow::Cow;
14
15/// A core struct representing a [`Unicode BCP47 Language Identifier`].
16///
17/// # Ordering
18///
19/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
20/// multiple possible orderings. Depending on your use case, two orderings are available:
21///
22/// 1. A string ordering, suitable for stable serialization: [`LanguageIdentifier::strict_cmp`]
23/// 2. A struct ordering, suitable for use with a BTreeSet: [`LanguageIdentifier::total_cmp`]
24///
25/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
26///
27/// # Parsing
28///
29/// Unicode recognizes three levels of standard conformance for any language identifier:
30///
31///  * *well-formed* - syntactically correct
32///  * *valid* - well-formed and only uses registered language, region, script and variant subtags...
33///  * *canonical* - valid and no deprecated codes or structure.
34///
35/// At the moment parsing normalizes a well-formed language identifier converting
36/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
37///
38/// Any syntactically invalid subtags will cause the parsing to fail with an error.
39///
/// This operation normalizes syntax to be well-formed. No legacy subtag replacement is performed.
41/// For validation and canonicalization, see `LocaleCanonicalizer`.
42///
43/// # Serde
44///
45/// This type implements `serde::Serialize` and `serde::Deserialize` if the
46/// `"serde"` Cargo feature is enabled on the crate.
47///
48/// The value will be serialized as a string and parsed when deserialized.
49/// For tips on efficient storage and retrieval of locales, see [`crate::zerovec`].
50///
51/// # Examples
52///
53/// Simple example:
54///
55/// ```
56/// use icu::locale::{
57///     langid,
58///     subtags::{language, region},
59/// };
60///
61/// let li = langid!("en-US");
62///
63/// assert_eq!(li.language, language!("en"));
64/// assert_eq!(li.script, None);
65/// assert_eq!(li.region, Some(region!("US")));
66/// assert_eq!(li.variants.len(), 0);
67/// ```
68///
69/// More complex example:
70///
71/// ```
72/// use icu::locale::{
73///     langid,
74///     subtags::{language, region, script, variant},
75/// };
76///
77/// let li = langid!("eN-latn-Us-Valencia");
78///
79/// assert_eq!(li.language, language!("en"));
80/// assert_eq!(li.script, Some(script!("Latn")));
81/// assert_eq!(li.region, Some(region!("US")));
82/// assert_eq!(li.variants.first(), Some(&variant!("valencia")));
83/// ```
84///
85/// [`Unicode BCP47 Language Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_language_identifier
86#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::PartialEq for LanguageIdentifier {
    #[inline]
    fn eq(&self, other: &LanguageIdentifier) -> bool {
        self.language == other.language && self.script == other.script &&
                self.region == other.region && self.variants == other.variants
    }
}PartialEq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::Eq for LanguageIdentifier {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<subtags::Language>;
        let _: ::core::cmp::AssertParamIsEq<Option<subtags::Script>>;
        let _: ::core::cmp::AssertParamIsEq<Option<subtags::Region>>;
        let _: ::core::cmp::AssertParamIsEq<subtags::Variants>;
    }
}Eq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::clone::Clone for LanguageIdentifier {
    #[inline]
    fn clone(&self) -> LanguageIdentifier {
        LanguageIdentifier {
            language: ::core::clone::Clone::clone(&self.language),
            script: ::core::clone::Clone::clone(&self.script),
            region: ::core::clone::Clone::clone(&self.region),
            variants: ::core::clone::Clone::clone(&self.variants),
        }
    }
}Clone, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::hash::Hash for LanguageIdentifier {
    #[inline]
    fn hash<__H: ::core::hash::Hasher>(&self, state: &mut __H) {
        ::core::hash::Hash::hash(&self.language, state);
        ::core::hash::Hash::hash(&self.script, state);
        ::core::hash::Hash::hash(&self.region, state);
        ::core::hash::Hash::hash(&self.variants, state)
    }
}Hash)] // no Ord or PartialOrd: see docs
87#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
88pub struct LanguageIdentifier {
89    /// Language subtag of the language identifier.
90    pub language: subtags::Language,
91    /// Script subtag of the language identifier.
92    pub script: Option<subtags::Script>,
93    /// Region subtag of the language identifier.
94    pub region: Option<subtags::Region>,
95    /// Variant subtags of the language identifier.
96    pub variants: subtags::Variants,
97}
98
99impl LanguageIdentifier {
100    /// The unknown language identifier "und".
101    pub const UNKNOWN: Self = const {
        match crate::LanguageIdentifier::try_from_utf8_with_single_variant("und".as_bytes())
            {
            Ok((language, script, region, variant)) =>
                crate::LanguageIdentifier {
                    language,
                    script,
                    region,
                    variants: match variant {
                        Some(v) => crate::subtags::Variants::from_variant(v),
                        None => crate::subtags::Variants::new(),
                    },
                },
            _ => {
                ::core::panicking::panic_fmt(format_args!("Invalid language code: und . Note langid! macro can only support up to a single variant tag. Use runtime parsing instead."));
            }
        }
    }crate::langid!("und");
102
103    /// A constructor which takes a utf8 slice, parses it and
104    /// produces a well-formed [`LanguageIdentifier`].
105    ///
106    /// ✨ *Enabled with the `alloc` Cargo feature.*
107    ///
108    /// # Examples
109    ///
110    /// ```
111    /// use icu::locale::LanguageIdentifier;
112    ///
113    /// LanguageIdentifier::try_from_str("en-US").expect("Parsing failed");
114    /// ```
115    #[inline]
116    #[cfg(feature = "alloc")]
117    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
118        Self::try_from_utf8(s.as_bytes())
119    }
120
121    /// See [`Self::try_from_str`]
122    ///
123    /// ✨ *Enabled with the `alloc` Cargo feature.*
124    #[cfg(feature = "alloc")]
125    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
126        crate::parser::parse_language_identifier(code_units, parser::ParserMode::LanguageIdentifier)
127    }
128
129    #[doc(hidden)] // macro use
130    #[expect(clippy::type_complexity)]
131    // The return type should be `Result<Self, ParseError>` once the `const_precise_live_drops`
132    // is stabilized ([rust-lang#73255](https://github.com/rust-lang/rust/issues/73255)).
133    pub const fn try_from_utf8_with_single_variant(
134        code_units: &[u8],
135    ) -> Result<
136        (
137            subtags::Language,
138            Option<subtags::Script>,
139            Option<subtags::Region>,
140            Option<subtags::Variant>,
141        ),
142        ParseError,
143    > {
144        crate::parser::parse_language_identifier_with_single_variant(
145            code_units,
146            parser::ParserMode::LanguageIdentifier,
147        )
148    }
149
150    /// A constructor which takes a utf8 slice which may contain extension keys,
151    /// parses it and produces a well-formed [`LanguageIdentifier`].
152    ///
153    /// ✨ *Enabled with the `alloc` Cargo feature.*
154    ///
155    /// # Examples
156    ///
157    /// ```
158    /// use icu::locale::{langid, LanguageIdentifier};
159    ///
160    /// let li = LanguageIdentifier::try_from_locale_bytes(b"en-US-x-posix")
161    ///     .expect("Parsing failed.");
162    ///
163    /// assert_eq!(li, langid!("en-US"));
164    /// ```
165    ///
166    /// This method should be used for input that may be a locale identifier.
167    /// All extensions will be lost.
168    #[cfg(feature = "alloc")]
169    pub fn try_from_locale_bytes(v: &[u8]) -> Result<Self, ParseError> {
170        parser::parse_language_identifier(v, parser::ParserMode::Locale)
171    }
172
173    /// Whether this [`LanguageIdentifier`] equals [`LanguageIdentifier::UNKNOWN`].
174    pub const fn is_unknown(&self) -> bool {
175        self.language.is_unknown()
176            && self.script.is_none()
177            && self.region.is_none()
178            && self.variants.is_empty()
179    }
180
181    /// Normalize the language identifier (operating on UTF-8 formatted byte slices)
182    ///
183    /// This operation will normalize casing and the separator.
184    ///
185    /// ✨ *Enabled with the `alloc` Cargo feature.*
186    ///
187    /// # Examples
188    ///
189    /// ```
190    /// use icu::locale::LanguageIdentifier;
191    ///
192    /// assert_eq!(
193    ///     LanguageIdentifier::normalize("pL-latn-pl").as_deref(),
194    ///     Ok("pl-Latn-PL")
195    /// );
196    /// ```
197    #[cfg(feature = "alloc")]
198    pub fn normalize_utf8(input: &[u8]) -> Result<Cow<'_, str>, ParseError> {
199        let lang_id = Self::try_from_utf8(input)?;
200        Ok(writeable::to_string_or_borrow(&lang_id, input))
201    }
202
203    /// Normalize the language identifier (operating on strings)
204    ///
205    /// This operation will normalize casing and the separator.
206    ///
207    /// ✨ *Enabled with the `alloc` Cargo feature.*
208    ///
209    /// # Examples
210    ///
211    /// ```
212    /// use icu::locale::LanguageIdentifier;
213    ///
214    /// assert_eq!(
215    ///     LanguageIdentifier::normalize("pL-latn-pl").as_deref(),
216    ///     Ok("pl-Latn-PL")
217    /// );
218    /// ```
219    #[cfg(feature = "alloc")]
220    pub fn normalize(input: &str) -> Result<Cow<'_, str>, ParseError> {
221        Self::normalize_utf8(input.as_bytes())
222    }
223
224    /// Compare this [`LanguageIdentifier`] with BCP-47 bytes.
225    ///
226    /// The return value is equivalent to what would happen if you first converted this
227    /// [`LanguageIdentifier`] to a BCP-47 string and then performed a byte comparison.
228    ///
229    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
230    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
231    ///
232    /// # Examples
233    ///
234    /// Sorting a list of langids with this method requires converting one of them to a string:
235    ///
236    /// ```
237    /// use icu::locale::LanguageIdentifier;
238    /// use std::cmp::Ordering;
239    /// use writeable::Writeable;
240    ///
241    /// // Random input order:
242    /// let bcp47_strings: &[&str] = &[
243    ///     "ar-Latn",
244    ///     "zh-Hant-TW",
245    ///     "zh-TW",
246    ///     "und-fonipa",
247    ///     "zh-Hant",
248    ///     "ar-SA",
249    /// ];
250    ///
251    /// let mut langids = bcp47_strings
252    ///     .iter()
253    ///     .map(|s| s.parse().unwrap())
254    ///     .collect::<Vec<LanguageIdentifier>>();
255    /// langids.sort_by(|a, b| {
256    ///     let b = b.write_to_string();
257    ///     a.strict_cmp(b.as_bytes())
258    /// });
259    /// let strict_cmp_strings = langids
260    ///     .iter()
261    ///     .map(|l| l.to_string())
262    ///     .collect::<Vec<String>>();
263    ///
264    /// // Output ordering, sorted alphabetically
265    /// let expected_ordering: &[&str] = &[
266    ///     "ar-Latn",
267    ///     "ar-SA",
268    ///     "und-fonipa",
269    ///     "zh-Hant",
270    ///     "zh-Hant-TW",
271    ///     "zh-TW",
272    /// ];
273    ///
274    /// assert_eq!(expected_ordering, strict_cmp_strings);
275    /// ```
276    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
277        writeable::cmp_utf8(self, other)
278    }
279
280    pub(crate) fn as_tuple(
281        &self,
282    ) -> (
283        subtags::Language,
284        Option<subtags::Script>,
285        Option<subtags::Region>,
286        &subtags::Variants,
287    ) {
288        (self.language, self.script, self.region, &self.variants)
289    }
290
291    /// Compare this [`LanguageIdentifier`] with another [`LanguageIdentifier`] field-by-field.
292    /// The result is a total ordering sufficient for use in a [`BTreeSet`].
293    ///
294    /// Unlike [`LanguageIdentifier::strict_cmp`], the ordering may or may not be equivalent
295    /// to string ordering, and it may or may not be stable across ICU4X releases.
296    ///
297    /// # Examples
298    ///
299    /// This method returns a nonsensical ordering derived from the fields of the struct:
300    ///
301    /// ```
302    /// use icu::locale::LanguageIdentifier;
303    /// use std::cmp::Ordering;
304    ///
305    /// // Input strings, sorted alphabetically
306    /// let bcp47_strings: &[&str] = &[
307    ///     "ar-Latn",
308    ///     "ar-SA",
309    ///     "und-fonipa",
310    ///     "zh-Hant",
311    ///     "zh-Hant-TW",
312    ///     "zh-TW",
313    /// ];
314    /// assert!(bcp47_strings.windows(2).all(|w| w[0] < w[1]));
315    ///
316    /// let mut langids = bcp47_strings
317    ///     .iter()
318    ///     .map(|s| s.parse().unwrap())
319    ///     .collect::<Vec<LanguageIdentifier>>();
320    /// langids.sort_by(LanguageIdentifier::total_cmp);
321    /// let total_cmp_strings = langids
322    ///     .iter()
323    ///     .map(|l| l.to_string())
324    ///     .collect::<Vec<String>>();
325    ///
326    /// // Output ordering, sorted arbitrarily
327    /// let expected_ordering: &[&str] = &[
328    ///     "ar-SA",
329    ///     "ar-Latn",
330    ///     "und-fonipa",
331    ///     "zh-TW",
332    ///     "zh-Hant",
333    ///     "zh-Hant-TW",
334    /// ];
335    ///
336    /// assert_eq!(expected_ordering, total_cmp_strings);
337    /// ```
338    ///
339    /// Use a wrapper to add a [`LanguageIdentifier`] to a [`BTreeSet`]:
340    ///
341    /// ```no_run
342    /// use icu::locale::LanguageIdentifier;
343    /// use std::cmp::Ordering;
344    /// use std::collections::BTreeSet;
345    ///
346    /// #[derive(PartialEq, Eq)]
347    /// struct LanguageIdentifierTotalOrd(LanguageIdentifier);
348    ///
349    /// impl Ord for LanguageIdentifierTotalOrd {
350    ///     fn cmp(&self, other: &Self) -> Ordering {
351    ///         self.0.total_cmp(&other.0)
352    ///     }
353    /// }
354    ///
355    /// impl PartialOrd for LanguageIdentifierTotalOrd {
356    ///     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
357    ///         Some(self.cmp(other))
358    ///     }
359    /// }
360    ///
361    /// let _: BTreeSet<LanguageIdentifierTotalOrd> = unimplemented!();
362    /// ```
363    ///
364    /// [`BTreeSet`]: alloc::collections::BTreeSet
365    pub fn total_cmp(&self, other: &Self) -> Ordering {
366        self.as_tuple().cmp(&other.as_tuple())
367    }
368
369    /// Compare this `LanguageIdentifier` with a potentially unnormalized BCP-47 string.
370    ///
371    /// The return value is equivalent to what would happen if you first parsed the
372    /// BCP-47 string to a `LanguageIdentifier` and then performed a structural comparison.
373    ///
374    /// # Examples
375    ///
376    /// ```
377    /// use icu::locale::LanguageIdentifier;
378    ///
379    /// let bcp47_strings: &[&str] = &[
380    ///     "pl-LaTn-pL",
381    ///     "uNd",
382    ///     "UnD-adlm",
383    ///     "uNd-GB",
384    ///     "UND-FONIPA",
385    ///     "ZH",
386    /// ];
387    ///
388    /// for a in bcp47_strings {
389    ///     assert!(a.parse::<LanguageIdentifier>().unwrap().normalizing_eq(a));
390    /// }
391    /// ```
392    pub fn normalizing_eq(&self, other: &str) -> bool {
393        macro_rules! subtag_matches {
394            ($T:ty, $iter:ident, $expected:expr) => {
395                $iter
396                    .next()
397                    .map(|b| <$T>::try_from_utf8(b) == Ok($expected))
398                    .unwrap_or(false)
399            };
400        }
401
402        let mut iter = parser::SubtagIterator::new(other.as_bytes());
403        if !iter.next().map(|b|
            <subtags::Language>::try_from_utf8(b) ==
                Ok(self.language)).unwrap_or(false)subtag_matches!(subtags::Language, iter, self.language) {
404            return false;
405        }
406        if let Some(ref script) = self.script {
407            if !iter.next().map(|b|
            <subtags::Script>::try_from_utf8(b) ==
                Ok(*script)).unwrap_or(false)subtag_matches!(subtags::Script, iter, *script) {
408                return false;
409            }
410        }
411        if let Some(ref region) = self.region {
412            if !iter.next().map(|b|
            <subtags::Region>::try_from_utf8(b) ==
                Ok(*region)).unwrap_or(false)subtag_matches!(subtags::Region, iter, *region) {
413                return false;
414            }
415        }
416        for variant in self.variants.iter() {
417            if !iter.next().map(|b|
            <subtags::Variant>::try_from_utf8(b) ==
                Ok(*variant)).unwrap_or(false)subtag_matches!(subtags::Variant, iter, *variant) {
418                return false;
419            }
420        }
421        iter.next().is_none()
422    }
423
424    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
425    where
426        F: FnMut(&str) -> Result<(), E>,
427    {
428        f(self.language.as_str())?;
429        if let Some(ref script) = self.script {
430            f(script.as_str())?;
431        }
432        if let Some(ref region) = self.region {
433            f(region.as_str())?;
434        }
435        for variant in self.variants.iter() {
436            f(variant.as_str())?;
437        }
438        Ok(())
439    }
440
441    /// Executes `f` on each subtag string of this `LanguageIdentifier`, with every string in
442    /// lowercase ascii form.
443    ///
444    /// The default normalization of language identifiers uses titlecase scripts and uppercase
445    /// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies:
446    ///
447    /// > _The canonical form for all subtags in the extension is lowercase, with the fields
448    /// > ordered by the separators, alphabetically._
449    ///
450    /// Hence, this method is used inside [`Transform Extensions`] to be able to get the correct
451    /// normalization of the language identifier.
452    ///
453    /// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is
454    /// **en-Latn-CA-t-en-latn-ca**, with the script and region parts lowercased inside T extensions,
455    /// but titlecased and uppercased outside T extensions respectively.
456    ///
457    /// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt
458    /// [`Transform extensions`]: crate::extensions::transform
459    pub(crate) fn for_each_subtag_str_lowercased<E, F>(&self, f: &mut F) -> Result<(), E>
460    where
461        F: FnMut(&str) -> Result<(), E>,
462    {
463        f(self.language.as_str())?;
464        if let Some(ref script) = self.script {
465            f(script.to_tinystr().to_ascii_lowercase().as_str())?;
466        }
467        if let Some(ref region) = self.region {
468            f(region.to_tinystr().to_ascii_lowercase().as_str())?;
469        }
470        for variant in self.variants.iter() {
471            f(variant.as_str())?;
472        }
473        Ok(())
474    }
475
476    /// Writes this `LanguageIdentifier` to a sink, replacing uppercase ascii chars with
477    /// lowercase ascii chars.
478    ///
479    /// The default normalization of language identifiers uses titlecase scripts and uppercase
480    /// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies:
481    ///
482    /// > _The canonical form for all subtags in the extension is lowercase, with the fields
483    /// > ordered by the separators, alphabetically._
484    ///
485    /// Hence, this method is used inside [`Transform Extensions`] to be able to get the correct
486    /// normalization of the language identifier.
487    ///
488    /// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is
489    /// **en-Latn-CA-t-en-latn-ca**, with the script and region parts lowercased inside T extensions,
490    /// but titlecased and uppercased outside T extensions respectively.
491    ///
492    /// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt
493    /// [`Transform extensions`]: crate::extensions::transform
494    pub(crate) fn write_lowercased_to<W: core::fmt::Write + ?Sized>(
495        &self,
496        sink: &mut W,
497    ) -> core::fmt::Result {
498        let mut initial = true;
499        self.for_each_subtag_str_lowercased(&mut |subtag| {
500            if initial {
501                initial = false;
502            } else {
503                sink.write_char('-')?;
504            }
505            sink.write_str(subtag)
506        })
507    }
508}
509
510impl core::fmt::Debug for LanguageIdentifier {
511    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
512        core::fmt::Display::fmt(&self, f)
513    }
514}
515
/// Parses a string into a [`LanguageIdentifier`], like [`LanguageIdentifier::try_from_str`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for LanguageIdentifier {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_utf8(s.as_bytes())
    }
}
526
527impl writeable::Writeable for LanguageIdentifier {
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W)
        -> core::fmt::Result {
        let mut initial = true;
        self.for_each_subtag_str(&mut |subtag|
                    {
                        if initial {
                            initial = false;
                        } else { sink.write_char('-')?; }
                        sink.write_str(subtag)
                    })
    }
    #[inline]
    fn writeable_length_hint(&self) -> writeable::LengthHint {
        let mut result = writeable::LengthHint::exact(0);
        let mut initial = true;
        self.for_each_subtag_str::<core::convert::Infallible,
                _>(&mut |subtag|
                        {
                            if initial { initial = false; } else { result += 1; }
                            result += subtag.len();
                            Ok(())
                        }).expect("infallible");
        result
    }
    fn writeable_borrow(&self) -> Option<&str> {
        let selff = self;
        if selff.script.is_none() && selff.region.is_none() &&
                selff.variants.is_empty() {
            Some(selff.language.as_str())
        } else { None }
    }
}
/// This trait is implemented for compatibility with [`fmt!`](alloc::fmt).
/// To create a string, [`Writeable::write_to_string`] is usually more efficient.
impl core::fmt::Display for LanguageIdentifier {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        ::writeable::Writeable::write_to(&self, f)
    }
}impl_writeable_for_each_subtag_str_no_test!(LanguageIdentifier, selff, selff.script.is_none() && selff.region.is_none() && selff.variants.is_empty() => Some(selff.language.as_str()));
528
#[test]
fn test_writeable() {
    use writeable::assert_writeable_eq;

    assert_writeable_eq!(LanguageIdentifier::UNKNOWN, "und");

    // Each input is expected to be written back exactly as given.
    for expected in [
        "und-001",
        "und-Mymr",
        "my-Mymr-MM",
        "my-Mymr-MM-posix",
        "zh-macos-posix",
    ] {
        let langid = expected.parse::<LanguageIdentifier>().unwrap();
        assert_writeable_eq!(langid, expected);
    }
}
551
552/// # Examples
553///
554/// ```
555/// use icu::locale::{langid, subtags::language, LanguageIdentifier};
556///
557/// assert_eq!(LanguageIdentifier::from(language!("en")), langid!("en"));
558/// ```
559impl From<subtags::Language> for LanguageIdentifier {
560    fn from(language: subtags::Language) -> Self {
561        Self {
562            language,
563            script: None,
564            region: None,
565            variants: subtags::Variants::new(),
566        }
567    }
568}
569
570/// # Examples
571///
572/// ```
573/// use icu::locale::{langid, subtags::script, LanguageIdentifier};
574///
575/// assert_eq!(
576///     LanguageIdentifier::from(Some(script!("latn"))),
577///     langid!("und-Latn")
578/// );
579/// ```
580impl From<Option<subtags::Script>> for LanguageIdentifier {
581    fn from(script: Option<subtags::Script>) -> Self {
582        Self {
583            language: subtags::Language::UNKNOWN,
584            script,
585            region: None,
586            variants: subtags::Variants::new(),
587        }
588    }
589}
590
591/// # Examples
592///
593/// ```
594/// use icu::locale::{langid, subtags::region, LanguageIdentifier};
595///
596/// assert_eq!(
597///     LanguageIdentifier::from(Some(region!("US"))),
598///     langid!("und-US")
599/// );
600/// ```
601impl From<Option<subtags::Region>> for LanguageIdentifier {
602    fn from(region: Option<subtags::Region>) -> Self {
603        Self {
604            language: subtags::Language::UNKNOWN,
605            script: None,
606            region,
607            variants: subtags::Variants::new(),
608        }
609    }
610}
611
612/// Convert from an LSR tuple to a [`LanguageIdentifier`].
613///
614/// # Examples
615///
616/// ```
617/// use icu::locale::{
618///     langid,
619///     subtags::{language, region, script},
620///     LanguageIdentifier,
621/// };
622///
623/// let lang = language!("en");
624/// let script = script!("Latn");
625/// let region = region!("US");
626/// assert_eq!(
627///     LanguageIdentifier::from((lang, Some(script), Some(region))),
628///     langid!("en-Latn-US")
629/// );
630/// ```
631impl
632    From<(
633        subtags::Language,
634        Option<subtags::Script>,
635        Option<subtags::Region>,
636    )> for LanguageIdentifier
637{
638    fn from(
639        lsr: (
640            subtags::Language,
641            Option<subtags::Script>,
642            Option<subtags::Region>,
643        ),
644    ) -> Self {
645        Self {
646            language: lsr.0,
647            script: lsr.1,
648            region: lsr.2,
649            variants: subtags::Variants::new(),
650        }
651    }
652}
653
654/// Convert from a [`LanguageIdentifier`] to an LSR tuple.
655///
656/// # Examples
657///
658/// ```
659/// use icu::locale::{
660///     langid,
661///     subtags::{language, region, script},
662/// };
663///
664/// let lid = langid!("en-Latn-US");
665/// let (lang, script, region) = (&lid).into();
666///
667/// assert_eq!(lang, language!("en"));
668/// assert_eq!(script, Some(script!("Latn")));
669/// assert_eq!(region, Some(region!("US")));
670/// ```
671impl From<&LanguageIdentifier>
672    for (
673        subtags::Language,
674        Option<subtags::Script>,
675        Option<subtags::Region>,
676    )
677{
678    fn from(langid: &LanguageIdentifier) -> Self {
679        (langid.language, langid.script, langid.region)
680    }
681}
icu_locale_core/langid.rs

icu_locale_core/
langid.rs