Skip to main content

icu_collections/codepointtrie/
cptrie.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::codepointtrie::error::Error;
6use crate::codepointtrie::impl_const::*;
7
8#[cfg(feature = "alloc")]
9use crate::codepointinvlist::CodePointInversionList;
10use core::char::CharTryFromError;
11use core::convert::Infallible;
12use core::convert::TryFrom;
13use core::fmt::Display;
14#[cfg(feature = "alloc")]
15use core::iter::FromIterator;
16use core::num::TryFromIntError;
17use core::ops::RangeInclusive;
18use yoke::Yokeable;
19use zerofrom::ZeroFrom;
20use zerovec::ule::AsULE;
21#[cfg(feature = "alloc")]
22use zerovec::ule::UleError;
23use zerovec::ZeroSlice;
24use zerovec::ZeroVec;
25
26/// The type of trie represents whether the trie has an optimization that
27/// would make it smaller or faster.
28///
29/// Regarding performance, a trie being a small or fast type affects the number of array lookups
30/// needed for code points in the range `[0x1000, 0x10000)`. In this range, `Small` tries use 4 array lookups,
31/// while `Fast` tries use 2 array lookups.
32/// Code points before the interval (in `[0, 0x1000)`) will always use 2 array lookups.
33/// Code points after the interval (in `[0x10000, 0x10FFFF]`) will always use 4 array lookups.
34///
35/// Regarding size, `Fast` type tries are larger than `Small` type tries because the minimum size of
36/// the index array is larger. The minimum size is the "fast max" limit, which is the limit of the range
37/// of code points with 2 array lookups.
38///
39/// See the document [Unicode Properties and Code Point Tries in ICU4X](https://github.com/unicode-org/icu4x/blob/main/documents/design/properties_code_point_trie.md).
40///
41/// Also see [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
42#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_enums)]
impl ::core::clone::Clone for TrieType {
    #[inline]
    fn clone(&self) -> TrieType { *self }
}Clone, #[automatically_derived]
#[allow(clippy::exhaustive_enums)]
impl ::core::marker::Copy for TrieType { }Copy, #[automatically_derived]
#[allow(clippy::exhaustive_enums)]
impl ::core::cmp::PartialEq for TrieType {
    #[inline]
    fn eq(&self, other: &TrieType) -> bool {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        __self_discr == __arg1_discr
    }
}PartialEq, #[automatically_derived]
#[allow(clippy::exhaustive_enums)]
impl ::core::fmt::Debug for TrieType {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::write_str(f,
            match self {
                TrieType::Fast => "Fast",
                TrieType::Small => "Small",
            })
    }
}Debug, #[automatically_derived]
#[allow(clippy::exhaustive_enums)]
impl ::core::cmp::Eq for TrieType {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {}
}Eq)]
43#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
44#[cfg_attr(feature = "databake", derive(databake::Bake))]
45#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
46#[allow(clippy::exhaustive_enums)] // based on a stable serialized form
47pub enum TrieType {
48    /// Represents the "fast" type code point tries for the
49    /// [`TrieType`] trait. The "fast max" limit is set to `0xffff`.
50    Fast = 0,
51    /// Represents the "small" type code point tries for the
52    /// [`TrieType`] trait. The "fast max" limit is set to `0x0fff`.
53    Small = 1,
54}
55
56// TrieValue trait
57
58// AsULE is AsUnalignedLittleEndian, i.e. "allowed in a zerovec"
59
60/// A trait representing the values stored in the data array of a [`CodePointTrie`].
61/// This trait is used as a type parameter in constructing a `CodePointTrie`.
62///
63/// This trait can be implemented on anything that can be represented as a u32s worth of data.
64pub trait TrieValue: Copy + Eq + PartialEq + AsULE + 'static {
65    /// Last-resort fallback value to return if we cannot read data from the trie.
66    ///
67    /// In most cases, the error value is read from the last element of the `data` array,
68    /// this value is used for empty codepointtrie arrays
69    /// Error type when converting from a u32 to this `TrieValue`.
70    type TryFromU32Error: Display;
71    /// A parsing function that is primarily motivated by deserialization contexts.
72    /// When the serialization type width is smaller than 32 bits, then it is expected
73    /// that the call site will widen the value to a `u32` first.
74    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>;
75
76    /// A method for converting back to a `u32` that can roundtrip through
77    /// [`Self::try_from_u32()`]. The default implementation of this trait
78    /// method panics in debug mode and returns 0 in release mode.
79    ///
80    /// This method is allowed to have GIGO behavior when fed a value that has
81    /// no corresponding `u32` (since such values cannot be stored in the trie)
82    fn to_u32(self) -> u32;
83}
84
85macro_rules! impl_primitive_trie_value {
86    ($primitive:ty, $error:ty) => {
87        impl TrieValue for $primitive {
88            type TryFromU32Error = $error;
89            fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
90                Self::try_from(i)
91            }
92
93            #[allow(trivial_numeric_casts)]
94            fn to_u32(self) -> u32 {
95                // bitcast when the same size, zero-extend/sign-extend
96                // when not the same size
97                self as u32
98            }
99        }
100    };
101}
102
103impl TrieValue for u8 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(u8, TryFromIntError);
104impl TrieValue for u16 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(u16, TryFromIntError);
105impl TrieValue for u32 {
    type TryFromU32Error = Infallible;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(u32, Infallible);
106impl TrieValue for i8 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(i8, TryFromIntError);
107impl TrieValue for i16 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(i16, TryFromIntError);
108impl TrieValue for i32 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(i32, TryFromIntError);
109impl TrieValue for char {
    type TryFromU32Error = CharTryFromError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    #[allow(trivial_numeric_casts)]
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(char, CharTryFromError);
110
111/// Helper function used by [`get_range`]. Converts occurrences of trie's null
112/// value into the provided `null_value`.
113///
114/// Note: the ICU version of this helper function uses a `ValueFilter` function
115/// to apply a transform on a non-null value. But currently, this implementation
116/// stops short of that functionality, and instead leaves the non-null trie value
117/// untouched. This is equivalent to having a `ValueFilter` function that is the
118/// identity function.
119fn maybe_filter_value<T: TrieValue>(value: T, trie_null_value: T, null_value: T) -> T {
120    if value == trie_null_value {
121        null_value
122    } else {
123        value
124    }
125}
126
127/// This struct represents a de-serialized [`CodePointTrie`] that was exported from
128/// ICU binary data.
129///
130/// For more information:
131/// - [ICU Site design doc](https://unicode-org.github.io/icu/design/struct/utrie)
132/// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup)
133// serde impls in crate::serde
134#[derive(#[automatically_derived]
impl<'trie, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    CodePointTrie<'trie, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field4_finish(f, "CodePointTrie",
            "header", &self.header, "index", &self.index, "data", &self.data,
            "error_value", &&self.error_value)
    }
}Debug, #[automatically_derived]
impl<'trie, T: ::core::cmp::Eq + TrieValue> ::core::cmp::Eq for
    CodePointTrie<'trie, T> {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<CodePointTrieHeader>;
        let _: ::core::cmp::AssertParamIsEq<ZeroVec<'trie, u16>>;
        let _: ::core::cmp::AssertParamIsEq<ZeroVec<'trie, T>>;
        let _: ::core::cmp::AssertParamIsEq<T>;
    }
}Eq, #[automatically_derived]
impl<'trie, T: ::core::cmp::PartialEq + TrieValue> ::core::cmp::PartialEq for
    CodePointTrie<'trie, T> {
    #[inline]
    fn eq(&self, other: &CodePointTrie<'trie, T>) -> bool {
        self.header == other.header && self.index == other.index &&
                self.data == other.data &&
            self.error_value == other.error_value
    }
}PartialEq, unsafe impl<'yoke, T: TrieValue> yoke::Yokeable<'yoke> for
    CodePointTrie<'static, T> where T: 'static {
    type Output = CodePointTrie<'yoke, T>;
    #[inline]
    fn transform(&'yoke self) -> &'yoke Self::Output {
        if false { let _: *const &'yoke Self::Output = &raw const self; }
        self
    }
    #[inline]
    fn transform_owned(self) -> Self::Output { self }
    #[inline]
    unsafe fn make(from: Self::Output) -> Self {
        ::core::mem::transmute::<Self::Output, Self>(from)
    }
    #[inline]
    fn transform_mut<F>(&'yoke mut self, f: F) where F: 'static +
        for<'_yoke> FnOnce(&'_yoke mut Self::Output) {
        let y = unsafe { &mut *(self as *mut Self as *mut Self::Output) };
        f(y)
    }
}Yokeable, impl<'zf, 'zf_inner, T: TrieValue>
    zerofrom::ZeroFrom<'zf, CodePointTrie<'zf_inner, T>> for
    CodePointTrie<'zf, T> where
    ZeroVec<'zf, T>: zerofrom::ZeroFrom<'zf, ZeroVec<'zf_inner, T>> {
    fn zero_from(this: &'zf CodePointTrie<'zf_inner, T>) -> Self {
        match *this {
            CodePointTrie {
                header: ref __binding_0,
                index: ref __binding_1,
                data: ref __binding_2,
                error_value: ref __binding_3 } => {
                CodePointTrie {
                    header: *__binding_0,
                    index: <ZeroVec<'zf, u16> as
                            zerofrom::ZeroFrom<'zf,
                            ZeroVec<'zf_inner, u16>>>::zero_from(__binding_1),
                    data: <ZeroVec<'zf, T> as
                            zerofrom::ZeroFrom<'zf,
                            ZeroVec<'zf_inner, T>>>::zero_from(__binding_2),
                    error_value: __binding_3.clone(),
                }
            }
        }
    }
}ZeroFrom)]
135pub struct CodePointTrie<'trie, T: TrieValue> {
136    /// # Safety Invariant
137    ///
138    /// The value of `header.trie_type` must not change after construction.
139    pub(crate) header: CodePointTrieHeader,
140    /// # Safety Invariant
141    ///
142    /// If `header.trie_type == TrieType::Fast`, `index.len()` must be greater
143    /// than `FAST_TYPE_FAST_INDEXING_MAX`. Otherwise, `index.len()`
144    /// must be greater than `SMALL_TYPE_FAST_INDEXING_MAX`. Furthermore,
145    /// this field must not change after construction. (Strictly: It must
146    /// not become shorter than the length requirement stated above and the
147    /// values within the prefix up to the length requirement must not change.)
148    pub(crate) index: ZeroVec<'trie, u16>,
149    /// # Safety Invariant
150    ///
151    /// If `header.trie_type == TrieType::Fast`, `data.len()` must be greater
152    /// than `FAST_TYPE_DATA_MASK` plus the largest value in
153    /// `index[0..FAST_TYPE_FAST_INDEXING_MAX + 1]`. Otherwise, `data.len()`
154    /// must be greater than `FAST_TYPE_DATA_MASK` plus the largest value in
155    /// `index[0..SMALL_TYPE_FAST_INDEXING_MAX + 1]`. Furthermore, this field
156    /// must not change after construction. (Strictly: The stated length
157    /// requirement must continue to hold.)
158    pub(crate) data: ZeroVec<'trie, T>,
159    // serde impl skips this field
160    #[zerofrom(clone)] // TrieValue is Copy, this allows us to avoid
161    // a T: ZeroFrom bound
162    pub(crate) error_value: T,
163}
164
165/// This struct contains the fixed-length header fields of a [`CodePointTrie`].
166///
167/// # Safety Invariant
168///
169/// The `trie_type` field must not change after construction.
170///
171/// (In practice, all the fields here remain unchanged during the lifetime
172/// of the trie.)
173#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
174#[cfg_attr(feature = "databake", derive(databake::Bake))]
175#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
176#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::marker::Copy for CodePointTrieHeader { }Copy, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::clone::Clone for CodePointTrieHeader {
    #[inline]
    fn clone(&self) -> CodePointTrieHeader {
        let _: ::core::clone::AssertParamIsClone<u32>;
        let _: ::core::clone::AssertParamIsClone<u16>;
        let _: ::core::clone::AssertParamIsClone<TrieType>;
        *self
    }
}Clone, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::fmt::Debug for CodePointTrieHeader {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        let names: &'static _ =
            &["high_start", "shifted12_high_start", "index3_null_offset",
                        "data_null_offset", "null_value", "trie_type"];
        let values: &[&dyn ::core::fmt::Debug] =
            &[&self.high_start, &self.shifted12_high_start,
                        &self.index3_null_offset, &self.data_null_offset,
                        &self.null_value, &&self.trie_type];
        ::core::fmt::Formatter::debug_struct_fields_finish(f,
            "CodePointTrieHeader", names, values)
    }
}Debug, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::Eq for CodePointTrieHeader {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<u32>;
        let _: ::core::cmp::AssertParamIsEq<u16>;
        let _: ::core::cmp::AssertParamIsEq<TrieType>;
    }
}Eq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl ::core::cmp::PartialEq for CodePointTrieHeader {
    #[inline]
    fn eq(&self, other: &CodePointTrieHeader) -> bool {
        self.high_start == other.high_start &&
                            self.shifted12_high_start == other.shifted12_high_start &&
                        self.index3_null_offset == other.index3_null_offset &&
                    self.data_null_offset == other.data_null_offset &&
                self.null_value == other.null_value &&
            self.trie_type == other.trie_type
    }
}PartialEq, unsafe impl<'yoke> yoke::Yokeable<'yoke> for CodePointTrieHeader<> where
    Self: Sized {
    type Output = Self;
    #[inline]
    fn transform(&self) -> &Self::Output { self }
    #[inline]
    fn transform_owned(self) -> Self::Output { self }
    #[inline]
    unsafe fn make(this: Self::Output) -> Self { this }
    #[inline]
    fn transform_mut<F>(&'yoke mut self, f: F) where F: 'static +
        for<'_yoke> FnOnce(&'_yoke mut Self::Output) {
        f(self)
    }
}Yokeable, impl<'zf> zerofrom::ZeroFrom<'zf, CodePointTrieHeader<>> for
    CodePointTrieHeader<> where  {
    fn zero_from(this: &'zf Self) -> Self { *this }
}ZeroFrom)]
177#[allow(clippy::exhaustive_structs)] // based on a stable serialized form
178pub struct CodePointTrieHeader {
179    /// The code point of the start of the last range of the trie. A
180    /// range is defined as a partition of the code point space such that the
181    /// value in this trie associated with all code points of the same range is
182    /// the same.
183    ///
184    /// For the property value data for many Unicode properties,
185    /// often times, `high_start` is `U+10000` or lower. In such cases, not
186    /// reserving space in the `index` array for duplicate values is a large
187    /// savings. The "highValue" associated with the `high_start` range is
188    /// stored at the second-to-last position of the `data` array.
189    /// (See `impl_const::HIGH_VALUE_NEG_DATA_OFFSET`.)
190    pub high_start: u32,
191    /// A version of the `high_start` value that is right-shifted 12 spaces,
192    /// but is rounded up to a multiple `0x1000` for easy testing from UTF-8
193    /// lead bytes.
194    pub shifted12_high_start: u16,
195    /// Offset for the null block in the "index-3" table of the `index` array.
196    /// Set to an impossibly high value (e.g., `0xffff`) if there is no
197    /// dedicated index-3 null block.
198    pub index3_null_offset: u16,
199    /// Internal data null block offset, not shifted.
200    /// Set to an impossibly high value (e.g., `0xfffff`) if there is no
201    /// dedicated data null block.
202    pub data_null_offset: u32,
203    /// The value stored in the trie that represents a null value being
204    /// associated to a code point.
205    pub null_value: u32,
206    /// The enum value representing the type of trie, where trie type is as it
207    /// is defined in ICU (ex: Fast, Small).
208    ///
209    /// # Safety Invariant
210    ///
211    /// Must not change after construction.
212    pub trie_type: TrieType,
213}
214
215impl TryFrom<u8> for TrieType {
216    type Error = Error;
217
218    fn try_from(trie_type_int: u8) -> Result<TrieType, Error> {
219        match trie_type_int {
220            0 => Ok(TrieType::Fast),
221            1 => Ok(TrieType::Small),
222            _ => Err(Error::FromDeserialized {
223                reason: "Cannot parse value for trie_type",
224            }),
225        }
226    }
227}
228
229// Helper macro that turns arithmetic into wrapping-in-release, checked-in-debug arithmetic
230//
231// This is rustc's default behavior anyway, however some projects like Android deliberately
232// enable overflow checks. CodePointTrie::get() is intended to be used in Android bionic which
233// cares about codesize and we don't want the pile of panicking infrastructure brought in by overflow
234// checks, so we force wrapping in release.
235// See #6052
236macro_rules! w(
237    // Note: these matchers are not perfect since you cannot have an operator after an expr matcher
238    // Use variables if you need complex first operands.
239    ($a:tt + $b:expr) => {
240        {
241            #[allow(unused_parens)]
242            let a = $a;
243            let b = $b;
244            debug_assert!(a.checked_add(b).is_some());
245            $a.wrapping_add($b)
246        }
247    };
248    ($a:tt - $b:expr) => {
249
250        {
251            #[allow(unused_parens)]
252            let a = $a;
253            let b = $b;
254            debug_assert!(a.checked_sub(b).is_some());
255            $a.wrapping_sub($b)
256        }
257    };
258    ($a:tt * $b:expr) => {
259        {
260            #[allow(unused_parens)]
261            let a = $a;
262            let b = $b;
263            debug_assert!(a.checked_mul(b).is_some());
264            $a.wrapping_mul($b)
265        }
266    };
267);
268
269impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
270    #[doc(hidden)] // databake internal
271    /// # Safety
272    ///
273    /// `header.trie_type`, `index`, and `data` must
274    /// satisfy the invariants for the fields of the
275    /// same names on `CodePointTrie`.
276    pub const unsafe fn from_parts_unstable_unchecked_v1(
277        header: CodePointTrieHeader,
278        index: ZeroVec<'trie, u16>,
279        data: ZeroVec<'trie, T>,
280        error_value: T,
281    ) -> Self {
282        // Field invariants upheld: The caller is responsible.
283        // In practice, this means that datagen in the databake
284        // mode upholds these invariants when constructing the
285        // `CodePointTrie` that is then baked.
286        Self {
287            header,
288            index,
289            data,
290            error_value,
291        }
292    }
293
294    /// Returns a new [`CodePointTrie`] backed by borrowed data for the `index`
295    /// array and `data` array, whose data values have width `W`.
296    pub fn try_new(
297        header: CodePointTrieHeader,
298        index: ZeroVec<'trie, u16>,
299        data: ZeroVec<'trie, T>,
300    ) -> Result<CodePointTrie<'trie, T>, Error> {
301        let error_value = Self::validate_fields(&header, &index, &data)?;
302        // Field invariants upheld: Checked by `validate_fields` above.
303        let trie: CodePointTrie<'trie, T> = CodePointTrie {
304            header,
305            index,
306            data,
307            error_value,
308        };
309        Ok(trie)
310    }
311
312    /// Checks the invariant on the fields that fast-path access relies on for
313    /// safety in order to omit slice bound checks and upon success returns the
314    /// `error_value` for the trie.
315    ///
316    /// # Safety Usable Invariant
317    ///
318    /// Iff this function returns `Ok(T)`, the arguments satisfy the invariants
319    /// for corresponding fields of `CodePointTrie`. (Other than proving that
320    /// nothing else changes the fields subsequently.)
321    pub(crate) fn validate_fields(
322        header: &CodePointTrieHeader,
323        index: &ZeroSlice<u16>,
324        data: &ZeroSlice<T>,
325    ) -> Result<T, Error> {
326        let error_value = data.last().ok_or(Error::EmptyDataVector)?;
327
328        // `CodePointTrie` lookup has two stages: fast and small (the trie types
329        // are also fast and small; they affect where the boundary between fast
330        // and small lookups is).
331        //
332        // The length requirements for `index` and `data` are checked here only
333        // for the fast lookup case so that the fast lookup can omit bound checks
334        // at the time of access. In the small lookup case, bounds are checked at
335        // the time of access.
336        //
337        // The fast lookup happens on the prefixes of `index` and `data` with
338        // more items for the small lookup case afterwards. The check here
339        // only looks at the prefixes relevant to the fast case.
340        //
341        // In the fast lookup case, the bits of the of the code point are
342        // partitioned into a bit prefix and a bit suffix. First, a value
343        // is read from `index` by indexing into it using the bit prefix.
344        // Then `data` is accessed by the value just read from `index` plus
345        // the bit suffix.
346        //
347        // Therefore, the length of `index` needs to accommodate access
348        // by the maximum possible bit prefix, and the length of `data`
349        // needs to accommodate access by the largest value stored in the part
350        // of `data` reachable by the bit prefix plus the maximum possible bit
351        // suffix.
352        //
353        // The maximum possible bit prefix depends on the trie type.
354
355        // The maximum code point that can be used for fast-path access:
356        let fast_max = match header.trie_type {
357            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
358            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
359        };
360        // Keep only the prefix bits:
361        let max_bit_prefix = fast_max >> FAST_TYPE_SHIFT;
362        // Attempt slice the part of `index` that the fast path can index into.
363        // Since `max_bit_prefix` is the largest possible value used for
364        // indexing (inclusive bound), we need to add one to get the exclusive
365        // bound, which is what `get_subslice` wants.
366        let fast_index = index
367            .get_subslice(0..(max_bit_prefix as usize) + 1)
368            .ok_or(Error::IndexTooShortForFastAccess)?;
369        // Invariant upheld for `index`: If we got this far, the length of `index`
370        // satisfies its length invariant on the assumption that `header.trie_type`
371        // will not change subsequently.
372
373        // Now find the largest offset in the part of `index` reachable by the
374        // bit prefix. `max` can never actually return `None`, since we already
375        // know the slice isn't empty. Hence, reusing the error kind instead of
376        // minting a new one for this check.
377        let max_offset = fast_index
378            .iter()
379            .max()
380            .ok_or(Error::IndexTooShortForFastAccess)?;
381        // `FAST_TYPE_DATA_MASK` is the maximum possible bit suffix, since the
382        // maximum is when all the bits in the suffix are set, and the mask
383        // has that many bits set.
384        if (max_offset) as usize + (FAST_TYPE_DATA_MASK as usize) >= data.len() {
385            return Err(Error::DataTooShortForFastAccess);
386        }
387
388        // Invariant upheld for `data`: If we got this far, the length of `data`
389        // satisfies `data`'s length invariant on the assumption that the contents
390        // of `fast_index` subslice of `index` and `header.trie_type` will not
391        // change subsequently.
392
393        Ok(error_value)
394    }
395
396    /// Turns this trie into a version whose trie type is encoded in the Rust type.
397    #[inline]
398    pub fn to_typed(self) -> Typed<FastCodePointTrie<'trie, T>, SmallCodePointTrie<'trie, T>> {
399        match self.header.trie_type {
400            TrieType::Fast => Typed::Fast(FastCodePointTrie { inner: self }),
401            TrieType::Small => Typed::Small(SmallCodePointTrie { inner: self }),
402        }
403    }
404
405    /// Obtains a reference to this trie as a Rust type that encodes the trie type in the Rust type.
406    #[inline]
407    pub fn as_typed_ref(
408        &self,
409    ) -> Typed<&FastCodePointTrie<'trie, T>, &SmallCodePointTrie<'trie, T>> {
410        // SAFETY: `FastCodePointTrie` and `SmallCodePointTrie` are `repr(transparent)`
411        // with `CodePointTrie`, so transmuting between the references is OK when the
412        // actual trie type agrees with the semantics of the typed wrapper.
413        match self.header.trie_type {
414            TrieType::Fast => Typed::Fast(unsafe {
415                &*(self as *const CodePointTrie<'trie, T> as *const FastCodePointTrie<'trie, T>)
416            }),
417            TrieType::Small => Typed::Small(unsafe {
418                &*(self as *const CodePointTrie<'trie, T> as *const SmallCodePointTrie<'trie, T>)
419            }),
420        }
421    }
422
423    /// Returns the position in the data array containing the trie's stored
424    /// error value.
425    #[inline(always)] // `always` was based on previous normalizer benchmarking
426    fn trie_error_val_index(&self) -> u32 {
427        // We use wrapping_sub here to avoid panicky overflow checks.
428        // len should always be > 1, but if it isn't this will just cause GIGO behavior of producing
429        // None on `.get()`
430        if true {
    if !(self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET) {
        ::core::panicking::panic("assertion failed: self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET")
    };
};debug_assert!(self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET);
431        {
    #[allow(unused_parens)]
    let a = (self.data.len() as u32);
    let b = ERROR_VALUE_NEG_DATA_OFFSET;
    if true {
        if !a.checked_sub(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_sub(b).is_some()")
        };
    };
    (self.data.len() as u32).wrapping_sub(ERROR_VALUE_NEG_DATA_OFFSET)
}w!((self.data.len() as u32) - ERROR_VALUE_NEG_DATA_OFFSET)
432    }
433
434    fn internal_small_index(&self, code_point: u32) -> u32 {
435        // We use wrapping arithmetic here to avoid overflow checks making their way into binaries
436        // with overflow checks enabled. Ultimately this code ends up as a checked index, so any
437        // bugs here will cause GIGO
438        let mut index1_pos: u32 = code_point >> SHIFT_1;
439        if self.header.trie_type == TrieType::Fast {
440            if true {
    if !(FAST_TYPE_FAST_INDEXING_MAX < code_point &&
                code_point < self.header.high_start) {
        ::core::panicking::panic("assertion failed: FAST_TYPE_FAST_INDEXING_MAX < code_point &&\n    code_point < self.header.high_start")
    };
};debug_assert!(
441                FAST_TYPE_FAST_INDEXING_MAX < code_point && code_point < self.header.high_start
442            );
443            index1_pos = {
    #[allow(unused_parens)]
    let a = index1_pos;
    let b = BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index1_pos.wrapping_add(BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH)
}w!(index1_pos + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH);
444        } else {
445            if !(code_point < self.header.high_start &&
            self.header.high_start > SMALL_LIMIT) {
    ::core::panicking::panic("assertion failed: code_point < self.header.high_start && self.header.high_start > SMALL_LIMIT")
};assert!(code_point < self.header.high_start && self.header.high_start > SMALL_LIMIT);
446            index1_pos = {
    #[allow(unused_parens)]
    let a = index1_pos;
    let b = SMALL_INDEX_LENGTH;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index1_pos.wrapping_add(SMALL_INDEX_LENGTH)
}w!(index1_pos + SMALL_INDEX_LENGTH);
447        }
448        let index1_val = if let Some(index1_val) = self.index.get(index1_pos as usize) {
449            index1_val
450        } else {
451            return self.trie_error_val_index();
452        };
453        let index3_block_idx: u32 =
454            {
    #[allow(unused_parens)]
    let a = (index1_val as u32);
    let b = (code_point >> SHIFT_2) & INDEX_2_MASK;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    (index1_val as u32).wrapping_add((code_point >> SHIFT_2) & INDEX_2_MASK)
}w!((index1_val as u32) + (code_point >> SHIFT_2) & INDEX_2_MASK);
455        let mut index3_block: u32 =
456            if let Some(index3_block) = self.index.get(index3_block_idx as usize) {
457                index3_block as u32
458            } else {
459                return self.trie_error_val_index();
460            };
461        let mut index3_pos: u32 = (code_point >> SHIFT_3) & INDEX_3_MASK;
462        let mut data_block: u32;
463        if index3_block & 0x8000 == 0 {
464            // 16-bit indexes
465            data_block =
466                if let Some(data_block) = self.index.get({
    #[allow(unused_parens)]
    let a = index3_block;
    let b = index3_pos;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index3_block.wrapping_add(index3_pos)
}w!(index3_block + index3_pos) as usize) {
467                    data_block as u32
468                } else {
469                    return self.trie_error_val_index();
470                };
471        } else {
472            // 18-bit indexes stored in groups of 9 entries per 8 indexes.
473            index3_block = {
    #[allow(unused_parens)]
    let a = (index3_block & 0x7fff);
    let b =
        {
            #[allow(unused_parens)]
            let a = (index3_pos & !7);
            let b = index3_pos >> 3;
            if true {
                if !a.checked_add(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
                };
            };
            (index3_pos & !7).wrapping_add(index3_pos >> 3)
        };
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    (index3_block &
                0x7fff).wrapping_add({
            #[allow(unused_parens)]
            let a = (index3_pos & !7);
            let b = index3_pos >> 3;
            if true {
                if !a.checked_add(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
                };
            };
            (index3_pos & !7).wrapping_add(index3_pos >> 3)
        })
}w!((index3_block & 0x7fff) + w!((index3_pos & !7) + index3_pos >> 3));
474            index3_pos &= 7;
475            data_block = if let Some(data_block) = self.index.get(index3_block as usize) {
476                data_block as u32
477            } else {
478                return self.trie_error_val_index();
479            };
480            data_block = (data_block << {
    #[allow(unused_parens)]
    let a = 2u32;
    let b =
        {
            #[allow(unused_parens)]
            let a = 2u32;
            let b = index3_pos;
            if true {
                if !a.checked_mul(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_mul(b).is_some()")
                };
            };
            2u32.wrapping_mul(index3_pos)
        };
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    2u32.wrapping_add({
            #[allow(unused_parens)]
            let a = 2u32;
            let b = index3_pos;
            if true {
                if !a.checked_mul(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_mul(b).is_some()")
                };
            };
            2u32.wrapping_mul(index3_pos)
        })
}w!(2u32 + w!(2u32 * index3_pos))) & 0x30000;
481            index3_block += 1;
482            data_block =
483                if let Some(index3_val) = self.index.get({
    #[allow(unused_parens)]
    let a = index3_block;
    let b = index3_pos;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index3_block.wrapping_add(index3_pos)
}w!(index3_block + index3_pos) as usize) {
484                    data_block | (index3_val as u32)
485                } else {
486                    return self.trie_error_val_index();
487                };
488        }
489        // Returns data_pos == data_block (offset) +
490        //     portion of code_point bit field for last (4th) lookup
491        {
    #[allow(unused_parens)]
    let a = data_block;
    let b = code_point & SMALL_DATA_MASK;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    data_block.wrapping_add(code_point & SMALL_DATA_MASK)
}w!(data_block + code_point & SMALL_DATA_MASK)
492    }
493
494    /// Returns the position in the `data` array for the given code point,
495    /// where this code point is at or above the fast limit associated for the
496    /// `trie_type`. We will refer to that limit as "`fastMax`" here.
497    ///
498    /// A lookup of the value in the code point trie for a code point in the
499    /// code point space range [`fastMax`, `high_start`) will be a 4-step
500    /// lookup: 3 lookups in the `index` array and one lookup in the `data`
501    /// array. Lookups for code points in the range [`high_start`,
502    /// `CODE_POINT_MAX`] are short-circuited to be a single lookup, see
503    /// [`CodePointTrieHeader::high_start`].
504    fn small_index(&self, code_point: u32) -> u32 {
505        if code_point >= self.header.high_start {
506            {
    #[allow(unused_parens)]
    let a = (self.data.len() as u32);
    let b = HIGH_VALUE_NEG_DATA_OFFSET;
    if true {
        if !a.checked_sub(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_sub(b).is_some()")
        };
    };
    (self.data.len() as u32).wrapping_sub(HIGH_VALUE_NEG_DATA_OFFSET)
}w!((self.data.len() as u32) - HIGH_VALUE_NEG_DATA_OFFSET)
507        } else {
508            self.internal_small_index(code_point) // helper fn
509        }
510    }
511
512    /// Returns the position in the `data` array for the given code point,
513    /// where this code point is below the fast limit associated for the
514    /// `trie type`. We will refer to that limit as "`fastMax`" here.
515    ///
516    /// A lookup of the value in the code point trie for a code point in the
517    /// code point space range [0, `fastMax`) will be a 2-step lookup: 1
518    /// lookup in the `index` array and one lookup in the `data` array. By
519    /// design, for trie type `T`, there is an element allocated in the `index`
520    /// array for each block of code points in [0, `fastMax`), which in
521    /// turn guarantees that those code points are represented and only need 1
522    /// lookup.
523    fn fast_index(&self, code_point: u32) -> u32 {
524        let index_array_pos: u32 = code_point >> FAST_TYPE_SHIFT;
525        let index_array_val: u16 =
526            if let Some(index_array_val) = self.index.get(index_array_pos as usize) {
527                index_array_val
528            } else {
529                return self.trie_error_val_index();
530            };
531        let masked_cp = code_point & FAST_TYPE_DATA_MASK;
532        let index_array_val = index_array_val as u32;
533        let fast_index_val: u32 = {
    #[allow(unused_parens)]
    let a = index_array_val;
    let b = masked_cp;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index_array_val.wrapping_add(masked_cp)
}w!(index_array_val + masked_cp);
534        fast_index_val
535    }
536
537    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`]
538    /// if `code_point` uses fast-path lookup or `None` if `code_point`
539    /// should use small-path lookup or is above the supported range.
540    #[inline(always)] // "always" to make the `Option` collapse away
541    fn get32_by_fast_index(&self, code_point: u32) -> Option<T> {
542        let fast_max = match self.header.trie_type {
543            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
544            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
545        };
546        if code_point <= fast_max {
547            // SAFETY: We just checked the invariant of
548            // `get32_assuming_fast_index`,
549            // which is
550            // "If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
551            // `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
552            // TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`."
553            Some(unsafe { self.get32_assuming_fast_index(code_point) })
554        } else {
555            // The caller needs to call `get32_by_small_index` or determine
556            // that the argument is above the permitted range.
557            None
558        }
559    }
560
561    /// Performs the actual fast-mode lookup
562    ///
563    /// # Safety
564    ///
565    /// If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
566    /// `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
567    /// TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`.
568    #[inline(always)]
569    unsafe fn get32_assuming_fast_index(&self, code_point: u32) -> T {
570        // Check the safety invariant of this method.
571        if true {
    if !(code_point <=
                match self.header.trie_type {
                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
                }) {
        ::core::panicking::panic("assertion failed: code_point <=\n    match self.header.trie_type {\n        TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,\n        TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,\n    }")
    };
};debug_assert!(
572            code_point
573                <= match self.header.trie_type {
574                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
575                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
576                }
577        );
578
579        let bit_prefix = (code_point as usize) >> FAST_TYPE_SHIFT;
580        if true {
    if !(bit_prefix < self.index.len()) {
        ::core::panicking::panic("assertion failed: bit_prefix < self.index.len()")
    };
};debug_assert!(bit_prefix < self.index.len());
581        // SAFETY: Relying on the length invariant of `self.index` having
582        // been checked and on the unchangedness invariant of `self.index`
583        // and `self.header.trie_type` after construction.
584        let base_offset_to_data: usize = usize::from(u16::from_unaligned(*unsafe {
585            self.index.as_ule_slice().get_unchecked(bit_prefix)
586        }));
587        let bit_suffix = (code_point & FAST_TYPE_DATA_MASK) as usize;
588        // SAFETY: Cannot overflow with supported (32-bit and 64-bit) `usize`
589        // sizes, since `base_offset_to_data` was extended from `u16` and
590        // `bit_suffix` is at most `FAST_TYPE_DATA_MASK`, which is well
591        // under what it takes to reach the 32-bit (or 64-bit) max with
592        // additon from the max of `u16`.
593        let offset_to_data = {
    #[allow(unused_parens)]
    let a = base_offset_to_data;
    let b = bit_suffix;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    base_offset_to_data.wrapping_add(bit_suffix)
}w!(base_offset_to_data + bit_suffix);
594        if true {
    if !(offset_to_data < self.data.len()) {
        ::core::panicking::panic("assertion failed: offset_to_data < self.data.len()")
    };
};debug_assert!(offset_to_data < self.data.len());
595        // SAFETY: Relying on the length invariant of `self.data` having
596        // been checked and on the unchangedness invariant of `self.data`,
597        // `self.index`, and `self.header.trie_type` after construction.
598        T::from_unaligned(*unsafe { self.data.as_ule_slice().get_unchecked(offset_to_data) })
599    }
600
601    /// Coldness wrapper for `get32_by_small_index` to also allow
602    /// calls without the effects of `#[cold]`.
603    #[cold]
604    #[inline(always)]
605    fn get32_by_small_index_cold(&self, code_point: u32) -> T {
606        self.get32_by_small_index(code_point)
607    }
608
609    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`]
610    /// assuming that the small index path should be used.
611    ///
612    /// # Intended Precondition
613    ///
614    /// `code_point` must be at most `CODE_POINT_MAX` AND greter than
615    /// `FAST_TYPE_FAST_INDEXING_MAX` if the trie type is fast or greater
616    /// than `SMALL_TYPE_FAST_INDEXING_MAX` if the trie type is small.
617    /// This is checked when debug assertions are enabled. If this
618    /// precondition is violated, the behavior of this method is
619    /// memory-safe, but the returned value may be bogus (not
620    /// necessarily the designated error value).
621    #[inline(never)]
622    fn get32_by_small_index(&self, code_point: u32) -> T {
623        if true {
    if !(code_point <= CODE_POINT_MAX) {
        ::core::panicking::panic("assertion failed: code_point <= CODE_POINT_MAX")
    };
};debug_assert!(code_point <= CODE_POINT_MAX);
624        if true {
    if !(code_point >
                match self.header.trie_type {
                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
                }) {
        ::core::panicking::panic("assertion failed: code_point >\n    match self.header.trie_type {\n        TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,\n        TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,\n    }")
    };
};debug_assert!(
625            code_point
626                > match self.header.trie_type {
627                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
628                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
629                }
630        );
631        self.data
632            .get(self.small_index(code_point) as usize)
633            .unwrap_or(self.error_value)
634    }
635
636    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`].
637    ///
638    /// # Examples
639    ///
640    /// ```
641    /// use icu::collections::codepointtrie::planes;
642    /// let trie = planes::get_planes_trie();
643    ///
644    /// assert_eq!(0, trie.get32(0x41)); // 'A' as u32
645    /// assert_eq!(0, trie.get32(0x13E0)); // 'Ꮰ' as u32
646    /// assert_eq!(1, trie.get32(0x10044)); // '𐁄' as u32
647    /// ```
648    #[inline(always)] // `always` based on normalizer benchmarking
649    pub fn get32(&self, code_point: u32) -> T {
650        if let Some(v) = self.get32_by_fast_index(code_point) {
651            v
652        } else if code_point <= CODE_POINT_MAX {
653            self.get32_by_small_index_cold(code_point)
654        } else {
655            self.error_value
656        }
657    }
658
659    /// Returns the value that is associated with `char` in this [`CodePointTrie`].
660    ///
661    /// # Examples
662    ///
663    /// ```
664    /// use icu::collections::codepointtrie::planes;
665    /// let trie = planes::get_planes_trie();
666    ///
667    /// assert_eq!(0, trie.get('A')); // 'A' as u32
668    /// assert_eq!(0, trie.get('Ꮰ')); // 'Ꮰ' as u32
669    /// assert_eq!(1, trie.get('𐁄')); // '𐁄' as u32
670    /// ```
671    #[inline(always)]
672    pub fn get(&self, c: char) -> T {
673        // LLVM's optimizations have been observed not to be 100%
674        // reliable around collapsing away unnecessary parts of
675        // `get32`, so not just calling `get32` here.
676        let code_point = u32::from(c);
677        if let Some(v) = self.get32_by_fast_index(code_point) {
678            v
679        } else {
680            self.get32_by_small_index_cold(code_point)
681        }
682    }
683
684    /// Returns the value that is associated with `bmp` in this [`CodePointTrie`].
685    #[inline(always)]
686    pub fn get16(&self, bmp: u16) -> T {
687        // LLVM's optimizations have been observed not to be 100%
688        // reliable around collapsing away unnecessary parts of
689        // `get32`, so not just calling `get32` here.
690        let code_point = u32::from(bmp);
691        if let Some(v) = self.get32_by_fast_index(code_point) {
692            v
693        } else {
694            self.get32_by_small_index_cold(code_point)
695        }
696    }
697
698    /// Lookup trie value by non-Basic Multilingual Plane Scalar Value.
699    ///
700    /// The return value may be bogus (not necessarily `error_value`) is the argument is actually in
701    /// the Basic Multilingual Plane or above the Unicode Scalar Value
702    /// range (panics instead with debug assertions enabled).
703    #[inline(always)]
704    pub fn get32_supplementary(&self, supplementary: u32) -> T {
705        if true {
    if !(supplementary > 0xFFFF) {
        ::core::panicking::panic("assertion failed: supplementary > 0xFFFF")
    };
};debug_assert!(supplementary > 0xFFFF);
706        if true {
    if !(supplementary <= CODE_POINT_MAX) {
        ::core::panicking::panic("assertion failed: supplementary <= CODE_POINT_MAX")
    };
};debug_assert!(supplementary <= CODE_POINT_MAX);
707        self.get32_by_small_index(supplementary)
708    }
709
710    /// Returns a reference to the ULE of the value that is associated with `code_point` in this [`CodePointTrie`].
711    ///
712    /// # Examples
713    ///
714    /// ```
715    /// use icu::collections::codepointtrie::planes;
716    /// let trie = planes::get_planes_trie();
717    ///
718    /// assert_eq!(Some(&0), trie.get32_ule(0x41)); // 'A' as u32
719    /// assert_eq!(Some(&0), trie.get32_ule(0x13E0)); // 'Ꮰ' as u32
720    /// assert_eq!(Some(&1), trie.get32_ule(0x10044)); // '𐁄' as u32
721    /// ```
722    #[inline(always)] // `always` was based on previous normalizer benchmarking
723    pub fn get32_ule(&self, code_point: u32) -> Option<&T::ULE> {
724        // All code points up to the fast max limit are represented
725        // individually in the `index` array to hold their `data` array position, and
726        // thus only need 2 lookups for a [CodePointTrie::get()](`crate::codepointtrie::CodePointTrie::get`).
727        // Code points above the "fast max" limit require 4 lookups.
728        let fast_max = match self.header.trie_type {
729            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
730            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
731        };
732        let data_pos: u32 = if code_point <= fast_max {
733            Self::fast_index(self, code_point)
734        } else if code_point <= CODE_POINT_MAX {
735            Self::small_index(self, code_point)
736        } else {
737            self.trie_error_val_index()
738        };
739        // Returns the trie value (or trie's error value).
740        self.data.as_ule_slice().get(data_pos as usize)
741    }
742
743    /// Converts the [`CodePointTrie`] into one that returns another type of the same size.
744    ///
745    /// Borrowed data remains borrowed, and owned data remains owned.
746    ///
747    /// If the old and new types are not the same size, use
748    /// [`CodePointTrie::try_alloc_map_value`].
749    ///
750    /// # Panics
751    ///
752    /// Panics if `T` and `P` are different sizes.
753    ///
754    /// More specifically, panics if [`ZeroVec::try_into_converted()`] panics when converting
755    /// `ZeroVec<T>` into `ZeroVec<P>`, which happens if `T::ULE` and `P::ULE` differ in size.
756    ///
757    /// ✨ *Enabled with the `alloc` Cargo feature.*
758    ///
759    /// # Examples
760    ///
761    /// ```no_run
762    /// use icu::collections::codepointtrie::planes;
763    /// use icu::collections::codepointtrie::CodePointTrie;
764    ///
765    /// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
766    /// let planes_trie_i8: CodePointTrie<i8> =
767    ///     planes_trie_u8.try_into_converted().expect("infallible");
768    ///
769    /// assert_eq!(planes_trie_i8.get32(0x30000), 3);
770    /// ```
771    #[cfg(feature = "alloc")]
772    pub fn try_into_converted<P>(self) -> Result<CodePointTrie<'trie, P>, UleError>
773    where
774        P: TrieValue,
775    {
776        let converted_data = self.data.try_into_converted()?;
777        let error_ule = self.error_value.to_unaligned();
778        let slice = &[error_ule];
779        let error_vec = ZeroVec::<T>::new_borrowed(slice);
780        let error_converted = error_vec.try_into_converted::<P>()?;
781        #[expect(clippy::expect_used)] // we know this cannot fail
782        Ok(CodePointTrie {
783            header: self.header,
784            index: self.index,
785            data: converted_data,
786            error_value: error_converted
787                .get(0)
788                .expect("vector known to have one element"),
789        })
790    }
791
792    /// Maps the [`CodePointTrie`] into one that returns a different type.
793    ///
794    /// This function returns owned data.
795    ///
796    /// If the old and new types are the same size, use the more efficient
797    /// [`CodePointTrie::try_into_converted`].
798    ///
799    /// ✨ *Enabled with the `alloc` Cargo feature.*
800    ///
801    /// # Examples
802    ///
803    /// ```
804    /// use icu::collections::codepointtrie::planes;
805    /// use icu::collections::codepointtrie::CodePointTrie;
806    ///
807    /// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
808    /// let planes_trie_u16: CodePointTrie<u16> = planes_trie_u8
809    ///     .try_alloc_map_value(TryFrom::try_from)
810    ///     .expect("infallible");
811    ///
812    /// assert_eq!(planes_trie_u16.get32(0x30000), 3);
813    /// ```
814    #[cfg(feature = "alloc")]
815    pub fn try_alloc_map_value<P, E>(
816        &self,
817        mut f: impl FnMut(T) -> Result<P, E>,
818    ) -> Result<CodePointTrie<'trie, P>, E>
819    where
820        P: TrieValue,
821    {
822        let error_converted = f(self.error_value)?;
823        let converted_data = self.data.iter().map(f).collect::<Result<ZeroVec<P>, E>>()?;
824        Ok(CodePointTrie {
825            header: self.header,
826            index: self.index.clone(),
827            data: converted_data,
828            error_value: error_converted,
829        })
830    }
831
832    /// Returns a [`CodePointMapRange`] struct which represents a range of code
833    /// points associated with the same trie value. The returned range will be
834    /// the longest stretch of consecutive code points starting at `start` that
835    /// share this value.
836    ///
837    /// This method is designed to use the internal details of
838    /// the structure of [`CodePointTrie`] to be optimally efficient. This will
839    /// outperform a naive approach that just uses [`CodePointTrie::get()`].
840    ///
841    /// This method provides lower-level functionality that can be used in the
842    /// implementation of other methods that are more convenient to the user.
843    /// To obtain an optimal partition of the code point space for
844    /// this trie resulting in the fewest number of ranges, see
845    /// [`CodePointTrie::iter_ranges()`].
846    ///
847    /// # Examples
848    ///
849    /// ```
850    /// use icu::collections::codepointtrie::planes;
851    ///
852    /// let trie = planes::get_planes_trie();
853    ///
854    /// const CODE_POINT_MAX: u32 = 0x10ffff;
855    /// let start = 0x1_0000;
856    /// let exp_end = 0x1_ffff;
857    ///
858    /// let start_val = trie.get32(start);
859    /// assert_eq!(trie.get32(exp_end), start_val);
860    /// assert_ne!(trie.get32(exp_end + 1), start_val);
861    ///
862    /// use icu::collections::codepointtrie::CodePointMapRange;
863    ///
864    /// let cpm_range: CodePointMapRange<u8> = trie.get_range(start).unwrap();
865    ///
866    /// assert_eq!(cpm_range.range.start(), &start);
867    /// assert_eq!(cpm_range.range.end(), &exp_end);
868    /// assert_eq!(cpm_range.value, start_val);
869    ///
870    /// // `start` can be any code point, whether or not it lies on the boundary
871    /// // of a maximally large range that still contains `start`
872    ///
873    /// let submaximal_1_start = start + 0x1234;
874    /// let submaximal_1 = trie.get_range(submaximal_1_start).unwrap();
875    /// assert_eq!(submaximal_1.range.start(), &0x1_1234);
876    /// assert_eq!(submaximal_1.range.end(), &0x1_ffff);
877    /// assert_eq!(submaximal_1.value, start_val);
878    ///
879    /// let submaximal_2_start = start + 0xffff;
880    /// let submaximal_2 = trie.get_range(submaximal_2_start).unwrap();
881    /// assert_eq!(submaximal_2.range.start(), &0x1_ffff);
882    /// assert_eq!(submaximal_2.range.end(), &0x1_ffff);
883    /// assert_eq!(submaximal_2.value, start_val);
884    /// ```
885    pub fn get_range(&self, start: u32) -> Option<CodePointMapRange<T>> {
886        // Exit early if the start code point is out of range, or if it is
887        // in the last range of code points in high_start..=CODE_POINT_MAX
888        // (start- and end-inclusive) that all share the same trie value.
889        if CODE_POINT_MAX < start {
890            return None;
891        }
892        if start >= self.header.high_start {
893            let di: usize = self.data.len() - (HIGH_VALUE_NEG_DATA_OFFSET as usize);
894            let value: T = self.data.get(di)?;
895            return Some(CodePointMapRange {
896                range: start..=CODE_POINT_MAX,
897                value,
898            });
899        }
900
901        let null_value: T = T::try_from_u32(self.header.null_value).ok()?;
902
903        let mut prev_i3_block: u32 = u32::MAX; // using u32::MAX (instead of -1 as an i32 in ICU)
904        let mut prev_block: u32 = u32::MAX; // using u32::MAX (instead of -1 as an i32 in ICU)
905        let mut c: u32 = start;
906        let mut trie_value: T = self.error_value();
907        let mut value: T = self.error_value();
908        let mut have_value: bool = false;
909
910        loop {
911            let i3_block: u32;
912            let mut i3: u32;
913            let i3_block_length: u32;
914            let data_block_length: u32;
915
916            // Initialize values before beginning the iteration in the subsequent
917            // `loop` block. In particular, use the "i3*" local variables
918            // (representing the `index` array position's offset + increment
919            // for a 3rd-level trie lookup) to help initialize the data block
920            // variable `block` in the loop for the `data` array.
921            //
922            // When a lookup code point is <= the trie's *_FAST_INDEXING_MAX that
923            // corresponds to its `trie_type`, the lookup only takes 2 steps
924            // (once into the `index`, once into the `data` array); otherwise,
925            // takes 4 steps (3 iterative lookups into the `index`, once more
926            // into the `data` array). So for convenience's sake, when we have the
927            // 2-stage lookup, reuse the "i3*" variable names for the first lookup.
928            if c <= 0xffff
929                && (self.header.trie_type == TrieType::Fast || c <= SMALL_TYPE_FAST_INDEXING_MAX)
930            {
931                i3_block = 0;
932                i3 = c >> FAST_TYPE_SHIFT;
933                i3_block_length = if self.header.trie_type == TrieType::Fast {
934                    BMP_INDEX_LENGTH
935                } else {
936                    SMALL_INDEX_LENGTH
937                };
938                data_block_length = FAST_TYPE_DATA_BLOCK_LENGTH;
939            } else {
940                // Use the multi-stage index.
941                let mut i1: u32 = c >> SHIFT_1;
942                if self.header.trie_type == TrieType::Fast {
943                    if true {
    if !(0xffff < c && c < self.header.high_start) {
        ::core::panicking::panic("assertion failed: 0xffff < c && c < self.header.high_start")
    };
};debug_assert!(0xffff < c && c < self.header.high_start);
944                    i1 = i1 + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
945                } else {
946                    if true {
    if !(c < self.header.high_start && self.header.high_start > SMALL_LIMIT) {
        ::core::panicking::panic("assertion failed: c < self.header.high_start && self.header.high_start > SMALL_LIMIT")
    };
};debug_assert!(
947                        c < self.header.high_start && self.header.high_start > SMALL_LIMIT
948                    );
949                    i1 += SMALL_INDEX_LENGTH;
950                }
951                let i2: u16 = self.index.get(i1 as usize)?;
952                let i3_block_idx: u32 = (i2 as u32) + ((c >> SHIFT_2) & INDEX_2_MASK);
953                i3_block = if let Some(i3b) = self.index.get(i3_block_idx as usize) {
954                    i3b as u32
955                } else {
956                    return None;
957                };
958                if i3_block == prev_i3_block && (c - start) >= CP_PER_INDEX_2_ENTRY {
959                    // The index-3 block is the same as the previous one, and filled with value.
960                    if true {
    if !((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0) {
        ::core::panicking::panic("assertion failed: (c & (CP_PER_INDEX_2_ENTRY - 1)) == 0")
    };
};debug_assert!((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
961                    c += CP_PER_INDEX_2_ENTRY;
962
963                    if c >= self.header.high_start {
964                        break;
965                    } else {
966                        continue;
967                    }
968                }
969                prev_i3_block = i3_block;
970                if i3_block == self.header.index3_null_offset as u32 {
971                    // This is the index-3 null block.
972                    // All of the `data` array blocks pointed to by the values
973                    // in this block of the `index` 3rd-stage subarray will
974                    // contain this trie's null_value. So if we are in the middle
975                    // of a range, end it and return early, otherwise start a new
976                    // range of null values.
977                    if have_value {
978                        if null_value != value {
979                            return Some(CodePointMapRange {
980                                range: start..=(c - 1),
981                                value,
982                            });
983                        }
984                    } else {
985                        trie_value = T::try_from_u32(self.header.null_value).ok()?;
986                        value = null_value;
987                        have_value = true;
988                    }
989                    prev_block = self.header.data_null_offset;
990                    c = (c + CP_PER_INDEX_2_ENTRY) & !(CP_PER_INDEX_2_ENTRY - 1);
991
992                    if c >= self.header.high_start {
993                        break;
994                    } else {
995                        continue;
996                    }
997                }
998                i3 = (c >> SHIFT_3) & INDEX_3_MASK;
999                i3_block_length = INDEX_3_BLOCK_LENGTH;
1000                data_block_length = SMALL_DATA_BLOCK_LENGTH;
1001            }
1002
1003            // Enumerate data blocks for one index-3 block.
1004            loop {
1005                let mut block: u32;
1006                if (i3_block & 0x8000) == 0 {
1007                    block = if let Some(b) = self.index.get((i3_block + i3) as usize) {
1008                        b as u32
1009                    } else {
1010                        return None;
1011                    };
1012                } else {
1013                    // 18-bit indexes stored in groups of 9 entries per 8 indexes.
1014                    let mut group: u32 = (i3_block & 0x7fff) + (i3 & !7) + (i3 >> 3);
1015                    let gi: u32 = i3 & 7;
1016                    let gi_val: u32 = if let Some(giv) = self.index.get(group as usize) {
1017                        giv.into()
1018                    } else {
1019                        return None;
1020                    };
1021                    block = (gi_val << (2 + (2 * gi))) & 0x30000;
1022                    group += 1;
1023                    let ggi_val: u32 = if let Some(ggiv) = self.index.get((group + gi) as usize) {
1024                        ggiv as u32
1025                    } else {
1026                        return None;
1027                    };
1028                    block |= ggi_val;
1029                }
1030
1031                // If our previous and current return values of the 3rd-stage `index`
1032                // lookup yield the same `data` block offset, and if we already know that
1033                // the entire `data` block / subarray starting at that offset stores
1034                // `value` and nothing else, then we can extend our range by the length
1035                // of a data block and continue.
1036                // Otherwise, we have to iterate over the values stored in the
1037                // new data block to see if they differ from `value`.
1038                if block == prev_block && (c - start) >= data_block_length {
1039                    // The block is the same as the previous one, and filled with value.
1040                    if true {
    if !((c & (data_block_length - 1)) == 0) {
        ::core::panicking::panic("assertion failed: (c & (data_block_length - 1)) == 0")
    };
};debug_assert!((c & (data_block_length - 1)) == 0);
1041                    c += data_block_length;
1042                } else {
1043                    let data_mask: u32 = data_block_length - 1;
1044                    prev_block = block;
1045                    if block == self.header.data_null_offset {
1046                        // This is the data null block.
1047                        // If we are in the middle of a range, end it and
1048                        // return early, otherwise start a new range of null
1049                        // values.
1050                        if have_value {
1051                            if null_value != value {
1052                                return Some(CodePointMapRange {
1053                                    range: start..=(c - 1),
1054                                    value,
1055                                });
1056                            }
1057                        } else {
1058                            trie_value = T::try_from_u32(self.header.null_value).ok()?;
1059                            value = null_value;
1060                            have_value = true;
1061                        }
1062                        c = (c + data_block_length) & !data_mask;
1063                    } else {
1064                        let mut di: u32 = block + (c & data_mask);
1065                        let mut trie_value_2: T = self.data.get(di as usize)?;
1066                        if have_value {
1067                            if trie_value_2 != trie_value {
1068                                if maybe_filter_value(
1069                                    trie_value_2,
1070                                    T::try_from_u32(self.header.null_value).ok()?,
1071                                    null_value,
1072                                ) != value
1073                                {
1074                                    return Some(CodePointMapRange {
1075                                        range: start..=(c - 1),
1076                                        value,
1077                                    });
1078                                }
1079                                // `trie_value` stores the previous value that was retrieved
1080                                // from the trie.
1081                                // `value` stores the value associated for the range (return
1082                                // value) that we are currently building, which is computed
1083                                // as a transformation by applying maybe_filter_value()
1084                                // to the trie value.
1085                                // The current trie value `trie_value_2` within this data block
1086                                // differs here from the previous value in `trie_value`.
1087                                // But both map to `value` after applying `maybe_filter_value`.
1088                                // It is not clear whether the previous or the current trie value
1089                                // (or neither) is more likely to match potential subsequent trie
1090                                // values that would extend the range by mapping to `value`.
1091                                // On the assumption of locality -- often times consecutive
1092                                // characters map to the same trie values -- remembering the new
1093                                // one might make it faster to extend this range further
1094                                // (by increasing the chance that the next `trie_value_2 !=
1095                                // trie_value` test will be false).
1096                                trie_value = trie_value_2; // may or may not help
1097                            }
1098                        } else {
1099                            trie_value = trie_value_2;
1100                            value = maybe_filter_value(
1101                                trie_value_2,
1102                                T::try_from_u32(self.header.null_value).ok()?,
1103                                null_value,
1104                            );
1105                            have_value = true;
1106                        }
1107
1108                        c += 1;
1109                        while (c & data_mask) != 0 {
1110                            di += 1;
1111                            trie_value_2 = self.data.get(di as usize)?;
1112                            if trie_value_2 != trie_value {
1113                                if maybe_filter_value(
1114                                    trie_value_2,
1115                                    T::try_from_u32(self.header.null_value).ok()?,
1116                                    null_value,
1117                                ) != value
1118                                {
1119                                    return Some(CodePointMapRange {
1120                                        range: start..=(c - 1),
1121                                        value,
1122                                    });
1123                                }
1124                                // `trie_value` stores the previous value that was retrieved
1125                                // from the trie.
1126                                // `value` stores the value associated for the range (return
1127                                // value) that we are currently building, which is computed
1128                                // as a transformation by applying maybe_filter_value()
1129                                // to the trie value.
1130                                // The current trie value `trie_value_2` within this data block
1131                                // differs here from the previous value in `trie_value`.
1132                                // But both map to `value` after applying `maybe_filter_value`.
1133                                // It is not clear whether the previous or the current trie value
1134                                // (or neither) is more likely to match potential subsequent trie
1135                                // values that would extend the range by mapping to `value`.
1136                                // On the assumption of locality -- often times consecutive
1137                                // characters map to the same trie values -- remembering the new
1138                                // one might make it faster to extend this range further
1139                                // (by increasing the chance that the next `trie_value_2 !=
1140                                // trie_value` test will be false).
1141                                trie_value = trie_value_2; // may or may not help
1142                            }
1143
1144                            c += 1;
1145                        }
1146                    }
1147                }
1148
1149                i3 += 1;
1150                if i3 >= i3_block_length {
1151                    break;
1152                }
1153            }
1154
1155            if c >= self.header.high_start {
1156                break;
1157            }
1158        }
1159
1160        if true {
    if !have_value {
        ::core::panicking::panic("assertion failed: have_value")
    };
};debug_assert!(have_value);
1161
1162        // Now that c >= high_start, compare `value` to `high_value` to see
1163        // if we can merge our current range with the high_value range
1164        // high_start..=CODE_POINT_MAX (start- and end-inclusive), otherwise
1165        // stop at high_start - 1.
1166        let di: u32 = self.data.len() as u32 - HIGH_VALUE_NEG_DATA_OFFSET;
1167        let high_value: T = self.data.get(di as usize)?;
1168        if maybe_filter_value(
1169            high_value,
1170            T::try_from_u32(self.header.null_value).ok()?,
1171            null_value,
1172        ) != value
1173        {
1174            c -= 1;
1175        } else {
1176            c = CODE_POINT_MAX;
1177        }
1178        Some(CodePointMapRange {
1179            range: start..=c,
1180            value,
1181        })
1182    }
1183
1184    /// Yields an [`Iterator`] returning ranges of consecutive code points that
1185    /// share the same value in the [`CodePointTrie`], as given by
1186    /// [`CodePointTrie::get_range()`].
1187    ///
1188    /// # Examples
1189    ///
1190    /// ```
1191    /// use core::ops::RangeInclusive;
1192    /// use icu::collections::codepointtrie::planes;
1193    /// use icu::collections::codepointtrie::CodePointMapRange;
1194    ///
1195    /// let planes_trie = planes::get_planes_trie();
1196    ///
1197    /// let mut ranges = planes_trie.iter_ranges();
1198    ///
1199    /// for plane in 0..=16 {
1200    ///     let exp_start = plane * 0x1_0000;
1201    ///     let exp_end = exp_start + 0xffff;
1202    ///     assert_eq!(
1203    ///         ranges.next(),
1204    ///         Some(CodePointMapRange {
1205    ///             range: exp_start..=exp_end,
1206    ///             value: plane as u8
1207    ///         })
1208    ///     );
1209    /// }
1210    ///
1211    /// // Hitting the end of the iterator returns `None`, as will subsequent
1212    /// // calls to .next().
1213    /// assert_eq!(ranges.next(), None);
1214    /// assert_eq!(ranges.next(), None);
1215    /// ```
1216    pub fn iter_ranges(&self) -> CodePointMapRangeIterator<'_, T> {
1217        let init_range = Some(CodePointMapRange {
1218            range: u32::MAX..=u32::MAX,
1219            value: self.error_value(),
1220        });
1221        CodePointMapRangeIterator::<T> {
1222            cpt: self,
1223            cpm_range: init_range,
1224        }
1225    }
1226
1227    /// Yields an [`Iterator`] returning the ranges of the code points whose values
1228    /// match `value` in the [`CodePointTrie`].
1229    ///
1230    /// # Examples
1231    ///
1232    /// ```
1233    /// use icu::collections::codepointtrie::planes;
1234    ///
1235    /// let trie = planes::get_planes_trie();
1236    ///
1237    /// let plane_val = 2;
1238    /// let mut sip_range_iter = trie.iter_ranges_for_value(plane_val as u8);
1239    ///
1240    /// let start = plane_val * 0x1_0000;
1241    /// let end = start + 0xffff;
1242    ///
1243    /// let sip_range = sip_range_iter.next()
1244    ///     .expect("Plane 2 (SIP) should exist in planes data");
1245    /// assert_eq!(start..=end, sip_range);
1246    ///
1247    /// assert!(sip_range_iter.next().is_none());
1248    pub fn iter_ranges_for_value(
1249        &self,
1250        value: T,
1251    ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
1252        self.iter_ranges()
1253            .filter(move |cpm_range| cpm_range.value == value)
1254            .map(|cpm_range| cpm_range.range)
1255    }
1256
1257    /// Yields an [`Iterator`] returning the ranges of the code points after passing
1258    /// the value through a mapping function.
1259    ///
1260    /// This is preferable to calling `.get_ranges().map()` since it will coalesce
1261    /// adjacent ranges into one.
1262    ///
1263    /// # Examples
1264    ///
1265    /// ```
1266    /// use icu::collections::codepointtrie::planes;
1267    ///
1268    /// let trie = planes::get_planes_trie();
1269    ///
1270    /// let plane_val = 2;
1271    /// let mut sip_range_iter = trie.iter_ranges_mapped(|value| value != plane_val as u8).filter(|range| range.value);
1272    ///
1273    /// let end = plane_val * 0x1_0000 - 1;
1274    ///
1275    /// let sip_range = sip_range_iter.next()
1276    ///     .expect("Complemented planes data should have at least one entry");
1277    /// assert_eq!(0..=end, sip_range.range);
1278    pub fn iter_ranges_mapped<'a, U: Eq + 'a>(
1279        &'a self,
1280        mut map: impl FnMut(T) -> U + Copy + 'a,
1281    ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a {
1282        crate::iterator_utils::RangeListIteratorCoalescer::new(self.iter_ranges().map(
1283            move |range| CodePointMapRange {
1284                range: range.range,
1285                value: map(range.value),
1286            },
1287        ))
1288    }
1289
1290    /// Returns a [`CodePointInversionList`] for the code points that have the given
1291    /// [`TrieValue`] in the trie.
1292    ///
1293    /// ✨ *Enabled with the `alloc` Cargo feature.*
1294    ///
1295    /// # Examples
1296    ///
1297    /// ```
1298    /// use icu::collections::codepointtrie::planes;
1299    ///
1300    /// let trie = planes::get_planes_trie();
1301    ///
1302    /// let plane_val = 2;
1303    /// let sip = trie.get_set_for_value(plane_val as u8);
1304    ///
1305    /// let start = plane_val * 0x1_0000;
1306    /// let end = start + 0xffff;
1307    ///
1308    /// assert!(!sip.contains32(start - 1));
1309    /// assert!(sip.contains32(start));
1310    /// assert!(sip.contains32(end));
1311    /// assert!(!sip.contains32(end + 1));
1312    /// ```
1313    #[cfg(feature = "alloc")]
1314    pub fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> {
1315        let value_ranges = self.iter_ranges_for_value(value);
1316        CodePointInversionList::from_iter(value_ranges)
1317    }
1318
1319    /// Returns the value used as an error value for this trie
1320    #[inline]
1321    pub fn error_value(&self) -> T {
1322        self.error_value
1323    }
1324}
1325
1326#[cfg(feature = "databake")]
1327impl<T: TrieValue + databake::Bake> databake::Bake for CodePointTrie<'_, T> {
1328    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
1329        let header = self.header.bake(env);
1330        let index = self.index.bake(env);
1331        let data = self.data.bake(env);
1332        let error_value = self.error_value.bake(env);
1333        databake::quote! { unsafe { icu_collections::codepointtrie::CodePointTrie::from_parts_unstable_unchecked_v1(#header, #index, #data, #error_value) } }
1334    }
1335}
1336
1337#[cfg(feature = "databake")]
1338impl<T: TrieValue + databake::Bake> databake::BakeSize for CodePointTrie<'_, T> {
1339    fn borrows_size(&self) -> usize {
1340        self.header.borrows_size() + self.index.borrows_size() + self.data.borrows_size()
1341    }
1342}
1343
1344impl<T: TrieValue + Into<u32>> CodePointTrie<'_, T> {
1345    /// Returns the value that is associated with `code_point` for this [`CodePointTrie`]
1346    /// as a `u32`.
1347    ///
1348    /// # Examples
1349    ///
1350    /// ```
1351    /// use icu::collections::codepointtrie::planes;
1352    /// let trie = planes::get_planes_trie();
1353    ///
1354    /// let cp = '𑖎' as u32;
1355    /// assert_eq!(cp, 0x1158E);
1356    ///
1357    /// let plane_num: u8 = trie.get32(cp);
1358    /// assert_eq!(trie.get32_u32(cp), plane_num as u32);
1359    /// ```
1360    // Note: This API method maintains consistency with the corresponding
1361    // original ICU APIs.
1362    pub fn get32_u32(&self, code_point: u32) -> u32 {
1363        self.get32(code_point).into()
1364    }
1365}
1366
1367impl<T: TrieValue> Clone for CodePointTrie<'_, T>
1368where
1369    <T as AsULE>::ULE: Clone,
1370{
1371    fn clone(&self) -> Self {
1372        CodePointTrie {
1373            header: self.header,
1374            index: self.index.clone(),
1375            data: self.data.clone(),
1376            error_value: self.error_value,
1377        }
1378    }
1379}
1380
1381/// Represents a range of consecutive code points sharing the same value in a
1382/// code point map.
1383///
1384/// The start and end of the interval is represented as a
1385/// `RangeInclusive<u32>`, and the value is represented as `T`.
1386#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl<T: ::core::cmp::PartialEq> ::core::cmp::PartialEq for
    CodePointMapRange<T> {
    #[inline]
    fn eq(&self, other: &CodePointMapRange<T>) -> bool {
        self.range == other.range && self.value == other.value
    }
}PartialEq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl<T: ::core::cmp::Eq> ::core::cmp::Eq for CodePointMapRange<T> {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<RangeInclusive<u32>>;
        let _: ::core::cmp::AssertParamIsEq<T>;
    }
}Eq, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl<T: ::core::fmt::Debug> ::core::fmt::Debug for CodePointMapRange<T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f,
            "CodePointMapRange", "range", &self.range, "value", &&self.value)
    }
}Debug, #[automatically_derived]
#[allow(clippy::exhaustive_structs)]
impl<T: ::core::clone::Clone> ::core::clone::Clone for CodePointMapRange<T> {
    #[inline]
    fn clone(&self) -> CodePointMapRange<T> {
        CodePointMapRange {
            range: ::core::clone::Clone::clone(&self.range),
            value: ::core::clone::Clone::clone(&self.value),
        }
    }
}Clone)]
1387#[allow(clippy::exhaustive_structs)] // based on a stable serialized form
1388pub struct CodePointMapRange<T> {
1389    /// Range of code points from start to end (inclusive).
1390    pub range: RangeInclusive<u32>,
1391    /// Trie value associated with this range.
1392    pub value: T,
1393}
1394
1395/// A custom [`Iterator`] type specifically for a code point trie that returns
1396/// [`CodePointMapRange`]s.
1397#[derive(#[automatically_derived]
impl<'a, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    CodePointMapRangeIterator<'a, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f,
            "CodePointMapRangeIterator", "cpt", &self.cpt, "cpm_range",
            &&self.cpm_range)
    }
}Debug)]
1398pub struct CodePointMapRangeIterator<'a, T: TrieValue> {
1399    cpt: &'a CodePointTrie<'a, T>,
1400    // Initialize `range` to Some(CodePointMapRange{ start: u32::MAX, end: u32::MAX, value: 0}).
1401    // When `range` is Some(...) and has a start value different from u32::MAX, then we have
1402    // returned at least one code point range due to a call to `next()`.
1403    // When `range` == `None`, it means that we have hit the end of iteration. It would occur
1404    // after a call to `next()` returns a None <=> we attempted to call `get_range()`
1405    // with a start code point that is > CODE_POINT_MAX.
1406    cpm_range: Option<CodePointMapRange<T>>,
1407}
1408
1409impl<T: TrieValue> Iterator for CodePointMapRangeIterator<'_, T> {
1410    type Item = CodePointMapRange<T>;
1411
1412    fn next(&mut self) -> Option<Self::Item> {
1413        self.cpm_range = match &self.cpm_range {
1414            Some(cpmr) => {
1415                if *cpmr.range.start() == u32::MAX {
1416                    self.cpt.get_range(0)
1417                } else {
1418                    self.cpt.get_range(cpmr.range.end() + 1)
1419                }
1420            }
1421            None => None,
1422        };
1423        // Note: Clone is cheap. We can't Copy because RangeInclusive does not impl Copy.
1424        self.cpm_range.clone()
1425    }
1426}
1427
1428/// For sealing `TypedCodePointTrie`
1429///
1430/// # Safety Usable Invariant
1431///
1432/// All implementations of `TypedCodePointTrie` are reviewable in this module.
1433trait Seal {}
1434
1435/// Trait for writing trait bounds for monomorphizing over either
1436/// `FastCodePointTrie` or `SmallCodePointTrie`.
1437#[allow(private_bounds)] // Permit sealing
1438pub trait TypedCodePointTrie<'trie, T: TrieValue>: Seal {
1439    /// The `TrieType` associated with this `TypedCodePointTrie`
1440    ///
1441    /// # Safety Usable Invariant
1442    ///
1443    /// This constant matches `self.as_untyped_ref().header.trie_type`.
1444    const TRIE_TYPE: TrieType;
1445
1446    /// Lookup trie value as `u32` by Unicode Scalar Value without branching on trie type.
1447    #[inline(always)]
1448    fn get32_u32(&self, code_point: u32) -> u32 {
1449        self.get32(code_point).to_u32()
1450    }
1451
1452    /// Lookup trie value by Basic Multilingual Plane Code Point without branching on trie type.
1453    #[inline(always)]
1454    fn get16(&self, bmp: u16) -> T {
1455        // LLVM's optimizations have been observed not to be 100%
1456        // reliable around collapsing away unnecessary parts of
1457        // `get32`, so not just calling `get32` here.
1458        let code_point = u32::from(bmp);
1459        if let Some(v) = self.get32_by_fast_index(code_point) {
1460            v
1461        } else {
1462            self.as_untyped_ref().get32_by_small_index_cold(code_point)
1463        }
1464    }
1465
1466    /// Lookup trie value by non-Basic Multilingual Plane Scalar Value without branching on trie type.
1467    #[inline(always)]
1468    fn get32_supplementary(&self, supplementary: u32) -> T {
1469        self.as_untyped_ref().get32_supplementary(supplementary)
1470    }
1471
1472    /// Lookup trie value by Unicode Scalar Value without branching on trie type.
1473    #[inline(always)]
1474    fn get(&self, c: char) -> T {
1475        // LLVM's optimizations have been observed not to be 100%
1476        // reliable around collapsing away unnecessary parts of
1477        // `get32`, so not just calling `get32` here.
1478        let code_point = u32::from(c);
1479        if let Some(v) = self.get32_by_fast_index(code_point) {
1480            v
1481        } else {
1482            self.as_untyped_ref().get32_by_small_index_cold(code_point)
1483        }
1484    }
1485
1486    /// Lookup trie value by Unicode Code Point without branching on trie type.
1487    #[inline(always)]
1488    fn get32(&self, code_point: u32) -> T {
1489        if let Some(v) = self.get32_by_fast_index(code_point) {
1490            v
1491        } else if code_point <= CODE_POINT_MAX {
1492            self.as_untyped_ref().get32_by_small_index_cold(code_point)
1493        } else {
1494            self.as_untyped_ref().error_value
1495        }
1496    }
1497
1498    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`]
1499    /// if `code_point` uses fast-path lookup or `None` if `code_point`
1500    /// should use small-path lookup or is above the supported range.
1501    #[inline(always)] // "always" to make the `Option` collapse away
1502    fn get32_by_fast_index(&self, code_point: u32) -> Option<T> {
1503        if true {
    match (&Self::TRIE_TYPE, &self.as_untyped_ref().header.trie_type) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(Self::TRIE_TYPE, self.as_untyped_ref().header.trie_type);
1504        let fast_max = match Self::TRIE_TYPE {
1505            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
1506            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
1507        };
1508        if code_point <= fast_max {
1509            // SAFETY: We just checked the invariant of
1510            // `get32_assuming_fast_index`,
1511            // which is
1512            // "If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
1513            // `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
1514            // TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`."
1515            // ... assuming that `Self::TRIE_TYPE` always matches
1516            // `self.as_untyped_ref().header.trie_type`, i.e. we're relying on
1517            // `CodePointTrie::to_typed` and `CodePointTrie::as_typed_ref` being correct
1518            // and the exclusive ways of obtaining `Self`.
1519            Some(unsafe { self.as_untyped_ref().get32_assuming_fast_index(code_point) })
1520        } else {
1521            // The caller needs to call `get32_by_small_index` or determine
1522            // that the argument is above the permitted range.
1523            None
1524        }
1525    }
1526
1527    /// Returns a reference to the wrapped `CodePointTrie`.
1528    fn as_untyped_ref(&self) -> &CodePointTrie<'trie, T>;
1529
1530    /// Extracts the wrapped `CodePointTrie`.
1531    fn to_untyped(self) -> CodePointTrie<'trie, T>;
1532}
1533
1534/// Type-safe wrapper for a fast trie guaranteeing
1535/// the the getters don't branch on the trie type
1536/// and for guarenteeing that `get16` is branchless
1537/// in release builds.
1538#[derive(#[automatically_derived]
impl<'trie, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    FastCodePointTrie<'trie, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field1_finish(f,
            "FastCodePointTrie", "inner", &&self.inner)
    }
}Debug)]
1539#[repr(transparent)]
1540pub struct FastCodePointTrie<'trie, T: TrieValue> {
1541    inner: CodePointTrie<'trie, T>,
1542}
1543
1544impl<'trie, T: TrieValue> TypedCodePointTrie<'trie, T> for FastCodePointTrie<'trie, T> {
1545    const TRIE_TYPE: TrieType = TrieType::Fast;
1546
1547    /// Returns a reference to the wrapped `CodePointTrie`.
1548    #[inline(always)]
1549    fn as_untyped_ref(&self) -> &CodePointTrie<'trie, T> {
1550        &self.inner
1551    }
1552
1553    /// Extracts the wrapped `CodePointTrie`.
1554    #[inline(always)]
1555    fn to_untyped(self) -> CodePointTrie<'trie, T> {
1556        self.inner
1557    }
1558
1559    /// Lookup trie value by Basic Multilingual Plane Code Point without branching on trie type.
1560    #[inline(always)]
1561    fn get16(&self, bmp: u16) -> T {
1562        if true {
    if !(u32::from(u16::MAX) <= FAST_TYPE_FAST_INDEXING_MAX) {
        ::core::panicking::panic("assertion failed: u32::from(u16::MAX) <= FAST_TYPE_FAST_INDEXING_MAX")
    };
};debug_assert!(u32::from(u16::MAX) <= FAST_TYPE_FAST_INDEXING_MAX);
1563        if true {
    match (&Self::TRIE_TYPE, &TrieType::Fast) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(Self::TRIE_TYPE, TrieType::Fast);
1564        if true {
    match (&self.as_untyped_ref().header.trie_type, &TrieType::Fast) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.as_untyped_ref().header.trie_type, TrieType::Fast);
1565        let code_point = u32::from(bmp);
1566        // SAFETY: With `TrieType::Fast`, the `u16` range satisfies
1567        // the invariant of `get32_assuming_fast_index`, which is
1568        // "If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
1569        // `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
1570        // TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`."
1571        //
1572        // We're relying on `CodePointTrie::to_typed` and `CodePointTrie::as_typed_ref`
1573        // being correct and the exclusive ways of obtaining `Self`.
1574        unsafe { self.as_untyped_ref().get32_assuming_fast_index(code_point) }
1575    }
1576}
1577
1578impl<'trie, T: TrieValue> Seal for FastCodePointTrie<'trie, T> {}
1579
1580impl<'trie, T: TrieValue> TryFrom<&'trie CodePointTrie<'trie, T>>
1581    for &'trie FastCodePointTrie<'trie, T>
1582{
1583    type Error = TypedCodePointTrieError;
1584
1585    fn try_from(
1586        reference: &'trie CodePointTrie<'trie, T>,
1587    ) -> Result<&'trie FastCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1588        match reference.as_typed_ref() {
1589            Typed::Fast(trie) => Ok(trie),
1590            Typed::Small(_) => Err(TypedCodePointTrieError),
1591        }
1592    }
1593}
1594
1595impl<'trie, T: TrieValue> TryFrom<CodePointTrie<'trie, T>> for FastCodePointTrie<'trie, T> {
1596    type Error = TypedCodePointTrieError;
1597
1598    fn try_from(
1599        value: CodePointTrie<'trie, T>,
1600    ) -> Result<FastCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1601        match value.to_typed() {
1602            Typed::Fast(trie) => Ok(trie),
1603            Typed::Small(_) => Err(TypedCodePointTrieError),
1604        }
1605    }
1606}
1607
1608/// Type-safe wrapper for a small trie guaranteeing
1609/// the the getters don't branch on the trie type.
1610#[derive(#[automatically_derived]
impl<'trie, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    SmallCodePointTrie<'trie, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field1_finish(f,
            "SmallCodePointTrie", "inner", &&self.inner)
    }
}Debug)]
1611#[repr(transparent)]
1612pub struct SmallCodePointTrie<'trie, T: TrieValue> {
1613    inner: CodePointTrie<'trie, T>,
1614}
1615
1616impl<'trie, T: TrieValue> TypedCodePointTrie<'trie, T> for SmallCodePointTrie<'trie, T> {
1617    const TRIE_TYPE: TrieType = TrieType::Small;
1618
1619    /// Returns a reference to the wrapped `CodePointTrie`.
1620    #[inline(always)]
1621    fn as_untyped_ref(&self) -> &CodePointTrie<'trie, T> {
1622        &self.inner
1623    }
1624
1625    /// Extracts the wrapped `CodePointTrie`.
1626    #[inline(always)]
1627    fn to_untyped(self) -> CodePointTrie<'trie, T> {
1628        self.inner
1629    }
1630}
1631
1632impl<'trie, T: TrieValue> Seal for SmallCodePointTrie<'trie, T> {}
1633
1634impl<'trie, T: TrieValue> TryFrom<&'trie CodePointTrie<'trie, T>>
1635    for &'trie SmallCodePointTrie<'trie, T>
1636{
1637    type Error = TypedCodePointTrieError;
1638
1639    fn try_from(
1640        reference: &'trie CodePointTrie<'trie, T>,
1641    ) -> Result<&'trie SmallCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1642        match reference.as_typed_ref() {
1643            Typed::Fast(_) => Err(TypedCodePointTrieError),
1644            Typed::Small(trie) => Ok(trie),
1645        }
1646    }
1647}
1648
1649impl<'trie, T: TrieValue> TryFrom<CodePointTrie<'trie, T>> for SmallCodePointTrie<'trie, T> {
1650    type Error = TypedCodePointTrieError;
1651
1652    fn try_from(
1653        value: CodePointTrie<'trie, T>,
1654    ) -> Result<SmallCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1655        match value.to_typed() {
1656            Typed::Fast(_) => Err(TypedCodePointTrieError),
1657            Typed::Small(trie) => Ok(trie),
1658        }
1659    }
1660}
1661
1662/// Error indicating that the `TrieType` of an untyped trie
1663/// does not match the requested typed trie type.
1664#[derive(#[automatically_derived]
impl ::core::fmt::Debug for TypedCodePointTrieError {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::write_str(f, "TypedCodePointTrieError")
    }
}Debug)]
1665#[non_exhaustive]
1666pub struct TypedCodePointTrieError;
1667
1668/// Holder for either fast or small trie with the trie
1669/// type encoded into the Rust type.
1670#[allow(clippy::exhaustive_enums)]
1671#[derive(#[automatically_derived]
#[allow(clippy::exhaustive_enums)]
impl<F: ::core::fmt::Debug, S: ::core::fmt::Debug> ::core::fmt::Debug for
    Typed<F, S> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        match self {
            Typed::Fast(__self_0) =>
                ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Fast",
                    &__self_0),
            Typed::Small(__self_0) =>
                ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Small",
                    &__self_0),
        }
    }
}Debug)]
1672pub enum Typed<F, S> {
1673    /// The trie type is fast.
1674    Fast(F),
1675    /// The trie type is small.
1676    Small(S),
1677}
1678
1679#[cfg(test)]
1680mod tests {
1681    use super::*;
1682    use crate::codepointtrie::planes;
1683    use alloc::vec::Vec;
1684
1685    #[test]
1686    #[cfg(feature = "serde")]
1687    fn test_serde_with_postcard_roundtrip() -> Result<(), postcard::Error> {
1688        let trie = planes::get_planes_trie();
1689        let trie_serialized: Vec<u8> = postcard::to_allocvec(&trie).unwrap();
1690
1691        // Assert an expected (golden data) version of the serialized trie.
1692        const EXP_TRIE_SERIALIZED: &[u8] = &[
1693            128, 128, 64, 128, 2, 2, 0, 0, 1, 160, 18, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1694            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1695            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1696            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1697            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136,
1698            2, 144, 2, 144, 2, 144, 2, 176, 2, 176, 2, 176, 2, 176, 2, 208, 2, 208, 2, 208, 2, 208,
1699            2, 240, 2, 240, 2, 240, 2, 240, 2, 16, 3, 16, 3, 16, 3, 16, 3, 48, 3, 48, 3, 48, 3, 48,
1700            3, 80, 3, 80, 3, 80, 3, 80, 3, 112, 3, 112, 3, 112, 3, 112, 3, 144, 3, 144, 3, 144, 3,
1701            144, 3, 176, 3, 176, 3, 176, 3, 176, 3, 208, 3, 208, 3, 208, 3, 208, 3, 240, 3, 240, 3,
1702            240, 3, 240, 3, 16, 4, 16, 4, 16, 4, 16, 4, 48, 4, 48, 4, 48, 4, 48, 4, 80, 4, 80, 4,
1703            80, 4, 80, 4, 112, 4, 112, 4, 112, 4, 112, 4, 0, 0, 16, 0, 32, 0, 48, 0, 64, 0, 80, 0,
1704            96, 0, 112, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32,
1705            0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48,
1706            0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 128, 0, 128, 0, 128, 0, 128,
1707            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
1708            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
1709            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1710            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1711            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1712            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1713            0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1714            0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1715            0, 160, 0, 160, 0, 160, 0, 160, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1716            0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1717            0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1718            0, 176, 0, 176, 0, 176, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1719            0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1720            0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1721            0, 192, 0, 192, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1722            0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1723            0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1724            0, 208, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1725            0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1726            0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1727            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240,
1728            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240,
1729            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 0,
1730            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
1731            0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1732            1, 0, 1, 0, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1,
1733            16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16,
1734            1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 32, 1, 32, 1, 32, 1,
1735            32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32,
1736            1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1,
1737            32, 1, 32, 1, 32, 1, 32, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48,
1738            1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1,
1739            48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 64, 1, 64,
1740            1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1,
1741            64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64,
1742            1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1,
1743            80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80,
1744            1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1,
1745            96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96,
1746            1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1,
1747            96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 128, 0, 136, 0, 136, 0, 136, 0, 136,
1748            0, 136, 0, 136, 0, 136, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
1749            2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
1750            0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1751            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1752            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1753            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1754            200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1755            200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1756            200, 0, 200, 0, 200, 0, 200, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1757            232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1758            232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1759            232, 0, 232, 0, 232, 0, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8,
1760            1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1,
1761            8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40,
1762            1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1,
1763            40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40,
1764            1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1,
1765            72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72,
1766            1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1767            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1768            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1769            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1770            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1771            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1772            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1773            168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1774            168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1775            168, 1, 168, 1, 168, 1, 168, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1776            200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1777            200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1778            200, 1, 200, 1, 200, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1779            232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1780            232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1781            232, 1, 232, 1, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2,
1782            8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8,
1783            2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40,
1784            2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2,
1785            40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 72,
1786            2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2,
1787            72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72,
1788            2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1789            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1790            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1791            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 244, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1792            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1793            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1794            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1795            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1796            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1797            2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1798            4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
1799            6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
1800            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10,
1801            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,
1802            11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
1803            12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
1804            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1805            15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 0,
1806        ];
1807        assert_eq!(trie_serialized, EXP_TRIE_SERIALIZED);
1808
1809        let trie_deserialized = postcard::from_bytes::<CodePointTrie<u8>>(&trie_serialized)?;
1810
1811        assert_eq!(&trie.index, &trie_deserialized.index);
1812        assert_eq!(&trie.data, &trie_deserialized.data);
1813
1814        assert!(!trie_deserialized.index.is_owned());
1815        assert!(!trie_deserialized.data.is_owned());
1816
1817        Ok(())
1818    }
1819
1820    #[test]
1821    fn test_typed() {
1822        let untyped = planes::get_planes_trie();
1823        assert_eq!(untyped.get('\u{10000}'), 1);
1824        let small_ref = <&SmallCodePointTrie<_>>::try_from(&untyped).unwrap();
1825        assert_eq!(small_ref.get('\u{10000}'), 1);
1826        let _ = <&FastCodePointTrie<_>>::try_from(&untyped).is_err();
1827        let small = <SmallCodePointTrie<_>>::try_from(untyped).unwrap();
1828        assert_eq!(small.get('\u{10000}'), 1);
1829    }
1830
1831    #[test]
1832    fn test_get_range() {
1833        let planes_trie = planes::get_planes_trie();
1834
1835        let first_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x0);
1836        assert_eq!(
1837            first_range,
1838            Some(CodePointMapRange {
1839                range: 0x0..=0xffff,
1840                value: 0
1841            })
1842        );
1843
1844        let second_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x1_0000);
1845        assert_eq!(
1846            second_range,
1847            Some(CodePointMapRange {
1848                range: 0x10000..=0x1ffff,
1849                value: 1
1850            })
1851        );
1852
1853        let penultimate_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0xf_0000);
1854        assert_eq!(
1855            penultimate_range,
1856            Some(CodePointMapRange {
1857                range: 0xf_0000..=0xf_ffff,
1858                value: 15
1859            })
1860        );
1861
1862        let last_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x10_0000);
1863        assert_eq!(
1864            last_range,
1865            Some(CodePointMapRange {
1866                range: 0x10_0000..=0x10_ffff,
1867                value: 16
1868            })
1869        );
1870    }
1871
1872    #[test]
1873    #[allow(unused_unsafe)] // `unsafe` below is both necessary and unnecessary
1874    fn databake() {
1875        databake::test_bake!(
1876            CodePointTrie<'static, u32>,
1877            const,
1878            unsafe {
1879                crate::codepointtrie::CodePointTrie::from_parts_unstable_unchecked_v1(
1880                    crate::codepointtrie::CodePointTrieHeader {
1881                        high_start: 1u32,
1882                        shifted12_high_start: 2u16,
1883                        index3_null_offset: 3u16,
1884                        data_null_offset: 4u32,
1885                        null_value: 5u32,
1886                        trie_type: crate::codepointtrie::TrieType::Small,
1887                    },
1888                    zerovec::ZeroVec::new(),
1889                    zerovec::ZeroVec::new(),
1890                    0u32,
1891                )
1892            },
1893            icu_collections,
1894            [zerovec],
1895        );
1896    }
1897}