icu_collections/codepointtrie/
cptrie.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::codepointtrie::error::Error;
6use crate::codepointtrie::impl_const::*;
7
8#[cfg(feature = "alloc")]
9use crate::codepointinvlist::CodePointInversionList;
10use core::char::CharTryFromError;
11use core::convert::Infallible;
12use core::convert::TryFrom;
13use core::fmt::Display;
14#[cfg(feature = "alloc")]
15use core::iter::FromIterator;
16use core::num::TryFromIntError;
17use core::ops::RangeInclusive;
18use yoke::Yokeable;
19use zerofrom::ZeroFrom;
20use zerovec::ule::AsULE;
21#[cfg(feature = "alloc")]
22use zerovec::ule::UleError;
23use zerovec::ZeroSlice;
24use zerovec::ZeroVec;
25
26/// The type of trie represents whether the trie has an optimization that
27/// would make it smaller or faster.
28///
29/// Regarding performance, a trie being a small or fast type affects the number of array lookups
30/// needed for code points in the range `[0x1000, 0x10000)`. In this range, `Small` tries use 4 array lookups,
31/// while `Fast` tries use 2 array lookups.
32/// Code points before the interval (in `[0, 0x1000)`) will always use 2 array lookups.
33/// Code points after the interval (in `[0x10000, 0x10FFFF]`) will always use 4 array lookups.
34///
35/// Regarding size, `Fast` type tries are larger than `Small` type tries because the minimum size of
36/// the index array is larger. The minimum size is the "fast max" limit, which is the limit of the range
37/// of code points with 2 array lookups.
38///
39/// See the document [Unicode Properties and Code Point Tries in ICU4X](https://github.com/unicode-org/icu4x/blob/main/documents/design/properties_code_point_trie.md).
40///
41/// Also see [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
42#[derive(#[automatically_derived]
impl ::core::clone::Clone for TrieType {
    #[inline]
    fn clone(&self) -> TrieType { *self }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for TrieType { }Copy, #[automatically_derived]
impl ::core::cmp::PartialEq for TrieType {
    #[inline]
    fn eq(&self, other: &TrieType) -> bool {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        __self_discr == __arg1_discr
    }
}PartialEq, #[automatically_derived]
impl ::core::fmt::Debug for TrieType {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::write_str(f,
            match self {
                TrieType::Fast => "Fast",
                TrieType::Small => "Small",
            })
    }
}Debug, #[automatically_derived]
impl ::core::cmp::Eq for TrieType {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {}
}Eq)]
43#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
44#[cfg_attr(feature = "databake", derive(databake::Bake))]
45#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
46pub enum TrieType {
47    /// Represents the "fast" type code point tries for the
48    /// [`TrieType`] trait. The "fast max" limit is set to `0xffff`.
49    Fast = 0,
50    /// Represents the "small" type code point tries for the
51    /// [`TrieType`] trait. The "fast max" limit is set to `0x0fff`.
52    Small = 1,
53}
54
55// TrieValue trait
56
57// AsULE is AsUnalignedLittleEndian, i.e. "allowed in a zerovec"
58
59/// A trait representing the values stored in the data array of a [`CodePointTrie`].
60/// This trait is used as a type parameter in constructing a `CodePointTrie`.
61///
62/// This trait can be implemented on anything that can be represented as a u32s worth of data.
63pub trait TrieValue: Copy + Eq + PartialEq + zerovec::ule::AsULE + 'static {
64    /// Last-resort fallback value to return if we cannot read data from the trie.
65    ///
66    /// In most cases, the error value is read from the last element of the `data` array,
67    /// this value is used for empty codepointtrie arrays
68    /// Error type when converting from a u32 to this `TrieValue`.
69    type TryFromU32Error: Display;
70    /// A parsing function that is primarily motivated by deserialization contexts.
71    /// When the serialization type width is smaller than 32 bits, then it is expected
72    /// that the call site will widen the value to a `u32` first.
73    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>;
74
75    /// A method for converting back to a `u32` that can roundtrip through
76    /// [`Self::try_from_u32()`]. The default implementation of this trait
77    /// method panics in debug mode and returns 0 in release mode.
78    ///
79    /// This method is allowed to have GIGO behavior when fed a value that has
80    /// no corresponding `u32` (since such values cannot be stored in the trie)
81    fn to_u32(self) -> u32;
82}
83
84macro_rules! impl_primitive_trie_value {
85    ($primitive:ty, $error:ty) => {
86        impl TrieValue for $primitive {
87            type TryFromU32Error = $error;
88            fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
89                Self::try_from(i)
90            }
91
92            fn to_u32(self) -> u32 {
93                // bitcast when the same size, zero-extend/sign-extend
94                // when not the same size
95                self as u32
96            }
97        }
98    };
99}
100
101impl TrieValue for u8 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(u8, TryFromIntError);
102impl TrieValue for u16 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(u16, TryFromIntError);
103impl TrieValue for u32 {
    type TryFromU32Error = Infallible;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(u32, Infallible);
104impl TrieValue for i8 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(i8, TryFromIntError);
105impl TrieValue for i16 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(i16, TryFromIntError);
106impl TrieValue for i32 {
    type TryFromU32Error = TryFromIntError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(i32, TryFromIntError);
107impl TrieValue for char {
    type TryFromU32Error = CharTryFromError;
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
        Self::try_from(i)
    }
    fn to_u32(self) -> u32 { self as u32 }
}impl_primitive_trie_value!(char, CharTryFromError);
108
109/// Helper function used by [`get_range`]. Converts occurrences of trie's null
110/// value into the provided `null_value`.
111///
112/// Note: the ICU version of this helper function uses a `ValueFilter` function
113/// to apply a transform on a non-null value. But currently, this implementation
114/// stops short of that functionality, and instead leaves the non-null trie value
115/// untouched. This is equivalent to having a `ValueFilter` function that is the
116/// identity function.
117fn maybe_filter_value<T: TrieValue>(value: T, trie_null_value: T, null_value: T) -> T {
118    if value == trie_null_value {
119        null_value
120    } else {
121        value
122    }
123}
124
125/// This struct represents a de-serialized [`CodePointTrie`] that was exported from
126/// ICU binary data.
127///
128/// For more information:
129/// - [ICU Site design doc](http://site.icu-project.org/design/struct/utrie)
130/// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup)
131// serde impls in crate::serde
132#[derive(#[automatically_derived]
impl<'trie, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    CodePointTrie<'trie, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field4_finish(f, "CodePointTrie",
            "header", &self.header, "index", &self.index, "data", &self.data,
            "error_value", &&self.error_value)
    }
}Debug, #[automatically_derived]
impl<'trie, T: ::core::cmp::Eq + TrieValue> ::core::cmp::Eq for
    CodePointTrie<'trie, T> {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _: ::core::cmp::AssertParamIsEq<CodePointTrieHeader>;
        let _: ::core::cmp::AssertParamIsEq<ZeroVec<'trie, u16>>;
        let _: ::core::cmp::AssertParamIsEq<ZeroVec<'trie, T>>;
        let _: ::core::cmp::AssertParamIsEq<T>;
    }
}Eq, #[automatically_derived]
impl<'trie, T: ::core::cmp::PartialEq + TrieValue> ::core::cmp::PartialEq for
    CodePointTrie<'trie, T> {
    #[inline]
    fn eq(&self, other: &CodePointTrie<'trie, T>) -> bool {
        self.header == other.header && self.index == other.index &&
                self.data == other.data &&
            self.error_value == other.error_value
    }
}PartialEq, unsafe impl<'a, T: TrieValue> yoke::Yokeable<'a> for CodePointTrie<'static, T>
    where T: 'static {
    type Output = CodePointTrie<'a, T>;
    #[inline]
    fn transform(&'a self) -> &'a Self::Output { self }
    #[inline]
    fn transform_owned(self) -> Self::Output { self }
    #[inline]
    unsafe fn make(this: Self::Output) -> Self {
        use core::{mem, ptr};
        if true {
            if !(mem::size_of::<Self::Output>() == mem::size_of::<Self>()) {
                ::core::panicking::panic("assertion failed: mem::size_of::<Self::Output>() == mem::size_of::<Self>()")
            };
        };
        let ptr: *const Self = (&this as *const Self::Output).cast();

        #[allow(forgetting_copy_types, clippy :: forget_copy, clippy ::
        forget_non_drop, clippy :: mem_forget)]
        mem::forget(this);
        ptr::read(ptr)
    }
    #[inline]
    fn transform_mut<F>(&'a mut self, f: F) where F: 'static +
        for<'b> FnOnce(&'b mut Self::Output) {
        unsafe {
            f(core::mem::transmute::<&'a mut Self,
                        &'a mut Self::Output>(self))
        }
    }
}Yokeable, impl<'zf, 'zf_inner, T: TrieValue>
    zerofrom::ZeroFrom<'zf, CodePointTrie<'zf_inner, T>> for
    CodePointTrie<'zf, T> where
    ZeroVec<'zf, T>: zerofrom::ZeroFrom<'zf, ZeroVec<'zf_inner, T>> {
    fn zero_from(this: &'zf CodePointTrie<'zf_inner, T>) -> Self {
        match *this {
            CodePointTrie {
                header: ref __binding_0,
                index: ref __binding_1,
                data: ref __binding_2,
                error_value: ref __binding_3 } => {
                CodePointTrie {
                    header: *__binding_0,
                    index: <ZeroVec<'zf, u16> as
                            zerofrom::ZeroFrom<'zf,
                            ZeroVec<'zf_inner, u16>>>::zero_from(__binding_1),
                    data: <ZeroVec<'zf, T> as
                            zerofrom::ZeroFrom<'zf,
                            ZeroVec<'zf_inner, T>>>::zero_from(__binding_2),
                    error_value: __binding_3.clone(),
                }
            }
        }
    }
}ZeroFrom)]
133pub struct CodePointTrie<'trie, T: TrieValue> {
134    /// # Safety Invariant
135    ///
136    /// The value of `header.trie_type` must not change after construction.
137    pub(crate) header: CodePointTrieHeader,
138    /// # Safety Invariant
139    ///
140    /// If `header.trie_type == TrieType::Fast`, `index.len()` must be greater
141    /// than `FAST_TYPE_FAST_INDEXING_MAX`. Otherwise, `index.len()`
142    /// must be greater than `SMALL_TYPE_FAST_INDEXING_MAX`. Furthermore,
143    /// this field must not change after construction. (Strictly: It must
144    /// not become shorter than the length requirement stated above and the
145    /// values within the prefix up to the length requirement must not change.)
146    pub(crate) index: ZeroVec<'trie, u16>,
147    /// # Safety Invariant
148    ///
149    /// If `header.trie_type == TrieType::Fast`, `data.len()` must be greater
150    /// than `FAST_TYPE_DATA_MASK` plus the largest value in
151    /// `index[0..FAST_TYPE_FAST_INDEXING_MAX + 1]`. Otherwise, `data.len()`
152    /// must be greater than `FAST_TYPE_DATA_MASK` plus the largest value in
153    /// `index[0..SMALL_TYPE_FAST_INDEXING_MAX + 1]`. Furthermore, this field
154    /// must not change after construction. (Strictly: The stated length
155    /// requirement must continue to hold.)
156    pub(crate) data: ZeroVec<'trie, T>,
157    // serde impl skips this field
158    #[zerofrom(clone)] // TrieValue is Copy, this allows us to avoid
159    // a T: ZeroFrom bound
160    pub(crate) error_value: T,
161}
162
163/// This struct contains the fixed-length header fields of a [`CodePointTrie`].
164///
165/// # Safety Invariant
166///
167/// The `trie_type` field must not change after construction.
168///
169/// (In practice, all the fields here remain unchanged during the lifetime
170/// of the trie.)
171#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
172#[cfg_attr(feature = "databake", derive(databake::Bake))]
173#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
174#[derive(#[automatically_derived]
impl ::core::marker::Copy for CodePointTrieHeader { }Copy, #[automatically_derived]
impl ::core::clone::Clone for CodePointTrieHeader {
    #[inline]
    fn clone(&self) -> CodePointTrieHeader {
        let _: ::core::clone::AssertParamIsClone<u32>;
        let _: ::core::clone::AssertParamIsClone<u16>;
        let _: ::core::clone::AssertParamIsClone<TrieType>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::fmt::Debug for CodePointTrieHeader {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        let names: &'static _ =
            &["high_start", "shifted12_high_start", "index3_null_offset",
                        "data_null_offset", "null_value", "trie_type"];
        let values: &[&dyn ::core::fmt::Debug] =
            &[&self.high_start, &self.shifted12_high_start,
                        &self.index3_null_offset, &self.data_null_offset,
                        &self.null_value, &&self.trie_type];
        ::core::fmt::Formatter::debug_struct_fields_finish(f,
            "CodePointTrieHeader", names, values)
    }
}Debug, #[automatically_derived]
impl ::core::cmp::Eq for CodePointTrieHeader {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _: ::core::cmp::AssertParamIsEq<u32>;
        let _: ::core::cmp::AssertParamIsEq<u16>;
        let _: ::core::cmp::AssertParamIsEq<TrieType>;
    }
}Eq, #[automatically_derived]
impl ::core::cmp::PartialEq for CodePointTrieHeader {
    #[inline]
    fn eq(&self, other: &CodePointTrieHeader) -> bool {
        self.high_start == other.high_start &&
                            self.shifted12_high_start == other.shifted12_high_start &&
                        self.index3_null_offset == other.index3_null_offset &&
                    self.data_null_offset == other.data_null_offset &&
                self.null_value == other.null_value &&
            self.trie_type == other.trie_type
    }
}PartialEq, unsafe impl<'a> yoke::Yokeable<'a> for CodePointTrieHeader<> where Self: Sized
    {
    type Output = Self;
    #[inline]
    fn transform(&self) -> &Self::Output { self }
    #[inline]
    fn transform_owned(self) -> Self::Output { self }
    #[inline]
    unsafe fn make(this: Self::Output) -> Self { this }
    #[inline]
    fn transform_mut<F>(&'a mut self, f: F) where F: 'static +
        for<'b> FnOnce(&'b mut Self::Output) {
        f(self)
    }
}Yokeable, impl<'zf> zerofrom::ZeroFrom<'zf, CodePointTrieHeader<>> for
    CodePointTrieHeader<> where  {
    fn zero_from(this: &'zf Self) -> Self { *this }
}ZeroFrom)]
175pub struct CodePointTrieHeader {
176    /// The code point of the start of the last range of the trie. A
177    /// range is defined as a partition of the code point space such that the
178    /// value in this trie associated with all code points of the same range is
179    /// the same.
180    ///
181    /// For the property value data for many Unicode properties,
182    /// often times, `high_start` is `U+10000` or lower. In such cases, not
183    /// reserving space in the `index` array for duplicate values is a large
184    /// savings. The "highValue" associated with the `high_start` range is
185    /// stored at the second-to-last position of the `data` array.
186    /// (See `impl_const::HIGH_VALUE_NEG_DATA_OFFSET`.)
187    pub high_start: u32,
188    /// A version of the `high_start` value that is right-shifted 12 spaces,
189    /// but is rounded up to a multiple `0x1000` for easy testing from UTF-8
190    /// lead bytes.
191    pub shifted12_high_start: u16,
192    /// Offset for the null block in the "index-3" table of the `index` array.
193    /// Set to an impossibly high value (e.g., `0xffff`) if there is no
194    /// dedicated index-3 null block.
195    pub index3_null_offset: u16,
196    /// Internal data null block offset, not shifted.
197    /// Set to an impossibly high value (e.g., `0xfffff`) if there is no
198    /// dedicated data null block.
199    pub data_null_offset: u32,
200    /// The value stored in the trie that represents a null value being
201    /// associated to a code point.
202    pub null_value: u32,
203    /// The enum value representing the type of trie, where trie type is as it
204    /// is defined in ICU (ex: Fast, Small).
205    ///
206    /// # Safety Invariant
207    ///
208    /// Must not change after construction.
209    pub trie_type: TrieType,
210}
211
212impl TryFrom<u8> for TrieType {
213    type Error = crate::codepointtrie::error::Error;
214
215    fn try_from(trie_type_int: u8) -> Result<TrieType, crate::codepointtrie::error::Error> {
216        match trie_type_int {
217            0 => Ok(TrieType::Fast),
218            1 => Ok(TrieType::Small),
219            _ => Err(crate::codepointtrie::error::Error::FromDeserialized {
220                reason: "Cannot parse value for trie_type",
221            }),
222        }
223    }
224}
225
226// Helper macro that turns arithmetic into wrapping-in-release, checked-in-debug arithmetic
227//
228// This is rustc's default behavior anyway, however some projects like Android deliberately
229// enable overflow checks. CodePointTrie::get() is intended to be used in Android bionic which
230// cares about codesize and we don't want the pile of panicking infrastructure brought in by overflow
231// checks, so we force wrapping in release.
232// See #6052
233macro_rules! w(
234    // Note: these matchers are not perfect since you cannot have an operator after an expr matcher
235    // Use variables if you need complex first operands.
236    ($a:tt + $b:expr) => {
237        {
238            #[allow(unused_parens)]
239            let a = $a;
240            let b = $b;
241            debug_assert!(a.checked_add(b).is_some());
242            $a.wrapping_add($b)
243        }
244    };
245    ($a:tt - $b:expr) => {
246
247        {
248            #[allow(unused_parens)]
249            let a = $a;
250            let b = $b;
251            debug_assert!(a.checked_sub(b).is_some());
252            $a.wrapping_sub($b)
253        }
254    };
255    ($a:tt * $b:expr) => {
256        {
257            #[allow(unused_parens)]
258            let a = $a;
259            let b = $b;
260            debug_assert!(a.checked_mul(b).is_some());
261            $a.wrapping_mul($b)
262        }
263    };
264);
265
266impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
267    #[doc(hidden)] // databake internal
268    /// # Safety
269    ///
270    /// `header.trie_type`, `index`, and `data` must
271    /// satisfy the invariants for the fields of the
272    /// same names on `CodePointTrie`.
273    pub const unsafe fn from_parts_unstable_unchecked_v1(
274        header: CodePointTrieHeader,
275        index: ZeroVec<'trie, u16>,
276        data: ZeroVec<'trie, T>,
277        error_value: T,
278    ) -> Self {
279        // Field invariants upheld: The caller is responsible.
280        // In practice, this means that datagen in the databake
281        // mode upholds these invariants when constructing the
282        // `CodePointTrie` that is then baked.
283        Self {
284            header,
285            index,
286            data,
287            error_value,
288        }
289    }
290
291    /// Returns a new [`CodePointTrie`] backed by borrowed data for the `index`
292    /// array and `data` array, whose data values have width `W`.
293    pub fn try_new(
294        header: CodePointTrieHeader,
295        index: ZeroVec<'trie, u16>,
296        data: ZeroVec<'trie, T>,
297    ) -> Result<CodePointTrie<'trie, T>, Error> {
298        let error_value = Self::validate_fields(&header, &index, &data)?;
299        // Field invariants upheld: Checked by `validate_fields` above.
300        let trie: CodePointTrie<'trie, T> = CodePointTrie {
301            header,
302            index,
303            data,
304            error_value,
305        };
306        Ok(trie)
307    }
308
309    /// Checks the invariant on the fields that fast-path access relies on for
310    /// safety in order to omit slice bound checks and upon success returns the
311    /// `error_value` for the trie.
312    ///
313    /// # Safety Usable Invariant
314    ///
315    /// Iff this function returns `Ok(T)`, the arguments satisfy the invariants
316    /// for corresponding fields of `CodePointTrie`. (Other than proving that
317    /// nothing else changes the fields subsequently.)
318    pub(crate) fn validate_fields(
319        header: &CodePointTrieHeader,
320        index: &ZeroSlice<u16>,
321        data: &ZeroSlice<T>,
322    ) -> Result<T, Error> {
323        let error_value = data.last().ok_or(Error::EmptyDataVector)?;
324
325        // `CodePointTrie` lookup has two stages: fast and small (the trie types
326        // are also fast and small; they affect where the boundary between fast
327        // and small lookups is).
328        //
329        // The length requirements for `index` and `data` are checked here only
330        // for the fast lookup case so that the fast lookup can omit bound checks
331        // at the time of access. In the small lookup case, bounds are checked at
332        // the time of access.
333        //
334        // The fast lookup happens on the prefixes of `index` and `data` with
335        // more items for the small lookup case afterwards. The check here
336        // only looks at the prefixes relevant to the fast case.
337        //
338        // In the fast lookup case, the bits of the of the code point are
339        // partitioned into a bit prefix and a bit suffix. First, a value
340        // is read from `index` by indexing into it using the bit prefix.
341        // Then `data` is accessed by the value just read from `index` plus
342        // the bit suffix.
343        //
344        // Therefore, the length of `index` needs to accommodate access
345        // by the maximum possible bit prefix, and the length of `data`
346        // needs to accommodate access by the largest value stored in the part
347        // of `data` reachable by the bit prefix plus the maximum possible bit
348        // suffix.
349        //
350        // The maximum possible bit prefix depends on the trie type.
351
352        // The maximum code point that can be used for fast-path access:
353        let fast_max = match header.trie_type {
354            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
355            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
356        };
357        // Keep only the prefix bits:
358        let max_bit_prefix = fast_max >> FAST_TYPE_SHIFT;
359        // Attempt slice the part of `index` that the fast path can index into.
360        // Since `max_bit_prefix` is the largest possible value used for
361        // indexing (inclusive bound), we need to add one to get the exclusive
362        // bound, which is what `get_subslice` wants.
363        let fast_index = index
364            .get_subslice(0..(max_bit_prefix as usize) + 1)
365            .ok_or(Error::IndexTooShortForFastAccess)?;
366        // Invariant upheld for `index`: If we got this far, the length of `index`
367        // satisfies its length invariant on the assumption that `header.trie_type`
368        // will not change subsequently.
369
370        // Now find the largest offset in the part of `index` reachable by the
371        // bit prefix. `max` can never actually return `None`, since we already
372        // know the slice isn't empty. Hence, reusing the error kind instead of
373        // minting a new one for this check.
374        let max_offset = fast_index
375            .iter()
376            .max()
377            .ok_or(Error::IndexTooShortForFastAccess)?;
378        // `FAST_TYPE_DATA_MASK` is the maximum possible bit suffix, since the
379        // maximum is when all the bits in the suffix are set, and the mask
380        // has that many bits set.
381        if (max_offset) as usize + (FAST_TYPE_DATA_MASK as usize) >= data.len() {
382            return Err(Error::DataTooShortForFastAccess);
383        }
384
385        // Invariant upheld for `data`: If we got this far, the length of `data`
386        // satisfies `data`'s length invariant on the assumption that the contents
387        // of `fast_index` subslice of `index` and `header.trie_type` will not
388        // change subsequently.
389
390        Ok(error_value)
391    }
392
393    /// Turns this trie into a version whose trie type is encoded in the Rust type.
394    #[inline]
395    pub fn to_typed(self) -> Typed<FastCodePointTrie<'trie, T>, SmallCodePointTrie<'trie, T>> {
396        match self.header.trie_type {
397            TrieType::Fast => Typed::Fast(FastCodePointTrie { inner: self }),
398            TrieType::Small => Typed::Small(SmallCodePointTrie { inner: self }),
399        }
400    }
401
402    /// Obtains a reference to this trie as a Rust type that encodes the trie type in the Rust type.
403    #[inline]
404    pub fn as_typed_ref(
405        &self,
406    ) -> Typed<&FastCodePointTrie<'trie, T>, &SmallCodePointTrie<'trie, T>> {
407        // SAFETY: `FastCodePointTrie` and `SmallCodePointTrie` are `repr(transparent)`
408        // with `CodePointTrie`, so transmuting between the references is OK when the
409        // actual trie type agrees with the semantics of the typed wrapper.
410        match self.header.trie_type {
411            TrieType::Fast => Typed::Fast(unsafe {
412                core::mem::transmute::<&CodePointTrie<'trie, T>, &FastCodePointTrie<'trie, T>>(self)
413            }),
414            TrieType::Small => Typed::Small(unsafe {
415                core::mem::transmute::<&CodePointTrie<'trie, T>, &SmallCodePointTrie<'trie, T>>(
416                    self,
417                )
418            }),
419        }
420    }
421
422    /// Returns the position in the data array containing the trie's stored
423    /// error value.
424    #[inline(always)] // `always` was based on previous normalizer benchmarking
425    fn trie_error_val_index(&self) -> u32 {
426        // We use wrapping_sub here to avoid panicky overflow checks.
427        // len should always be > 1, but if it isn't this will just cause GIGO behavior of producing
428        // None on `.get()`
429        if true {
    if !(self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET) {
        ::core::panicking::panic("assertion failed: self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET")
    };
};debug_assert!(self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET);
430        {
    #[allow(unused_parens)]
    let a = (self.data.len() as u32);
    let b = ERROR_VALUE_NEG_DATA_OFFSET;
    if true {
        if !a.checked_sub(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_sub(b).is_some()")
        };
    };
    (self.data.len() as u32).wrapping_sub(ERROR_VALUE_NEG_DATA_OFFSET)
}w!((self.data.len() as u32) - ERROR_VALUE_NEG_DATA_OFFSET)
431    }
432
433    fn internal_small_index(&self, code_point: u32) -> u32 {
434        // We use wrapping arithmetic here to avoid overflow checks making their way into binaries
435        // with overflow checks enabled. Ultimately this code ends up as a checked index, so any
436        // bugs here will cause GIGO
437        let mut index1_pos: u32 = code_point >> SHIFT_1;
438        if self.header.trie_type == TrieType::Fast {
439            if true {
    if !(FAST_TYPE_FAST_INDEXING_MAX < code_point &&
                code_point < self.header.high_start) {
        ::core::panicking::panic("assertion failed: FAST_TYPE_FAST_INDEXING_MAX < code_point &&\n    code_point < self.header.high_start")
    };
};debug_assert!(
440                FAST_TYPE_FAST_INDEXING_MAX < code_point && code_point < self.header.high_start
441            );
442            index1_pos = {
    #[allow(unused_parens)]
    let a = index1_pos;
    let b = BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index1_pos.wrapping_add(BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH)
}w!(index1_pos + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH);
443        } else {
444            if !(code_point < self.header.high_start &&
            self.header.high_start > SMALL_LIMIT) {
    ::core::panicking::panic("assertion failed: code_point < self.header.high_start && self.header.high_start > SMALL_LIMIT")
};assert!(code_point < self.header.high_start && self.header.high_start > SMALL_LIMIT);
445            index1_pos = {
    #[allow(unused_parens)]
    let a = index1_pos;
    let b = SMALL_INDEX_LENGTH;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index1_pos.wrapping_add(SMALL_INDEX_LENGTH)
}w!(index1_pos + SMALL_INDEX_LENGTH);
446        }
447        let index1_val = if let Some(index1_val) = self.index.get(index1_pos as usize) {
448            index1_val
449        } else {
450            return self.trie_error_val_index();
451        };
452        let index3_block_idx: u32 =
453            {
    #[allow(unused_parens)]
    let a = (index1_val as u32);
    let b = (code_point >> SHIFT_2) & INDEX_2_MASK;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    (index1_val as u32).wrapping_add((code_point >> SHIFT_2) & INDEX_2_MASK)
}w!((index1_val as u32) + (code_point >> SHIFT_2) & INDEX_2_MASK);
454        let mut index3_block: u32 =
455            if let Some(index3_block) = self.index.get(index3_block_idx as usize) {
456                index3_block as u32
457            } else {
458                return self.trie_error_val_index();
459            };
460        let mut index3_pos: u32 = (code_point >> SHIFT_3) & INDEX_3_MASK;
461        let mut data_block: u32;
462        if index3_block & 0x8000 == 0 {
463            // 16-bit indexes
464            data_block =
465                if let Some(data_block) = self.index.get({
    #[allow(unused_parens)]
    let a = index3_block;
    let b = index3_pos;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index3_block.wrapping_add(index3_pos)
}w!(index3_block + index3_pos) as usize) {
466                    data_block as u32
467                } else {
468                    return self.trie_error_val_index();
469                };
470        } else {
471            // 18-bit indexes stored in groups of 9 entries per 8 indexes.
472            index3_block = {
    #[allow(unused_parens)]
    let a = (index3_block & 0x7fff);
    let b =
        {
            #[allow(unused_parens)]
            let a = (index3_pos & !7);
            let b = index3_pos >> 3;
            if true {
                if !a.checked_add(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
                };
            };
            (index3_pos & !7).wrapping_add(index3_pos >> 3)
        };
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    (index3_block &
                0x7fff).wrapping_add({
            #[allow(unused_parens)]
            let a = (index3_pos & !7);
            let b = index3_pos >> 3;
            if true {
                if !a.checked_add(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
                };
            };
            (index3_pos & !7).wrapping_add(index3_pos >> 3)
        })
}w!((index3_block & 0x7fff) + w!((index3_pos & !7) + index3_pos >> 3));
473            index3_pos &= 7;
474            data_block = if let Some(data_block) = self.index.get(index3_block as usize) {
475                data_block as u32
476            } else {
477                return self.trie_error_val_index();
478            };
479            data_block = (data_block << {
    #[allow(unused_parens)]
    let a = 2u32;
    let b =
        {
            #[allow(unused_parens)]
            let a = 2u32;
            let b = index3_pos;
            if true {
                if !a.checked_mul(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_mul(b).is_some()")
                };
            };
            2u32.wrapping_mul(index3_pos)
        };
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    2u32.wrapping_add({
            #[allow(unused_parens)]
            let a = 2u32;
            let b = index3_pos;
            if true {
                if !a.checked_mul(b).is_some() {
                    ::core::panicking::panic("assertion failed: a.checked_mul(b).is_some()")
                };
            };
            2u32.wrapping_mul(index3_pos)
        })
}w!(2u32 + w!(2u32 * index3_pos))) & 0x30000;
480            index3_block += 1;
481            data_block =
482                if let Some(index3_val) = self.index.get({
    #[allow(unused_parens)]
    let a = index3_block;
    let b = index3_pos;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index3_block.wrapping_add(index3_pos)
}w!(index3_block + index3_pos) as usize) {
483                    data_block | (index3_val as u32)
484                } else {
485                    return self.trie_error_val_index();
486                };
487        }
488        // Returns data_pos == data_block (offset) +
489        //     portion of code_point bit field for last (4th) lookup
490        {
    #[allow(unused_parens)]
    let a = data_block;
    let b = code_point & SMALL_DATA_MASK;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    data_block.wrapping_add(code_point & SMALL_DATA_MASK)
}w!(data_block + code_point & SMALL_DATA_MASK)
491    }
492
493    /// Returns the position in the `data` array for the given code point,
494    /// where this code point is at or above the fast limit associated for the
495    /// `trie_type`. We will refer to that limit as "`fastMax`" here.
496    ///
497    /// A lookup of the value in the code point trie for a code point in the
498    /// code point space range [`fastMax`, `high_start`) will be a 4-step
499    /// lookup: 3 lookups in the `index` array and one lookup in the `data`
500    /// array. Lookups for code points in the range [`high_start`,
501    /// `CODE_POINT_MAX`] are short-circuited to be a single lookup, see
502    /// [`CodePointTrieHeader::high_start`].
503    fn small_index(&self, code_point: u32) -> u32 {
504        if code_point >= self.header.high_start {
505            {
    #[allow(unused_parens)]
    let a = (self.data.len() as u32);
    let b = HIGH_VALUE_NEG_DATA_OFFSET;
    if true {
        if !a.checked_sub(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_sub(b).is_some()")
        };
    };
    (self.data.len() as u32).wrapping_sub(HIGH_VALUE_NEG_DATA_OFFSET)
}w!((self.data.len() as u32) - HIGH_VALUE_NEG_DATA_OFFSET)
506        } else {
507            self.internal_small_index(code_point) // helper fn
508        }
509    }
510
511    /// Returns the position in the `data` array for the given code point,
512    /// where this code point is below the fast limit associated for the
513    /// `trie type`. We will refer to that limit as "`fastMax`" here.
514    ///
515    /// A lookup of the value in the code point trie for a code point in the
516    /// code point space range [0, `fastMax`) will be a 2-step lookup: 1
517    /// lookup in the `index` array and one lookup in the `data` array. By
518    /// design, for trie type `T`, there is an element allocated in the `index`
519    /// array for each block of code points in [0, `fastMax`), which in
520    /// turn guarantees that those code points are represented and only need 1
521    /// lookup.
522    fn fast_index(&self, code_point: u32) -> u32 {
523        let index_array_pos: u32 = code_point >> FAST_TYPE_SHIFT;
524        let index_array_val: u16 =
525            if let Some(index_array_val) = self.index.get(index_array_pos as usize) {
526                index_array_val
527            } else {
528                return self.trie_error_val_index();
529            };
530        let masked_cp = code_point & FAST_TYPE_DATA_MASK;
531        let index_array_val = index_array_val as u32;
532        let fast_index_val: u32 = {
    #[allow(unused_parens)]
    let a = index_array_val;
    let b = masked_cp;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    index_array_val.wrapping_add(masked_cp)
}w!(index_array_val + masked_cp);
533        fast_index_val
534    }
535
536    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`]
537    /// if `code_point` uses fast-path lookup or `None` if `code_point`
538    /// should use small-path lookup or is above the supported range.
539    #[inline(always)] // "always" to make the `Option` collapse away
540    fn get32_by_fast_index(&self, code_point: u32) -> Option<T> {
541        let fast_max = match self.header.trie_type {
542            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
543            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
544        };
545        if code_point <= fast_max {
546            // SAFETY: We just checked the invariant of
547            // `get32_assuming_fast_index`,
548            // which is
549            // "If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
550            // `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
551            // TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`."
552            Some(unsafe { self.get32_assuming_fast_index(code_point) })
553        } else {
554            // The caller needs to call `get32_by_small_index` or determine
555            // that the argument is above the permitted range.
556            None
557        }
558    }
559
560    /// Performs the actual fast-mode lookup
561    ///
562    /// # Safety
563    ///
564    /// If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
565    /// `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
566    /// TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`.
567    #[inline(always)]
568    unsafe fn get32_assuming_fast_index(&self, code_point: u32) -> T {
569        // Check the safety invariant of this method.
570        if true {
    if !(code_point <=
                match self.header.trie_type {
                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
                }) {
        ::core::panicking::panic("assertion failed: code_point <=\n    match self.header.trie_type {\n        TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,\n        TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,\n    }")
    };
};debug_assert!(
571            code_point
572                <= match self.header.trie_type {
573                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
574                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
575                }
576        );
577
578        let bit_prefix = (code_point as usize) >> FAST_TYPE_SHIFT;
579        if true {
    if !(bit_prefix < self.index.len()) {
        ::core::panicking::panic("assertion failed: bit_prefix < self.index.len()")
    };
};debug_assert!(bit_prefix < self.index.len());
580        // SAFETY: Relying on the length invariant of `self.index` having
581        // been checked and on the unchangedness invariant of `self.index`
582        // and `self.header.trie_type` after construction.
583        let base_offset_to_data: usize = usize::from(u16::from_unaligned(*unsafe {
584            self.index.as_ule_slice().get_unchecked(bit_prefix)
585        }));
586        let bit_suffix = (code_point & FAST_TYPE_DATA_MASK) as usize;
587        // SAFETY: Cannot overflow with supported (32-bit and 64-bit) `usize`
588        // sizes, since `base_offset_to_data` was extended from `u16` and
589        // `bit_suffix` is at most `FAST_TYPE_DATA_MASK`, which is well
590        // under what it takes to reach the 32-bit (or 64-bit) max with
591        // additon from the max of `u16`.
592        let offset_to_data = {
    #[allow(unused_parens)]
    let a = base_offset_to_data;
    let b = bit_suffix;
    if true {
        if !a.checked_add(b).is_some() {
            ::core::panicking::panic("assertion failed: a.checked_add(b).is_some()")
        };
    };
    base_offset_to_data.wrapping_add(bit_suffix)
}w!(base_offset_to_data + bit_suffix);
593        if true {
    if !(offset_to_data < self.data.len()) {
        ::core::panicking::panic("assertion failed: offset_to_data < self.data.len()")
    };
};debug_assert!(offset_to_data < self.data.len());
594        // SAFETY: Relying on the length invariant of `self.data` having
595        // been checked and on the unchangedness invariant of `self.data`,
596        // `self.index`, and `self.header.trie_type` after construction.
597        T::from_unaligned(*unsafe { self.data.as_ule_slice().get_unchecked(offset_to_data) })
598    }
599
600    /// Coldness wrapper for `get32_by_small_index` to also allow
601    /// calls without the effects of `#[cold]`.
602    #[cold]
603    #[inline(always)]
604    fn get32_by_small_index_cold(&self, code_point: u32) -> T {
605        self.get32_by_small_index(code_point)
606    }
607
608    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`]
609    /// assuming that the small index path should be used.
610    ///
611    /// # Intended Precondition
612    ///
613    /// `code_point` must be at most `CODE_POINT_MAX` AND greter than
614    /// `FAST_TYPE_FAST_INDEXING_MAX` if the trie type is fast or greater
615    /// than `SMALL_TYPE_FAST_INDEXING_MAX` if the trie type is small.
616    /// This is checked when debug assertions are enabled. If this
617    /// precondition is violated, the behavior of this method is
618    /// memory-safe, but the returned value may be bogus (not
619    /// necessarily the designated error value).
620    #[inline(never)]
621    fn get32_by_small_index(&self, code_point: u32) -> T {
622        if true {
    if !(code_point <= CODE_POINT_MAX) {
        ::core::panicking::panic("assertion failed: code_point <= CODE_POINT_MAX")
    };
};debug_assert!(code_point <= CODE_POINT_MAX);
623        if true {
    if !(code_point >
                match self.header.trie_type {
                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
                }) {
        ::core::panicking::panic("assertion failed: code_point >\n    match self.header.trie_type {\n        TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,\n        TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,\n    }")
    };
};debug_assert!(
624            code_point
625                > match self.header.trie_type {
626                    TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
627                    TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
628                }
629        );
630        self.data
631            .get(self.small_index(code_point) as usize)
632            .unwrap_or(self.error_value)
633    }
634
635    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`].
636    ///
637    /// # Examples
638    ///
639    /// ```
640    /// use icu::collections::codepointtrie::planes;
641    /// let trie = planes::get_planes_trie();
642    ///
643    /// assert_eq!(0, trie.get32(0x41)); // 'A' as u32
644    /// assert_eq!(0, trie.get32(0x13E0)); // 'Ꮰ' as u32
645    /// assert_eq!(1, trie.get32(0x10044)); // '𐁄' as u32
646    /// ```
647    #[inline(always)] // `always` based on normalizer benchmarking
648    pub fn get32(&self, code_point: u32) -> T {
649        if let Some(v) = self.get32_by_fast_index(code_point) {
650            v
651        } else if code_point <= CODE_POINT_MAX {
652            self.get32_by_small_index_cold(code_point)
653        } else {
654            self.error_value
655        }
656    }
657
658    /// Returns the value that is associated with `char` in this [`CodePointTrie`].
659    ///
660    /// # Examples
661    ///
662    /// ```
663    /// use icu::collections::codepointtrie::planes;
664    /// let trie = planes::get_planes_trie();
665    ///
666    /// assert_eq!(0, trie.get('A')); // 'A' as u32
667    /// assert_eq!(0, trie.get('Ꮰ')); // 'Ꮰ' as u32
668    /// assert_eq!(1, trie.get('𐁄')); // '𐁄' as u32
669    /// ```
670    #[inline(always)]
671    pub fn get(&self, c: char) -> T {
672        // LLVM's optimizations have been observed not to be 100%
673        // reliable around collapsing away unnecessary parts of
674        // `get32`, so not just calling `get32` here.
675        let code_point = u32::from(c);
676        if let Some(v) = self.get32_by_fast_index(code_point) {
677            v
678        } else {
679            self.get32_by_small_index_cold(code_point)
680        }
681    }
682
683    /// Returns the value that is associated with `bmp` in this [`CodePointTrie`].
684    #[inline(always)]
685    pub fn get16(&self, bmp: u16) -> T {
686        // LLVM's optimizations have been observed not to be 100%
687        // reliable around collapsing away unnecessary parts of
688        // `get32`, so not just calling `get32` here.
689        let code_point = u32::from(bmp);
690        if let Some(v) = self.get32_by_fast_index(code_point) {
691            v
692        } else {
693            self.get32_by_small_index_cold(code_point)
694        }
695    }
696
697    /// Lookup trie value by non-Basic Multilingual Plane Scalar Value.
698    ///
699    /// The return value may be bogus (not necessarily `error_value`) is the argument is actually in
700    /// the Basic Multilingual Plane or above the Unicode Scalar Value
701    /// range (panics instead with debug assertions enabled).
702    #[inline(always)]
703    pub fn get32_supplementary(&self, supplementary: u32) -> T {
704        if true {
    if !(supplementary > 0xFFFF) {
        ::core::panicking::panic("assertion failed: supplementary > 0xFFFF")
    };
};debug_assert!(supplementary > 0xFFFF);
705        if true {
    if !(supplementary <= CODE_POINT_MAX) {
        ::core::panicking::panic("assertion failed: supplementary <= CODE_POINT_MAX")
    };
};debug_assert!(supplementary <= CODE_POINT_MAX);
706        self.get32_by_small_index(supplementary)
707    }
708
709    /// Returns a reference to the ULE of the value that is associated with `code_point` in this [`CodePointTrie`].
710    ///
711    /// # Examples
712    ///
713    /// ```
714    /// use icu::collections::codepointtrie::planes;
715    /// let trie = planes::get_planes_trie();
716    ///
717    /// assert_eq!(Some(&0), trie.get32_ule(0x41)); // 'A' as u32
718    /// assert_eq!(Some(&0), trie.get32_ule(0x13E0)); // 'Ꮰ' as u32
719    /// assert_eq!(Some(&1), trie.get32_ule(0x10044)); // '𐁄' as u32
720    /// ```
721    #[inline(always)] // `always` was based on previous normalizer benchmarking
722    pub fn get32_ule(&self, code_point: u32) -> Option<&T::ULE> {
723        // All code points up to the fast max limit are represented
724        // individually in the `index` array to hold their `data` array position, and
725        // thus only need 2 lookups for a [CodePointTrie::get()](`crate::codepointtrie::CodePointTrie::get`).
726        // Code points above the "fast max" limit require 4 lookups.
727        let fast_max = match self.header.trie_type {
728            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
729            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
730        };
731        let data_pos: u32 = if code_point <= fast_max {
732            Self::fast_index(self, code_point)
733        } else if code_point <= CODE_POINT_MAX {
734            Self::small_index(self, code_point)
735        } else {
736            self.trie_error_val_index()
737        };
738        // Returns the trie value (or trie's error value).
739        self.data.as_ule_slice().get(data_pos as usize)
740    }
741
742    /// Converts the [`CodePointTrie`] into one that returns another type of the same size.
743    ///
744    /// Borrowed data remains borrowed, and owned data remains owned.
745    ///
746    /// If the old and new types are not the same size, use
747    /// [`CodePointTrie::try_alloc_map_value`].
748    ///
749    /// # Panics
750    ///
751    /// Panics if `T` and `P` are different sizes.
752    ///
753    /// More specifically, panics if [`ZeroVec::try_into_converted()`] panics when converting
754    /// `ZeroVec<T>` into `ZeroVec<P>`, which happens if `T::ULE` and `P::ULE` differ in size.
755    ///
756    /// ✨ *Enabled with the `alloc` Cargo feature.*
757    ///
758    /// # Examples
759    ///
760    /// ```no_run
761    /// use icu::collections::codepointtrie::planes;
762    /// use icu::collections::codepointtrie::CodePointTrie;
763    ///
764    /// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
765    /// let planes_trie_i8: CodePointTrie<i8> =
766    ///     planes_trie_u8.try_into_converted().expect("infallible");
767    ///
768    /// assert_eq!(planes_trie_i8.get32(0x30000), 3);
769    /// ```
770    #[cfg(feature = "alloc")]
771    pub fn try_into_converted<P>(self) -> Result<CodePointTrie<'trie, P>, UleError>
772    where
773        P: TrieValue,
774    {
775        let converted_data = self.data.try_into_converted()?;
776        let error_ule = self.error_value.to_unaligned();
777        let slice = &[error_ule];
778        let error_vec = ZeroVec::<T>::new_borrowed(slice);
779        let error_converted = error_vec.try_into_converted::<P>()?;
780        #[expect(clippy::expect_used)] // we know this cannot fail
781        Ok(CodePointTrie {
782            header: self.header,
783            index: self.index,
784            data: converted_data,
785            error_value: error_converted
786                .get(0)
787                .expect("vector known to have one element"),
788        })
789    }
790
791    /// Maps the [`CodePointTrie`] into one that returns a different type.
792    ///
793    /// This function returns owned data.
794    ///
795    /// If the old and new types are the same size, use the more efficient
796    /// [`CodePointTrie::try_into_converted`].
797    ///
798    /// ✨ *Enabled with the `alloc` Cargo feature.*
799    ///
800    /// # Examples
801    ///
802    /// ```
803    /// use icu::collections::codepointtrie::planes;
804    /// use icu::collections::codepointtrie::CodePointTrie;
805    ///
806    /// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
807    /// let planes_trie_u16: CodePointTrie<u16> = planes_trie_u8
808    ///     .try_alloc_map_value(TryFrom::try_from)
809    ///     .expect("infallible");
810    ///
811    /// assert_eq!(planes_trie_u16.get32(0x30000), 3);
812    /// ```
813    #[cfg(feature = "alloc")]
814    pub fn try_alloc_map_value<P, E>(
815        &self,
816        mut f: impl FnMut(T) -> Result<P, E>,
817    ) -> Result<CodePointTrie<'trie, P>, E>
818    where
819        P: TrieValue,
820    {
821        let error_converted = f(self.error_value)?;
822        let converted_data = self.data.iter().map(f).collect::<Result<ZeroVec<P>, E>>()?;
823        Ok(CodePointTrie {
824            header: self.header,
825            index: self.index.clone(),
826            data: converted_data,
827            error_value: error_converted,
828        })
829    }
830
831    /// Returns a [`CodePointMapRange`] struct which represents a range of code
832    /// points associated with the same trie value. The returned range will be
833    /// the longest stretch of consecutive code points starting at `start` that
834    /// share this value.
835    ///
836    /// This method is designed to use the internal details of
837    /// the structure of [`CodePointTrie`] to be optimally efficient. This will
838    /// outperform a naive approach that just uses [`CodePointTrie::get()`].
839    ///
840    /// This method provides lower-level functionality that can be used in the
841    /// implementation of other methods that are more convenient to the user.
842    /// To obtain an optimal partition of the code point space for
843    /// this trie resulting in the fewest number of ranges, see
844    /// [`CodePointTrie::iter_ranges()`].
845    ///
846    /// # Examples
847    ///
848    /// ```
849    /// use icu::collections::codepointtrie::planes;
850    ///
851    /// let trie = planes::get_planes_trie();
852    ///
853    /// const CODE_POINT_MAX: u32 = 0x10ffff;
854    /// let start = 0x1_0000;
855    /// let exp_end = 0x1_ffff;
856    ///
857    /// let start_val = trie.get32(start);
858    /// assert_eq!(trie.get32(exp_end), start_val);
859    /// assert_ne!(trie.get32(exp_end + 1), start_val);
860    ///
861    /// use icu::collections::codepointtrie::CodePointMapRange;
862    ///
863    /// let cpm_range: CodePointMapRange<u8> = trie.get_range(start).unwrap();
864    ///
865    /// assert_eq!(cpm_range.range.start(), &start);
866    /// assert_eq!(cpm_range.range.end(), &exp_end);
867    /// assert_eq!(cpm_range.value, start_val);
868    ///
869    /// // `start` can be any code point, whether or not it lies on the boundary
870    /// // of a maximally large range that still contains `start`
871    ///
872    /// let submaximal_1_start = start + 0x1234;
873    /// let submaximal_1 = trie.get_range(submaximal_1_start).unwrap();
874    /// assert_eq!(submaximal_1.range.start(), &0x1_1234);
875    /// assert_eq!(submaximal_1.range.end(), &0x1_ffff);
876    /// assert_eq!(submaximal_1.value, start_val);
877    ///
878    /// let submaximal_2_start = start + 0xffff;
879    /// let submaximal_2 = trie.get_range(submaximal_2_start).unwrap();
880    /// assert_eq!(submaximal_2.range.start(), &0x1_ffff);
881    /// assert_eq!(submaximal_2.range.end(), &0x1_ffff);
882    /// assert_eq!(submaximal_2.value, start_val);
883    /// ```
884    pub fn get_range(&self, start: u32) -> Option<CodePointMapRange<T>> {
885        // Exit early if the start code point is out of range, or if it is
886        // in the last range of code points in high_start..=CODE_POINT_MAX
887        // (start- and end-inclusive) that all share the same trie value.
888        if CODE_POINT_MAX < start {
889            return None;
890        }
891        if start >= self.header.high_start {
892            let di: usize = self.data.len() - (HIGH_VALUE_NEG_DATA_OFFSET as usize);
893            let value: T = self.data.get(di)?;
894            return Some(CodePointMapRange {
895                range: start..=CODE_POINT_MAX,
896                value,
897            });
898        }
899
900        let null_value: T = T::try_from_u32(self.header.null_value).ok()?;
901
902        let mut prev_i3_block: u32 = u32::MAX; // using u32::MAX (instead of -1 as an i32 in ICU)
903        let mut prev_block: u32 = u32::MAX; // using u32::MAX (instead of -1 as an i32 in ICU)
904        let mut c: u32 = start;
905        let mut trie_value: T = self.error_value();
906        let mut value: T = self.error_value();
907        let mut have_value: bool = false;
908
909        loop {
910            let i3_block: u32;
911            let mut i3: u32;
912            let i3_block_length: u32;
913            let data_block_length: u32;
914
915            // Initialize values before beginning the iteration in the subsequent
916            // `loop` block. In particular, use the "i3*" local variables
917            // (representing the `index` array position's offset + increment
918            // for a 3rd-level trie lookup) to help initialize the data block
919            // variable `block` in the loop for the `data` array.
920            //
921            // When a lookup code point is <= the trie's *_FAST_INDEXING_MAX that
922            // corresponds to its `trie_type`, the lookup only takes 2 steps
923            // (once into the `index`, once into the `data` array); otherwise,
924            // takes 4 steps (3 iterative lookups into the `index`, once more
925            // into the `data` array). So for convenience's sake, when we have the
926            // 2-stage lookup, reuse the "i3*" variable names for the first lookup.
927            if c <= 0xffff
928                && (self.header.trie_type == TrieType::Fast || c <= SMALL_TYPE_FAST_INDEXING_MAX)
929            {
930                i3_block = 0;
931                i3 = c >> FAST_TYPE_SHIFT;
932                i3_block_length = if self.header.trie_type == TrieType::Fast {
933                    BMP_INDEX_LENGTH
934                } else {
935                    SMALL_INDEX_LENGTH
936                };
937                data_block_length = FAST_TYPE_DATA_BLOCK_LENGTH;
938            } else {
939                // Use the multi-stage index.
940                let mut i1: u32 = c >> SHIFT_1;
941                if self.header.trie_type == TrieType::Fast {
942                    if true {
    if !(0xffff < c && c < self.header.high_start) {
        ::core::panicking::panic("assertion failed: 0xffff < c && c < self.header.high_start")
    };
};debug_assert!(0xffff < c && c < self.header.high_start);
943                    i1 = i1 + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
944                } else {
945                    if true {
    if !(c < self.header.high_start && self.header.high_start > SMALL_LIMIT) {
        ::core::panicking::panic("assertion failed: c < self.header.high_start && self.header.high_start > SMALL_LIMIT")
    };
};debug_assert!(
946                        c < self.header.high_start && self.header.high_start > SMALL_LIMIT
947                    );
948                    i1 += SMALL_INDEX_LENGTH;
949                }
950                let i2: u16 = self.index.get(i1 as usize)?;
951                let i3_block_idx: u32 = (i2 as u32) + ((c >> SHIFT_2) & INDEX_2_MASK);
952                i3_block = if let Some(i3b) = self.index.get(i3_block_idx as usize) {
953                    i3b as u32
954                } else {
955                    return None;
956                };
957                if i3_block == prev_i3_block && (c - start) >= CP_PER_INDEX_2_ENTRY {
958                    // The index-3 block is the same as the previous one, and filled with value.
959                    if true {
    if !((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0) {
        ::core::panicking::panic("assertion failed: (c & (CP_PER_INDEX_2_ENTRY - 1)) == 0")
    };
};debug_assert!((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
960                    c += CP_PER_INDEX_2_ENTRY;
961
962                    if c >= self.header.high_start {
963                        break;
964                    } else {
965                        continue;
966                    }
967                }
968                prev_i3_block = i3_block;
969                if i3_block == self.header.index3_null_offset as u32 {
970                    // This is the index-3 null block.
971                    // All of the `data` array blocks pointed to by the values
972                    // in this block of the `index` 3rd-stage subarray will
973                    // contain this trie's null_value. So if we are in the middle
974                    // of a range, end it and return early, otherwise start a new
975                    // range of null values.
976                    if have_value {
977                        if null_value != value {
978                            return Some(CodePointMapRange {
979                                range: start..=(c - 1),
980                                value,
981                            });
982                        }
983                    } else {
984                        trie_value = T::try_from_u32(self.header.null_value).ok()?;
985                        value = null_value;
986                        have_value = true;
987                    }
988                    prev_block = self.header.data_null_offset;
989                    c = (c + CP_PER_INDEX_2_ENTRY) & !(CP_PER_INDEX_2_ENTRY - 1);
990
991                    if c >= self.header.high_start {
992                        break;
993                    } else {
994                        continue;
995                    }
996                }
997                i3 = (c >> SHIFT_3) & INDEX_3_MASK;
998                i3_block_length = INDEX_3_BLOCK_LENGTH;
999                data_block_length = SMALL_DATA_BLOCK_LENGTH;
1000            }
1001
1002            // Enumerate data blocks for one index-3 block.
1003            loop {
1004                let mut block: u32;
1005                if (i3_block & 0x8000) == 0 {
1006                    block = if let Some(b) = self.index.get((i3_block + i3) as usize) {
1007                        b as u32
1008                    } else {
1009                        return None;
1010                    };
1011                } else {
1012                    // 18-bit indexes stored in groups of 9 entries per 8 indexes.
1013                    let mut group: u32 = (i3_block & 0x7fff) + (i3 & !7) + (i3 >> 3);
1014                    let gi: u32 = i3 & 7;
1015                    let gi_val: u32 = if let Some(giv) = self.index.get(group as usize) {
1016                        giv.into()
1017                    } else {
1018                        return None;
1019                    };
1020                    block = (gi_val << (2 + (2 * gi))) & 0x30000;
1021                    group += 1;
1022                    let ggi_val: u32 = if let Some(ggiv) = self.index.get((group + gi) as usize) {
1023                        ggiv as u32
1024                    } else {
1025                        return None;
1026                    };
1027                    block |= ggi_val;
1028                }
1029
1030                // If our previous and current return values of the 3rd-stage `index`
1031                // lookup yield the same `data` block offset, and if we already know that
1032                // the entire `data` block / subarray starting at that offset stores
1033                // `value` and nothing else, then we can extend our range by the length
1034                // of a data block and continue.
1035                // Otherwise, we have to iterate over the values stored in the
1036                // new data block to see if they differ from `value`.
1037                if block == prev_block && (c - start) >= data_block_length {
1038                    // The block is the same as the previous one, and filled with value.
1039                    if true {
    if !((c & (data_block_length - 1)) == 0) {
        ::core::panicking::panic("assertion failed: (c & (data_block_length - 1)) == 0")
    };
};debug_assert!((c & (data_block_length - 1)) == 0);
1040                    c += data_block_length;
1041                } else {
1042                    let data_mask: u32 = data_block_length - 1;
1043                    prev_block = block;
1044                    if block == self.header.data_null_offset {
1045                        // This is the data null block.
1046                        // If we are in the middle of a range, end it and
1047                        // return early, otherwise start a new range of null
1048                        // values.
1049                        if have_value {
1050                            if null_value != value {
1051                                return Some(CodePointMapRange {
1052                                    range: start..=(c - 1),
1053                                    value,
1054                                });
1055                            }
1056                        } else {
1057                            trie_value = T::try_from_u32(self.header.null_value).ok()?;
1058                            value = null_value;
1059                            have_value = true;
1060                        }
1061                        c = (c + data_block_length) & !data_mask;
1062                    } else {
1063                        let mut di: u32 = block + (c & data_mask);
1064                        let mut trie_value_2: T = self.data.get(di as usize)?;
1065                        if have_value {
1066                            if trie_value_2 != trie_value {
1067                                if maybe_filter_value(
1068                                    trie_value_2,
1069                                    T::try_from_u32(self.header.null_value).ok()?,
1070                                    null_value,
1071                                ) != value
1072                                {
1073                                    return Some(CodePointMapRange {
1074                                        range: start..=(c - 1),
1075                                        value,
1076                                    });
1077                                }
1078                                // `trie_value` stores the previous value that was retrieved
1079                                // from the trie.
1080                                // `value` stores the value associated for the range (return
1081                                // value) that we are currently building, which is computed
1082                                // as a transformation by applying maybe_filter_value()
1083                                // to the trie value.
1084                                // The current trie value `trie_value_2` within this data block
1085                                // differs here from the previous value in `trie_value`.
1086                                // But both map to `value` after applying `maybe_filter_value`.
1087                                // It is not clear whether the previous or the current trie value
1088                                // (or neither) is more likely to match potential subsequent trie
1089                                // values that would extend the range by mapping to `value`.
1090                                // On the assumption of locality -- often times consecutive
1091                                // characters map to the same trie values -- remembering the new
1092                                // one might make it faster to extend this range further
1093                                // (by increasing the chance that the next `trie_value_2 !=
1094                                // trie_value` test will be false).
1095                                trie_value = trie_value_2; // may or may not help
1096                            }
1097                        } else {
1098                            trie_value = trie_value_2;
1099                            value = maybe_filter_value(
1100                                trie_value_2,
1101                                T::try_from_u32(self.header.null_value).ok()?,
1102                                null_value,
1103                            );
1104                            have_value = true;
1105                        }
1106
1107                        c += 1;
1108                        while (c & data_mask) != 0 {
1109                            di += 1;
1110                            trie_value_2 = self.data.get(di as usize)?;
1111                            if trie_value_2 != trie_value {
1112                                if maybe_filter_value(
1113                                    trie_value_2,
1114                                    T::try_from_u32(self.header.null_value).ok()?,
1115                                    null_value,
1116                                ) != value
1117                                {
1118                                    return Some(CodePointMapRange {
1119                                        range: start..=(c - 1),
1120                                        value,
1121                                    });
1122                                }
1123                                // `trie_value` stores the previous value that was retrieved
1124                                // from the trie.
1125                                // `value` stores the value associated for the range (return
1126                                // value) that we are currently building, which is computed
1127                                // as a transformation by applying maybe_filter_value()
1128                                // to the trie value.
1129                                // The current trie value `trie_value_2` within this data block
1130                                // differs here from the previous value in `trie_value`.
1131                                // But both map to `value` after applying `maybe_filter_value`.
1132                                // It is not clear whether the previous or the current trie value
1133                                // (or neither) is more likely to match potential subsequent trie
1134                                // values that would extend the range by mapping to `value`.
1135                                // On the assumption of locality -- often times consecutive
1136                                // characters map to the same trie values -- remembering the new
1137                                // one might make it faster to extend this range further
1138                                // (by increasing the chance that the next `trie_value_2 !=
1139                                // trie_value` test will be false).
1140                                trie_value = trie_value_2; // may or may not help
1141                            }
1142
1143                            c += 1;
1144                        }
1145                    }
1146                }
1147
1148                i3 += 1;
1149                if i3 >= i3_block_length {
1150                    break;
1151                }
1152            }
1153
1154            if c >= self.header.high_start {
1155                break;
1156            }
1157        }
1158
1159        if true {
    if !have_value {
        ::core::panicking::panic("assertion failed: have_value")
    };
};debug_assert!(have_value);
1160
1161        // Now that c >= high_start, compare `value` to `high_value` to see
1162        // if we can merge our current range with the high_value range
1163        // high_start..=CODE_POINT_MAX (start- and end-inclusive), otherwise
1164        // stop at high_start - 1.
1165        let di: u32 = self.data.len() as u32 - HIGH_VALUE_NEG_DATA_OFFSET;
1166        let high_value: T = self.data.get(di as usize)?;
1167        if maybe_filter_value(
1168            high_value,
1169            T::try_from_u32(self.header.null_value).ok()?,
1170            null_value,
1171        ) != value
1172        {
1173            c -= 1;
1174        } else {
1175            c = CODE_POINT_MAX;
1176        }
1177        Some(CodePointMapRange {
1178            range: start..=c,
1179            value,
1180        })
1181    }
1182
1183    /// Yields an [`Iterator`] returning ranges of consecutive code points that
1184    /// share the same value in the [`CodePointTrie`], as given by
1185    /// [`CodePointTrie::get_range()`].
1186    ///
1187    /// # Examples
1188    ///
1189    /// ```
1190    /// use core::ops::RangeInclusive;
1191    /// use icu::collections::codepointtrie::planes;
1192    /// use icu::collections::codepointtrie::CodePointMapRange;
1193    ///
1194    /// let planes_trie = planes::get_planes_trie();
1195    ///
1196    /// let mut ranges = planes_trie.iter_ranges();
1197    ///
1198    /// for plane in 0..=16 {
1199    ///     let exp_start = plane * 0x1_0000;
1200    ///     let exp_end = exp_start + 0xffff;
1201    ///     assert_eq!(
1202    ///         ranges.next(),
1203    ///         Some(CodePointMapRange {
1204    ///             range: exp_start..=exp_end,
1205    ///             value: plane as u8
1206    ///         })
1207    ///     );
1208    /// }
1209    ///
1210    /// // Hitting the end of the iterator returns `None`, as will subsequent
1211    /// // calls to .next().
1212    /// assert_eq!(ranges.next(), None);
1213    /// assert_eq!(ranges.next(), None);
1214    /// ```
1215    pub fn iter_ranges(&self) -> CodePointMapRangeIterator<'_, T> {
1216        let init_range = Some(CodePointMapRange {
1217            range: u32::MAX..=u32::MAX,
1218            value: self.error_value(),
1219        });
1220        CodePointMapRangeIterator::<T> {
1221            cpt: self,
1222            cpm_range: init_range,
1223        }
1224    }
1225
1226    /// Yields an [`Iterator`] returning the ranges of the code points whose values
1227    /// match `value` in the [`CodePointTrie`].
1228    ///
1229    /// # Examples
1230    ///
1231    /// ```
1232    /// use icu::collections::codepointtrie::planes;
1233    ///
1234    /// let trie = planes::get_planes_trie();
1235    ///
1236    /// let plane_val = 2;
1237    /// let mut sip_range_iter = trie.iter_ranges_for_value(plane_val as u8);
1238    ///
1239    /// let start = plane_val * 0x1_0000;
1240    /// let end = start + 0xffff;
1241    ///
1242    /// let sip_range = sip_range_iter.next()
1243    ///     .expect("Plane 2 (SIP) should exist in planes data");
1244    /// assert_eq!(start..=end, sip_range);
1245    ///
1246    /// assert!(sip_range_iter.next().is_none());
1247    pub fn iter_ranges_for_value(
1248        &self,
1249        value: T,
1250    ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
1251        self.iter_ranges()
1252            .filter(move |cpm_range| cpm_range.value == value)
1253            .map(|cpm_range| cpm_range.range)
1254    }
1255
1256    /// Yields an [`Iterator`] returning the ranges of the code points after passing
1257    /// the value through a mapping function.
1258    ///
1259    /// This is preferable to calling `.get_ranges().map()` since it will coalesce
1260    /// adjacent ranges into one.
1261    ///
1262    /// # Examples
1263    ///
1264    /// ```
1265    /// use icu::collections::codepointtrie::planes;
1266    ///
1267    /// let trie = planes::get_planes_trie();
1268    ///
1269    /// let plane_val = 2;
1270    /// let mut sip_range_iter = trie.iter_ranges_mapped(|value| value != plane_val as u8).filter(|range| range.value);
1271    ///
1272    /// let end = plane_val * 0x1_0000 - 1;
1273    ///
1274    /// let sip_range = sip_range_iter.next()
1275    ///     .expect("Complemented planes data should have at least one entry");
1276    /// assert_eq!(0..=end, sip_range.range);
1277    pub fn iter_ranges_mapped<'a, U: Eq + 'a>(
1278        &'a self,
1279        mut map: impl FnMut(T) -> U + Copy + 'a,
1280    ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a {
1281        crate::iterator_utils::RangeListIteratorCoalescer::new(self.iter_ranges().map(
1282            move |range| CodePointMapRange {
1283                range: range.range,
1284                value: map(range.value),
1285            },
1286        ))
1287    }
1288
1289    /// Returns a [`CodePointInversionList`] for the code points that have the given
1290    /// [`TrieValue`] in the trie.
1291    ///
1292    /// ✨ *Enabled with the `alloc` Cargo feature.*
1293    ///
1294    /// # Examples
1295    ///
1296    /// ```
1297    /// use icu::collections::codepointtrie::planes;
1298    ///
1299    /// let trie = planes::get_planes_trie();
1300    ///
1301    /// let plane_val = 2;
1302    /// let sip = trie.get_set_for_value(plane_val as u8);
1303    ///
1304    /// let start = plane_val * 0x1_0000;
1305    /// let end = start + 0xffff;
1306    ///
1307    /// assert!(!sip.contains32(start - 1));
1308    /// assert!(sip.contains32(start));
1309    /// assert!(sip.contains32(end));
1310    /// assert!(!sip.contains32(end + 1));
1311    /// ```
1312    #[cfg(feature = "alloc")]
1313    pub fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> {
1314        let value_ranges = self.iter_ranges_for_value(value);
1315        CodePointInversionList::from_iter(value_ranges)
1316    }
1317
1318    /// Returns the value used as an error value for this trie
1319    #[inline]
1320    pub fn error_value(&self) -> T {
1321        self.error_value
1322    }
1323}
1324
1325#[cfg(feature = "databake")]
1326impl<T: TrieValue + databake::Bake> databake::Bake for CodePointTrie<'_, T> {
1327    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
1328        let header = self.header.bake(env);
1329        let index = self.index.bake(env);
1330        let data = self.data.bake(env);
1331        let error_value = self.error_value.bake(env);
1332        databake::quote! { unsafe { icu_collections::codepointtrie::CodePointTrie::from_parts_unstable_unchecked_v1(#header, #index, #data, #error_value) } }
1333    }
1334}
1335
1336#[cfg(feature = "databake")]
1337impl<T: TrieValue + databake::Bake> databake::BakeSize for CodePointTrie<'_, T> {
1338    fn borrows_size(&self) -> usize {
1339        self.header.borrows_size() + self.index.borrows_size() + self.data.borrows_size()
1340    }
1341}
1342
1343impl<T: TrieValue + Into<u32>> CodePointTrie<'_, T> {
1344    /// Returns the value that is associated with `code_point` for this [`CodePointTrie`]
1345    /// as a `u32`.
1346    ///
1347    /// # Examples
1348    ///
1349    /// ```
1350    /// use icu::collections::codepointtrie::planes;
1351    /// let trie = planes::get_planes_trie();
1352    ///
1353    /// let cp = '𑖎' as u32;
1354    /// assert_eq!(cp, 0x1158E);
1355    ///
1356    /// let plane_num: u8 = trie.get32(cp);
1357    /// assert_eq!(trie.get32_u32(cp), plane_num as u32);
1358    /// ```
1359    // Note: This API method maintains consistency with the corresponding
1360    // original ICU APIs.
1361    pub fn get32_u32(&self, code_point: u32) -> u32 {
1362        self.get32(code_point).into()
1363    }
1364}
1365
1366impl<T: TrieValue> Clone for CodePointTrie<'_, T>
1367where
1368    <T as zerovec::ule::AsULE>::ULE: Clone,
1369{
1370    fn clone(&self) -> Self {
1371        CodePointTrie {
1372            header: self.header,
1373            index: self.index.clone(),
1374            data: self.data.clone(),
1375            error_value: self.error_value,
1376        }
1377    }
1378}
1379
1380/// Represents a range of consecutive code points sharing the same value in a
1381/// code point map.
1382///
1383/// The start and end of the interval is represented as a
1384/// `RangeInclusive<u32>`, and the value is represented as `T`.
1385#[derive(#[automatically_derived]
impl<T: ::core::cmp::PartialEq> ::core::cmp::PartialEq for
    CodePointMapRange<T> {
    #[inline]
    fn eq(&self, other: &CodePointMapRange<T>) -> bool {
        self.range == other.range && self.value == other.value
    }
}PartialEq, #[automatically_derived]
impl<T: ::core::cmp::Eq> ::core::cmp::Eq for CodePointMapRange<T> {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _: ::core::cmp::AssertParamIsEq<RangeInclusive<u32>>;
        let _: ::core::cmp::AssertParamIsEq<T>;
    }
}Eq, #[automatically_derived]
impl<T: ::core::fmt::Debug> ::core::fmt::Debug for CodePointMapRange<T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f,
            "CodePointMapRange", "range", &self.range, "value", &&self.value)
    }
}Debug, #[automatically_derived]
impl<T: ::core::clone::Clone> ::core::clone::Clone for CodePointMapRange<T> {
    #[inline]
    fn clone(&self) -> CodePointMapRange<T> {
        CodePointMapRange {
            range: ::core::clone::Clone::clone(&self.range),
            value: ::core::clone::Clone::clone(&self.value),
        }
    }
}Clone)]
1386pub struct CodePointMapRange<T> {
1387    /// Range of code points from start to end (inclusive).
1388    pub range: RangeInclusive<u32>,
1389    /// Trie value associated with this range.
1390    pub value: T,
1391}
1392
1393/// A custom [`Iterator`] type specifically for a code point trie that returns
1394/// [`CodePointMapRange`]s.
1395pub struct CodePointMapRangeIterator<'a, T: TrieValue> {
1396    cpt: &'a CodePointTrie<'a, T>,
1397    // Initialize `range` to Some(CodePointMapRange{ start: u32::MAX, end: u32::MAX, value: 0}).
1398    // When `range` is Some(...) and has a start value different from u32::MAX, then we have
1399    // returned at least one code point range due to a call to `next()`.
1400    // When `range` == `None`, it means that we have hit the end of iteration. It would occur
1401    // after a call to `next()` returns a None <=> we attempted to call `get_range()`
1402    // with a start code point that is > CODE_POINT_MAX.
1403    cpm_range: Option<CodePointMapRange<T>>,
1404}
1405
1406impl<T: TrieValue> Iterator for CodePointMapRangeIterator<'_, T> {
1407    type Item = CodePointMapRange<T>;
1408
1409    fn next(&mut self) -> Option<Self::Item> {
1410        self.cpm_range = match &self.cpm_range {
1411            Some(cpmr) => {
1412                if *cpmr.range.start() == u32::MAX {
1413                    self.cpt.get_range(0)
1414                } else {
1415                    self.cpt.get_range(cpmr.range.end() + 1)
1416                }
1417            }
1418            None => None,
1419        };
1420        // Note: Clone is cheap. We can't Copy because RangeInclusive does not impl Copy.
1421        self.cpm_range.clone()
1422    }
1423}
1424
1425/// For sealing `TypedCodePointTrie`
1426///
1427/// # Safety Usable Invariant
1428///
1429/// All implementations of `TypedCodePointTrie` are reviewable in this module.
1430trait Seal {}
1431
1432/// Trait for writing trait bounds for monomorphizing over either
1433/// `FastCodePointTrie` or `SmallCodePointTrie`.
1434#[allow(private_bounds)] // Permit sealing
1435pub trait TypedCodePointTrie<'trie, T: TrieValue>: Seal {
1436    /// The `TrieType` associated with this `TypedCodePointTrie`
1437    ///
1438    /// # Safety Usable Invariant
1439    ///
1440    /// This constant matches `self.as_untyped_ref().header.trie_type`.
1441    const TRIE_TYPE: TrieType;
1442
1443    /// Lookup trie value as `u32` by Unicode Scalar Value without branching on trie type.
1444    #[inline(always)]
1445    fn get32_u32(&self, code_point: u32) -> u32 {
1446        self.get32(code_point).to_u32()
1447    }
1448
1449    /// Lookup trie value by Basic Multilingual Plane Code Point without branching on trie type.
1450    #[inline(always)]
1451    fn get16(&self, bmp: u16) -> T {
1452        // LLVM's optimizations have been observed not to be 100%
1453        // reliable around collapsing away unnecessary parts of
1454        // `get32`, so not just calling `get32` here.
1455        let code_point = u32::from(bmp);
1456        if let Some(v) = self.get32_by_fast_index(code_point) {
1457            v
1458        } else {
1459            self.as_untyped_ref().get32_by_small_index_cold(code_point)
1460        }
1461    }
1462
1463    /// Lookup trie value by non-Basic Multilingual Plane Scalar Value without branching on trie type.
1464    #[inline(always)]
1465    fn get32_supplementary(&self, supplementary: u32) -> T {
1466        self.as_untyped_ref().get32_supplementary(supplementary)
1467    }
1468
1469    /// Lookup trie value by Unicode Scalar Value without branching on trie type.
1470    #[inline(always)]
1471    fn get(&self, c: char) -> T {
1472        // LLVM's optimizations have been observed not to be 100%
1473        // reliable around collapsing away unnecessary parts of
1474        // `get32`, so not just calling `get32` here.
1475        let code_point = u32::from(c);
1476        if let Some(v) = self.get32_by_fast_index(code_point) {
1477            v
1478        } else {
1479            self.as_untyped_ref().get32_by_small_index_cold(code_point)
1480        }
1481    }
1482
1483    /// Lookup trie value by Unicode Code Point without branching on trie type.
1484    #[inline(always)]
1485    fn get32(&self, code_point: u32) -> T {
1486        if let Some(v) = self.get32_by_fast_index(code_point) {
1487            v
1488        } else if code_point <= CODE_POINT_MAX {
1489            self.as_untyped_ref().get32_by_small_index_cold(code_point)
1490        } else {
1491            self.as_untyped_ref().error_value
1492        }
1493    }
1494
1495    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`]
1496    /// if `code_point` uses fast-path lookup or `None` if `code_point`
1497    /// should use small-path lookup or is above the supported range.
1498    #[inline(always)] // "always" to make the `Option` collapse away
1499    fn get32_by_fast_index(&self, code_point: u32) -> Option<T> {
1500        if true {
    match (&Self::TRIE_TYPE, &self.as_untyped_ref().header.trie_type) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(Self::TRIE_TYPE, self.as_untyped_ref().header.trie_type);
1501        let fast_max = match Self::TRIE_TYPE {
1502            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
1503            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
1504        };
1505        if code_point <= fast_max {
1506            // SAFETY: We just checked the invariant of
1507            // `get32_assuming_fast_index`,
1508            // which is
1509            // "If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
1510            // `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
1511            // TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`."
1512            // ... assuming that `Self::TRIE_TYPE` always matches
1513            // `self.as_untyped_ref().header.trie_type`, i.e. we're relying on
1514            // `CodePointTrie::to_typed` and `CodePointTrie::as_typed_ref` being correct
1515            // and the exclusive ways of obtaining `Self`.
1516            Some(unsafe { self.as_untyped_ref().get32_assuming_fast_index(code_point) })
1517        } else {
1518            // The caller needs to call `get32_by_small_index` or determine
1519            // that the argument is above the permitted range.
1520            None
1521        }
1522    }
1523
1524    /// Returns a reference to the wrapped `CodePointTrie`.
1525    fn as_untyped_ref(&self) -> &CodePointTrie<'trie, T>;
1526
1527    /// Extracts the wrapped `CodePointTrie`.
1528    fn to_untyped(self) -> CodePointTrie<'trie, T>;
1529}
1530
1531/// Type-safe wrapper for a fast trie guaranteeing
1532/// the the getters don't branch on the trie type
1533/// and for guarenteeing that `get16` is branchless
1534/// in release builds.
1535#[derive(#[automatically_derived]
impl<'trie, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    FastCodePointTrie<'trie, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field1_finish(f,
            "FastCodePointTrie", "inner", &&self.inner)
    }
}Debug)]
1536#[repr(transparent)]
1537pub struct FastCodePointTrie<'trie, T: TrieValue> {
1538    inner: CodePointTrie<'trie, T>,
1539}
1540
1541impl<'trie, T: TrieValue> TypedCodePointTrie<'trie, T> for FastCodePointTrie<'trie, T> {
1542    const TRIE_TYPE: TrieType = TrieType::Fast;
1543
1544    /// Returns a reference to the wrapped `CodePointTrie`.
1545    #[inline(always)]
1546    fn as_untyped_ref(&self) -> &CodePointTrie<'trie, T> {
1547        &self.inner
1548    }
1549
1550    /// Extracts the wrapped `CodePointTrie`.
1551    #[inline(always)]
1552    fn to_untyped(self) -> CodePointTrie<'trie, T> {
1553        self.inner
1554    }
1555
1556    /// Lookup trie value by Basic Multilingual Plane Code Point without branching on trie type.
1557    #[inline(always)]
1558    fn get16(&self, bmp: u16) -> T {
1559        if true {
    if !(u32::from(u16::MAX) <= FAST_TYPE_FAST_INDEXING_MAX) {
        ::core::panicking::panic("assertion failed: u32::from(u16::MAX) <= FAST_TYPE_FAST_INDEXING_MAX")
    };
};debug_assert!(u32::from(u16::MAX) <= FAST_TYPE_FAST_INDEXING_MAX);
1560        if true {
    match (&Self::TRIE_TYPE, &TrieType::Fast) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(Self::TRIE_TYPE, TrieType::Fast);
1561        if true {
    match (&self.as_untyped_ref().header.trie_type, &TrieType::Fast) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.as_untyped_ref().header.trie_type, TrieType::Fast);
1562        let code_point = u32::from(bmp);
1563        // SAFETY: With `TrieType::Fast`, the `u16` range satisfies
1564        // the invariant of `get32_assuming_fast_index`, which is
1565        // "If `self.header.trie_type == TrieType::Small`, `code_point` must be at most
1566        // `SMALL_TYPE_FAST_INDEXING_MAX`. If `self.header.trie_type ==
1567        // TrieType::Fast`, `code_point` must be at most `FAST_TYPE_FAST_INDEXING_MAX`."
1568        //
1569        // We're relying on `CodePointTrie::to_typed` and `CodePointTrie::as_typed_ref`
1570        // being correct and the exclusive ways of obtaining `Self`.
1571        unsafe { self.as_untyped_ref().get32_assuming_fast_index(code_point) }
1572    }
1573}
1574
1575impl<'trie, T: TrieValue> Seal for FastCodePointTrie<'trie, T> {}
1576
1577impl<'trie, T: TrieValue> TryFrom<&'trie CodePointTrie<'trie, T>>
1578    for &'trie FastCodePointTrie<'trie, T>
1579{
1580    type Error = TypedCodePointTrieError;
1581
1582    fn try_from(
1583        reference: &'trie CodePointTrie<'trie, T>,
1584    ) -> Result<&'trie FastCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1585        match reference.as_typed_ref() {
1586            Typed::Fast(trie) => Ok(trie),
1587            Typed::Small(_) => Err(TypedCodePointTrieError),
1588        }
1589    }
1590}
1591
1592impl<'trie, T: TrieValue> TryFrom<CodePointTrie<'trie, T>> for FastCodePointTrie<'trie, T> {
1593    type Error = TypedCodePointTrieError;
1594
1595    fn try_from(
1596        value: CodePointTrie<'trie, T>,
1597    ) -> Result<FastCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1598        match value.to_typed() {
1599            Typed::Fast(trie) => Ok(trie),
1600            Typed::Small(_) => Err(TypedCodePointTrieError),
1601        }
1602    }
1603}
1604
1605/// Type-safe wrapper for a small trie guaranteeing
1606/// the the getters don't branch on the trie type.
1607#[derive(#[automatically_derived]
impl<'trie, T: ::core::fmt::Debug + TrieValue> ::core::fmt::Debug for
    SmallCodePointTrie<'trie, T> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field1_finish(f,
            "SmallCodePointTrie", "inner", &&self.inner)
    }
}Debug)]
1608#[repr(transparent)]
1609pub struct SmallCodePointTrie<'trie, T: TrieValue> {
1610    inner: CodePointTrie<'trie, T>,
1611}
1612
1613impl<'trie, T: TrieValue> TypedCodePointTrie<'trie, T> for SmallCodePointTrie<'trie, T> {
1614    const TRIE_TYPE: TrieType = TrieType::Small;
1615
1616    /// Returns a reference to the wrapped `CodePointTrie`.
1617    #[inline(always)]
1618    fn as_untyped_ref(&self) -> &CodePointTrie<'trie, T> {
1619        &self.inner
1620    }
1621
1622    /// Extracts the wrapped `CodePointTrie`.
1623    #[inline(always)]
1624    fn to_untyped(self) -> CodePointTrie<'trie, T> {
1625        self.inner
1626    }
1627}
1628
1629impl<'trie, T: TrieValue> Seal for SmallCodePointTrie<'trie, T> {}
1630
1631impl<'trie, T: TrieValue> TryFrom<&'trie CodePointTrie<'trie, T>>
1632    for &'trie SmallCodePointTrie<'trie, T>
1633{
1634    type Error = TypedCodePointTrieError;
1635
1636    fn try_from(
1637        reference: &'trie CodePointTrie<'trie, T>,
1638    ) -> Result<&'trie SmallCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1639        match reference.as_typed_ref() {
1640            Typed::Fast(_) => Err(TypedCodePointTrieError),
1641            Typed::Small(trie) => Ok(trie),
1642        }
1643    }
1644}
1645
1646impl<'trie, T: TrieValue> TryFrom<CodePointTrie<'trie, T>> for SmallCodePointTrie<'trie, T> {
1647    type Error = TypedCodePointTrieError;
1648
1649    fn try_from(
1650        value: CodePointTrie<'trie, T>,
1651    ) -> Result<SmallCodePointTrie<'trie, T>, TypedCodePointTrieError> {
1652        match value.to_typed() {
1653            Typed::Fast(_) => Err(TypedCodePointTrieError),
1654            Typed::Small(trie) => Ok(trie),
1655        }
1656    }
1657}
1658
1659/// Error indicating that the `TrieType` of an untyped trie
1660/// does not match the requested typed trie type.
1661#[derive(#[automatically_derived]
impl ::core::fmt::Debug for TypedCodePointTrieError {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::write_str(f, "TypedCodePointTrieError")
    }
}Debug)]
1662#[non_exhaustive]
1663pub struct TypedCodePointTrieError;
1664
1665/// Holder for either fast or small trie with the trie
1666/// type encoded into the Rust type.
1667pub enum Typed<F, S> {
1668    /// The trie type is fast.
1669    Fast(F),
1670    /// The trie type is small.
1671    Small(S),
1672}
1673
1674#[cfg(test)]
1675mod tests {
1676    use super::*;
1677    use crate::codepointtrie::planes;
1678    use alloc::vec::Vec;
1679
1680    #[test]
1681    #[cfg(feature = "serde")]
1682    fn test_serde_with_postcard_roundtrip() -> Result<(), postcard::Error> {
1683        let trie = crate::codepointtrie::planes::get_planes_trie();
1684        let trie_serialized: Vec<u8> = postcard::to_allocvec(&trie).unwrap();
1685
1686        // Assert an expected (golden data) version of the serialized trie.
1687        const EXP_TRIE_SERIALIZED: &[u8] = &[
1688            128, 128, 64, 128, 2, 2, 0, 0, 1, 160, 18, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1689            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1690            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1691            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1692            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136,
1693            2, 144, 2, 144, 2, 144, 2, 176, 2, 176, 2, 176, 2, 176, 2, 208, 2, 208, 2, 208, 2, 208,
1694            2, 240, 2, 240, 2, 240, 2, 240, 2, 16, 3, 16, 3, 16, 3, 16, 3, 48, 3, 48, 3, 48, 3, 48,
1695            3, 80, 3, 80, 3, 80, 3, 80, 3, 112, 3, 112, 3, 112, 3, 112, 3, 144, 3, 144, 3, 144, 3,
1696            144, 3, 176, 3, 176, 3, 176, 3, 176, 3, 208, 3, 208, 3, 208, 3, 208, 3, 240, 3, 240, 3,
1697            240, 3, 240, 3, 16, 4, 16, 4, 16, 4, 16, 4, 48, 4, 48, 4, 48, 4, 48, 4, 80, 4, 80, 4,
1698            80, 4, 80, 4, 112, 4, 112, 4, 112, 4, 112, 4, 0, 0, 16, 0, 32, 0, 48, 0, 64, 0, 80, 0,
1699            96, 0, 112, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32,
1700            0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48,
1701            0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 128, 0, 128, 0, 128, 0, 128,
1702            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
1703            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
1704            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1705            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1706            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1707            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1708            0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1709            0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1710            0, 160, 0, 160, 0, 160, 0, 160, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1711            0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1712            0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1713            0, 176, 0, 176, 0, 176, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1714            0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1715            0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1716            0, 192, 0, 192, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1717            0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1718            0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1719            0, 208, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1720            0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1721            0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1722            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240,
1723            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240,
1724            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 0,
1725            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
1726            0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1727            1, 0, 1, 0, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1,
1728            16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16,
1729            1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 32, 1, 32, 1, 32, 1,
1730            32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32,
1731            1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1,
1732            32, 1, 32, 1, 32, 1, 32, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48,
1733            1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1,
1734            48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 64, 1, 64,
1735            1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1,
1736            64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64,
1737            1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1,
1738            80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80,
1739            1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1,
1740            96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96,
1741            1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1,
1742            96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 128, 0, 136, 0, 136, 0, 136, 0, 136,
1743            0, 136, 0, 136, 0, 136, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
1744            2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
1745            0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1746            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1747            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1748            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1749            200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1750            200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1751            200, 0, 200, 0, 200, 0, 200, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1752            232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1753            232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1754            232, 0, 232, 0, 232, 0, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8,
1755            1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1,
1756            8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40,
1757            1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1,
1758            40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40,
1759            1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1,
1760            72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72,
1761            1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1762            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1763            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1764            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1765            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1766            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1767            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1768            168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1769            168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1770            168, 1, 168, 1, 168, 1, 168, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1771            200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1772            200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1773            200, 1, 200, 1, 200, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1774            232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1775            232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1776            232, 1, 232, 1, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2,
1777            8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8,
1778            2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40,
1779            2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2,
1780            40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 72,
1781            2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2,
1782            72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72,
1783            2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1784            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1785            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1786            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 244, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1787            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1788            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1789            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1790            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1791            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1792            2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1793            4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
1794            6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
1795            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10,
1796            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,
1797            11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
1798            12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
1799            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1800            15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 0,
1801        ];
1802        assert_eq!(trie_serialized, EXP_TRIE_SERIALIZED);
1803
1804        let trie_deserialized = postcard::from_bytes::<CodePointTrie<u8>>(&trie_serialized)?;
1805
1806        assert_eq!(&trie.index, &trie_deserialized.index);
1807        assert_eq!(&trie.data, &trie_deserialized.data);
1808
1809        assert!(!trie_deserialized.index.is_owned());
1810        assert!(!trie_deserialized.data.is_owned());
1811
1812        Ok(())
1813    }
1814
1815    #[test]
1816    fn test_typed() {
1817        let untyped = planes::get_planes_trie();
1818        assert_eq!(untyped.get('\u{10000}'), 1);
1819        let small_ref = <&SmallCodePointTrie<_>>::try_from(&untyped).unwrap();
1820        assert_eq!(small_ref.get('\u{10000}'), 1);
1821        let _ = <&FastCodePointTrie<_>>::try_from(&untyped).is_err();
1822        let small = <SmallCodePointTrie<_>>::try_from(untyped).unwrap();
1823        assert_eq!(small.get('\u{10000}'), 1);
1824    }
1825
1826    #[test]
1827    fn test_get_range() {
1828        let planes_trie = planes::get_planes_trie();
1829
1830        let first_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x0);
1831        assert_eq!(
1832            first_range,
1833            Some(CodePointMapRange {
1834                range: 0x0..=0xffff,
1835                value: 0
1836            })
1837        );
1838
1839        let second_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x1_0000);
1840        assert_eq!(
1841            second_range,
1842            Some(CodePointMapRange {
1843                range: 0x10000..=0x1ffff,
1844                value: 1
1845            })
1846        );
1847
1848        let penultimate_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0xf_0000);
1849        assert_eq!(
1850            penultimate_range,
1851            Some(CodePointMapRange {
1852                range: 0xf_0000..=0xf_ffff,
1853                value: 15
1854            })
1855        );
1856
1857        let last_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x10_0000);
1858        assert_eq!(
1859            last_range,
1860            Some(CodePointMapRange {
1861                range: 0x10_0000..=0x10_ffff,
1862                value: 16
1863            })
1864        );
1865    }
1866
1867    #[test]
1868    #[allow(unused_unsafe)] // `unsafe` below is both necessary and unnecessary
1869    fn databake() {
1870        databake::test_bake!(
1871            CodePointTrie<'static, u32>,
1872            const,
1873            unsafe {
1874                crate::codepointtrie::CodePointTrie::from_parts_unstable_unchecked_v1(
1875                    crate::codepointtrie::CodePointTrieHeader {
1876                        high_start: 1u32,
1877                        shifted12_high_start: 2u16,
1878                        index3_null_offset: 3u16,
1879                        data_null_offset: 4u32,
1880                        null_value: 5u32,
1881                        trie_type: crate::codepointtrie::TrieType::Small,
1882                    },
1883                    zerovec::ZeroVec::new(),
1884                    zerovec::ZeroVec::new(),
1885                    0u32,
1886                )
1887            },
1888            icu_collections,
1889            [zerovec],
1890        );
1891    }
1892}