icu_collections/codepointinvlist/
cpinvlist.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5#[cfg(feature = "serde")]
6use alloc::format;
7#[cfg(feature = "serde")]
8use alloc::string::String;
9#[cfg(feature = "alloc")]
10use alloc::vec::Vec;
11use core::{char, ops::RangeBounds, ops::RangeInclusive};
12use potential_utf::PotentialCodePoint;
13use yoke::Yokeable;
14use zerofrom::ZeroFrom;
15use zerovec::{ule::AsULE, zerovec, ZeroVec};
16
17use super::InvalidSetError;
18use crate::codepointinvlist::utils::{deconstruct_range, is_valid_zv};
19
20/// Represents the end code point of the Basic Multilingual Plane range, starting from code point 0, inclusive
21const BMP_MAX: u32 = 0xFFFF;
22
23/// Represents the inversion list for a set of all code points in the Basic Multilingual Plane.
24const BMP_INV_LIST_VEC: ZeroVec<PotentialCodePoint> = ::zerovec::ZeroSlice::<PotentialCodePoint>::from_ule_slice(const {
                &[PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24(0x0)),
                            PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24(BMP_MAX
                                        + 1))]
            }).as_zerovec()zerovec!(PotentialCodePoint; PotentialCodePoint::to_unaligned; [PotentialCodePoint::from_u24(0x0), PotentialCodePoint::from_u24(BMP_MAX + 1)]);
25
26/// Represents the inversion list for all of the code points in the Unicode range.
27const ALL_VEC: ZeroVec<PotentialCodePoint> = ::zerovec::ZeroSlice::<PotentialCodePoint>::from_ule_slice(const {
                &[PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24(0x0)),
                            PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24((char::MAX
                                                as u32) + 1))]
            }).as_zerovec()zerovec!(PotentialCodePoint; PotentialCodePoint::to_unaligned; [PotentialCodePoint::from_u24(0x0), PotentialCodePoint::from_u24((char::MAX as u32) + 1)]);
28
29/// A membership wrapper for [`CodePointInversionList`].
30///
31/// Provides exposure to membership functions and constructors from serialized `CodePointSet`s (sets of code points)
32/// and predefined ranges.
33impl core::fmt::Debug for CodePointInversionListULE {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        let this =
            <CodePointInversionList as
                    zerovec::__zerovec_internal_reexport::ZeroFrom<CodePointInversionListULE>>::zero_from(self);
        <CodePointInversionList as core::fmt::Debug>::fmt(&this, f)
    }
}#[zerovec::make_varule(CodePointInversionListULE)]
34#[zerovec::skip_derive(Ord)]
35#[zerovec::derive(Debug)]
36#[derive(#[automatically_derived]
impl<'data> ::core::fmt::Debug for CodePointInversionList<'data> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f,
            "CodePointInversionList", "inv_list", &self.inv_list, "size",
            &&self.size)
    }
}Debug, #[automatically_derived]
impl<'data> ::core::cmp::Eq for CodePointInversionList<'data> {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _:
                ::core::cmp::AssertParamIsEq<ZeroVec<'data,
                PotentialCodePoint>>;
        let _: ::core::cmp::AssertParamIsEq<u32>;
    }
}Eq, #[automatically_derived]
impl<'data> ::core::cmp::PartialEq for CodePointInversionList<'data> {
    #[inline]
    fn eq(&self, other: &CodePointInversionList<'data>) -> bool {
        self.size == other.size && self.inv_list == other.inv_list
    }
}PartialEq, #[automatically_derived]
impl<'data> ::core::clone::Clone for CodePointInversionList<'data> {
    #[inline]
    fn clone(&self) -> CodePointInversionList<'data> {
        CodePointInversionList {
            inv_list: ::core::clone::Clone::clone(&self.inv_list),
            size: ::core::clone::Clone::clone(&self.size),
        }
    }
}Clone, unsafe impl<'a> yoke::Yokeable<'a> for CodePointInversionList<'static> where
    {
    type Output = CodePointInversionList<'a>;
    #[inline]
    fn transform(&'a self) -> &'a Self::Output { self }
    #[inline]
    fn transform_owned(self) -> Self::Output { self }
    #[inline]
    unsafe fn make(this: Self::Output) -> Self {
        use core::{mem, ptr};
        if true {
            if !(mem::size_of::<Self::Output>() == mem::size_of::<Self>()) {
                ::core::panicking::panic("assertion failed: mem::size_of::<Self::Output>() == mem::size_of::<Self>()")
            };
        };
        let ptr: *const Self = (&this as *const Self::Output).cast();

        #[allow(forgetting_copy_types, clippy :: forget_copy, clippy ::
        forget_non_drop, clippy :: mem_forget)]
        mem::forget(this);
        ptr::read(ptr)
    }
    #[inline]
    fn transform_mut<F>(&'a mut self, f: F) where F: 'static +
        for<'b> FnOnce(&'b mut Self::Output) {
        unsafe {
            f(core::mem::transmute::<&'a mut Self,
                        &'a mut Self::Output>(self))
        }
    }
}Yokeable, impl<'zf, 'zf_inner>
    zerofrom::ZeroFrom<'zf, CodePointInversionList<'zf_inner>> for
    CodePointInversionList<'zf> where  {
    fn zero_from(this: &'zf CodePointInversionList<'zf_inner>) -> Self {
        match *this {
            CodePointInversionList {
                inv_list: ref __binding_0, size: ref __binding_1 } => {
                CodePointInversionList {
                    inv_list: <ZeroVec<'zf, PotentialCodePoint> as
                            zerofrom::ZeroFrom<'zf,
                            ZeroVec<'zf_inner,
                            PotentialCodePoint>>>::zero_from(__binding_0),
                    size: *__binding_1,
                }
            }
        }
    }
}ZeroFrom)]
37#[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV, ToOwned))]
38pub struct CodePointInversionList<'data> {
39    // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
40    // https://doc.rust-lang.org/nightly/core/array/trait.FixedSizeArray.html
41    // Allows for traits of fixed size arrays
42
43    // Implements an [inversion list.](https://en.wikipedia.org/wiki/Inversion_list)
44    inv_list: ZeroVec<'data, PotentialCodePoint>,
45    size: u32,
46}
47
48#[cfg(feature = "serde")]
49impl<'de: 'a, 'a> serde::Deserialize<'de> for CodePointInversionList<'a> {
50    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
51    where
52        D: serde::Deserializer<'de>,
53    {
54        use serde::de::Error;
55
56        let parsed_inv_list = if deserializer.is_human_readable() {
57            let parsed_strings = Vec::<alloc::borrow::Cow<'de, str>>::deserialize(deserializer)?;
58            let mut inv_list = ZeroVec::new_owned(Vec::with_capacity(parsed_strings.len() * 2));
59            for range in parsed_strings {
60                fn internal(range: &str) -> Option<(u32, u32)> {
61                    let (start, range) = UnicodeCodePoint::parse(range)?;
62                    if range.is_empty() {
63                        return Some((start.0, start.0));
64                    }
65                    let (hyphen, range) = UnicodeCodePoint::parse(range)?;
66                    if hyphen.0 != '-' as u32 {
67                        return None;
68                    }
69                    let (end, range) = UnicodeCodePoint::parse(range)?;
70                    range.is_empty().then_some((start.0, end.0))
71                }
72                let (start, end) = internal(&range).ok_or_else(|| Error::custom(format!(
73                    "Cannot deserialize invalid inversion list for CodePointInversionList: {range:?}"
74                )))?;
75                inv_list.with_mut(|v| {
76                    v.push(PotentialCodePoint::from_u24(start).to_unaligned());
77                    v.push(PotentialCodePoint::from_u24(end + 1).to_unaligned());
78                });
79            }
80            inv_list
81        } else {
82            ZeroVec::<PotentialCodePoint>::deserialize(deserializer)?
83        };
84        CodePointInversionList::try_from_inversion_list(parsed_inv_list).map_err(|e| {
85            Error::custom(format!(
86                "Cannot deserialize invalid inversion list for CodePointInversionList: {e:?}"
87            ))
88        })
89    }
90}
91
92#[cfg(feature = "databake")]
93impl databake::Bake for CodePointInversionList<'_> {
94    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
95        env.insert("icu_collections");
96        let inv_list = self.inv_list.bake(env);
97        let size = self.size.bake(env);
98        // Safe because our parts are safe.
99        databake::quote! { unsafe {
100            #[allow(unused_unsafe)]
101            icu_collections::codepointinvlist::CodePointInversionList::from_parts_unchecked(#inv_list, #size)
102        }}
103    }
104}
105
106#[cfg(feature = "databake")]
107impl databake::BakeSize for CodePointInversionList<'_> {
108    fn borrows_size(&self) -> usize {
109        self.inv_list.borrows_size()
110    }
111}
112
113#[cfg(feature = "serde")]
/// A `u32` code point value used by the human-readable serde format.
///
/// Unlike `char` it can hold surrogate values, which the format writes as `U+XXXX` escapes.
#[derive(Clone, Copy, Debug)]
struct UnicodeCodePoint(u32);
116
117#[cfg(feature = "serde")]
118impl UnicodeCodePoint {
119    fn from_u32(cp: u32) -> Result<Self, String> {
120        if cp <= char::MAX as u32 {
121            Ok(Self(cp))
122        } else {
123            Err(format!("Not a Unicode code point {cp}"))
124        }
125    }
126
127    fn parse(value: &str) -> Option<(Self, &str)> {
128        Some(if let Some(hex) = value.strip_prefix("U+") {
129            let (escape, remainder) = (hex.get(..4)?, hex.get(4..)?);
130            (Self(u32::from_str_radix(escape, 16).ok()?), remainder)
131        } else {
132            let c = value.chars().next()?;
133            (Self(c as u32), value.get(c.len_utf8()..)?)
134        })
135    }
136}
137
138#[cfg(feature = "serde")]
139impl core::fmt::Display for UnicodeCodePoint {
140    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
141        match self.0 {
142            s @ 0xD800..=0xDFFF => write!(f, "U+{s:X}"),
143            // char should be in range by construction but this code is not so performance-sensitive
144            // so we just use the replacement character
145            c => write!(
146                f,
147                "{}",
148                char::from_u32(c).unwrap_or(char::REPLACEMENT_CHARACTER)
149            ),
150        }
151    }
152}
153
154#[cfg(feature = "serde")]
155impl serde::Serialize for CodePointInversionList<'_> {
156    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
157    where
158        S: serde::Serializer,
159    {
160        if serializer.is_human_readable() {
161            use serde::ser::Error;
162            use serde::ser::SerializeSeq;
163            let mut seq = serializer.serialize_seq(Some(self.inv_list.len() / 2))?;
164            for range in self.iter_ranges() {
165                let start = UnicodeCodePoint::from_u32(*range.start()).map_err(S::Error::custom)?;
166                if range.start() == range.end() {
167                    seq.serialize_element(&format!("{start}"))?;
168                } else {
169                    let end = UnicodeCodePoint::from_u32(*range.end()).map_err(S::Error::custom)?;
170                    seq.serialize_element(&format!("{start}-{end}",))?;
171                }
172            }
173            seq.end()
174        } else {
175            // Note: serde(flatten) currently does not promote a struct field of type Vec
176            // to replace the struct when serializing. The error message from the default
177            // serialization is: "can only flatten structs and maps (got a sequence)".
178            self.inv_list.serialize(serializer)
179        }
180    }
181}
182
183impl<'data> CodePointInversionList<'data> {
184    /// Returns a new [`CodePointInversionList`] from an [inversion list](https://en.wikipedia.org/wiki/Inversion_list)
185    /// represented as a [`ZeroVec`]`<`[`PotentialCodePoint`]`>` of code points.
186    ///
187    /// The inversion list must be of even length, sorted ascending non-overlapping,
188    /// and within the bounds of `0x0 -> 0x10FFFF` inclusive, and end points being exclusive.
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// use icu::collections::codepointinvlist::CodePointInversionList;
194    /// use icu::collections::codepointinvlist::InvalidSetError;
195    /// use potential_utf::PotentialCodePoint;
196    /// use zerovec::ZeroVec;
197    ///
198    /// let valid = [0x0, 0x10000];
199    /// let inv_list: ZeroVec<PotentialCodePoint> = valid
200    ///     .into_iter()
201    ///     .map(PotentialCodePoint::from_u24)
202    ///     .collect();
203    /// let result = CodePointInversionList::try_from_inversion_list(inv_list);
204    /// assert!(matches!(result, CodePointInversionList));
205    ///
206    /// let invalid = vec![0x0, 0x80, 0x3];
207    /// let inv_list: ZeroVec<PotentialCodePoint> = invalid
208    ///     .iter()
209    ///     .copied()
210    ///     .map(PotentialCodePoint::from_u24)
211    ///     .collect();
212    /// let result = CodePointInversionList::try_from_inversion_list(inv_list);
213    /// assert!(matches!(result, Err(InvalidSetError(_))));
214    /// if let Err(InvalidSetError(actual)) = result {
215    ///     assert_eq!(
216    ///         &invalid,
217    ///         &actual.into_iter().map(u32::from).collect::<Vec<_>>()
218    ///     );
219    /// }
220    /// ```
221    pub fn try_from_inversion_list(
222        inv_list: ZeroVec<'data, PotentialCodePoint>,
223    ) -> Result<Self, InvalidSetError> {
224        #[expect(clippy::indexing_slicing)] // chunks
225        if is_valid_zv(&inv_list) {
226            let size = inv_list
227                .as_ule_slice()
228                .chunks(2)
229                .map(|end_points| {
230                    u32::from(<PotentialCodePoint as AsULE>::from_unaligned(end_points[1]))
231                        - u32::from(<PotentialCodePoint as AsULE>::from_unaligned(end_points[0]))
232                })
233                .sum::<u32>();
234            Ok(Self { inv_list, size })
235        } else {
236            Err(InvalidSetError(
237                #[cfg(feature = "alloc")]
238                inv_list.to_vec(),
239            ))
240        }
241    }
242
243    /// Safety: no actual safety invariants, however has correctness invariants
244    #[doc(hidden)] // databake internal
245    pub const unsafe fn from_parts_unchecked(
246        inv_list: ZeroVec<'data, PotentialCodePoint>,
247        size: u32,
248    ) -> Self {
249        Self { inv_list, size }
250    }
251
252    /// Returns a new, fully-owned [`CodePointInversionList`] by cloning an [inversion list](https://en.wikipedia.org/wiki/Inversion_list)
253    /// represented as a slice of [`PotentialCodePoint`] code points.
254    ///
255    /// The inversion list must be of even length, sorted ascending non-overlapping,
256    /// and within the bounds of `0x0 -> 0x10FFFF` inclusive, and end points being exclusive.
257    ///
258    /// ✨ *Enabled with the `alloc` Cargo feature.*
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// use icu::collections::codepointinvlist::CodePointInversionList;
264    ///
265    /// let bmp_list = &[0x0, 0x10000];
266    /// let smp_list = &[0x10000, 0x20000];
267    /// let sip_list = &[0x20000, 0x30000];
268    ///
269    /// let lists: Vec<CodePointInversionList> =
270    ///     [&bmp_list[..], smp_list, sip_list]
271    ///         .into_iter()
272    ///         .map(|l| {
273    ///             CodePointInversionList::try_from_u32_inversion_list_slice(l)
274    ///                 .unwrap()
275    ///         })
276    ///         .collect();
277    ///
278    /// let bmp = &lists[0];
279    /// assert!(bmp.contains32(0xFFFF));
280    /// assert!(!bmp.contains32(0x10000));
281    ///
282    /// assert!(!lists.iter().any(|set| set.contains32(0x40000)));
283    /// ```
284    #[cfg(feature = "alloc")]
285    pub fn try_from_u32_inversion_list_slice(inv_list: &[u32]) -> Result<Self, InvalidSetError> {
286        let inv_list_zv: ZeroVec<PotentialCodePoint> = inv_list
287            .iter()
288            .copied()
289            .map(PotentialCodePoint::from_u24)
290            .collect();
291        CodePointInversionList::try_from_inversion_list(inv_list_zv)
292    }
293
294    /// Attempts to convert this list into a fully-owned one. No-op if already fully owned
295    ///
296    /// ✨ *Enabled with the `alloc` Cargo feature.*
297    #[cfg(feature = "alloc")]
298    pub fn into_owned(self) -> CodePointInversionList<'static> {
299        CodePointInversionList {
300            inv_list: self.inv_list.into_owned(),
301            size: self.size,
302        }
303    }
304
305    /// Returns an owned inversion list representing the current [`CodePointInversionList`]
306    ///
307    /// ✨ *Enabled with the `alloc` Cargo feature.*
308    #[cfg(feature = "alloc")]
309    pub fn get_inversion_list_vec(&self) -> Vec<u32> {
310        self.as_inversion_list().iter().map(u32::from).collect()
311    }
312
313    /// Returns [`CodePointInversionList`] spanning entire Unicode range
314    ///
315    /// The range spans from `0x0 -> 0x10FFFF` inclusive.
316    ///  
317    /// # Examples
318    ///
319    /// ```
320    /// use icu::collections::codepointinvlist::CodePointInversionList;
321    ///
322    /// let expected = [0x0, (char::MAX as u32) + 1];
323    /// assert_eq!(
324    ///     CodePointInversionList::all().get_inversion_list_vec(),
325    ///     expected
326    /// );
327    /// assert_eq!(
328    ///     CodePointInversionList::all().size(),
329    ///     (expected[1] - expected[0]) as usize
330    /// );
331    /// ```
332    pub fn all() -> Self {
333        Self {
334            inv_list: ALL_VEC,
335            size: (char::MAX as u32) + 1,
336        }
337    }
338
339    /// Returns [`CodePointInversionList`] spanning BMP range
340    ///
341    /// The range spans from `0x0 -> 0xFFFF` inclusive.
342    ///
343    /// # Examples
344    ///
345    /// ```
346    /// use icu::collections::codepointinvlist::CodePointInversionList;
347    ///
348    /// const BMP_MAX: u32 = 0xFFFF;
349    ///
350    /// let expected = [0x0, BMP_MAX + 1];
351    /// assert_eq!(
352    ///     CodePointInversionList::bmp().get_inversion_list_vec(),
353    ///     expected
354    /// );
355    /// assert_eq!(
356    ///     CodePointInversionList::bmp().size(),
357    ///     (expected[1] - expected[0]) as usize
358    /// );
359    /// ```
360    pub fn bmp() -> Self {
361        Self {
362            inv_list: BMP_INV_LIST_VEC,
363            size: BMP_MAX + 1,
364        }
365    }
366
367    /// Returns the inversion list as a slice
368    ///
369    /// Public only to the crate, not exposed to public
370    #[cfg(feature = "alloc")]
371    pub(crate) fn as_inversion_list(&self) -> &ZeroVec<'_, PotentialCodePoint> {
372        &self.inv_list
373    }
374
375    /// Yields an [`Iterator`] going through the character set in the [`CodePointInversionList`]
376    ///
377    /// # Examples
378    ///
379    /// ```
380    /// use icu::collections::codepointinvlist::CodePointInversionList;
381    /// let example_list = [0x41, 0x44, 0x45, 0x46];
382    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
383    ///     &example_list,
384    /// )
385    /// .unwrap();
386    /// let mut ex_iter_chars = example.iter_chars();
387    /// assert_eq!(Some('A'), ex_iter_chars.next());
388    /// assert_eq!(Some('B'), ex_iter_chars.next());
389    /// assert_eq!(Some('C'), ex_iter_chars.next());
390    /// assert_eq!(Some('E'), ex_iter_chars.next());
391    /// assert_eq!(None, ex_iter_chars.next());
392    /// ```
393    pub fn iter_chars(&self) -> impl Iterator<Item = char> + '_ {
394        #[expect(clippy::indexing_slicing)] // chunks
395        self.inv_list
396            .as_ule_slice()
397            .chunks(2)
398            .flat_map(|pair| {
399                u32::from(PotentialCodePoint::from_unaligned(pair[0]))
400                    ..u32::from(PotentialCodePoint::from_unaligned(pair[1]))
401            })
402            .filter_map(char::from_u32)
403    }
404
405    /// Yields an [`Iterator`] returning the ranges of the code points that are
406    /// included in the [`CodePointInversionList`]
407    ///
408    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
409    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
410    /// behavior of ranges, ex: `CodePointInversionList::contains(UChar32 start, UChar32 end)`.
411    ///
412    /// # Example
413    ///
414    /// ```
415    /// use icu::collections::codepointinvlist::CodePointInversionList;
416    /// let example_list = [0x41, 0x44, 0x45, 0x46];
417    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
418    ///     &example_list,
419    /// )
420    /// .unwrap();
421    /// let mut example_iter_ranges = example.iter_ranges();
422    /// assert_eq!(Some(0x41..=0x43), example_iter_ranges.next());
423    /// assert_eq!(Some(0x45..=0x45), example_iter_ranges.next());
424    /// assert_eq!(None, example_iter_ranges.next());
425    /// ```
426    pub fn iter_ranges(&self) -> impl ExactSizeIterator<Item = RangeInclusive<u32>> + '_ {
427        #[expect(clippy::indexing_slicing)] // chunks
428        self.inv_list.as_ule_slice().chunks(2).map(|pair| {
429            let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0]));
430            let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1]));
431            range_start..=(range_limit - 1)
432        })
433    }
434
435    /// Yields an [`Iterator`] returning the ranges of the code points that are
436    /// *not* included in the [`CodePointInversionList`]
437    ///
438    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
439    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
440    /// behavior of ranges, ex: `CodePointInversionList::contains(UChar32 start, UChar32 end)`.
441    ///
442    /// # Example
443    ///
444    /// ```
445    /// use icu::collections::codepointinvlist::CodePointInversionList;
446    /// let example_list = [0x41, 0x44, 0x45, 0x46];
447    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
448    ///     &example_list,
449    /// )
450    /// .unwrap();
451    /// let mut example_iter_ranges = example.iter_ranges_complemented();
452    /// assert_eq!(Some(0..=0x40), example_iter_ranges.next());
453    /// assert_eq!(Some(0x44..=0x44), example_iter_ranges.next());
454    /// assert_eq!(Some(0x46..=char::MAX as u32), example_iter_ranges.next());
455    /// assert_eq!(None, example_iter_ranges.next());
456    /// ```
457    pub fn iter_ranges_complemented(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
458        let inv_ule = self.inv_list.as_ule_slice();
459        let middle = inv_ule.get(1..inv_ule.len() - 1).unwrap_or(&[]);
460        let beginning = if let Some(first) = self.inv_list.first() {
461            let first = u32::from(first);
462            if first == 0 {
463                None
464            } else {
465                Some(0..=first - 1)
466            }
467        } else {
468            None
469        };
470        let end = if let Some(last) = self.inv_list.last() {
471            let last = u32::from(last);
472            if last == char::MAX as u32 {
473                None
474            } else {
475                Some(last..=char::MAX as u32)
476            }
477        } else {
478            None
479        };
480        #[expect(clippy::indexing_slicing)] // chunks
481        let chunks = middle.chunks(2).map(|pair| {
482            let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0]));
483            let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1]));
484            range_start..=(range_limit - 1)
485        });
486        beginning.into_iter().chain(chunks).chain(end)
487    }
488
489    /// Returns the number of ranges contained in this [`CodePointInversionList`]
490    pub fn get_range_count(&self) -> usize {
491        self.inv_list.len() / 2
492    }
493
494    /// Returns a specific range contained in this [`CodePointInversionList`] by index.
495    /// Intended for use in FFI.
496    pub fn get_nth_range(&self, idx: usize) -> Option<RangeInclusive<u32>> {
497        let start_idx = idx * 2;
498        let end_idx = start_idx + 1;
499        let start = u32::from(self.inv_list.get(start_idx)?);
500        let end = u32::from(self.inv_list.get(end_idx)?);
501        Some(start..=(end - 1))
502    }
503
504    /// Returns the number of elements of the [`CodePointInversionList`]
505    pub fn size(&self) -> usize {
506        if self.is_empty() {
507            return 0;
508        }
509        self.size as usize
510    }
511
512    /// Returns whether or not the [`CodePointInversionList`] is empty
513    pub fn is_empty(&self) -> bool {
514        self.inv_list.is_empty()
515    }
516
517    /// Wrapper for contains
518    ///
519    /// Returns an [`Option`] as to whether or not it is possible for the query to be contained.
520    /// The value in the [`Option`] is the start index of the range that contains the query.
521    fn contains_query(&self, query: u32) -> Option<usize> {
522        let query = PotentialCodePoint::try_from(query).ok()?;
523        match self.inv_list.binary_search(&query) {
524            Ok(pos) => {
525                if pos % 2 == 0 {
526                    Some(pos)
527                } else {
528                    None
529                }
530            }
531            Err(pos) => {
532                if pos % 2 != 0 && pos < self.inv_list.len() {
533                    Some(pos - 1)
534                } else {
535                    None
536                }
537            }
538        }
539    }
540
541    /// Checks to see the query is in the [`CodePointInversionList`]
542    ///
543    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
544    /// in the set using [`core`] implementation
545    ///
546    /// # Examples
547    ///
548    /// ```
549    /// use icu::collections::codepointinvlist::CodePointInversionList;
550    /// let example_list = [0x41, 0x43, 0x44, 0x45];
551    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
552    ///     &example_list,
553    /// )
554    /// .unwrap();
555    /// assert!(example.contains('A'));
556    /// assert!(!example.contains('C'));
557    /// ```
558    pub fn contains(&self, query: char) -> bool {
559        self.contains_query(query as u32).is_some()
560    }
561
562    /// Checks to see the unsigned int is in the [`CodePointInversionList::all()`](CodePointInversionList::all())
563    ///
564    /// Note: Even though [`u32`] and [`prim@char`] in Rust are non-negative 4-byte
565    /// values, there is an important difference. A [`u32`] can take values up to
566    /// a very large integer value, while a [`prim@char`] in Rust is defined to be in
567    /// the range from 0 to the maximum valid Unicode Scalar Value.
568    ///
569    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
570    /// in the set using [`core`] implementation
571    ///
572    /// # Examples
573    ///
574    /// ```
575    /// use icu::collections::codepointinvlist::CodePointInversionList;
576    /// let example_list = [0x41, 0x43, 0x44, 0x45];
577    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
578    ///     &example_list,
579    /// )
580    /// .unwrap();
581    /// assert!(example.contains32(0x41));
582    /// assert!(!example.contains32(0x43));
583    /// ```
584    pub fn contains32(&self, query: u32) -> bool {
585        self.contains_query(query).is_some()
586    }
587
588    /// Checks to see if the range is in the [`CodePointInversionList`]
589    ///
590    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
591    /// in the set using [`Vec`] implementation. Only runs the search once on the `start`
592    /// parameter, while the `end` parameter is checked in a single `O(1)` step.
593    ///
594    /// # Examples
595    ///
596    /// ```
597    /// use icu::collections::codepointinvlist::CodePointInversionList;
598    /// let example_list = [0x41, 0x43, 0x44, 0x45];
599    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
600    ///     &example_list,
601    /// )
602    /// .unwrap();
603    /// assert!(example.contains_range('A'..'C'));
604    /// assert!(example.contains_range('A'..='B'));
605    /// assert!(!example.contains_range('A'..='C'));
606    /// ```
607    ///
608    /// Surrogate points (`0xD800 -> 0xDFFF`) will return [`false`] if the Range contains them but the
609    /// [`CodePointInversionList`] does not.
610    ///
611    /// Note: when comparing to ICU4C/J, keep in mind that `Range`s in Rust are
612    /// constructed inclusive of start boundary and exclusive of end boundary.
613    /// The ICU4C/J `CodePointInversionList::contains(UChar32 start, UChar32 end)` method
614    /// differs by including the end boundary.
615    ///
616    /// # Examples
617    ///
618    /// ```
619    /// use icu::collections::codepointinvlist::CodePointInversionList;
620    /// use std::char;
621    /// let check =
622    ///     char::from_u32(0xD7FE).unwrap()..char::from_u32(0xE001).unwrap();
623    /// let example_list = [0xD7FE, 0xD7FF, 0xE000, 0xE001];
624    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
625    ///     &example_list,
626    /// )
627    /// .unwrap();
628    /// assert!(!example.contains_range(check));
629    /// ```
630    pub fn contains_range(&self, range: impl RangeBounds<char>) -> bool {
631        let (from, till) = deconstruct_range(range);
632        if from >= till {
633            return false;
634        }
635        match self.contains_query(from) {
636            Some(pos) => {
637                if let Some(x) = self.inv_list.get(pos + 1) {
638                    (till) <= x.into()
639                } else {
640                    if true {
    if !false {
        {
            ::core::panicking::panic_fmt(format_args!("Inversion list query should not return out of bounds index"));
        }
    };
};debug_assert!(
641                        false,
642                        "Inversion list query should not return out of bounds index"
643                    );
644                    false
645                }
646            }
647            None => false,
648        }
649    }
650
651    /// Check if the calling [`CodePointInversionList`] contains all the characters of the given [`CodePointInversionList`]
652    ///
653    /// # Examples
654    ///
655    /// ```
656    /// use icu::collections::codepointinvlist::CodePointInversionList;
657    /// let example_list = [0x41, 0x46, 0x55, 0x5B]; // A - E, U - Z
658    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
659    ///     &example_list,
660    /// )
661    /// .unwrap();
662    /// let a_to_d = CodePointInversionList::try_from_u32_inversion_list_slice(&[
663    ///     0x41, 0x45,
664    /// ])
665    /// .unwrap();
666    /// let f_to_t = CodePointInversionList::try_from_u32_inversion_list_slice(&[
667    ///     0x46, 0x55,
668    /// ])
669    /// .unwrap();
670    /// let r_to_x = CodePointInversionList::try_from_u32_inversion_list_slice(&[
671    ///     0x52, 0x58,
672    /// ])
673    /// .unwrap();
674    /// assert!(example.contains_set(&a_to_d)); // contains all
675    /// assert!(!example.contains_set(&f_to_t)); // contains none
676    /// assert!(!example.contains_set(&r_to_x)); // contains some
677    /// ```
678    pub fn contains_set(&self, set: &Self) -> bool {
679        if set.size() > self.size() {
680            return false;
681        }
682
683        let mut set_ranges = set.iter_ranges();
684        let mut check_elem = set_ranges.next();
685
686        let ranges = self.iter_ranges();
687        for range in ranges {
688            match check_elem {
689                Some(ref check_range) => {
690                    if check_range.start() >= range.start()
691                        && check_range.end() <= &(range.end() + 1)
692                    {
693                        check_elem = set_ranges.next();
694                    }
695                }
696                _ => break,
697            }
698        }
699        check_elem.is_none()
700    }
701
702    /// Returns the end of the initial substring where the characters are either contained/not contained
703    /// in the set.
704    ///
705    /// # Examples
706    ///
707    /// ```
708    /// use icu::collections::codepointinvlist::CodePointInversionList;
709    /// let example_list = [0x41, 0x44]; // {A, B, C}
710    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
711    ///     &example_list,
712    /// )
713    /// .unwrap();
714    /// assert_eq!(example.span("CABXYZ", true), 3);
715    /// assert_eq!(example.span("XYZC", false), 3);
716    /// assert_eq!(example.span("XYZ", true), 0);
717    /// assert_eq!(example.span("ABC", false), 0);
718    /// ```
719    pub fn span(&self, span_str: &str, contained: bool) -> usize {
720        span_str
721            .chars()
722            .take_while(|&x| self.contains(x) == contained)
723            .count()
724    }
725
726    /// Returns the start of the trailing substring (starting from end of string) where the characters are
727    /// either contained/not contained in the set. Returns the length of the string if no valid return.
728    ///
729    /// # Examples
730    ///
731    /// ```
732    /// use icu::collections::codepointinvlist::CodePointInversionList;
733    /// let example_list = [0x41, 0x44]; // {A, B, C}
734    /// let example = CodePointInversionList::try_from_u32_inversion_list_slice(
735    ///     &example_list,
736    /// )
737    /// .unwrap();
738    /// assert_eq!(example.span_back("XYZCAB", true), 3);
739    /// assert_eq!(example.span_back("ABCXYZ", true), 6);
740    /// assert_eq!(example.span_back("CABXYZ", false), 3);
741    /// ```
742    pub fn span_back(&self, span_str: &str, contained: bool) -> usize {
743        span_str.len()
744            - span_str
745                .chars()
746                .rev()
747                .take_while(|&x| self.contains(x) == contained)
748                .count()
749    }
750}
751
#[cfg(test)]
mod tests {
    use super::{CodePointInversionList, InvalidSetError};
    use std::{char, vec::Vec};
    use zerovec::ZeroVec;

    /// Builds a set from raw inversion-list boundaries, panicking when they are rejected.
    fn set_of(boundaries: &[u32]) -> CodePointInversionList<'static> {
        CodePointInversionList::try_from_u32_inversion_list_slice(boundaries).unwrap()
    }

    // Construction

    #[test]
    fn test_codepointinversionlist_try_from_vec() {
        let boundaries = vec![0x2, 0x3, 0x4, 0x5];
        let set = set_of(&boundaries);
        // The boundaries survive a round trip, and the two one-element ranges add up to 2.
        assert_eq!(boundaries, set.get_inversion_list_vec());
        assert_eq!(2, set.size());
    }

    #[test]
    fn test_codepointinversionlist_try_from_vec_error() {
        let malformed = vec![0x1, 0x1, 0x2, 0x3, 0x4];
        let outcome = CodePointInversionList::try_from_u32_inversion_list_slice(&malformed);
        assert!(matches!(outcome, Err(InvalidSetError(_))));
        // The error hands the rejected boundaries back unchanged.
        if let Err(InvalidSetError(rejected)) = outcome {
            let rejected: Vec<_> = rejected.into_iter().map(u32::from).collect();
            assert_eq!(&malformed, &rejected);
        }
    }

    // CodePointInversionList membership functions

    #[test]
    fn test_codepointinversionlist_contains_query() {
        let set = set_of(&[0x41, 0x46, 0x4B, 0x55]);
        // (queried code point, index of the boundary opening its range)
        let expectations = [
            (0x40, None),
            (0x41, Some(0)),
            (0x44, Some(0)),
            (0x46, None),
            (0x4C, Some(2)),
            (0x56, None),
        ];
        for (code_point, expected) in expectations {
            assert_eq!(set.contains_query(code_point), expected);
        }
    }

    #[test]
    fn test_codepointinversionlist_contains() {
        let set = set_of(&[0x2, 0x5, 0xA, 0xF]);
        for member in [0x2_u8, 0x4, 0xA, 0xE] {
            assert!(set.contains(member as char));
        }
    }

    #[test]
    fn test_codepointinversionlist_contains_false() {
        let set = set_of(&[0x2, 0x5, 0xA, 0xF]);
        for outsider in [0x1_u8, 0x5, 0x9, 0xF, 0x10] {
            assert!(!set.contains(outsider as char));
        }
    }

    #[test]
    fn test_codepointinversionlist_contains_range() {
        let set = set_of(&[0x41, 0x46, 0x4B, 0x55]);
        assert!(set.contains_range('A'..='E')); // 65 - 69
        assert!(set.contains_range('C'..'D')); // 67 - 67
        assert!(set.contains_range('L'..'P')); // 76 - 80
        assert!(!set.contains_range('L'..='U')); // 76 - 85
    }

    #[test]
    fn test_codepointinversionlist_contains_range_false() {
        let set = set_of(&[0x41, 0x46, 0x4B, 0x55]);
        assert!(!set.contains_range('!'..'A')); // 33 - 65
        assert!(!set.contains_range('F'..'K')); // 70 - 74
        assert!(!set.contains_range('U'..)); // 85 - ..
    }

    #[test]
    fn test_codepointinversionlist_contains_range_invalid() {
        // Even the full set rejects inverted and empty ranges.
        let everything = CodePointInversionList::all();
        assert!(!everything.contains_range('A'..'!')); // 65 - 33
        assert!(!everything.contains_range('A'..'A')); // 65 - 65
    }

    #[test]
    fn test_codepointinversionlist_contains_set_u() {
        let outer = set_of(&[0xA, 0x14, 0x28, 0x32, 0x46, 0x50, 0x64, 0x6E]);
        let inner = set_of(&[0xF, 0x14, 0x2C, 0x31, 0x46, 0x50, 0x64, 0x6D]);
        assert!(outer.contains_set(&inner));
    }

    #[test]
    fn test_codepointinversionlist_contains_set_u_false() {
        let outer = set_of(&[0xA, 0x14, 0x28, 0x32, 0x46, 0x50, 0x64, 0x78]);
        let stray = set_of(&[0x0, 0xA, 0x16, 0x2C, 0x32, 0x46, 0x4F, 0x51, 0x6D, 0x6F]);
        assert!(!outer.contains_set(&stray));
    }

    // Size and emptiness

    #[test]
    fn test_codepointinversionlist_size() {
        let set = set_of(&[0x2, 0x5, 0xA, 0xF]);
        assert_eq!(8, set.size());

        let everything = CodePointInversionList::all();
        let expected = (char::MAX as u32) + 1;
        assert_eq!(expected as usize, everything.size());

        // A hand-assembled set without any boundaries reports no members.
        let no_boundaries = vec![];
        let empty = CodePointInversionList {
            inv_list: ZeroVec::from_slice_or_alloc(&no_boundaries),
            size: 0,
        };
        assert_eq!(empty.size(), 0);
    }

    #[test]
    fn test_codepointinversionlist_is_empty() {
        let no_boundaries = vec![];
        let empty = CodePointInversionList {
            inv_list: ZeroVec::from_slice_or_alloc(&no_boundaries),
            size: 0,
        };
        assert!(empty.is_empty());
    }

    #[test]
    fn test_codepointinversionlist_is_not_empty() {
        assert!(!CodePointInversionList::all().is_empty());
    }

    // Iteration

    #[test]
    fn test_codepointinversionlist_iter_chars() {
        let set = set_of(&[0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801]);
        // U+D800 is part of the list but is not produced as a `char`.
        let produced: Vec<char> = set.iter_chars().collect();
        assert_eq!(produced, ['A', 'B', 'C', 'E']);
    }

    #[test]
    fn test_codepointinversionlist_iter_ranges() {
        let set = set_of(&[0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801]);
        let produced: Vec<_> = set.iter_ranges().collect();
        assert_eq!(produced, [0x41..=0x43, 0x45..=0x45, 0xD800..=0xD800]);
    }

    #[test]
    fn test_codepointinversionlist_iter_ranges_exactsizeiter_trait() {
        let set = set_of(&[0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801]);
        assert_eq!(3, set.iter_ranges().len());
    }

    #[test]
    fn test_codepointinversionlist_range_count() {
        let set = set_of(&[0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801]);
        assert_eq!(3, set.get_range_count());
    }

    #[test]
    fn test_codepointinversionlist_get_nth_range() {
        let set = set_of(&[0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801]);
        let expectations = [
            (0, Some(0x41..=0x43)),
            (1, Some(0x45..=0x45)),
            (2, Some(0xD800..=0xD800)),
            (3, None),
        ];
        for (index, expected) in expectations {
            assert_eq!(expected, set.get_nth_range(index));
        }
    }

    // A half-open range of `char` has no way to name the bound just past
    // char::MAX, while a range of `u32` does.
    #[test]
    fn test_codepointinversionlist_iter_ranges_with_max_code_point() {
        let set = set_of(&[0x80, (char::MAX as u32) + 1]);
        let mut produced = set.iter_ranges();
        assert_eq!(Some(0x80..=(char::MAX as u32)), produced.next());
        assert_eq!(None, produced.next());
    }

    // Spans

    #[test]
    fn test_codepointinversionlist_span_contains() {
        let set = set_of(&[0x41, 0x44, 0x46, 0x4B]); // A - D, F - K
        assert_eq!(set.span("ABCDE", true), 3);
        assert_eq!(set.span("E", true), 0);
    }

    #[test]
    fn test_codepointinversionlist_span_does_not_contain() {
        let set = set_of(&[0x41, 0x44, 0x46, 0x4B]); // A - D, F - K
        assert_eq!(set.span("DEF", false), 2);
        assert_eq!(set.span("KLMA", false), 3);
    }

    #[test]
    fn test_codepointinversionlist_span_back_contains() {
        let set = set_of(&[0x41, 0x44, 0x46, 0x4B]); // A - D, F - K
        assert_eq!(set.span_back("XYZABFH", true), 3);
        assert_eq!(set.span_back("ABCXYZ", true), 6);
    }

    #[test]
    fn test_codepointinversionlist_span_back_does_not_contain() {
        let set = set_of(&[0x41, 0x44, 0x46, 0x4B]); // A - D, F - K
        assert_eq!(set.span_back("ABCXYZ", false), 3);
        assert_eq!(set.span_back("XYZABC", false), 6);
    }

    // Conversions and serialization

    #[test]
    fn test_uniset_to_inv_list() {
        let inv_list = [
            0x9, 0xE, 0x20, 0x21, 0x85, 0x86, 0xA0, 0xA1, 0x1626, 0x1627, 0x2000, 0x2003, 0x2028,
            0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
        ];
        let set: CodePointInversionList = set_of(&inv_list);
        let round_tripped = set.get_inversion_list_vec();
        assert_eq!(
            round_tripped.into_iter().collect::<ZeroVec<u32>>(),
            inv_list
        );
    }

    #[test]
    fn test_serde_serialize() {
        let set = set_of(&[0x41, 0x46, 0x4B, 0x55]);
        let json = serde_json::to_string(&set).unwrap();
        assert_eq!(json, r#"["A-E","K-T"]"#);
    }

    #[test]
    fn test_serde_serialize_surrogates() {
        // Surrogate code points are written in `U+XXXX` notation.
        let set = set_of(&[0xDFAB, 0xDFFF]);
        let json = serde_json::to_string(&set).unwrap();
        assert_eq!(json, r#"["U+DFAB-U+DFFE"]"#);
    }

    #[test]
    fn test_serde_deserialize() {
        let json = r#"["A-E","K-T"]"#;
        let expected = set_of(&[0x41, 0x46, 0x4B, 0x55]);
        let parsed: CodePointInversionList = serde_json::from_str(json).unwrap();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_serde_deserialize_surrogates() {
        let json = r#"["U+DFAB-U+DFFE"]"#;
        let expected = set_of(&[0xDFAB, 0xDFFF]);
        let parsed: CodePointInversionList = serde_json::from_str(json).unwrap();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_serde_deserialize_invalid() {
        for rejected in ["[65,70,98775,85]", "[65,70,U+FFFFFFFFFF,85]"] {
            assert!(serde_json::from_str::<CodePointInversionList>(rejected).is_err());
        }
    }

    #[test]
    fn test_serde_with_postcard_roundtrip() -> Result<(), postcard::Error> {
        let original = CodePointInversionList::bmp();
        let bytes: Vec<u8> = postcard::to_allocvec(&original).unwrap();
        let restored: CodePointInversionList =
            postcard::from_bytes::<CodePointInversionList>(&bytes)?;

        assert_eq!(&original, &restored);
        // The restored set has to borrow from `bytes` rather than own a copy.
        assert!(!restored.inv_list.is_owned());

        Ok(())
    }

    // NOTE(review): the byte literal below spells each boundary in four bytes;
    // confirm that this matches the element width the inversion list stores.
    #[test]
    fn databake() {
        databake::test_bake!(
            CodePointInversionList<'static>,
            const,
            unsafe {
                #[allow(unused_unsafe)]
                crate::codepointinvlist::CodePointInversionList::from_parts_unchecked(
                    unsafe {
                        zerovec::ZeroVec::from_bytes_unchecked(
                            b"0\0\0\0:\0\0\0A\0\0\0G\0\0\0a\0\0\0g\0\0\0",
                        )
                    },
                    22u32,
                )
            },
            icu_collections,
            [zerovec],
        );
    }
}