1#[cfg(feature = "serde")]
6use alloc::format;
7#[cfg(feature = "serde")]
8use alloc::string::String;
9#[cfg(feature = "alloc")]
10use alloc::vec::Vec;
11use core::{char, ops::RangeBounds, ops::RangeInclusive};
12use potential_utf::PotentialCodePoint;
13use yoke::Yokeable;
14use zerofrom::ZeroFrom;
15use zerovec::{ule::AsULE, zerovec, ZeroVec};
16
17use super::InvalidSetError;
18use crate::codepointinvlist::utils::{deconstruct_range, is_valid_zv};
19
20const BMP_MAX: u32 = 0xFFFF;
22
23const BMP_INV_LIST_VEC: ZeroVec<PotentialCodePoint> = ::zerovec::ZeroSlice::<PotentialCodePoint>::from_ule_slice(const {
&[PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24(0x0)),
PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24(BMP_MAX
+ 1))]
}).as_zerovec()zerovec!(PotentialCodePoint; PotentialCodePoint::to_unaligned; [PotentialCodePoint::from_u24(0x0), PotentialCodePoint::from_u24(BMP_MAX + 1)]);
25
26const ALL_VEC: ZeroVec<PotentialCodePoint> = ::zerovec::ZeroSlice::<PotentialCodePoint>::from_ule_slice(const {
&[PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24(0x0)),
PotentialCodePoint::to_unaligned(PotentialCodePoint::from_u24((char::MAX
as u32) + 1))]
}).as_zerovec()zerovec!(PotentialCodePoint; PotentialCodePoint::to_unaligned; [PotentialCodePoint::from_u24(0x0), PotentialCodePoint::from_u24((char::MAX as u32) + 1)]);
28
29impl core::fmt::Debug for CodePointInversionListULE {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
let this =
<CodePointInversionList as
zerovec::__zerovec_internal_reexport::ZeroFrom<CodePointInversionListULE>>::zero_from(self);
<CodePointInversionList as core::fmt::Debug>::fmt(&this, f)
}
}#[zerovec::make_varule(CodePointInversionListULE)]
34#[zerovec::skip_derive(Ord)]
35#[zerovec::derive(Debug)]
36#[derive(#[automatically_derived]
impl<'data> ::core::fmt::Debug for CodePointInversionList<'data> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f,
"CodePointInversionList", "inv_list", &self.inv_list, "size",
&&self.size)
}
}Debug, #[automatically_derived]
impl<'data> ::core::cmp::Eq for CodePointInversionList<'data> {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_receiver_is_total_eq(&self) -> () {
let _:
::core::cmp::AssertParamIsEq<ZeroVec<'data,
PotentialCodePoint>>;
let _: ::core::cmp::AssertParamIsEq<u32>;
}
}Eq, #[automatically_derived]
impl<'data> ::core::cmp::PartialEq for CodePointInversionList<'data> {
#[inline]
fn eq(&self, other: &CodePointInversionList<'data>) -> bool {
self.size == other.size && self.inv_list == other.inv_list
}
}PartialEq, #[automatically_derived]
impl<'data> ::core::clone::Clone for CodePointInversionList<'data> {
#[inline]
fn clone(&self) -> CodePointInversionList<'data> {
CodePointInversionList {
inv_list: ::core::clone::Clone::clone(&self.inv_list),
size: ::core::clone::Clone::clone(&self.size),
}
}
}Clone, unsafe impl<'a> yoke::Yokeable<'a> for CodePointInversionList<'static> where
{
type Output = CodePointInversionList<'a>;
#[inline]
fn transform(&'a self) -> &'a Self::Output { self }
#[inline]
fn transform_owned(self) -> Self::Output { self }
#[inline]
unsafe fn make(this: Self::Output) -> Self {
use core::{mem, ptr};
if true {
if !(mem::size_of::<Self::Output>() == mem::size_of::<Self>()) {
::core::panicking::panic("assertion failed: mem::size_of::<Self::Output>() == mem::size_of::<Self>()")
};
};
let ptr: *const Self = (&this as *const Self::Output).cast();
#[allow(forgetting_copy_types, clippy :: forget_copy, clippy ::
forget_non_drop, clippy :: mem_forget)]
mem::forget(this);
ptr::read(ptr)
}
#[inline]
fn transform_mut<F>(&'a mut self, f: F) where F: 'static +
for<'b> FnOnce(&'b mut Self::Output) {
unsafe {
f(core::mem::transmute::<&'a mut Self,
&'a mut Self::Output>(self))
}
}
}Yokeable, impl<'zf, 'zf_inner>
zerofrom::ZeroFrom<'zf, CodePointInversionList<'zf_inner>> for
CodePointInversionList<'zf> where {
fn zero_from(this: &'zf CodePointInversionList<'zf_inner>) -> Self {
match *this {
CodePointInversionList {
inv_list: ref __binding_0, size: ref __binding_1 } => {
CodePointInversionList {
inv_list: <ZeroVec<'zf, PotentialCodePoint> as
zerofrom::ZeroFrom<'zf,
ZeroVec<'zf_inner,
PotentialCodePoint>>>::zero_from(__binding_0),
size: *__binding_1,
}
}
}
}
}ZeroFrom)]
37#[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV, ToOwned))]
38pub struct CodePointInversionList<'data> {
39 inv_list: ZeroVec<'data, PotentialCodePoint>,
45 size: u32,
46}
47
#[cfg(feature = "serde")]
impl<'de: 'a, 'a> serde::Deserialize<'de> for CodePointInversionList<'a> {
    /// Deserializes a set.
    ///
    /// Human-readable formats carry a sequence of strings, each being either a
    /// single code point or `start-end` (both inclusive), where a code point
    /// is whatever `UnicodeCodePoint::parse` accepts. Other formats carry the
    /// raw `ZeroVec` of end points. In both cases the resulting list goes
    /// through `try_from_inversion_list`, and a rejection is reported as a
    /// custom serde error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::Error;

        /// Splits one textual entry into its inclusive `(start, end)` bounds.
        fn internal(text: &str) -> Option<(u32, u32)> {
            let (first, rest) = UnicodeCodePoint::parse(text)?;
            if rest.is_empty() {
                // A lone code point is a range of length one.
                return Some((first.0, first.0));
            }
            let (separator, rest) = UnicodeCodePoint::parse(rest)?;
            if separator.0 != u32::from('-') {
                return None;
            }
            let (last, rest) = UnicodeCodePoint::parse(rest)?;
            // Anything left over after the end bound makes the entry invalid.
            rest.is_empty().then_some((first.0, last.0))
        }

        let parsed_inv_list = if !deserializer.is_human_readable() {
            ZeroVec::<PotentialCodePoint>::deserialize(deserializer)?
        } else {
            let parsed_strings = Vec::<alloc::borrow::Cow<'de, str>>::deserialize(deserializer)?;
            let mut end_points = Vec::with_capacity(parsed_strings.len() * 2);
            for range in parsed_strings {
                let Some((start, end)) = internal(&range) else {
                    return Err(Error::custom(format!(
                        "Cannot deserialize invalid inversion list for CodePointInversionList: {range:?}"
                    )));
                };
                // Stored limits are exclusive, hence `end + 1`.
                end_points.push(PotentialCodePoint::from_u24(start).to_unaligned());
                end_points.push(PotentialCodePoint::from_u24(end + 1).to_unaligned());
            }
            ZeroVec::<PotentialCodePoint>::new_owned(end_points)
        };

        match CodePointInversionList::try_from_inversion_list(parsed_inv_list) {
            Ok(set) => Ok(set),
            Err(e) => Err(Error::custom(format!(
                "Cannot deserialize invalid inversion list for CodePointInversionList: {e:?}"
            ))),
        }
    }
}
91
#[cfg(feature = "databake")]
impl databake::Bake for CodePointInversionList<'_> {
    /// Emits tokens that rebuild this set through `from_parts_unchecked`,
    /// passing the baked end points and the baked cached size.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        env.insert("icu_collections");
        let (inv_list, size) = (self.inv_list.bake(env), self.size.bake(env));
        databake::quote! {
            unsafe {
                #[allow(unused_unsafe)]
                icu_collections::codepointinvlist::CodePointInversionList::from_parts_unchecked(#inv_list, #size)
            }
        }
    }
}
105
#[cfg(feature = "databake")]
impl databake::BakeSize for CodePointInversionList<'_> {
    /// Reports the borrowed size of the end-point list; `size` is not counted.
    fn borrows_size(&self) -> usize {
        let Self { inv_list, .. } = self;
        inv_list.borrows_size()
    }
}
112
/// A `u32` used as a code point in the human-readable serde form.
#[cfg(feature = "serde")]
#[derive(Debug, Copy, Clone)]
struct UnicodeCodePoint(u32);

#[cfg(feature = "serde")]
impl UnicodeCodePoint {
    /// Wraps `cp`, failing with a message when it is above `char::MAX`.
    fn from_u32(cp: u32) -> Result<Self, String> {
        if cp > char::MAX as u32 {
            return Err(format!("Not a Unicode code point {cp}"));
        }
        Ok(Self(cp))
    }

    /// Reads one code point from the front of `value`, returning it together
    /// with the unread remainder.
    ///
    /// A `U+` prefix must be followed by exactly four characters that parse as
    /// a base-16 number; otherwise the first `char` of `value` is taken
    /// literally. Returns `None` when `value` is empty or the escape is
    /// malformed.
    fn parse(value: &str) -> Option<(Self, &str)> {
        match value.strip_prefix("U+") {
            Some(hex) => {
                let escape = hex.get(..4)?;
                let remainder = hex.get(4..)?;
                let cp = u32::from_str_radix(escape, 16).ok()?;
                Some((Self(cp), remainder))
            }
            None => {
                let c = value.chars().next()?;
                let remainder = value.get(c.len_utf8()..)?;
                Some((Self(c as u32), remainder))
            }
        }
    }
}
137
#[cfg(feature = "serde")]
impl core::fmt::Display for UnicodeCodePoint {
    /// Writes values in `0xD800..=0xDFFF` as `U+` followed by upper-case hex;
    /// every other value is written as the character itself, or as
    /// `char::REPLACEMENT_CHARACTER` if it is not a valid `char`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let s = self.0;
        if (0xD800..=0xDFFF).contains(&s) {
            return write!(f, "U+{s:X}");
        }
        let printable = char::from_u32(s).unwrap_or(char::REPLACEMENT_CHARACTER);
        write!(f, "{}", printable)
    }
}
153
#[cfg(feature = "serde")]
impl serde::Serialize for CodePointInversionList<'_> {
    /// Serializes the set.
    ///
    /// Human-readable formats get a sequence with one string per range:
    /// `start` for a single code point, `start-end` (inclusive) otherwise.
    /// Other formats get the raw end-point list.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::{Error, SerializeSeq};

        if !serializer.is_human_readable() {
            return self.inv_list.serialize(serializer);
        }

        let mut seq = serializer.serialize_seq(Some(self.inv_list.len() / 2))?;
        for range in self.iter_ranges() {
            let (first, last) = range.into_inner();
            let start = UnicodeCodePoint::from_u32(first).map_err(S::Error::custom)?;
            let rendered = if first == last {
                format!("{start}")
            } else {
                let end = UnicodeCodePoint::from_u32(last).map_err(S::Error::custom)?;
                format!("{start}-{end}",)
            };
            seq.serialize_element(&rendered)?;
        }
        seq.end()
    }
}
182
183impl<'data> CodePointInversionList<'data> {
184 pub fn try_from_inversion_list(
222 inv_list: ZeroVec<'data, PotentialCodePoint>,
223 ) -> Result<Self, InvalidSetError> {
224 #[expect(clippy::indexing_slicing)] if is_valid_zv(&inv_list) {
226 let size = inv_list
227 .as_ule_slice()
228 .chunks(2)
229 .map(|end_points| {
230 u32::from(<PotentialCodePoint as AsULE>::from_unaligned(end_points[1]))
231 - u32::from(<PotentialCodePoint as AsULE>::from_unaligned(end_points[0]))
232 })
233 .sum::<u32>();
234 Ok(Self { inv_list, size })
235 } else {
236 Err(InvalidSetError(
237 #[cfg(feature = "alloc")]
238 inv_list.to_vec(),
239 ))
240 }
241 }
242
243 #[doc(hidden)] pub const unsafe fn from_parts_unchecked(
246 inv_list: ZeroVec<'data, PotentialCodePoint>,
247 size: u32,
248 ) -> Self {
249 Self { inv_list, size }
250 }
251
252 #[cfg(feature = "alloc")]
285 pub fn try_from_u32_inversion_list_slice(inv_list: &[u32]) -> Result<Self, InvalidSetError> {
286 let inv_list_zv: ZeroVec<PotentialCodePoint> = inv_list
287 .iter()
288 .copied()
289 .map(PotentialCodePoint::from_u24)
290 .collect();
291 CodePointInversionList::try_from_inversion_list(inv_list_zv)
292 }
293
294 #[cfg(feature = "alloc")]
298 pub fn into_owned(self) -> CodePointInversionList<'static> {
299 CodePointInversionList {
300 inv_list: self.inv_list.into_owned(),
301 size: self.size,
302 }
303 }
304
305 #[cfg(feature = "alloc")]
309 pub fn get_inversion_list_vec(&self) -> Vec<u32> {
310 self.as_inversion_list().iter().map(u32::from).collect()
311 }
312
313 pub fn all() -> Self {
333 Self {
334 inv_list: ALL_VEC,
335 size: (char::MAX as u32) + 1,
336 }
337 }
338
339 pub fn bmp() -> Self {
361 Self {
362 inv_list: BMP_INV_LIST_VEC,
363 size: BMP_MAX + 1,
364 }
365 }
366
367 #[cfg(feature = "alloc")]
371 pub(crate) fn as_inversion_list(&self) -> &ZeroVec<'_, PotentialCodePoint> {
372 &self.inv_list
373 }
374
375 pub fn iter_chars(&self) -> impl Iterator<Item = char> + '_ {
394 #[expect(clippy::indexing_slicing)] self.inv_list
396 .as_ule_slice()
397 .chunks(2)
398 .flat_map(|pair| {
399 u32::from(PotentialCodePoint::from_unaligned(pair[0]))
400 ..u32::from(PotentialCodePoint::from_unaligned(pair[1]))
401 })
402 .filter_map(char::from_u32)
403 }
404
405 pub fn iter_ranges(&self) -> impl ExactSizeIterator<Item = RangeInclusive<u32>> + '_ {
427 #[expect(clippy::indexing_slicing)] self.inv_list.as_ule_slice().chunks(2).map(|pair| {
429 let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0]));
430 let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1]));
431 range_start..=(range_limit - 1)
432 })
433 }
434
435 pub fn iter_ranges_complemented(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
458 let inv_ule = self.inv_list.as_ule_slice();
459 let middle = inv_ule.get(1..inv_ule.len() - 1).unwrap_or(&[]);
460 let beginning = if let Some(first) = self.inv_list.first() {
461 let first = u32::from(first);
462 if first == 0 {
463 None
464 } else {
465 Some(0..=first - 1)
466 }
467 } else {
468 None
469 };
470 let end = if let Some(last) = self.inv_list.last() {
471 let last = u32::from(last);
472 if last == char::MAX as u32 {
473 None
474 } else {
475 Some(last..=char::MAX as u32)
476 }
477 } else {
478 None
479 };
480 #[expect(clippy::indexing_slicing)] let chunks = middle.chunks(2).map(|pair| {
482 let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0]));
483 let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1]));
484 range_start..=(range_limit - 1)
485 });
486 beginning.into_iter().chain(chunks).chain(end)
487 }
488
489 pub fn get_range_count(&self) -> usize {
491 self.inv_list.len() / 2
492 }
493
494 pub fn get_nth_range(&self, idx: usize) -> Option<RangeInclusive<u32>> {
497 let start_idx = idx * 2;
498 let end_idx = start_idx + 1;
499 let start = u32::from(self.inv_list.get(start_idx)?);
500 let end = u32::from(self.inv_list.get(end_idx)?);
501 Some(start..=(end - 1))
502 }
503
504 pub fn size(&self) -> usize {
506 if self.is_empty() {
507 return 0;
508 }
509 self.size as usize
510 }
511
512 pub fn is_empty(&self) -> bool {
514 self.inv_list.is_empty()
515 }
516
517 fn contains_query(&self, query: u32) -> Option<usize> {
522 let query = PotentialCodePoint::try_from(query).ok()?;
523 match self.inv_list.binary_search(&query) {
524 Ok(pos) => {
525 if pos % 2 == 0 {
526 Some(pos)
527 } else {
528 None
529 }
530 }
531 Err(pos) => {
532 if pos % 2 != 0 && pos < self.inv_list.len() {
533 Some(pos - 1)
534 } else {
535 None
536 }
537 }
538 }
539 }
540
541 pub fn contains(&self, query: char) -> bool {
559 self.contains_query(query as u32).is_some()
560 }
561
562 pub fn contains32(&self, query: u32) -> bool {
585 self.contains_query(query).is_some()
586 }
587
588 pub fn contains_range(&self, range: impl RangeBounds<char>) -> bool {
631 let (from, till) = deconstruct_range(range);
632 if from >= till {
633 return false;
634 }
635 match self.contains_query(from) {
636 Some(pos) => {
637 if let Some(x) = self.inv_list.get(pos + 1) {
638 (till) <= x.into()
639 } else {
640 if true {
if !false {
{
::core::panicking::panic_fmt(format_args!("Inversion list query should not return out of bounds index"));
}
};
};debug_assert!(
641 false,
642 "Inversion list query should not return out of bounds index"
643 );
644 false
645 }
646 }
647 None => false,
648 }
649 }
650
651 pub fn contains_set(&self, set: &Self) -> bool {
679 if set.size() > self.size() {
680 return false;
681 }
682
683 let mut set_ranges = set.iter_ranges();
684 let mut check_elem = set_ranges.next();
685
686 let ranges = self.iter_ranges();
687 for range in ranges {
688 match check_elem {
689 Some(ref check_range) => {
690 if check_range.start() >= range.start()
691 && check_range.end() <= &(range.end() + 1)
692 {
693 check_elem = set_ranges.next();
694 }
695 }
696 _ => break,
697 }
698 }
699 check_elem.is_none()
700 }
701
702 pub fn span(&self, span_str: &str, contained: bool) -> usize {
720 span_str
721 .chars()
722 .take_while(|&x| self.contains(x) == contained)
723 .count()
724 }
725
726 pub fn span_back(&self, span_str: &str, contained: bool) -> usize {
743 span_str.len()
744 - span_str
745 .chars()
746 .rev()
747 .take_while(|&x| self.contains(x) == contained)
748 .count()
749 }
750}
751
#[cfg(test)]
mod tests {
    use super::{CodePointInversionList, InvalidSetError};
    use std::{char, vec::Vec};
    use zerovec::ZeroVec;

    // Construction from a `u32` slice.

    #[test]
    fn test_codepointinversionlist_try_from_vec() {
        let ex = vec![0x2, 0x3, 0x4, 0x5];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(ex, check.get_inversion_list_vec());
        // Two ranges of one element each.
        assert_eq!(2, check.size());
    }

    #[test]
    fn test_codepointinversionlist_try_from_vec_error() {
        let check = vec![0x1, 0x1, 0x2, 0x3, 0x4];
        let set = CodePointInversionList::try_from_u32_inversion_list_slice(&check);
        assert!(matches!(set, Err(InvalidSetError(_))));
        // The error hands back the rejected list unchanged.
        if let Err(InvalidSetError(actual)) = set {
            assert_eq!(
                &check,
                &actual.into_iter().map(u32::from).collect::<Vec<_>>()
            );
        }
    }

    // Membership queries.

    #[test]
    fn test_codepointinversionlist_contains_query() {
        let ex = vec![0x41, 0x46, 0x4B, 0x55];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert!(check.contains_query(0x40).is_none());
        assert_eq!(check.contains_query(0x41).unwrap(), 0);
        assert_eq!(check.contains_query(0x44).unwrap(), 0);
        // 0x46 is the exclusive limit of the first range.
        assert!(check.contains_query(0x46).is_none());
        assert_eq!(check.contains_query(0x4C).unwrap(), 2);
        assert!(check.contains_query(0x56).is_none());
    }

    #[test]
    fn test_codepointinversionlist_contains() {
        let ex = vec![0x2, 0x5, 0xA, 0xF];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert!(check.contains(0x2 as char));
        assert!(check.contains(0x4 as char));
        assert!(check.contains(0xA as char));
        assert!(check.contains(0xE as char));
    }

    #[test]
    fn test_codepointinversionlist_contains_false() {
        let ex = vec![0x2, 0x5, 0xA, 0xF];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert!(!check.contains(0x1 as char));
        assert!(!check.contains(0x5 as char));
        assert!(!check.contains(0x9 as char));
        assert!(!check.contains(0xF as char));
        assert!(!check.contains(0x10 as char));
    }

    #[test]
    fn test_codepointinversionlist_contains_range() {
        let ex = vec![0x41, 0x46, 0x4B, 0x55];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        // 'A' is 0x41 and 'E' is 0x45: the whole first range.
        assert!(check.contains_range('A'..='E'));
        assert!(check.contains_range('C'..'D'));
        assert!(check.contains_range('L'..'P'));
        // 'U' is 0x55, the exclusive limit of the second range.
        assert!(!check.contains_range('L'..='U'));
    }

    #[test]
    fn test_codepointinversionlist_contains_range_false() {
        let ex = vec![0x41, 0x46, 0x4B, 0x55];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert!(!check.contains_range('!'..'A'));
        assert!(!check.contains_range('F'..'K'));
        assert!(!check.contains_range('U'..));
    }

    #[test]
    fn test_codepointinversionlist_contains_range_invalid() {
        let check = CodePointInversionList::all();
        // Reversed and empty ranges are never contained, even in `all()`.
        assert!(!check.contains_range('A'..'!'));
        assert!(!check.contains_range('A'..'A'));
    }

    #[test]
    fn test_codepointinversionlist_contains_set_u() {
        let ex = vec![0xA, 0x14, 0x28, 0x32, 0x46, 0x50, 0x64, 0x6E];
        let u = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        let inside = vec![0xF, 0x14, 0x2C, 0x31, 0x46, 0x50, 0x64, 0x6D];
        let s = CodePointInversionList::try_from_u32_inversion_list_slice(&inside).unwrap();
        assert!(u.contains_set(&s));
    }

    #[test]
    fn test_codepointinversionlist_contains_set_u_false() {
        let ex = vec![0xA, 0x14, 0x28, 0x32, 0x46, 0x50, 0x64, 0x78];
        let u = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        let outside = vec![0x0, 0xA, 0x16, 0x2C, 0x32, 0x46, 0x4F, 0x51, 0x6D, 0x6F];
        let s = CodePointInversionList::try_from_u32_inversion_list_slice(&outside).unwrap();
        assert!(!u.contains_set(&s));
    }

    // Size and emptiness.

    #[test]
    fn test_codepointinversionlist_size() {
        let ex = vec![0x2, 0x5, 0xA, 0xF];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        // (0x5 - 0x2) + (0xF - 0xA) = 3 + 5.
        assert_eq!(8, check.size());
        let check = CodePointInversionList::all();
        let expected = (char::MAX as u32) + 1;
        assert_eq!(expected as usize, check.size());
        let inv_list_vec = vec![];
        let check = CodePointInversionList {
            inv_list: ZeroVec::from_slice_or_alloc(&inv_list_vec),
            size: 0,
        };
        assert_eq!(check.size(), 0);
    }

    #[test]
    fn test_codepointinversionlist_is_empty() {
        let inv_list_vec = vec![];
        let check = CodePointInversionList {
            inv_list: ZeroVec::from_slice_or_alloc(&inv_list_vec),
            size: 0,
        };
        assert!(check.is_empty());
    }

    #[test]
    fn test_codepointinversionlist_is_not_empty() {
        let check = CodePointInversionList::all();
        assert!(!check.is_empty());
    }

    // Iteration.

    #[test]
    fn test_codepointinversionlist_iter_chars() {
        let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        let mut iter = check.iter_chars();
        assert_eq!(Some('A'), iter.next());
        assert_eq!(Some('B'), iter.next());
        assert_eq!(Some('C'), iter.next());
        assert_eq!(Some('E'), iter.next());
        // 0xD800 is not a valid `char`, so it is skipped.
        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_codepointinversionlist_iter_ranges() {
        let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
        let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        let mut ranges = set.iter_ranges();
        assert_eq!(Some(0x41..=0x43), ranges.next());
        assert_eq!(Some(0x45..=0x45), ranges.next());
        assert_eq!(Some(0xD800..=0xD800), ranges.next());
        assert_eq!(None, ranges.next());
    }

    #[test]
    fn test_codepointinversionlist_iter_ranges_exactsizeiter_trait() {
        let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
        let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        let ranges = set.iter_ranges();
        assert_eq!(3, ranges.len());
    }

    #[test]
    fn test_codepointinversionlist_range_count() {
        let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
        let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(3, set.get_range_count());
    }

    #[test]
    fn test_codepointinversionlist_get_nth_range() {
        let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
        let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(Some(0x41..=0x43), set.get_nth_range(0));
        assert_eq!(Some(0x45..=0x45), set.get_nth_range(1));
        assert_eq!(Some(0xD800..=0xD800), set.get_nth_range(2));
        assert_eq!(None, set.get_nth_range(3));
    }

    // A range whose exclusive limit is one past `char::MAX` must still be
    // reported with `char::MAX` as its inclusive end.
    #[test]
    fn test_codepointinversionlist_iter_ranges_with_max_code_point() {
        let ex = vec![0x80, (char::MAX as u32) + 1];
        let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        let mut ranges = set.iter_ranges();
        assert_eq!(Some(0x80..=(char::MAX as u32)), ranges.next());
        assert_eq!(None, ranges.next());
    }

    // Spans. The list [0x41, 0x44, 0x46, 0x4B] is the set {A-C, F-J}.

    #[test]
    fn test_codepointinversionlist_span_contains() {
        let ex = vec![0x41, 0x44, 0x46, 0x4B];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(check.span("ABCDE", true), 3);
        assert_eq!(check.span("E", true), 0);
    }

    #[test]
    fn test_codepointinversionlist_span_does_not_contain() {
        let ex = vec![0x41, 0x44, 0x46, 0x4B];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(check.span("DEF", false), 2);
        assert_eq!(check.span("KLMA", false), 3);
    }

    #[test]
    fn test_codepointinversionlist_span_back_contains() {
        let ex = vec![0x41, 0x44, 0x46, 0x4B];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(check.span_back("XYZABFH", true), 3);
        assert_eq!(check.span_back("ABCXYZ", true), 6);
    }

    #[test]
    fn test_codepointinversionlist_span_back_does_not_contain() {
        let ex = vec![0x41, 0x44, 0x46, 0x4B];
        let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
        assert_eq!(check.span_back("ABCXYZ", false), 3);
        assert_eq!(check.span_back("XYZABC", false), 6);
    }

    // Round trip through the `u32` vector form.

    #[test]
    fn test_uniset_to_inv_list() {
        let inv_list = [
            0x9, 0xE, 0x20, 0x21, 0x85, 0x86, 0xA0, 0xA1, 0x1626, 0x1627, 0x2000, 0x2003, 0x2028,
            0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
        ];
        let s: CodePointInversionList =
            CodePointInversionList::try_from_u32_inversion_list_slice(&inv_list).unwrap();
        let round_trip_inv_list = s.get_inversion_list_vec();
        assert_eq!(
            round_trip_inv_list.into_iter().collect::<ZeroVec<u32>>(),
            inv_list
        );
    }

    // Serde: human-readable (JSON) and binary (postcard) forms.

    #[test]
    fn test_serde_serialize() {
        let inv_list = [0x41, 0x46, 0x4B, 0x55];
        let uniset = CodePointInversionList::try_from_u32_inversion_list_slice(&inv_list).unwrap();
        let json_str = serde_json::to_string(&uniset).unwrap();
        assert_eq!(json_str, r#"["A-E","K-T"]"#);
    }

    #[test]
    fn test_serde_serialize_surrogates() {
        let inv_list = [0xDFAB, 0xDFFF];
        let uniset = CodePointInversionList::try_from_u32_inversion_list_slice(&inv_list).unwrap();
        let json_str = serde_json::to_string(&uniset).unwrap();
        // Surrogates are written in `U+XXXX` form.
        assert_eq!(json_str, r#"["U+DFAB-U+DFFE"]"#);
    }

    #[test]
    fn test_serde_deserialize() {
        let inv_list_str = r#"["A-E","K-T"]"#;
        let exp_inv_list = [0x41, 0x46, 0x4B, 0x55];
        let exp_uniset =
            CodePointInversionList::try_from_u32_inversion_list_slice(&exp_inv_list).unwrap();
        let act_uniset: CodePointInversionList = serde_json::from_str(inv_list_str).unwrap();
        assert_eq!(act_uniset, exp_uniset);
    }

    #[test]
    fn test_serde_deserialize_surrogates() {
        let inv_list_str = r#"["U+DFAB-U+DFFE"]"#;
        let exp_inv_list = [0xDFAB, 0xDFFF];
        let exp_uniset =
            CodePointInversionList::try_from_u32_inversion_list_slice(&exp_inv_list).unwrap();
        let act_uniset: CodePointInversionList = serde_json::from_str(inv_list_str).unwrap();
        assert_eq!(act_uniset, exp_uniset);
    }

    #[test]
    fn test_serde_deserialize_invalid() {
        // The human-readable form expects strings, not bare numbers.
        assert!(serde_json::from_str::<CodePointInversionList>("[65,70,98775,85]").is_err());
        assert!(serde_json::from_str::<CodePointInversionList>("[65,70,U+FFFFFFFFFF,85]").is_err());
    }

    #[test]
    fn test_serde_with_postcard_roundtrip() -> Result<(), postcard::Error> {
        let set = CodePointInversionList::bmp();
        let set_serialized: Vec<u8> = postcard::to_allocvec(&set).unwrap();
        let set_deserialized: CodePointInversionList =
            postcard::from_bytes::<CodePointInversionList>(&set_serialized)?;

        assert_eq!(&set, &set_deserialized);
        // The binary form must deserialize without copying the end points.
        assert!(!set_deserialized.inv_list.is_owned());

        Ok(())
    }

    // Databake output.

    #[test]
    fn databake() {
        databake::test_bake!(
            CodePointInversionList<'static>,
            const,
            unsafe {
                #[allow(unused_unsafe)]
                crate::codepointinvlist::CodePointInversionList::from_parts_unchecked(
                    unsafe {
                        zerovec::ZeroVec::from_bytes_unchecked(
                            b"0\0\0\0:\0\0\0A\0\0\0G\0\0\0a\0\0\0g\0\0\0",
                        )
                    },
                    22u32,
                )
            },
            icu_collections,
            [zerovec],
        );
    }
}