icu_collections/codepointinvliststringlist/
mod.rs
1use crate::codepointinvlist::{
13 CodePointInversionList, CodePointInversionListBuilder, CodePointInversionListError,
14 CodePointInversionListULE,
15};
16use alloc::string::{String, ToString};
17use alloc::vec::Vec;
18use displaydoc::Display;
19use yoke::Yokeable;
20use zerofrom::ZeroFrom;
21use zerovec::{VarZeroSlice, VarZeroVec};
22
/// A set made of code points and strings.
///
/// Code points are held in a [`CodePointInversionList`]; strings are held in a
/// [`VarZeroVec`]`<str>`. Membership tests for strings use a binary search over
/// `str_list` (see `contains`), so that list must be kept in sorted order.
23#[zerovec::make_varule(CodePointInversionListAndStringListULE)]
27#[zerovec::skip_derive(Ord)]
28#[zerovec::derive(Debug)]
29#[derive(Debug, Eq, PartialEq, Clone, Yokeable, ZeroFrom)]
30#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
32#[cfg_attr(feature = "serde", zerovec::derive(Serialize, Deserialize, Debug))]
33pub struct CodePointInversionListAndStringList<'data> {
    // The code points of the set.
34 #[cfg_attr(feature = "serde", serde(borrow))]
35 #[zerovec::varule(CodePointInversionListULE)]
36 cp_inv_list: CodePointInversionList<'data>,
    // The strings of the set. `try_from` rejects lists that are unsorted or contain
    // duplicates, and `FromIterator` sorts and de-duplicates before storing.
    // Single-character strings are expected to live in `cp_inv_list` instead:
    // both `contains` and `FromIterator` route them there.
37 #[cfg_attr(feature = "serde", serde(borrow))]
42 str_list: VarZeroVec<'data, str>,
43}
44
#[cfg(feature = "databake")]
impl databake::Bake for CodePointInversionListAndStringList<'_> {
    /// Emits a token stream that rebuilds this value through
    /// `from_parts_unchecked`, after registering `icu_collections` with `env`.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        env.insert("icu_collections");
        // Bake the code points first, then the strings (same order as the
        // arguments of `from_parts_unchecked`).
        let baked_code_points = self.cp_inv_list.bake(env);
        let baked_strings = self.str_list.bake(env);
        databake::quote! {
            icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList::from_parts_unchecked(#baked_code_points, #baked_strings)
        }
    }
}
57
58impl<'data> CodePointInversionListAndStringList<'data> {
59 pub fn try_from(
62 cp_inv_list: CodePointInversionList<'data>,
63 str_list: VarZeroVec<'data, str>,
64 ) -> Result<Self, CodePointInversionListAndStringListError> {
65 {
72 let mut it = str_list.iter();
73 if let Some(mut x) = it.next() {
74 if x.len() == 1 {
75 return Err(
76 CodePointInversionListAndStringListError::InvalidStringLength(
77 x.to_string(),
78 ),
79 );
80 }
81 for y in it {
82 if x.len() == 1 {
83 return Err(
84 CodePointInversionListAndStringListError::InvalidStringLength(
85 x.to_string(),
86 ),
87 );
88 } else if x == y {
89 return Err(
90 CodePointInversionListAndStringListError::StringListNotUnique(
91 x.to_string(),
92 ),
93 );
94 } else if x > y {
95 return Err(
96 CodePointInversionListAndStringListError::StringListNotSorted(
97 x.to_string(),
98 y.to_string(),
99 ),
100 );
101 }
102
103 x = y;
105 }
106 }
107 }
108
109 Ok(CodePointInversionListAndStringList {
110 cp_inv_list,
111 str_list,
112 })
113 }
114
115 #[doc(hidden)]
116 pub const fn from_parts_unchecked(
117 cp_inv_list: CodePointInversionList<'data>,
118 str_list: VarZeroVec<'data, str>,
119 ) -> Self {
120 CodePointInversionListAndStringList {
121 cp_inv_list,
122 str_list,
123 }
124 }
125
126 pub fn size(&self) -> usize {
130 self.cp_inv_list.size() + self.str_list.len()
131 }
132
133 pub fn has_strings(&self) -> bool {
135 !self.str_list.is_empty()
136 }
137
138 pub fn contains(&self, s: &str) -> bool {
160 let mut chars = s.chars();
161 if let Some(first_char) = chars.next() {
162 if chars.next().is_none() {
163 return self.contains_char(first_char);
164 }
165 }
166 self.str_list.binary_search(s).is_ok()
167 }
168
169 pub fn contains32(&self, cp: u32) -> bool {
189 self.cp_inv_list.contains32(cp)
190 }
191
192 pub fn contains_char(&self, ch: char) -> bool {
212 self.contains32(ch as u32)
213 }
214
215 pub fn code_points(&self) -> &CodePointInversionList<'data> {
217 &self.cp_inv_list
218 }
219
220 pub fn strings(&self) -> &VarZeroSlice<str> {
222 &self.str_list
223 }
224}
225
226impl<'a> FromIterator<&'a str> for CodePointInversionListAndStringList<'_> {
227 fn from_iter<I>(it: I) -> Self
228 where
229 I: IntoIterator<Item = &'a str>,
230 {
231 let mut builder = CodePointInversionListBuilder::new();
232 let mut strings = Vec::<&str>::new();
233 for s in it {
234 let mut chars = s.chars();
235 if let Some(first_char) = chars.next() {
236 if chars.next().is_none() {
237 builder.add_char(first_char);
238 continue;
239 }
240 }
241 strings.push(s);
242 }
243
244 strings.sort_unstable();
247 strings.dedup();
248
249 let cp_inv_list = builder.build();
250 let str_list = VarZeroVec::<str>::from(&strings);
251
252 CodePointInversionListAndStringList {
253 cp_inv_list,
254 str_list,
255 }
256 }
257}
258
/// Errors reported when constructing a [`CodePointInversionListAndStringList`].
///
/// Re-exported from this module as `Error`.
259#[derive(Display, Debug)]
263pub enum CodePointInversionListAndStringListError {
    /// Wraps an error from the code point inversion list.
    // NOTE(review): no code visible in this file constructs this variant.
264 #[displaydoc("Invalid code point inversion list: {0:?}")]
266 InvalidCodePointInversionList(CodePointInversionListError),
    /// A string in the string list has a length that is not allowed; carries that string.
267 #[displaydoc("Invalid string length for string: {0}")]
269 InvalidStringLength(String),
    /// The string list contains the same string twice; carries the duplicated string.
270 #[displaydoc("String list has duplicate: {0}")]
272 StringListNotUnique(String),
    /// Two adjacent strings are out of order; carries them in the order they appeared.
273 #[displaydoc("Strings in string list not in sorted order: ({0}, {1})")]
275 StringListNotSorted(String, String),
276}
277
278#[doc(no_inline)]
279pub use CodePointInversionListAndStringListError as Error;
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Inversion list used by the size tests; it contributes four code points.
    const FOUR_CODE_POINTS: &[u32] = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
    /// Minimal inversion list used by the validation-failure tests.
    const ONE_CODE_POINT: &[u32] = &[0, 1];

    /// Runs `try_from` on a cloned inversion list and a freshly built string list.
    fn build<'a>(
        code_points: &'a [u32],
        strings: &'a [&'a str],
    ) -> Result<CodePointInversionListAndStringList<'a>, CodePointInversionListAndStringListError>
    {
        let cp_list =
            CodePointInversionList::try_clone_from_inversion_list_slice(code_points).unwrap();
        let str_list = VarZeroVec::<str>::from(strings);
        CodePointInversionListAndStringList::try_from(cp_list, str_list)
    }

    #[test]
    fn test_size_has_strings() {
        let set = build(
            FOUR_CODE_POINTS,
            &["ascii_max", "bmp_max", "unicode_max", "zero"],
        )
        .unwrap();

        assert!(set.has_strings());
        assert_eq!(8, set.size());
    }

    #[test]
    fn test_empty_string_allowed() {
        let set = build(
            FOUR_CODE_POINTS,
            &["", "ascii_max", "bmp_max", "unicode_max", "zero"],
        )
        .unwrap();

        assert!(set.has_strings());
        assert_eq!(9, set.size());
    }

    #[test]
    fn test_invalid_string() {
        let outcome = build(ONE_CODE_POINT, &["a"]);

        assert!(matches!(
            outcome,
            Err(CodePointInversionListAndStringListError::InvalidStringLength(_))
        ));
    }

    #[test]
    fn test_invalid_string_list_has_duplicate() {
        let outcome = build(ONE_CODE_POINT, &["abc", "abc"]);

        assert!(matches!(
            outcome,
            Err(CodePointInversionListAndStringListError::StringListNotUnique(_))
        ));
    }

    #[test]
    fn test_invalid_string_list_not_sorted() {
        let outcome = build(ONE_CODE_POINT, &["xyz", "abc"]);

        assert!(matches!(
            outcome,
            Err(CodePointInversionListAndStringListError::StringListNotSorted(_, _))
        ));
    }

    #[test]
    fn test_from_iter_invariants() {
        // Same elements in a different order, with a duplicate in each input.
        let first = CodePointInversionListAndStringList::from_iter(["a", "abc", "xyz", "abc"]);
        let second = CodePointInversionListAndStringList::from_iter(["xyz", "abc", "a", "abc"]);

        assert_eq!(first, second);

        // String membership.
        assert!(first.has_strings());
        assert!(first.contains("abc"));
        assert!(first.contains("xyz"));
        assert!(!first.contains("def"));

        // Code point membership: only 'a' was supplied as a single character.
        assert_eq!(1, first.cp_inv_list.size());
        assert!(first.contains_char('a'));
        assert!(!first.contains_char('0'));
        assert!(!first.contains_char('q'));

        // One code point plus two distinct strings.
        assert_eq!(3, first.size());
    }
}