icu_properties/sets.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! The functions in this module return a [`CodePointSetData`] containing
6//! the set of characters with a particular Unicode property.
7//!
8//! The descriptions of most properties are taken from [`TR44`], the documentation for the
9//! Unicode Character Database. Some properties are instead defined in [`TR18`], the
10//! documentation for Unicode regular expressions. In particular, Annex C of this document
11//! defines properties for POSIX compatibility.
12//!
13//! [`CodePointSetData`]: crate::sets::CodePointSetData
14//! [`TR44`]: https://www.unicode.org/reports/tr44
15//! [`TR18`]: https://www.unicode.org/reports/tr18
16
17use crate::error::PropertiesError;
18use crate::provider::*;
19use crate::*;
20use core::iter::FromIterator;
21use core::ops::RangeInclusive;
22use icu_collections::codepointinvlist::CodePointInversionList;
23use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
24use icu_provider::prelude::*;
25
26//
27// CodePointSet* structs, impls, & macros
28// (a set with only code points)
29//
30
/// A wrapper around code point set data. It is returned by APIs that return Unicode
/// property data in a set-like form, ex: a set of code points sharing the same
/// value for a Unicode property. Access its data via the borrowed version,
/// [`CodePointSetDataBorrowed`].
#[derive(Debug)]
pub struct CodePointSetData {
    // Payload stored under the erased marker so one struct serves every set property;
    // `from_data` casts property-specific payloads into this marker.
    data: DataPayload<ErasedSetlikeMarker>,
}
39
/// Private marker type for CodePointSetData
/// to work for all set properties at once
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedSetlikeMarker;
// Same yokeable as the property-specific markers accepted by
// `CodePointSetData::from_data`, which is what lets their payloads be cast to this one.
impl DataMarker for ErasedSetlikeMarker {
    type Yokeable = PropertyCodePointSetV1<'static>;
}
47
48impl CodePointSetData {
49 /// Construct a borrowed version of this type that can be queried.
50 ///
51 /// This owned version if returned by functions that use a runtime data provider.
52 #[inline]
53 pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> {
54 CodePointSetDataBorrowed {
55 set: self.data.get(),
56 }
57 }
58
59 /// Construct a new one from loaded data
60 ///
61 /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead
62 pub fn from_data<M>(data: DataPayload<M>) -> Self
63 where
64 M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
65 {
66 Self { data: data.cast() }
67 }
68
69 /// Construct a new owned [`CodePointInversionList`]
70 pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self {
71 let set = PropertyCodePointSetV1::from_code_point_inversion_list(set);
72 CodePointSetData::from_data(DataPayload::<ErasedSetlikeMarker>::from_owned(set))
73 }
74
75 /// Convert this type to a [`CodePointInversionList`] as a borrowed value.
76 ///
77 /// The data backing this is extensible and supports multiple implementations.
78 /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be
79 /// added, and users may select which at data generation time.
80 ///
81 /// This method returns an `Option` in order to return `None` when the backing data provider
82 /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time
83 /// constraint.
84 pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> {
85 self.data.get().as_code_point_inversion_list()
86 }
87
88 /// Convert this type to a [`CodePointInversionList`], borrowing if possible,
89 /// otherwise allocating a new [`CodePointInversionList`].
90 ///
91 /// The data backing this is extensible and supports multiple implementations.
92 /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be
93 /// added, and users may select which at data generation time.
94 ///
95 /// The performance of the conversion to this specific return type will vary
96 /// depending on the data structure that is backing `self`.
97 pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> {
98 self.data.get().to_code_point_inversion_list()
99 }
100}
101
/// A borrowed wrapper around code point set data, returned by
/// [`CodePointSetData::as_borrowed()`]. More efficient to query.
///
/// The type is `Copy`, so its query methods take `self` by value.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
    // Reference to the property data that all queries delegate to.
    set: &'a PropertyCodePointSetV1<'a>,
}
108
impl CodePointSetDataBorrowed<'static> {
    /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`].
    ///
    /// The resulting payload wraps the same `'static` reference held by `self`
    /// (via `DataPayload::from_static_ref`).
    ///
    /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some
    /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`].
    pub const fn static_to_owned(self) -> CodePointSetData {
        CodePointSetData {
            data: DataPayload::from_static_ref(self.set),
        }
    }
}
120
impl<'a> CodePointSetDataBorrowed<'a> {
    /// Check if the set contains a character
    ///
    /// ```rust
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    #[inline]
    pub fn contains(self, ch: char) -> bool {
        self.set.contains(ch)
    }

    /// Check if the set contains a character as a UTF32 code unit
    ///
    /// ```rust
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains32(0x0A69)); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains32(0x00C4)); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    #[inline]
    pub fn contains32(self, ch: u32) -> bool {
        self.set.contains32(ch)
    }

    /// Yields an [`Iterator`] returning the ranges of the code points that are
    /// included in the [`CodePointSetData`]
    ///
    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
    /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    /// let mut ranges = alphabetic.iter_ranges();
    ///
    /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z'
    /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z'
    /// ```
    #[inline]
    pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        self.set.iter_ranges()
    }

    /// Yields an [`Iterator`] returning the ranges of the code points that are
    /// *not* included in the [`CodePointSetData`]
    ///
    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
    /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    /// let mut ranges = alphabetic.iter_ranges_complemented();
    ///
    /// assert_eq!(Some(0x0000..=0x0040), ranges.next()); // everything before 'A'
    /// assert_eq!(Some(0x005B..=0x0060), ranges.next()); // between 'Z' and 'a'
    /// ```
    #[inline]
    pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
        self.set.iter_ranges_complemented()
    }
}
200
201//
202// UnicodeSet* structs, impls, & macros
203// (a set with code points + strings)
204//
205
/// A wrapper around `UnicodeSet` data (characters and strings)
///
/// Query it through the borrowed version, [`UnicodeSetDataBorrowed`], obtained from
/// [`UnicodeSetData::as_borrowed()`].
#[derive(Debug)]
pub struct UnicodeSetData {
    // Payload stored under the erased marker; `from_data` casts property-specific
    // payloads into this marker.
    data: DataPayload<ErasedUnicodeSetlikeMarker>,
}
211
/// Private marker type for UnicodeSetData: every payload handed to
/// `UnicodeSetData::from_data` is cast to this marker.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedUnicodeSetlikeMarker;
// Shares the `PropertyUnicodeSetV1` yokeable required by `UnicodeSetData::from_data`.
impl DataMarker for ErasedUnicodeSetlikeMarker {
    type Yokeable = PropertyUnicodeSetV1<'static>;
}
217
218impl UnicodeSetData {
219 /// Construct a borrowed version of this type that can be queried.
220 ///
221 /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
222 /// up front.
223 #[inline]
224 pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> {
225 UnicodeSetDataBorrowed {
226 set: self.data.get(),
227 }
228 }
229
230 /// Construct a new one from loaded data
231 ///
232 /// Typically it is preferable to use getters instead
233 pub fn from_data<M>(data: DataPayload<M>) -> Self
234 where
235 M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>,
236 {
237 Self { data: data.cast() }
238 }
239
240 /// Construct a new owned [`CodePointInversionListAndStringList`]
241 pub fn from_code_point_inversion_list_string_list(
242 set: CodePointInversionListAndStringList<'static>,
243 ) -> Self {
244 let set = PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set);
245 UnicodeSetData::from_data(DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(set))
246 }
247
248 /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value.
249 ///
250 /// The data backing this is extensible and supports multiple implementations.
251 /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
252 /// added, and users may select which at data generation time.
253 ///
254 /// This method returns an `Option` in order to return `None` when the backing data provider
255 /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time
256 /// constraint.
257 pub fn as_code_point_inversion_list_string_list(
258 &self,
259 ) -> Option<&CodePointInversionListAndStringList<'_>> {
260 self.data.get().as_code_point_inversion_list_string_list()
261 }
262
263 /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible,
264 /// otherwise allocating a new [`CodePointInversionListAndStringList`].
265 ///
266 /// The data backing this is extensible and supports multiple implementations.
267 /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
268 /// added, and users may select which at data generation time.
269 ///
270 /// The performance of the conversion to this specific return type will vary
271 /// depending on the data structure that is backing `self`.
272 pub fn to_code_point_inversion_list_string_list(
273 &self,
274 ) -> CodePointInversionListAndStringList<'_> {
275 self.data.get().to_code_point_inversion_list_string_list()
276 }
277}
278
/// A borrowed wrapper around `UnicodeSet` data (characters and strings), returned by
/// [`UnicodeSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct UnicodeSetDataBorrowed<'a> {
    // Reference to the property data that all queries delegate to.
    set: &'a PropertyUnicodeSetV1<'a>,
}
285
impl<'a> UnicodeSetDataBorrowed<'a> {
    /// Check if the set contains the string. Strings consisting of one character
    /// are treated as a character/code point.
    ///
    /// This matches ICU behavior for ICU's `UnicodeSet`.
    ///
    /// To test a single code point directly, see [`Self::contains_char()`] and
    /// [`Self::contains32()`].
    #[inline]
    pub fn contains(self, s: &str) -> bool {
        self.set.contains(s)
    }

    /// Check if the set contains a character as a UTF32 code unit
    ///
    /// This is the `u32` counterpart of [`Self::contains_char()`].
    #[inline]
    pub fn contains32(&self, cp: u32) -> bool {
        self.set.contains32(cp)
    }

    /// Check if the set contains the code point corresponding to the Rust character.
    ///
    /// This is the `char` counterpart of [`Self::contains32()`].
    #[inline]
    pub fn contains_char(&self, ch: char) -> bool {
        self.set.contains_char(ch)
    }
}
308
impl UnicodeSetDataBorrowed<'static> {
    /// Cheaply converts a [`UnicodeSetDataBorrowed<'static>`] into a [`UnicodeSetData`].
    ///
    /// The resulting payload wraps the same `'static` reference held by `self`
    /// (via `DataPayload::from_static_ref`).
    ///
    /// Note: Due to branching and indirection, using [`UnicodeSetData`] might inhibit some
    /// compile-time optimizations that are possible with [`UnicodeSetDataBorrowed`].
    pub const fn static_to_owned(self) -> UnicodeSetData {
        UnicodeSetData {
            data: DataPayload::from_static_ref(self.set),
        }
    }
}
320
321pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError>
322where
323 M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
324 P: DataProvider<M> + ?Sized,
325{
326 Ok(provider
327 .load(Default::default())
328 .and_then(DataResponse::take_payload)
329 .map(CodePointSetData::from_data)?)
330}
331
332//
333// Binary property getter fns
334// (data as code point sets)
335//
336
// Generates the two accessors for one binary (set-valued) property:
//
// * `$funcname(provider)`: forwards to `load_set_data` and returns an owned
//   `CodePointSetData` loaded from the given `DataProvider<$keyed_data_marker>`.
// * `$constname()`: compiled only with the `compiled_data` feature; a `const fn`
//   returning a `CodePointSetDataBorrowed<'static>` over
//   `crate::provider::Baked::$singleton_name`.
//
// The doc attributes written at the call site are attached to `$constname` only;
// `$funcname` receives a generated doc line that links back to `$constname`.
macro_rules! make_code_point_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton_name:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        ///
        /// Note that this will return an owned version of the data. Functionality is available on
        /// the borrowed version, accessible through [`CodePointSetData::as_borrowed`].
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<CodePointSetData, PropertiesError> {
            load_set_data(provider)
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> CodePointSetDataBorrowed<'static> {
            CodePointSetDataBorrowed {
                set: crate::provider::Baked::$singleton_name,
            }
        }
    }
}
368
make_code_point_set_property! {
    property: "ASCII_Hex_Digit";
    marker: AsciiHexDigitProperty;
    keyed_data_marker: AsciiHexDigitV1Marker;
    func:
    /// ASCII characters commonly used for the representation of hexadecimal numbers
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_ascii_hex_digit()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let ascii_hex_digit = sets::ascii_hex_digit();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1;
    pub fn load_ascii_hex_digit();
}
395
make_code_point_set_property! {
    property: "Alnum";
    marker: AlnumProperty;
    keyed_data_marker: AlnumV1Marker;
    func:
    /// Characters with the Alphabetic or Decimal_Number property
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_alnum()`].

    pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1;
    pub fn load_alnum();
}
407
make_code_point_set_property! {
    property: "Alphabetic";
    marker: AlphabeticProperty;
    keyed_data_marker: AlphabeticV1Marker;
    func:
    /// Alphabetic characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_alphabetic()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

    pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1;
    pub fn load_alphabetic();
}
435
make_code_point_set_property! {
    property: "Bidi_Control";
    marker: BidiControlProperty;
    keyed_data_marker: BidiControlV1Marker;
    func:
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_bidi_control()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let bidi_control = sets::bidi_control();
    ///
    /// assert!(bidi_control.contains32(0x200F));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

    pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1;
    pub fn load_bidi_control();
}
462
make_code_point_set_property! {
    property: "Bidi_Mirrored";
    marker: BidiMirroredProperty;
    keyed_data_marker: BidiMirroredV1Marker;
    func:
    /// Characters that are mirrored in bidirectional text
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_bidi_mirrored()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let bidi_mirrored = sets::bidi_mirrored();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

    pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1;
    pub fn load_bidi_mirrored();
}
490
make_code_point_set_property! {
    property: "Blank";
    marker: BlankProperty;
    keyed_data_marker: BlankV1Marker;
    func:
    /// Horizontal whitespace characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_blank()`].

    pub const fn blank() => SINGLETON_PROPS_BLANK_V1;
    pub fn load_blank();
}
501
make_code_point_set_property! {
    property: "Cased";
    marker: CasedProperty;
    keyed_data_marker: CasedV1Marker;
    func:
    /// Uppercase, lowercase, and titlecase characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_cased()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let cased = sets::cased();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

    pub const fn cased() => SINGLETON_PROPS_CASED_V1;
    pub fn load_cased();
}
527
make_code_point_set_property! {
    property: "Case_Ignorable";
    marker: CaseIgnorableProperty;
    keyed_data_marker: CaseIgnorableV1Marker;
    func:
    /// Characters which are ignored for casing purposes
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_case_ignorable()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let case_ignorable = sets::case_ignorable();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMDA
    /// ```

    pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1;
    pub fn load_case_ignorable();
}
553
make_code_point_set_property! {
    property: "Full_Composition_Exclusion";
    marker: FullCompositionExclusionProperty;
    keyed_data_marker: FullCompositionExclusionV1Marker;
    func:
    /// Characters that are excluded from composition
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_full_composition_exclusion()`].

    pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1;
    pub fn load_full_composition_exclusion();
}
565
make_code_point_set_property! {
    property: "Changes_When_Casefolded";
    marker: ChangesWhenCasefoldedProperty;
    keyed_data_marker: ChangesWhenCasefoldedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under case folding
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_changes_when_casefolded()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_casefolded = sets::changes_when_casefolded();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

    pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1;
    pub fn load_changes_when_casefolded();
}
591
make_code_point_set_property! {
    property: "Changes_When_Casemapped";
    marker: ChangesWhenCasemappedProperty;
    keyed_data_marker: ChangesWhenCasemappedV1Marker;
    func:
    /// Characters which may change when they undergo case mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_changes_when_casemapped()`].

    pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1;
    pub fn load_changes_when_casemapped();
}
602
make_code_point_set_property! {
    property: "Changes_When_NFKC_Casefolded";
    marker: ChangesWhenNfkcCasefoldedProperty;
    keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker;
    func:
    /// Characters which are not identical to their NFKC_Casefold mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_changes_when_nfkc_casefolded()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_nfkc_casefolded = sets::changes_when_nfkc_casefolded();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

    pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1;
    pub fn load_changes_when_nfkc_casefolded();
}
628
make_code_point_set_property! {
    property: "Changes_When_Lowercased";
    marker: ChangesWhenLowercasedProperty;
    keyed_data_marker: ChangesWhenLowercasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toLowercase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_changes_when_lowercased()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_lowercased = sets::changes_when_lowercased();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

    pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1;
    pub fn load_changes_when_lowercased();
}
654
make_code_point_set_property! {
    property: "Changes_When_Titlecased";
    marker: ChangesWhenTitlecasedProperty;
    keyed_data_marker: ChangesWhenTitlecasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toTitlecase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_changes_when_titlecased()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_titlecased = sets::changes_when_titlecased();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

    pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1;
    pub fn load_changes_when_titlecased();
}
680
make_code_point_set_property! {
    property: "Changes_When_Uppercased";
    marker: ChangesWhenUppercasedProperty;
    keyed_data_marker: ChangesWhenUppercasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toUppercase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_changes_when_uppercased()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_uppercased = sets::changes_when_uppercased();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

    pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1;
    pub fn load_changes_when_uppercased();
}
706
make_code_point_set_property! {
    property: "Dash";
    marker: DashProperty;
    keyed_data_marker: DashV1Marker;
    func:
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_dash()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let dash = sets::dash();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D HYPHEN-MINUS
    /// assert!(!dash.contains('='));  // U+003D EQUALS SIGN
    /// ```

    pub const fn dash() => SINGLETON_PROPS_DASH_V1;
    pub fn load_dash();
}
734
make_code_point_set_property! {
    property: "Deprecated";
    marker: DeprecatedProperty;
    keyed_data_marker: DeprecatedV1Marker;
    func:
    /// Deprecated characters. No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_deprecated()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let deprecated = sets::deprecated();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

    pub const fn deprecated() => SINGLETON_PROPS_DEP_V1;
    pub fn load_deprecated();
}
761
make_code_point_set_property! {
    property: "Default_Ignorable_Code_Point";
    marker: DefaultIgnorableCodePointProperty;
    keyed_data_marker: DefaultIgnorableCodePointV1Marker;
    func:
    /// For programmatic determination of default ignorable code points. New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use
    /// [`load_default_ignorable_code_point()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let default_ignorable_code_point = sets::default_ignorable_code_point();
    ///
    /// assert!(default_ignorable_code_point.contains32(0x180B));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

    pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1;
    pub fn load_default_ignorable_code_point();
}
790
make_code_point_set_property! {
    property: "Diacritic";
    marker: DiacriticProperty;
    keyed_data_marker: DiacriticV1Marker;
    func:
    /// Characters that linguistically modify the meaning of another character to which they apply
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_diacritic()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let diacritic = sets::diacritic();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

    pub const fn diacritic() => SINGLETON_PROPS_DIA_V1;
    pub fn load_diacritic();
}
816
make_code_point_set_property! {
    property: "Emoji_Modifier_Base";
    marker: EmojiModifierBaseProperty;
    keyed_data_marker: EmojiModifierBaseV1Marker;
    func:
    /// Characters that can serve as a base for emoji modifiers
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_emoji_modifier_base()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji_modifier_base = sets::emoji_modifier_base();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

    pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1;
    pub fn load_emoji_modifier_base();
}
842
make_code_point_set_property! {
    property: "Emoji_Component";
    marker: EmojiComponentProperty;
    keyed_data_marker: EmojiComponentV1Marker;
    func:
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
    /// separate choices, such as base characters for emoji keycaps
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_emoji_component()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji_component = sets::emoji_component();
    ///
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(emoji_component.contains32(0x20E3));  // COMBINING ENCLOSING KEYCAP
    /// assert!(emoji_component.contains('7'));  // U+0037 DIGIT SEVEN
    /// assert!(!emoji_component.contains('T'));
    /// ```

    pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1;
    pub fn load_emoji_component();
}
871
make_code_point_set_property! {
    property: "Emoji_Modifier";
    marker: EmojiModifierProperty;
    keyed_data_marker: EmojiModifierV1Marker;
    func:
    /// Characters that are emoji modifiers
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_emoji_modifier()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji_modifier = sets::emoji_modifier();
    ///
    /// assert!(emoji_modifier.contains32(0x1F3FD));  // EMOJI MODIFIER FITZPATRICK TYPE-4
    /// assert!(!emoji_modifier.contains32(0x200C));  // ZERO WIDTH NON-JOINER
    /// ```

    pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1;
    pub fn load_emoji_modifier();
}
897
make_code_point_set_property! {
    property: "Emoji";
    marker: EmojiProperty;
    keyed_data_marker: EmojiV1Marker;
    func:
    /// Characters that are emoji
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_emoji()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji = sets::emoji();
    ///
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
    /// assert!(!emoji.contains('V'));
    /// ```

    pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1;
    pub fn load_emoji();
}
923
make_code_point_set_property! {
    property: "Emoji_Presentation";
    marker: EmojiPresentationProperty;
    keyed_data_marker: EmojiPresentationV1Marker;
    func:
    /// Characters that have emoji presentation by default
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// To read this set from a custom data provider, use [`load_emoji_presentation()`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji_presentation = sets::emoji_presentation();
    ///
    /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
    /// ```

    pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1;
    pub fn load_emoji_presentation();
}
949
950make_code_point_set_property! {
951 property: "Extender";
952 marker: ExtenderProperty;
953 keyed_data_marker: ExtenderV1Marker;
954 func:
955 /// Characters whose principal function is to extend the value of a preceding alphabetic
956 /// character or to extend the shape of adjacent characters, such as iteration and prolonged sound marks.
957 ///
958 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
959 ///
960 /// [📚 Help choosing a constructor](icu_provider::constructors)
961 ///
962 /// # Example
963 ///
964 /// ```
965 /// use icu::properties::sets;
966 ///
967 /// let extender = sets::extender();
968 ///
969 /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK
970 /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
971 /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT
972 /// ```
973
974 pub const fn extender() => SINGLETON_PROPS_EXT_V1;
975 pub fn load_extender();
976}
977
978make_code_point_set_property! {
979 property: "Extended_Pictographic";
980 marker: ExtendedPictographicProperty;
981 keyed_data_marker: ExtendedPictographicV1Marker;
982 func:
983 /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
984 /// emoji characters.
985 ///
986 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
987 ///
988 /// [📚 Help choosing a constructor](icu_provider::constructors)
989 ///
990 /// # Example
991 ///
992 /// ```
993 /// use icu::properties::sets;
994 ///
995 /// let extended_pictographic = sets::extended_pictographic();
996 ///
997 /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
998 /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
999 /// ```
1000
1001 pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1;
1002 pub fn load_extended_pictographic();
1003}
1004
1005make_code_point_set_property! {
1006 property: "Graph";
1007 marker: GraphProperty;
1008 keyed_data_marker: GraphV1Marker;
1009 func:
1010 /// Visible characters.
1011 /// This is defined for POSIX compatibility (see Annex C of [TR18](https://www.unicode.org/reports/tr18)).
1012
1013 pub const fn graph() => SINGLETON_PROPS_GRAPH_V1;
1014 pub fn load_graph();
1015}
1016
1017make_code_point_set_property! {
1018 property: "Grapheme_Base";
1019 marker: GraphemeBaseProperty;
1020 keyed_data_marker: GraphemeBaseV1Marker;
1021 func:
1022 /// Property used together with the definition of Standard Korean Syllable Block to define
1023 /// "Grapheme base". See D58 in Chapter 3, *Conformance*, of the Unicode Standard.
1024 ///
1025 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1026 ///
1027 /// [📚 Help choosing a constructor](icu_provider::constructors)
1028 ///
1029 /// # Example
1030 ///
1031 /// ```
1032 /// use icu::properties::sets;
1033 ///
1034 /// let grapheme_base = sets::grapheme_base();
1035 ///
1036 /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA
1037 /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
1038 /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
1039 /// ```
1040
1041 pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1;
1042 pub fn load_grapheme_base();
1043}
1044
1045make_code_point_set_property! {
1046 property: "Grapheme_Extend";
1047 marker: GraphemeExtendProperty;
1048 keyed_data_marker: GraphemeExtendV1Marker;
1049 func:
1050 /// Property used to define "Grapheme extender". See D59 in Chapter 3, *Conformance*, of
1051 /// the Unicode Standard.
1052 ///
1053 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1054 ///
1055 /// [📚 Help choosing a constructor](icu_provider::constructors)
1056 ///
1057 /// # Example
1058 ///
1059 /// ```
1060 /// use icu::properties::sets;
1061 ///
1062 /// let grapheme_extend = sets::grapheme_extend();
1063 ///
1064 /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
1065 /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
1066 /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
1067 /// ```
1068
1069 pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1;
1070 pub fn load_grapheme_extend();
1071}
1072
1073make_code_point_set_property! {
1074 property: "Grapheme_Link";
1075 marker: GraphemeLinkProperty;
1076 keyed_data_marker: GraphemeLinkV1Marker;
1077 func:
1078 /// Deprecated property. Formerly proposed for programmatic determination of grapheme
1079 /// cluster boundaries.
1080 // NOTE(review): no `# Example` doctest here, unlike most sibling properties; presumably because the property is deprecated — confirm.
1081 pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1;
1082 pub fn load_grapheme_link();
1083}
1084
1085make_code_point_set_property! {
1086 property: "Hex_Digit";
1087 marker: HexDigitProperty;
1088 keyed_data_marker: HexDigitV1Marker;
1089 func:
1090 /// Characters commonly used for the representation of hexadecimal numbers, plus their
1091 /// compatibility equivalents.
1092 ///
1093 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1094 ///
1095 /// [📚 Help choosing a constructor](icu_provider::constructors)
1096 ///
1097 /// # Example
1098 ///
1099 /// ```
1100 /// use icu::properties::sets;
1101 ///
1102 /// let hex_digit = sets::hex_digit();
1103 ///
1104 /// assert!(hex_digit.contains('0'));
1105 /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
1106 /// assert!(hex_digit.contains('f'));
1107 /// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F
1108 /// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
1109 /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1110 /// ```
1111
1112 pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1;
1113 pub fn load_hex_digit();
1114}
1115
1116make_code_point_set_property! {
1117 property: "Hyphen";
1118 marker: HyphenProperty;
1119 keyed_data_marker: HyphenV1Marker;
1120 func:
1121 /// Deprecated property. Dashes which are used to mark connections between pieces of
1122 /// words, plus the Katakana middle dot.
1123 // NOTE(review): no `# Example` doctest here, unlike most sibling properties; presumably because the property is deprecated — confirm.
1124 pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1;
1125 pub fn load_hyphen();
1126}
1127
1128make_code_point_set_property! {
1129 property: "Id_Continue";
1130 marker: IdContinueProperty;
1131 keyed_data_marker: IdContinueV1Marker;
1132 func:
1133 /// Characters that can come after the first character in an identifier. If using NFKC to
1134 /// fold differences between characters, use [`xid_continue`] or [`load_xid_continue`] instead.
1135 /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
1136 /// more details.
1137 ///
1138 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1139 ///
1140 /// [📚 Help choosing a constructor](icu_provider::constructors)
1141 ///
1142 /// # Example
1143 ///
1144 /// ```
1145 /// use icu::properties::sets;
1146 ///
1147 /// let id_continue = sets::id_continue();
1148 ///
1149 /// assert!(id_continue.contains('x'));
1150 /// assert!(id_continue.contains('1'));
1151 /// assert!(id_continue.contains('_'));
1152 /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
1153 /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
1154 /// assert!(id_continue.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1155 /// ```
1156
1157 pub const fn id_continue() => SINGLETON_PROPS_IDC_V1;
1158 pub fn load_id_continue();
1159}
1160
1161make_code_point_set_property! {
1162 property: "Ideographic";
1163 marker: IdeographicProperty;
1164 keyed_data_marker: IdeographicV1Marker;
1165 func:
1166 /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
1167 /// ideographs, or related siniform ideographs.
1168 ///
1169 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1170 ///
1171 /// [📚 Help choosing a constructor](icu_provider::constructors)
1172 ///
1173 /// # Example
1174 ///
1175 /// ```
1176 /// use icu::properties::sets;
1177 ///
1178 /// let ideographic = sets::ideographic();
1179 ///
1180 /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
1181 /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
1182 /// ```
1183
1184 pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1;
1185 pub fn load_ideographic();
1186}
1187
1188make_code_point_set_property! {
1189 property: "Id_Start";
1190 marker: IdStartProperty;
1191 keyed_data_marker: IdStartV1Marker;
1192 func:
1193 /// Characters that can begin an identifier. If using NFKC to fold differences between
1194 /// characters, use [`xid_start`] or [`load_xid_start`] instead. For more details, see
1195 /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html).
1196 ///
1197 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1198 ///
1199 /// [📚 Help choosing a constructor](icu_provider::constructors)
1200 ///
1201 /// # Example
1202 ///
1203 /// ```
1204 /// use icu::properties::sets;
1205 ///
1206 /// let id_start = sets::id_start();
1207 ///
1208 /// assert!(id_start.contains('x'));
1209 /// assert!(!id_start.contains('1'));
1210 /// assert!(!id_start.contains('_'));
1211 /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
1212 /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
1213 /// assert!(id_start.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1214 /// ```
1215
1216 pub const fn id_start() => SINGLETON_PROPS_IDS_V1;
1217 pub fn load_id_start();
1218}
1219
1220make_code_point_set_property! {
1221 property: "Ids_Binary_Operator";
1222 marker: IdsBinaryOperatorProperty;
1223 keyed_data_marker: IdsBinaryOperatorV1Marker;
1224 func:
1225 /// Characters used as binary operators in Ideographic Description Sequences.
1226 ///
1227 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1228 ///
1229 /// [📚 Help choosing a constructor](icu_provider::constructors)
1230 ///
1231 /// # Example
1232 ///
1233 /// ```
1234 /// use icu::properties::sets;
1235 ///
1236 /// let ids_binary_operator = sets::ids_binary_operator();
1237 ///
1238 /// assert!(ids_binary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
1239 /// assert!(!ids_binary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK
1240 /// ```
1241
1242 pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1;
1243 pub fn load_ids_binary_operator();
1244}
1245
1246make_code_point_set_property! {
1247 property: "Ids_Trinary_Operator";
1248 marker: IdsTrinaryOperatorProperty;
1249 keyed_data_marker: IdsTrinaryOperatorV1Marker;
1250 func:
1251 /// Characters used as trinary operators in Ideographic Description Sequences.
1252 ///
1253 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1254 ///
1255 /// [📚 Help choosing a constructor](icu_provider::constructors)
1256 ///
1257 /// # Example
1258 ///
1259 /// ```
1260 /// use icu::properties::sets;
1261 ///
1262 /// let ids_trinary_operator = sets::ids_trinary_operator();
1263 ///
1264 /// assert!(ids_trinary_operator.contains32(0x2FF2)); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
1265 /// assert!(ids_trinary_operator.contains32(0x2FF3)); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
1266 /// assert!(!ids_trinary_operator.contains32(0x2FF4)); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
1267 /// assert!(!ids_trinary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
1268 /// assert!(!ids_trinary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK
1269 /// ```
1270
1271 pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1;
1272 pub fn load_ids_trinary_operator();
1273}
1274
1275make_code_point_set_property! {
1276 property: "Join_Control";
1277 marker: JoinControlProperty;
1278 keyed_data_marker: JoinControlV1Marker;
1279 func:
1280 /// Format control characters which have specific functions for control of cursive joining
1281 /// and ligation.
1282 ///
1283 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1284 ///
1285 /// [📚 Help choosing a constructor](icu_provider::constructors)
1286 ///
1287 /// # Example
1288 ///
1289 /// ```
1290 /// use icu::properties::sets;
1291 ///
1292 /// let join_control = sets::join_control();
1293 ///
1294 /// assert!(join_control.contains32(0x200C)); // ZERO WIDTH NON-JOINER
1295 /// assert!(join_control.contains32(0x200D)); // ZERO WIDTH JOINER
1296 /// assert!(!join_control.contains32(0x200E)); // LEFT-TO-RIGHT MARK
1297 /// ```
1298
1299 pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1;
1300 pub fn load_join_control();
1301}
1302
1303make_code_point_set_property! {
1304 property: "Logical_Order_Exception";
1305 marker: LogicalOrderExceptionProperty;
1306 keyed_data_marker: LogicalOrderExceptionV1Marker;
1307 func:
1308 /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
1309 ///
1310 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1311 ///
1312 /// [📚 Help choosing a constructor](icu_provider::constructors)
1313 ///
1314 /// # Example
1315 ///
1316 /// ```
1317 /// use icu::properties::sets;
1318 ///
1319 /// let logical_order_exception = sets::logical_order_exception();
1320 ///
1321 /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
1322 /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
1323 /// ```
1324
1325 pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1;
1326 pub fn load_logical_order_exception();
1327}
1328
1329make_code_point_set_property! {
1330 property: "Lowercase";
1331 marker: LowercaseProperty;
1332 keyed_data_marker: LowercaseV1Marker;
1333 func:
1334 /// Lowercase characters.
1335 ///
1336 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1337 ///
1338 /// [📚 Help choosing a constructor](icu_provider::constructors)
1339 ///
1340 /// # Example
1341 ///
1342 /// ```
1343 /// use icu::properties::sets;
1344 ///
1345 /// let lowercase = sets::lowercase();
1346 ///
1347 /// assert!(lowercase.contains('a'));
1348 /// assert!(!lowercase.contains('A'));
1349 /// ```
1350
1351 pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1;
1352 pub fn load_lowercase();
1353}
1354
1355make_code_point_set_property! {
1356 property: "Math";
1357 marker: MathProperty;
1358 keyed_data_marker: MathV1Marker;
1359 func:
1360 /// Characters used in mathematical notation.
1361 ///
1362 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1363 ///
1364 /// [📚 Help choosing a constructor](icu_provider::constructors)
1365 ///
1366 /// # Example
1367 ///
1368 /// ```
1369 /// use icu::properties::sets;
1370 ///
1371 /// let math = sets::math();
1372 ///
1373 /// assert!(math.contains('='));
1374 /// assert!(math.contains('+'));
1375 /// assert!(!math.contains('-')); // U+002D HYPHEN-MINUS
1376 /// assert!(math.contains('−')); // U+2212 MINUS SIGN
1377 /// assert!(!math.contains('/')); // U+002F SOLIDUS
1378 /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH
1379 /// ```
1380
1381 pub const fn math() => SINGLETON_PROPS_MATH_V1;
1382 pub fn load_math();
1383}
1384
1385make_code_point_set_property! {
1386 property: "Noncharacter_Code_Point";
1387 marker: NoncharacterCodePointProperty;
1388 keyed_data_marker: NoncharacterCodePointV1Marker;
1389 func:
1390 /// Code points permanently reserved for internal use.
1391 ///
1392 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1393 ///
1394 /// [📚 Help choosing a constructor](icu_provider::constructors)
1395 ///
1396 /// # Example
1397 ///
1398 /// ```
1399 /// use icu::properties::sets;
1400 ///
1401 /// let noncharacter_code_point = sets::noncharacter_code_point();
1402 ///
1403 /// assert!(noncharacter_code_point.contains32(0xFDD0)); // first of the U+FDD0..U+FDEF noncharacters
1404 /// assert!(noncharacter_code_point.contains32(0xFFFF)); // last code point of the Basic Multilingual Plane
1405 /// assert!(!noncharacter_code_point.contains32(0x10000)); // LINEAR B SYLLABLE B008 A
1406 /// ```
1407
1408 pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1;
1409 pub fn load_noncharacter_code_point();
1410}
1411
1412make_code_point_set_property! {
1413 property: "NFC_Inert";
1414 marker: NfcInertProperty;
1415 keyed_data_marker: NfcInertV1Marker;
1416 func:
1417 /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
1418
1419 pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1;
1420 pub fn load_nfc_inert();
1421}
1422
1423make_code_point_set_property! {
1424 property: "NFD_Inert";
1425 marker: NfdInertProperty;
1426 keyed_data_marker: NfdInertV1Marker;
1427 func:
1428 /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
1429
1430 pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1;
1431 pub fn load_nfd_inert();
1432}
1433
1434make_code_point_set_property! {
1435 property: "NFKC_Inert";
1436 marker: NfkcInertProperty;
1437 keyed_data_marker: NfkcInertV1Marker;
1438 func:
1439 /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
1440
1441 pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1;
1442 pub fn load_nfkc_inert();
1443}
1444
1445make_code_point_set_property! {
1446 property: "NFKD_Inert";
1447 marker: NfkdInertProperty;
1448 keyed_data_marker: NfkdInertV1Marker;
1449 func:
1450 /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
1451
1452 pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1;
1453 pub fn load_nfkd_inert();
1454}
1455
1456make_code_point_set_property! {
1457 property: "Pattern_Syntax";
1458 marker: PatternSyntaxProperty;
1459 keyed_data_marker: PatternSyntaxV1Marker;
1460 func:
1461 /// Characters used as syntax in patterns (such as regular expressions). See
1462 /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
1463 /// for more details.
1464 ///
1465 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1466 ///
1467 /// [📚 Help choosing a constructor](icu_provider::constructors)
1468 ///
1469 /// # Example
1470 ///
1471 /// ```
1472 /// use icu::properties::sets;
1473 ///
1474 /// let pattern_syntax = sets::pattern_syntax();
1475 ///
1476 /// assert!(pattern_syntax.contains('{'));
1477 /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
1478 /// assert!(!pattern_syntax.contains('0'));
1479 /// ```
1480
1481 pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1;
1482 pub fn load_pattern_syntax();
1483}
1484
1485make_code_point_set_property! {
1486 property: "Pattern_White_Space";
1487 marker: PatternWhiteSpaceProperty;
1488 keyed_data_marker: PatternWhiteSpaceV1Marker;
1489 func:
1490 /// Characters used as whitespace in patterns (such as regular expressions). See
1491 /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
1492 /// more details.
1493 ///
1494 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1495 ///
1496 /// [📚 Help choosing a constructor](icu_provider::constructors)
1497 ///
1498 /// # Example
1499 ///
1500 /// ```
1501 /// use icu::properties::sets;
1502 ///
1503 /// let pattern_white_space = sets::pattern_white_space();
1504 ///
1505 /// assert!(pattern_white_space.contains(' ')); // U+0020 SPACE
1506 /// assert!(pattern_white_space.contains32(0x2029)); // PARAGRAPH SEPARATOR
1507 /// assert!(pattern_white_space.contains32(0x000A)); // NEW LINE
1508 /// assert!(!pattern_white_space.contains32(0x00A0)); // NO-BREAK SPACE
1509 /// ```
1510
1511 pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1;
1512 pub fn load_pattern_white_space();
1513}
1514
1515make_code_point_set_property! {
1516 property: "Prepended_Concatenation_Mark";
1517 marker: PrependedConcatenationMarkProperty;
1518 keyed_data_marker: PrependedConcatenationMarkV1Marker;
1519 func:
1520 /// A small class of visible format controls, which precede and then span a sequence of
1521 /// other characters, usually digits.
1522 // NOTE(review): no `# Example` doctest here, unlike most sibling properties — confirm that is intentional.
1523 pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1;
1524 pub fn load_prepended_concatenation_mark();
1525}
1526
1527make_code_point_set_property! {
1528 property: "Print";
1529 marker: PrintProperty;
1530 keyed_data_marker: PrintV1Marker;
1531 func:
1532 /// Printable characters (visible characters and whitespace).
1533 /// This is defined for POSIX compatibility (see Annex C of [TR18](https://www.unicode.org/reports/tr18)).
1534
1535 pub const fn print() => SINGLETON_PROPS_PRINT_V1;
1536 pub fn load_print();
1537}
1538
1539make_code_point_set_property! {
1540 property: "Quotation_Mark";
1541 marker: QuotationMarkProperty;
1542 keyed_data_marker: QuotationMarkV1Marker;
1543 func:
1544 /// Punctuation characters that function as quotation marks.
1545 ///
1546 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1547 ///
1548 /// [📚 Help choosing a constructor](icu_provider::constructors)
1549 ///
1550 /// # Example
1551 ///
1552 /// ```
1553 /// use icu::properties::sets;
1554 ///
1555 /// let quotation_mark = sets::quotation_mark();
1556 ///
1557 /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE
1558 /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
1559 /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN
1560 /// ```
1561
1562 pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1;
1563 pub fn load_quotation_mark();
1564}
1565
1566make_code_point_set_property! {
1567 property: "Radical";
1568 marker: RadicalProperty;
1569 keyed_data_marker: RadicalV1Marker;
1570 func:
1571 /// Characters used in the definition of Ideographic Description Sequences.
1572 ///
1573 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1574 ///
1575 /// [📚 Help choosing a constructor](icu_provider::constructors)
1576 ///
1577 /// # Example
1578 ///
1579 /// ```
1580 /// use icu::properties::sets;
1581 ///
1582 /// let radical = sets::radical();
1583 ///
1584 /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
1585 /// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
1586 /// ```
1587
1588 pub const fn radical() => SINGLETON_PROPS_RADICAL_V1;
1589 pub fn load_radical();
1590}
1591
1592make_code_point_set_property! {
1593 property: "Regional_Indicator";
1594 marker: RegionalIndicatorProperty;
1595 keyed_data_marker: RegionalIndicatorV1Marker;
1596 func:
1597 /// Regional indicator characters, U+1F1E6..U+1F1FF.
1598 ///
1599 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1600 ///
1601 /// [📚 Help choosing a constructor](icu_provider::constructors)
1602 ///
1603 /// # Example
1604 ///
1605 /// ```
1606 /// use icu::properties::sets;
1607 ///
1608 /// let regional_indicator = sets::regional_indicator();
1609 ///
1610 /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
1611 /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
1612 /// assert!(!regional_indicator.contains('T'));
1613 /// ```
1614
1615 pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1;
1616 pub fn load_regional_indicator();
1617}
1618
1619make_code_point_set_property! {
1620 property: "Soft_Dotted";
1621 marker: SoftDottedProperty;
1622 keyed_data_marker: SoftDottedV1Marker;
1623 func:
1624 /// Characters with a "soft dot", like i or j. An accent placed on these characters causes
1625 /// the dot to disappear.
1626 ///
1627 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1628 ///
1629 /// [📚 Help choosing a constructor](icu_provider::constructors)
1630 ///
1631 /// # Example
1632 ///
1633 /// ```
1634 /// use icu::properties::sets;
1635 ///
1636 /// let soft_dotted = sets::soft_dotted();
1637 ///
1638 /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
1639 /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
1640 /// ```
1641
1642 pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1;
1643 pub fn load_soft_dotted();
1644}
1645
1646make_code_point_set_property! {
1647 property: "Segment_Starter";
1648 marker: SegmentStarterProperty;
1649 keyed_data_marker: SegmentStarterV1Marker;
1650 func:
1651 /// Characters that are starters in terms of Unicode normalization and combining character
1652 /// sequences.
1653
1654 pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1;
1655 pub fn load_segment_starter();
1656}
1657
1658make_code_point_set_property! {
1659 property: "Case_Sensitive";
1660 marker: CaseSensitiveProperty;
1661 keyed_data_marker: CaseSensitiveV1Marker;
1662 func:
1663 /// Characters that are either the source of a case mapping or in the target of a case
1664 /// mapping.
1665
1666 pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1;
1667 pub fn load_case_sensitive();
1668}
1669
1670make_code_point_set_property! {
1671 property: "Sentence_Terminal";
1672 marker: SentenceTerminalProperty;
1673 keyed_data_marker: SentenceTerminalV1Marker;
1674 func:
1675 /// Punctuation characters that generally mark the end of sentences.
1676 ///
1677 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1678 ///
1679 /// [📚 Help choosing a constructor](icu_provider::constructors)
1680 ///
1681 /// # Example
1682 ///
1683 /// ```
1684 /// use icu::properties::sets;
1685 ///
1686 /// let sentence_terminal = sets::sentence_terminal();
1687 ///
1688 /// assert!(sentence_terminal.contains('.'));
1689 /// assert!(sentence_terminal.contains('?'));
1690 /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
1691 /// assert!(!sentence_terminal.contains(','));
1692 /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
1693 /// ```
1694
1695 pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1;
1696 pub fn load_sentence_terminal();
1697}
1698
1699make_code_point_set_property! {
1700 property: "Terminal_Punctuation";
1701 marker: TerminalPunctuationProperty;
1702 keyed_data_marker: TerminalPunctuationV1Marker;
1703 func:
1704 /// Punctuation characters that generally mark the end of textual units.
1705 ///
1706 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1707 ///
1708 /// [📚 Help choosing a constructor](icu_provider::constructors)
1709 ///
1710 /// # Example
1711 ///
1712 /// ```
1713 /// use icu::properties::sets;
1714 ///
1715 /// let terminal_punctuation = sets::terminal_punctuation();
1716 ///
1717 /// assert!(terminal_punctuation.contains('.'));
1718 /// assert!(terminal_punctuation.contains('?'));
1719 /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
1720 /// assert!(terminal_punctuation.contains(','));
1721 /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
1722 /// ```
1723
1724 pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1;
1725 pub fn load_terminal_punctuation();
1726}
1727
1728make_code_point_set_property! {
1729 property: "Unified_Ideograph";
1730 marker: UnifiedIdeographProperty;
1731 keyed_data_marker: UnifiedIdeographV1Marker;
1732 func:
1733 /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
1734 ///
1735 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1736 ///
1737 /// [📚 Help choosing a constructor](icu_provider::constructors)
1738 ///
1739 /// # Example
1740 ///
1741 /// ```
1742 /// use icu::properties::sets;
1743 ///
1744 /// let unified_ideograph = sets::unified_ideograph();
1745 ///
1746 /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
1747 /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
1748 /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178
1749 /// ```
1750
1751 pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1;
1752 pub fn load_unified_ideograph();
1753}
1754
1755make_code_point_set_property! {
1756 property: "Uppercase";
1757 marker: UppercaseProperty;
1758 keyed_data_marker: UppercaseV1Marker;
1759 func:
1760 /// Uppercase characters.
1761 ///
1762 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1763 ///
1764 /// [📚 Help choosing a constructor](icu_provider::constructors)
1765 ///
1766 /// # Example
1767 ///
1768 /// ```
1769 /// use icu::properties::sets;
1770 ///
1771 /// let uppercase = sets::uppercase();
1772 ///
1773 /// assert!(uppercase.contains('U'));
1774 /// assert!(!uppercase.contains('u'));
1775 /// ```
1776
1777 pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1;
1778 pub fn load_uppercase();
1779}
1780
1781make_code_point_set_property! {
1782 property: "Variation_Selector";
1783 marker: VariationSelectorProperty;
1784 keyed_data_marker: VariationSelectorV1Marker;
1785 func:
1786 /// Characters that are Variation Selectors, such as U+FE0F VARIATION SELECTOR-16.
1787 ///
1788 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1789 ///
1790 /// [📚 Help choosing a constructor](icu_provider::constructors)
1791 ///
1792 /// # Example
1793 ///
1794 /// ```
1795 /// use icu::properties::sets;
1796 ///
1797 /// let variation_selector = sets::variation_selector();
1798 ///
1799 /// assert!(variation_selector.contains32(0x180D)); // MONGOLIAN FREE VARIATION SELECTOR THREE
1800 /// assert!(!variation_selector.contains32(0x303E)); // IDEOGRAPHIC VARIATION INDICATOR
1801 /// assert!(variation_selector.contains32(0xFE0F)); // VARIATION SELECTOR-16
1802 /// assert!(!variation_selector.contains32(0xFE10)); // PRESENTATION FORM FOR VERTICAL COMMA
1803 /// assert!(variation_selector.contains32(0xE01EF)); // VARIATION SELECTOR-256
1804 /// ```
1805
1806 pub const fn variation_selector() => SINGLETON_PROPS_VS_V1;
1807 pub fn load_variation_selector();
1808}
1809
1810make_code_point_set_property! {
1811 property: "White_Space";
1812 marker: WhiteSpaceProperty;
1813 keyed_data_marker: WhiteSpaceV1Marker;
1814 func:
1815 /// Spaces, separator characters and other control characters which should be treated by
1816 /// programming languages as "white space" for the purpose of parsing elements.
1817 ///
1818 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1819 ///
1820 /// [📚 Help choosing a constructor](icu_provider::constructors)
1821 ///
1822 /// # Example
1823 ///
1824 /// ```
1825 /// use icu::properties::sets;
1826 ///
1827 /// let white_space = sets::white_space();
1828 ///
1829 /// assert!(white_space.contains(' ')); // U+0020 SPACE
1830 /// assert!(white_space.contains32(0x000A)); // NEW LINE
1831 /// assert!(white_space.contains32(0x00A0)); // NO-BREAK SPACE
1832 /// assert!(!white_space.contains32(0x200B)); // ZERO WIDTH SPACE
1833 /// ```
1834
1835 pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1;
1836 pub fn load_white_space();
1837}
1838
1839make_code_point_set_property! {
1840 property: "Xdigit";
1841 marker: XdigitProperty;
1842 keyed_data_marker: XdigitV1Marker;
1843 func:
1844 /// Hexadecimal digits.
1845 /// This is defined for POSIX compatibility (see Annex C of [TR18](https://www.unicode.org/reports/tr18)).
1846
1847 pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1;
1848 pub fn load_xdigit();
1849}
1850
1851make_code_point_set_property! {
1852 property: "XID_Continue";
1853 marker: XidContinueProperty;
1854 keyed_data_marker: XidContinueV1Marker;
1855 func:
1856 /// Characters that can come after the first character in an identifier. For more details, see
1857 /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html).
1858 ///
1859 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1860 ///
1861 /// [📚 Help choosing a constructor](icu_provider::constructors)
1862 ///
1863 /// # Example
1864 ///
1865 /// ```
1866 /// use icu::properties::sets;
1867 ///
1868 /// let xid_continue = sets::xid_continue();
1869 ///
1870 /// assert!(xid_continue.contains('x'));
1871 /// assert!(xid_continue.contains('1'));
1872 /// assert!(xid_continue.contains('_'));
1873 /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
1874 /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
1875 /// assert!(!xid_continue.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1876 /// ```
1877
1878 pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1;
1879 pub fn load_xid_continue();
1880}
1881
make_code_point_set_property! {
    property: "XID_Start";
    marker: XidStartProperty;
    keyed_data_marker: XidStartV1Marker;
    func:
    /// Characters that can begin an identifier.
    ///
    /// See [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let xid_start = sets::xid_start();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains32(0xFC5E));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```

    pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1;
    pub fn load_xid_start();
}
1913
1914//
1915// Binary property getter fns
1916// (data as sets of strings + code points)
1917//
1918
// Generates the two getters for a binary property whose data is a set of
// code points *and* strings:
//   * a loader taking a custom `DataProvider` and returning `UnicodeSetData`;
//   * a `compiled_data`-gated `const fn` returning a borrowed view over the
//     baked singleton.
// The doc comments supplied after `func:` are attached to the `const fn`; the
// loader receives a generated one-line doc pointing back at it.
macro_rules! make_unicode_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])+
        $const_vis:vis const fn $const_fn:ident() => $singleton:ident;
        $load_vis:vis fn $load_fn:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($const_fn), "()`] that uses custom data provided by a [`DataProvider`].")]
        $load_vis fn $load_fn(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<UnicodeSetData, PropertiesError> {
            // Each `?` converts the provider's error into `PropertiesError`.
            let response = provider.load(Default::default())?;
            let payload = response.take_payload()?;
            Ok(UnicodeSetData::from_data(payload))
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $const_vis const fn $const_fn() -> UnicodeSetDataBorrowed<'static> {
            UnicodeSetDataBorrowed {
                set: crate::provider::Baked::$singleton
            }
        }
    }
}
1946
make_unicode_set_property! {
    property: "Basic_Emoji";
    marker: BasicEmojiProperty;
    keyed_data_marker: BasicEmojiV1Marker;
    func:
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [Unicode Technical Standard #51](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let basic_emoji = sets::basic_emoji();
    ///
    /// assert!(!basic_emoji.contains32(0x0020));
    /// assert!(!basic_emoji.contains_char('\n'));
    /// assert!(basic_emoji.contains_char('🦃')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains("\u{1F983}"));
    /// assert!(basic_emoji.contains("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
    /// ```
    pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1;
    pub fn load_basic_emoji();
}
1977
1978//
1979// Enumerated property getter fns
1980//
1981
1982/// A version of [`for_general_category_group()`] that uses custom data provided by a [`DataProvider`].
1983///
1984/// [📚 Help choosing a constructor](icu_provider::constructors)
1985pub fn load_for_general_category_group(
1986 provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized),
1987 enum_val: GeneralCategoryGroup,
1988) -> Result<CodePointSetData, PropertiesError> {
1989 let gc_map_payload = maps::load_general_category(provider)?;
1990 let gc_map = gc_map_payload.as_borrowed();
1991 let matching_gc_ranges = gc_map
1992 .iter_ranges()
1993 .filter(|cpm_range| (1 << cpm_range.value as u32) & enum_val.0 != 0)
1994 .map(|cpm_range| cpm_range.range);
1995 let set = CodePointInversionList::from_iter(matching_gc_ranges);
1996 Ok(CodePointSetData::from_code_point_inversion_list(set))
1997}
1998
/// Return a [`CodePointSetData`] for a value or a grouping of values of the General_Category property. See [`GeneralCategoryGroup`].
///
/// The set is built from the compiled General_Category map by keeping every range whose
/// category has its bit set in `enum_val`.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    // The group is a bitmask indexed by the numeric value of each category.
    let group_mask = enum_val.0;
    let selected_ranges = maps::general_category()
        .iter_ranges()
        .filter_map(|entry| {
            let in_group = (1 << (entry.value as u32)) & group_mask != 0;
            if in_group {
                Some(entry.range)
            } else {
                None
            }
        });
    let inversion_list = CodePointInversionList::from_iter(selected_ranges);
    CodePointSetData::from_code_point_inversion_list(inversion_list)
}
2013
/// Returns a type capable of looking up values for a property specified as a string, as long as it is a
/// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
///
/// This handles every property required by ECMA-262 `/u` regular expressions, except for:
///
/// - `Script` and `General_Category`: handle these directly with [`maps::load_general_category()`] and
///   [`maps::load_script()`], using property values parsed via
///   [`GeneralCategory::get_name_to_enum_mapper()`] and [`Script::get_name_to_enum_mapper()`]
///   if necessary.
/// - `Script_Extensions`: handle this directly using APIs from [`crate::script`], like [`script::load_script_with_extensions_unstable()`]
/// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`],
///   using property values parsed via [`GeneralCategoryGroup::get_name_to_enum_mapper()`] if necessary
/// - `Any`, `Assigned`, and `ASCII` pseudoproperties: Handle these using their equivalent sets:
///     - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
///     - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
///     - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
/// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262,
///   simply create the corresponding `GeneralCategory` set.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Errors
///
/// Returns [`PropertiesError::UnexpectedPropertyName`] when `name` is not recognized as an
/// ECMA-262 property name, or when it names a property that this function does not handle.
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::load_for_ecma262("Emoji").expect("loading data failed");
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
///
/// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
    use crate::runtime::UnicodeProperty;

    // Strict name lookup; anything unrecognized is reported as an unexpected name.
    let prop = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;
    Ok(match prop {
        UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
        UnicodeProperty::Alphabetic => alphabetic(),
        UnicodeProperty::BidiControl => bidi_control(),
        UnicodeProperty::BidiMirrored => bidi_mirrored(),
        UnicodeProperty::CaseIgnorable => case_ignorable(),
        UnicodeProperty::Cased => cased(),
        UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
        UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
        UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
        UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
        UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
        UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
        UnicodeProperty::Dash => dash(),
        UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
        UnicodeProperty::Deprecated => deprecated(),
        UnicodeProperty::Diacritic => diacritic(),
        UnicodeProperty::Emoji => emoji(),
        UnicodeProperty::EmojiComponent => emoji_component(),
        UnicodeProperty::EmojiModifier => emoji_modifier(),
        UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
        UnicodeProperty::EmojiPresentation => emoji_presentation(),
        UnicodeProperty::ExtendedPictographic => extended_pictographic(),
        UnicodeProperty::Extender => extender(),
        UnicodeProperty::GraphemeBase => grapheme_base(),
        UnicodeProperty::GraphemeExtend => grapheme_extend(),
        UnicodeProperty::HexDigit => hex_digit(),
        UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
        UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
        UnicodeProperty::IdContinue => id_continue(),
        UnicodeProperty::IdStart => id_start(),
        UnicodeProperty::Ideographic => ideographic(),
        UnicodeProperty::JoinControl => join_control(),
        UnicodeProperty::LogicalOrderException => logical_order_exception(),
        UnicodeProperty::Lowercase => lowercase(),
        UnicodeProperty::Math => math(),
        UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
        UnicodeProperty::PatternSyntax => pattern_syntax(),
        UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
        UnicodeProperty::QuotationMark => quotation_mark(),
        UnicodeProperty::Radical => radical(),
        UnicodeProperty::RegionalIndicator => regional_indicator(),
        UnicodeProperty::SentenceTerminal => sentence_terminal(),
        UnicodeProperty::SoftDotted => soft_dotted(),
        UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
        UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
        UnicodeProperty::Uppercase => uppercase(),
        UnicodeProperty::VariationSelector => variation_selector(),
        UnicodeProperty::WhiteSpace => white_space(),
        UnicodeProperty::XidContinue => xid_continue(),
        UnicodeProperty::XidStart => xid_start(),
        // Parsed successfully, but not one of the binary properties served here.
        _ => return Err(PropertiesError::UnexpectedPropertyName),
    })
}
2110
// Invokes `icu_provider::gen_any_buffer_data_constructors!` for the
// `load_for_ecma262` family of constructors. The extra `name: &str` argument
// and the `Result<CodePointSetData, PropertiesError>` return type given here
// apply to the functions named in the `functions` list.
// NOTE(review): `#[cfg(skip)]` presumably prevents the macro from emitting the
// first listed function, since `load_for_ecma262` is hand-written above with a
// different (borrowed) return type — confirm against the macro documentation.
icu_provider::gen_any_buffer_data_constructors!(
    locale: skip,
    name: &str,
    result: Result<CodePointSetData, PropertiesError>,
    #[cfg(skip)]
    functions: [
        load_for_ecma262,
        load_for_ecma262_with_any_provider,
        load_for_ecma262_with_buffer_provider,
        load_for_ecma262_unstable,
    ]
);
2123
2124#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)]
2125pub fn load_for_ecma262_unstable<P>(
2126 provider: &P,
2127 name: &str,
2128) -> Result<CodePointSetData, PropertiesError>
2129where
2130 P: ?Sized
2131 + DataProvider<AsciiHexDigitV1Marker>
2132 + DataProvider<AlphabeticV1Marker>
2133 + DataProvider<BidiControlV1Marker>
2134 + DataProvider<BidiMirroredV1Marker>
2135 + DataProvider<CaseIgnorableV1Marker>
2136 + DataProvider<CasedV1Marker>
2137 + DataProvider<ChangesWhenCasefoldedV1Marker>
2138 + DataProvider<ChangesWhenCasemappedV1Marker>
2139 + DataProvider<ChangesWhenLowercasedV1Marker>
2140 + DataProvider<ChangesWhenNfkcCasefoldedV1Marker>
2141 + DataProvider<ChangesWhenTitlecasedV1Marker>
2142 + DataProvider<ChangesWhenUppercasedV1Marker>
2143 + DataProvider<DashV1Marker>
2144 + DataProvider<DefaultIgnorableCodePointV1Marker>
2145 + DataProvider<DeprecatedV1Marker>
2146 + DataProvider<DiacriticV1Marker>
2147 + DataProvider<EmojiV1Marker>
2148 + DataProvider<EmojiComponentV1Marker>
2149 + DataProvider<EmojiModifierV1Marker>
2150 + DataProvider<EmojiModifierBaseV1Marker>
2151 + DataProvider<EmojiPresentationV1Marker>
2152 + DataProvider<ExtendedPictographicV1Marker>
2153 + DataProvider<ExtenderV1Marker>
2154 + DataProvider<GraphemeBaseV1Marker>
2155 + DataProvider<GraphemeExtendV1Marker>
2156 + DataProvider<HexDigitV1Marker>
2157 + DataProvider<IdsBinaryOperatorV1Marker>
2158 + DataProvider<IdsTrinaryOperatorV1Marker>
2159 + DataProvider<IdContinueV1Marker>
2160 + DataProvider<IdStartV1Marker>
2161 + DataProvider<IdeographicV1Marker>
2162 + DataProvider<JoinControlV1Marker>
2163 + DataProvider<LogicalOrderExceptionV1Marker>
2164 + DataProvider<LowercaseV1Marker>
2165 + DataProvider<MathV1Marker>
2166 + DataProvider<NoncharacterCodePointV1Marker>
2167 + DataProvider<PatternSyntaxV1Marker>
2168 + DataProvider<PatternWhiteSpaceV1Marker>
2169 + DataProvider<QuotationMarkV1Marker>
2170 + DataProvider<RadicalV1Marker>
2171 + DataProvider<RegionalIndicatorV1Marker>
2172 + DataProvider<SentenceTerminalV1Marker>
2173 + DataProvider<SoftDottedV1Marker>
2174 + DataProvider<TerminalPunctuationV1Marker>
2175 + DataProvider<UnifiedIdeographV1Marker>
2176 + DataProvider<UppercaseV1Marker>
2177 + DataProvider<VariationSelectorV1Marker>
2178 + DataProvider<WhiteSpaceV1Marker>
2179 + DataProvider<XidContinueV1Marker>
2180 + DataProvider<XidStartV1Marker>,
2181{
2182 use crate::runtime::UnicodeProperty;
2183
2184 let prop = if let Some(prop) = UnicodeProperty::parse_ecma262_name(name) {
2185 prop
2186 } else {
2187 return Err(PropertiesError::UnexpectedPropertyName);
2188 };
2189 match prop {
2190 UnicodeProperty::AsciiHexDigit => load_ascii_hex_digit(provider),
2191 UnicodeProperty::Alphabetic => load_alphabetic(provider),
2192 UnicodeProperty::BidiControl => load_bidi_control(provider),
2193 UnicodeProperty::BidiMirrored => load_bidi_mirrored(provider),
2194 UnicodeProperty::CaseIgnorable => load_case_ignorable(provider),
2195 UnicodeProperty::Cased => load_cased(provider),
2196 UnicodeProperty::ChangesWhenCasefolded => load_changes_when_casefolded(provider),
2197 UnicodeProperty::ChangesWhenCasemapped => load_changes_when_casemapped(provider),
2198 UnicodeProperty::ChangesWhenLowercased => load_changes_when_lowercased(provider),
2199 UnicodeProperty::ChangesWhenNfkcCasefolded => load_changes_when_nfkc_casefolded(provider),
2200 UnicodeProperty::ChangesWhenTitlecased => load_changes_when_titlecased(provider),
2201 UnicodeProperty::ChangesWhenUppercased => load_changes_when_uppercased(provider),
2202 UnicodeProperty::Dash => load_dash(provider),
2203 UnicodeProperty::DefaultIgnorableCodePoint => load_default_ignorable_code_point(provider),
2204 UnicodeProperty::Deprecated => load_deprecated(provider),
2205 UnicodeProperty::Diacritic => load_diacritic(provider),
2206 UnicodeProperty::Emoji => load_emoji(provider),
2207 UnicodeProperty::EmojiComponent => load_emoji_component(provider),
2208 UnicodeProperty::EmojiModifier => load_emoji_modifier(provider),
2209 UnicodeProperty::EmojiModifierBase => load_emoji_modifier_base(provider),
2210 UnicodeProperty::EmojiPresentation => load_emoji_presentation(provider),
2211 UnicodeProperty::ExtendedPictographic => load_extended_pictographic(provider),
2212 UnicodeProperty::Extender => load_extender(provider),
2213 UnicodeProperty::GraphemeBase => load_grapheme_base(provider),
2214 UnicodeProperty::GraphemeExtend => load_grapheme_extend(provider),
2215 UnicodeProperty::HexDigit => load_hex_digit(provider),
2216 UnicodeProperty::IdsBinaryOperator => load_ids_binary_operator(provider),
2217 UnicodeProperty::IdsTrinaryOperator => load_ids_trinary_operator(provider),
2218 UnicodeProperty::IdContinue => load_id_continue(provider),
2219 UnicodeProperty::IdStart => load_id_start(provider),
2220 UnicodeProperty::Ideographic => load_ideographic(provider),
2221 UnicodeProperty::JoinControl => load_join_control(provider),
2222 UnicodeProperty::LogicalOrderException => load_logical_order_exception(provider),
2223 UnicodeProperty::Lowercase => load_lowercase(provider),
2224 UnicodeProperty::Math => load_math(provider),
2225 UnicodeProperty::NoncharacterCodePoint => load_noncharacter_code_point(provider),
2226 UnicodeProperty::PatternSyntax => load_pattern_syntax(provider),
2227 UnicodeProperty::PatternWhiteSpace => load_pattern_white_space(provider),
2228 UnicodeProperty::QuotationMark => load_quotation_mark(provider),
2229 UnicodeProperty::Radical => load_radical(provider),
2230 UnicodeProperty::RegionalIndicator => load_regional_indicator(provider),
2231 UnicodeProperty::SentenceTerminal => load_sentence_terminal(provider),
2232 UnicodeProperty::SoftDotted => load_soft_dotted(provider),
2233 UnicodeProperty::TerminalPunctuation => load_terminal_punctuation(provider),
2234 UnicodeProperty::UnifiedIdeograph => load_unified_ideograph(provider),
2235 UnicodeProperty::Uppercase => load_uppercase(provider),
2236 UnicodeProperty::VariationSelector => load_variation_selector(provider),
2237 UnicodeProperty::WhiteSpace => load_white_space(provider),
2238 UnicodeProperty::XidContinue => load_xid_continue(provider),
2239 UnicodeProperty::XidStart => load_xid_start(provider),
2240 _ => Err(PropertiesError::UnexpectedPropertyName),
2241 }
2242}
2243
#[cfg(test)]
mod tests {

    /// The `Number` group contains digits from several scripts and excludes letters.
    #[test]
    fn test_general_category() {
        use icu::properties::sets;
        use icu::properties::GeneralCategoryGroup;

        let digits_data = sets::for_general_category_group(GeneralCategoryGroup::Number);
        let digits = digits_data.as_borrowed();

        assert!(digits.contains('5'));
        assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE

        assert!(!digits.contains('A'));
    }

    /// The set for `Script::Thai` contains Thai letters and digits, but not
    /// Latin letters or the baht sign.
    #[test]
    fn test_script() {
        use icu::properties::maps;
        use icu::properties::Script;

        let thai_data = maps::script().get_set_for_value(Script::Thai);
        let thai = thai_data.as_borrowed();

        assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO

        assert!(!thai.contains('A'));
        assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
    }

    /// Each `GeneralCategoryGroup` set must equal the union of the sets of
    /// the individual `GeneralCategory` values that make up the group.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::{maps, sets};
        use icu::properties::{GeneralCategory, GeneralCategoryGroup};
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;

        let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let category_set = sets::for_general_category_group(category);
            let category_set = category_set
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            // Rebuild the group by hand from its member categories.
            let mut builder = CodePointInversionListBuilder::new();
            for subcategory in subcategories {
                let gc_set_data = &maps::general_category().get_set_for_value(*subcategory);
                let gc_set = gc_set_data.as_borrowed();
                for range in gc_set.iter_ranges() {
                    builder.add_range32(&range);
                }
            }
            let combined_set = builder.build();
            println!("{category:?} {subcategories:?}");
            assert_eq!(
                category_set.get_inversion_list_vec(),
                combined_set.get_inversion_list_vec()
            );
        };

        // One entry per group: the group and the categories it is made of.
        let cases: [(GeneralCategoryGroup, &[GeneralCategory]); 7] = [
            (
                GeneralCategoryGroup::Letter,
                &[
                    GeneralCategory::UppercaseLetter,
                    GeneralCategory::LowercaseLetter,
                    GeneralCategory::TitlecaseLetter,
                    GeneralCategory::ModifierLetter,
                    GeneralCategory::OtherLetter,
                ],
            ),
            (
                GeneralCategoryGroup::Other,
                &[
                    GeneralCategory::Control,
                    GeneralCategory::Format,
                    GeneralCategory::Unassigned,
                    GeneralCategory::PrivateUse,
                    GeneralCategory::Surrogate,
                ],
            ),
            (
                GeneralCategoryGroup::Mark,
                &[
                    GeneralCategory::SpacingMark,
                    GeneralCategory::EnclosingMark,
                    GeneralCategory::NonspacingMark,
                ],
            ),
            (
                GeneralCategoryGroup::Number,
                &[
                    GeneralCategory::DecimalNumber,
                    GeneralCategory::LetterNumber,
                    GeneralCategory::OtherNumber,
                ],
            ),
            (
                GeneralCategoryGroup::Punctuation,
                &[
                    GeneralCategory::ConnectorPunctuation,
                    GeneralCategory::DashPunctuation,
                    GeneralCategory::ClosePunctuation,
                    GeneralCategory::FinalPunctuation,
                    GeneralCategory::InitialPunctuation,
                    GeneralCategory::OtherPunctuation,
                    GeneralCategory::OpenPunctuation,
                ],
            ),
            (
                GeneralCategoryGroup::Symbol,
                &[
                    GeneralCategory::CurrencySymbol,
                    GeneralCategory::ModifierSymbol,
                    GeneralCategory::MathSymbol,
                    GeneralCategory::OtherSymbol,
                ],
            ),
            (
                GeneralCategoryGroup::Separator,
                &[
                    GeneralCategory::LineSeparator,
                    GeneralCategory::ParagraphSeparator,
                    GeneralCategory::SpaceSeparator,
                ],
            ),
        ];
        for (category, subcategories) in cases {
            test_group(category, subcategories);
        }
    }

    /// Surrogate code points are not valid `char`s, so they are probed with
    /// the `u32`-based `contains32`.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::maps;
        use icu::properties::GeneralCategory;

        let surrogates_data =
            maps::general_category().get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogates_data.as_borrowed();

        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));

        assert!(!surrogates.contains('A'));
    }
}