icu_properties/
sets.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! The functions in this module return a [`CodePointSetData`] containing
6//! the set of characters with a particular Unicode property.
7//!
8//! The descriptions of most properties are taken from [`TR44`], the documentation for the
9//! Unicode Character Database.  Some properties are instead defined in [`TR18`], the
10//! documentation for Unicode regular expressions. In particular, Annex C of this document
11//! defines properties for POSIX compatibility.
12//!
13//! [`CodePointSetData`]: crate::sets::CodePointSetData
14//! [`TR44`]: https://www.unicode.org/reports/tr44
15//! [`TR18`]: https://www.unicode.org/reports/tr18
16
17use crate::error::PropertiesError;
18use crate::provider::*;
19use crate::*;
20use core::iter::FromIterator;
21use core::ops::RangeInclusive;
22use icu_collections::codepointinvlist::CodePointInversionList;
23use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
24use icu_provider::prelude::*;
25
26//
27// CodePointSet* structs, impls, & macros
28// (a set with only code points)
29//
30
/// A wrapper around code point set data. It is returned by APIs that return Unicode
/// property data in a set-like form, ex: a set of code points sharing the same
/// value for a Unicode property. Access its data via the borrowed version,
/// [`CodePointSetDataBorrowed`].
#[derive(Debug)]
pub struct CodePointSetData {
    // Payload stored under the private, type-erased marker so that a single
    // wrapper type can hold the data of any set-valued property.
    data: DataPayload<ErasedSetlikeMarker>,
}
39
/// Private marker type for CodePointSetData
/// to work for all set properties at once
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedSetlikeMarker;
impl DataMarker for ErasedSetlikeMarker {
    // Same yokeable as the per-property markers accepted by
    // `CodePointSetData::from_data`, which is what allows the payload cast there.
    type Yokeable = PropertyCodePointSetV1<'static>;
}
47
48impl CodePointSetData {
49    /// Construct a borrowed version of this type that can be queried.
50    ///
51    /// This owned version if returned by functions that use a runtime data provider.
52    #[inline]
53    pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> {
54        CodePointSetDataBorrowed {
55            set: self.data.get(),
56        }
57    }
58
59    /// Construct a new one from loaded data
60    ///
61    /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead
62    pub fn from_data<M>(data: DataPayload<M>) -> Self
63    where
64        M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
65    {
66        Self { data: data.cast() }
67    }
68
69    /// Construct a new owned [`CodePointInversionList`]
70    pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self {
71        let set = PropertyCodePointSetV1::from_code_point_inversion_list(set);
72        CodePointSetData::from_data(DataPayload::<ErasedSetlikeMarker>::from_owned(set))
73    }
74
75    /// Convert this type to a [`CodePointInversionList`] as a borrowed value.
76    ///
77    /// The data backing this is extensible and supports multiple implementations.
78    /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be
79    /// added, and users may select which at data generation time.
80    ///
81    /// This method returns an `Option` in order to return `None` when the backing data provider
82    /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time
83    /// constraint.
84    pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> {
85        self.data.get().as_code_point_inversion_list()
86    }
87
88    /// Convert this type to a [`CodePointInversionList`], borrowing if possible,
89    /// otherwise allocating a new [`CodePointInversionList`].
90    ///
91    /// The data backing this is extensible and supports multiple implementations.
92    /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be
93    /// added, and users may select which at data generation time.
94    ///
95    /// The performance of the conversion to this specific return type will vary
96    /// depending on the data structure that is backing `self`.
97    pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> {
98        self.data.get().to_code_point_inversion_list()
99    }
100}
101
/// A borrowed wrapper around code point set data, returned by
/// [`CodePointSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
    // Plain reference to the set data; this is what makes the wrapper `Copy`
    // and lets the query methods take `self` by value.
    set: &'a PropertyCodePointSetV1<'a>,
}
108
impl CodePointSetDataBorrowed<'static> {
    /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`].
    ///
    /// Only available for the `'static` lifetime, because the resulting payload
    /// is built from a `'static` reference.
    ///
    /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some
    /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`].
    pub const fn static_to_owned(self) -> CodePointSetData {
        CodePointSetData {
            // Wrap the static reference itself in a payload.
            data: DataPayload::from_static_ref(self.set),
        }
    }
}
120
121impl<'a> CodePointSetDataBorrowed<'a> {
122    /// Check if the set contains a character
123    ///
124    /// ```rust
125    /// use icu::properties::sets;
126    ///
127    /// let alphabetic = sets::alphabetic();
128    ///
129    /// assert!(!alphabetic.contains('3'));
130    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
131    /// assert!(alphabetic.contains('A'));
132    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
133    /// ```
134    #[inline]
135    pub fn contains(self, ch: char) -> bool {
136        self.set.contains(ch)
137    }
138
139    /// Check if the set contains a character as a UTF32 code unit
140    ///
141    /// ```rust
142    /// use icu::properties::sets;
143    ///
144    /// let alphabetic = sets::alphabetic();
145    ///
146    /// assert!(!alphabetic.contains32(0x0A69));  // U+0A69 GURMUKHI DIGIT THREE
147    /// assert!(alphabetic.contains32(0x00C4));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
148    /// ```
149    #[inline]
150    pub fn contains32(self, ch: u32) -> bool {
151        self.set.contains32(ch)
152    }
153
154    // Yields an [`Iterator`] returning the ranges of the code points that are
155    /// included in the [`CodePointSetData`]
156    ///
157    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
158    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
159    /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`.
160    ///
161    /// # Example
162    ///
163    /// ```
164    /// use icu::properties::sets;
165    ///
166    /// let alphabetic = sets::alphabetic();
167    /// let mut ranges = alphabetic.iter_ranges();
168    ///
169    /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z'
170    /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z'
171    /// ```
172    #[inline]
173    pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
174        self.set.iter_ranges()
175    }
176
177    // Yields an [`Iterator`] returning the ranges of the code points that are
178    /// *not* included in the [`CodePointSetData`]
179    ///
180    /// Ranges are returned as [`RangeInclusive`], which is inclusive of its
181    /// `end` bound value. An end-inclusive behavior matches the ICU4C/J
182    /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`.
183    ///
184    /// # Example
185    ///
186    /// ```
187    /// use icu::properties::sets;
188    ///
189    /// let alphabetic = sets::alphabetic();
190    /// let mut ranges = alphabetic.iter_ranges();
191    ///
192    /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z'
193    /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z'
194    /// ```
195    #[inline]
196    pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a {
197        self.set.iter_ranges_complemented()
198    }
199}
200
201//
202// UnicodeSet* structs, impls, & macros
203// (a set with code points + strings)
204//
205
/// A wrapper around `UnicodeSet` data (characters and strings)
///
/// Query it through the borrowed version, [`UnicodeSetDataBorrowed`], obtained
/// from [`UnicodeSetData::as_borrowed()`].
#[derive(Debug)]
pub struct UnicodeSetData {
    // Payload stored under the private, type-erased marker (see
    // `UnicodeSetData::from_data`, which casts any compatible marker to it).
    data: DataPayload<ErasedUnicodeSetlikeMarker>,
}
211
/// Private marker type used by `UnicodeSetData` to hold the payload of any
/// marker whose yokeable is `PropertyUnicodeSetV1`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedUnicodeSetlikeMarker;
impl DataMarker for ErasedUnicodeSetlikeMarker {
    // Same yokeable as the markers accepted by `UnicodeSetData::from_data`.
    type Yokeable = PropertyUnicodeSetV1<'static>;
}
217
218impl UnicodeSetData {
219    /// Construct a borrowed version of this type that can be queried.
220    ///
221    /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
222    /// up front.
223    #[inline]
224    pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> {
225        UnicodeSetDataBorrowed {
226            set: self.data.get(),
227        }
228    }
229
230    /// Construct a new one from loaded data
231    ///
232    /// Typically it is preferable to use getters instead
233    pub fn from_data<M>(data: DataPayload<M>) -> Self
234    where
235        M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>,
236    {
237        Self { data: data.cast() }
238    }
239
240    /// Construct a new owned [`CodePointInversionListAndStringList`]
241    pub fn from_code_point_inversion_list_string_list(
242        set: CodePointInversionListAndStringList<'static>,
243    ) -> Self {
244        let set = PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set);
245        UnicodeSetData::from_data(DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(set))
246    }
247
248    /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value.
249    ///
250    /// The data backing this is extensible and supports multiple implementations.
251    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
252    /// added, and users may select which at data generation time.
253    ///
254    /// This method returns an `Option` in order to return `None` when the backing data provider
255    /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time
256    /// constraint.
257    pub fn as_code_point_inversion_list_string_list(
258        &self,
259    ) -> Option<&CodePointInversionListAndStringList<'_>> {
260        self.data.get().as_code_point_inversion_list_string_list()
261    }
262
263    /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible,
264    /// otherwise allocating a new [`CodePointInversionListAndStringList`].
265    ///
266    /// The data backing this is extensible and supports multiple implementations.
267    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
268    /// added, and users may select which at data generation time.
269    ///
270    /// The performance of the conversion to this specific return type will vary
271    /// depending on the data structure that is backing `self`.
272    pub fn to_code_point_inversion_list_string_list(
273        &self,
274    ) -> CodePointInversionListAndStringList<'_> {
275        self.data.get().to_code_point_inversion_list_string_list()
276    }
277}
278
/// A borrowed wrapper around `UnicodeSet` data (characters and strings), returned by
/// [`UnicodeSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct UnicodeSetDataBorrowed<'a> {
    // Plain reference to the set data; this is what makes the wrapper `Copy`.
    set: &'a PropertyUnicodeSetV1<'a>,
}
285
286impl<'a> UnicodeSetDataBorrowed<'a> {
287    /// Check if the set contains the string. Strings consisting of one character
288    /// are treated as a character/code point.
289    ///
290    /// This matches ICU behavior for ICU's `UnicodeSet`.
291    #[inline]
292    pub fn contains(self, s: &str) -> bool {
293        self.set.contains(s)
294    }
295
296    /// Check if the set contains a character as a UTF32 code unit
297    #[inline]
298    pub fn contains32(&self, cp: u32) -> bool {
299        self.set.contains32(cp)
300    }
301
302    /// Check if the set contains the code point corresponding to the Rust character.
303    #[inline]
304    pub fn contains_char(&self, ch: char) -> bool {
305        self.set.contains_char(ch)
306    }
307}
308
impl UnicodeSetDataBorrowed<'static> {
    /// Cheaply converts a [`UnicodeSetDataBorrowed<'static>`] into a [`UnicodeSetData`].
    ///
    /// Only available for the `'static` lifetime, because the resulting payload
    /// is built from a `'static` reference.
    ///
    /// Note: Due to branching and indirection, using [`UnicodeSetData`] might inhibit some
    /// compile-time optimizations that are possible with [`UnicodeSetDataBorrowed`].
    pub const fn static_to_owned(self) -> UnicodeSetData {
        UnicodeSetData {
            // Wrap the static reference itself in a payload.
            data: DataPayload::from_static_ref(self.set),
        }
    }
}
320
321pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError>
322where
323    M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>,
324    P: DataProvider<M> + ?Sized,
325{
326    Ok(provider
327        .load(Default::default())
328        .and_then(DataResponse::take_payload)
329        .map(CodePointSetData::from_data)?)
330}
331
332//
333// Binary property getter fns
334// (data as code point sets)
335//
336
// Generates the two public getters for one binary (set-valued) property:
//   * `$funcname(provider)` loads the set through a caller-supplied `DataProvider`
//     and returns an owned `CodePointSetData`;
//   * `$constname()` is a `const fn`, compiled only with the `compiled_data` feature,
//     returning a `CodePointSetDataBorrowed<'static>` over the baked singleton
//     `$singleton_name`.
// The doc attributes written in the invocation are attached to `$constname`; the docs
// of `$funcname` are synthesized here and point back to `$constname`.
macro_rules! make_code_point_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        // marker type the runtime provider must implement `DataProvider` for
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        // at least one doc attribute is required by the `+` repetition
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton_name:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        ///
        /// Note that this will return an owned version of the data. Functionality is available on
        /// the borrowed version, accessible through [`CodePointSetData::as_borrowed`].
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<CodePointSetData, PropertiesError> {
            // The marker type is inferred from the provider bound above.
            load_set_data(provider)
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> CodePointSetDataBorrowed<'static> {
            CodePointSetDataBorrowed {
                set: crate::provider::Baked::$singleton_name,
            }
        }
    }
}
368
// Generates `ascii_hex_digit()` (baked data) and `load_ascii_hex_digit()` (custom data provider).
make_code_point_set_property! {
    property: "ASCII_Hex_Digit";
    marker: AsciiHexDigitProperty;
    keyed_data_marker: AsciiHexDigitV1Marker;
    func:
    /// ASCII characters commonly used for the representation of hexadecimal numbers
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let ascii_hex_digit = sets::ascii_hex_digit();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1;
    pub fn load_ascii_hex_digit();
}
395
// Generates `alnum()` (baked data) and `load_alnum()` (custom data provider).
make_code_point_set_property! {
    property: "Alnum";
    marker: AlnumProperty;
    keyed_data_marker: AlnumV1Marker;
    func:
    /// Characters with the Alphabetic or Decimal_Number property.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1;
    pub fn load_alnum();
}
407
// Generates `alphabetic()` (baked data) and `load_alphabetic()` (custom data provider).
make_code_point_set_property! {
    property: "Alphabetic";
    marker: AlphabeticProperty;
    keyed_data_marker: AlphabeticV1Marker;
    func:
    /// Alphabetic characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

    pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1;
    pub fn load_alphabetic();
}
435
// Generates `bidi_control()` (baked data) and `load_bidi_control()` (custom data provider).
make_code_point_set_property! {
    property: "Bidi_Control";
    marker: BidiControlProperty;
    keyed_data_marker: BidiControlV1Marker;
    func:
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let bidi_control = sets::bidi_control();
    ///
    /// assert!(bidi_control.contains32(0x200F));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

    pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1;
    pub fn load_bidi_control();
}
462
// Generates `bidi_mirrored()` (baked data) and `load_bidi_mirrored()` (custom data provider).
make_code_point_set_property! {
    property: "Bidi_Mirrored";
    marker: BidiMirroredProperty;
    keyed_data_marker: BidiMirroredV1Marker;
    func:
    /// Characters that are mirrored in bidirectional text
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let bidi_mirrored = sets::bidi_mirrored();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

    pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1;
    pub fn load_bidi_mirrored();
}
490
// Generates `blank()` (baked data) and `load_blank()` (custom data provider).
make_code_point_set_property! {
    property: "Blank";
    marker: BlankProperty;
    keyed_data_marker: BlankV1Marker;
    func:
    /// Horizontal whitespace characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn blank() => SINGLETON_PROPS_BLANK_V1;
    pub fn load_blank();
}
501
// Generates `cased()` (baked data) and `load_cased()` (custom data provider).
make_code_point_set_property! {
    property: "Cased";
    marker: CasedProperty;
    keyed_data_marker: CasedV1Marker;
    func:
    /// Uppercase, lowercase, and titlecase characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let cased = sets::cased();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

    pub const fn cased() => SINGLETON_PROPS_CASED_V1;
    pub fn load_cased();
}
527
// Generates `case_ignorable()` (baked data) and `load_case_ignorable()` (custom data provider).
make_code_point_set_property! {
    property: "Case_Ignorable";
    marker: CaseIgnorableProperty;
    keyed_data_marker: CaseIgnorableV1Marker;
    func:
    /// Characters which are ignored for casing purposes
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let case_ignorable = sets::case_ignorable();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMDA
    /// ```

    pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1;
    pub fn load_case_ignorable();
}
553
// Generates `full_composition_exclusion()` (baked data) and
// `load_full_composition_exclusion()` (custom data provider).
make_code_point_set_property! {
    property: "Full_Composition_Exclusion";
    marker: FullCompositionExclusionProperty;
    keyed_data_marker: FullCompositionExclusionV1Marker;
    func:
    /// Characters that are excluded from composition
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1;
    pub fn load_full_composition_exclusion();
}
565
// Generates `changes_when_casefolded()` (baked data) and
// `load_changes_when_casefolded()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Casefolded";
    marker: ChangesWhenCasefoldedProperty;
    keyed_data_marker: ChangesWhenCasefoldedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under case folding
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_casefolded = sets::changes_when_casefolded();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

    pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1;
    pub fn load_changes_when_casefolded();
}
591
// Generates `changes_when_casemapped()` (baked data) and
// `load_changes_when_casemapped()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Casemapped";
    marker: ChangesWhenCasemappedProperty;
    keyed_data_marker: ChangesWhenCasemappedV1Marker;
    func:
    /// Characters which may change when they undergo case mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1;
    pub fn load_changes_when_casemapped();
}
602
// Generates `changes_when_nfkc_casefolded()` (baked data) and
// `load_changes_when_nfkc_casefolded()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_NFKC_Casefolded";
    marker: ChangesWhenNfkcCasefoldedProperty;
    keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker;
    func:
    /// Characters which are not identical to their NFKC_Casefold mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_nfkc_casefolded = sets::changes_when_nfkc_casefolded();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

    pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1;
    pub fn load_changes_when_nfkc_casefolded();
}
628
// Generates `changes_when_lowercased()` (baked data) and
// `load_changes_when_lowercased()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Lowercased";
    marker: ChangesWhenLowercasedProperty;
    keyed_data_marker: ChangesWhenLowercasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toLowercase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_lowercased = sets::changes_when_lowercased();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

    pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1;
    pub fn load_changes_when_lowercased();
}
654
// Generates `changes_when_titlecased()` (baked data) and
// `load_changes_when_titlecased()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Titlecased";
    marker: ChangesWhenTitlecasedProperty;
    keyed_data_marker: ChangesWhenTitlecasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toTitlecase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_titlecased = sets::changes_when_titlecased();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

    pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1;
    pub fn load_changes_when_titlecased();
}
680
// Generates `changes_when_uppercased()` (baked data) and
// `load_changes_when_uppercased()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Uppercased";
    marker: ChangesWhenUppercasedProperty;
    keyed_data_marker: ChangesWhenUppercasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toUppercase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_uppercased = sets::changes_when_uppercased();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

    pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1;
    pub fn load_changes_when_uppercased();
}
706
// Generates `dash()` (baked data) and `load_dash()` (custom data provider).
make_code_point_set_property! {
    property: "Dash";
    marker: DashProperty;
    keyed_data_marker: DashV1Marker;
    func:
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let dash = sets::dash();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

    pub const fn dash() => SINGLETON_PROPS_DASH_V1;
    pub fn load_dash();
}
734
// Generates `deprecated()` (baked data) and `load_deprecated()` (custom data provider).
make_code_point_set_property! {
    property: "Deprecated";
    marker: DeprecatedProperty;
    keyed_data_marker: DeprecatedV1Marker;
    func:
    /// Deprecated characters. No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let deprecated = sets::deprecated();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

    pub const fn deprecated() => SINGLETON_PROPS_DEP_V1;
    pub fn load_deprecated();
}
761
// Generates `default_ignorable_code_point()` (baked data) and
// `load_default_ignorable_code_point()` (custom data provider).
make_code_point_set_property! {
    property: "Default_Ignorable_Code_Point";
    marker: DefaultIgnorableCodePointProperty;
    keyed_data_marker: DefaultIgnorableCodePointV1Marker;
    func:
    /// For programmatic determination of default ignorable code points.  New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let default_ignorable_code_point = sets::default_ignorable_code_point();
    ///
    /// assert!(default_ignorable_code_point.contains32(0x180B));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

    pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1;
    pub fn load_default_ignorable_code_point();
}
790
// Generates `diacritic()` (baked data) and `load_diacritic()` (custom data provider).
make_code_point_set_property! {
    property: "Diacritic";
    marker: DiacriticProperty;
    keyed_data_marker: DiacriticV1Marker;
    func:
    /// Characters that linguistically modify the meaning of another character to which they apply
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let diacritic = sets::diacritic();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

    pub const fn diacritic() => SINGLETON_PROPS_DIA_V1;
    pub fn load_diacritic();
}
816
// Generates `emoji_modifier_base()` (baked data) and
// `load_emoji_modifier_base()` (custom data provider).
make_code_point_set_property! {
    property: "Emoji_Modifier_Base";
    marker: EmojiModifierBaseProperty;
    keyed_data_marker: EmojiModifierBaseV1Marker;
    func:
    /// Characters that can serve as a base for emoji modifiers
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji_modifier_base = sets::emoji_modifier_base();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

    pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1;
    pub fn load_emoji_modifier_base();
}
842
// Generates `emoji_component()` (baked data) and `load_emoji_component()` (custom data provider).
make_code_point_set_property! {
    property: "Emoji_Component";
    marker: EmojiComponentProperty;
    keyed_data_marker: EmojiComponentV1Marker;
    func:
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
    /// separate choices, such as base characters for emoji keycaps
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let emoji_component = sets::emoji_component();
    ///
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(emoji_component.contains32(0x20E3));  // COMBINING ENCLOSING KEYCAP
    /// assert!(emoji_component.contains('7'));
    /// assert!(!emoji_component.contains('T'));
    /// ```

    pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1;
    pub fn load_emoji_component();
}
871
872make_code_point_set_property! {
873    property: "Emoji_Modifier";
874    marker: EmojiModifierProperty;
875    keyed_data_marker: EmojiModifierV1Marker;
876    func:
877    /// Characters that are emoji modifiers.
878    ///
879    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
880    ///
881    /// [📚 Help choosing a constructor](icu_provider::constructors)
882    ///
883    /// # Example
884    ///
885    /// ```
886    /// use icu::properties::sets;
887    ///
888    /// let emoji_modifier = sets::emoji_modifier();
889    ///
890    /// assert!(emoji_modifier.contains32(0x1F3FD));  // U+1F3FD EMOJI MODIFIER FITZPATRICK TYPE-4
891    /// assert!(!emoji_modifier.contains32(0x200C));  // U+200C ZERO WIDTH NON-JOINER
892    /// ```
893
894    pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1;
895    pub fn load_emoji_modifier();
896}
897
898make_code_point_set_property! {
899    property: "Emoji";
900    marker: EmojiProperty;
901    keyed_data_marker: EmojiV1Marker;
902    func:
903    /// Characters that are emoji.
904    ///
905    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
906    ///
907    /// [📚 Help choosing a constructor](icu_provider::constructors)
908    ///
909    /// # Example
910    ///
911    /// ```
912    /// use icu::properties::sets;
913    ///
914    /// let emoji = sets::emoji();
915    ///
916    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
917    /// assert!(!emoji.contains('V'));  // U+0056 LATIN CAPITAL LETTER V
918    /// ```
919
920    pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1;
921    pub fn load_emoji();
922}
923
924make_code_point_set_property! {
925    property: "Emoji_Presentation";
926    marker: EmojiPresentationProperty;
927    keyed_data_marker: EmojiPresentationV1Marker;
928    func:
929    /// Characters that have emoji presentation by default.
930    ///
931    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
932    ///
933    /// [📚 Help choosing a constructor](icu_provider::constructors)
934    ///
935    /// # Example
936    ///
937    /// ```
938    /// use icu::properties::sets;
939    ///
940    /// let emoji_presentation = sets::emoji_presentation();
941    ///
942    /// assert!(emoji_presentation.contains('🦬'));  // U+1F9AC BISON
943    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
944    /// ```
945
946    pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1;
947    pub fn load_emoji_presentation();
948}
949
950make_code_point_set_property! {
951    property: "Extender";
952    marker: ExtenderProperty;
953    keyed_data_marker: ExtenderV1Marker;
954    func:
955    /// Characters whose principal function is to extend the value of a preceding alphabetic
956    /// character or the shape of adjacent characters, e.g. iteration and prolonged sound marks.
957    ///
958    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
959    ///
960    /// [📚 Help choosing a constructor](icu_provider::constructors)
961    ///
962    /// # Example
963    ///
964    /// ```
965    /// use icu::properties::sets;
966    ///
967    /// let extender = sets::extender();
968    ///
969    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
970    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
971    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
972    /// ```
973
974    pub const fn extender() => SINGLETON_PROPS_EXT_V1;
975    pub fn load_extender();
976}
977
978make_code_point_set_property! {
979    property: "Extended_Pictographic";
980    marker: ExtendedPictographicProperty;
981    keyed_data_marker: ExtendedPictographicV1Marker;
982    func:
983    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
984    /// emoji characters.
985    ///
986    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
987    ///
988    /// [📚 Help choosing a constructor](icu_provider::constructors)
989    ///
990    /// # Example
991    ///
992    /// ```
993    /// use icu::properties::sets;
994    ///
995    /// let extended_pictographic = sets::extended_pictographic();
996    ///
997    /// assert!(extended_pictographic.contains('🥳'));  // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
998    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
999    /// ```
1000
1001    pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1;
1002    pub fn load_extended_pictographic();
1003}
1004
1005make_code_point_set_property! {
1006    property: "Graph";
1007    marker: GraphProperty;
1008    keyed_data_marker: GraphV1Marker;
1009    func:
1010    /// Visible characters.
1011    /// This is defined for POSIX compatibility; see Annex C of TR18 (Unicode regular expressions).
1012
1013    pub const fn graph() => SINGLETON_PROPS_GRAPH_V1;
1014    pub fn load_graph();
1015}
1016
1017make_code_point_set_property! {
1018    property: "Grapheme_Base";
1019    marker: GraphemeBaseProperty;
1020    keyed_data_marker: GraphemeBaseV1Marker;
1021    func:
1022    /// Property used together with the definition of Standard Korean Syllable Block to define
1023    /// "Grapheme base". See D58 in Chapter 3, "Conformance", of the Unicode Standard.
1024    ///
1025    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1026    ///
1027    /// [📚 Help choosing a constructor](icu_provider::constructors)
1028    ///
1029    /// # Example
1030    ///
1031    /// ```
1032    /// use icu::properties::sets;
1033    ///
1034    /// let grapheme_base = sets::grapheme_base();
1035    ///
1036    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
1037    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
1038    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
1039    /// ```
1040
1041    pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1;
1042    pub fn load_grapheme_base();
1043}
1044
1045make_code_point_set_property! {
1046    property: "Grapheme_Extend";
1047    marker: GraphemeExtendProperty;
1048    keyed_data_marker: GraphemeExtendV1Marker;
1049    func:
1050    /// Property used to define "Grapheme extender". See D59 in Chapter 3, "Conformance", of
1051    /// the Unicode Standard.
1052    ///
1053    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1054    ///
1055    /// [📚 Help choosing a constructor](icu_provider::constructors)
1056    ///
1057    /// # Example
1058    ///
1059    /// ```
1060    /// use icu::properties::sets;
1061    ///
1062    /// let grapheme_extend = sets::grapheme_extend();
1063    ///
1064    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
1065    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
1066    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
1067    /// ```
1068
1069    pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1;
1070    pub fn load_grapheme_extend();
1071}
1072
1073make_code_point_set_property! {
1074    property: "Grapheme_Link";
1075    marker: GraphemeLinkProperty;
1076    keyed_data_marker: GraphemeLinkV1Marker;
1077    func:
1078    /// Deprecated Unicode property, formerly proposed for programmatic determination of
1079    /// grapheme cluster boundaries.
1080
1081    pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1;
1082    pub fn load_grapheme_link();
1083}
1084
1085make_code_point_set_property! {
1086    property: "Hex_Digit";
1087    marker: HexDigitProperty;
1088    keyed_data_marker: HexDigitV1Marker;
1089    func:
1090    /// Characters commonly used for the representation of hexadecimal numbers, plus their
1091    /// compatibility equivalents.
1092    ///
1093    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1094    ///
1095    /// [📚 Help choosing a constructor](icu_provider::constructors)
1096    ///
1097    /// # Example
1098    ///
1099    /// ```
1100    /// use icu::properties::sets;
1101    ///
1102    /// let hex_digit = sets::hex_digit();
1103    ///
1104    /// assert!(hex_digit.contains('0'));  // U+0030 DIGIT ZERO
1105    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
1106    /// assert!(hex_digit.contains('f'));  // U+0066 LATIN SMALL LETTER F
1107    /// assert!(hex_digit.contains('\u{FF46}'));  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
1108    /// assert!(hex_digit.contains('\u{FF26}'));  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
1109    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1110    /// ```
1111
1112    pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1;
1113    pub fn load_hex_digit();
1114}
1115
1116make_code_point_set_property! {
1117    property: "Hyphen";
1118    marker: HyphenProperty;
1119    keyed_data_marker: HyphenV1Marker;
1120    func:
1121    /// Deprecated Unicode property: dashes which are used to mark connections between pieces of
1122    /// words, plus the Katakana middle dot.
1123
1124    pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1;
1125    pub fn load_hyphen();
1126}
1127
1128make_code_point_set_property! {
1129    property: "Id_Continue";
1130    marker: IdContinueProperty;
1131    keyed_data_marker: IdContinueV1Marker;
1132    func:
1133    /// Characters that can come after the first character in an identifier. If using NFKC to
1134    /// fold differences between characters, use [`load_xid_continue`] instead.  See
1135    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
1136    /// more details.
1137    ///
1138    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1139    ///
1140    /// [📚 Help choosing a constructor](icu_provider::constructors)
1141    ///
1142    /// # Example
1143    ///
1144    /// ```
1145    /// use icu::properties::sets;
1146    ///
1147    /// let id_continue = sets::id_continue();
1148    ///
1149    /// assert!(id_continue.contains('x'));
1150    /// assert!(id_continue.contains('1'));
1151    /// assert!(id_continue.contains('_'));  // U+005F LOW LINE
1152    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
1153    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
1154    /// assert!(id_continue.contains32(0xFC5E));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1155    /// ```
1156
1157    pub const fn id_continue() => SINGLETON_PROPS_IDC_V1;
1158    pub fn load_id_continue();
1159}
1160
1161make_code_point_set_property! {
1162    property: "Ideographic";
1163    marker: IdeographicProperty;
1164    keyed_data_marker: IdeographicV1Marker;
1165    func:
1166    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
1167    /// ideographs, or related siniform ideographs.
1168    ///
1169    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1170    ///
1171    /// [📚 Help choosing a constructor](icu_provider::constructors)
1172    ///
1173    /// # Example
1174    ///
1175    /// ```
1176    /// use icu::properties::sets;
1177    ///
1178    /// let ideographic = sets::ideographic();
1179    ///
1180    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
1181    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
1182    /// ```
1183
1184    pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1;
1185    pub fn load_ideographic();
1186}
1187
1188make_code_point_set_property! {
1189    property: "Id_Start";
1190    marker: IdStartProperty;
1191    keyed_data_marker: IdStartV1Marker;
1192    func:
1193    /// Characters that can begin an identifier. If using NFKC to fold differences between
1194    /// characters, use [`load_xid_start`] instead.  See [`Unicode Standard Annex
1195    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
1196    ///
1197    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1198    ///
1199    /// [📚 Help choosing a constructor](icu_provider::constructors)
1200    ///
1201    /// # Example
1202    ///
1203    /// ```
1204    /// use icu::properties::sets;
1205    ///
1206    /// let id_start = sets::id_start();
1207    ///
1208    /// assert!(id_start.contains('x'));
1209    /// assert!(!id_start.contains('1'));
1210    /// assert!(!id_start.contains('_'));  // U+005F LOW LINE
1211    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
1212    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
1213    /// assert!(id_start.contains32(0xFC5E));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1214    /// ```
1215
1216    pub const fn id_start() => SINGLETON_PROPS_IDS_V1;
1217    pub fn load_id_start();
1218}
1219
1220make_code_point_set_property! {
1221    property: "Ids_Binary_Operator";
1222    marker: IdsBinaryOperatorProperty;
1223    keyed_data_marker: IdsBinaryOperatorV1Marker;
1224    func:
1225    /// Binary operator characters used in Ideographic Description Sequences.
1226    ///
1227    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1228    ///
1229    /// [📚 Help choosing a constructor](icu_provider::constructors)
1230    ///
1231    /// # Example
1232    ///
1233    /// ```
1234    /// use icu::properties::sets;
1235    ///
1236    /// let ids_binary_operator = sets::ids_binary_operator();
1237    ///
1238    /// assert!(ids_binary_operator.contains32(0x2FF5));  // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
1239    /// assert!(!ids_binary_operator.contains32(0x3006));  // U+3006 IDEOGRAPHIC CLOSING MARK
1240    /// ```
1241
1242    pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1;
1243    pub fn load_ids_binary_operator();
1244}
1245
1246make_code_point_set_property! {
1247    property: "Ids_Trinary_Operator";
1248    marker: IdsTrinaryOperatorProperty;
1249    keyed_data_marker: IdsTrinaryOperatorV1Marker;
1250    func:
1251    /// Trinary operator characters used in Ideographic Description Sequences.
1252    ///
1253    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1254    ///
1255    /// [📚 Help choosing a constructor](icu_provider::constructors)
1256    ///
1257    /// # Example
1258    ///
1259    /// ```
1260    /// use icu::properties::sets;
1261    ///
1262    /// let ids_trinary_operator = sets::ids_trinary_operator();
1263    ///
1264    /// assert!(ids_trinary_operator.contains32(0x2FF2));  // U+2FF2 IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
1265    /// assert!(ids_trinary_operator.contains32(0x2FF3));  // U+2FF3 IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
1266    /// assert!(!ids_trinary_operator.contains32(0x2FF4));  // U+2FF4 IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
1267    /// assert!(!ids_trinary_operator.contains32(0x2FF5));  // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
1268    /// assert!(!ids_trinary_operator.contains32(0x3006));  // U+3006 IDEOGRAPHIC CLOSING MARK
1269    /// ```
1270
1271    pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1;
1272    pub fn load_ids_trinary_operator();
1273}
1274
1275make_code_point_set_property! {
1276    property: "Join_Control";
1277    marker: JoinControlProperty;
1278    keyed_data_marker: JoinControlV1Marker;
1279    func:
1280    /// Format control characters which have specific functions for control of cursive joining
1281    /// and ligation.
1282    ///
1283    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1284    ///
1285    /// [📚 Help choosing a constructor](icu_provider::constructors)
1286    ///
1287    /// # Example
1288    ///
1289    /// ```
1290    /// use icu::properties::sets;
1291    ///
1292    /// let join_control = sets::join_control();
1293    ///
1294    /// assert!(join_control.contains32(0x200C));  // U+200C ZERO WIDTH NON-JOINER
1295    /// assert!(join_control.contains32(0x200D));  // U+200D ZERO WIDTH JOINER
1296    /// assert!(!join_control.contains32(0x200E));  // U+200E LEFT-TO-RIGHT MARK
1297    /// ```
1298
1299    pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1;
1300    pub fn load_join_control();
1301}
1302
1303make_code_point_set_property! {
1304    property: "Logical_Order_Exception";
1305    marker: LogicalOrderExceptionProperty;
1306    keyed_data_marker: LogicalOrderExceptionV1Marker;
1307    func:
1308    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
1309    ///
1310    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1311    ///
1312    /// [📚 Help choosing a constructor](icu_provider::constructors)
1313    ///
1314    /// # Example
1315    ///
1316    /// ```
1317    /// use icu::properties::sets;
1318    ///
1319    /// let logical_order_exception = sets::logical_order_exception();
1320    ///
1321    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
1322    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
1323    /// ```
1324
1325    pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1;
1326    pub fn load_logical_order_exception();
1327}
1328
1329make_code_point_set_property! {
1330    property: "Lowercase";
1331    marker: LowercaseProperty;
1332    keyed_data_marker: LowercaseV1Marker;
1333    func:
1334    /// Lowercase characters.
1335    ///
1336    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1337    ///
1338    /// [📚 Help choosing a constructor](icu_provider::constructors)
1339    ///
1340    /// # Example
1341    ///
1342    /// ```
1343    /// use icu::properties::sets;
1344    ///
1345    /// let lowercase = sets::lowercase();
1346    ///
1347    /// assert!(lowercase.contains('a'));
1348    /// assert!(!lowercase.contains('A'));
1349    /// ```
1350
1351    pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1;
1352    pub fn load_lowercase();
1353}
1354
1355make_code_point_set_property! {
1356    property: "Math";
1357    marker: MathProperty;
1358    keyed_data_marker: MathV1Marker;
1359    func:
1360    /// Characters used in mathematical notation.
1361    ///
1362    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1363    ///
1364    /// [📚 Help choosing a constructor](icu_provider::constructors)
1365    ///
1366    /// # Example
1367    ///
1368    /// ```
1369    /// use icu::properties::sets;
1370    ///
1371    /// let math = sets::math();
1372    ///
1373    /// assert!(math.contains('='));
1374    /// assert!(math.contains('+'));
1375    /// assert!(!math.contains('-'));  // U+002D HYPHEN-MINUS
1376    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
1377    /// assert!(!math.contains('/'));  // U+002F SOLIDUS
1378    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
1379    /// ```
1380
1381    pub const fn math() => SINGLETON_PROPS_MATH_V1;
1382    pub fn load_math();
1383}
1384
1385make_code_point_set_property! {
1386    property: "Noncharacter_Code_Point";
1387    marker: NoncharacterCodePointProperty;
1388    keyed_data_marker: NoncharacterCodePointV1Marker;
1389    func:
1390    /// Code points permanently reserved for internal use.
1391    ///
1392    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1393    ///
1394    /// [📚 Help choosing a constructor](icu_provider::constructors)
1395    ///
1396    /// # Example
1397    ///
1398    /// ```
1399    /// use icu::properties::sets;
1400    ///
1401    /// let noncharacter_code_point = sets::noncharacter_code_point();
1402    ///
1403    /// assert!(noncharacter_code_point.contains32(0xFDD0));
1404    /// assert!(noncharacter_code_point.contains32(0xFFFF));
1405    /// assert!(!noncharacter_code_point.contains32(0x10000));  // U+10000 LINEAR B SYLLABLE B008 A
1406    /// ```
1407
1408    pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1;
1409    pub fn load_noncharacter_code_point();
1410}
1411
1412make_code_point_set_property! {
1413    property: "NFC_Inert";
1414    marker: NfcInertProperty;
1415    keyed_data_marker: NfcInertV1Marker;
1416    func:
1417    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
1418
1419    pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1;
1420    pub fn load_nfc_inert();
1421}
1422
1423make_code_point_set_property! {
1424    property: "NFD_Inert";
1425    marker: NfdInertProperty;
1426    keyed_data_marker: NfdInertV1Marker;
1427    func:
1428    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
1429
1430    pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1;
1431    pub fn load_nfd_inert();
1432}
1433
1434make_code_point_set_property! {
1435    property: "NFKC_Inert";
1436    marker: NfkcInertProperty;
1437    keyed_data_marker: NfkcInertV1Marker;
1438    func:
1439    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
1440
1441    pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1;
1442    pub fn load_nfkc_inert();
1443}
1444
1445make_code_point_set_property! {
1446    property: "NFKD_Inert";
1447    marker: NfkdInertProperty;
1448    keyed_data_marker: NfkdInertV1Marker;
1449    func:
1450    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
1451
1452    pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1;
1453    pub fn load_nfkd_inert();
1454}
1455
1456make_code_point_set_property! {
1457    property: "Pattern_Syntax";
1458    marker: PatternSyntaxProperty;
1459    keyed_data_marker: PatternSyntaxV1Marker;
1460    func:
1461    /// Characters used as syntax in patterns (such as regular expressions). See [`Unicode
1462    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
1463    /// details.
1464    ///
1465    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1466    ///
1467    /// [📚 Help choosing a constructor](icu_provider::constructors)
1468    ///
1469    /// # Example
1470    ///
1471    /// ```
1472    /// use icu::properties::sets;
1473    ///
1474    /// let pattern_syntax = sets::pattern_syntax();
1475    ///
1476    /// assert!(pattern_syntax.contains('{'));  // U+007B LEFT CURLY BRACKET
1477    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
1478    /// assert!(!pattern_syntax.contains('0'));  // U+0030 DIGIT ZERO
1479    /// ```
1480
1481    pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1;
1482    pub fn load_pattern_syntax();
1483}
1484
1485make_code_point_set_property! {
1486    property: "Pattern_White_Space";
1487    marker: PatternWhiteSpaceProperty;
1488    keyed_data_marker: PatternWhiteSpaceV1Marker;
1489    func:
1490    /// Characters used as whitespace in patterns (such as regular expressions).  See
1491    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
1492    /// more details.
1493    ///
1494    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1495    ///
1496    /// [📚 Help choosing a constructor](icu_provider::constructors)
1497    ///
1498    /// # Example
1499    ///
1500    /// ```
1501    /// use icu::properties::sets;
1502    ///
1503    /// let pattern_white_space = sets::pattern_white_space();
1504    ///
1505    /// assert!(pattern_white_space.contains(' '));  // U+0020 SPACE
1506    /// assert!(pattern_white_space.contains32(0x2029));  // U+2029 PARAGRAPH SEPARATOR
1507    /// assert!(pattern_white_space.contains32(0x000A));  // U+000A NEW LINE
1508    /// assert!(!pattern_white_space.contains32(0x00A0));  // U+00A0 NO-BREAK SPACE
1509    /// ```
1510
1511    pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1;
1512    pub fn load_pattern_white_space();
1513}
1514
1515make_code_point_set_property! {
1516    property: "Prepended_Concatenation_Mark";
1517    marker: PrependedConcatenationMarkProperty;
1518    keyed_data_marker: PrependedConcatenationMarkV1Marker;
1519    func:
1520    /// A small class of visible format controls that precede and then span a sequence of
1521    /// other characters, usually digits.
1522
1523    pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1;
1524    pub fn load_prepended_concatenation_mark();
1525}
1526
1527make_code_point_set_property! {
1528    property: "Print";
1529    marker: PrintProperty;
1530    keyed_data_marker: PrintV1Marker;
1531    func:
1532    /// Printable characters (visible characters and whitespace).
1533    /// This is defined for POSIX compatibility; see Annex C of TR18 (Unicode regular expressions).
1534
1535    pub const fn print() => SINGLETON_PROPS_PRINT_V1;
1536    pub fn load_print();
1537}
1538
1539make_code_point_set_property! {
1540    property: "Quotation_Mark";
1541    marker: QuotationMarkProperty;
1542    keyed_data_marker: QuotationMarkV1Marker;
1543    func:
1544    /// Punctuation characters that function as quotation marks.
1545    ///
1546    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1547    ///
1548    /// [📚 Help choosing a constructor](icu_provider::constructors)
1549    ///
1550    /// # Example
1551    ///
1552    /// ```
1553    /// use icu::properties::sets;
1554    ///
1555    /// let quotation_mark = sets::quotation_mark();
1556    ///
1557    /// assert!(quotation_mark.contains('\''));  // U+0027 APOSTROPHE
1558    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
1559    /// assert!(!quotation_mark.contains('<'));  // U+003C LESS-THAN SIGN
1560    /// ```
1561
1562    pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1;
1563    pub fn load_quotation_mark();
1564}
1565
1566make_code_point_set_property! {
1567    property: "Radical";
1568    marker: RadicalProperty;
1569    keyed_data_marker: RadicalV1Marker;
1570    func:
1571    /// Characters used in the definition of Ideographic Description Sequences.
1572    ///
1573    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1574    ///
1575    /// [📚 Help choosing a constructor](icu_provider::constructors)
1576    ///
1577    /// # Example
1578    ///
1579    /// ```
1580    /// use icu::properties::sets;
1581    ///
1582    /// let radical = sets::radical();
1583    ///
1584    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
1585    /// assert!(!radical.contains('\u{F95E}'));  // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
1586    /// ```
1587
1588    pub const fn radical() => SINGLETON_PROPS_RADICAL_V1;
1589    pub fn load_radical();
1590}
1591
1592make_code_point_set_property! {
1593    property: "Regional_Indicator";
1594    marker: RegionalIndicatorProperty;
1595    keyed_data_marker: RegionalIndicatorV1Marker;
1596    func:
1597    /// Regional indicator characters (U+1F1E6..U+1F1FF).
1598    ///
1599    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1600    ///
1601    /// [📚 Help choosing a constructor](icu_provider::constructors)
1602    ///
1603    /// # Example
1604    ///
1605    /// ```
1606    /// use icu::properties::sets;
1607    ///
1608    /// let regional_indicator = sets::regional_indicator();
1609    ///
1610    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
1611    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
1612    /// assert!(!regional_indicator.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
1613    /// ```
1614
1615    pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1;
1616    pub fn load_regional_indicator();
1617}
1618
1619make_code_point_set_property! {
1620    property: "Soft_Dotted";
1621    marker: SoftDottedProperty;
1622    keyed_data_marker: SoftDottedV1Marker;
1623    func:
1624    /// Characters with a "soft dot", like i or j. An accent placed on these characters causes
1625    /// the dot to disappear.
1626    ///
1627    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1628    ///
1629    /// [📚 Help choosing a constructor](icu_provider::constructors)
1630    ///
1631    /// # Example
1632    ///
1633    /// ```
1634    /// use icu::properties::sets;
1635    ///
1636    /// let soft_dotted = sets::soft_dotted();
1637    ///
1638    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
1639    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
1640    /// ```
1641
1642    pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1;
1643    pub fn load_soft_dotted();
1644}
1645
1646make_code_point_set_property! {
1647    property: "Segment_Starter";
1648    marker: SegmentStarterProperty;
1649    keyed_data_marker: SegmentStarterV1Marker;
1650    func:
1651    /// Characters that are starters in terms of Unicode normalization and combining character
1652    /// sequences.
1653
1654    pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1;
1655    pub fn load_segment_starter();
1656}
1657
1658make_code_point_set_property! {
1659    property: "Case_Sensitive";
1660    marker: CaseSensitiveProperty;
1661    keyed_data_marker: CaseSensitiveV1Marker;
1662    func:
1663    /// Characters that are either the source of a case mapping or in the target of a case
1664    /// mapping.
1665
1666    pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1;
1667    pub fn load_case_sensitive();
1668}
1669
1670make_code_point_set_property! {
1671    property: "Sentence_Terminal";
1672    marker: SentenceTerminalProperty;
1673    keyed_data_marker: SentenceTerminalV1Marker;
1674    func:
1675    /// Punctuation characters that generally mark the end of sentences.
1676    ///
1677    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1678    ///
1679    /// [📚 Help choosing a constructor](icu_provider::constructors)
1680    ///
1681    /// # Example
1682    ///
1683    /// ```
1684    /// use icu::properties::sets;
1685    ///
1686    /// let sentence_terminal = sets::sentence_terminal();
1687    ///
1688    /// assert!(sentence_terminal.contains('.'));  // U+002E FULL STOP
1689    /// assert!(sentence_terminal.contains('?'));  // U+003F QUESTION MARK
1690    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
1691    /// assert!(!sentence_terminal.contains(','));  // U+002C COMMA
1692    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
1693    /// ```
1694
1695    pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1;
1696    pub fn load_sentence_terminal();
1697}
1698
1699make_code_point_set_property! {
1700    property: "Terminal_Punctuation";
1701    marker: TerminalPunctuationProperty;
1702    keyed_data_marker: TerminalPunctuationV1Marker;
1703    func:
1704    /// Punctuation characters that generally mark the end of textual units.
1705    ///
1706    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1707    ///
1708    /// [📚 Help choosing a constructor](icu_provider::constructors)
1709    ///
1710    /// # Example
1711    ///
1712    /// ```
1713    /// use icu::properties::sets;
1714    ///
1715    /// let terminal_punctuation = sets::terminal_punctuation();
1716    ///
1717    /// assert!(terminal_punctuation.contains('.'));  // U+002E FULL STOP
1718    /// assert!(terminal_punctuation.contains('?'));  // U+003F QUESTION MARK
1719    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
1720    /// assert!(terminal_punctuation.contains(','));  // U+002C COMMA
1721    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
1722    /// ```
1723
1724    pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1;
1725    pub fn load_terminal_punctuation();
1726}
1727
1728make_code_point_set_property! {
1729    property: "Unified_Ideograph";
1730    marker: UnifiedIdeographProperty;
1731    keyed_data_marker: UnifiedIdeographV1Marker;
1732    func:
1733    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
1734    ///
1735    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1736    ///
1737    /// [📚 Help choosing a constructor](icu_provider::constructors)
1738    ///
1739    /// # Example
1740    ///
1741    /// ```
1742    /// use icu::properties::sets;
1743    ///
1744    /// let unified_ideograph = sets::unified_ideograph();
1745    ///
1746    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
1747    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
1748    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178
1749    /// ```
1750
1751    pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1;
1752    pub fn load_unified_ideograph();
1753}
1754
1755make_code_point_set_property! {
1756    property: "Uppercase";
1757    marker: UppercaseProperty;
1758    keyed_data_marker: UppercaseV1Marker;
1759    func:
1760    /// Uppercase characters
1761    ///
1762    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1763    ///
1764    /// [📚 Help choosing a constructor](icu_provider::constructors)
1765    ///
1766    /// # Example
1767    ///
1768    /// ```
1769    /// use icu::properties::sets;
1770    ///
1771    /// let uppercase = sets::uppercase();
1772    ///
1773    /// assert!(uppercase.contains('U'));
1774    /// assert!(!uppercase.contains('u'));
1775    /// ```
1776
1777    pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1;
1778    pub fn load_uppercase();
1779}
1780
1781make_code_point_set_property! {
1782    property: "Variation_Selector";
1783    marker: VariationSelectorProperty;
1784    keyed_data_marker: VariationSelectorV1Marker;
1785    func:
1786    /// Characters that are Variation Selectors.
1787    ///
1788    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1789    ///
1790    /// [📚 Help choosing a constructor](icu_provider::constructors)
1791    ///
1792    /// # Example
1793    ///
1794    /// ```
1795    /// use icu::properties::sets;
1796    ///
1797    /// let variation_selector = sets::variation_selector();
1798    ///
1799    /// assert!(variation_selector.contains32(0x180D));  // MONGOLIAN FREE VARIATION SELECTOR THREE
1800    /// assert!(!variation_selector.contains32(0x303E));  // IDEOGRAPHIC VARIATION INDICATOR
1801    /// assert!(variation_selector.contains32(0xFE0F));  // VARIATION SELECTOR-16
1802    /// assert!(!variation_selector.contains32(0xFE10));  // PRESENTATION FORM FOR VERTICAL COMMA
1803    /// assert!(variation_selector.contains32(0xE01EF));  // VARIATION SELECTOR-256
1804    /// ```
1805
1806    pub const fn variation_selector() => SINGLETON_PROPS_VS_V1;
1807    pub fn load_variation_selector();
1808}
1809
1810make_code_point_set_property! {
1811    property: "White_Space";
1812    marker: WhiteSpaceProperty;
1813    keyed_data_marker: WhiteSpaceV1Marker;
1814    func:
1815    /// Spaces, separator characters and other control characters which should be treated by
1816    /// programming languages as "white space" for the purpose of parsing elements
1817    ///
1818    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1819    ///
1820    /// [📚 Help choosing a constructor](icu_provider::constructors)
1821    ///
1822    /// # Example
1823    ///
1824    /// ```
1825    /// use icu::properties::sets;
1826    ///
1827    /// let white_space = sets::white_space();
1828    ///
1829    /// assert!(white_space.contains(' '));
1830    /// assert!(white_space.contains32(0x000A));  // NEW LINE
1831    /// assert!(white_space.contains32(0x00A0));  // NO-BREAK SPACE
1832    /// assert!(!white_space.contains32(0x200B));  // ZERO WIDTH SPACE
1833    /// ```
1834
1835    pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1;
1836    pub fn load_white_space();
1837}
1838
1839make_code_point_set_property! {
1840    property: "Xdigit";
1841    marker: XdigitProperty;
1842    keyed_data_marker: XdigitV1Marker;
1843    func:
1844    /// Hexadecimal digits
1845    /// This is defined for POSIX compatibility.
1846
1847    pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1;
1848    pub fn load_xdigit();
1849}
1850
1851make_code_point_set_property! {
1852    property: "XID_Continue";
1853    marker: XidContinueProperty;
1854    keyed_data_marker: XidContinueV1Marker;
1855    func:
1856    /// Characters that can come after the first character in an identifier.  See [`Unicode Standard Annex
1857    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
1858    ///
1859    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1860    ///
1861    /// [📚 Help choosing a constructor](icu_provider::constructors)
1862    ///
1863    /// # Example
1864    ///
1865    /// ```
1866    /// use icu::properties::sets;
1867    ///
1868    /// let xid_continue = sets::xid_continue();
1869    ///
1870    /// assert!(xid_continue.contains('x'));
1871    /// assert!(xid_continue.contains('1'));
1872    /// assert!(xid_continue.contains('_'));
1873    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
1874    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
1875    /// assert!(!xid_continue.contains32(0xFC5E));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1876    /// ```
1877
1878    pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1;
1879    pub fn load_xid_continue();
1880}
1881
1882make_code_point_set_property! {
1883    property: "XID_Start";
1884    marker: XidStartProperty;
1885    keyed_data_marker: XidStartV1Marker;
1886    func:
1887    /// Characters that can begin an identifier. See [`Unicode
1888    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
1889    /// details.
1890    ///
1891    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1892    ///
1893    /// [📚 Help choosing a constructor](icu_provider::constructors)
1894    ///
1895    /// # Example
1896    ///
1897    /// ```
1898    /// use icu::properties::sets;
1899    ///
1900    /// let xid_start = sets::xid_start();
1901    ///
1902    /// assert!(xid_start.contains('x'));
1903    /// assert!(!xid_start.contains('1'));
1904    /// assert!(!xid_start.contains('_'));
1905    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
1906    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
1907    /// assert!(!xid_start.contains32(0xFC5E));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
1908    /// ```
1909
1910    pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1;
1911    pub fn load_xid_start();
1912}
1913
1914//
1915// Binary property getter fns
1916// (data as sets of strings + code points)
1917//
1918
// Generates the pair of getters for a binary property whose data is a set of
// strings plus code points:
//   * `$funcname(provider)` loads the data from a caller-supplied `DataProvider`
//     and returns an owned `UnicodeSetData`;
//   * `$constname()` (only with the `compiled_data` feature) returns a borrowed view
//     over the baked singleton `$singleton`, and carries the doc comments passed in.
macro_rules! make_unicode_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<UnicodeSetData, PropertiesError> {
            // Both the load and the payload extraction can fail; `?` converts
            // either error into a `PropertiesError`.
            let payload = provider.load(Default::default())?.take_payload()?;
            Ok(UnicodeSetData::from_data(payload))
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> UnicodeSetDataBorrowed<'static> {
            UnicodeSetDataBorrowed {
                set: crate::provider::Baked::$singleton
            }
        }
    }
}
1946
1947make_unicode_set_property! {
1948    property: "Basic_Emoji";
1949    marker: BasicEmojiProperty;
1950    keyed_data_marker: BasicEmojiV1Marker;
1951    func:
1952    /// Characters and character sequences intended for general-purpose, independent, direct input.
1953    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
1954    /// details.
1955    ///
1956    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
1957    ///
1958    /// [📚 Help choosing a constructor](icu_provider::constructors)
1959    ///
1960    /// # Example
1961    ///
1962    /// ```
1963    /// use icu::properties::sets;
1964    ///
1965    /// let basic_emoji = sets::basic_emoji();
1966    ///
1967    /// assert!(!basic_emoji.contains32(0x0020));
1968    /// assert!(!basic_emoji.contains_char('\n'));
1969    /// assert!(basic_emoji.contains_char('🦃')); // U+1F983 TURKEY
1970    /// assert!(basic_emoji.contains("\u{1F983}"));
1971    /// assert!(basic_emoji.contains("\u{1F6E4}\u{FE0F}")); // railway track
1972    /// assert!(!basic_emoji.contains("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
1973    /// ```
1974    pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1;
1975    pub fn load_basic_emoji();
1976}
1977
1978//
1979// Enumerated property getter fns
1980//
1981
1982/// A version of [`for_general_category_group()`] that uses custom data provided by a [`DataProvider`].
1983///
1984/// [📚 Help choosing a constructor](icu_provider::constructors)
1985pub fn load_for_general_category_group(
1986    provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized),
1987    enum_val: GeneralCategoryGroup,
1988) -> Result<CodePointSetData, PropertiesError> {
1989    let gc_map_payload = maps::load_general_category(provider)?;
1990    let gc_map = gc_map_payload.as_borrowed();
1991    let matching_gc_ranges = gc_map
1992        .iter_ranges()
1993        .filter(|cpm_range| (1 << cpm_range.value as u32) & enum_val.0 != 0)
1994        .map(|cpm_range| cpm_range.range);
1995    let set = CodePointInversionList::from_iter(matching_gc_ranges);
1996    Ok(CodePointSetData::from_code_point_inversion_list(set))
1997}
1998
/// Builds the [`CodePointSetData`] containing every code point whose General_Category
/// value is a member of the given [`GeneralCategoryGroup`] (a single value or a grouping
/// of values).
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    let group_mask = enum_val.0;

    // A category with numeric value `v` is selected when bit `v` is set in the group's mask.
    let ranges_in_group = maps::general_category()
        .iter_ranges()
        .filter_map(|entry| {
            let in_group = (1 << entry.value as u32) & group_mask != 0;
            in_group.then(|| entry.range)
        });

    CodePointSetData::from_code_point_inversion_list(CodePointInversionList::from_iter(
        ranges_in_group,
    ))
}
2013
/// Returns a type capable of looking up values for a property specified as a string, as long as it is a
/// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
///
/// This handles every property required by ECMA-262 `/u` regular expressions, except for:
///
/// - `Script` and `General_Category`: handle these directly with [`maps::load_general_category()`] and
///    [`maps::load_script()`],
///    using property values parsed via [`GeneralCategory::get_name_to_enum_mapper()`] and [`Script::get_name_to_enum_mapper()`]
///    if necessary.
/// - `Script_Extensions`: handle this directly using APIs from [`crate::script`], like [`script::load_script_with_extensions_unstable()`]
/// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`],
///    using property values parsed via [`GeneralCategoryGroup::get_name_to_enum_mapper()`] if necessary
/// - `Assigned`, `Any`, and `ASCII` pseudoproperties: Handle these using their equivalent sets:
///    - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
///    - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
///    - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
/// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262;
///    simply create the corresponding `GeneralCategory` set.
///
/// # Errors
///
/// Returns [`PropertiesError::UnexpectedPropertyName`] if `name` is not recognized as an
/// ECMA-262 property name, or if it names a property that is not one of the binary
/// properties handled here.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::load_for_ecma262("Emoji").expect("loading data failed");
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
///
/// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
    use crate::runtime::UnicodeProperty;

    // Names are matched strictly; anything the parser does not know is rejected up front.
    let prop = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;

    Ok(match prop {
        UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
        UnicodeProperty::Alphabetic => alphabetic(),
        UnicodeProperty::BidiControl => bidi_control(),
        UnicodeProperty::BidiMirrored => bidi_mirrored(),
        UnicodeProperty::CaseIgnorable => case_ignorable(),
        UnicodeProperty::Cased => cased(),
        UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
        UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
        UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
        UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
        UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
        UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
        UnicodeProperty::Dash => dash(),
        UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
        UnicodeProperty::Deprecated => deprecated(),
        UnicodeProperty::Diacritic => diacritic(),
        UnicodeProperty::Emoji => emoji(),
        UnicodeProperty::EmojiComponent => emoji_component(),
        UnicodeProperty::EmojiModifier => emoji_modifier(),
        UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
        UnicodeProperty::EmojiPresentation => emoji_presentation(),
        UnicodeProperty::ExtendedPictographic => extended_pictographic(),
        UnicodeProperty::Extender => extender(),
        UnicodeProperty::GraphemeBase => grapheme_base(),
        UnicodeProperty::GraphemeExtend => grapheme_extend(),
        UnicodeProperty::HexDigit => hex_digit(),
        UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
        UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
        UnicodeProperty::IdContinue => id_continue(),
        UnicodeProperty::IdStart => id_start(),
        UnicodeProperty::Ideographic => ideographic(),
        UnicodeProperty::JoinControl => join_control(),
        UnicodeProperty::LogicalOrderException => logical_order_exception(),
        UnicodeProperty::Lowercase => lowercase(),
        UnicodeProperty::Math => math(),
        UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
        UnicodeProperty::PatternSyntax => pattern_syntax(),
        UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
        UnicodeProperty::QuotationMark => quotation_mark(),
        UnicodeProperty::Radical => radical(),
        UnicodeProperty::RegionalIndicator => regional_indicator(),
        UnicodeProperty::SentenceTerminal => sentence_terminal(),
        UnicodeProperty::SoftDotted => soft_dotted(),
        UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
        UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
        UnicodeProperty::Uppercase => uppercase(),
        UnicodeProperty::VariationSelector => variation_selector(),
        UnicodeProperty::WhiteSpace => white_space(),
        UnicodeProperty::XidContinue => xid_continue(),
        UnicodeProperty::XidStart => xid_start(),
        // A recognized name that is not one of the binary properties listed above.
        _ => return Err(PropertiesError::UnexpectedPropertyName),
    })
}
2110
2111icu_provider::gen_any_buffer_data_constructors!(
2112    locale: skip,
2113    name: &str,
2114    result: Result<CodePointSetData, PropertiesError>,
2115    #[cfg(skip)]
2116    functions: [
2117        load_for_ecma262,
2118        load_for_ecma262_with_any_provider,
2119        load_for_ecma262_with_buffer_provider,
2120        load_for_ecma262_unstable,
2121    ]
2122);
2123
2124#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)]
2125pub fn load_for_ecma262_unstable<P>(
2126    provider: &P,
2127    name: &str,
2128) -> Result<CodePointSetData, PropertiesError>
2129where
2130    P: ?Sized
2131        + DataProvider<AsciiHexDigitV1Marker>
2132        + DataProvider<AlphabeticV1Marker>
2133        + DataProvider<BidiControlV1Marker>
2134        + DataProvider<BidiMirroredV1Marker>
2135        + DataProvider<CaseIgnorableV1Marker>
2136        + DataProvider<CasedV1Marker>
2137        + DataProvider<ChangesWhenCasefoldedV1Marker>
2138        + DataProvider<ChangesWhenCasemappedV1Marker>
2139        + DataProvider<ChangesWhenLowercasedV1Marker>
2140        + DataProvider<ChangesWhenNfkcCasefoldedV1Marker>
2141        + DataProvider<ChangesWhenTitlecasedV1Marker>
2142        + DataProvider<ChangesWhenUppercasedV1Marker>
2143        + DataProvider<DashV1Marker>
2144        + DataProvider<DefaultIgnorableCodePointV1Marker>
2145        + DataProvider<DeprecatedV1Marker>
2146        + DataProvider<DiacriticV1Marker>
2147        + DataProvider<EmojiV1Marker>
2148        + DataProvider<EmojiComponentV1Marker>
2149        + DataProvider<EmojiModifierV1Marker>
2150        + DataProvider<EmojiModifierBaseV1Marker>
2151        + DataProvider<EmojiPresentationV1Marker>
2152        + DataProvider<ExtendedPictographicV1Marker>
2153        + DataProvider<ExtenderV1Marker>
2154        + DataProvider<GraphemeBaseV1Marker>
2155        + DataProvider<GraphemeExtendV1Marker>
2156        + DataProvider<HexDigitV1Marker>
2157        + DataProvider<IdsBinaryOperatorV1Marker>
2158        + DataProvider<IdsTrinaryOperatorV1Marker>
2159        + DataProvider<IdContinueV1Marker>
2160        + DataProvider<IdStartV1Marker>
2161        + DataProvider<IdeographicV1Marker>
2162        + DataProvider<JoinControlV1Marker>
2163        + DataProvider<LogicalOrderExceptionV1Marker>
2164        + DataProvider<LowercaseV1Marker>
2165        + DataProvider<MathV1Marker>
2166        + DataProvider<NoncharacterCodePointV1Marker>
2167        + DataProvider<PatternSyntaxV1Marker>
2168        + DataProvider<PatternWhiteSpaceV1Marker>
2169        + DataProvider<QuotationMarkV1Marker>
2170        + DataProvider<RadicalV1Marker>
2171        + DataProvider<RegionalIndicatorV1Marker>
2172        + DataProvider<SentenceTerminalV1Marker>
2173        + DataProvider<SoftDottedV1Marker>
2174        + DataProvider<TerminalPunctuationV1Marker>
2175        + DataProvider<UnifiedIdeographV1Marker>
2176        + DataProvider<UppercaseV1Marker>
2177        + DataProvider<VariationSelectorV1Marker>
2178        + DataProvider<WhiteSpaceV1Marker>
2179        + DataProvider<XidContinueV1Marker>
2180        + DataProvider<XidStartV1Marker>,
2181{
2182    use crate::runtime::UnicodeProperty;
2183
2184    let prop = if let Some(prop) = UnicodeProperty::parse_ecma262_name(name) {
2185        prop
2186    } else {
2187        return Err(PropertiesError::UnexpectedPropertyName);
2188    };
2189    match prop {
2190        UnicodeProperty::AsciiHexDigit => load_ascii_hex_digit(provider),
2191        UnicodeProperty::Alphabetic => load_alphabetic(provider),
2192        UnicodeProperty::BidiControl => load_bidi_control(provider),
2193        UnicodeProperty::BidiMirrored => load_bidi_mirrored(provider),
2194        UnicodeProperty::CaseIgnorable => load_case_ignorable(provider),
2195        UnicodeProperty::Cased => load_cased(provider),
2196        UnicodeProperty::ChangesWhenCasefolded => load_changes_when_casefolded(provider),
2197        UnicodeProperty::ChangesWhenCasemapped => load_changes_when_casemapped(provider),
2198        UnicodeProperty::ChangesWhenLowercased => load_changes_when_lowercased(provider),
2199        UnicodeProperty::ChangesWhenNfkcCasefolded => load_changes_when_nfkc_casefolded(provider),
2200        UnicodeProperty::ChangesWhenTitlecased => load_changes_when_titlecased(provider),
2201        UnicodeProperty::ChangesWhenUppercased => load_changes_when_uppercased(provider),
2202        UnicodeProperty::Dash => load_dash(provider),
2203        UnicodeProperty::DefaultIgnorableCodePoint => load_default_ignorable_code_point(provider),
2204        UnicodeProperty::Deprecated => load_deprecated(provider),
2205        UnicodeProperty::Diacritic => load_diacritic(provider),
2206        UnicodeProperty::Emoji => load_emoji(provider),
2207        UnicodeProperty::EmojiComponent => load_emoji_component(provider),
2208        UnicodeProperty::EmojiModifier => load_emoji_modifier(provider),
2209        UnicodeProperty::EmojiModifierBase => load_emoji_modifier_base(provider),
2210        UnicodeProperty::EmojiPresentation => load_emoji_presentation(provider),
2211        UnicodeProperty::ExtendedPictographic => load_extended_pictographic(provider),
2212        UnicodeProperty::Extender => load_extender(provider),
2213        UnicodeProperty::GraphemeBase => load_grapheme_base(provider),
2214        UnicodeProperty::GraphemeExtend => load_grapheme_extend(provider),
2215        UnicodeProperty::HexDigit => load_hex_digit(provider),
2216        UnicodeProperty::IdsBinaryOperator => load_ids_binary_operator(provider),
2217        UnicodeProperty::IdsTrinaryOperator => load_ids_trinary_operator(provider),
2218        UnicodeProperty::IdContinue => load_id_continue(provider),
2219        UnicodeProperty::IdStart => load_id_start(provider),
2220        UnicodeProperty::Ideographic => load_ideographic(provider),
2221        UnicodeProperty::JoinControl => load_join_control(provider),
2222        UnicodeProperty::LogicalOrderException => load_logical_order_exception(provider),
2223        UnicodeProperty::Lowercase => load_lowercase(provider),
2224        UnicodeProperty::Math => load_math(provider),
2225        UnicodeProperty::NoncharacterCodePoint => load_noncharacter_code_point(provider),
2226        UnicodeProperty::PatternSyntax => load_pattern_syntax(provider),
2227        UnicodeProperty::PatternWhiteSpace => load_pattern_white_space(provider),
2228        UnicodeProperty::QuotationMark => load_quotation_mark(provider),
2229        UnicodeProperty::Radical => load_radical(provider),
2230        UnicodeProperty::RegionalIndicator => load_regional_indicator(provider),
2231        UnicodeProperty::SentenceTerminal => load_sentence_terminal(provider),
2232        UnicodeProperty::SoftDotted => load_soft_dotted(provider),
2233        UnicodeProperty::TerminalPunctuation => load_terminal_punctuation(provider),
2234        UnicodeProperty::UnifiedIdeograph => load_unified_ideograph(provider),
2235        UnicodeProperty::Uppercase => load_uppercase(provider),
2236        UnicodeProperty::VariationSelector => load_variation_selector(provider),
2237        UnicodeProperty::WhiteSpace => load_white_space(provider),
2238        UnicodeProperty::XidContinue => load_xid_continue(provider),
2239        UnicodeProperty::XidStart => load_xid_start(provider),
2240        _ => Err(PropertiesError::UnexpectedPropertyName),
2241    }
2242}
2243
#[cfg(test)]
mod tests {

    /// The `Number` group contains decimal digits from several scripts and no letters.
    #[test]
    fn test_general_category() {
        use icu::properties::sets;
        use icu::properties::GeneralCategoryGroup;

        let digits_data = sets::for_general_category_group(GeneralCategoryGroup::Number);
        let digits = digits_data.as_borrowed();

        assert!(digits.contains('5'));
        assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE

        assert!(!digits.contains('A'));
    }

    /// The set derived from the Script map for `Thai` contains Thai letters and digits,
    /// but neither Latin letters nor the baht sign.
    #[test]
    fn test_script() {
        use icu::properties::maps;
        use icu::properties::Script;

        let thai_data = maps::script().get_set_for_value(Script::Thai);
        let thai = thai_data.as_borrowed();

        assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO

        assert!(!thai.contains('A'));
        assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
    }

    /// Each `GeneralCategoryGroup` set must equal the union of the sets of its
    /// constituent `GeneralCategory` values.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::{maps, sets};
        use icu::properties::{GeneralCategory, GeneralCategoryGroup};
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;

        // Compares the set built for the whole group against the union of the
        // per-subcategory sets, using their inversion-list representations.
        let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let category_set = sets::for_general_category_group(category);
            let category_set = category_set
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            let mut builder = CodePointInversionListBuilder::new();
            for subcategory in subcategories {
                let gc_set_data = maps::general_category().get_set_for_value(*subcategory);
                let gc_set = gc_set_data.as_borrowed();
                for range in gc_set.iter_ranges() {
                    builder.add_range32(&range);
                }
            }
            let combined_set = builder.build();
            // Printed so that a failing assertion identifies the group under test.
            println!("{category:?} {subcategories:?}");
            assert_eq!(
                category_set.get_inversion_list_vec(),
                combined_set.get_inversion_list_vec()
            );
        };

        test_group(
            GeneralCategoryGroup::Letter,
            &[
                GeneralCategory::UppercaseLetter,
                GeneralCategory::LowercaseLetter,
                GeneralCategory::TitlecaseLetter,
                GeneralCategory::ModifierLetter,
                GeneralCategory::OtherLetter,
            ],
        );
        test_group(
            GeneralCategoryGroup::Other,
            &[
                GeneralCategory::Control,
                GeneralCategory::Format,
                GeneralCategory::Unassigned,
                GeneralCategory::PrivateUse,
                GeneralCategory::Surrogate,
            ],
        );
        test_group(
            GeneralCategoryGroup::Mark,
            &[
                GeneralCategory::SpacingMark,
                GeneralCategory::EnclosingMark,
                GeneralCategory::NonspacingMark,
            ],
        );
        test_group(
            GeneralCategoryGroup::Number,
            &[
                GeneralCategory::DecimalNumber,
                GeneralCategory::LetterNumber,
                GeneralCategory::OtherNumber,
            ],
        );
        test_group(
            GeneralCategoryGroup::Punctuation,
            &[
                GeneralCategory::ConnectorPunctuation,
                GeneralCategory::DashPunctuation,
                GeneralCategory::ClosePunctuation,
                GeneralCategory::FinalPunctuation,
                GeneralCategory::InitialPunctuation,
                GeneralCategory::OtherPunctuation,
                GeneralCategory::OpenPunctuation,
            ],
        );
        test_group(
            GeneralCategoryGroup::Symbol,
            &[
                GeneralCategory::CurrencySymbol,
                GeneralCategory::ModifierSymbol,
                GeneralCategory::MathSymbol,
                GeneralCategory::OtherSymbol,
            ],
        );
        test_group(
            GeneralCategoryGroup::Separator,
            &[
                GeneralCategory::LineSeparator,
                GeneralCategory::ParagraphSeparator,
                GeneralCategory::SpaceSeparator,
            ],
        );
    }

    /// Surrogate code points are not valid `char`s, so they are queried with `contains32`.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::maps;
        use icu::properties::GeneralCategory;

        let surrogates_data =
            maps::general_category().get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogates_data.as_borrowed();

        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));

        assert!(!surrogates.contains('A'));
    }
}