icu_properties/
provider.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5// Provider structs must be stable
6#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
7
8//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
9//!
10//! <div class="stab unstable">
11//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
12//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
13//! to be stable, their Rust representation might not be. Use with caution.
14//! </div>
15//!
16//! Read more about data providers: [`icu_provider`]
17
18pub mod names;
19
20use crate::script::ScriptWithExt;
21use crate::Script;
22
23use core::ops::RangeInclusive;
24use core::str;
25use icu_collections::codepointinvlist::CodePointInversionList;
26use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
27use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue};
28use icu_provider::prelude::*;
29use icu_provider::{DataKeyMetadata, FallbackPriority};
30use zerofrom::ZeroFrom;
31
32use zerovec::{VarZeroVec, ZeroSlice, ZeroVecError};
33
/// Baked data
///
/// Unit struct that the compiled-in ("baked") data provider implementations are attached to.
/// Only available when the `compiled_data` feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
pub struct Baked;
44
// Baked-data wiring (only with the `compiled_data` feature): every `icu_properties_data`
// macro below is invoked with `Baked` as its argument. The invocations sit inside an
// anonymous `const _` block, so the helper `icu` module declared here is scoped to this
// block and is not nameable from the rest of the module.
45#[cfg(feature = "compiled_data")]
46const _: () = {
    // Aliases exposing this crate and its dependencies under `icu::…` names.
    // NOTE(review): presumably these are the paths the generated baked code refers to — confirm
    // against `icu_properties_data`.
47    pub mod icu {
48        pub use crate as properties;
49        pub use icu_collections as collections;
50        #[allow(unused_imports)] // baked data may or may not need this
51        pub use icu_locid_transform as locid_transform;
52    }
53    icu_properties_data::make_provider!(Baked);
    // `propnames_from_*`: macro names mirror the `propnames/from/…` data keys
    // (markers for parsing property value names).
54    icu_properties_data::impl_propnames_from_gcb_v1!(Baked);
55    icu_properties_data::impl_propnames_from_bc_v1!(Baked);
56    icu_properties_data::impl_propnames_from_ccc_v1!(Baked);
57    icu_properties_data::impl_propnames_from_ea_v1!(Baked);
58    icu_properties_data::impl_propnames_from_gc_v1!(Baked);
59    icu_properties_data::impl_propnames_from_gcm_v1!(Baked);
60    icu_properties_data::impl_propnames_from_hst_v1!(Baked);
61    icu_properties_data::impl_propnames_from_insc_v1!(Baked);
62    icu_properties_data::impl_propnames_from_jt_v1!(Baked);
63    icu_properties_data::impl_propnames_from_lb_v1!(Baked);
64    icu_properties_data::impl_propnames_from_sb_v1!(Baked);
65    icu_properties_data::impl_propnames_from_sc_v1!(Baked);
66    icu_properties_data::impl_propnames_from_wb_v1!(Baked);
    // `propnames_to_long_*`: macro names mirror the `propnames/to/long/…` data keys
    // (markers for producing long value names). `ccc` is the only sparse one in this list.
67    icu_properties_data::impl_propnames_to_long_linear_bc_v1!(Baked);
68    icu_properties_data::impl_propnames_to_long_linear_ea_v1!(Baked);
69    icu_properties_data::impl_propnames_to_long_linear_gc_v1!(Baked);
70    icu_properties_data::impl_propnames_to_long_linear_gcb_v1!(Baked);
71    icu_properties_data::impl_propnames_to_long_linear_hst_v1!(Baked);
72    icu_properties_data::impl_propnames_to_long_linear_insc_v1!(Baked);
73    icu_properties_data::impl_propnames_to_long_linear_jt_v1!(Baked);
74    icu_properties_data::impl_propnames_to_long_linear_lb_v1!(Baked);
75    icu_properties_data::impl_propnames_to_long_linear_sb_v1!(Baked);
76    icu_properties_data::impl_propnames_to_long_linear_sc_v1!(Baked);
77    icu_properties_data::impl_propnames_to_long_linear_wb_v1!(Baked);
78    icu_properties_data::impl_propnames_to_long_sparse_ccc_v1!(Baked);
    // `propnames_to_short_*`: macro names mirror the `propnames/to/short/…` data keys
    // (markers for producing short value names). Note that `sc` uses the `linear4` form here
    // while its long names above use plain `linear`.
79    icu_properties_data::impl_propnames_to_short_linear_bc_v1!(Baked);
80    icu_properties_data::impl_propnames_to_short_linear_ea_v1!(Baked);
81    icu_properties_data::impl_propnames_to_short_linear_gc_v1!(Baked);
82    icu_properties_data::impl_propnames_to_short_linear_gcb_v1!(Baked);
83    icu_properties_data::impl_propnames_to_short_linear_hst_v1!(Baked);
84    icu_properties_data::impl_propnames_to_short_linear_insc_v1!(Baked);
85    icu_properties_data::impl_propnames_to_short_linear_jt_v1!(Baked);
86    icu_properties_data::impl_propnames_to_short_linear_lb_v1!(Baked);
87    icu_properties_data::impl_propnames_to_short_linear_sb_v1!(Baked);
88    icu_properties_data::impl_propnames_to_short_linear_wb_v1!(Baked);
89    icu_properties_data::impl_propnames_to_short_linear4_sc_v1!(Baked);
90    icu_properties_data::impl_propnames_to_short_sparse_ccc_v1!(Baked);
    // `props_*`: macro names mirror the `props/…` data keys (the property data itself).
91    icu_properties_data::impl_props_ahex_v1!(Baked);
92    icu_properties_data::impl_props_alnum_v1!(Baked);
93    icu_properties_data::impl_props_alpha_v1!(Baked);
94    icu_properties_data::impl_props_basic_emoji_v1!(Baked);
95    icu_properties_data::impl_props_bc_v1!(Baked);
96    icu_properties_data::impl_props_bidi_c_v1!(Baked);
97    icu_properties_data::impl_props_bidi_m_v1!(Baked);
98    icu_properties_data::impl_props_bidiauxiliaryprops_v1!(Baked);
99    icu_properties_data::impl_props_blank_v1!(Baked);
100    icu_properties_data::impl_props_cased_v1!(Baked);
101    icu_properties_data::impl_props_ccc_v1!(Baked);
102    icu_properties_data::impl_props_ci_v1!(Baked);
103    icu_properties_data::impl_props_comp_ex_v1!(Baked);
104    icu_properties_data::impl_props_cwcf_v1!(Baked);
105    icu_properties_data::impl_props_cwcm_v1!(Baked);
106    icu_properties_data::impl_props_cwkcf_v1!(Baked);
107    icu_properties_data::impl_props_cwl_v1!(Baked);
108    icu_properties_data::impl_props_cwt_v1!(Baked);
109    icu_properties_data::impl_props_cwu_v1!(Baked);
110    icu_properties_data::impl_props_dash_v1!(Baked);
111    icu_properties_data::impl_props_dep_v1!(Baked);
112    icu_properties_data::impl_props_di_v1!(Baked);
113    icu_properties_data::impl_props_dia_v1!(Baked);
114    icu_properties_data::impl_props_ea_v1!(Baked);
115    icu_properties_data::impl_props_ebase_v1!(Baked);
116    icu_properties_data::impl_props_ecomp_v1!(Baked);
117    icu_properties_data::impl_props_emod_v1!(Baked);
118    icu_properties_data::impl_props_emoji_v1!(Baked);
119    icu_properties_data::impl_props_epres_v1!(Baked);
120    icu_properties_data::impl_props_exemplarchars_auxiliary_v1!(Baked);
121    icu_properties_data::impl_props_exemplarchars_index_v1!(Baked);
122    icu_properties_data::impl_props_exemplarchars_main_v1!(Baked);
123    icu_properties_data::impl_props_exemplarchars_numbers_v1!(Baked);
124    icu_properties_data::impl_props_exemplarchars_punctuation_v1!(Baked);
125    icu_properties_data::impl_props_ext_v1!(Baked);
126    icu_properties_data::impl_props_extpict_v1!(Baked);
127    icu_properties_data::impl_props_gc_v1!(Baked);
128    icu_properties_data::impl_props_gcb_v1!(Baked);
129    icu_properties_data::impl_props_gr_base_v1!(Baked);
130    icu_properties_data::impl_props_gr_ext_v1!(Baked);
131    icu_properties_data::impl_props_gr_link_v1!(Baked);
132    icu_properties_data::impl_props_graph_v1!(Baked);
133    icu_properties_data::impl_props_hex_v1!(Baked);
134    icu_properties_data::impl_props_hst_v1!(Baked);
135    icu_properties_data::impl_props_hyphen_v1!(Baked);
136    icu_properties_data::impl_props_idc_v1!(Baked);
137    icu_properties_data::impl_props_ideo_v1!(Baked);
138    icu_properties_data::impl_props_ids_v1!(Baked);
139    icu_properties_data::impl_props_idsb_v1!(Baked);
140    icu_properties_data::impl_props_idst_v1!(Baked);
141    icu_properties_data::impl_props_insc_v1!(Baked);
142    icu_properties_data::impl_props_join_c_v1!(Baked);
143    icu_properties_data::impl_props_jt_v1!(Baked);
144    icu_properties_data::impl_props_lb_v1!(Baked);
145    icu_properties_data::impl_props_loe_v1!(Baked);
146    icu_properties_data::impl_props_lower_v1!(Baked);
147    icu_properties_data::impl_props_math_v1!(Baked);
148    icu_properties_data::impl_props_nchar_v1!(Baked);
149    icu_properties_data::impl_props_nfcinert_v1!(Baked);
150    icu_properties_data::impl_props_nfdinert_v1!(Baked);
151    icu_properties_data::impl_props_nfkcinert_v1!(Baked);
152    icu_properties_data::impl_props_nfkdinert_v1!(Baked);
153    icu_properties_data::impl_props_pat_syn_v1!(Baked);
154    icu_properties_data::impl_props_pat_ws_v1!(Baked);
155    icu_properties_data::impl_props_pcm_v1!(Baked);
156    icu_properties_data::impl_props_print_v1!(Baked);
157    icu_properties_data::impl_props_qmark_v1!(Baked);
158    icu_properties_data::impl_props_radical_v1!(Baked);
159    icu_properties_data::impl_props_ri_v1!(Baked);
160    icu_properties_data::impl_props_sb_v1!(Baked);
161    icu_properties_data::impl_props_sc_v1!(Baked);
162    icu_properties_data::impl_props_scx_v1!(Baked);
163    icu_properties_data::impl_props_sd_v1!(Baked);
164    icu_properties_data::impl_props_segstart_v1!(Baked);
165    icu_properties_data::impl_props_sensitive_v1!(Baked);
166    icu_properties_data::impl_props_sterm_v1!(Baked);
167    icu_properties_data::impl_props_term_v1!(Baked);
168    icu_properties_data::impl_props_uideo_v1!(Baked);
169    icu_properties_data::impl_props_upper_v1!(Baked);
170    icu_properties_data::impl_props_vs_v1!(Baked);
171    icu_properties_data::impl_props_wb_v1!(Baked);
172    icu_properties_data::impl_props_wspace_v1!(Baked);
173    icu_properties_data::impl_props_xdigit_v1!(Baked);
174    icu_properties_data::impl_props_xidc_v1!(Baked);
175    icu_properties_data::impl_props_xids_v1!(Baked);
176};
177
178// include the specialized structs for the compact representation of Bidi property data
179pub mod bidi_data;
180
181/// A set of characters which share a particular property value.
182///
183/// This data enum is extensible, more backends may be added in the future.
184/// Old data can be used with newer code but not vice versa.
185///
186/// <div class="stab unstable">
187/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
188/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
189/// to be stable, their Rust representation might not be. Use with caution.
190/// </div>
191#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
192#[cfg_attr(
193    feature = "datagen", 
194    derive(serde::Serialize, databake::Bake),
195    databake(path = icu_properties::provider),
196)]
197#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
198#[non_exhaustive]
199pub enum PropertyCodePointSetV1<'data> {
200    /// The set of characters, represented as an inversion list
201    InversionList(#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionList<'data>),
202    // new variants should go BELOW existing ones
203    // Serde serializes based on variant name and index in the enum
204    // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
205}
206
207/// A map efficiently storing data about individual characters.
208///
209/// This data enum is extensible, more backends may be added in the future.
210/// Old data can be used with newer code but not vice versa.
211///
212/// <div class="stab unstable">
213/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
214/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
215/// to be stable, their Rust representation might not be. Use with caution.
216/// </div>
217#[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
218#[cfg_attr(
219    feature = "datagen", 
220    derive(serde::Serialize, databake::Bake),
221    databake(path = icu_properties::provider),
222)]
223#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
224#[non_exhaustive]
225pub enum PropertyCodePointMapV1<'data, T: TrieValue> {
226    /// A codepoint trie storing the data
227    CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>),
228    // new variants should go BELOW existing ones
229    // Serde serializes based on variant name and index in the enum
230    // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
231}
232
233/// A set of characters and strings which share a particular property value.
234///
235/// <div class="stab unstable">
236/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
237/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
238/// to be stable, their Rust representation might not be. Use with caution.
239/// </div>
240#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
241#[cfg_attr(
242    feature = "datagen", 
243    derive(serde::Serialize, databake::Bake),
244    databake(path = icu_properties::provider),
245)]
246#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
247#[non_exhaustive]
248pub enum PropertyUnicodeSetV1<'data> {
249    /// A set representing characters in an inversion list, and the strings in a list.
250    CPInversionListStrList(
251        #[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>,
252    ),
253    // new variants should go BELOW existing ones
254    // Serde serializes based on variant name and index in the enum
255    // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
256}
257
258impl<'data> PropertyUnicodeSetV1<'data> {
259    #[inline]
260    pub(crate) fn contains(&self, s: &str) -> bool {
261        match *self {
262            Self::CPInversionListStrList(ref l) => l.contains(s),
263        }
264    }
265
266    #[inline]
267    pub(crate) fn contains32(&self, cp: u32) -> bool {
268        match *self {
269            Self::CPInversionListStrList(ref l) => l.contains32(cp),
270        }
271    }
272
273    #[inline]
274    pub(crate) fn contains_char(&self, ch: char) -> bool {
275        match *self {
276            Self::CPInversionListStrList(ref l) => l.contains_char(ch),
277        }
278    }
279
280    #[inline]
281    pub(crate) fn from_code_point_inversion_list_string_list(
282        l: CodePointInversionListAndStringList<'static>,
283    ) -> Self {
284        Self::CPInversionListStrList(l)
285    }
286
287    #[inline]
288    pub(crate) fn as_code_point_inversion_list_string_list(
289        &'_ self,
290    ) -> Option<&'_ CodePointInversionListAndStringList<'data>> {
291        match *self {
292            Self::CPInversionListStrList(ref l) => Some(l),
293            // any other backing data structure that cannot return a CPInversionListStrList in O(1) time should return None
294        }
295    }
296
297    #[inline]
298    pub(crate) fn to_code_point_inversion_list_string_list(
299        &self,
300    ) -> CodePointInversionListAndStringList<'_> {
301        match *self {
302            Self::CPInversionListStrList(ref t) => ZeroFrom::zero_from(t),
303        }
304    }
305}
306
307/// A struct that efficiently stores `Script` and `Script_Extensions` property data.
308///
309/// <div class="stab unstable">
310/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
311/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
312/// to be stable, their Rust representation might not be. Use with caution.
313/// </div>
314#[icu_provider::data_struct(marker(
315    ScriptWithExtensionsPropertyV1Marker,
316    "props/scx@1",
317    singleton
318))]
319#[derive(Debug, Eq, PartialEq, Clone)]
320#[cfg_attr(
321    feature = "datagen", 
322    derive(serde::Serialize, databake::Bake),
323    databake(path = icu_properties::provider),
324)]
325#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
326pub struct ScriptWithExtensionsPropertyV1<'data> {
327    /// Note: The `ScriptWithExt` values in this array will assume a 12-bit layout. The 2
328    /// higher order bits 11..10 will indicate how to deduce the Script value and
329    /// Script_Extensions value, nearly matching the representation
330    /// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h):
331    ///
332    /// | High order 2 bits value | Script                                                 | Script_Extensions                                              |
333    /// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------|
334    /// | 3                       | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits  |
335    /// | 2                       | Script=Inherited                                       | Entire sub-array, index given by lower 10 bits                 |
336    /// | 1                       | Script=Common                                          | Entire sub-array, index given by lower 10 bits                 |
337    /// | 0                       | Value in lower 10 bits                                 | `[ Script value ]` single-element array                        |
338    ///
339    /// When the lower 10 bits of the value are used as an index, that index is
340    /// used for the outer-level vector of the nested `extensions` structure.
341    #[cfg_attr(feature = "serde", serde(borrow))]
342    pub trie: CodePointTrie<'data, ScriptWithExt>,
343
344    /// This companion structure stores Script_Extensions values, which are
345    /// themselves arrays / vectors. This structure only stores the values for
346    /// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. The
347    /// sub-vector represents the Script_Extensions array value for a code point,
348    /// and may also indicate Script value, as described for the `trie` field.
349    #[cfg_attr(feature = "serde", serde(borrow))]
350    pub extensions: VarZeroVec<'data, ZeroSlice<Script>>,
351}
352
353impl<'data> ScriptWithExtensionsPropertyV1<'data> {
354    // This method is intended to be used by constructors of deserialized data
355    // in a data provider.
356    #[doc(hidden)]
357    pub fn new(
358        trie: CodePointTrie<'data, ScriptWithExt>,
359        extensions: VarZeroVec<'data, ZeroSlice<Script>>,
360    ) -> ScriptWithExtensionsPropertyV1<'data> {
361        ScriptWithExtensionsPropertyV1 { trie, extensions }
362    }
363}
364
365// See CodePointSetData for documentation of these functions
366impl<'data> PropertyCodePointSetV1<'data> {
367    #[inline]
368    pub(crate) fn contains(&self, ch: char) -> bool {
369        match *self {
370            Self::InversionList(ref l) => l.contains(ch),
371        }
372    }
373
374    #[inline]
375    pub(crate) fn contains32(&self, ch: u32) -> bool {
376        match *self {
377            Self::InversionList(ref l) => l.contains32(ch),
378        }
379    }
380
381    #[inline]
382    pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
383        match *self {
384            Self::InversionList(ref l) => l.iter_ranges(),
385        }
386    }
387
388    #[inline]
389    pub(crate) fn iter_ranges_complemented(
390        &self,
391    ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
392        match *self {
393            Self::InversionList(ref l) => l.iter_ranges_complemented(),
394        }
395    }
396
397    #[inline]
398    pub(crate) fn from_code_point_inversion_list(l: CodePointInversionList<'static>) -> Self {
399        Self::InversionList(l)
400    }
401
402    #[inline]
403    pub(crate) fn as_code_point_inversion_list(
404        &'_ self,
405    ) -> Option<&'_ CodePointInversionList<'data>> {
406        match *self {
407            Self::InversionList(ref l) => Some(l),
408            // any other backing data structure that cannot return a CPInvList in O(1) time should return None
409        }
410    }
411
412    #[inline]
413    pub(crate) fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> {
414        match *self {
415            Self::InversionList(ref t) => ZeroFrom::zero_from(t),
416        }
417    }
418}
419
420// See CodePointMapData for documentation of these functions
421impl<'data, T: TrieValue> PropertyCodePointMapV1<'data, T> {
422    #[inline]
423    pub(crate) fn get32(&self, ch: u32) -> T {
424        match *self {
425            Self::CodePointTrie(ref t) => t.get32(ch),
426        }
427    }
428
429    #[inline]
430    pub(crate) fn try_into_converted<P>(
431        self,
432    ) -> Result<PropertyCodePointMapV1<'data, P>, ZeroVecError>
433    where
434        P: TrieValue,
435    {
436        match self {
437            Self::CodePointTrie(t) => t
438                .try_into_converted()
439                .map(PropertyCodePointMapV1::CodePointTrie),
440        }
441    }
442
443    #[inline]
444    pub(crate) fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> {
445        match *self {
446            Self::CodePointTrie(ref t) => t.get_set_for_value(value),
447        }
448    }
449
450    #[inline]
451    pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = CodePointMapRange<T>> + '_ {
452        match *self {
453            Self::CodePointTrie(ref t) => t.iter_ranges(),
454        }
455    }
456    #[inline]
457    pub(crate) fn iter_ranges_mapped<'a, U: Eq + 'a>(
458        &'a self,
459        map: impl FnMut(T) -> U + Copy + 'a,
460    ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a {
461        match *self {
462            Self::CodePointTrie(ref t) => t.iter_ranges_mapped(map),
463        }
464    }
465
466    #[inline]
467    pub(crate) fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self {
468        Self::CodePointTrie(trie)
469    }
470
471    #[inline]
472    pub(crate) fn as_code_point_trie(&self) -> Option<&CodePointTrie<'data, T>> {
473        match *self {
474            Self::CodePointTrie(ref t) => Some(t),
475            // any other backing data structure that cannot return a CPT in O(1) time should return None
476        }
477    }
478
479    #[inline]
480    pub(crate) fn to_code_point_trie(&self) -> CodePointTrie<'_, T> {
481        match *self {
482            Self::CodePointTrie(ref t) => ZeroFrom::zero_from(t),
483        }
484    }
485}
486
487macro_rules! expand {
488    (
489        ($(($code_point_set_marker:ident, $bin_cp_s:literal),)+),
490        ($(($unicode_set_marker:ident, $bin_us_s:literal, $us_singleton:literal),)+),
491        ($(($code_point_map_marker:ident,
492            $name_value_marker:ident,
493
494            $((sparse: $value_short_name_marker_sparse:ident, $value_long_name_marker_sparse:ident),)?
495            $((linear: $value_short_name_marker_linear:ident, $value_long_name_marker_linear:ident ),)?
496            $((linear4: $value_short_name_marker_linear4:ident, $value_long_name_marker_linear4:ident ),)?
497            $enum_s:literal, $value_ty:ident),)+)
498    ) => {
499
500            // Data keys that return code point sets (represented as CodePointSetData).
501            // For now, synonymous with binary properties of code points only.
502            $(
503                #[doc = core::concat!("Data marker for the '", $bin_cp_s, "' Unicode property")]
504                #[derive(Debug, Default)]
505                #[cfg_attr(
506                    feature = "datagen",
507                    derive(databake::Bake),
508                    databake(path = icu_properties::provider),
509                )]
510                pub struct $code_point_set_marker;
511
512                impl DataMarker for $code_point_set_marker {
513                    type Yokeable = PropertyCodePointSetV1<'static>;
514                }
515                impl KeyedDataMarker for $code_point_set_marker {
516                    const KEY: DataKey = data_key!(concat!("props/", $bin_cp_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
517                }
518
519            )+
520
521            // Data keys that return sets of strings + code points (represented as UnicodeSetData).
522            // Includes:
523            //   - binary properties of strings + code points
524            //   - exemplar characters
525            $(
526                #[doc = core::concat!("Data marker for the '", $bin_us_s, "' Unicode property")]
527                #[derive(Debug, Default)]
528                #[cfg_attr(
529                    feature = "datagen",
530                    derive(databake::Bake),
531                    databake(path = icu_properties::provider),
532                )]
533                pub struct $unicode_set_marker;
534
535                impl DataMarker for $unicode_set_marker {
536                    type Yokeable = PropertyUnicodeSetV1<'static>;
537                }
538                impl KeyedDataMarker for $unicode_set_marker {
539                    const KEY: DataKey = data_key!(concat!("props/", $bin_us_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, $us_singleton));
540                }
541            )+
542
543            // Data keys that return code point map (represented as CodePointMapData).
544            // For now, synonymous with enumerated properties [of code points only].
545            $(
546                #[doc = core::concat!("Data marker for the '", $enum_s, "' Unicode property")]
547                #[derive(Debug, Default)]
548                #[cfg_attr(
549                    feature = "datagen",
550                    derive(databake::Bake),
551                    databake(path = icu_properties::provider),
552                )]
553                pub struct $code_point_map_marker;
554
555                impl DataMarker for $code_point_map_marker {
556                    type Yokeable = PropertyCodePointMapV1<'static, crate::$value_ty>;
557                }
558
559                impl KeyedDataMarker for $code_point_map_marker {
560                    const KEY: DataKey = data_key!(concat!("props/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
561                }
562
563
564                #[doc = core::concat!("Data marker for parsing the names of the values of the '", $enum_s, "' Unicode property")]
565                #[derive(Debug, Default)]
566                #[cfg_attr(
567                    feature = "datagen",
568                    derive(databake::Bake),
569                    databake(path = icu_properties::provider),
570                )]
571                pub struct $name_value_marker;
572
573                impl DataMarker for $name_value_marker {
574                    type Yokeable = names::PropertyValueNameToEnumMapV1<'static>;
575                }
576
577                impl KeyedDataMarker for $name_value_marker {
578                    const KEY: DataKey = data_key!(concat!("propnames/from/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
579                }
580
581                $(
582                    #[doc = core::concat!("Data marker for producing short names of the values of the '", $enum_s, "' Unicode property")]
583                    #[derive(Debug, Default)]
584                    #[cfg_attr(
585                        feature = "datagen",
586                        derive(databake::Bake),
587                        databake(path = icu_properties::provider),
588                    )]
589                    pub struct $value_short_name_marker_sparse;
590
591                    impl DataMarker for $value_short_name_marker_sparse {
592                        type Yokeable = names::PropertyEnumToValueNameSparseMapV1<'static>;
593                    }
594
595                    impl KeyedDataMarker for $value_short_name_marker_sparse {
596                        const KEY: DataKey = data_key!(concat!("propnames/to/short/sparse/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
597                    }
598
599                    #[doc = core::concat!("Data marker for producing long names of the values of the '", $enum_s, "' Unicode property")]
600                    #[derive(Debug, Default)]
601                    #[cfg_attr(
602                        feature = "datagen",
603                        derive(databake::Bake),
604                        databake(path = icu_properties::provider),
605                    )]
606                    pub struct $value_long_name_marker_sparse;
607
608                    impl DataMarker for $value_long_name_marker_sparse {
609                        type Yokeable = names::PropertyEnumToValueNameSparseMapV1<'static>;
610                    }
611
612                    impl KeyedDataMarker for $value_long_name_marker_sparse {
613                        const KEY: DataKey = data_key!(concat!("propnames/to/long/sparse/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
614                    }
615                )?
616
617                $(
618                    #[doc = core::concat!("Data marker for producing short names of the values of the '", $enum_s, "' Unicode property")]
619                    #[derive(Debug, Default)]
620                    #[cfg_attr(
621                        feature = "datagen",
622                        derive(databake::Bake),
623                        databake(path = icu_properties::provider),
624                    )]
625                    pub struct $value_short_name_marker_linear;
626
627                    impl DataMarker for $value_short_name_marker_linear {
628                        type Yokeable = names::PropertyEnumToValueNameLinearMapV1<'static>;
629                    }
630
631                    impl KeyedDataMarker for $value_short_name_marker_linear {
632                        const KEY: DataKey = data_key!(concat!("propnames/to/short/linear/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
633                    }
634
635                    #[doc = core::concat!("Data marker for producing long names of the values of the '", $enum_s, "' Unicode property")]
636                    #[derive(Debug, Default)]
637                    #[cfg_attr(
638                        feature = "datagen",
639                        derive(databake::Bake),
640                        databake(path = icu_properties::provider),
641                    )]
642                    pub struct $value_long_name_marker_linear;
643
644                    impl DataMarker for $value_long_name_marker_linear {
645                        type Yokeable = names::PropertyEnumToValueNameLinearMapV1<'static>;
646                    }
647
648                    impl KeyedDataMarker for $value_long_name_marker_linear {
649                        const KEY: DataKey = data_key!(concat!("propnames/to/long/linear/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
650                    }
651                )?
652
653                $(
654                    #[doc = core::concat!("Data marker for producing short names of the values of the '", $enum_s, "' Unicode property")]
655                    #[derive(Debug, Default)]
656                    #[cfg_attr(
657                        feature = "datagen",
658                        derive(databake::Bake),
659                        databake(path = icu_properties::provider),
660                    )]
661                    pub struct $value_short_name_marker_linear4;
662
663                    impl DataMarker for $value_short_name_marker_linear4 {
664                        type Yokeable = names::PropertyEnumToValueNameLinearTiny4MapV1<'static>;
665                    }
666
667                    impl KeyedDataMarker for $value_short_name_marker_linear4 {
668                        const KEY: DataKey = data_key!(concat!("propnames/to/short/linear4/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
669                    }
670
671                    #[doc = core::concat!("Data marker for producing long names of the values of the '", $enum_s, "' Unicode property")]
672                    #[derive(Debug, Default)]
673                    #[cfg_attr(
674                        feature = "datagen",
675                        derive(databake::Bake),
676                        databake(path = icu_properties::provider),
677                    )]
678                    pub struct $value_long_name_marker_linear4;
679
680                    impl DataMarker for $value_long_name_marker_linear4 {
681                        // Tiny4 is only for short names
682                        type Yokeable = names::PropertyEnumToValueNameLinearMapV1<'static>;
683                    }
684
685                    impl KeyedDataMarker for $value_long_name_marker_linear4 {
686                        const KEY: DataKey = data_key!(concat!("propnames/to/long/linear/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
687                    }
688                )?
689            )+
690
691        /// All data keys in this module.
692        pub const KEYS: &[DataKey] = &[
693            $($code_point_set_marker::KEY,)+
694            $($unicode_set_marker::KEY,)+
695            $(
696                $code_point_map_marker::KEY,
697                $name_value_marker::KEY,
698                $($value_short_name_marker_sparse::KEY, $value_long_name_marker_sparse::KEY,)?
699                $($value_short_name_marker_linear::KEY, $value_long_name_marker_linear::KEY,)?
700                $($value_short_name_marker_linear4::KEY, $value_long_name_marker_linear4::KEY,)?
701            )+
702            bidi_data::BidiAuxiliaryPropertiesV1Marker::KEY,
703            GeneralCategoryMaskNameToValueV1Marker::KEY,
704            ScriptWithExtensionsPropertyV1Marker::KEY,
705        ];
706    };
707}
708
709pub use self::names::GeneralCategoryMaskNameToValueV1Marker;
710
// Single invocation of `expand!` listing every property data marker of this module.
// The arguments are three parenthesized groups, in this order:
//   1. code point sets,
//   2. UnicodeSets (code points + strings),
//   3. code point maps (each with its name <-> value lookup markers).
// The macro body collects the `KEY` of each listed marker into the `KEYS` constant.
// NOTE(review): the macro's matcher is not visible next to this call; the per-field
// descriptions below are inferred from the visible macro body — confirm before reordering.
711expand!(
712    (
713        // code point sets: one `(marker type, name string)` pair per property.
           // Presumably the string is the property name used in the data key path — TODO confirm.
714        (AsciiHexDigitV1Marker, "AHex"),
715        (AlnumV1Marker, "alnum"),
716        (AlphabeticV1Marker, "Alpha"),
717        (BidiControlV1Marker, "Bidi_C"),
718        (BidiMirroredV1Marker, "Bidi_M"),
719        (BlankV1Marker, "blank"),
720        (CasedV1Marker, "Cased"),
721        (CaseIgnorableV1Marker, "CI"),
722        (FullCompositionExclusionV1Marker, "Comp_Ex"),
723        (ChangesWhenCasefoldedV1Marker, "CWCF"),
724        (ChangesWhenCasemappedV1Marker, "CWCM"),
725        (ChangesWhenNfkcCasefoldedV1Marker, "CWKCF"),
726        (ChangesWhenLowercasedV1Marker, "CWL"),
727        (ChangesWhenTitlecasedV1Marker, "CWT"),
728        (ChangesWhenUppercasedV1Marker, "CWU"),
729        (DashV1Marker, "Dash"),
730        (DeprecatedV1Marker, "Dep"),
731        (DefaultIgnorableCodePointV1Marker, "DI"),
732        (DiacriticV1Marker, "Dia"),
733        (EmojiModifierBaseV1Marker, "EBase"),
734        (EmojiComponentV1Marker, "EComp"),
735        (EmojiModifierV1Marker, "EMod"),
736        (EmojiV1Marker, "Emoji"),
737        (EmojiPresentationV1Marker, "EPres"),
738        (ExtenderV1Marker, "Ext"),
739        (ExtendedPictographicV1Marker, "ExtPict"),
740        (GraphV1Marker, "graph"),
741        (GraphemeBaseV1Marker, "Gr_Base"),
742        (GraphemeExtendV1Marker, "Gr_Ext"),
743        (GraphemeLinkV1Marker, "Gr_Link"),
744        (HexDigitV1Marker, "Hex"),
745        (HyphenV1Marker, "Hyphen"),
746        (IdContinueV1Marker, "IDC"),
747        (IdeographicV1Marker, "Ideo"),
748        (IdStartV1Marker, "IDS"),
749        (IdsBinaryOperatorV1Marker, "IDSB"),
750        (IdsTrinaryOperatorV1Marker, "IDST"),
751        (JoinControlV1Marker, "Join_C"),
752        (LogicalOrderExceptionV1Marker, "LOE"),
753        (LowercaseV1Marker, "Lower"),
754        (MathV1Marker, "Math"),
755        (NoncharacterCodePointV1Marker, "NChar"),
756        (NfcInertV1Marker, "nfcinert"),
757        (NfdInertV1Marker, "nfdinert"),
758        (NfkcInertV1Marker, "nfkcinert"),
759        (NfkdInertV1Marker, "nfkdinert"),
760        (PatternSyntaxV1Marker, "Pat_Syn"),
761        (PatternWhiteSpaceV1Marker, "Pat_WS"),
762        (PrependedConcatenationMarkV1Marker, "PCM"),
763        (PrintV1Marker, "print"),
764        (QuotationMarkV1Marker, "QMark"),
765        (RadicalV1Marker, "Radical"),
766        (RegionalIndicatorV1Marker, "RI"),
767        (SoftDottedV1Marker, "SD"),
768        (SegmentStarterV1Marker, "segstart"),
769        (CaseSensitiveV1Marker, "Sensitive"),
770        (SentenceTerminalV1Marker, "STerm"),
771        (TerminalPunctuationV1Marker, "Term"),
772        (UnifiedIdeographV1Marker, "UIdeo"),
773        (UppercaseV1Marker, "Upper"),
774        (VariationSelectorV1Marker, "VS"),
775        (WhiteSpaceV1Marker, "WSpace"),
776        (XdigitV1Marker, "xdigit"),
777        (XidContinueV1Marker, "XIDC"),
778        (XidStartV1Marker, "XIDS"),
779    ),
780    (
781        // UnicodeSets (code points + strings): one `(marker type, string, bool)` triple each.
           // NOTE(review): the meaning of the trailing bool is defined by the macro matcher,
           // which is not visible here; only `Basic_Emoji` passes `true` — confirm before changing.
782        (BasicEmojiV1Marker, "Basic_Emoji", true),
783        (ExemplarCharactersMainV1Marker, "exemplarchars/main", false),
784        (
785            ExemplarCharactersAuxiliaryV1Marker,
786            "exemplarchars/auxiliary",
787            false
788        ),
789        (
790            ExemplarCharactersPunctuationV1Marker,
791            "exemplarchars/punctuation",
792            false
793        ),
794        (
795            ExemplarCharactersNumbersV1Marker,
796            "exemplarchars/numbers",
797            false
798        ),
799        (
800            ExemplarCharactersIndexV1Marker,
801            "exemplarchars/index",
802            false
803        ),
804    ),
805    (
806        // code point maps: each entry lists, in order,
           //   - the marker for the code point map itself,
           //   - the marker for the name -> value lookup,
           //   - `(kind: short-name marker, long-name marker)` for the value -> name lookups,
           //     where `kind` is `sparse`, `linear` or `linear4` and presumably selects which
           //     group of marker definitions the macro emits — TODO confirm against the matcher,
           //   - a string that is presumably spliced into the "propnames/to/..." data key paths,
           //   - an identifier that is presumably the property's value type.
807        (
808            CanonicalCombiningClassV1Marker,
809            CanonicalCombiningClassNameToValueV1Marker,
810            (
811                sparse: CanonicalCombiningClassValueToShortNameV1Marker,
812                CanonicalCombiningClassValueToLongNameV1Marker
813            ),
814            "ccc",
815            CanonicalCombiningClass
816        ),
817        (
818            GeneralCategoryV1Marker,
819            GeneralCategoryNameToValueV1Marker,
820            (
821                linear: GeneralCategoryValueToShortNameV1Marker,
822                GeneralCategoryValueToLongNameV1Marker
823            ),
824            "gc",
825            GeneralCategory
826        ),
827        (
828            BidiClassV1Marker,
829            BidiClassNameToValueV1Marker,
830            (
831                linear: BidiClassValueToShortNameV1Marker,
832                BidiClassValueToLongNameV1Marker
833            ),
834            "bc",
835            BidiClass
836        ),
837        (
838            ScriptV1Marker,
839            ScriptNameToValueV1Marker,
840            (
                   // The only `linear4` entry. In the macro body the linear4 short-name marker
                   // uses the Tiny4 linear map, while the long-name marker uses the plain
                   // linear map ("Tiny4 is only for short names").
841                linear4: ScriptValueToShortNameV1Marker,
842                ScriptValueToLongNameV1Marker
843            ),
844            "sc",
845            Script
846        ),
847        (
848            HangulSyllableTypeV1Marker,
849            HangulSyllableTypeNameToValueV1Marker,
850            (
851                linear: HangulSyllableTypeValueToShortNameV1Marker,
852                HangulSyllableTypeValueToLongNameV1Marker
853            ),
854            "hst",
855            HangulSyllableType
856        ),
857        (
858            EastAsianWidthV1Marker,
859            EastAsianWidthNameToValueV1Marker,
860            (
861                linear: EastAsianWidthValueToShortNameV1Marker,
862                EastAsianWidthValueToLongNameV1Marker
863            ),
864            "ea",
865            EastAsianWidth
866        ),
867        (
868            LineBreakV1Marker,
869            LineBreakNameToValueV1Marker,
870            (
871                linear: LineBreakValueToShortNameV1Marker,
872                LineBreakValueToLongNameV1Marker
873            ),
874            "lb",
875            LineBreak
876        ),
877        (
878            GraphemeClusterBreakV1Marker,
879            GraphemeClusterBreakNameToValueV1Marker,
880            (
881                linear: GraphemeClusterBreakValueToShortNameV1Marker,
882                GraphemeClusterBreakValueToLongNameV1Marker
883            ),
884            "GCB",
885            GraphemeClusterBreak
886        ),
887        (
888            WordBreakV1Marker,
889            WordBreakNameToValueV1Marker,
890            (
891                linear: WordBreakValueToShortNameV1Marker,
892                WordBreakValueToLongNameV1Marker
893            ),
894            "WB",
895            WordBreak
896        ),
897        (
898            SentenceBreakV1Marker,
899            SentenceBreakNameToValueV1Marker,
900            (
901                linear: SentenceBreakValueToShortNameV1Marker,
902                SentenceBreakValueToLongNameV1Marker
903            ),
904            "SB",
905            SentenceBreak
906        ),
907        (
908            IndicSyllabicCategoryV1Marker,
909            IndicSyllabicCategoryNameToValueV1Marker,
910            (
911                linear: IndicSyllabicCategoryValueToShortNameV1Marker,
912                IndicSyllabicCategoryValueToLongNameV1Marker
913            ),
914            "InSC",
915            IndicSyllabicCategory
916        ),
917        (
918            JoiningTypeV1Marker,
919            JoiningTypeNameToValueV1Marker,
920            (
921                linear: JoiningTypeValueToShortNameV1Marker,
922                JoiningTypeValueToLongNameV1Marker
923            ),
924            "jt",
925            JoiningType
926        ),
927        // note: the names key for the GeneralCategoryMask (GCM) is intentionally not listed
           // here; `GeneralCategoryMaskNameToValueV1Marker::KEY` is appended to `KEYS` directly
           // in the macro body.
928    )
929);