Skip to main content

icu_properties/
emoji.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::*;
6use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
7use icu_provider::marker::ErasedMarker;
8use icu_provider::prelude::*;
9
10/// A wrapper around `UnicodeSet` data (characters and strings)
11#[derive(#[automatically_derived]
impl ::core::fmt::Debug for EmojiSetData {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field1_finish(f, "EmojiSetData",
            "data", &&self.data)
    }
}Debug)]
12pub struct EmojiSetData {
13    data: DataPayload<ErasedMarker<PropertyUnicodeSet<'static>>>,
14}
15
16impl EmojiSetData {
17    /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`].
18    ///
19    /// See the documentation on [`EmojiSet`] implementations for details.
20    ///
21    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
22    ///
23    /// [📚 Help choosing a constructor](icu_provider::constructors)
24    #[cfg(feature = "compiled_data")]
25    #[expect(clippy::new_ret_no_self)]
26    pub const fn new<P: EmojiSet>() -> EmojiSetDataBorrowed<'static> {
27        EmojiSetDataBorrowed::new::<P>()
28    }
29
30    #[cfg(feature = "serde")]
31    #[doc = icu_provider::gen_buffer_unstable_docs!(BUFFER, Self::new)]
32    pub fn try_new_with_buffer_provider<P: EmojiSet>(
33        provider: &(impl BufferProvider + ?Sized),
34    ) -> Result<EmojiSetData, DataError> {
35        use icu_provider::buf::AsDeserializingBufferProvider;
36        Self::try_new_unstable::<P>(&provider.as_deserializing())
37    }
38
39    /// A version of `new()` that uses custom data provided by a [`DataProvider`].
40    ///
41    /// Note that this will return an owned version of the data. Functionality is available on
42    /// the borrowed version, accessible through [`EmojiSetData::as_borrowed`].
43    pub fn try_new_unstable<P: EmojiSet>(
44        provider: &(impl DataProvider<P::DataMarker> + ?Sized),
45    ) -> Result<EmojiSetData, DataError> {
46        Ok(EmojiSetData::from_data(
47            provider.load(Default::default())?.payload,
48        ))
49    }
50
51    /// Construct a borrowed version of this type that can be queried.
52    ///
53    /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
54    /// up front.
55    #[inline]
56    pub fn as_borrowed(&self) -> EmojiSetDataBorrowed<'_> {
57        EmojiSetDataBorrowed {
58            set: self.data.get(),
59        }
60    }
61
62    /// Construct a new one from loaded data
63    ///
64    /// Typically it is preferable to use getters instead
65    pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self
66    where
67        M: DynamicDataMarker<DataStruct = PropertyUnicodeSet<'static>>,
68    {
69        Self { data: data.cast() }
70    }
71
72    /// Construct a new owned [`CodePointInversionListAndStringList`]
73    pub fn from_code_point_inversion_list_string_list(
74        set: CodePointInversionListAndStringList<'static>,
75    ) -> Self {
76        let set = PropertyUnicodeSet::from_code_point_inversion_list_string_list(set);
77        EmojiSetData::from_data(
78            DataPayload::<ErasedMarker<PropertyUnicodeSet<'static>>>::from_owned(set),
79        )
80    }
81
82    /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value.
83    ///
84    /// The data backing this is extensible and supports multiple implementations.
85    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
86    /// added, and users may select which at data generation time.
87    ///
88    /// This method returns an `Option` in order to return `None` when the backing data provider
89    /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time
90    /// constraint.
91    pub fn as_code_point_inversion_list_string_list(
92        &self,
93    ) -> Option<&CodePointInversionListAndStringList<'_>> {
94        self.data.get().as_code_point_inversion_list_string_list()
95    }
96
97    /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible,
98    /// otherwise allocating a new [`CodePointInversionListAndStringList`].
99    ///
100    /// The data backing this is extensible and supports multiple implementations.
101    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
102    /// added, and users may select which at data generation time.
103    ///
104    /// The performance of the conversion to this specific return type will vary
105    /// depending on the data structure that is backing `self`.
106    pub fn to_code_point_inversion_list_string_list(
107        &self,
108    ) -> CodePointInversionListAndStringList<'_> {
109        self.data.get().to_code_point_inversion_list_string_list()
110    }
111}
112
113/// A borrowed wrapper around code point set data, returned by
114/// [`EmojiSetData::as_borrowed()`]. More efficient to query.
115#[derive(#[automatically_derived]
impl<'a> ::core::clone::Clone for EmojiSetDataBorrowed<'a> {
    #[inline]
    fn clone(&self) -> EmojiSetDataBorrowed<'a> {
        let _: ::core::clone::AssertParamIsClone<&'a PropertyUnicodeSet<'a>>;
        *self
    }
}Clone, #[automatically_derived]
impl<'a> ::core::marker::Copy for EmojiSetDataBorrowed<'a> { }Copy, #[automatically_derived]
impl<'a> ::core::fmt::Debug for EmojiSetDataBorrowed<'a> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field1_finish(f,
            "EmojiSetDataBorrowed", "set", &&self.set)
    }
}Debug)]
116pub struct EmojiSetDataBorrowed<'a> {
117    set: &'a PropertyUnicodeSet<'a>,
118}
119
120impl EmojiSetDataBorrowed<'_> {
121    /// Check if the set contains the string. Strings consisting of one character
122    /// are treated as a character/code point.
123    ///
124    /// This matches ICU behavior for ICU's `UnicodeSet`.
125    #[inline]
126    pub fn contains_str(self, s: &str) -> bool {
127        self.set.contains_str(s)
128    }
129
130    /// See [`Self::contains_str`].
131    #[inline]
132    pub fn contains_utf8(self, s: &[u8]) -> bool {
133        self.set.contains_utf8(s)
134    }
135
136    /// Check if the set contains the code point.
137    #[inline]
138    pub fn contains(self, ch: char) -> bool {
139        self.set.contains(ch)
140    }
141
142    /// See [`Self::contains`].
143    #[inline]
144    pub fn contains32(self, cp: u32) -> bool {
145        self.set.contains32(cp)
146    }
147}
148
149impl EmojiSetDataBorrowed<'static> {
150    /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`].
151    ///
152    /// See the documentation on [`EmojiSet`] implementations for details.
153    ///
154    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
155    ///
156    /// [📚 Help choosing a constructor](icu_provider::constructors)
157    #[inline]
158    #[cfg(feature = "compiled_data")]
159    pub const fn new<P: EmojiSet>() -> Self {
160        EmojiSetDataBorrowed { set: P::SINGLETON }
161    }
162
163    /// Cheaply converts a [`EmojiSetDataBorrowed<'static>`] into a [`EmojiSetData`].
164    ///
165    /// Note: Due to branching and indirection, using [`EmojiSetData`] might inhibit some
166    /// compile-time optimizations that are possible with [`EmojiSetDataBorrowed`].
167    pub const fn static_to_owned(self) -> EmojiSetData {
168        EmojiSetData {
169            data: DataPayload::from_static_ref(self.set),
170        }
171    }
172}
173
174/// An Emoji set as defined by [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/#Emoji_Sets>).
175///
176/// <div class="stab unstable">
177/// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this
178/// trait, please consider using a type from the implementors listed below.
179/// </div>
180pub trait EmojiSet: crate::private::Sealed + Sized {
181    #[doc(hidden)]
182    type DataMarker: DataMarker<DataStruct = PropertyUnicodeSet<'static>>;
183    #[doc(hidden)]
184    #[cfg(feature = "compiled_data")]
185    const SINGLETON: &'static PropertyUnicodeSet<'static>;
186    /// The name of this property
187    const NAME: &'static [u8];
188    /// The abbreviated name of this property, if it exists, otherwise the name
189    const SHORT_NAME: &'static [u8];
190
191    /// Convenience method for `EmojiSetData::new().contains(ch)`
192    ///
193    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
194    #[cfg(feature = "compiled_data")]
195    fn for_char(ch: char) -> bool {
196        EmojiSetData::new::<Self>().contains(ch)
197    }
198
199    /// Convenience method for `EmojiSetData::new().contains_str(s)`
200    ///
201    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
202    #[cfg(feature = "compiled_data")]
203    fn for_str(s: &str) -> bool {
204        EmojiSetData::new::<Self>().contains_str(s)
205    }
206}