icu_properties/
trievalue.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::bidi_data::{
6    CheckedBidiPairedBracketType, MirroredPairedBracketData, MirroredPairedBracketDataTryFromError,
7};
8use crate::script::ScriptWithExt;
9use crate::{
10    BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
11    GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak,
12    Script, SentenceBreak, WordBreak,
13};
14use core::convert::TryInto;
15use core::num::TryFromIntError;
16use zerovec::ule::{AsULE, RawBytesULE};
17
18use icu_collections::codepointtrie::TrieValue;
19
20use core::convert::TryFrom;
21
22impl TrieValue for CanonicalCombiningClass {
23    type TryFromU32Error = TryFromIntError;
24
25    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
26        u8::try_from(i).map(Self)
27    }
28
29    fn to_u32(self) -> u32 {
30        u32::from(self.0)
31    }
32}
33
34impl TrieValue for BidiClass {
35    type TryFromU32Error = TryFromIntError;
36
37    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
38        u8::try_from(i).map(Self)
39    }
40
41    fn to_u32(self) -> u32 {
42        u32::from(self.0)
43    }
44}
45
46impl TrieValue for GeneralCategory {
47    type TryFromU32Error = &'static str;
48
49    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
50        // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
51        GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
52            .ok_or("Cannot parse GeneralCategory from integer")
53    }
54
55    fn to_u32(self) -> u32 {
56        u32::from(self as u8)
57    }
58}
59
60impl TrieValue for Script {
61    type TryFromU32Error = TryFromIntError;
62
63    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
64        u16::try_from(i).map(Script)
65    }
66
67    fn to_u32(self) -> u32 {
68        u32::from(self.0)
69    }
70}
71
72impl TrieValue for HangulSyllableType {
73    type TryFromU32Error = TryFromIntError;
74
75    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
76        u8::try_from(i).map(Self)
77    }
78
79    fn to_u32(self) -> u32 {
80        u32::from(self.0)
81    }
82}
83
84impl TrieValue for ScriptWithExt {
85    type TryFromU32Error = TryFromIntError;
86
87    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
88        u16::try_from(i).map(Self)
89    }
90
91    fn to_u32(self) -> u32 {
92        u32::from(self.0)
93    }
94}
95
96impl TrieValue for EastAsianWidth {
97    type TryFromU32Error = TryFromIntError;
98
99    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
100        u8::try_from(i).map(Self)
101    }
102
103    fn to_u32(self) -> u32 {
104        u32::from(self.0)
105    }
106}
107
108impl TrieValue for LineBreak {
109    type TryFromU32Error = TryFromIntError;
110
111    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
112        u8::try_from(i).map(Self)
113    }
114
115    fn to_u32(self) -> u32 {
116        u32::from(self.0)
117    }
118}
119
120impl TrieValue for GraphemeClusterBreak {
121    type TryFromU32Error = TryFromIntError;
122
123    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
124        u8::try_from(i).map(Self)
125    }
126
127    fn to_u32(self) -> u32 {
128        u32::from(self.0)
129    }
130}
131
132impl TrieValue for WordBreak {
133    type TryFromU32Error = TryFromIntError;
134
135    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
136        u8::try_from(i).map(Self)
137    }
138
139    fn to_u32(self) -> u32 {
140        u32::from(self.0)
141    }
142}
143
144impl TrieValue for SentenceBreak {
145    type TryFromU32Error = TryFromIntError;
146
147    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
148        u8::try_from(i).map(Self)
149    }
150
151    fn to_u32(self) -> u32 {
152        u32::from(self.0)
153    }
154}
155
156impl TrieValue for CheckedBidiPairedBracketType {
157    type TryFromU32Error = TryFromIntError;
158
159    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
160        Ok(match i {
161            1 => CheckedBidiPairedBracketType::Open,
162            2 => CheckedBidiPairedBracketType::Close,
163            _ => CheckedBidiPairedBracketType::None,
164        })
165    }
166}
167
168impl TrieValue for IndicSyllabicCategory {
169    type TryFromU32Error = TryFromIntError;
170
171    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
172        u8::try_from(i).map(Self)
173    }
174
175    fn to_u32(self) -> u32 {
176        u32::from(self.0)
177    }
178}
179
// `GeneralCategoryGroup` is not used inside tries, but it is used in the name lookup type, and
// we want to squeeze it into a u16 for storage. Its named mask values are specified, so we can
// do this in code.
//
// This is done as follows:
// - Single-value masks are translated to their corresponding `GeneralCategory` values.
// - We know all of the multi-value masks, and we give them special values.
// - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with
//   malformed icuexportdata.
//
// In the reverse direction, unknown values go to the empty mask, but this code path should not
// be hit except with malformed ICU4X-generated data.
191impl AsULE for GeneralCategoryGroup {
192    type ULE = RawBytesULE<2>;
193    fn to_unaligned(self) -> Self::ULE {
194        let value = gcg_to_packed_u16(self);
195        value.to_unaligned()
196    }
197    fn from_unaligned(ule: Self::ULE) -> Self {
198        let value = ule.as_unsigned_int();
199        packed_u16_to_gcg(value)
200    }
201}
202
203fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
204    match value {
205        0xFFFF => GeneralCategoryGroup::CasedLetter,
206        0xFFFE => GeneralCategoryGroup::Letter,
207        0xFFFD => GeneralCategoryGroup::Mark,
208        0xFFFC => GeneralCategoryGroup::Number,
209        0xFFFB => GeneralCategoryGroup::Separator,
210        0xFFFA => GeneralCategoryGroup::Other,
211        0xFFF9 => GeneralCategoryGroup::Punctuation,
212        0xFFF8 => GeneralCategoryGroup::Symbol,
213        v if v < 32 => GeneralCategory::new_from_u8(v as u8)
214            .map(|gc| gc.into())
215            .unwrap_or(GeneralCategoryGroup(0)),
216        // unknown values produce an empty mask
217        _ => GeneralCategoryGroup(0),
218    }
219}
220
221fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
222    // if it's a single property, translate to that property
223    if gcg.0.count_ones() == 1 {
224        // inverse operation of a bitshift
225        gcg.0.trailing_zeros() as u16
226    } else {
227        match gcg {
228            GeneralCategoryGroup::CasedLetter => 0xFFFF,
229            GeneralCategoryGroup::Letter => 0xFFFE,
230            GeneralCategoryGroup::Mark => 0xFFFD,
231            GeneralCategoryGroup::Number => 0xFFFC,
232            GeneralCategoryGroup::Separator => 0xFFFB,
233            GeneralCategoryGroup::Other => 0xFFFA,
234            GeneralCategoryGroup::Punctuation => 0xFFF9,
235            GeneralCategoryGroup::Symbol => 0xFFF8,
236            _ => 0xFF00, // random sentinel value
237        }
238    }
239}
240
241impl TrieValue for GeneralCategoryGroup {
242    type TryFromU32Error = TryFromIntError;
243    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
244        // Even though we're dealing with u32s here, TrieValue is about converting
245        // trie storage types to the actual type. This type will always be a packed u16
246        // in our case since the names map upcasts from u16
247        u16::try_from(i).map(packed_u16_to_gcg)
248    }
249
250    fn to_u32(self) -> u32 {
251        u32::from(gcg_to_packed_u16(self))
252    }
253}
254
255impl TrieValue for MirroredPairedBracketData {
256    type TryFromU32Error = MirroredPairedBracketDataTryFromError;
257
258    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
259        Self::try_from(i)
260    }
261}
262
263impl TrieValue for JoiningType {
264    type TryFromU32Error = TryFromIntError;
265
266    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
267        u8::try_from(i).map(Self)
268    }
269
270    fn to_u32(self) -> u32 {
271        u32::from(self.0)
272    }
273}