Skip to main content

icu_properties/
trievalue.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::bidi::BidiMirroringGlyph;
6use crate::props::{
7    BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
8    GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory,
9    JoiningGroup, JoiningType, LineBreak, NumericType, Script, SentenceBreak, VerticalOrientation,
10    WordBreak,
11};
12use crate::script::ScriptWithExt;
13use core::convert::TryInto;
14use core::num::TryFromIntError;
15use zerovec::ule::{AsULE, RawBytesULE};
16
17use icu_collections::codepointtrie::TrieValue;
18
19use core::convert::TryFrom;
20
21impl TrieValue for CanonicalCombiningClass {
22    type TryFromU32Error = TryFromIntError;
23
24    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
25        u8::try_from(i).map(Self)
26    }
27
28    fn to_u32(self) -> u32 {
29        u32::from(self.0)
30    }
31}
32
33impl TrieValue for NumericType {
34    type TryFromU32Error = TryFromIntError;
35
36    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
37        u8::try_from(i).map(Self)
38    }
39
40    fn to_u32(self) -> u32 {
41        u32::from(self.0)
42    }
43}
44
45impl TrieValue for BidiClass {
46    type TryFromU32Error = TryFromIntError;
47
48    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
49        u8::try_from(i).map(Self)
50    }
51
52    fn to_u32(self) -> u32 {
53        u32::from(self.0)
54    }
55}
56
57impl TrieValue for GeneralCategory {
58    type TryFromU32Error = &'static str;
59
60    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
61        // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
62        GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
63            .ok_or("Cannot parse GeneralCategory from integer")
64    }
65
66    fn to_u32(self) -> u32 {
67        u32::from(self as u8)
68    }
69}
70
71impl TrieValue for Script {
72    type TryFromU32Error = TryFromIntError;
73
74    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
75        u16::try_from(i).map(Script)
76    }
77
78    fn to_u32(self) -> u32 {
79        u32::from(self.0)
80    }
81}
82
83impl TrieValue for HangulSyllableType {
84    type TryFromU32Error = TryFromIntError;
85
86    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
87        u8::try_from(i).map(Self)
88    }
89
90    fn to_u32(self) -> u32 {
91        u32::from(self.0)
92    }
93}
94
95impl TrieValue for ScriptWithExt {
96    type TryFromU32Error = TryFromIntError;
97
98    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
99        u16::try_from(i).map(Self)
100    }
101
102    fn to_u32(self) -> u32 {
103        u32::from(self.0)
104    }
105}
106
107impl TrieValue for EastAsianWidth {
108    type TryFromU32Error = TryFromIntError;
109
110    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
111        u8::try_from(i).map(Self)
112    }
113
114    fn to_u32(self) -> u32 {
115        u32::from(self.0)
116    }
117}
118
119impl TrieValue for LineBreak {
120    type TryFromU32Error = TryFromIntError;
121
122    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
123        u8::try_from(i).map(Self)
124    }
125
126    fn to_u32(self) -> u32 {
127        u32::from(self.0)
128    }
129}
130
131impl TrieValue for GraphemeClusterBreak {
132    type TryFromU32Error = TryFromIntError;
133
134    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
135        u8::try_from(i).map(Self)
136    }
137
138    fn to_u32(self) -> u32 {
139        u32::from(self.0)
140    }
141}
142
143impl TrieValue for WordBreak {
144    type TryFromU32Error = TryFromIntError;
145
146    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
147        u8::try_from(i).map(Self)
148    }
149
150    fn to_u32(self) -> u32 {
151        u32::from(self.0)
152    }
153}
154
155impl TrieValue for SentenceBreak {
156    type TryFromU32Error = TryFromIntError;
157
158    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
159        u8::try_from(i).map(Self)
160    }
161
162    fn to_u32(self) -> u32 {
163        u32::from(self.0)
164    }
165}
166
167impl TrieValue for IndicConjunctBreak {
168    type TryFromU32Error = TryFromIntError;
169
170    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
171        u8::try_from(i).map(Self)
172    }
173
174    fn to_u32(self) -> u32 {
175        u32::from(self.0)
176    }
177}
178
179impl TrieValue for IndicSyllabicCategory {
180    type TryFromU32Error = TryFromIntError;
181
182    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
183        u8::try_from(i).map(Self)
184    }
185
186    fn to_u32(self) -> u32 {
187        u32::from(self.0)
188    }
189}
190
191impl TrieValue for VerticalOrientation {
192    type TryFromU32Error = TryFromIntError;
193
194    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
195        u8::try_from(i).map(Self)
196    }
197
198    fn to_u32(self) -> u32 {
199        u32::from(self.0)
200    }
201}
202
// GeneralCategoryGroup (GCG) is not used inside tries, but it is used in the name lookup
// type, and we want to squeeze it into a u16 for storage. Its named mask values are
// specified, so we can do this in code.
//
// This is done as follows:
// - Single-value masks are translated to their corresponding GeneralCategory values.
// - All of the multi-value masks are known, and each is given a special value.
// - Anything else goes to 0xFF00, though this code path should not be hit unless working
//   with malformed icuexportdata.
//
// In the reverse direction, unknown values go to the empty mask, but this code path should
// not be hit except with malformed ICU4X generated data.
214impl AsULE for GeneralCategoryGroup {
215    type ULE = RawBytesULE<2>;
216    fn to_unaligned(self) -> Self::ULE {
217        let value = gcg_to_packed_u16(self);
218        value.to_unaligned()
219    }
220    fn from_unaligned(ule: Self::ULE) -> Self {
221        let value = ule.as_unsigned_int();
222        packed_u16_to_gcg(value)
223    }
224}
225
226fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
227    match value {
228        0xFFFF => GeneralCategoryGroup::CasedLetter,
229        0xFFFE => GeneralCategoryGroup::Letter,
230        0xFFFD => GeneralCategoryGroup::Mark,
231        0xFFFC => GeneralCategoryGroup::Number,
232        0xFFFB => GeneralCategoryGroup::Separator,
233        0xFFFA => GeneralCategoryGroup::Other,
234        0xFFF9 => GeneralCategoryGroup::Punctuation,
235        0xFFF8 => GeneralCategoryGroup::Symbol,
236        v if v < 32 => GeneralCategory::new_from_u8(v as u8)
237            .map(|gc| gc.into())
238            .unwrap_or(GeneralCategoryGroup(0)),
239        // unknown values produce an empty mask
240        _ => GeneralCategoryGroup(0),
241    }
242}
243
244fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
245    // if it's a single property, translate to that property
246    if gcg.0.is_power_of_two() {
247        // inverse operation of a bitshift
248        gcg.0.trailing_zeros() as u16
249    } else {
250        match gcg {
251            GeneralCategoryGroup::CasedLetter => 0xFFFF,
252            GeneralCategoryGroup::Letter => 0xFFFE,
253            GeneralCategoryGroup::Mark => 0xFFFD,
254            GeneralCategoryGroup::Number => 0xFFFC,
255            GeneralCategoryGroup::Separator => 0xFFFB,
256            GeneralCategoryGroup::Other => 0xFFFA,
257            GeneralCategoryGroup::Punctuation => 0xFFF9,
258            GeneralCategoryGroup::Symbol => 0xFFF8,
259            _ => 0xFF00, // random sentinel value
260        }
261    }
262}
263
264impl TrieValue for GeneralCategoryGroup {
265    type TryFromU32Error = TryFromIntError;
266    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
267        // Even though we're dealing with u32s here, TrieValue is about converting
268        // trie storage types to the actual type. This type will always be a packed u16
269        // in our case since the names map upcasts from u16
270        u16::try_from(i).map(packed_u16_to_gcg)
271    }
272
273    fn to_u32(self) -> u32 {
274        u32::from(gcg_to_packed_u16(self))
275    }
276}
277
278impl TrieValue for BidiMirroringGlyph {
279    type TryFromU32Error = u32;
280
281    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
282        let code_point = i & 0x1FFFFF;
283        let mirroring_glyph = if code_point == 0 {
284            None
285        } else {
286            Some(char::try_from_u32(code_point).map_err(|_| i)?)
287        };
288        let mirrored = ((i >> 21) & 0x1) == 1;
289        let paired_bracket_type = {
290            let value = ((i >> 22) & 0x3) as u8;
291            match value {
292                0 => crate::bidi::BidiPairedBracketType::None,
293                1 => crate::bidi::BidiPairedBracketType::Open,
294                2 => crate::bidi::BidiPairedBracketType::Close,
295                _ => return Err(i),
296            }
297        };
298        Ok(Self {
299            mirrored,
300            mirroring_glyph,
301            paired_bracket_type,
302        })
303    }
304
305    fn to_u32(self) -> u32 {
306        self.mirroring_glyph.unwrap_or_default() as u32
307            | ((self.mirrored as u32) << 21)
308            | (match self.paired_bracket_type {
309                crate::bidi::BidiPairedBracketType::None => 0,
310                crate::bidi::BidiPairedBracketType::Open => 1,
311                crate::bidi::BidiPairedBracketType::Close => 2,
312            } << 22)
313    }
314}
315
316impl TrieValue for JoiningType {
317    type TryFromU32Error = TryFromIntError;
318
319    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
320        u8::try_from(i).map(Self)
321    }
322
323    fn to_u32(self) -> u32 {
324        u32::from(self.0)
325    }
326}
327
328impl TrieValue for JoiningGroup {
329    type TryFromU32Error = TryFromIntError;
330
331    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
332        u8::try_from(i).map(Self)
333    }
334    fn to_u32(self) -> u32 {
335        u32::from(self.0)
336    }
337}