icu_collections/
iterator_utils.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::codepointtrie::CodePointMapRange;
6
7/// This is an iterator that coalesces adjacent ranges in an iterator over code
8/// point ranges
9pub(crate) struct RangeListIteratorCoalescer<I, T> {
10    iter: I,
11    peek: Option<CodePointMapRange<T>>,
12}
13
14impl<I, T: Eq> RangeListIteratorCoalescer<I, T>
15where
16    I: Iterator<Item = CodePointMapRange<T>>,
17{
18    pub fn new(iter: I) -> Self {
19        Self { iter, peek: None }
20    }
21}
22
23impl<I, T: Eq> Iterator for RangeListIteratorCoalescer<I, T>
24where
25    I: Iterator<Item = CodePointMapRange<T>>,
26{
27    type Item = CodePointMapRange<T>;
28
29    fn next(&mut self) -> Option<Self::Item> {
30        // Get the initial range we're working with: either a leftover
31        // range from last time, or the next range
32        let mut ret = if let Some(peek) = self.peek.take() {
33            peek
34        } else if let Some(next) = self.iter.next() {
35            next
36        } else {
37            // No ranges, exit early
38            return None;
39        };
40
41        // Keep pulling ranges
42        #[allow(clippy::while_let_on_iterator)]
43        // can't move the iterator, also we want it to be explicit that we're not draining the iterator
44        while let Some(next) = self.iter.next() {
45            if *next.range.start() == ret.range.end() + 1 && next.value == ret.value {
46                // Range has no gap, coalesce
47                ret.range = *ret.range.start()..=*next.range.end();
48            } else {
49                // Range has a gap, return what we have so far, update
50                // peek
51                self.peek = Some(next);
52                return Some(ret);
53            }
54        }
55
56        // Ran out of elements, exit
57        Some(ret)
58    }
59}
60
61#[cfg(test)]
62mod tests {
63    use core::fmt::Debug;
64    use icu::collections::codepointinvlist::CodePointInversionListBuilder;
65    use icu::collections::codepointtrie::TrieValue;
66    use icu::properties::maps::{self, CodePointMapDataBorrowed};
67    use icu::properties::sets::{self, CodePointSetDataBorrowed};
68    use icu::properties::{GeneralCategory, Script};
69
70    fn test_set(data: CodePointSetDataBorrowed<'static>, name: &str) {
71        let mut builder = CodePointInversionListBuilder::new();
72        let mut builder_complement = CodePointInversionListBuilder::new();
73
74        for range in data.iter_ranges() {
75            builder.add_range32(&range)
76        }
77
78        for range in data.iter_ranges_complemented() {
79            builder_complement.add_range32(&range)
80        }
81
82        builder.complement();
83        let set1 = builder.build();
84        let set2 = builder_complement.build();
85        assert_eq!(set1, set2, "Set {name} failed to complement correctly");
86    }
87
88    fn test_map<T: TrieValue + Debug>(
89        data: &CodePointMapDataBorrowed<'static, T>,
90        value: T,
91        name: &str,
92    ) {
93        let mut builder = CodePointInversionListBuilder::new();
94        let mut builder_complement = CodePointInversionListBuilder::new();
95
96        for range in data.iter_ranges_for_value(value) {
97            builder.add_range32(&range)
98        }
99
100        for range in data.iter_ranges_for_value_complemented(value) {
101            builder_complement.add_range32(&range)
102        }
103
104        builder.complement();
105        let set1 = builder.build();
106        let set2 = builder_complement.build();
107        assert_eq!(
108            set1, set2,
109            "Map {name} failed to complement correctly with value {value:?}"
110        );
111    }
112
113    #[test]
114    fn test_complement_sets() {
115        // Stress test the RangeListIteratorComplementer logic by ensuring it works for
116        // a whole bunch of binary properties
117        test_set(sets::ascii_hex_digit(), "ASCII_Hex_Digit");
118        test_set(sets::alnum(), "Alnum");
119        test_set(sets::alphabetic(), "Alphabetic");
120        test_set(sets::bidi_control(), "Bidi_Control");
121        test_set(sets::bidi_mirrored(), "Bidi_Mirrored");
122        test_set(sets::blank(), "Blank");
123        test_set(sets::cased(), "Cased");
124        test_set(sets::case_ignorable(), "Case_Ignorable");
125        test_set(
126            sets::full_composition_exclusion(),
127            "Full_Composition_Exclusion",
128        );
129        test_set(sets::changes_when_casefolded(), "Changes_When_Casefolded");
130        test_set(sets::changes_when_casemapped(), "Changes_When_Casemapped");
131        test_set(
132            sets::changes_when_nfkc_casefolded(),
133            "Changes_When_NFKC_Casefolded",
134        );
135        test_set(sets::changes_when_lowercased(), "Changes_When_Lowercased");
136        test_set(sets::changes_when_titlecased(), "Changes_When_Titlecased");
137        test_set(sets::changes_when_uppercased(), "Changes_When_Uppercased");
138        test_set(sets::dash(), "Dash");
139        test_set(sets::deprecated(), "Deprecated");
140        test_set(
141            sets::default_ignorable_code_point(),
142            "Default_Ignorable_Code_Point",
143        );
144        test_set(sets::diacritic(), "Diacritic");
145        test_set(sets::emoji_modifier_base(), "Emoji_Modifier_Base");
146        test_set(sets::emoji_component(), "Emoji_Component");
147        test_set(sets::emoji_modifier(), "Emoji_Modifier");
148        test_set(sets::emoji(), "Emoji");
149        test_set(sets::emoji_presentation(), "Emoji_Presentation");
150        test_set(sets::extender(), "Extender");
151        test_set(sets::extended_pictographic(), "Extended_Pictographic");
152        test_set(sets::graph(), "Graph");
153        test_set(sets::grapheme_base(), "Grapheme_Base");
154        test_set(sets::grapheme_extend(), "Grapheme_Extend");
155        test_set(sets::grapheme_link(), "Grapheme_Link");
156        test_set(sets::hex_digit(), "Hex_Digit");
157        test_set(sets::hyphen(), "Hyphen");
158        test_set(sets::id_continue(), "Id_Continue");
159        test_set(sets::ideographic(), "Ideographic");
160        test_set(sets::id_start(), "Id_Start");
161        test_set(sets::ids_binary_operator(), "Ids_Binary_Operator");
162        test_set(sets::ids_trinary_operator(), "Ids_Trinary_Operator");
163        test_set(sets::join_control(), "Join_Control");
164        test_set(sets::logical_order_exception(), "Logical_Order_Exception");
165        test_set(sets::lowercase(), "Lowercase");
166        test_set(sets::math(), "Math");
167        test_set(sets::noncharacter_code_point(), "Noncharacter_Code_Point");
168        test_set(sets::nfc_inert(), "NFC_Inert");
169        test_set(sets::nfd_inert(), "NFD_Inert");
170        test_set(sets::nfkc_inert(), "NFKC_Inert");
171        test_set(sets::nfkd_inert(), "NFKD_Inert");
172        test_set(sets::pattern_syntax(), "Pattern_Syntax");
173        test_set(sets::pattern_white_space(), "Pattern_White_Space");
174        test_set(
175            sets::prepended_concatenation_mark(),
176            "Prepended_Concatenation_Mark",
177        );
178        test_set(sets::print(), "Print");
179        test_set(sets::quotation_mark(), "Quotation_Mark");
180        test_set(sets::radical(), "Radical");
181        test_set(sets::regional_indicator(), "Regional_Indicator");
182        test_set(sets::soft_dotted(), "Soft_Dotted");
183        test_set(sets::segment_starter(), "Segment_Starter");
184        test_set(sets::case_sensitive(), "Case_Sensitive");
185        test_set(sets::sentence_terminal(), "Sentence_Terminal");
186        test_set(sets::terminal_punctuation(), "Terminal_Punctuation");
187        test_set(sets::unified_ideograph(), "Unified_Ideograph");
188        test_set(sets::uppercase(), "Uppercase");
189        test_set(sets::variation_selector(), "Variation_Selector");
190        test_set(sets::white_space(), "White_Space");
191        test_set(sets::xdigit(), "Xdigit");
192        test_set(sets::xid_continue(), "XID_Continue");
193        test_set(sets::xid_start(), "XID_Start");
194    }
195
196    #[test]
197    fn test_complement_maps() {
198        let gc = maps::general_category();
199        let script = maps::script();
200        test_map(&gc, GeneralCategory::UppercaseLetter, "gc");
201        test_map(&gc, GeneralCategory::OtherPunctuation, "gc");
202        test_map(&script, Script::Devanagari, "script");
203        test_map(&script, Script::Latin, "script");
204        test_map(&script, Script::Common, "script");
205    }
206}