utf16_iter/
lib.rs
1#![no_std]
18
19mod indices;
27mod report;
28
29pub use crate::indices::Utf16CharIndices;
30pub use crate::report::ErrorReportingUtf16Chars;
31pub use crate::report::Utf16CharsError;
32use core::iter::FusedIterator;
33
/// Returns `true` when `start <= i <= end`.
///
/// The check uses a single comparison: subtracting `start` with wrap-around
/// sends every value below `start` to the top of the `u16` range, so anything
/// outside the interval ends up larger than the interval's width.
///
/// `start` must not exceed `end`; otherwise `end - start` overflows.
#[inline(always)]
fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
    let offset = i.wrapping_sub(start);
    let width = end - start;
    offset <= width
}
38
/// Iterator that decodes a slice of UTF-16 code units into `char`s.
///
/// The input does not have to be well-formed: a surrogate that is not part
/// of a valid high/low pair is yielded as U+FFFD REPLACEMENT CHARACTER.
/// The iterator can be driven from either end.
#[derive(Clone, Debug)]
pub struct Utf16Chars<'a> {
    /// Code units that have not been consumed yet; shrinks from the front on
    /// `next` and from the back on `next_back`.
    remaining: &'a [u16],
}
45
46impl<'a> Utf16Chars<'a> {
47 #[inline(always)]
48 pub fn new(code_units: &'a [u16]) -> Self {
50 Utf16Chars::<'a> {
51 remaining: code_units,
52 }
53 }
54
55 #[inline(always)]
58 pub fn as_slice(&self) -> &'a [u16] {
59 self.remaining
60 }
61
62 #[inline(never)]
63 fn surrogate_next(&mut self, surrogate_base: u16, first: u16) -> char {
64 if surrogate_base <= (0xDBFF - 0xD800) {
65 if let Some((&low, tail_tail)) = self.remaining.split_first() {
66 if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
67 self.remaining = tail_tail;
68 return unsafe {
69 char::from_u32_unchecked(
70 (u32::from(first) << 10) + u32::from(low)
71 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
72 )
73 };
74 }
75 }
76 }
77 '\u{FFFD}'
78 }
79
80 #[inline(never)]
81 fn surrogate_next_back(&mut self, last: u16) -> char {
82 if in_inclusive_range16(last, 0xDC00, 0xDFFF) {
83 if let Some((&high, head_head)) = self.remaining.split_last() {
84 if in_inclusive_range16(high, 0xD800, 0xDBFF) {
85 self.remaining = head_head;
86 return unsafe {
87 char::from_u32_unchecked(
88 (u32::from(high) << 10) + u32::from(last)
89 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
90 )
91 };
92 }
93 }
94 }
95 '\u{FFFD}'
96 }
97}
98
99impl<'a> Iterator for Utf16Chars<'a> {
100 type Item = char;
101
102 #[inline(always)]
103 fn next(&mut self) -> Option<char> {
104 let (&first, tail) = self.remaining.split_first()?;
110 self.remaining = tail;
111 let surrogate_base = first.wrapping_sub(0xD800);
112 if surrogate_base > (0xDFFF - 0xD800) {
113 return Some(unsafe { char::from_u32_unchecked(u32::from(first)) });
114 }
115 Some(self.surrogate_next(surrogate_base, first))
116 }
117}
118
119impl<'a> DoubleEndedIterator for Utf16Chars<'a> {
120 #[inline(always)]
121 fn next_back(&mut self) -> Option<char> {
122 let (&last, head) = self.remaining.split_last()?;
123 self.remaining = head;
124 if !in_inclusive_range16(last, 0xD800, 0xDFFF) {
125 return Some(unsafe { char::from_u32_unchecked(u32::from(last)) });
126 }
127 Some(self.surrogate_next_back(last))
128 }
129}
130
131impl FusedIterator for Utf16Chars<'_> {}
132
133pub trait Utf16CharsEx {
136 fn chars(&self) -> Utf16Chars<'_>;
137 fn char_indices(&self) -> Utf16CharIndices<'_>;
138}
139
140impl Utf16CharsEx for [u16] {
141 #[inline]
144 fn chars(&self) -> Utf16Chars<'_> {
145 Utf16Chars::new(self)
146 }
147 #[inline]
150 fn char_indices(&self) -> Utf16CharIndices<'_> {
151 Utf16CharIndices::new(self)
152 }
153}
154
#[cfg(test)]
mod tests {
    use crate::Utf16CharsEx;

    /// Code units just outside the surrogate block decode to themselves; the
    /// first and last surrogate, when alone, decode to U+FFFD.
    #[test]
    fn test_boundaries() {
        let cases = [
            (0xD7FFu16, '\u{D7FF}'),
            (0xE000u16, '\u{E000}'),
            (0xD800u16, '\u{FFFD}'),
            (0xDFFFu16, '\u{FFFD}'),
        ];
        for &(unit, expected) in cases.iter() {
            let units = [unit];
            let mut chars = units.as_slice().chars();
            assert_eq!(chars.next(), Some(expected));
            assert_eq!(chars.next(), None);
        }
    }

    /// A lone surrogate followed by `a` decodes like U+FFFD followed by `a`.
    #[test]
    fn test_unpaired() {
        let replaced = [0xFFFDu16, 0x0061u16];
        for &lone in [0xD800u16, 0xDFFFu16].iter() {
            let input = [lone, 0x0061u16];
            assert!(input
                .as_slice()
                .chars()
                .eq(replaced.as_slice().chars()));
        }
    }

    /// Same as `test_unpaired`, but iterating from the back.
    #[test]
    fn test_unpaired_rev() {
        let replaced = [0xFFFDu16, 0x0061u16];
        for &lone in [0xD800u16, 0xDFFFu16].iter() {
            let input = [lone, 0x0061u16];
            assert!(input
                .as_slice()
                .chars()
                .rev()
                .eq(replaced.as_slice().chars().rev()));
        }
    }

    /// A valid surrogate pair decodes to a single supplementary character.
    #[test]
    fn test_paired() {
        let pair = [0xD83Eu16, 0xDD73u16];
        let mut chars = pair.as_slice().chars();
        assert_eq!(chars.next(), Some('🥳'));
        assert_eq!(chars.next(), None);
    }

    /// Same as `test_paired`, but iterating from the back.
    #[test]
    fn test_paired_rev() {
        let pair = [0xD83Eu16, 0xDD73u16];
        let mut chars = pair.as_slice().chars();
        assert_eq!(chars.next_back(), Some('🥳'));
        assert_eq!(chars.next_back(), None);
    }

    /// `as_slice` tracks the unconsumed tail. The slices are captured before
    /// the iterator advances and inspected afterwards, which also checks that
    /// they borrow from the data rather than from the iterator.
    #[test]
    fn test_as_slice() {
        let units = [0x0061u16, 0x0062u16];
        let mut iter = units.as_slice().chars();
        let at_start = iter.as_slice();
        assert_eq!(iter.next(), Some('a'));
        let in_middle = iter.as_slice();
        assert_eq!(iter.next(), Some('b'));
        let at_end = iter.as_slice();
        assert_eq!(at_start, &[0x0061u16, 0x0062u16][..]);
        assert_eq!(in_middle, &[0x0062u16][..]);
        assert!(at_end.is_empty());
    }
}