utf16_iter/
report.rs
1use crate::in_inclusive_range16;
18use core::fmt::Formatter;
19use core::iter::FusedIterator;
20
/// Error signaling that the input was not well-formed UTF-16 because it
/// contained an unpaired surrogate.
///
/// The type carries no data. It is `#[non_exhaustive]`, so it cannot be
/// constructed with a struct expression outside of this crate.
///
/// `Clone` and `Eq` are derived (in addition to `Debug` and `PartialEq`) so
/// that `Result<char, Utf16CharsError>` values can be cloned and used in
/// containers and types that require full equality.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct Utf16CharsError;
35
36impl core::fmt::Display for Utf16CharsError {
37 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> {
38 write!(f, "unpaired surrogate")
39 }
40}
41
/// Iterator over a slice of potentially ill-formed UTF-16 code units.
///
/// Each item is either a decoded `char` or a `Utf16CharsError` for a
/// surrogate that is not part of a well-formed pair. The iterator can be
/// driven from both ends.
#[derive(Clone, Debug)]
pub struct ErrorReportingUtf16Chars<'a> {
    /// Code units that have not yet been consumed from either end.
    remaining: &'a [u16],
}
49
50impl<'a> ErrorReportingUtf16Chars<'a> {
51 #[inline(always)]
52 pub fn new(code_units: &'a [u16]) -> Self {
54 ErrorReportingUtf16Chars::<'a> {
55 remaining: code_units,
56 }
57 }
58
59 #[inline(always)]
62 pub fn as_slice(&self) -> &'a [u16] {
63 self.remaining
64 }
65
66 #[inline(never)]
67 fn surrogate_next(&mut self, surrogate_base: u16, first: u16) -> Result<char, Utf16CharsError> {
68 if surrogate_base <= (0xDBFF - 0xD800) {
69 if let Some((&low, tail_tail)) = self.remaining.split_first() {
70 if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
71 self.remaining = tail_tail;
72 return Ok(unsafe {
73 char::from_u32_unchecked(
74 (u32::from(first) << 10) + u32::from(low)
75 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
76 )
77 });
78 }
79 }
80 }
81 Err(Utf16CharsError)
82 }
83
84 #[inline(never)]
85 fn surrogate_next_back(&mut self, last: u16) -> Result<char, Utf16CharsError> {
86 if in_inclusive_range16(last, 0xDC00, 0xDFFF) {
87 if let Some((&high, head_head)) = self.remaining.split_last() {
88 if in_inclusive_range16(high, 0xD800, 0xDBFF) {
89 self.remaining = head_head;
90 return Ok(unsafe {
91 char::from_u32_unchecked(
92 (u32::from(high) << 10) + u32::from(last)
93 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
94 )
95 });
96 }
97 }
98 }
99 Err(Utf16CharsError)
100 }
101}
102
103impl<'a> Iterator for ErrorReportingUtf16Chars<'a> {
104 type Item = Result<char, Utf16CharsError>;
105
106 #[inline(always)]
107 fn next(&mut self) -> Option<Result<char, Utf16CharsError>> {
108 let (&first, tail) = self.remaining.split_first()?;
114 self.remaining = tail;
115 let surrogate_base = first.wrapping_sub(0xD800);
116 if surrogate_base > (0xDFFF - 0xD800) {
117 return Some(Ok(unsafe { char::from_u32_unchecked(u32::from(first)) }));
118 }
119 Some(self.surrogate_next(surrogate_base, first))
120 }
121}
122
123impl<'a> DoubleEndedIterator for ErrorReportingUtf16Chars<'a> {
124 #[inline(always)]
125 fn next_back(&mut self) -> Option<Result<char, Utf16CharsError>> {
126 let (&last, head) = self.remaining.split_last()?;
127 self.remaining = head;
128 if !in_inclusive_range16(last, 0xD800, 0xDFFF) {
129 return Some(Ok(unsafe { char::from_u32_unchecked(u32::from(last)) }));
130 }
131 Some(self.surrogate_next_back(last))
132 }
133}
134
135impl FusedIterator for ErrorReportingUtf16Chars<'_> {}
136
#[cfg(test)]
mod tests {
    use crate::ErrorReportingUtf16Chars;
    use crate::Utf16CharsEx;

    /// Decodes `units`, substituting U+FFFD for every reported error.
    fn lossy(units: &[u16]) -> impl DoubleEndedIterator<Item = char> + '_ {
        ErrorReportingUtf16Chars::new(units).map(|item| item.unwrap_or('\u{FFFD}'))
    }

    /// Code units just outside the surrogate range decode as themselves;
    /// the first and last surrogate, standing alone, are errors.
    #[test]
    fn test_boundaries() {
        let cases = [
            (0xD7FFu16, '\u{D7FF}'),
            (0xE000u16, '\u{E000}'),
            (0xD800u16, '\u{FFFD}'),
            (0xDFFFu16, '\u{FFFD}'),
        ];
        for &(unit, expected) in cases.iter() {
            assert!(lossy(&[unit]).eq(core::iter::once(expected)));
        }
    }

    /// An unpaired surrogate followed by a BMP character yields one error
    /// and then that character.
    #[test]
    fn test_unpaired() {
        let expected = [0xFFFDu16, 0x0061u16];
        for unpaired in [0xD800u16, 0xDFFFu16].iter().copied() {
            assert!(lossy(&[unpaired, 0x0061u16]).eq(expected.as_slice().chars()));
        }
    }

    /// Same as `test_unpaired`, iterating from the back.
    #[test]
    fn test_unpaired_rev() {
        let expected = [0xFFFDu16, 0x0061u16];
        for unpaired in [0xD800u16, 0xDFFFu16].iter().copied() {
            assert!(lossy(&[unpaired, 0x0061u16])
                .rev()
                .eq(expected.as_slice().chars().rev()));
        }
    }

    /// A well-formed surrogate pair decodes to a single supplementary-plane
    /// character.
    #[test]
    fn test_paired() {
        let pair = [0xD83Eu16, 0xDD73u16];
        assert!(lossy(&pair).eq(core::iter::once('🥳')));
    }

    /// Same as `test_paired`, iterating from the back.
    #[test]
    fn test_paired_rev() {
        let pair = [0xD83Eu16, 0xDD73u16];
        assert!(lossy(&pair).rev().eq(core::iter::once('🥳')));
    }

    /// `as_slice` reflects exactly the code units that are still unconsumed.
    #[test]
    fn test_as_slice() {
        let units = [0x0061u16, 0x0062u16];
        let mut iter = ErrorReportingUtf16Chars::new(units.as_slice());

        let at_start = iter.as_slice();
        assert_eq!(iter.next(), Some(Ok('a')));
        let in_middle = iter.as_slice();
        assert_eq!(iter.next(), Some(Ok('b')));
        let at_end = iter.as_slice();

        assert_eq!(at_start, [0x0061u16, 0x0062u16].as_slice());
        assert_eq!(in_middle, [0x0062u16].as_slice());
        assert!(at_end.is_empty());
    }

    /// The iterator's `Option<Item>` must be no larger than `Option<char>`.
    #[test]
    fn test_size() {
        let item_size =
            core::mem::size_of::<Option<<ErrorReportingUtf16Chars<'_> as Iterator>::Item>>();
        let char_size = core::mem::size_of::<Option<char>>();
        assert_eq!(item_size, char_size);
    }
}