1#![cfg_attr(not(any(test, feature = "std")), no_std)]
7#![cfg_attr(
8 not(test),
9 deny(
10 clippy::indexing_slicing,
11 clippy::unwrap_used,
12 clippy::expect_used,
13 clippy::panic,
14 clippy::exhaustive_structs,
15 clippy::exhaustive_enums,
16 missing_debug_implementations,
17 )
18)]
19#![warn(missing_docs)]
20
21extern crate alloc;
69
70mod error;
71pub mod properties;
72pub mod provider;
73pub mod uts46;
74
75pub use crate::error::NormalizerError;
76
77#[doc(no_inline)]
78pub use NormalizerError as Error;
79
80use crate::provider::CanonicalDecompositionDataV1Marker;
81use crate::provider::CompatibilityDecompositionSupplementV1Marker;
82use crate::provider::DecompositionDataV1;
83use crate::provider::Uts46DecompositionSupplementV1Marker;
84use alloc::string::String;
85use alloc::vec::Vec;
86use core::char::REPLACEMENT_CHARACTER;
87use core::str::from_utf8_unchecked;
88use icu_collections::char16trie::Char16Trie;
89use icu_collections::char16trie::Char16TrieIterator;
90use icu_collections::char16trie::TrieResult;
91use icu_collections::codepointtrie::CodePointTrie;
92use icu_properties::CanonicalCombiningClass;
93use icu_provider::prelude::*;
94use provider::CanonicalCompositionsV1Marker;
95use provider::CanonicalDecompositionTablesV1Marker;
96use provider::CompatibilityDecompositionTablesV1Marker;
97use provider::DecompositionSupplementV1;
98use provider::DecompositionTablesV1;
99use smallvec::SmallVec;
100use utf16_iter::Utf16CharsEx;
101use utf8_iter::Utf8CharsEx;
102use write16::Write16;
103use zerofrom::ZeroFrom;
104use zerovec::{zeroslice, ZeroSlice};
105
/// Holds the supplementary decomposition payload for either compatibility
/// (NFKD) or UTS 46 data; both variants expose the same
/// `DecompositionSupplementV1` via `get()`.
#[derive(Debug)]
enum SupplementPayloadHolder {
    Compatibility(DataPayload<CompatibilityDecompositionSupplementV1Marker>),
    Uts46(DataPayload<Uts46DecompositionSupplementV1Marker>),
}
111
impl SupplementPayloadHolder {
    /// Returns the wrapped supplementary decomposition data regardless of
    /// which variant holds it.
    fn get(&self) -> &DecompositionSupplementV1 {
        match self {
            SupplementPayloadHolder::Compatibility(d) => d.get(),
            SupplementPayloadHolder::Uts46(d) => d.get(),
        }
    }
}
120
/// How characters whose trie value is `IGNORABLE_MARKER` are treated
/// (see `delegate_next_no_pending`).
#[derive(Debug, PartialEq, Eq)]
enum IgnorableBehavior {
    /// The data is not expected to contain ignorables; hitting the marker
    /// is debug-asserted as a bug and the character is passed along.
    Unsupported,
    /// Ignorables are dropped from the output.
    Ignored,
    /// Ignorables are replaced with U+FFFD REPLACEMENT CHARACTER.
    ReplacementCharacter,
}
132
/// Number of code units buffered on the UTF-16 fast path before flushing to
/// the sink.
const UTF16_FAST_PATH_FLUSH_THRESHOLD: usize = 4096;

/// Trie value marking characters that are ignorable (UTS 46 data).
const IGNORABLE_MARKER: u32 = 0xFFFFFFFF;

/// Trie value for starters that decompose to themselves but may combine
/// backwards with a preceding character.
const BACKWARD_COMBINING_STARTER_MARKER: u32 = 1;

/// Trie value for non-starters whose decomposition requires the special
/// per-character mapping in `gather_and_sort_combining`.
const SPECIAL_NON_STARTER_DECOMPOSITION_MARKER: u32 = 2;

/// `SPECIAL_NON_STARTER_DECOMPOSITION_MARKER` as `u16`, for comparing
/// against the lead half of a trie value.
const SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16: u16 = 2;

/// Lead-half value marking a decomposition that does not round-trip back
/// via canonical composition.
const NON_ROUND_TRIP_MARKER: u16 = 1;
164
/// Whether the trie value encodes a canonical combining class in its low
/// byte. Such values carry the tag 0xD8 in bits 8..=15 with all higher
/// bits clear, i.e. the value shifted right by 8 equals 0xD8.
fn trie_value_has_ccc(trie_value: u32) -> bool {
    (trie_value >> 8) == 0xD8
}
170
/// Whether the trie value is the marker for a non-starter whose
/// decomposition is handled by the special-case table in
/// `gather_and_sort_combining`.
fn trie_value_indicates_special_non_starter_decomposition(trie_value: u32) -> bool {
    matches!(trie_value, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER)
}
175
176fn decomposition_starts_with_non_starter(trie_value: u32) -> bool {
179 trie_value_has_ccc(trie_value)
180 || trie_value_indicates_special_non_starter_decomposition(trie_value)
181}
182
/// Extracts the canonical combining class from a trie value: the low byte
/// when the value carries a ccc tag, otherwise `NotReordered` (ccc 0).
/// Must not be called with `SPECIAL_NON_STARTER_DECOMPOSITION_MARKER`
/// (debug-asserted), since that marker hides a non-zero ccc.
fn ccc_from_trie_value(trie_value: u32) -> CanonicalCombiningClass {
    if trie_value_has_ccc(trie_value) {
        CanonicalCombiningClass(trie_value as u8)
    } else {
        debug_assert_ne!(trie_value, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER);
        CanonicalCombiningClass::NotReordered
    }
}
197
/// The tail of the NFKD expansion of U+FDFA (an Arabic ligature): the 17
/// code units pushed to the buffer after the leading starter U+0635, which
/// `decomposing_next` returns separately. This is the longest decomposition
/// and is special-cased via `FDFA_MARKER`.
static FDFA_NFKD: [u16; 17] = [
    0x644, 0x649, 0x20, 0x627, 0x644, 0x644, 0x647, 0x20, 0x639, 0x644, 0x64A, 0x647, 0x20, 0x648,
    0x633, 0x644, 0x645,
];

/// Lead-half value marking the special U+FDFA expansion above.
const FDFA_MARKER: u16 = 3;
207
// Constants for arithmetic Hangul syllable composition/decomposition
// (conjoining jamo algorithm; see The Unicode Standard, "Conjoining Jamo
// Behavior").
const HANGUL_S_BASE: u32 = 0xAC00;
const HANGUL_L_BASE: u32 = 0x1100;
const HANGUL_V_BASE: u32 = 0x1161;
const HANGUL_T_BASE: u32 = 0x11A7;
const HANGUL_L_COUNT: u32 = 19;
const HANGUL_V_COUNT: u32 = 21;
const HANGUL_T_COUNT: u32 = 28;
// V_COUNT * T_COUNT: number of precomposed syllables per leading consonant.
const HANGUL_N_COUNT: u32 = 588;
// L_COUNT * N_COUNT: total number of precomposed Hangul syllables.
const HANGUL_S_COUNT: u32 = 11172;

/// One past the end of the conjoining jamo block; used by `compose` to
/// rule out the Hangul cases quickly.
const HANGUL_JAMO_LIMIT: u32 = 0x1200;
230
/// Unwraps `opt`, falling back to `default` for `None`. `None` indicates
/// garbage input data, so debug builds assert (GIGO: garbage in, garbage
/// out) while release builds proceed with the fallback.
#[inline(always)]
fn unwrap_or_gigo<T>(opt: Option<T>, default: T) -> T {
    match opt {
        Some(val) => val,
        None => {
            // GIGO case: invalid data reached this point.
            debug_assert!(false);
            default
        }
    }
}
246
/// Converts `u` to `char`, substituting U+FFFD (with a debug assertion)
/// when `u` is not a valid Unicode scalar value.
#[inline(always)]
fn char_from_u32(u: u32) -> char {
    unwrap_or_gigo(core::char::from_u32(u), REPLACEMENT_CHARACTER)
}
252
/// Converts a BMP code unit to `char` via `char_from_u32`; surrogate
/// values fall back to U+FFFD there.
#[inline(always)]
fn char_from_u16(u: u16) -> char {
    char_from_u32(u32::from(u))
}
258
/// Empty `u16` scalar table used when no supplementary tables are provided.
const EMPTY_U16: &ZeroSlice<u16> = zeroslice![];

/// Empty `char` scalar table used when no supplementary tables are provided.
const EMPTY_CHAR: &ZeroSlice<char> = zeroslice![];
262
/// Checks `start <= c && c <= end` with a single comparison: when `c` is
/// below `start`, the wrapping subtraction wraps to a huge value that
/// fails the `<=` test.
#[inline(always)]
fn in_inclusive_range(c: char, start: char, end: char) -> bool {
    let (code, lo, hi) = (u32::from(c), u32::from(start), u32::from(end));
    code.wrapping_sub(lo) <= hi - lo
}
267
/// Branchless inclusive range check for `u32`: values below `start` wrap
/// to a huge difference and fail the comparison against the span.
#[inline(always)]
fn in_inclusive_range32(u: u32, start: u32, end: u32) -> bool {
    let span = end - start;
    u.wrapping_sub(start) <= span
}
272
/// Branchless inclusive range check for `u16`: values below `start` wrap
/// to a huge difference and fail the comparison against the span.
#[inline(always)]
fn in_inclusive_range16(u: u16, start: u16, end: u16) -> bool {
    let span = end - start;
    u.wrapping_sub(start) <= span
}
277
/// Performs canonical composition of `starter` and `second`: Hangul jamo
/// are composed arithmetically, everything else via the composition trie.
/// Returns `None` when the pair does not compose.
#[inline]
fn compose(iter: Char16TrieIterator, starter: char, second: char) -> Option<char> {
    let v = u32::from(second).wrapping_sub(HANGUL_V_BASE);
    if v >= HANGUL_JAMO_LIMIT - HANGUL_V_BASE {
        // `second` is outside the V/T jamo range: take the trie path.
        return compose_non_hangul(iter, starter, second);
    }
    if v < HANGUL_V_COUNT {
        // `second` is a V jamo: an L jamo starter yields an LV syllable.
        let l = u32::from(starter).wrapping_sub(HANGUL_L_BASE);
        if l < HANGUL_L_COUNT {
            let lv = l * HANGUL_N_COUNT + v * HANGUL_T_COUNT;
            // SAFETY: l < 19 and v < 21 keep HANGUL_S_BASE + lv inside the
            // precomposed Hangul syllable block (valid scalar values).
            return Some(unsafe { char::from_u32_unchecked(HANGUL_S_BASE + lv) });
        }
        return None;
    }
    if in_inclusive_range(second, '\u{11A8}', '\u{11C2}') {
        // `second` is a T jamo: an LV syllable starter yields LVT.
        let lv = u32::from(starter).wrapping_sub(HANGUL_S_BASE);
        // The starter must be an LV syllable, i.e. with no T part yet
        // (offset divisible by HANGUL_T_COUNT).
        if lv < HANGUL_S_COUNT && lv % HANGUL_T_COUNT == 0 {
            let lvt = lv + (u32::from(second) - HANGUL_T_BASE);
            // SAFETY: lv < HANGUL_S_COUNT and the T offset < 28 keep the
            // result inside the precomposed Hangul syllable block.
            return Some(unsafe { char::from_u32_unchecked(HANGUL_S_BASE + lvt) });
        }
    }
    None
}
306
/// Performs non-Hangul canonical composition via the composition trie.
/// The trie is consulted with `second` first and then `starter`; a
/// two-step lookup ending in `FinalValue` yields the composed character.
fn compose_non_hangul(mut iter: Char16TrieIterator, starter: char, second: char) -> Option<char> {
    match iter.next(second) {
        TrieResult::NoMatch => None,
        TrieResult::NoValue => match iter.next(starter) {
            TrieResult::NoMatch => None,
            TrieResult::FinalValue(i) => {
                if let Some(c) = char::from_u32(i as u32) {
                    Some(c)
                } else {
                    // GIGO case: the trie yielded an invalid scalar value.
                    debug_assert!(false);
                    None
                }
            }
            TrieResult::NoValue | TrieResult::Intermediate(_) => {
                // GIGO case: the lookup should terminate after two steps.
                debug_assert!(false);
                None
            }
        },
        TrieResult::FinalValue(_) | TrieResult::Intermediate(_) => {
            // GIGO case: `second` alone should not carry a value.
            debug_assert!(false);
            None
        }
    }
}
341
/// A character paired with its normalization trie value, plus a flag for
/// whether the value came from the supplementary (NFKD/UTS 46) trie.
#[derive(Debug, PartialEq, Eq)]
struct CharacterAndTrieValue {
    character: char,
    trie_val: u32,
    // True when `trie_val` was read from the supplementary trie.
    from_supplement: bool,
}
352
impl CharacterAndTrieValue {
    /// Pairs `c` with a value from the main trie.
    #[inline(always)]
    pub fn new(c: char, trie_value: u32) -> Self {
        CharacterAndTrieValue {
            character: c,
            trie_val: trie_value,
            from_supplement: false,
        }
    }
    /// Pairs `c` with a value from the supplementary trie.
    #[inline(always)]
    pub fn new_from_supplement(c: char, trie_value: u32) -> Self {
        CharacterAndTrieValue {
            character: c,
            trie_val: trie_value,
            from_supplement: true,
        }
    }
    /// Whether this is a starter that decomposes to itself: the trie value
    /// is at most `BACKWARD_COMBINING_STARTER_MARKER` and the character is
    /// not a precomposed Hangul syllable (which decomposes arithmetically
    /// despite a self-decomposition trie value).
    #[inline(always)]
    pub fn starter_and_decomposes_to_self(&self) -> bool {
        if self.trie_val > BACKWARD_COMBINING_STARTER_MARKER {
            return false;
        }
        u32::from(self.character).wrapping_sub(HANGUL_S_BASE) >= HANGUL_S_COUNT
    }
    /// Whether this character can combine backwards with a preceding
    /// character: non-starters, backward-combining starters, and V/T jamo.
    /// NOTE(review): the 0x1161..=0x11C2 check is against the *trie value*,
    /// which appears to equal the code point for jamo — confirm against the
    /// data format.
    #[inline(always)]
    pub fn can_combine_backwards(&self) -> bool {
        decomposition_starts_with_non_starter(self.trie_val)
            || self.trie_val == BACKWARD_COMBINING_STARTER_MARKER
            || in_inclusive_range32(self.trie_val, 0x1161, 0x11C2)
    }
    /// Whether the character may pass through composition unchanged (it may
    /// still need to combine backwards with what precedes it).
    #[inline(always)]
    pub fn potential_passthrough(&self) -> bool {
        self.potential_passthrough_impl(BACKWARD_COMBINING_STARTER_MARKER)
    }
    /// Like `potential_passthrough`, but treats backward-combining starters
    /// as non-passthrough by lowering the trie-value bound to zero.
    #[inline(always)]
    pub fn potential_passthrough_and_cannot_combine_backwards(&self) -> bool {
        self.potential_passthrough_impl(0)
    }
    #[inline(always)]
    fn potential_passthrough_impl(&self, bound: u32) -> bool {
        if self.trie_val <= bound {
            // Decomposes to self.
            return true;
        }
        if self.from_supplement {
            // Values from the supplementary trie are never passthrough.
            return false;
        }
        // Lead/trail halves of the packed trie value; see
        // `decomposing_next` for how they are interpreted.
        let trail_or_complex = (self.trie_val >> 16) as u16;
        if trail_or_complex == 0 {
            return false;
        }
        let lead = self.trie_val as u16;
        if lead == 0 {
            return true;
        }
        if lead == NON_ROUND_TRIP_MARKER {
            return false;
        }
        if (trail_or_complex & 0x7F) == 0x3C
            && in_inclusive_range16(trail_or_complex, 0x0900, 0x0BFF)
        {
            // NOTE(review): excludes trails of the form U+09xx/U+0Axx/U+0Bxx
            // ending in 0x3C (Indic nukta-pattern combining marks) — confirm
            // against the data format documentation.
            return false;
        }
        if in_inclusive_range(self.character, '\u{FB1D}', '\u{FB4E}') {
            // Hebrew presentation forms.
            return false;
        }
        if in_inclusive_range(self.character, '\u{1F71}', '\u{1FFB}') {
            // Greek Extended range with oxia forms.
            return false;
        }
        true
    }
}
438
/// A `char` packed with its canonical combining class: the scalar value in
/// the low 24 bits and the ccc in the high 8 bits. The high-byte value
/// 0xFF is a placeholder meaning "ccc not looked up yet" (see
/// `set_ccc_from_trie_if_not_already_set`).
#[derive(Debug)]
struct CharacterAndClass(u32);
463
impl CharacterAndClass {
    /// Packs `c` with a known combining class.
    pub fn new(c: char, ccc: CanonicalCombiningClass) -> Self {
        CharacterAndClass(u32::from(c) | (u32::from(ccc.0) << 24))
    }
    /// Packs `c` with the 0xFF placeholder class, to be resolved lazily by
    /// `set_ccc_from_trie_if_not_already_set`.
    pub fn new_with_placeholder(c: char) -> Self {
        CharacterAndClass(u32::from(c) | ((0xFF) << 24))
    }
    /// Packs a character with the ccc derived from its trie value.
    pub fn new_with_trie_value(c_tv: CharacterAndTrieValue) -> Self {
        Self::new(c_tv.character, ccc_from_trie_value(c_tv.trie_val))
    }
    /// Packs `c` with ccc 0 (`NotReordered`).
    pub fn new_starter(c: char) -> Self {
        CharacterAndClass(u32::from(c))
    }
    pub fn character(&self) -> char {
        // SAFETY: the low 24 bits were written from a `char` by one of the
        // constructors above, so they form a valid scalar value.
        unsafe { char::from_u32_unchecked(self.0 & 0xFFFFFF) }
    }
    pub fn ccc(&self) -> CanonicalCombiningClass {
        CanonicalCombiningClass((self.0 >> 24) as u8)
    }
    pub fn character_and_ccc(&self) -> (char, CanonicalCombiningClass) {
        (self.character(), self.ccc())
    }
    /// Replaces the 0xFF placeholder, if present, with the real ccc looked
    /// up from `trie`; a no-op when the ccc is already set.
    pub fn set_ccc_from_trie_if_not_already_set(&mut self, trie: &CodePointTrie<u32>) {
        if self.0 >> 24 != 0xFF {
            return;
        }
        let scalar = self.0 & 0xFFFFFF;
        self.0 = ((ccc_from_trie_value(trie.get32_u32(scalar)).0 as u32) << 24) | scalar;
    }
}
496
/// Resolves any pending ccc placeholders in `slice` and sorts it by
/// canonical combining class. The sort must be stable (`sort_by_key` is)
/// so that characters with equal ccc retain their relative order, as the
/// Unicode canonical ordering algorithm requires.
#[inline(always)]
fn sort_slice_by_ccc(slice: &mut [CharacterAndClass], trie: &CodePointTrie<u32>) {
    // Skip the trie lookups entirely when there is nothing to reorder.
    if slice.len() < 2 {
        return;
    }
    slice
        .iter_mut()
        .for_each(|cc| cc.set_ccc_from_trie_if_not_already_set(trie));
    slice.sort_by_key(|cc| cc.ccc());
}
513
/// An iterator adaptor that lazily decomposes an iterator over `char`
/// (canonical or compatibility decomposition, depending on the data).
#[derive(Debug)]
pub struct Decomposition<'data, I>
where
    I: Iterator<Item = char>,
{
    delegate: I,
    // Decomposed characters not yet read out. Capacity 17 so the U+FDFA
    // expansion (see `FDFA_NFKD`) fits without spilling to the heap.
    buffer: SmallVec<[CharacterAndClass; 17]>,
    // Read position within `buffer`.
    buffer_pos: usize,
    // One-character lookahead: the most recent character read from
    // `delegate` that has not been processed yet.
    pending: Option<CharacterAndTrieValue>,
    trie: &'data CodePointTrie<'data, u32>,
    // Present for NFKD/UTS 46; consulted before the main trie.
    supplementary_trie: Option<&'data CodePointTrie<'data, u32>>,
    scalars16: &'data ZeroSlice<u16>,
    scalars24: &'data ZeroSlice<char>,
    supplementary_scalars16: &'data ZeroSlice<u16>,
    supplementary_scalars24: &'data ZeroSlice<char>,
    // UTS 46 mode: U+FF9E/U+FF9F map to the combining voiced sound marks.
    half_width_voicing_marks_become_non_starters: bool,
    // Characters below this scalar value skip the trie lookup entirely.
    decomposition_passthrough_bound: u32,
    ignorable_behavior: IgnorableBehavior,
}
546
impl<'data, I> Decomposition<'data, I>
where
    I: Iterator<Item = char>,
{
    /// Constructs a decomposing iterator adaptor over `delegate` using the
    /// main (canonical) decomposition data only.
    #[doc(hidden)]
    pub fn new(
        delegate: I,
        decompositions: &'data DecompositionDataV1,
        tables: &'data DecompositionTablesV1,
    ) -> Self {
        Self::new_with_supplements(
            delegate,
            decompositions,
            None,
            tables,
            None,
            0xC0, // passthrough below U+00C0 for plain canonical decomposition
            IgnorableBehavior::Unsupported,
        )
    }

    /// Constructs a decomposing iterator adaptor with optional
    /// supplementary (compatibility/UTS 46) data, then primes it by
    /// consuming the seed character so that the first real input character
    /// ends up in `pending`.
    fn new_with_supplements(
        delegate: I,
        decompositions: &'data DecompositionDataV1,
        supplementary_decompositions: Option<&'data DecompositionSupplementV1>,
        tables: &'data DecompositionTablesV1,
        supplementary_tables: Option<&'data DecompositionTablesV1>,
        decomposition_passthrough_bound: u8,
        ignorable_behavior: IgnorableBehavior,
    ) -> Self {
        let half_width_voicing_marks_become_non_starters =
            if let Some(supplementary) = supplementary_decompositions {
                supplementary.half_width_voicing_marks_become_non_starters()
            } else {
                false
            };
        let mut ret = Decomposition::<I> {
            delegate,
            buffer: SmallVec::new(),
            buffer_pos: 0,
            // Seed with U+FFFF (trie value 0: decomposes to self) so the
            // first `next()` call below runs the normal machinery.
            pending: Some(CharacterAndTrieValue::new('\u{FFFF}', 0)),
            trie: &decompositions.trie,
            supplementary_trie: supplementary_decompositions.map(|s| &s.trie),
            scalars16: &tables.scalars16,
            scalars24: &tables.scalars24,
            supplementary_scalars16: if let Some(supplementary) = supplementary_tables {
                &supplementary.scalars16
            } else {
                EMPTY_U16
            },
            supplementary_scalars24: if let Some(supplementary) = supplementary_tables {
                &supplementary.scalars24
            } else {
                EMPTY_CHAR
            },
            half_width_voicing_marks_become_non_starters,
            decomposition_passthrough_bound: u32::from(decomposition_passthrough_bound),
            ignorable_behavior,
        };
        // Discard the U+FFFF seed; as a side effect the first real input
        // character is loaded into `pending`.
        let _ = ret.next();
        ret
    }

    /// Pushes the tail of a 16-bit decomposition expansion (at `offset` in
    /// `slice16`) into `buffer`, returning the leading starter and the
    /// buffer index where the trailing reorderable run begins.
    fn push_decomposition16(
        &mut self,
        low: u16,
        offset: usize,
        slice16: &ZeroSlice<u16>,
    ) -> (char, usize) {
        // The length, biased by 2, is stored in the top three bits.
        let len = usize::from(low >> 13) + 2;
        let (starter, tail) = slice16
            .get_subslice(offset..offset + len)
            .and_then(|slice| slice.split_first())
            .map_or_else(
                || {
                    // GIGO case: invalid offset/length in the data.
                    debug_assert!(false);
                    (REPLACEMENT_CHARACTER, EMPTY_U16)
                },
                |(first, trail)| (char_from_u16(first), trail),
            );
        if low & 0x1000 != 0 {
            // Flag: everything after the starter is a non-starter, so the
            // whole tail is reorderable; ccc values are resolved lazily.
            self.buffer.extend(
                tail.iter()
                    .map(|u| CharacterAndClass::new_with_placeholder(char_from_u16(u))),
            );
            (starter, 0)
        } else {
            // Mixed starters and non-starters: look up trie values now and
            // track where the trailing run of non-starters begins.
            let mut i = 0;
            let mut combining_start = 0;
            for u in tail.iter() {
                let ch = char_from_u16(u);
                let trie_value = self.trie.get(ch);
                self.buffer.push(CharacterAndClass::new_with_trie_value(
                    CharacterAndTrieValue::new(ch, trie_value),
                ));
                i += 1;
                if !decomposition_starts_with_non_starter(trie_value) {
                    combining_start = i;
                }
            }
            (starter, combining_start)
        }
    }

    /// Like `push_decomposition16`, but for decompositions stored as
    /// `char`s (may include non-BMP scalars); the length bias is 1.
    fn push_decomposition32(
        &mut self,
        low: u16,
        offset: usize,
        slice32: &ZeroSlice<char>,
    ) -> (char, usize) {
        let len = usize::from(low >> 13) + 1;
        let (starter, tail) = slice32
            .get_subslice(offset..offset + len)
            .and_then(|slice| slice.split_first())
            .unwrap_or_else(|| {
                // GIGO case: invalid offset/length in the data.
                debug_assert!(false);
                (REPLACEMENT_CHARACTER, EMPTY_CHAR)
            });
        if low & 0x1000 != 0 {
            // Flag: the whole tail is reorderable; ccc resolved lazily.
            self.buffer
                .extend(tail.iter().map(CharacterAndClass::new_with_placeholder));
            (starter, 0)
        } else {
            let mut i = 0;
            let mut combining_start = 0;
            for ch in tail.iter() {
                let trie_value = self.trie.get(ch);
                self.buffer.push(CharacterAndClass::new_with_trie_value(
                    CharacterAndTrieValue::new(ch, trie_value),
                ));
                i += 1;
                if !decomposition_starts_with_non_starter(trie_value) {
                    combining_start = i;
                }
            }
            (starter, combining_start)
        }
    }

    /// Looks up the trie value for `c`, preferring the supplementary trie
    /// (when present) over the main one.
    #[inline(always)]
    fn attach_trie_value(&self, c: char) -> CharacterAndTrieValue {
        if let Some(supplementary) = self.supplementary_trie {
            if let Some(value) = self.attach_supplementary_trie_value(c, supplementary) {
                return value;
            }
        }

        CharacterAndTrieValue::new(c, self.trie.get(c))
    }

    /// Supplementary-trie lookup for `c`. Special-cases the half-width
    /// katakana voicing marks U+FF9E/U+FF9F, which in UTS 46 mode map to
    /// the combining marks U+3099/U+309A with a synthesized ccc-tagged
    /// trie value (KanaVoicing, ccc 8).
    #[inline(never)]
    fn attach_supplementary_trie_value(
        &self,
        c: char,
        supplementary: &CodePointTrie<u32>,
    ) -> Option<CharacterAndTrieValue> {
        let voicing_mark = u32::from(c).wrapping_sub(0xFF9E);
        if voicing_mark <= 1 && self.half_width_voicing_marks_become_non_starters {
            return Some(CharacterAndTrieValue::new(
                if voicing_mark == 0 {
                    '\u{3099}'
                } else {
                    '\u{309A}'
                },
                // Synthesized value: the 0xD800 ccc tag plus the class.
                0xD800 | u32::from(CanonicalCombiningClass::KanaVoicing.0),
            ));
        }
        let trie_value = supplementary.get32(u32::from(c));
        if trie_value != 0 {
            return Some(CharacterAndTrieValue::new_from_supplement(c, trie_value));
        }
        None
    }

    /// Reads the next character from the delegate with its trie value,
    /// handling ignorables per `ignorable_behavior`. Must only be called
    /// when `pending` is empty.
    fn delegate_next_no_pending(&mut self) -> Option<CharacterAndTrieValue> {
        debug_assert!(self.pending.is_none());
        loop {
            let c = self.delegate.next()?;

            // Fast path: below the bound the character is known to
            // decompose to itself, so skip the trie lookup.
            if u32::from(c) < self.decomposition_passthrough_bound {
                return Some(CharacterAndTrieValue::new(c, 0));
            }

            if let Some(supplementary) = self.supplementary_trie {
                if let Some(value) = self.attach_supplementary_trie_value(c, supplementary) {
                    if value.trie_val == IGNORABLE_MARKER {
                        match self.ignorable_behavior {
                            IgnorableBehavior::Unsupported => {
                                // The data should not contain ignorables in
                                // this configuration.
                                debug_assert!(false);
                            }
                            IgnorableBehavior::ReplacementCharacter => {
                                return Some(CharacterAndTrieValue::new(
                                    c,
                                    u32::from(REPLACEMENT_CHARACTER),
                                ));
                            }
                            IgnorableBehavior::Ignored => {
                                // Drop the ignorable and read another
                                // character.
                                continue;
                            }
                        }
                    }
                    return Some(value);
                }
            }
            let trie_val = self.trie.get(c);
            debug_assert_ne!(trie_val, IGNORABLE_MARKER);
            return Some(CharacterAndTrieValue::new(c, trie_val));
        }
    }

    /// Takes the buffered lookahead character if present; otherwise reads
    /// from the delegate.
    fn delegate_next(&mut self) -> Option<CharacterAndTrieValue> {
        if let Some(pending) = self.pending.take() {
            Some(pending)
        } else {
            self.delegate_next_no_pending()
        }
    }

    /// Decomposes `c_and_trie_val`: pushes everything after the first
    /// character of the decomposition (plus any following non-starters
    /// from the input, canonically ordered) into `buffer` and returns the
    /// first character.
    fn decomposing_next(&mut self, c_and_trie_val: CharacterAndTrieValue) -> char {
        let (starter, combining_start) = {
            let c = c_and_trie_val.character;
            // Precomposed Hangul syllables are decomposed arithmetically.
            let hangul_offset = u32::from(c).wrapping_sub(HANGUL_S_BASE);
            if hangul_offset >= HANGUL_S_COUNT {
                let decomposition = c_and_trie_val.trie_val;
                if decomposition <= BACKWARD_COMBINING_STARTER_MARKER {
                    // The character decomposes to itself.
                    (c, 0)
                } else {
                    // Lead/trail halves of the packed trie value.
                    let trail_or_complex = (decomposition >> 16) as u16;
                    let lead = decomposition as u16;
                    if lead > NON_ROUND_TRIP_MARKER && trail_or_complex != 0 {
                        // Decomposition into two BMP characters: a starter
                        // and a non-starter.
                        let starter = char_from_u16(lead);
                        let combining = char_from_u16(trail_or_complex);
                        self.buffer
                            .push(CharacterAndClass::new_with_placeholder(combining));
                        (starter, 0)
                    } else if lead > NON_ROUND_TRIP_MARKER {
                        if lead != FDFA_MARKER {
                            // Decomposition into a single BMP character.
                            debug_assert_ne!(
                                lead, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16,
                                "Should not reach this point with non-starter marker"
                            );
                            let starter = char_from_u16(lead);
                            (starter, 0)
                        } else {
                            // Special case for the longest decomposition,
                            // U+FDFA: starter U+0635 plus 17 more starters.
                            self.buffer.extend(FDFA_NFKD.map(|u| {
                                // SAFETY: `FDFA_NFKD` contains only valid
                                // BMP scalar values.
                                CharacterAndClass::new_starter(unsafe {
                                    core::char::from_u32_unchecked(u32::from(u))
                                })
                            }));
                            ('\u{0635}', 17)
                        }
                    } else {
                        // Complex decomposition: the low 12 bits of the
                        // trail half index into the concatenated scalar
                        // tables (16-bit, then 32-bit, then supplementary).
                        let offset = usize::from(trail_or_complex & 0xFFF);
                        if offset < self.scalars16.len() {
                            self.push_decomposition16(trail_or_complex, offset, self.scalars16)
                        } else if offset < self.scalars16.len() + self.scalars24.len() {
                            self.push_decomposition32(
                                trail_or_complex,
                                offset - self.scalars16.len(),
                                self.scalars24,
                            )
                        } else if offset
                            < self.scalars16.len()
                                + self.scalars24.len()
                                + self.supplementary_scalars16.len()
                        {
                            self.push_decomposition16(
                                trail_or_complex,
                                offset - (self.scalars16.len() + self.scalars24.len()),
                                self.supplementary_scalars16,
                            )
                        } else {
                            self.push_decomposition32(
                                trail_or_complex,
                                offset
                                    - (self.scalars16.len()
                                        + self.scalars24.len()
                                        + self.supplementary_scalars16.len()),
                                self.supplementary_scalars24,
                            )
                        }
                    }
                }
            } else {
                // Hangul: split the syllable offset into L, V, and
                // (optional) T jamo indices.
                let l = hangul_offset / HANGUL_N_COUNT;
                let v = (hangul_offset % HANGUL_N_COUNT) / HANGUL_T_COUNT;
                let t = hangul_offset % HANGUL_T_COUNT;

                // SAFETY (all three below): the jamo arithmetic stays
                // within the conjoining jamo block, which contains only
                // valid scalar values.
                self.buffer.push(CharacterAndClass::new_starter(unsafe {
                    core::char::from_u32_unchecked(HANGUL_V_BASE + v)
                }));
                let first = unsafe { core::char::from_u32_unchecked(HANGUL_L_BASE + l) };
                if t != 0 {
                    self.buffer.push(CharacterAndClass::new_starter(unsafe {
                        core::char::from_u32_unchecked(HANGUL_T_BASE + t)
                    }));
                    (first, 2)
                } else {
                    (first, 1)
                }
            }
        };
        // Pull in any following non-starters from the input and put the
        // reorderable part of the buffer into canonical order.
        self.gather_and_sort_combining(combining_start);
        starter
    }

    /// Keeps reading from the input while it consists of non-starters,
    /// pushing them (with special-case mappings where needed) into
    /// `buffer`, then sorts `buffer[combining_start..]` by ccc. The first
    /// starter encountered is stashed back into `pending`.
    fn gather_and_sort_combining(&mut self, combining_start: usize) {
        while let Some(ch_and_trie_val) = self.delegate_next() {
            if trie_value_has_ccc(ch_and_trie_val.trie_val) {
                self.buffer
                    .push(CharacterAndClass::new_with_trie_value(ch_and_trie_val));
            } else if trie_value_indicates_special_non_starter_decomposition(
                ch_and_trie_val.trie_val,
            ) {
                // Non-starters whose decomposition is itself one or two
                // non-starters; mapped here instead of via the tables.
                let mapped = match ch_and_trie_val.character {
                    '\u{0340}' => {
                        // COMBINING GRAVE TONE MARK
                        CharacterAndClass::new('\u{0300}', CanonicalCombiningClass::Above)
                    }
                    '\u{0341}' => {
                        // COMBINING ACUTE TONE MARK
                        CharacterAndClass::new('\u{0301}', CanonicalCombiningClass::Above)
                    }
                    '\u{0343}' => {
                        // COMBINING GREEK KORONIS
                        CharacterAndClass::new('\u{0313}', CanonicalCombiningClass::Above)
                    }
                    '\u{0344}' => {
                        // COMBINING GREEK DIALYTIKA TONOS
                        self.buffer.push(CharacterAndClass::new(
                            '\u{0308}',
                            CanonicalCombiningClass::Above,
                        ));
                        CharacterAndClass::new('\u{0301}', CanonicalCombiningClass::Above)
                    }
                    '\u{0F73}' => {
                        // TIBETAN VOWEL SIGN II
                        self.buffer.push(CharacterAndClass::new(
                            '\u{0F71}',
                            CanonicalCombiningClass::CCC129,
                        ));
                        CharacterAndClass::new('\u{0F72}', CanonicalCombiningClass::CCC130)
                    }
                    '\u{0F75}' => {
                        // TIBETAN VOWEL SIGN UU
                        self.buffer.push(CharacterAndClass::new(
                            '\u{0F71}',
                            CanonicalCombiningClass::CCC129,
                        ));
                        CharacterAndClass::new('\u{0F74}', CanonicalCombiningClass::CCC132)
                    }
                    '\u{0F81}' => {
                        // TIBETAN VOWEL SIGN REVERSED II
                        self.buffer.push(CharacterAndClass::new(
                            '\u{0F71}',
                            CanonicalCombiningClass::CCC129,
                        ));
                        CharacterAndClass::new('\u{0F80}', CanonicalCombiningClass::CCC130)
                    }
                    _ => {
                        // GIGO case: the marker is only expected on the
                        // characters handled above.
                        debug_assert!(false);
                        CharacterAndClass::new_with_placeholder(REPLACEMENT_CHARACTER)
                    }
                };
                self.buffer.push(mapped);
            } else {
                // A starter: stop gathering and save it for the next call.
                self.pending = Some(ch_and_trie_val);
                break;
            }
        }
        // `combining_start` never exceeds `buffer.len()` given how callers
        // compute it, so the slice below is in bounds.
        #[allow(clippy::indexing_slicing)]
        sort_slice_by_ccc(&mut self.buffer[combining_start..], self.trie);
    }
}
989
impl<'data, I> Iterator for Decomposition<'data, I>
where
    I: Iterator<Item = char>,
{
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Drain the already-decomposed buffer first.
        if let Some(ret) = self.buffer.get(self.buffer_pos).map(|c| c.character()) {
            self.buffer_pos += 1;
            if self.buffer_pos == self.buffer.len() {
                self.buffer.clear();
                self.buffer_pos = 0;
            }
            return Some(ret);
        }
        debug_assert_eq!(self.buffer_pos, 0);
        // Decompose the pending lookahead character; this refills both
        // `buffer` and `pending` as a side effect.
        let c_and_trie_val = self.pending.take()?;
        Some(self.decomposing_next(c_and_trie_val))
    }
}
1010
/// An iterator adaptor that composes the output of a `Decomposition`,
/// yielding canonically composed output (NFC, NFKC, or UTS 46 mapped,
/// depending on the decomposition's data).
#[derive(Debug)]
pub struct Composition<'data, I>
where
    I: Iterator<Item = char>,
{
    decomposition: Decomposition<'data, I>,
    // Trie of canonical (second, starter) -> composed mappings; see
    // `compose_non_hangul`.
    canonical_compositions: Char16Trie<'data>,
    // A starter read out of the decomposition buffer but not yet processed
    // for composition.
    unprocessed_starter: Option<char>,
    // Characters below this scalar value are treated as unable to combine
    // backwards, enabling passthrough.
    composition_passthrough_bound: u32,
}
1035
impl<'data, I> Composition<'data, I>
where
    I: Iterator<Item = char>,
{
    fn new(
        decomposition: Decomposition<'data, I>,
        canonical_compositions: Char16Trie<'data>,
        composition_passthrough_bound: u16,
    ) -> Self {
        Self {
            decomposition,
            canonical_compositions,
            unprocessed_starter: None,
            composition_passthrough_bound: u32::from(composition_passthrough_bound),
        }
    }

    /// Performs canonical composition (including Hangul) on a pair of
    /// characters, or returns `None` if the pair does not compose.
    #[inline(always)]
    pub fn compose(&self, starter: char, second: char) -> Option<char> {
        compose(self.canonical_compositions.iter(), starter, second)
    }

    /// Performs non-Hangul canonical composition on a pair of characters,
    /// or returns `None` if the pair does not compose.
    #[inline(always)]
    fn compose_non_hangul(&self, starter: char, second: char) -> Option<char> {
        compose_non_hangul(self.canonical_compositions.iter(), starter, second)
    }
}
1069
impl<'data, I> Iterator for Composition<'data, I>
where
    I: Iterator<Item = char>,
{
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        let mut undecomposed_starter = CharacterAndTrieValue::new('\u{0}', 0);
        if self.unprocessed_starter.is_none() {
            // This loop is only ever exited via `break`/`return`; it acts
            // as a forward goto, never actually looping.
            #[allow(clippy::never_loop)]
            loop {
                // Drain leftover (already decomposed) buffer content first.
                if let Some((character, ccc)) = self
                    .decomposition
                    .buffer
                    .get(self.decomposition.buffer_pos)
                    .map(|c| c.character_and_ccc())
                {
                    self.decomposition.buffer_pos += 1;
                    if self.decomposition.buffer_pos == self.decomposition.buffer.len() {
                        self.decomposition.buffer.clear();
                        self.decomposition.buffer_pos = 0;
                    }
                    if ccc == CanonicalCombiningClass::NotReordered {
                        // A starter in the buffer becomes the new
                        // composition candidate.
                        self.unprocessed_starter = Some(character);
                        break;
                    }
                    // Leftover non-starters are emitted as-is.
                    return Some(character);
                }
                debug_assert_eq!(self.decomposition.buffer_pos, 0);
                undecomposed_starter = self.decomposition.pending.take()?;
                if u32::from(undecomposed_starter.character) < self.composition_passthrough_bound
                    || undecomposed_starter.potential_passthrough()
                {
                    // Fast path: the pending character passes through
                    // unchanged if its successor cannot combine backwards.
                    if let Some(upcoming) = self.decomposition.delegate_next_no_pending() {
                        let cannot_combine_backwards = u32::from(upcoming.character)
                            < self.composition_passthrough_bound
                            || !upcoming.can_combine_backwards();
                        self.decomposition.pending = Some(upcoming);
                        if cannot_combine_backwards {
                            return Some(undecomposed_starter.character);
                        }
                    } else {
                        // End of input: emit as-is.
                        return Some(undecomposed_starter.character);
                    }
                }
                break;
            }
        }
        // Placeholder initialization; overwritten before first real use.
        let mut starter = '\u{0}';
        let mut attempt_composition = false;
        loop {
            if let Some(unprocessed) = self.unprocessed_starter.take() {
                debug_assert_eq!(undecomposed_starter, CharacterAndTrieValue::new('\u{0}', 0));
                debug_assert_eq!(starter, '\u{0}');
                starter = unprocessed;
            } else {
                debug_assert_eq!(self.decomposition.buffer_pos, 0);
                let next_starter = self.decomposition.decomposing_next(undecomposed_starter);
                if !attempt_composition {
                    starter = next_starter;
                } else if let Some(composed) = self.compose(starter, next_starter) {
                    starter = composed;
                } else {
                    // The two starters don't compose: emit the current one
                    // and keep the new one for the next call.
                    self.unprocessed_starter = Some(next_starter);
                    return Some(starter);
                }
            }
            // Try to compose the buffered non-starters onto `starter`.
            loop {
                let (character, ccc) = if let Some((character, ccc)) = self
                    .decomposition
                    .buffer
                    .get(self.decomposition.buffer_pos)
                    .map(|c| c.character_and_ccc())
                {
                    (character, ccc)
                } else {
                    self.decomposition.buffer.clear();
                    self.decomposition.buffer_pos = 0;
                    break;
                };
                if let Some(composed) = self.compose(starter, character) {
                    starter = composed;
                    self.decomposition.buffer_pos += 1;
                    continue;
                }
                // `character` did not compose; drop the consumed prefix of
                // the buffer.
                let mut most_recent_skipped_ccc = ccc;
                {
                    let _ = self
                        .decomposition
                        .buffer
                        .drain(0..self.decomposition.buffer_pos);
                }
                self.decomposition.buffer_pos = 0;
                if most_recent_skipped_ccc == CanonicalCombiningClass::NotReordered {
                    // A starter that did not compose stays in the buffer
                    // for the next call.
                    return Some(starter);
                }
                // Attempt to compose later non-starters that are not
                // blocked, i.e. whose ccc exceeds every skipped ccc so far.
                let mut i = 1; // Skip the character that just failed.
                while let Some((character, ccc)) = self
                    .decomposition
                    .buffer
                    .get(i)
                    .map(|c| c.character_and_ccc())
                {
                    if ccc == CanonicalCombiningClass::NotReordered {
                        return Some(starter);
                    }
                    debug_assert!(ccc >= most_recent_skipped_ccc);
                    if ccc != most_recent_skipped_ccc {
                        // Not blocked by the skipped characters.
                        if let Some(composed) = self.compose_non_hangul(starter, character) {
                            self.decomposition.buffer.remove(i);
                            starter = composed;
                            continue;
                        }
                    }
                    most_recent_skipped_ccc = ccc;
                    i += 1;
                }
                break;
            }

            debug_assert_eq!(self.decomposition.buffer_pos, 0);

            if !self.decomposition.buffer.is_empty() {
                // Unconsumed non-starters remain: emit the starter now.
                return Some(starter);
            }
            // Allowed, because the `is_some()` check guards each `unwrap()`.
            #[allow(clippy::unwrap_used)]
            if self.decomposition.pending.is_some() {
                let pending = self.decomposition.pending.as_ref().unwrap();
                if u32::from(pending.character) < self.composition_passthrough_bound
                    || !pending.can_combine_backwards()
                {
                    // The next character cannot combine with `starter`.
                    return Some(starter);
                }
                // The next character may compose with `starter`: loop
                // around to decompose it and attempt composition.
                undecomposed_starter = self.decomposition.pending.take().unwrap();
                attempt_composition = true;
                continue;
            }
            // End of input.
            return Some(starter);
        }
    }
}
1256
/// Expands into a composing `normalize_to`-style method for one input
/// encoding (`$slice`): writes the composed normalization of `$text` into
/// `$sink`, using the encoding-specific `$fast` block as a fast path over
/// runs of characters that can pass through unchanged.
macro_rules! composing_normalize_to {
    ($(#[$meta:meta])*,
     $normalize_to:ident,
     $write:path,
     $slice:ty,
     $prolog:block,
     $always_valid_utf:literal,
     $as_slice:ident,
     $fast:block,
     $text:ident,
     $sink:ident,
     $composition:ident,
     $composition_passthrough_bound:ident,
     $undecomposed_starter:ident,
     $pending_slice:ident,
     $len_utf:ident,
    ) => {
        $(#[$meta])*
        pub fn $normalize_to<W: $write + ?Sized>(
            &self,
            $text: $slice,
            $sink: &mut W,
        ) -> core::fmt::Result {
            $prolog
            let mut $composition = self.normalize_iter($text.chars());
            debug_assert_eq!($composition.decomposition.ignorable_behavior, IgnorableBehavior::Unsupported);
            // Drain anything the iterator constructor already buffered.
            for cc in $composition.decomposition.buffer.drain(..) {
                $sink.write_char(cc.character())?;
            }

            let $composition_passthrough_bound = $composition.composition_passthrough_bound;
            'outer: loop {
                debug_assert_eq!($composition.decomposition.buffer_pos, 0);
                let mut $undecomposed_starter =
                    if let Some(pending) = $composition.decomposition.pending.take() {
                        pending
                    } else {
                        // End of input.
                        return Ok(());
                    };
                // The slice index below is derived from lengths of
                // (sub)slices of `$text` itself, so it is in bounds.
                #[allow(clippy::indexing_slicing)]
                if u32::from($undecomposed_starter.character) < $composition_passthrough_bound ||
                    $undecomposed_starter.potential_passthrough()
                {
                    // Attempt the fast path over the still-unread input,
                    // starting from the pending character.
                    if $always_valid_utf || $undecomposed_starter.character != REPLACEMENT_CHARACTER {
                        let $pending_slice = &$text[$text.len() - $composition.decomposition.delegate.$as_slice().len() - $undecomposed_starter.character.$len_utf()..];
                        $fast
                    }
                }
                // Slow path: decompose, then compose out of the buffer.
                let mut starter = $composition
                    .decomposition
                    .decomposing_next($undecomposed_starter);
                'bufferloop: loop {
                    // Compose the buffered characters onto `starter`.
                    loop {
                        let (character, ccc) = if let Some((character, ccc)) = $composition
                            .decomposition
                            .buffer
                            .get($composition.decomposition.buffer_pos)
                            .map(|c| c.character_and_ccc())
                        {
                            (character, ccc)
                        } else {
                            $composition.decomposition.buffer.clear();
                            $composition.decomposition.buffer_pos = 0;
                            break;
                        };
                        if let Some(composed) = $composition.compose(starter, character) {
                            starter = composed;
                            $composition.decomposition.buffer_pos += 1;
                            continue;
                        }
                        let mut most_recent_skipped_ccc = ccc;
                        if most_recent_skipped_ccc == CanonicalCombiningClass::NotReordered {
                            // A starter that did not compose: flush the
                            // current starter and restart with the new one.
                            $sink.write_char(starter)?;
                            starter = character;
                            $composition.decomposition.buffer_pos += 1;
                            continue 'bufferloop;
                        } else {
                            // Drop the already-consumed buffer prefix.
                            {
                                let _ = $composition
                                    .decomposition
                                    .buffer
                                    .drain(0..$composition.decomposition.buffer_pos);
                            }
                            $composition.decomposition.buffer_pos = 0;
                        }
                        // Attempt to compose later, non-blocked
                        // non-starters (ccc above everything skipped).
                        let mut i = 1; // Skip the character that just failed.
                        while let Some((character, ccc)) = $composition
                            .decomposition
                            .buffer
                            .get(i)
                            .map(|c| c.character_and_ccc())
                        {
                            if ccc == CanonicalCombiningClass::NotReordered {
                                // A starter: flush everything before it and
                                // make it the new composition candidate.
                                $sink.write_char(starter)?;
                                for cc in $composition.decomposition.buffer.drain(..i) {
                                    $sink.write_char(cc.character())?;
                                }
                                starter = character;
                                {
                                    let removed = $composition.decomposition.buffer.remove(0);
                                    debug_assert_eq!(starter, removed.character());
                                }
                                debug_assert_eq!($composition.decomposition.buffer_pos, 0);
                                continue 'bufferloop;
                            }
                            debug_assert!(ccc >= most_recent_skipped_ccc);
                            if ccc != most_recent_skipped_ccc {
                                // Not blocked by the skipped characters.
                                if let Some(composed) =
                                    $composition.compose_non_hangul(starter, character)
                                {
                                    $composition.decomposition.buffer.remove(i);
                                    starter = composed;
                                    continue;
                                }
                            }
                            most_recent_skipped_ccc = ccc;
                            i += 1;
                        }
                        break;
                    }
                    debug_assert_eq!($composition.decomposition.buffer_pos, 0);

                    if !$composition.decomposition.buffer.is_empty() {
                        // Unconsumed non-starters remain: flush them.
                        $sink.write_char(starter)?;
                        for cc in $composition.decomposition.buffer.drain(..) {
                            $sink.write_char(cc.character())?;
                        }
                        continue 'outer;
                    }
                    if $composition.decomposition.pending.is_some() {
                        let pending = $composition.decomposition.pending.as_ref().unwrap();
                        if u32::from(pending.character) < $composition.composition_passthrough_bound
                            || !pending.can_combine_backwards()
                        {
                            // The next character cannot combine backwards.
                            $sink.write_char(starter)?;
                            continue 'outer;
                        }
                        // The next character may compose with `starter`.
                        let pending_starter = $composition.decomposition.pending.take().unwrap();
                        let decomposed = $composition.decomposition.decomposing_next(pending_starter);
                        if let Some(composed) = $composition.compose(starter, decomposed) {
                            starter = composed;
                        } else {
                            $sink.write_char(starter)?;
                            starter = decomposed;
                        }
                        continue 'bufferloop;
                    }
                    // End of input.
                    $sink.write_char(starter)?;
                    return Ok(());
                }
            }
        }
    };
}
1444
/// Expands into a decomposing `normalize_to`-style method for one input
/// encoding (`$slice`): writes the decomposed normalization of `$text`
/// into `$sink`. `$fast` is an encoding-specific fast path over runs of
/// characters that decompose to themselves; the `$outer` lifetime label is
/// passed in so that `$fast` can `continue`/`break` the outer loop.
macro_rules! decomposing_normalize_to {
    ($(#[$meta:meta])*,
     $normalize_to:ident,
     $write:path,
     $slice:ty,
     $prolog:block,
     $as_slice:ident,
     $fast:block,
     $text:ident,
     $sink:ident,
     $decomposition:ident,
     $decomposition_passthrough_bound:ident,
     $undecomposed_starter:ident,
     $pending_slice:ident,
     $outer:lifetime,
    ) => {
        $(#[$meta])*
        pub fn $normalize_to<W: $write + ?Sized>(
            &self,
            $text: $slice,
            $sink: &mut W,
        ) -> core::fmt::Result {
            $prolog

            let mut $decomposition = self.normalize_iter($text.chars());
            debug_assert_eq!($decomposition.ignorable_behavior, IgnorableBehavior::Unsupported);

            let $decomposition_passthrough_bound = $decomposition.decomposition_passthrough_bound;
            $outer: loop {
                // Flush whatever is buffered before processing the next
                // pending character.
                for cc in $decomposition.buffer.drain(..) {
                    $sink.write_char(cc.character())?;
                }
                debug_assert_eq!($decomposition.buffer_pos, 0);
                let mut $undecomposed_starter = if let Some(pending) = $decomposition.pending.take() {
                    pending
                } else {
                    // End of input.
                    return Ok(());
                };
                // Slicing in `$fast` is derived from slice lengths of the
                // input itself, so it is in bounds.
                #[allow(clippy::indexing_slicing)]
                if $undecomposed_starter.starter_and_decomposes_to_self() {
                    // Fast path: write the character as-is and scan ahead
                    // over the undecomposed remainder of the input.
                    $sink.write_char($undecomposed_starter.character)?;

                    let $pending_slice = $decomposition.delegate.$as_slice();
                    $fast
                }
                // Slow path: decompose and emit the leading starter; the
                // rest lands in the buffer for the next iteration's flush.
                let starter = $decomposition.decomposing_next($undecomposed_starter);
                $sink.write_char(starter)?;
            }
        }
    };
}
1501
// Generates the convenience `normalize*`/`is_normalized*` methods shared by
// `DecomposingNormalizer` and `ComposingNormalizer` on top of the
// sink-writing `normalize_to`/`normalize_utf8_to`/`normalize_utf16_to`
// methods.
macro_rules! normalizer_methods {
    () => {
        /// Normalizes a string slice into a `String`.
        pub fn normalize(&self, text: &str) -> String {
            let mut ret = String::new();
            // Normalization usually does not change the length much.
            ret.reserve(text.len());
            let _ = self.normalize_to(text, &mut ret);
            ret
        }

        /// Checks whether a string slice is already normalized, without
        /// allocating: the sink compares the would-be output to the input.
        pub fn is_normalized(&self, text: &str) -> bool {
            let mut sink = IsNormalizedSinkStr::new(text);
            // The sink reports an error as soon as output diverges from input.
            if self.normalize_to(text, &mut sink).is_err() {
                return false;
            }
            sink.finished()
        }

        /// Normalizes a slice of potentially-invalid UTF-16 into a `Vec`.
        pub fn normalize_utf16(&self, text: &[u16]) -> Vec<u16> {
            let mut ret = Vec::new();
            let _ = self.normalize_utf16_to(text, &mut ret);
            ret
        }

        /// Checks whether a slice of potentially-invalid UTF-16 is already
        /// normalized, without allocating.
        pub fn is_normalized_utf16(&self, text: &[u16]) -> bool {
            let mut sink = IsNormalizedSinkUtf16::new(text);
            if self.normalize_utf16_to(text, &mut sink).is_err() {
                return false;
            }
            sink.finished()
        }

        /// Normalizes a slice of potentially-invalid UTF-8 into a `String`.
        pub fn normalize_utf8(&self, text: &[u8]) -> String {
            let mut ret = String::new();
            ret.reserve(text.len());
            let _ = self.normalize_utf8_to(text, &mut ret);
            ret
        }

        /// Checks whether a slice of potentially-invalid UTF-8 is already
        /// normalized, without allocating.
        pub fn is_normalized_utf8(&self, text: &[u8]) -> bool {
            let mut sink = IsNormalizedSinkUtf8::new(text);
            if self.normalize_utf8_to(text, &mut sink).is_err() {
                return false;
            }
            sink.finished()
        }
    };
}
1566
#[derive(Debug)]
/// A normalizer for performing decomposing normalization (NFD/NFKD and the
/// UTS 46 decomposed variant, depending on the loaded data).
pub struct DecomposingNormalizer {
    /// Canonical (NFD) decomposition trie data.
    decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
    /// Optional compatibility or UTS 46 supplement layered over `decompositions`.
    supplementary_decompositions: Option<SupplementPayloadHolder>,
    /// Scalar expansion tables for the canonical data.
    tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
    /// Scalar expansion tables for the supplement, if any.
    supplementary_tables: Option<DataPayload<CompatibilityDecompositionTablesV1Marker>>,
    /// Code points below this are known to decompose to themselves.
    decomposition_passthrough_bound: u8,
    /// Code points below this are known not to take part in composition.
    composition_passthrough_bound: u16,
}
1577
1578impl DecomposingNormalizer {
    #[cfg(feature = "compiled_data")]
    /// NFD constructor backed by compiled (baked) data.
    pub const fn new_nfd() -> Self {
        // Compile-time check that the scalar tables fit in the index space
        // reserved by the trie values (0xFFF); mirrors the runtime
        // `NormalizerError::FutureExtension` check of the unstable constructor.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                .scalars16
                .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                    .scalars24
                    .const_len()
                <= 0xFFF,
            "NormalizerError::FutureExtension"
        );

        DecomposingNormalizer {
            decompositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFD_V1,
            ),
            supplementary_decompositions: None,
            tables: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1,
            ),
            supplementary_tables: None,
            // Fixed bounds for plain NFD: U+00C0 / U+0300.
            decomposition_passthrough_bound: 0xC0,
            composition_passthrough_bound: 0x0300,
        }
    }
1610
    // Generates the `try_new_nfd_with_any_provider` / `try_new_nfd_with_buffer_provider`
    // wrappers around `try_new_nfd_unstable` (and docs for `new_nfd`).
    icu_provider::gen_any_buffer_data_constructors!(
        locale: skip,
        options: skip,
        error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new_nfd,
            try_new_nfd_with_any_provider,
            try_new_nfd_with_buffer_provider,
            try_new_nfd_unstable,
            Self,
        ]
    );
1624
1625 #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfd)]
1626 pub fn try_new_nfd_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
1627 where
1628 D: DataProvider<CanonicalDecompositionDataV1Marker>
1629 + DataProvider<CanonicalDecompositionTablesV1Marker>
1630 + ?Sized,
1631 {
1632 let decompositions: DataPayload<CanonicalDecompositionDataV1Marker> =
1633 provider.load(Default::default())?.take_payload()?;
1634 let tables: DataPayload<CanonicalDecompositionTablesV1Marker> =
1635 provider.load(Default::default())?.take_payload()?;
1636
1637 if tables.get().scalars16.len() + tables.get().scalars24.len() > 0xFFF {
1638 return Err(NormalizerError::FutureExtension);
1645 }
1646
1647 Ok(DecomposingNormalizer {
1648 decompositions,
1649 supplementary_decompositions: None,
1650 tables,
1651 supplementary_tables: None,
1652 decomposition_passthrough_bound: 0xC0,
1653 composition_passthrough_bound: 0x0300,
1654 })
1655 }
1656
    #[cfg(feature = "compiled_data")]
    /// NFKD constructor backed by compiled (baked) data.
    pub const fn new_nfkd() -> Self {
        // Compile-time analog of the `FutureExtension` check: canonical plus
        // compatibility scalar tables together must fit in 0xFFF entries.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                .scalars16
                .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                    .scalars24
                    .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFKDEX_V1
                    .scalars16
                    .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFKDEX_V1
                    .scalars24
                    .const_len()
                <= 0xFFF,
            "NormalizerError::FutureExtension"
        );

        // Compile-time analog of the `ValidationError` check on the
        // supplement's passthrough cap.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_V1.passthrough_cap <= 0x0300,
            "NormalizerError::ValidationError"
        );

        // Clamp the data-supplied cap to the fixed NFD bounds; written as
        // `if`/`else` because `min` is not const-callable here.
        let decomposition_capped =
            if crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_V1.passthrough_cap < 0xC0 {
                crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_V1.passthrough_cap
            } else {
                0xC0
            };
        let composition_capped =
            if crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_V1.passthrough_cap < 0x0300 {
                crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_V1.passthrough_cap
            } else {
                0x0300
            };

        DecomposingNormalizer {
            decompositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFD_V1,
            ),
            supplementary_decompositions: Some(SupplementPayloadHolder::Compatibility(
                DataPayload::from_static_ref(crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_V1),
            )),
            tables: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1,
            ),
            supplementary_tables: Some(DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFKDEX_V1,
            )),
            decomposition_passthrough_bound: decomposition_capped as u8,
            composition_passthrough_bound: composition_capped,
        }
    }
1716
    // Generates the `try_new_nfkd_with_any_provider` / `try_new_nfkd_with_buffer_provider`
    // wrappers around `try_new_nfkd_unstable` (and docs for `new_nfkd`).
    icu_provider::gen_any_buffer_data_constructors!(
        locale: skip,
        options: skip,
        error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new_nfkd,
            try_new_nfkd_with_any_provider,
            try_new_nfkd_with_buffer_provider,
            try_new_nfkd_unstable,
            Self,
        ]
    );
1730
1731 #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfkd)]
1732 pub fn try_new_nfkd_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
1733 where
1734 D: DataProvider<CanonicalDecompositionDataV1Marker>
1735 + DataProvider<CompatibilityDecompositionSupplementV1Marker>
1736 + DataProvider<CanonicalDecompositionTablesV1Marker>
1737 + DataProvider<CompatibilityDecompositionTablesV1Marker>
1738 + ?Sized,
1739 {
1740 let decompositions: DataPayload<CanonicalDecompositionDataV1Marker> =
1741 provider.load(Default::default())?.take_payload()?;
1742 let supplementary_decompositions: DataPayload<
1743 CompatibilityDecompositionSupplementV1Marker,
1744 > = provider.load(Default::default())?.take_payload()?;
1745 let tables: DataPayload<CanonicalDecompositionTablesV1Marker> =
1746 provider.load(Default::default())?.take_payload()?;
1747 let supplementary_tables: DataPayload<CompatibilityDecompositionTablesV1Marker> =
1748 provider.load(Default::default())?.take_payload()?;
1749
1750 if tables.get().scalars16.len()
1751 + tables.get().scalars24.len()
1752 + supplementary_tables.get().scalars16.len()
1753 + supplementary_tables.get().scalars24.len()
1754 > 0xFFF
1755 {
1756 return Err(NormalizerError::FutureExtension);
1763 }
1764
1765 let cap = supplementary_decompositions.get().passthrough_cap;
1766 if cap > 0x0300 {
1767 return Err(NormalizerError::ValidationError);
1768 }
1769 let decomposition_capped = cap.min(0xC0);
1770 let composition_capped = cap.min(0x0300);
1771
1772 Ok(DecomposingNormalizer {
1773 decompositions,
1774 supplementary_decompositions: Some(SupplementPayloadHolder::Compatibility(
1775 supplementary_decompositions,
1776 )),
1777 tables,
1778 supplementary_tables: Some(supplementary_tables),
1779 decomposition_passthrough_bound: decomposition_capped as u8,
1780 composition_passthrough_bound: composition_capped,
1781 })
1782 }
1783
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    /// Constructor for the decomposed UTS 46 variant, backed by compiled
    /// data. Uses the UTS 46 supplement over the canonical data but the NFKD
    /// scalar tables for expansions.
    pub(crate) const fn new_uts46_decomposed() -> Self {
        // Compile-time analog of the runtime `FutureExtension` check.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                .scalars16
                .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                    .scalars24
                    .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFKDEX_V1
                    .scalars16
                    .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFKDEX_V1
                    .scalars24
                    .const_len()
                <= 0xFFF,
            "NormalizerError::FutureExtension"
        );

        // Compile-time analog of the runtime `ValidationError` check.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_NORMALIZER_UTS46D_V1.passthrough_cap <= 0x0300,
            "NormalizerError::ValidationError"
        );

        // Clamp the data-supplied cap; `if`/`else` because `min` is not
        // const-callable here.
        let decomposition_capped =
            if crate::provider::Baked::SINGLETON_NORMALIZER_UTS46D_V1.passthrough_cap < 0xC0 {
                crate::provider::Baked::SINGLETON_NORMALIZER_UTS46D_V1.passthrough_cap
            } else {
                0xC0
            };
        let composition_capped =
            if crate::provider::Baked::SINGLETON_NORMALIZER_UTS46D_V1.passthrough_cap < 0x0300 {
                crate::provider::Baked::SINGLETON_NORMALIZER_UTS46D_V1.passthrough_cap
            } else {
                0x0300
            };

        DecomposingNormalizer {
            decompositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFD_V1,
            ),
            supplementary_decompositions: Some(SupplementPayloadHolder::Uts46(
                DataPayload::from_static_ref(
                    crate::provider::Baked::SINGLETON_NORMALIZER_UTS46D_V1,
                ),
            )),
            tables: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1,
            ),
            supplementary_tables: Some(DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFKDEX_V1,
            )),
            decomposition_passthrough_bound: decomposition_capped as u8,
            composition_passthrough_bound: composition_capped,
        }
    }
1841
1842 #[doc(hidden)]
1862 pub(crate) fn try_new_uts46_decomposed_unstable<D>(
1863 provider: &D,
1864 ) -> Result<Self, NormalizerError>
1865 where
1866 D: DataProvider<CanonicalDecompositionDataV1Marker>
1867 + DataProvider<Uts46DecompositionSupplementV1Marker>
1868 + DataProvider<CanonicalDecompositionTablesV1Marker>
1869 + DataProvider<CompatibilityDecompositionTablesV1Marker>
1870 + ?Sized,
1872 {
1873 let decompositions: DataPayload<CanonicalDecompositionDataV1Marker> =
1874 provider.load(Default::default())?.take_payload()?;
1875 let supplementary_decompositions: DataPayload<Uts46DecompositionSupplementV1Marker> =
1876 provider.load(Default::default())?.take_payload()?;
1877 let tables: DataPayload<CanonicalDecompositionTablesV1Marker> =
1878 provider.load(Default::default())?.take_payload()?;
1879 let supplementary_tables: DataPayload<CompatibilityDecompositionTablesV1Marker> =
1880 provider.load(Default::default())?.take_payload()?;
1881
1882 if tables.get().scalars16.len()
1883 + tables.get().scalars24.len()
1884 + supplementary_tables.get().scalars16.len()
1885 + supplementary_tables.get().scalars24.len()
1886 > 0xFFF
1887 {
1888 return Err(NormalizerError::FutureExtension);
1895 }
1896
1897 let cap = supplementary_decompositions.get().passthrough_cap;
1898 if cap > 0x0300 {
1899 return Err(NormalizerError::ValidationError);
1900 }
1901 let decomposition_capped = cap.min(0xC0);
1902 let composition_capped = cap.min(0x0300);
1903
1904 Ok(DecomposingNormalizer {
1905 decompositions,
1906 supplementary_decompositions: Some(SupplementPayloadHolder::Uts46(
1907 supplementary_decompositions,
1908 )),
1909 tables,
1910 supplementary_tables: Some(supplementary_tables),
1911 decomposition_passthrough_bound: decomposition_capped as u8,
1912 composition_passthrough_bound: composition_capped,
1913 })
1914 }
1915
    /// Wraps a delegate `char` iterator in a [`Decomposition`] driven by this
    /// normalizer's data, yielding the decomposed form of the input.
    pub fn normalize_iter<I: Iterator<Item = char>>(&self, iter: I) -> Decomposition<I> {
        Decomposition::new_with_supplements(
            iter,
            self.decompositions.get(),
            self.supplementary_decompositions.as_ref().map(|s| s.get()),
            self.tables.get(),
            self.supplementary_tables.as_ref().map(|s| s.get()),
            self.decomposition_passthrough_bound,
            // Ignorables are passed through as-is on this public path.
            IgnorableBehavior::Unsupported,
        )
    }
1929
    // Expands to `normalize`, `is_normalized`, and the UTF-8/UTF-16 variants.
    normalizer_methods!();
1931
    // Generates `normalize_to` for `&str` input with a UTF-8 byte-wise fast
    // path that copies passthrough runs straight from the input slice.
    decomposing_normalize_to!(
        ,
        normalize_to,
        core::fmt::Write,
        &str,
        {
        },
        as_str,
        {
            // Translate the scalar-value bound to a UTF-8 lead-byte bound:
            // 0xC3 appears to be the first lead byte at or above U+00C0 —
            // NOTE(review): confirm against the UTF-8 encoding table.
            let decomposition_passthrough_byte_bound = if decomposition_passthrough_bound == 0xC0 {
                0xC3u8
            } else {
                decomposition_passthrough_bound.min(0x80) as u8
            };
            #[allow(clippy::unwrap_used)]
            'fast: loop {
                let mut code_unit_iter = decomposition.delegate.as_str().as_bytes().iter();
                // Innermost loop: skip bytes below the passthrough bound.
                'fastest: loop {
                    if let Some(&upcoming_byte) = code_unit_iter.next() {
                        if upcoming_byte < decomposition_passthrough_byte_bound {
                            continue 'fastest;
                        }
                        // Re-sync the char iterator to just before the byte
                        // that ended the byte-wise scan.
                        decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars();
                        break 'fastest;
                    }
                    // Input exhausted: everything was passthrough.
                    sink.write_str(pending_slice)?;
                    return Ok(());
                }

                let upcoming = decomposition.delegate.next().unwrap();
                let upcoming_with_trie_value = decomposition.attach_trie_value(upcoming);
                if upcoming_with_trie_value.starter_and_decomposes_to_self() {
                    continue 'fast;
                }
                // Flush the passthrough run consumed so far, excluding the
                // character that needs real decomposition.
                let consumed_so_far_slice = &pending_slice[..pending_slice.len()
                    - decomposition.delegate.as_str().len()
                    - upcoming.len_utf8()];
                sink.write_str(consumed_so_far_slice)?;

                if decomposition_starts_with_non_starter(
                    upcoming_with_trie_value.trie_val,
                ) {
                    // Combining character first: hand off to the reordering
                    // slow path in the outer loop.
                    decomposition.pending = Some(upcoming_with_trie_value);
                    decomposition.gather_and_sort_combining(0);
                    continue 'outer;
                }
                undecomposed_starter = upcoming_with_trie_value;
                debug_assert!(decomposition.pending.is_none());
                break 'fast;
            }
        },
        text,
        sink,
        decomposition,
        decomposition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        'outer,
    );
2000
    // Generates `normalize_utf8_to` for potentially-invalid UTF-8 input.
    // Same structure as the `&str` version, but invalid sequences surface as
    // U+FFFD from the lossy char iterator and need special length handling.
    decomposing_normalize_to!(
        ,
        normalize_utf8_to,
        core::fmt::Write,
        &[u8],
        {
        },
        as_slice,
        {
            // ASCII-only fast scan bound (input may be invalid UTF-8, so no
            // multi-byte lead-byte shortcut here).
            let decomposition_passthrough_byte_bound = decomposition_passthrough_bound.min(0x80) as u8;
            #[allow(clippy::unwrap_used)]
            'fast: loop {
                let mut code_unit_iter = decomposition.delegate.as_slice().iter();
                'fastest: loop {
                    if let Some(&upcoming_byte) = code_unit_iter.next() {
                        if upcoming_byte < decomposition_passthrough_byte_bound {
                            continue 'fastest;
                        }
                        break 'fastest;
                    }
                    // Input exhausted; all bytes scanned were ASCII
                    // passthrough, so the slice is valid UTF-8 here.
                    sink.write_str(unsafe { from_utf8_unchecked(pending_slice) })?;
                    return Ok(());
                }
                // Re-sync the lossy char iterator to just before the byte
                // that ended the scan.
                decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars();

                let upcoming = decomposition.delegate.next().unwrap();
                let upcoming_with_trie_value = decomposition.attach_trie_value(upcoming);
                if upcoming_with_trie_value.starter_and_decomposes_to_self() {
                    if upcoming != REPLACEMENT_CHARACTER {
                        continue 'fast;
                    }
                    // U+FFFD may stand for an invalid sequence of a different
                    // byte length, so walk back with `next_back` instead of
                    // subtracting `len_utf8`.
                    let mut consumed_so_far = pending_slice[..pending_slice.len() - decomposition.delegate.as_slice().len()].chars();
                    let back = consumed_so_far.next_back();
                    debug_assert_eq!(back, Some(REPLACEMENT_CHARACTER));
                    let consumed_so_far_slice = consumed_so_far.as_slice();
                    sink.write_str(unsafe{from_utf8_unchecked(consumed_so_far_slice)})?;

                    undecomposed_starter = upcoming_with_trie_value;
                    debug_assert!(decomposition.pending.is_none());
                    break 'fast;
                }
                // Flush the passthrough run, excluding the character that
                // needs real decomposition.
                let consumed_so_far_slice = &pending_slice[..pending_slice.len()
                    - decomposition.delegate.as_slice().len()
                    - upcoming.len_utf8()];
                sink.write_str(unsafe{from_utf8_unchecked(consumed_so_far_slice)})?;

                if decomposition_starts_with_non_starter(
                    upcoming_with_trie_value.trie_val,
                ) {
                    // Combining character first: defer to the reordering slow
                    // path in the outer loop.
                    decomposition.pending = Some(upcoming_with_trie_value);
                    decomposition.gather_and_sort_combining(0);
                    continue 'outer;
                }
                undecomposed_starter = upcoming_with_trie_value;
                debug_assert!(decomposition.pending.is_none());
                break 'fast;
            }
        },
        text,
        sink,
        decomposition,
        decomposition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        'outer,
    );
2086
    // Generates `normalize_utf16_to` for potentially-invalid UTF-16 input.
    // Decodes surrogate pairs manually; unpaired surrogates become U+FFFD.
    // A countdown forces a periodic exit from the fast path — presumably to
    // keep the sink flushed; TODO(review) confirm the threshold's rationale.
    decomposing_normalize_to!(
        ,
        normalize_utf16_to,
        write16::Write16,
        &[u16],
        {
            sink.size_hint(text.len())?;
        },
        as_slice,
        {
            let mut code_unit_iter = decomposition.delegate.as_slice().iter();
            let mut counter = UTF16_FAST_PATH_FLUSH_THRESHOLD;
            'fast: loop {
                counter -= 1;
                if let Some(&upcoming_code_unit) = code_unit_iter.next() {
                    let mut upcoming32 = u32::from(upcoming_code_unit);
                    // BMP passthrough shortcut (unless the countdown expired).
                    if upcoming32 < decomposition_passthrough_bound && counter != 0 {
                        continue 'fast;
                    }
                    // Single-iteration loop used as a labeled block for
                    // structured early exits while decoding surrogates.
                    #[allow(clippy::never_loop)]
                    'surrogateloop: loop {
                        let surrogate_base = upcoming32.wrapping_sub(0xD800);
                        if surrogate_base > (0xDFFF - 0xD800) {
                            // Not a surrogate at all.
                            break 'surrogateloop;
                        }
                        if surrogate_base <= (0xDBFF - 0xD800) {
                            // High surrogate: try to pair with the next unit.
                            let iter_backup = code_unit_iter.clone();
                            if let Some(&low) = code_unit_iter.next() {
                                if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
                                    upcoming32 = (upcoming32 << 10) + u32::from(low)
                                        - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
                                    break 'surrogateloop;
                                } else {
                                    // Not a low surrogate; restore the iterator.
                                    code_unit_iter = iter_backup;
                                }
                            }
                        }
                        // Unpaired surrogate: flush what was consumed and
                        // substitute U+FFFD.
                        let slice_to_write = &pending_slice
                            [..pending_slice.len() - code_unit_iter.as_slice().len() - 1];
                        sink.write_slice(slice_to_write)?;
                        undecomposed_starter =
                            CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0);
                        debug_assert!(decomposition.pending.is_none());
                        break 'fast;
                    }
                    // SAFETY-relevant: `upcoming32` is either a non-surrogate
                    // BMP unit or a value assembled from a valid pair above.
                    let upcoming = unsafe { char::from_u32_unchecked(upcoming32) };
                    let upcoming_with_trie_value =
                        decomposition.attach_trie_value(upcoming);
                    if upcoming_with_trie_value.starter_and_decomposes_to_self() && counter != 0 {
                        continue 'fast;
                    }
                    // Flush the passthrough run, excluding this character.
                    let consumed_so_far_slice = &pending_slice[..pending_slice.len()
                        - code_unit_iter.as_slice().len()
                        - upcoming.len_utf16()];
                    sink.write_slice(consumed_so_far_slice)?;

                    if decomposition_starts_with_non_starter(
                        upcoming_with_trie_value.trie_val,
                    ) {
                        // Combining character first: re-sync the char
                        // delegate and defer to the reordering slow path.
                        decomposition.delegate = code_unit_iter.as_slice().chars();
                        decomposition.pending = Some(upcoming_with_trie_value);
                        decomposition.gather_and_sort_combining(0);
                        continue 'outer;
                    }
                    undecomposed_starter = upcoming_with_trie_value;
                    debug_assert!(decomposition.pending.is_none());
                    break 'fast;
                }
                // Input exhausted: everything pending was passthrough.
                sink.write_slice(pending_slice)?;
                return Ok(());
            }
            // Re-sync the char delegate with the manual code-unit iterator.
            decomposition.delegate = code_unit_iter.as_slice().chars();
        },
        text,
        sink,
        decomposition,
        decomposition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        'outer,
    );
2188}
2189
#[derive(Debug)]
/// A normalizer for performing composing normalization (NFC/NFKC and the
/// UTS 46 variant, depending on the loaded data).
pub struct ComposingNormalizer {
    /// The decomposing half; composition is layered on top of its output.
    decomposing_normalizer: DecomposingNormalizer,
    /// Canonical composition (pairwise recomposition) data.
    canonical_compositions: DataPayload<CanonicalCompositionsV1Marker>,
}
2196
2197impl ComposingNormalizer {
    #[cfg(feature = "compiled_data")]
    /// NFC constructor backed by compiled (baked) data: the NFD decomposer
    /// plus the canonical composition table.
    pub const fn new_nfc() -> Self {
        ComposingNormalizer {
            decomposing_normalizer: DecomposingNormalizer::new_nfd(),
            canonical_compositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_COMP_V1,
            ),
        }
    }
2212
    // Generates the `try_new_nfc_with_any_provider` / `try_new_nfc_with_buffer_provider`
    // wrappers around `try_new_nfc_unstable` (and docs for `new_nfc`).
    icu_provider::gen_any_buffer_data_constructors!(
        locale: skip,
        options: skip,
        error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new_nfc,
            try_new_nfc_with_any_provider,
            try_new_nfc_with_buffer_provider,
            try_new_nfc_unstable,
            Self,
        ]
    );
2226
2227 #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfc)]
2228 pub fn try_new_nfc_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
2229 where
2230 D: DataProvider<CanonicalDecompositionDataV1Marker>
2231 + DataProvider<CanonicalDecompositionTablesV1Marker>
2232 + DataProvider<CanonicalCompositionsV1Marker>
2233 + ?Sized,
2234 {
2235 let decomposing_normalizer = DecomposingNormalizer::try_new_nfd_unstable(provider)?;
2236
2237 let canonical_compositions: DataPayload<CanonicalCompositionsV1Marker> =
2238 provider.load(Default::default())?.take_payload()?;
2239
2240 Ok(ComposingNormalizer {
2241 decomposing_normalizer,
2242 canonical_compositions,
2243 })
2244 }
2245
    #[cfg(feature = "compiled_data")]
    /// NFKC constructor backed by compiled (baked) data: the NFKD decomposer
    /// plus the canonical composition table.
    pub const fn new_nfkc() -> Self {
        ComposingNormalizer {
            decomposing_normalizer: DecomposingNormalizer::new_nfkd(),
            canonical_compositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_COMP_V1,
            ),
        }
    }
2260
    // Generates the `try_new_nfkc_with_any_provider` / `try_new_nfkc_with_buffer_provider`
    // wrappers around `try_new_nfkc_unstable` (and docs for `new_nfkc`).
    icu_provider::gen_any_buffer_data_constructors!(
        locale: skip,
        options: skip,
        error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new_nfkc,
            try_new_nfkc_with_any_provider,
            try_new_nfkc_with_buffer_provider,
            try_new_nfkc_unstable,
            Self,
        ]
    );
2274
2275 #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfkc)]
2276 pub fn try_new_nfkc_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
2277 where
2278 D: DataProvider<CanonicalDecompositionDataV1Marker>
2279 + DataProvider<CompatibilityDecompositionSupplementV1Marker>
2280 + DataProvider<CanonicalDecompositionTablesV1Marker>
2281 + DataProvider<CompatibilityDecompositionTablesV1Marker>
2282 + DataProvider<CanonicalCompositionsV1Marker>
2283 + ?Sized,
2284 {
2285 let decomposing_normalizer = DecomposingNormalizer::try_new_nfkd_unstable(provider)?;
2286
2287 let canonical_compositions: DataPayload<CanonicalCompositionsV1Marker> =
2288 provider.load(Default::default())?.take_payload()?;
2289
2290 Ok(ComposingNormalizer {
2291 decomposing_normalizer,
2292 canonical_compositions,
2293 })
2294 }
2295
    #[cfg(feature = "compiled_data")]
    /// UTS 46 constructor backed by compiled (baked) data: the decomposed
    /// UTS 46 half plus the canonical composition table.
    pub(crate) const fn new_uts46() -> Self {
        ComposingNormalizer {
            decomposing_normalizer: DecomposingNormalizer::new_uts46_decomposed(),
            canonical_compositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_COMP_V1,
            ),
        }
    }
2314
2315 #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_uts46)]
2316 pub(crate) fn try_new_uts46_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
2317 where
2318 D: DataProvider<CanonicalDecompositionDataV1Marker>
2319 + DataProvider<Uts46DecompositionSupplementV1Marker>
2320 + DataProvider<CanonicalDecompositionTablesV1Marker>
2321 + DataProvider<CompatibilityDecompositionTablesV1Marker>
2322 + DataProvider<CanonicalCompositionsV1Marker>
2324 + ?Sized,
2325 {
2326 let decomposing_normalizer =
2327 DecomposingNormalizer::try_new_uts46_decomposed_unstable(provider)?;
2328
2329 let canonical_compositions: DataPayload<CanonicalCompositionsV1Marker> =
2330 provider.load(Default::default())?.take_payload()?;
2331
2332 Ok(ComposingNormalizer {
2333 decomposing_normalizer,
2334 canonical_compositions,
2335 })
2336 }
2337
    /// Wraps a delegate `char` iterator in a [`Composition`] driven by this
    /// normalizer's data, yielding the composed form of the input.
    pub fn normalize_iter<I: Iterator<Item = char>>(&self, iter: I) -> Composition<I> {
        self.normalize_iter_private(iter, IgnorableBehavior::Unsupported)
    }
2343
    /// Builds a [`Composition`] over a [`Decomposition`] of the delegate
    /// iterator, using this normalizer's decomposition and composition data.
    /// `ignorable_behavior` controls how the inner decomposition treats
    /// ignorables (internal-only knob; the public path passes `Unsupported`).
    fn normalize_iter_private<I: Iterator<Item = char>>(
        &self,
        iter: I,
        ignorable_behavior: IgnorableBehavior,
    ) -> Composition<I> {
        Composition::new(
            Decomposition::new_with_supplements(
                iter,
                self.decomposing_normalizer.decompositions.get(),
                self.decomposing_normalizer
                    .supplementary_decompositions
                    .as_ref()
                    .map(|s| s.get()),
                self.decomposing_normalizer.tables.get(),
                self.decomposing_normalizer
                    .supplementary_tables
                    .as_ref()
                    .map(|s| s.get()),
                self.decomposing_normalizer.decomposition_passthrough_bound,
                ignorable_behavior,
            ),
            // Borrow the composition trie without copying the payload.
            ZeroFrom::zero_from(&self.canonical_compositions.get().canonical_compositions),
            self.decomposing_normalizer.composition_passthrough_bound,
        )
    }
2369
    // Expands to `normalize`, `is_normalized`, and the UTF-8/UTF-16 variants.
    normalizer_methods!();
2371
    // Generates `normalize_to` for `&str` input with a UTF-8 byte-wise fast
    // path over characters that pass through composition unchanged.
    composing_normalize_to!(
        ,
        normalize_to,
        core::fmt::Write,
        &str,
        {},
        true,
        as_str,
        {
            // Translate the scalar-value bound to a UTF-8 lead-byte bound:
            // 0xCC appears to correspond to U+0300 — NOTE(review): confirm
            // against the UTF-8 encoding table.
            let composition_passthrough_byte_bound = if composition_passthrough_bound == 0x300 {
                0xCCu8
            } else {
                composition_passthrough_bound.min(0x80) as u8
            };
            // Tracks whether `undecomposed_starter` still holds the character
            // immediately preceding the current position; the byte-wise scan
            // invalidates it.
            let mut undecomposed_starter_valid = true;
            #[allow(clippy::unwrap_used)]
            'fast: loop {
                let mut code_unit_iter = composition.decomposition.delegate.as_str().as_bytes().iter();
                'fastest: loop {
                    if let Some(&upcoming_byte) = code_unit_iter.next() {
                        if upcoming_byte < composition_passthrough_byte_bound {
                            undecomposed_starter_valid = false;
                            continue 'fastest;
                        }
                        // Re-sync the char iterator to just before this byte.
                        composition.decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars();
                        break 'fastest;
                    }
                    // Input exhausted: everything was passthrough.
                    sink.write_str(pending_slice)?;
                    return Ok(());
                }
                let upcoming = composition.decomposition.delegate.next().unwrap();
                let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming);
                if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() {
                    // Safe to pass through; remember it as the most recent
                    // starter in case the next character combines backwards.
                    undecomposed_starter = upcoming_with_trie_value;
                    undecomposed_starter_valid = true;
                    continue 'fast;
                }
                // This character needs the slow path.
                composition.decomposition.pending = Some(upcoming_with_trie_value);
                let consumed_so_far_slice = if undecomposed_starter_valid {
                    // Hold back the previous starter: it may compose with
                    // what follows.
                    &pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_str().len() - upcoming.len_utf8() - undecomposed_starter.character.len_utf8()]
                } else {
                    // Recover the previous starter from the consumed text.
                    let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_str().len() - upcoming.len_utf8()].chars();
                    undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap());
                    undecomposed_starter_valid = true;
                    consumed_so_far.as_str()
                };
                sink.write_str(consumed_so_far_slice)?;
                break 'fast;
            }
            debug_assert!(undecomposed_starter_valid);
        },
        text,
        sink,
        composition,
        composition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        len_utf8,
    );
2452
    // Generates `normalize_utf8_to` for potentially-invalid UTF-8 input.
    // Iterates by char (lossy) rather than by byte; invalid sequences
    // surface as U+FFFD and need byte-length-safe backtracking.
    composing_normalize_to!(
        ,
        normalize_utf8_to,
        core::fmt::Write,
        &[u8],
        {},
        false,
        as_slice,
        {
            // Tracks whether `undecomposed_starter` still holds the character
            // immediately preceding the current position.
            let mut undecomposed_starter_valid = true;
            'fast: loop {
                if let Some(upcoming) = composition.decomposition.delegate.next() {
                    if u32::from(upcoming) < composition_passthrough_bound {
                        undecomposed_starter_valid = false;
                        continue 'fast;
                    }
                    if upcoming == REPLACEMENT_CHARACTER {
                        // U+FFFD may stand for an invalid sequence of a
                        // different byte length, so walk back with
                        // `next_back` instead of subtracting `len_utf8`.
                        let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_slice().len()].chars();
                        let back = consumed_so_far.next_back();
                        debug_assert_eq!(back, Some(REPLACEMENT_CHARACTER));
                        let consumed_so_far_slice = consumed_so_far.as_slice();
                        sink.write_str(unsafe{ from_utf8_unchecked(consumed_so_far_slice)})?;
                        undecomposed_starter = CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0);
                        undecomposed_starter_valid = true;
                        composition.decomposition.pending = None;
                        break 'fast;
                    }
                    let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming);
                    if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() {
                        // Safe to pass through; becomes the most recent starter.
                        undecomposed_starter = upcoming_with_trie_value;
                        undecomposed_starter_valid = true;
                        continue 'fast;
                    }
                    // This character needs the slow path.
                    composition.decomposition.pending = Some(upcoming_with_trie_value);
                    #[allow(clippy::unwrap_used)]
                    let consumed_so_far_slice = if undecomposed_starter_valid {
                        // Hold back the previous starter: it may compose with
                        // what follows.
                        &pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_slice().len() - upcoming.len_utf8() - undecomposed_starter.character.len_utf8()]
                    } else {
                        // Recover the previous starter from the consumed text.
                        let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_slice().len() - upcoming.len_utf8()].chars();
                        undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap());
                        undecomposed_starter_valid = true;
                        consumed_so_far.as_slice()
                    };
                    sink.write_str(unsafe { from_utf8_unchecked(consumed_so_far_slice)})?;
                    break 'fast;
                }
                // Input exhausted: everything pending was passthrough.
                sink.write_str(unsafe {from_utf8_unchecked(pending_slice) })?;
                return Ok(());
            }
            debug_assert!(undecomposed_starter_valid);
        },
        text,
        sink,
        composition,
        composition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        len_utf8,
    );
2537
    // Generates `normalize_utf16_to` for potentially-invalid UTF-16 input.
    // Decodes surrogate pairs manually; unpaired surrogates become U+FFFD.
    // A countdown forces a periodic exit from the fast path, and
    // `counter_reference` detects whether a character was consumed since the
    // last remembered starter.
    composing_normalize_to!(
        ,
        normalize_utf16_to,
        write16::Write16,
        &[u16],
        {
            sink.size_hint(text.len())?;
        },
        false,
        as_slice,
        {
            let mut code_unit_iter = composition.decomposition.delegate.as_slice().iter();
            let mut upcoming32;
            let mut undecomposed_starter_valid;
            let mut counter = UTF16_FAST_PATH_FLUSH_THRESHOLD;
            // Counter value at which `undecomposed_starter` is still the
            // immediately preceding character.
            let mut counter_reference = counter - 1;
            'fast: loop {
                counter -= 1;
                if let Some(&upcoming_code_unit) = code_unit_iter.next() {
                    upcoming32 = u32::from(upcoming_code_unit);
                    // BMP passthrough shortcut (unless the countdown expired).
                    if upcoming32 < composition_passthrough_bound && counter != 0 {
                        continue 'fast;
                    }
                    undecomposed_starter_valid = counter == counter_reference;
                    // Single-iteration loop used as a labeled block for
                    // structured early exits while decoding surrogates.
                    #[allow(clippy::never_loop)]
                    'surrogateloop: loop {
                        let surrogate_base = upcoming32.wrapping_sub(0xD800);
                        if surrogate_base > (0xDFFF - 0xD800) {
                            // Not a surrogate at all.
                            break 'surrogateloop;
                        }
                        if surrogate_base <= (0xDBFF - 0xD800) {
                            // High surrogate: try to pair with the next unit.
                            let iter_backup = code_unit_iter.clone();
                            if let Some(&low) = code_unit_iter.next() {
                                if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
                                    upcoming32 = (upcoming32 << 10) + u32::from(low)
                                        - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
                                    break 'surrogateloop;
                                } else {
                                    // Not a low surrogate; restore the iterator.
                                    code_unit_iter = iter_backup;
                                }
                            }
                        }
                        // Unpaired surrogate: flush and substitute U+FFFD.
                        let slice_to_write = &pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - 1];
                        sink.write_slice(slice_to_write)?;
                        undecomposed_starter = CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0);
                        undecomposed_starter_valid = true;
                        composition.decomposition.pending = None;
                        break 'fast;
                    }
                    // SAFETY-relevant: `upcoming32` is either a non-surrogate
                    // BMP unit or a value assembled from a valid pair above.
                    let upcoming = unsafe { char::from_u32_unchecked(upcoming32) };
                    let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming);
                    if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() && counter != 0 {
                        // Safe to pass through; becomes the most recent starter.
                        undecomposed_starter = upcoming_with_trie_value;
                        counter_reference = counter - 1;
                        continue 'fast;
                    }
                    // This character needs the slow path.
                    composition.decomposition.pending = Some(upcoming_with_trie_value);
                    #[allow(clippy::unwrap_used)]
                    let consumed_so_far_slice = if undecomposed_starter_valid {
                        // Hold back the previous starter: it may compose with
                        // what follows.
                        &pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - upcoming.len_utf16() - undecomposed_starter.character.len_utf16()]
                    } else {
                        // Recover the previous starter from the consumed text.
                        let mut consumed_so_far = pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - upcoming.len_utf16()].chars();
                        undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap());
                        undecomposed_starter_valid = true;
                        consumed_so_far.as_slice()
                    };
                    sink.write_slice(consumed_so_far_slice)?;
                    break 'fast;
                }
                // Input exhausted: everything pending was passthrough.
                sink.write_slice(pending_slice)?;
                return Ok(());
            }
            debug_assert!(undecomposed_starter_valid);
            // Re-sync the char delegate with the manual code-unit iterator.
            composition.decomposition.delegate = code_unit_iter.as_slice().chars();
        },
        text,
        sink,
        composition,
        composition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        len_utf16,
    );
2662}
2663
/// Write16 sink that verifies, instead of storing, normalizer output:
/// it succeeds only while the written data matches `expect` exactly.
struct IsNormalizedSinkUtf16<'a> {
    expect: &'a [u16],
}

impl<'a> IsNormalizedSinkUtf16<'a> {
    /// Wraps the UTF-16 slice the output is expected to reproduce.
    pub fn new(slice: &'a [u16]) -> Self {
        Self { expect: slice }
    }
    /// True once every expected code unit has been matched.
    pub fn finished(&self) -> bool {
        self.expect.is_empty()
    }
}
2676
2677impl<'a> Write16 for IsNormalizedSinkUtf16<'a> {
2678 fn write_slice(&mut self, s: &[u16]) -> core::fmt::Result {
2679 #[allow(clippy::indexing_slicing)]
2684 if s.as_ptr() == self.expect.as_ptr() {
2685 self.expect = &self.expect[s.len()..];
2686 Ok(())
2687 } else {
2688 Err(core::fmt::Error {})
2689 }
2690 }
2691
2692 fn write_char(&mut self, c: char) -> core::fmt::Result {
2693 let mut iter = self.expect.chars();
2694 if iter.next() == Some(c) {
2695 self.expect = iter.as_slice();
2696 Ok(())
2697 } else {
2698 Err(core::fmt::Error {})
2699 }
2700 }
2701}
2702
/// UTF-8-bytes sink that verifies, instead of storing, normalizer output:
/// it succeeds only while the written data matches `expect` exactly.
struct IsNormalizedSinkUtf8<'a> {
    expect: &'a [u8],
}

impl<'a> IsNormalizedSinkUtf8<'a> {
    /// Wraps the byte slice the output is expected to reproduce.
    pub fn new(slice: &'a [u8]) -> Self {
        Self { expect: slice }
    }
    /// True once every expected byte has been matched.
    pub fn finished(&self) -> bool {
        self.expect.is_empty()
    }
}
2715
2716impl<'a> core::fmt::Write for IsNormalizedSinkUtf8<'a> {
2717 fn write_str(&mut self, s: &str) -> core::fmt::Result {
2718 #[allow(clippy::indexing_slicing)]
2723 if s.as_ptr() == self.expect.as_ptr() {
2724 self.expect = &self.expect[s.len()..];
2725 Ok(())
2726 } else {
2727 Err(core::fmt::Error {})
2728 }
2729 }
2730
2731 fn write_char(&mut self, c: char) -> core::fmt::Result {
2732 let mut iter = self.expect.chars();
2733 if iter.next() == Some(c) {
2734 self.expect = iter.as_slice();
2735 Ok(())
2736 } else {
2737 Err(core::fmt::Error {})
2738 }
2739 }
2740}
2741
/// `&str` sink that verifies, instead of storing, normalizer output:
/// it succeeds only while the written data matches `expect` exactly.
struct IsNormalizedSinkStr<'a> {
    expect: &'a str,
}

impl<'a> IsNormalizedSinkStr<'a> {
    /// Wraps the string slice the output is expected to reproduce.
    pub fn new(slice: &'a str) -> Self {
        Self { expect: slice }
    }
    /// True once every expected byte has been matched.
    pub fn finished(&self) -> bool {
        self.expect.is_empty()
    }
}

impl<'a> core::fmt::Write for IsNormalizedSinkStr<'a> {
    fn write_str(&mut self, s: &str) -> core::fmt::Result {
        // The normalizer writes back subslices of the buffer under test, so
        // pointer identity is enough to prove `s` is the next expected chunk.
        if s.as_ptr() != self.expect.as_ptr() {
            return Err(core::fmt::Error {});
        }
        #[allow(clippy::indexing_slicing)]
        {
            self.expect = &self.expect[s.len()..];
        }
        Ok(())
    }

    fn write_char(&mut self, c: char) -> core::fmt::Result {
        // Consume one scalar value from the expectation iff it equals `c`.
        let mut rest = self.expect.chars();
        match rest.next() {
            Some(expected) if expected == c => {
                self.expect = rest.as_str();
                Ok(())
            }
            _ => Err(core::fmt::Error {}),
        }
    }
}