// utf8parse/types.rs

//! Types supporting the UTF-8 parser

/// Action to take when receiving a byte
///
/// Returned alongside the next [`State`] by [`State::advance`]. Every variant has an explicit
/// discriminant (`0..=7`).
#[derive(Debug, Copy, Clone)]
pub enum Action {
    /// Unexpected byte; sequence is invalid
    InvalidSequence = 0,
    /// Received valid 7-bit ASCII byte which can be directly emitted.
    EmitByte = 1,
    /// Set the bottom continuation byte
    SetByte1 = 2,
    /// Set the 2nd-from-last continuation byte
    SetByte2 = 3,
    /// Set the 2nd-from-last byte which is part of a two byte sequence
    SetByte2Top = 4,
    /// Set the 3rd-from-last continuation byte
    SetByte3 = 5,
    /// Set the 3rd-from-last byte which is part of a three byte sequence
    SetByte3Top = 6,
    /// Set the top byte of a four byte sequence.
    SetByte4 = 7,
}

/// States the parser can be in.
///
/// There is a state for each initial input of the 3 and 4 byte sequences since
/// the following bytes are subject to different conditions than a tail byte.
///
/// The default state is [`State::Ground`]. Every variant has an explicit discriminant (`0..=7`).
// Variant names such as `U3_2_e0` embed the lead byte they follow, so they intentionally
// deviate from UpperCamelCase.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub enum State {
    /// Ground state; expect anything
    #[default]
    Ground = 0,
    /// 3 tail bytes
    Tail3 = 1,
    /// 2 tail bytes
    Tail2 = 2,
    /// 1 tail byte
    Tail1 = 3,
    /// UTF8-3 starting with E0
    U3_2_e0 = 4,
    /// UTF8-3 starting with ED
    U3_2_ed = 5,
    /// UTF8-4 starting with F0
    Utf8_4_3_f0 = 6,
    /// UTF8-4 starting with F4
    Utf8_4_3_f4 = 7,
}

50impl State {
51    /// Advance the parser state.
52    ///
53    /// This takes the current state and input byte into consideration, to determine the next state
54    /// and any action that should be taken.
55    #[inline]
56    pub fn advance(self, byte: u8) -> (State, Action) {
57        match self {
58            State::Ground => match byte {
59                0x00..=0x7f => (State::Ground, Action::EmitByte),
60                0xc2..=0xdf => (State::Tail1, Action::SetByte2Top),
61                0xe0 => (State::U3_2_e0, Action::SetByte3Top),
62                0xe1..=0xec => (State::Tail2, Action::SetByte3Top),
63                0xed => (State::U3_2_ed, Action::SetByte3Top),
64                0xee..=0xef => (State::Tail2, Action::SetByte3Top),
65                0xf0 => (State::Utf8_4_3_f0, Action::SetByte4),
66                0xf1..=0xf3 => (State::Tail3, Action::SetByte4),
67                0xf4 => (State::Utf8_4_3_f4, Action::SetByte4),
68                _ => (State::Ground, Action::InvalidSequence),
69            },
70            State::U3_2_e0 => match byte {
71                0xa0..=0xbf => (State::Tail1, Action::SetByte2),
72                _ => (State::Ground, Action::InvalidSequence),
73            },
74            State::U3_2_ed => match byte {
75                0x80..=0x9f => (State::Tail1, Action::SetByte2),
76                _ => (State::Ground, Action::InvalidSequence),
77            },
78            State::Utf8_4_3_f0 => match byte {
79                0x90..=0xbf => (State::Tail2, Action::SetByte3),
80                _ => (State::Ground, Action::InvalidSequence),
81            },
82            State::Utf8_4_3_f4 => match byte {
83                0x80..=0x8f => (State::Tail2, Action::SetByte3),
84                _ => (State::Ground, Action::InvalidSequence),
85            },
86            State::Tail3 => match byte {
87                0x80..=0xbf => (State::Tail2, Action::SetByte3),
88                _ => (State::Ground, Action::InvalidSequence),
89            },
90            State::Tail2 => match byte {
91                0x80..=0xbf => (State::Tail1, Action::SetByte2),
92                _ => (State::Ground, Action::InvalidSequence),
93            },
94            State::Tail1 => match byte {
95                0x80..=0xbf => (State::Ground, Action::SetByte1),
96                _ => (State::Ground, Action::InvalidSequence),
97            },
98        }
99    }
100}