1#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
7#![cfg_attr(all(feature = "nightly", test), feature(test))]
8#![no_std]
9
10use core::char;
11
12mod types;
13
14use types::{Action, State};
15
/// Sink for the events produced while decoding a byte stream.
///
/// A `Parser` drives an implementor of this trait: every fully decoded
/// character and every rejected byte sequence is reported through one of
/// the two callbacks below.
pub trait Receiver {
    /// Called with each character that has been decoded.
    fn codepoint(&mut self, c: char);

    /// Called when the bytes seen so far do not form a valid sequence.
    fn invalid_sequence(&mut self);
}
24
25#[derive(#[automatically_derived]
impl ::core::clone::Clone for Parser {
#[inline]
fn clone(&self) -> Parser {
Parser {
point: ::core::clone::Clone::clone(&self.point),
state: ::core::clone::Clone::clone(&self.state),
}
}
}Clone, #[automatically_derived]
impl ::core::default::Default for Parser {
#[inline]
fn default() -> Parser {
Parser {
point: ::core::default::Default::default(),
state: ::core::default::Default::default(),
}
}
}Default, #[automatically_derived]
impl ::core::cmp::PartialEq for Parser {
#[inline]
fn eq(&self, other: &Parser) -> bool {
self.point == other.point && self.state == other.state
}
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for Parser {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) {
let _: ::core::cmp::AssertParamIsEq<u32>;
let _: ::core::cmp::AssertParamIsEq<State>;
}
}Eq, #[automatically_derived]
impl ::core::fmt::Debug for Parser {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "Parser",
"point", &self.point, "state", &&self.state)
}
}Debug)]
29pub struct Parser {
30 point: u32,
31 state: State,
32}
33
/// Selects the low six bits of a byte (`0b0011_1111`), i.e. everything
/// except the two most significant bits.
const CONTINUATION_MASK: u8 = 0x3F;
36
37impl Parser {
38 pub fn new() -> Parser {
40 Parser { point: 0, state: State::Ground }
41 }
42
43 pub fn advance<R>(&mut self, receiver: &mut R, byte: u8)
48 where
49 R: Receiver,
50 {
51 let (state, action) = self.state.advance(byte);
52 self.perform_action(receiver, byte, action);
53 self.state = state;
54 }
55
56 fn perform_action<R>(&mut self, receiver: &mut R, byte: u8, action: Action)
57 where
58 R: Receiver,
59 {
60 match action {
61 Action::InvalidSequence => {
62 self.point = 0;
63 receiver.invalid_sequence();
64 },
65 Action::EmitByte => {
66 receiver.codepoint(byte as char);
67 },
68 Action::SetByte1 => {
69 let point = self.point | ((byte & CONTINUATION_MASK) as u32);
70 let c = unsafe { char::from_u32_unchecked(point) };
71 self.point = 0;
72
73 receiver.codepoint(c);
74 },
75 Action::SetByte2 => {
76 self.point |= ((byte & CONTINUATION_MASK) as u32) << 6;
77 },
78 Action::SetByte2Top => {
79 self.point |= ((byte & 0b0001_1111) as u32) << 6;
80 },
81 Action::SetByte3 => {
82 self.point |= ((byte & CONTINUATION_MASK) as u32) << 12;
83 },
84 Action::SetByte3Top => {
85 self.point |= ((byte & 0b0000_1111) as u32) << 12;
86 },
87 Action::SetByte4 => {
88 self.point |= ((byte & 0b0000_0111) as u32) << 18;
89 },
90 }
91 }
92}
93
/// Benchmarks; compiled only for test builds with the `nightly` feature.
#[cfg(all(feature = "nightly", test))]
mod benches {
    extern crate std;
    extern crate test;

    use self::test::{black_box, Bencher};
    use super::{Parser, Receiver};

    /// Sample input shared by both benchmarks.
    static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");

    /// Receiver that passes every character to `black_box` and ignores
    /// invalid sequences.
    impl Receiver for () {
        fn codepoint(&mut self, c: char) {
            black_box(c);
        }

        fn invalid_sequence(&mut self) {}
    }

    /// Runs this crate's parser over the demo text, one byte at a time.
    #[bench]
    fn parse_bench_utf8_demo(b: &mut Bencher) {
        let mut parser = Parser::new();
        let mut sink = ();

        b.iter(|| {
            UTF8_DEMO.iter().for_each(|&byte| parser.advance(&mut sink, byte));
        })
    }

    /// Baseline: validates and iterates the same text with the standard library.
    #[bench]
    fn std_string_parse_utf8(b: &mut Bencher) {
        b.iter(|| {
            let text = std::str::from_utf8(UTF8_DEMO).unwrap();
            text.chars().for_each(|c| {
                black_box(c);
            });
        });
    }
}