1/*!
2Types and routines that support the wire format of finite automata.
34Currently, this module just exports a few error types and some small helpers
5for deserializing [dense DFAs](crate::dfa::dense::DFA) using correct alignment.
6*/
78/*
9A collection of helper functions, types and traits for serializing automata.
1011This crate defines its own bespoke serialization mechanism for some structures
12provided in the public API, namely, DFAs. A bespoke mechanism was developed
13primarily because structures like automata demand a specific binary format.
14Attempting to encode their rich structure in an existing serialization
15format is just not feasible. Moreover, the format for each structure is
16generally designed such that deserialization is cheap. More specifically, that
17deserialization can be done in constant time. (The idea being that you can
18embed it into your binary or mmap it, and then use it immediately.)
1920In order to achieve this, the dense and sparse DFAs in this crate use an
21in-memory representation that very closely corresponds to its binary serialized
22form. This pervades and complicates everything, and in some cases, requires
23dealing with alignment and reasoning about safety.
2425This technique does have major advantages. In particular, it permits doing
26the potentially costly work of compiling a finite state machine in an offline
27manner, and then loading it at runtime not only without having to re-compile
28the regex, but even without the code required to do the compilation. This, for
29example, permits one to use a pre-compiled DFA not only in environments without
30Rust's standard library, but also in environments without a heap.
3132In the code below, whenever we insert some kind of padding, it's to enforce a
334-byte alignment, unless otherwise noted. Namely, u32 is the only state ID type
34supported. (In a previous version of this library, DFAs were generic over the
35state ID representation.)
3637Also, serialization generally requires the caller to specify endianness,
38where as deserialization always assumes native endianness (otherwise cheap
39deserialization would be impossible). This implies that serializing a structure
40generally requires serializing both its big-endian and little-endian variants,
41and then loading the correct one based on the target's endianness.
42*/
4344use core::{cmp, mem::size_of};
4546#[cfg(feature = "alloc")]
47use alloc::{vec, vec::Vec};
4849use crate::util::{
50int::Pointer,
51 primitives::{PatternID, PatternIDError, StateID, StateIDError},
52};
5354/// A hack to align a smaller type `B` with a bigger type `T`.
55///
56/// The usual use of this is with `B = [u8]` and `T = u32`. That is,
57/// it permits aligning a sequence of bytes on a 4-byte boundary. This
58/// is useful in contexts where one wants to embed a serialized [dense
59/// DFA](crate::dfa::dense::DFA) into a Rust a program while guaranteeing the
60/// alignment required for the DFA.
61///
62/// See [`dense::DFA::from_bytes`](crate::dfa::dense::DFA::from_bytes) for an
63/// example of how to use this type.
64#[repr(C)]
65#[derive(#[automatically_derived]
impl<B: ::core::fmt::Debug + ?Sized, T: ::core::fmt::Debug> ::core::fmt::Debug
for AlignAs<B, T> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "AlignAs",
"_align", &self._align, "bytes", &&self.bytes)
}
}Debug)]
66pub struct AlignAs<B: ?Sized, T> {
67/// A zero-sized field indicating the alignment we want.
68pub _align: [T; 0],
69/// A possibly non-sized field containing a sequence of bytes.
70pub bytes: B,
71}
7273/// An error that occurs when serializing an object from this crate.
74///
75/// Serialization, as used in this crate, universally refers to the process
76/// of transforming a structure (like a DFA) into a custom binary format
77/// represented by `&[u8]`. To this end, serialization is generally infallible.
78/// However, it can fail when caller provided buffer sizes are too small. When
79/// that occurs, a serialization error is reported.
80///
81/// A `SerializeError` provides no introspection capabilities. Its only
82/// supported operation is conversion to a human readable error message.
83///
84/// This error type implements the `std::error::Error` trait only when the
85/// `std` feature is enabled. Otherwise, this type is defined in all
86/// configurations.
87#[derive(#[automatically_derived]
impl ::core::fmt::Debug for SerializeError {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field1_finish(f,
"SerializeError", "what", &&self.what)
}
}Debug)]
88pub struct SerializeError {
89/// The name of the thing that a buffer is too small for.
90 ///
91 /// Currently, the only kind of serialization error is one that is
92 /// committed by a caller: providing a destination buffer that is too
93 /// small to fit the serialized object. This makes sense conceptually,
94 /// since every valid inhabitant of a type should be serializable.
95 ///
96 /// This is somewhat exposed in the public API of this crate. For example,
97 /// the `to_bytes_{big,little}_endian` APIs return a `Vec<u8>` and are
98 /// guaranteed to never panic or error. This is only possible because the
99 /// implementation guarantees that it will allocate a `Vec<u8>` that is
100 /// big enough.
101 ///
102 /// In summary, if a new serialization error kind needs to be added, then
103 /// it will need careful consideration.
104what: &'static str,
105}
106107impl SerializeError {
108pub(crate) fn buffer_too_small(what: &'static str) -> SerializeError {
109SerializeError { what }
110 }
111}
112113impl core::fmt::Displayfor SerializeError {
114fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
115f.write_fmt(format_args!("destination buffer is too small to write {0}",
self.what))write!(f, "destination buffer is too small to write {}", self.what)116 }
117}
118119#[cfg(feature = "std")]
120impl std::error::Errorfor SerializeError {}
121122/// An error that occurs when deserializing an object defined in this crate.
123///
124/// Serialization, as used in this crate, universally refers to the process
125/// of transforming a structure (like a DFA) into a custom binary format
126/// represented by `&[u8]`. Deserialization, then, refers to the process of
127/// cheaply converting this binary format back to the object's in-memory
128/// representation as defined in this crate. To the extent possible,
129/// deserialization will report this error whenever this process fails.
130///
131/// A `DeserializeError` provides no introspection capabilities. Its only
132/// supported operation is conversion to a human readable error message.
133///
134/// This error type implements the `std::error::Error` trait only when the
135/// `std` feature is enabled. Otherwise, this type is defined in all
136/// configurations.
137#[derive(#[automatically_derived]
impl ::core::fmt::Debug for DeserializeError {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_tuple_field1_finish(f,
"DeserializeError", &&self.0)
}
}Debug)]
138pub struct DeserializeError(DeserializeErrorKind);
139140#[derive(#[automatically_derived]
impl ::core::fmt::Debug for DeserializeErrorKind {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
match self {
DeserializeErrorKind::Generic { msg: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"Generic", "msg", &__self_0),
DeserializeErrorKind::BufferTooSmall { what: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"BufferTooSmall", "what", &__self_0),
DeserializeErrorKind::InvalidUsize { what: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"InvalidUsize", "what", &__self_0),
DeserializeErrorKind::VersionMismatch {
expected: __self_0, found: __self_1 } =>
::core::fmt::Formatter::debug_struct_field2_finish(f,
"VersionMismatch", "expected", __self_0, "found",
&__self_1),
DeserializeErrorKind::EndianMismatch {
expected: __self_0, found: __self_1 } =>
::core::fmt::Formatter::debug_struct_field2_finish(f,
"EndianMismatch", "expected", __self_0, "found", &__self_1),
DeserializeErrorKind::AlignmentMismatch {
alignment: __self_0, address: __self_1 } =>
::core::fmt::Formatter::debug_struct_field2_finish(f,
"AlignmentMismatch", "alignment", __self_0, "address",
&__self_1),
DeserializeErrorKind::LabelMismatch { expected: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"LabelMismatch", "expected", &__self_0),
DeserializeErrorKind::ArithmeticOverflow { what: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"ArithmeticOverflow", "what", &__self_0),
DeserializeErrorKind::PatternID { err: __self_0, what: __self_1 }
=>
::core::fmt::Formatter::debug_struct_field2_finish(f,
"PatternID", "err", __self_0, "what", &__self_1),
DeserializeErrorKind::StateID { err: __self_0, what: __self_1 } =>
::core::fmt::Formatter::debug_struct_field2_finish(f,
"StateID", "err", __self_0, "what", &__self_1),
}
}
}Debug)]
141enum DeserializeErrorKind {
142 Generic { msg: &'static str },
143 BufferTooSmall { what: &'static str },
144 InvalidUsize { what: &'static str },
145 VersionMismatch { expected: u32, found: u32 },
146 EndianMismatch { expected: u32, found: u32 },
147 AlignmentMismatch { alignment: usize, address: usize },
148 LabelMismatch { expected: &'static str },
149 ArithmeticOverflow { what: &'static str },
150 PatternID { err: PatternIDError, what: &'static str },
151 StateID { err: StateIDError, what: &'static str },
152}
153154impl DeserializeError {
155pub(crate) fn generic(msg: &'static str) -> DeserializeError {
156DeserializeError(DeserializeErrorKind::Generic { msg })
157 }
158159pub(crate) fn buffer_too_small(what: &'static str) -> DeserializeError {
160DeserializeError(DeserializeErrorKind::BufferTooSmall { what })
161 }
162163fn invalid_usize(what: &'static str) -> DeserializeError {
164DeserializeError(DeserializeErrorKind::InvalidUsize { what })
165 }
166167fn version_mismatch(expected: u32, found: u32) -> DeserializeError {
168DeserializeError(DeserializeErrorKind::VersionMismatch {
169expected,
170found,
171 })
172 }
173174fn endian_mismatch(expected: u32, found: u32) -> DeserializeError {
175DeserializeError(DeserializeErrorKind::EndianMismatch {
176expected,
177found,
178 })
179 }
180181fn alignment_mismatch(
182 alignment: usize,
183 address: usize,
184 ) -> DeserializeError {
185DeserializeError(DeserializeErrorKind::AlignmentMismatch {
186alignment,
187address,
188 })
189 }
190191fn label_mismatch(expected: &'static str) -> DeserializeError {
192DeserializeError(DeserializeErrorKind::LabelMismatch { expected })
193 }
194195fn arithmetic_overflow(what: &'static str) -> DeserializeError {
196DeserializeError(DeserializeErrorKind::ArithmeticOverflow { what })
197 }
198199fn pattern_id_error(
200 err: PatternIDError,
201 what: &'static str,
202 ) -> DeserializeError {
203DeserializeError(DeserializeErrorKind::PatternID { err, what })
204 }
205206pub(crate) fn state_id_error(
207 err: StateIDError,
208 what: &'static str,
209 ) -> DeserializeError {
210DeserializeError(DeserializeErrorKind::StateID { err, what })
211 }
212}
213214#[cfg(feature = "std")]
215impl std::error::Errorfor DeserializeError {}
216217impl core::fmt::Displayfor DeserializeError {
218fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
219use self::DeserializeErrorKind::*;
220221match self.0 {
222Generic { msg } => f.write_fmt(format_args!("{0}", msg))write!(f, "{msg}"),
223BufferTooSmall { what } => {
224f.write_fmt(format_args!("buffer is too small to read {0}", what))write!(f, "buffer is too small to read {what}")225 }
226InvalidUsize { what } => {
227f.write_fmt(format_args!("{0} is too big to fit in a usize", what))write!(f, "{what} is too big to fit in a usize")228 }
229VersionMismatch { expected, found } => f.write_fmt(format_args!("unsupported version: expected version {0} but found version {1}",
expected, found))write!(
230f,
231"unsupported version: \
232 expected version {expected} but found version {found}",
233 ),
234EndianMismatch { expected, found } => f.write_fmt(format_args!("endianness mismatch: expected 0x{0:X} but got 0x{1:X}. (Are you trying to load an object serialized with a different endianness?)",
expected, found))write!(
235f,
236"endianness mismatch: expected 0x{expected:X} but \
237 got 0x{found:X}. (Are you trying to load an object \
238 serialized with a different endianness?)",
239 ),
240AlignmentMismatch { alignment, address } => f.write_fmt(format_args!("alignment mismatch: slice starts at address 0x{0:X}, which is not aligned to a {1} byte boundary",
address, alignment))write!(
241f,
242"alignment mismatch: slice starts at address 0x{address:X}, \
243 which is not aligned to a {alignment} byte boundary",
244 ),
245LabelMismatch { expected } => f.write_fmt(format_args!("label mismatch: start of serialized object should contain a NUL terminated {0:?} label, but a different label was found",
expected))write!(
246f,
247"label mismatch: start of serialized object should \
248 contain a NUL terminated {expected:?} label, but a different \
249 label was found",
250 ),
251ArithmeticOverflow { what } => {
252f.write_fmt(format_args!("arithmetic overflow for {0}", what))write!(f, "arithmetic overflow for {what}")253 }
254PatternID { ref err, what } => {
255f.write_fmt(format_args!("failed to read pattern ID for {0}: {1}", what, err))write!(f, "failed to read pattern ID for {what}: {err}")256 }
257StateID { ref err, what } => {
258f.write_fmt(format_args!("failed to read state ID for {0}: {1}", what, err))write!(f, "failed to read state ID for {what}: {err}")259 }
260 }
261 }
262}
263264/// Safely converts a `&[u32]` to `&[StateID]` with zero cost.
265#[cfg_attr(feature = "perf-inline", inline(always))]
266pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] {
267// SAFETY: This is safe because StateID is defined to have the same memory
268 // representation as a u32 (it is repr(transparent)). While not every u32
269 // is a "valid" StateID, callers are not permitted to rely on the validity
270 // of StateIDs for memory safety. It can only lead to logical errors. (This
271 // is why StateID::new_unchecked is safe.)
272unsafe {
273 core::slice::from_raw_parts(
274slice.as_ptr().cast::<StateID>(),
275slice.len(),
276 )
277 }
278}
279280/// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost.
281pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] {
282// SAFETY: This is safe because StateID is defined to have the same memory
283 // representation as a u32 (it is repr(transparent)). While not every u32
284 // is a "valid" StateID, callers are not permitted to rely on the validity
285 // of StateIDs for memory safety. It can only lead to logical errors. (This
286 // is why StateID::new_unchecked is safe.)
287unsafe {
288 core::slice::from_raw_parts_mut(
289slice.as_mut_ptr().cast::<StateID>(),
290slice.len(),
291 )
292 }
293}
294295/// Safely converts a `&[u32]` to `&[PatternID]` with zero cost.
296#[cfg_attr(feature = "perf-inline", inline(always))]
297pub(crate) fn u32s_to_pattern_ids(slice: &[u32]) -> &[PatternID] {
298// SAFETY: This is safe because PatternID is defined to have the same
299 // memory representation as a u32 (it is repr(transparent)). While not
300 // every u32 is a "valid" PatternID, callers are not permitted to rely
301 // on the validity of PatternIDs for memory safety. It can only lead to
302 // logical errors. (This is why PatternID::new_unchecked is safe.)
303unsafe {
304 core::slice::from_raw_parts(
305slice.as_ptr().cast::<PatternID>(),
306slice.len(),
307 )
308 }
309}
310311/// Checks that the given slice has an alignment that matches `T`.
312///
313/// This is useful for checking that a slice has an appropriate alignment
314/// before casting it to a &[T]. Note though that alignment is not itself
315/// sufficient to perform the cast for any `T`.
316pub(crate) fn check_alignment<T>(
317 slice: &[u8],
318) -> Result<(), DeserializeError> {
319let alignment = core::mem::align_of::<T>();
320let address = slice.as_ptr().as_usize();
321if address % alignment == 0 {
322return Ok(());
323 }
324Err(DeserializeError::alignment_mismatch(alignment, address))
325}
326327/// Reads a possibly empty amount of padding, up to 7 bytes, from the beginning
328/// of the given slice. All padding bytes must be NUL bytes.
329///
330/// This is useful because it can be theoretically necessary to pad the
331/// beginning of a serialized object with NUL bytes to ensure that it starts
332/// at a correctly aligned address. These padding bytes should come immediately
333/// before the label.
334///
335/// This returns the number of bytes read from the given slice.
336pub(crate) fn skip_initial_padding(slice: &[u8]) -> usize {
337let mut nread = 0;
338while nread < 7 && nread < slice.len() && slice[nread] == 0 {
339 nread += 1;
340 }
341nread342}
343344/// Allocate a byte buffer of the given size, along with some initial padding
345/// such that `buf[padding..]` has the same alignment as `T`, where the
346/// alignment of `T` must be at most `8`. In particular, callers should treat
347/// the first N bytes (second return value) as padding bytes that must not be
348/// overwritten. In all cases, the following identity holds:
349///
350/// ```ignore
351/// let (buf, padding) = alloc_aligned_buffer::<StateID>(SIZE);
352/// assert_eq!(SIZE, buf[padding..].len());
353/// ```
354///
355/// In practice, padding is often zero.
356///
357/// The requirement for `8` as a maximum here is somewhat arbitrary. In
358/// practice, we never need anything bigger in this crate, and so this function
359/// does some sanity asserts under the assumption of a max alignment of `8`.
360#[cfg(feature = "alloc")]
361pub(crate) fn alloc_aligned_buffer<T>(size: usize) -> (Vec<u8>, usize) {
362// NOTE: This is a kludge because there's no easy way to allocate a Vec<u8>
363 // with an alignment guaranteed to be greater than 1. We could create a
364 // Vec<u32>, but this cannot be safely transmuted to a Vec<u8> without
365 // concern, since reallocing or dropping the Vec<u8> is UB (different
366 // alignment than the initial allocation). We could define a wrapper type
367 // to manage this for us, but it seems like more machinery than it's worth.
368let buf = ::alloc::vec::from_elem(0, size)vec![0; size];
369let align = core::mem::align_of::<T>();
370let address = buf.as_ptr().as_usize();
371if address % align == 0 {
372return (buf, 0);
373 }
374// Let's try this again. We have to create a totally new alloc with
375 // the maximum amount of bytes we might need. We can't just extend our
376 // pre-existing 'buf' because that might create a new alloc with a
377 // different alignment.
378let extra = align - 1;
379let mut buf = ::alloc::vec::from_elem(0, size + extra)vec![0; size + extra];
380let address = buf.as_ptr().as_usize();
381// The code below handles the case where 'address' is aligned to T, so if
382 // we got lucky and 'address' is now aligned to T (when it previously
383 // wasn't), then we're done.
384if address % align == 0 {
385buf.truncate(size);
386return (buf, 0);
387 }
388let padding = ((address & !(align - 1)).checked_add(align).unwrap())
389 .checked_sub(address)
390 .unwrap();
391if !(padding <= 7) {
{
::core::panicking::panic_fmt(format_args!("padding of {0} is bigger than 7",
padding));
}
};assert!(padding <= 7, "padding of {padding} is bigger than 7");
392if !(padding <= extra) {
{
::core::panicking::panic_fmt(format_args!("padding of {0} is bigger than extra {1} bytes",
padding, extra));
}
};assert!(
393 padding <= extra,
394"padding of {padding} is bigger than extra {extra} bytes",
395 );
396buf.truncate(size + padding);
397match (&(size + padding), &buf.len()) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::None);
}
}
};assert_eq!(size + padding, buf.len());
398match (&0, &(buf[padding..].as_ptr().as_usize() % align)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::Some(format_args!("expected end of initial padding to be aligned to {0}",
align)));
}
}
};assert_eq!(
3990,
400 buf[padding..].as_ptr().as_usize() % align,
401"expected end of initial padding to be aligned to {align}",
402 );
403 (buf, padding)
404}
405406/// Reads a NUL terminated label starting at the beginning of the given slice.
407///
408/// If a NUL terminated label could not be found, then an error is returned.
409/// Similarly, if a label is found but doesn't match the expected label, then
410/// an error is returned.
411///
412/// Upon success, the total number of bytes read (including padding bytes) is
413/// returned.
414pub(crate) fn read_label(
415 slice: &[u8],
416 expected_label: &'static str,
417) -> Result<usize, DeserializeError> {
418// Set an upper bound on how many bytes we scan for a NUL. Since no label
419 // in this crate is longer than 256 bytes, if we can't find one within that
420 // range, then we have corrupted data.
421let first_nul =
422slice[..cmp::min(slice.len(), 256)].iter().position(|&b| b == 0);
423let first_nul = match first_nul {
424Some(first_nul) => first_nul,
425None => {
426return Err(DeserializeError::generic(
427"could not find NUL terminated label \
428 at start of serialized object",
429 ));
430 }
431 };
432let len = first_nul + padding_len(first_nul);
433if slice.len() < len {
434return Err(DeserializeError::generic(
435"could not find properly sized label at start of serialized object"
436));
437 }
438if expected_label.as_bytes() != &slice[..first_nul] {
439return Err(DeserializeError::label_mismatch(expected_label));
440 }
441Ok(len)
442}
443444/// Writes the given label to the buffer as a NUL terminated string. The label
445/// given must not contain NUL, otherwise this will panic. Similarly, the label
446/// must not be longer than 255 bytes, otherwise this will panic.
447///
448/// Additional NUL bytes are written as necessary to ensure that the number of
449/// bytes written is always a multiple of 4.
450///
451/// Upon success, the total number of bytes written (including padding) is
452/// returned.
453pub(crate) fn write_label(
454 label: &str,
455 dst: &mut [u8],
456) -> Result<usize, SerializeError> {
457let nwrite = write_label_len(label);
458if dst.len() < nwrite {
459return Err(SerializeError::buffer_too_small("label"));
460 }
461dst[..label.len()].copy_from_slice(label.as_bytes());
462for i in 0..(nwrite - label.len()) {
463 dst[label.len() + i] = 0;
464 }
465match (&(nwrite % 4), &0) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::None);
}
}
};assert_eq!(nwrite % 4, 0);
466Ok(nwrite)
467}
468469/// Returns the total number of bytes (including padding) that would be written
470/// for the given label. This panics if the given label contains a NUL byte or
471/// is longer than 255 bytes. (The size restriction exists so that searching
472/// for a label during deserialization can be done in small bounded space.)
473pub(crate) fn write_label_len(label: &str) -> usize {
474if !(label.len() <= 255) {
{
::core::panicking::panic_fmt(format_args!("label must not be longer than 255 bytes"));
}
};assert!(label.len() <= 255, "label must not be longer than 255 bytes");
475if !label.bytes().all(|b| b != 0) {
{
::core::panicking::panic_fmt(format_args!("label must not contain NUL bytes"));
}
};assert!(label.bytes().all(|b| b != 0), "label must not contain NUL bytes");
476let label_len = label.len() + 1; // +1 for the NUL terminator
477label_len + padding_len(label_len)
478}
479480/// Reads the endianness check from the beginning of the given slice and
481/// confirms that the endianness of the serialized object matches the expected
482/// endianness. If the slice is too small or if the endianness check fails,
483/// this returns an error.
484///
485/// Upon success, the total number of bytes read is returned.
486pub(crate) fn read_endianness_check(
487 slice: &[u8],
488) -> Result<usize, DeserializeError> {
489let (n, nr) = try_read_u32(slice, "endianness check")?;
490match (&nr, &write_endianness_check_len()) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::None);
}
}
};assert_eq!(nr, write_endianness_check_len());
491if n != 0xFEFF {
492return Err(DeserializeError::endian_mismatch(0xFEFF, n));
493 }
494Ok(nr)
495}
496497/// Writes 0xFEFF as an integer using the given endianness.
498///
499/// This is useful for writing into the header of a serialized object. It can
500/// be read during deserialization as a sanity check to ensure the proper
501/// endianness is used.
502///
503/// Upon success, the total number of bytes written is returned.
504pub(crate) fn write_endianness_check<E: Endian>(
505 dst: &mut [u8],
506) -> Result<usize, SerializeError> {
507let nwrite = write_endianness_check_len();
508if dst.len() < nwrite {
509return Err(SerializeError::buffer_too_small("endianness check"));
510 }
511 E::write_u32(0xFEFF, dst);
512Ok(nwrite)
513}
514515/// Returns the number of bytes written by the endianness check.
516pub(crate) fn write_endianness_check_len() -> usize {
517 size_of::<u32>()
518}
519520/// Reads a version number from the beginning of the given slice and confirms
521/// that is matches the expected version number given. If the slice is too
522/// small or if the version numbers aren't equivalent, this returns an error.
523///
524/// Upon success, the total number of bytes read is returned.
525///
526/// N.B. Currently, we require that the version number is exactly equivalent.
527/// In the future, if we bump the version number without a semver bump, then
528/// we'll need to relax this a bit and support older versions.
529pub(crate) fn read_version(
530 slice: &[u8],
531 expected_version: u32,
532) -> Result<usize, DeserializeError> {
533let (n, nr) = try_read_u32(slice, "version")?;
534match (&nr, &write_version_len()) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::None);
}
}
};assert_eq!(nr, write_version_len());
535if n != expected_version {
536return Err(DeserializeError::version_mismatch(expected_version, n));
537 }
538Ok(nr)
539}
540541/// Writes the given version number to the beginning of the given slice.
542///
543/// This is useful for writing into the header of a serialized object. It can
544/// be read during deserialization as a sanity check to ensure that the library
545/// code supports the format of the serialized object.
546///
547/// Upon success, the total number of bytes written is returned.
548pub(crate) fn write_version<E: Endian>(
549 version: u32,
550 dst: &mut [u8],
551) -> Result<usize, SerializeError> {
552let nwrite = write_version_len();
553if dst.len() < nwrite {
554return Err(SerializeError::buffer_too_small("version number"));
555 }
556 E::write_u32(version, dst);
557Ok(nwrite)
558}
559560/// Returns the number of bytes written by writing the version number.
561pub(crate) fn write_version_len() -> usize {
562 size_of::<u32>()
563}
564565/// Reads a pattern ID from the given slice. If the slice has insufficient
566/// length, then this panics. If the deserialized integer exceeds the pattern
567/// ID limit for the current target, then this returns an error.
568///
569/// Upon success, this also returns the number of bytes read.
570pub(crate) fn read_pattern_id(
571 slice: &[u8],
572 what: &'static str,
573) -> Result<(PatternID, usize), DeserializeError> {
574let bytes: [u8; PatternID::SIZE] =
575slice[..PatternID::SIZE].try_into().unwrap();
576let pid = PatternID::from_ne_bytes(bytes)
577 .map_err(|err| DeserializeError::pattern_id_error(err, what))?;
578Ok((pid, PatternID::SIZE))
579}
580581/// Reads a pattern ID from the given slice. If the slice has insufficient
582/// length, then this panics. Otherwise, the deserialized integer is assumed
583/// to be a valid pattern ID.
584///
585/// This also returns the number of bytes read.
586pub(crate) fn read_pattern_id_unchecked(slice: &[u8]) -> (PatternID, usize) {
587let pid = PatternID::from_ne_bytes_unchecked(
588slice[..PatternID::SIZE].try_into().unwrap(),
589 );
590 (pid, PatternID::SIZE)
591}
592593/// Write the given pattern ID to the beginning of the given slice of bytes
594/// using the specified endianness. The given slice must have length at least
595/// `PatternID::SIZE`, or else this panics. Upon success, the total number of
596/// bytes written is returned.
597pub(crate) fn write_pattern_id<E: Endian>(
598 pid: PatternID,
599 dst: &mut [u8],
600) -> usize {
601 E::write_u32(pid.as_u32(), dst);
602PatternID::SIZE603}
604605/// Attempts to read a state ID from the given slice. If the slice has an
606/// insufficient number of bytes or if the state ID exceeds the limit for
607/// the current target, then this returns an error.
608///
609/// Upon success, this also returns the number of bytes read.
610pub(crate) fn try_read_state_id(
611 slice: &[u8],
612 what: &'static str,
613) -> Result<(StateID, usize), DeserializeError> {
614if slice.len() < StateID::SIZE {
615return Err(DeserializeError::buffer_too_small(what));
616 }
617read_state_id(slice, what)
618}
619620/// Reads a state ID from the given slice. If the slice has insufficient
621/// length, then this panics. If the deserialized integer exceeds the state ID
622/// limit for the current target, then this returns an error.
623///
624/// Upon success, this also returns the number of bytes read.
625pub(crate) fn read_state_id(
626 slice: &[u8],
627 what: &'static str,
628) -> Result<(StateID, usize), DeserializeError> {
629let bytes: [u8; StateID::SIZE] =
630slice[..StateID::SIZE].try_into().unwrap();
631let sid = StateID::from_ne_bytes(bytes)
632 .map_err(|err| DeserializeError::state_id_error(err, what))?;
633Ok((sid, StateID::SIZE))
634}
635636/// Reads a state ID from the given slice. If the slice has insufficient
637/// length, then this panics. Otherwise, the deserialized integer is assumed
638/// to be a valid state ID.
639///
640/// This also returns the number of bytes read.
641pub(crate) fn read_state_id_unchecked(slice: &[u8]) -> (StateID, usize) {
642let sid = StateID::from_ne_bytes_unchecked(
643slice[..StateID::SIZE].try_into().unwrap(),
644 );
645 (sid, StateID::SIZE)
646}
647648/// Write the given state ID to the beginning of the given slice of bytes
649/// using the specified endianness. The given slice must have length at least
650/// `StateID::SIZE`, or else this panics. Upon success, the total number of
651/// bytes written is returned.
652pub(crate) fn write_state_id<E: Endian>(
653 sid: StateID,
654 dst: &mut [u8],
655) -> usize {
656 E::write_u32(sid.as_u32(), dst);
657StateID::SIZE658}
659660/// Try to read a u16 as a usize from the beginning of the given slice in
661/// native endian format. If the slice has fewer than 2 bytes or if the
662/// deserialized number cannot be represented by usize, then this returns an
663/// error. The error message will include the `what` description of what is
664/// being deserialized, for better error messages. `what` should be a noun in
665/// singular form.
666///
667/// Upon success, this also returns the number of bytes read.
668pub(crate) fn try_read_u16_as_usize(
669 slice: &[u8],
670 what: &'static str,
671) -> Result<(usize, usize), DeserializeError> {
672try_read_u16(slice, what).and_then(|(n, nr)| {
673usize::try_from(n)
674 .map(|n| (n, nr))
675 .map_err(|_| DeserializeError::invalid_usize(what))
676 })
677}
678679/// Try to read a u32 as a usize from the beginning of the given slice in
680/// native endian format. If the slice has fewer than 4 bytes or if the
681/// deserialized number cannot be represented by usize, then this returns an
682/// error. The error message will include the `what` description of what is
683/// being deserialized, for better error messages. `what` should be a noun in
684/// singular form.
685///
686/// Upon success, this also returns the number of bytes read.
687pub(crate) fn try_read_u32_as_usize(
688 slice: &[u8],
689 what: &'static str,
690) -> Result<(usize, usize), DeserializeError> {
691try_read_u32(slice, what).and_then(|(n, nr)| {
692usize::try_from(n)
693 .map(|n| (n, nr))
694 .map_err(|_| DeserializeError::invalid_usize(what))
695 })
696}
697698/// Try to read a u16 from the beginning of the given slice in native endian
699/// format. If the slice has fewer than 2 bytes, then this returns an error.
700/// The error message will include the `what` description of what is being
701/// deserialized, for better error messages. `what` should be a noun in
702/// singular form.
703///
704/// Upon success, this also returns the number of bytes read.
705pub(crate) fn try_read_u16(
706 slice: &[u8],
707 what: &'static str,
708) -> Result<(u16, usize), DeserializeError> {
709check_slice_len(slice, size_of::<u16>(), what)?;
710Ok((read_u16(slice), size_of::<u16>()))
711}
712713/// Try to read a u32 from the beginning of the given slice in native endian
714/// format. If the slice has fewer than 4 bytes, then this returns an error.
715/// The error message will include the `what` description of what is being
716/// deserialized, for better error messages. `what` should be a noun in
717/// singular form.
718///
719/// Upon success, this also returns the number of bytes read.
720pub(crate) fn try_read_u32(
721 slice: &[u8],
722 what: &'static str,
723) -> Result<(u32, usize), DeserializeError> {
724check_slice_len(slice, size_of::<u32>(), what)?;
725Ok((read_u32(slice), size_of::<u32>()))
726}
727728/// Try to read a u128 from the beginning of the given slice in native endian
729/// format. If the slice has fewer than 16 bytes, then this returns an error.
730/// The error message will include the `what` description of what is being
731/// deserialized, for better error messages. `what` should be a noun in
732/// singular form.
733///
734/// Upon success, this also returns the number of bytes read.
735pub(crate) fn try_read_u128(
736 slice: &[u8],
737 what: &'static str,
738) -> Result<(u128, usize), DeserializeError> {
739check_slice_len(slice, size_of::<u128>(), what)?;
740Ok((read_u128(slice), size_of::<u128>()))
741}
742743/// Read a u16 from the beginning of the given slice in native endian format.
744/// If the slice has fewer than 2 bytes, then this panics.
745///
746/// Marked as inline to speed up sparse searching which decodes integers from
747/// its automaton at search time.
748#[cfg_attr(feature = "perf-inline", inline(always))]
749pub(crate) fn read_u16(slice: &[u8]) -> u16 {
750let bytes: [u8; 2] = slice[..size_of::<u16>()].try_into().unwrap();
751u16::from_ne_bytes(bytes)
752}
753754/// Read a u32 from the beginning of the given slice in native endian format.
755/// If the slice has fewer than 4 bytes, then this panics.
756///
757/// Marked as inline to speed up sparse searching which decodes integers from
758/// its automaton at search time.
759#[cfg_attr(feature = "perf-inline", inline(always))]
760pub(crate) fn read_u32(slice: &[u8]) -> u32 {
761let bytes: [u8; 4] = slice[..size_of::<u32>()].try_into().unwrap();
762u32::from_ne_bytes(bytes)
763}
764765/// Read a u128 from the beginning of the given slice in native endian format.
766/// If the slice has fewer than 16 bytes, then this panics.
767pub(crate) fn read_u128(slice: &[u8]) -> u128 {
768let bytes: [u8; 16] = slice[..size_of::<u128>()].try_into().unwrap();
769u128::from_ne_bytes(bytes)
770}
771772/// Checks that the given slice has some minimal length. If it's smaller than
773/// the bound given, then a "buffer too small" error is returned with `what`
774/// describing what the buffer represents.
775pub(crate) fn check_slice_len<T>(
776 slice: &[T],
777 at_least_len: usize,
778 what: &'static str,
779) -> Result<(), DeserializeError> {
780if slice.len() < at_least_len {
781return Err(DeserializeError::buffer_too_small(what));
782 }
783Ok(())
784}
785786/// Multiply the given numbers, and on overflow, return an error that includes
787/// 'what' in the error message.
788///
789/// This is useful when doing arithmetic with untrusted data.
790pub(crate) fn mul(
791 a: usize,
792 b: usize,
793 what: &'static str,
794) -> Result<usize, DeserializeError> {
795match a.checked_mul(b) {
796Some(c) => Ok(c),
797None => Err(DeserializeError::arithmetic_overflow(what)),
798 }
799}
800801/// Add the given numbers, and on overflow, return an error that includes
802/// 'what' in the error message.
803///
804/// This is useful when doing arithmetic with untrusted data.
805pub(crate) fn add(
806 a: usize,
807 b: usize,
808 what: &'static str,
809) -> Result<usize, DeserializeError> {
810match a.checked_add(b) {
811Some(c) => Ok(c),
812None => Err(DeserializeError::arithmetic_overflow(what)),
813 }
814}
815816/// Shift `a` left by `b`, and on overflow, return an error that includes
817/// 'what' in the error message.
818///
819/// This is useful when doing arithmetic with untrusted data.
820pub(crate) fn shl(
821 a: usize,
822 b: usize,
823 what: &'static str,
824) -> Result<usize, DeserializeError> {
825let amount = u32::try_from(b)
826 .map_err(|_| DeserializeError::arithmetic_overflow(what))?;
827match a.checked_shl(amount) {
828Some(c) => Ok(c),
829None => Err(DeserializeError::arithmetic_overflow(what)),
830 }
831}
832833/// Returns the number of additional bytes required to add to the given length
834/// in order to make the total length a multiple of 4. The return value is
835/// always less than 4.
836pub(crate) fn padding_len(non_padding_len: usize) -> usize {
837 (4 - (non_padding_len & 0b11)) & 0b11
838}
839840/// A simple trait for writing code generic over endianness.
841///
842/// This is similar to what byteorder provides, but we only need a very small
843/// subset.
844pub(crate) trait Endian {
845/// Writes a u16 to the given destination buffer in a particular
846 /// endianness. If the destination buffer has a length smaller than 2, then
847 /// this panics.
848fn write_u16(n: u16, dst: &mut [u8]);
849850/// Writes a u32 to the given destination buffer in a particular
851 /// endianness. If the destination buffer has a length smaller than 4, then
852 /// this panics.
853fn write_u32(n: u32, dst: &mut [u8]);
854855/// Writes a u128 to the given destination buffer in a particular
856 /// endianness. If the destination buffer has a length smaller than 16,
857 /// then this panics.
858fn write_u128(n: u128, dst: &mut [u8]);
859}
860861/// Little endian writing.
862pub(crate) enum LE {}
863/// Big endian writing.
864pub(crate) enum BE {}
865866#[cfg(target_endian = "little")]
867pub(crate) type NE = LE;
868#[cfg(target_endian = "big")]
869pub(crate) type NE = BE;
870871impl Endianfor LE {
872fn write_u16(n: u16, dst: &mut [u8]) {
873dst[..2].copy_from_slice(&n.to_le_bytes());
874 }
875876fn write_u32(n: u32, dst: &mut [u8]) {
877dst[..4].copy_from_slice(&n.to_le_bytes());
878 }
879880fn write_u128(n: u128, dst: &mut [u8]) {
881dst[..16].copy_from_slice(&n.to_le_bytes());
882 }
883}
884885impl Endianfor BE {
886fn write_u16(n: u16, dst: &mut [u8]) {
887dst[..2].copy_from_slice(&n.to_be_bytes());
888 }
889890fn write_u32(n: u32, dst: &mut [u8]) {
891dst[..4].copy_from_slice(&n.to_be_bytes());
892 }
893894fn write_u128(n: u128, dst: &mut [u8]) {
895dst[..16].copy_from_slice(&n.to_be_bytes());
896 }
897}
898899#[cfg(all(test, feature = "alloc"))]
900mod tests {
901use super::*;
902903#[test]
904fn labels() {
905let mut buf = [0; 1024];
906907let nwrite = write_label("fooba", &mut buf).unwrap();
908assert_eq!(nwrite, 8);
909assert_eq!(&buf[..nwrite], b"fooba\x00\x00\x00");
910911let nread = read_label(&buf, "fooba").unwrap();
912assert_eq!(nread, 8);
913 }
914915#[test]
916 #[should_panic]
917fn bad_label_interior_nul() {
918// interior NULs are not allowed
919write_label("foo\x00bar", &mut [0; 1024]).unwrap();
920 }
921922#[test]
923fn bad_label_almost_too_long() {
924// ok
925write_label(&"z".repeat(255), &mut [0; 1024]).unwrap();
926 }
927928#[test]
929 #[should_panic]
930fn bad_label_too_long() {
931// labels longer than 255 bytes are banned
932write_label(&"z".repeat(256), &mut [0; 1024]).unwrap();
933 }
934935#[test]
936fn padding() {
937assert_eq!(0, padding_len(8));
938assert_eq!(3, padding_len(9));
939assert_eq!(2, padding_len(10));
940assert_eq!(1, padding_len(11));
941assert_eq!(0, padding_len(12));
942assert_eq!(3, padding_len(13));
943assert_eq!(2, padding_len(14));
944assert_eq!(1, padding_len(15));
945assert_eq!(0, padding_len(16));
946 }
947}