// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ule::*;
use crate::varzerovec::VarZeroVecFormat;
use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
#[cfg(feature = "alloc")]
use alloc::borrow::{Cow, ToOwned};
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
#[cfg(feature = "alloc")]
use alloc::string::String;
#[cfg(feature = "alloc")]
use alloc::{vec, vec::Vec};
#[cfg(feature = "alloc")]
use core::mem;

/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
/// custom DSTs where the type cannot be obtained as a reference to some other type.
///
/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field
/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices
/// passed to the callback must, when concatenated, be a valid instance of the VarULE type.
///
/// See the [custom VarULE documentation](crate::ule::custom) for examples.
///
/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`]
/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to
/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where
/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work.
///
/// A typical implementation will take each field in the order found in the [`VarULE`] type,
/// convert it to ULE, call [`ULE::slice_as_bytes()`] on them, and pass the slices to `cb` in order.
/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have its underlying
/// byte representation passed through.
///
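/// For instance, a hand-written implementation might look like the following sketch.
/// `Foo`, `FooULE`, and their fields are hypothetical and not part of this crate:
///
/// ```ignore
/// // Owned counterpart of a hypothetical unsized `FooULE { field: <u32 as AsULE>::ULE, data: [u8] }`.
/// struct Foo {
///     field: u32,
///     data: Vec<u8>,
/// }
///
/// unsafe impl EncodeAsVarULE<FooULE> for Foo {
///     fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
///         // Convert the fixed-size field to its ULE form, then hand each field's bytes
///         // to `cb` in declaration order; concatenated, they form a valid `FooULE`.
///         let field = self.field.to_unaligned();
///         cb(&[ULE::slice_as_bytes(core::slice::from_ref(&field)), &self.data])
///     }
/// }
/// ```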
/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical
/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the
/// dynamically-sized part.
///
/// # Reverse-encoding VarULE
///
/// This trait maps a struct to its byte representation ("serialization"), and
/// [`ZeroFrom`](zerofrom::ZeroFrom) performs the opposite operation, taking those bytes and
/// creating a struct from them ("deserialization").
///
/// # Safety
///
/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are:
/// - It must call `cb` (only once)
/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type
///   (i.e. if fed to [`VarULE::validate_bytes()`] they must produce a successful result)
/// - It must return the return value of `cb` to the caller
///
/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided.
/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced
/// with `unreachable!()`.
///
/// The safety invariants of [`Self::encode_var_ule_len()`] are:
/// - It must return the length of the corresponding VarULE type
///
/// The safety invariants of [`Self::encode_var_ule_write()`] are:
/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type
pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
    /// Calls `cb` with a piecewise list of byte slices that, when concatenated,
    /// produce the memory pattern of the corresponding instance of `T`.
    ///
    /// Do not call this function directly; instead use the other two. Some implementors
    /// may define this function to panic.
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R;

    /// Returns the length, in bytes, of the corresponding [`VarULE`] type.
    fn encode_var_ule_len(&self) -> usize {
        self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum())
    }

    /// Writes the corresponding [`VarULE`] type to the `dst` buffer. `dst` should
    /// have the length returned by [`Self::encode_var_ule_len()`].
    fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
        debug_assert_eq!(self.encode_var_ule_len(), dst.len());
        self.encode_var_ule_as_slices(move |slices| {
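            // Copy each slice to the front of the remaining buffer, then re-slice `dst`
            // so that it starts right after the bytes just written.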
            #[expect(clippy::indexing_slicing)] // by debug_assert
            for slice in slices {
                dst[..slice.len()].copy_from_slice(slice);
                dst = &mut dst[slice.len()..];
            }
        });
    }
}

/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>`
///
/// This is primarily useful for generating `Deserialize` impls for VarULE types
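///
/// For example, a usage sketch, assuming this function is re-exported as
/// `zerovec::ule::encode_varule_to_box` (the exact public path is an assumption in this sketch):
///
/// ```ignore
/// use zerovec::ule::encode_varule_to_box;
///
/// // `str` is a `VarULE` type and implements `EncodeAsVarULE<str>`, so it can encode itself.
/// let boxed: Box<str> = encode_varule_to_box("hello");
/// assert_eq!(&*boxed, "hello");
/// ```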
#[cfg(feature = "alloc")]
pub fn encode_varule_to_box<S: EncodeAsVarULE<T> + ?Sized, T: VarULE + ?Sized>(x: &S) -> Box<T> {
    // zero-fill the vector to avoid uninitialized data UB
    let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
    x.encode_var_ule_write(&mut vec);
    let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice());
    unsafe {
        // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
        // and can be recouped via from_bytes_unchecked()
        let ptr: *mut T = T::from_bytes_unchecked(&boxed) as *const T as *mut T;

        // Safety: we can construct an owned version since the old owner is wrapped in
        // `ManuallyDrop` and will never be dropped
        Box::from_raw(ptr)
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_bytes(self)])
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_bytes(self)])
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ &'_ T {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_bytes(self)])
    }
}

#[cfg(feature = "alloc")]
unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T>
where
    T: ToOwned,
{
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_bytes(self.as_ref())])
    }
}

#[cfg(feature = "alloc")]
unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_bytes(self)])
    }
}

#[cfg(feature = "alloc")]
unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ Box<T> {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_bytes(self)])
    }
}

#[cfg(feature = "alloc")]
unsafe impl EncodeAsVarULE<str> for String {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[self.as_bytes()])
    }
}

#[cfg(feature = "alloc")]
unsafe impl EncodeAsVarULE<str> for &'_ String {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[self.as_bytes()])
    }
}

// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>`
// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here.
#[cfg(feature = "alloc")]
unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T>
where
    T: ULE,
{
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[<[T] as VarULE>::as_bytes(self)])
    }
}

unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T]
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.len() * core::mem::size_of::<T::ULE>()
    }

    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        #[allow(non_snake_case)]
        let S = core::mem::size_of::<T::ULE>();
        debug_assert_eq!(self.len() * S, dst.len());
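        // Convert each item to its unaligned ULE form and copy its bytes into the
        // corresponding `S`-byte chunk of `dst`.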
        for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) {
            let ule = item.to_unaligned();
            chunk.copy_from_slice(ULE::slice_as_bytes(core::slice::from_ref(&ule)));
        }
    }
}

#[cfg(feature = "alloc")]
unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T>
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.as_slice().encode_var_ule_len()
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        self.as_slice().encode_var_ule_write(dst)
    }
}

unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T>
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.as_bytes().len()
    }

    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        debug_assert_eq!(self.as_bytes().len(), dst.len());
        dst.copy_from_slice(self.as_bytes());
    }
}

unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E]
where
    T: VarULE + ?Sized,
    E: EncodeAsVarULE<T>,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unimplemented!()
    }

    #[expect(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV.
    fn encode_var_ule_len(&self) -> usize {
        crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize
    }

    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst)
    }
}

#[cfg(feature = "alloc")]
unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E>
where
    T: VarULE + ?Sized,
    E: EncodeAsVarULE<T>,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice())
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst)
    }
}

unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F>
where
    T: VarULE + ?Sized,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.as_bytes().len()
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        debug_assert_eq!(self.as_bytes().len(), dst.len());
        dst.copy_from_slice(self.as_bytes());
    }
}

#[cfg(test)]
mod test {
    use super::*;

    const STRING_ARRAY: [&str; 2] = ["hello", "world"];

    const STRING_SLICE: &[&str] = &STRING_ARRAY;

    const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];

    const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY];

    const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY];

    const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE];

    const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE];

    const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F];

    const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY];

    const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY];

    const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE];

    const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE];

    #[test]
    fn test_vzv_from() {
        type VZV<'a, T> = VarZeroVec<'a, T>;
        type ZS<T> = ZeroSlice<T>;
        type VZS<T> = VarZeroSlice<T>;

        let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY);
        let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()];
        let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()];
        let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()];

        let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY);
        let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()];
        let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()];
        let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()];

        let a: VZV<str> = VarZeroVec::from(&STRING_ARRAY);
        let b: VZV<str> = VarZeroVec::from(STRING_SLICE);
        let c: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE));
        assert_eq!(a, STRING_SLICE);
        assert_eq!(a, b);
        assert_eq!(a, c);

        let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY);
        let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE);
        let c: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec);
        assert_eq!(a, U8_2D_SLICE);
        assert_eq!(a, b);
        assert_eq!(a, c);
        let u8_3d_vzv_brackets = &[a.clone(), a.clone()];

        let a: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY);
        let b: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE);
        let c: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec);
        let d: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec);
        assert_eq!(a, U8_2D_SLICE);
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);
        let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()];

        let a: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY);
        let b: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE);
        let c: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec);
        let d: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets);
        assert_eq!(
            a.iter()
                .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>())
                .collect::<Vec<Vec<Vec<u8>>>>(),
            u8_3d_vec
        );
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);

        let a: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY);
        let b: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE);
        let c: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec);
        let d: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice);
        assert_eq!(
            a.iter()
                .map(|x| x
                    .iter()
                    .map(|y| y.iter().collect::<Vec<u8>>())
                    .collect::<Vec<Vec<u8>>>())
                .collect::<Vec<Vec<Vec<u8>>>>(),
            u8_3d_vec
        );
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);

        let a: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY);
        let b: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE);
        let c: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec);
        let d: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec);
        assert_eq!(a, u32_2d_zerovec);
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);
        let u32_3d_vzv = &[a.clone(), a.clone()];

        let a: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY);
        let b: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE);
        let c: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec);
        let d: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv);
        assert_eq!(
            a.iter()
                .map(|x| x
                    .iter()
                    .map(|y| y.iter().collect::<Vec<u32>>())
                    .collect::<Vec<Vec<u32>>>())
                .collect::<Vec<Vec<Vec<u32>>>>(),
            u32_3d_vec
        );
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);
    }
}