Skip to main content

zerovec/ule/
encode.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::ule::*;
6use crate::varzerovec::VarZeroVecFormat;
7use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
8#[cfg(feature = "alloc")]
9use alloc::borrow::{Cow, ToOwned};
10#[cfg(feature = "alloc")]
11use alloc::boxed::Box;
12#[cfg(feature = "alloc")]
13use alloc::string::String;
14#[cfg(feature = "alloc")]
15use alloc::{vec, vec::Vec};
16
17/// Allows types to be encoded as [`VarULE`]s. This is highly useful for implementing [`VarULE`] on
18/// custom DSTs where the type cannot be obtained as a reference to some other type.
19///
20/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field
21/// of the [`VarULE`] type to the callback, in order. For an implementation to be safe, the slices
22/// to the callback must, when concatenated, be a valid instance of the [`VarULE`] type.
23///
24/// See the [custom `VarULEdocumentation`](crate::ule::custom) for examples.
25///
26/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`]
27/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to
28/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where
29/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work.
30///
31/// A typical implementation will take each field in the order found in the [`VarULE`] type,
32/// convert it to ULE, call [`ULE::slice_as_bytes()`] on them, and pass the slices to `cb` in order.
33/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying
34/// byte representation passed through.
35///
36/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical
37/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the
38/// dynamically-sized part.
39///
40/// # Reverse-encoding [`VarULE`]
41///
42/// This trait maps a struct to its bytes representation ("serialization"), and
43/// [`ZeroFrom`](zerofrom::ZeroFrom) performs the opposite operation, taking those bytes and
44/// creating a struct from them ("deserialization").
45///
46/// # Safety
47///
48/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are:
49/// - It must call `cb` (only once)
50/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type
51///   (i.e. if fed to [`VarULE::validate_bytes()`] they must produce a successful result)
52/// - It must return the return value of `cb` to the caller
53///
54/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided.
55/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced
56/// with `unreachable!()`.
57///
58/// The safety invariants of [`Self::encode_var_ule_len()`] are:
59/// - It must return the length of the corresponding [`VarULE`] type
60///
61/// The safety invariants of [`Self::encode_var_ule_write()`] are:
62/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type
63pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
64    /// Calls `cb` with a piecewise list of byte slices that when concatenated
65    /// produce the memory pattern of the corresponding instance of `T`.
66    ///
67    /// Do not call this function directly; instead use the other two. Some implementors
68    /// may define this function to panic.
69    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R;
70
71    /// Return the length, in bytes, of the corresponding [`VarULE`] type
72    fn encode_var_ule_len(&self) -> usize {
73        self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum())
74    }
75
76    /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should
77    /// be the size of [`Self::encode_var_ule_len()`]
78    fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
79        if true {
    match (&self.encode_var_ule_len(), &dst.len()) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.encode_var_ule_len(), dst.len());
80        self.encode_var_ule_as_slices(move |slices| {
81            #[expect(clippy::indexing_slicing)] // by debug_assert
82            for slice in slices {
83                dst[..slice.len()].copy_from_slice(slice);
84                dst = &mut dst[slice.len()..];
85            }
86        });
87    }
88}
89
/// Encodes `x`, an [`EncodeAsVarULE`] source value of type `S`, into an owned `Box<T>`.
///
/// This is primarily useful for generating `Deserialize` impls for [`VarULE`] types
#[cfg(feature = "alloc")]
pub fn encode_varule_to_box<S: EncodeAsVarULE<T> + ?Sized, T: VarULE + ?Sized>(x: &S) -> Box<T> {
    // The buffer starts out zero-filled so that no uninitialized data is ever exposed (UB).
    let encoded_len = x.encode_var_ule_len();
    let mut buffer: Vec<u8> = vec![0; encoded_len];
    x.encode_var_ule_write(buffer.as_mut_slice());

    // `ManuallyDrop` keeps the boxed slice from being freed here; ownership of the
    // allocation is handed over to the `Box<T>` constructed below.
    let leaked = core::mem::ManuallyDrop::new(buffer.into_boxed_slice());
    let bytes: &[u8] = &leaked;

    // Safety: `bytes` points into a box, and `T` is a VarULE which guarantees it has the same
    // memory layout as `[u8]` and can be recouped via from_bytes_unchecked()
    let reference: &T = unsafe { T::from_bytes_unchecked(bytes) };
    let raw: *mut T = reference as *const T as *mut T;

    // Safety: we can construct an owned version since the older owner is never dropped
    unsafe { Box::from_raw(raw) }
}
108
109unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T {
110    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
111        cb(&[T::as_bytes(self)])
112    }
113}
114
115unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T {
116    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
117        cb(&[T::as_bytes(self)])
118    }
119}
120
121unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ &'_ T {
122    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
123        cb(&[T::as_bytes(self)])
124    }
125}
126
/// A `Cow` (borrowed or owned) encodes as the bytes of the [`VarULE`] value it refers to.
#[cfg(feature = "alloc")]
unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T>
where
    T: ToOwned,
{
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        let borrowed: &T = self.as_ref();
        cb(&[T::as_bytes(borrowed)])
    }
}
136
/// A boxed [`VarULE`] value encodes as the bytes of the value inside the box.
#[cfg(feature = "alloc")]
unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        let inner: &T = self;
        cb(&[<T as VarULE>::as_bytes(inner)])
    }
}
143
/// A reference to a boxed [`VarULE`] value encodes as the bytes of the boxed value.
#[cfg(feature = "alloc")]
unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ Box<T> {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        let inner: &T = self;
        cb(&[<T as VarULE>::as_bytes(inner)])
    }
}
150
/// An owned `String` encodes as `str`: its byte contents are passed through unchanged.
#[cfg(feature = "alloc")]
unsafe impl EncodeAsVarULE<str> for String {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        let utf8: &[u8] = String::as_bytes(self);
        cb(&[utf8])
    }
}
157
/// A borrowed `String` encodes as `str`: its byte contents are passed through unchanged.
#[cfg(feature = "alloc")]
unsafe impl EncodeAsVarULE<str> for &'_ String {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        let utf8: &[u8] = String::as_bytes(self);
        cb(&[utf8])
    }
}
164
// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>`
// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here.
//
// Because `T` is already a ULE, the vector's elements are handed to the callback
// as one byte slice without any per-element conversion.
#[cfg(feature = "alloc")]
unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T>
where
    T: ULE,
{
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        let elements: &[T] = self.as_slice();
        cb(&[<[T] as VarULE>::as_bytes(elements)])
    }
}
176
177unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T]
178where
179    T: AsULE + 'static,
180{
181    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
182        // unnecessary if the other two are implemented
183        ::core::panicking::panic("internal error: entered unreachable code")unreachable!()
184    }
185
186    #[inline]
187    fn encode_var_ule_len(&self) -> usize {
188        self.len() * size_of::<T::ULE>()
189    }
190
191    fn encode_var_ule_write(&self, dst: &mut [u8]) {
192        #[allow(non_snake_case)]
193        let S = size_of::<T::ULE>();
194        if true {
    match (&(self.len() * S), &dst.len()) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.len() * S, dst.len());
195        for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) {
196            let ule = item.to_unaligned();
197            chunk.copy_from_slice(ULE::slice_as_bytes(slice::from_ref(&ule)));
198        }
199    }
200}
201
/// Encodes a `Vec<T>` as a [`ZeroSlice<T>`] by delegating to the `&[T]` implementation.
#[cfg(feature = "alloc")]
unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T>
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        let elements: &[T] = self.as_slice();
        <&[T] as EncodeAsVarULE<ZeroSlice<T>>>::encode_var_ule_len(&elements)
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        let elements: &[T] = self.as_slice();
        <&[T] as EncodeAsVarULE<ZeroSlice<T>>>::encode_var_ule_write(&elements, dst)
    }
}
222
223unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T>
224where
225    T: AsULE + 'static,
226{
227    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
228        // unnecessary if the other two are implemented
229        ::core::panicking::panic("internal error: entered unreachable code")unreachable!()
230    }
231
232    #[inline]
233    fn encode_var_ule_len(&self) -> usize {
234        self.as_bytes().len()
235    }
236
237    fn encode_var_ule_write(&self, dst: &mut [u8]) {
238        if true {
    match (&self.as_bytes().len(), &dst.len()) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.as_bytes().len(), dst.len());
239        dst.copy_from_slice(self.as_bytes());
240    }
241}
242
243unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E]
244where
245    T: VarULE + ?Sized,
246    E: EncodeAsVarULE<T>,
247    F: VarZeroVecFormat,
248{
249    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
250        // unnecessary if the other two are implemented
251        ::core::panicking::panic("not implemented")unimplemented!()
252    }
253
254    #[expect(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV.
255    fn encode_var_ule_len(&self) -> usize {
256        crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize
257    }
258
259    fn encode_var_ule_write(&self, dst: &mut [u8]) {
260        crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst)
261    }
262}
263
/// Encodes a `Vec<E>` as a [`VarZeroSlice<T, F>`] by delegating to the `&[E]` implementation.
#[cfg(feature = "alloc")]
unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E>
where
    T: VarULE + ?Sized,
    E: EncodeAsVarULE<T>,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        let elements: &[E] = self.as_slice();
        <&[E] as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&elements)
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        let elements: &[E] = self.as_slice();
        <&[E] as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&elements, dst)
    }
}
286
287unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F>
288where
289    T: VarULE + ?Sized,
290    F: VarZeroVecFormat,
291{
292    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
293        // unnecessary if the other two are implemented
294        ::core::panicking::panic("internal error: entered unreachable code")unreachable!()
295    }
296
297    #[inline]
298    fn encode_var_ule_len(&self) -> usize {
299        self.as_bytes().len()
300    }
301
302    #[inline]
303    fn encode_var_ule_write(&self, dst: &mut [u8]) {
304        if true {
    match (&self.as_bytes().len(), &dst.len()) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.as_bytes().len(), dst.len());
305        dst.copy_from_slice(self.as_bytes());
306    }
307}
308
#[cfg(test)]
mod test {
    use super::*;

    // String fixtures, as an array and as a slice over that array.
    const STRING_ARRAY: [&str; 2] = ["hello", "world"];
    const STRING_SLICE: &[&str] = &STRING_ARRAY;

    // `u8` fixtures in one, two and three dimensions.
    const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];
    const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY];
    const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY];
    const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE];
    const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE];

    // `u32` fixtures in one, two and three dimensions.
    const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F];
    const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY];
    const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY];
    const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE];
    const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE];

    /// Builds `VarZeroVec`s from arrays, slices, `Vec`s and zerovec containers
    /// and checks that every construction path yields equal results.
    #[test]
    fn test_vzv_from() {
        type VZV<'a, T> = VarZeroVec<'a, T>;
        type ZS<T> = ZeroSlice<T>;
        type VZS<T> = VarZeroSlice<T>;

        // Owned `u8` inputs.
        let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY);
        let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()];
        let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()];
        let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()];

        // Owned `u32` inputs.
        let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY);
        let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()];
        let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()];
        let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()];

        // VZV<str>
        let str_from_array: VZV<str> = VarZeroVec::from(&STRING_ARRAY);
        let str_from_slice: VZV<str> = VarZeroVec::from(STRING_SLICE);
        let str_from_vec: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE));
        assert_eq!(str_from_array, STRING_SLICE);
        assert_eq!(str_from_array, str_from_slice);
        assert_eq!(str_from_array, str_from_vec);

        // VZV<[u8]>
        let bytes_from_array: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY);
        let bytes_from_slice: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE);
        let bytes_from_vec: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec);
        assert_eq!(bytes_from_array, U8_2D_SLICE);
        assert_eq!(bytes_from_array, bytes_from_slice);
        assert_eq!(bytes_from_array, bytes_from_vec);
        let u8_3d_vzv_brackets = &[bytes_from_array.clone(), bytes_from_array.clone()];

        // VZV<ZeroSlice<u8>>
        let zs8_from_array: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY);
        let zs8_from_slice: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE);
        let zs8_from_vec: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec);
        let zs8_from_zerovec: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec);
        assert_eq!(zs8_from_array, U8_2D_SLICE);
        assert_eq!(zs8_from_array, zs8_from_slice);
        assert_eq!(zs8_from_array, zs8_from_vec);
        assert_eq!(zs8_from_array, zs8_from_zerovec);
        let u8_3d_vzv_zeroslice = &[zs8_from_array.clone(), zs8_from_array.clone()];

        // VZV<VarZeroSlice<[u8]>>
        let nested_bytes_from_array: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY);
        let nested_bytes_from_slice: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE);
        let nested_bytes_from_vec: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec);
        let nested_bytes_from_vzv: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets);
        assert_eq!(
            nested_bytes_from_array
                .iter()
                .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>())
                .collect::<Vec<Vec<Vec<u8>>>>(),
            u8_3d_vec
        );
        assert_eq!(nested_bytes_from_array, nested_bytes_from_slice);
        assert_eq!(nested_bytes_from_array, nested_bytes_from_vec);
        assert_eq!(nested_bytes_from_array, nested_bytes_from_vzv);

        // VZV<VarZeroSlice<ZeroSlice<u8>>>
        let nested_zs8_from_array: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY);
        let nested_zs8_from_slice: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE);
        let nested_zs8_from_vec: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec);
        let nested_zs8_from_vzv: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice);
        assert_eq!(
            nested_zs8_from_array
                .iter()
                .map(|x| x
                    .iter()
                    .map(|y| y.iter().collect::<Vec<u8>>())
                    .collect::<Vec<Vec<u8>>>())
                .collect::<Vec<Vec<Vec<u8>>>>(),
            u8_3d_vec
        );
        assert_eq!(nested_zs8_from_array, nested_zs8_from_slice);
        assert_eq!(nested_zs8_from_array, nested_zs8_from_vec);
        assert_eq!(nested_zs8_from_array, nested_zs8_from_vzv);

        // VZV<ZeroSlice<u32>>
        let zs32_from_array: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY);
        let zs32_from_slice: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE);
        let zs32_from_vec: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec);
        let zs32_from_zerovec: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec);
        assert_eq!(zs32_from_array, u32_2d_zerovec);
        assert_eq!(zs32_from_array, zs32_from_slice);
        assert_eq!(zs32_from_array, zs32_from_vec);
        assert_eq!(zs32_from_array, zs32_from_zerovec);
        let u32_3d_vzv = &[zs32_from_array.clone(), zs32_from_array.clone()];

        // VZV<VarZeroSlice<ZeroSlice<u32>>>
        let nested_zs32_from_array: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY);
        let nested_zs32_from_slice: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE);
        let nested_zs32_from_vec: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec);
        let nested_zs32_from_vzv: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv);
        assert_eq!(
            nested_zs32_from_array
                .iter()
                .map(|x| x
                    .iter()
                    .map(|y| y.iter().collect::<Vec<u32>>())
                    .collect::<Vec<Vec<u32>>>())
                .collect::<Vec<Vec<Vec<u32>>>>(),
            u32_3d_vec
        );
        assert_eq!(nested_zs32_from_array, nested_zs32_from_slice);
        assert_eq!(nested_zs32_from_array, nested_zs32_from_vec);
        assert_eq!(nested_zs32_from_array, nested_zs32_from_vzv);
    }
}
436}