zerovec/varzerovec/
components.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::VarZeroVecFormatError;
6use crate::ule::*;
7use core::cmp::Ordering;
8use core::convert::TryFrom;
9use core::marker::PhantomData;
10use core::mem;
11use core::ops::Range;
12
13/// This trait allows switching between different possible internal
14/// representations of VarZeroVec.
15///
16/// Currently this crate supports three formats: [`Index8`], [`Index16`] and [`Index32`],
17/// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec)
18/// types unless explicitly specified otherwise.
19///
20/// Do not implement this trait, its internals may be changed in the future,
21/// and all of its associated items are hidden from the docs.
22pub trait VarZeroVecFormat: 'static + Sized {
23    /// The type to use for the indexing array
24    ///
25    /// Safety: must be a ULE for which all byte sequences are allowed
26    #[doc(hidden)]
27    type Index: IntegerULE;
28    /// The type to use for the length segment
29    ///
30    /// Safety: must be a ULE for which all byte sequences are allowed
31    #[doc(hidden)]
32    type Len: IntegerULE;
33}
34
35/// This trait represents various ULE types that can be used to represent an integer
36///
37/// Do not implement this trait, its internals may be changed in the future,
38/// and all of its associated items are hidden from the docs.
39#[doc(hidden)]
40pub unsafe trait IntegerULE: ULE {
41    /// The error to show when unable to construct a vec
42    #[doc(hidden)]
43    const TOO_LARGE_ERROR: &'static str;
44
45    /// Safety: must be sizeof(self)
46    #[doc(hidden)]
47    const SIZE: usize;
48
49    /// Safety: must be maximum integral value represented here
50    #[doc(hidden)]
51    const MAX_VALUE: u32;
52
53    /// Safety: Must roundtrip with from_usize and represent the correct
54    /// integral value
55    #[doc(hidden)]
56    fn iule_to_usize(self) -> usize;
57
58    #[doc(hidden)]
59    fn iule_from_usize(x: usize) -> Option<Self>;
60
61    /// Safety: Should always convert a buffer into an array of Self with the correct length
62    #[doc(hidden)]
63    #[cfg(feature = "alloc")]
64    fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self];
65}
66
67/// This is a [`VarZeroVecFormat`] that stores u8s in the index array, and a u8 for a length.
68///
69/// Will have a smaller data size, but it's *extremely* likely for larger arrays
70/// to be unrepresentable (and error on construction). Should probably be used
71/// for known-small arrays, where all but the last field are known-small.
72#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
73#[allow(clippy::exhaustive_structs)] // marker
74pub struct Index8;
75
76/// This is a [`VarZeroVecFormat`] that stores u16s in the index array, and a u16 for a length.
77///
78/// Will have a smaller data size, but it's more likely for larger arrays
79/// to be unrepresentable (and error on construction)
80///
81/// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) types.
82#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
83#[allow(clippy::exhaustive_structs)] // marker
84pub struct Index16;
85
86/// This is a [`VarZeroVecFormat`] that stores u32s in the index array, and a u32 for a length.
87/// Will have a larger data size, but will support large arrays without
88/// problems.
89#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
90#[allow(clippy::exhaustive_structs)] // marker
91pub struct Index32;
92
93impl VarZeroVecFormat for Index8 {
94    type Index = u8;
95    type Len = u8;
96}
97
98impl VarZeroVecFormat for Index16 {
99    type Index = RawBytesULE<2>;
100    type Len = RawBytesULE<2>;
101}
102
103impl VarZeroVecFormat for Index32 {
104    type Index = RawBytesULE<4>;
105    type Len = RawBytesULE<4>;
106}
107
108unsafe impl IntegerULE for u8 {
109    const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
110                                     cumulatively are larger than a u8 in size";
111    const SIZE: usize = mem::size_of::<Self>();
112    const MAX_VALUE: u32 = u8::MAX as u32;
113    #[inline]
114    fn iule_to_usize(self) -> usize {
115        self as usize
116    }
117    #[inline]
118    fn iule_from_usize(u: usize) -> Option<Self> {
119        u8::try_from(u).ok()
120    }
121    #[inline]
122    #[cfg(feature = "alloc")]
123    fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
124        bytes
125    }
126}
127
128unsafe impl IntegerULE for RawBytesULE<2> {
129    const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
130                                     cumulatively are larger than a u16 in size";
131    const SIZE: usize = mem::size_of::<Self>();
132    const MAX_VALUE: u32 = u16::MAX as u32;
133    #[inline]
134    fn iule_to_usize(self) -> usize {
135        self.as_unsigned_int() as usize
136    }
137    #[inline]
138    fn iule_from_usize(u: usize) -> Option<Self> {
139        u16::try_from(u).ok().map(u16::to_unaligned)
140    }
141    #[inline]
142    #[cfg(feature = "alloc")]
143    fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
144        Self::from_bytes_unchecked_mut(bytes)
145    }
146}
147
148unsafe impl IntegerULE for RawBytesULE<4> {
149    const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
150                                     cumulatively are larger than a u32 in size";
151    const SIZE: usize = mem::size_of::<Self>();
152    const MAX_VALUE: u32 = u32::MAX;
153    #[inline]
154    fn iule_to_usize(self) -> usize {
155        self.as_unsigned_int() as usize
156    }
157    #[inline]
158    fn iule_from_usize(u: usize) -> Option<Self> {
159        u32::try_from(u).ok().map(u32::to_unaligned)
160    }
161    #[inline]
162    #[cfg(feature = "alloc")]
163    fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
164        Self::from_bytes_unchecked_mut(bytes)
165    }
166}
167
168/// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec
169/// internal representation code lies.
170///
171/// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into
172/// the buffer components. Logically this is capable of behaving as
173/// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually
174/// exist.
175///
176/// See [`VarZeroVecComponents::parse_bytes()`] for information on the internal invariants involved
177#[derive(Debug)]
178pub struct VarZeroVecComponents<'a, T: ?Sized, F> {
179    /// The number of elements
180    len: u32,
181    /// The list of indices into the `things` slice
182    /// Since the first element is always at things[0], the first element of the indices array is for the *second* element
183    indices: &'a [u8],
184    /// The contiguous list of `T::VarULE`s
185    things: &'a [u8],
186    marker: PhantomData<(&'a T, F)>,
187}
188
189// #[derive()] won't work here since we do not want it to be
190// bound on T: Copy
191impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {}
192impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> {
193    fn clone(&self) -> Self {
194        *self
195    }
196}
197
198impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> {
199    #[inline]
200    fn default() -> Self {
201        Self::new()
202    }
203}
204
205impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> {
206    #[inline]
207    pub fn new() -> Self {
208        Self {
209            len: 0,
210            indices: &[],
211            things: &[],
212            marker: PhantomData,
213        }
214    }
215}
216impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> {
217    /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
218    ///
219    /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize)
220    /// - There must be at least `4*(length - 1) + 4` bytes total, to form the array `indices` of indices
221    /// - `0..indices[0]` must index into a valid section of
222    ///   `things` (the data after `indices`), such that it parses to a `T::VarULE`
223    /// - `indices[i - 1]..indices[i]` must index into a valid section of
224    ///   `things` (the data after `indices`), such that it parses to a `T::VarULE`
225    /// - `indices[len - 2]..things.len()` must index into a valid section of
226    ///   `things`, such that it parses to a `T::VarULE`
227    #[inline]
228    pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, VarZeroVecFormatError> {
229        // The empty VZV is special-cased to the empty slice
230        if slice.is_empty() {
231            return Ok(VarZeroVecComponents {
232                len: 0,
233                indices: &[],
234                things: &[],
235                marker: PhantomData,
236            });
237        }
238        let len_bytes = slice
239            .get(0..F::Len::SIZE)
240            .ok_or(VarZeroVecFormatError::Metadata)?;
241        let len_ule =
242            F::Len::parse_bytes_to_slice(len_bytes).map_err(|_| VarZeroVecFormatError::Metadata)?;
243
244        let len = len_ule
245            .first()
246            .ok_or(VarZeroVecFormatError::Metadata)?
247            .iule_to_usize();
248
249        let rest = slice
250            .get(F::Len::SIZE..)
251            .ok_or(VarZeroVecFormatError::Metadata)?;
252        let len_u32 = u32::try_from(len).map_err(|_| VarZeroVecFormatError::Metadata);
253        // We pass down the rest of the invariants
254        Self::parse_bytes_with_length(len_u32?, rest)
255    }
256
257    /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
258    ///
259    /// - There must be at least `4*len` bytes total, to form the array `indices` of indices.
260    /// - `indices[i]..indices[i+1]` must index into a valid section of
261    ///   `things` (the data after `indices`), such that it parses to a `T::VarULE`
262    /// - `indices[len - 1]..things.len()` must index into a valid section of
263    ///   `things`, such that it parses to a `T::VarULE`
264    #[inline]
265    pub fn parse_bytes_with_length(
266        len: u32,
267        slice: &'a [u8],
268    ) -> Result<Self, VarZeroVecFormatError> {
269        let len_minus_one = len.checked_sub(1);
270        // The empty VZV is special-cased to the empty slice
271        let Some(len_minus_one) = len_minus_one else {
272            return Ok(VarZeroVecComponents {
273                len: 0,
274                indices: &[],
275                things: &[],
276                marker: PhantomData,
277            });
278        };
279        // The indices array is one element shorter since the first index is always 0,
280        // so we use len_minus_one
281        let indices_bytes = slice
282            .get(..F::Index::SIZE * (len_minus_one as usize))
283            .ok_or(VarZeroVecFormatError::Metadata)?;
284        let things = slice
285            .get(F::Index::SIZE * (len_minus_one as usize)..)
286            .ok_or(VarZeroVecFormatError::Metadata)?;
287
288        let borrowed = VarZeroVecComponents {
289            len,
290            indices: indices_bytes,
291            things,
292            marker: PhantomData,
293        };
294
295        borrowed.check_indices_and_things()?;
296
297        Ok(borrowed)
298    }
299
300    /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
301    /// successfully returned a [`VarZeroVecComponents`] when passed to
302    /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same
303    /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`].
304    ///
305    /// # Safety
306    /// The bytes must have previously successfully run through
307    /// [`VarZeroVecComponents::parse_bytes()`]
308    pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
309        // The empty VZV is special-cased to the empty slice
310        if slice.is_empty() {
311            return VarZeroVecComponents {
312                len: 0,
313                indices: &[],
314                things: &[],
315                marker: PhantomData,
316            };
317        }
318        let (len_bytes, data_bytes) = unsafe { slice.split_at_unchecked(F::Len::SIZE) };
319        // Safety: F::Len allows all byte sequences
320        let len_ule = F::Len::slice_from_bytes_unchecked(len_bytes);
321
322        let len = len_ule.get_unchecked(0).iule_to_usize();
323        let len_u32 = len as u32;
324
325        // Safety: This method requires the bytes to have passed through `parse_bytes()`
326        // whereas we're calling something that asks for `parse_bytes_with_length()`.
327        // The two methods perform similar validation, with parse_bytes() validating an additional
328        // 4-byte `length` header.
329        Self::from_bytes_unchecked_with_length(len_u32, data_bytes)
330    }
331
332    /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
333    /// successfully returned a [`VarZeroVecComponents`] when passed to
334    /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same
335    /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`].
336    ///
337    /// # Safety
338    /// The len,bytes must have previously successfully run through
339    /// [`VarZeroVecComponents::parse_bytes_with_length()`]
340    pub unsafe fn from_bytes_unchecked_with_length(len: u32, slice: &'a [u8]) -> Self {
341        let len_minus_one = len.checked_sub(1);
342        // The empty VZV is special-cased to the empty slice
343        let Some(len_minus_one) = len_minus_one else {
344            return VarZeroVecComponents {
345                len: 0,
346                indices: &[],
347                things: &[],
348                marker: PhantomData,
349            };
350        };
351        // The indices array is one element shorter since the first index is always 0,
352        // so we use len_minus_one
353        let indices_bytes = slice.get_unchecked(..F::Index::SIZE * (len_minus_one as usize));
354        let things = slice.get_unchecked(F::Index::SIZE * (len_minus_one as usize)..);
355
356        VarZeroVecComponents {
357            len,
358            indices: indices_bytes,
359            things,
360            marker: PhantomData,
361        }
362    }
363
364    /// Get the number of elements in this vector
365    #[inline]
366    pub fn len(self) -> usize {
367        self.len as usize
368    }
369
370    /// Returns `true` if the vector contains no elements.
371    #[inline]
372    pub fn is_empty(self) -> bool {
373        self.len == 0
374    }
375
376    /// Get the idx'th element out of this slice. Returns `None` if out of bounds.
377    #[inline]
378    pub fn get(self, idx: usize) -> Option<&'a T> {
379        if idx >= self.len() {
380            return None;
381        }
382        Some(unsafe { self.get_unchecked(idx) })
383    }
384
385    /// Get the idx'th element out of this slice. Does not bounds check.
386    ///
387    /// Safety:
388    /// - `idx` must be in bounds (`idx < self.len()`)
389    #[inline]
390    pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T {
391        let range = self.get_things_range(idx);
392        let things_slice = self.things.get_unchecked(range);
393        T::from_bytes_unchecked(things_slice)
394    }
395
396    /// Get the range in `things` for the element at `idx`. Does not bounds check.
397    ///
398    /// Safety:
399    /// - `idx` must be in bounds (`idx < self.len()`)
400    #[inline]
401    pub(crate) unsafe fn get_things_range(self, idx: usize) -> Range<usize> {
402        let start = if let Some(idx_minus_one) = idx.checked_sub(1) {
403            self.indices_slice()
404                .get_unchecked(idx_minus_one)
405                .iule_to_usize()
406        } else {
407            0
408        };
409        let end = if idx + 1 == self.len() {
410            self.things.len()
411        } else {
412            self.indices_slice().get_unchecked(idx).iule_to_usize()
413        };
414        debug_assert!(start <= end);
415        start..end
416    }
417
418    /// Get the size, in bytes, of the indices array
419    pub(crate) unsafe fn get_indices_size(self) -> usize {
420        self.indices.len()
421    }
422
423    /// Check the internal invariants of VarZeroVecComponents:
424    ///
425    /// - `indices[i]..indices[i+1]` must index into a valid section of
426    ///   `things`, such that it parses to a `T::VarULE`
427    /// - `indices[len - 1]..things.len()` must index into a valid section of
428    ///   `things`, such that it parses to a `T::VarULE`
429    /// - `indices` is monotonically increasing
430    ///
431    /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods
432    /// assume that the slice has been passed through check_indices_and_things
433    #[inline]
434    #[expect(clippy::len_zero)] // more explicit to enforce safety invariants
435    fn check_indices_and_things(self) -> Result<(), VarZeroVecFormatError> {
436        if self.len() == 0 {
437            if self.things.len() > 0 {
438                return Err(VarZeroVecFormatError::Metadata);
439            } else {
440                return Ok(());
441            }
442        }
443        let indices_slice = self.indices_slice();
444        assert_eq!(self.len(), indices_slice.len() + 1);
445        // Safety: i is in bounds (assertion above)
446        let mut start = 0;
447        for i in 0..self.len() {
448            // The indices array is offset by 1: indices[0] is the end of the first
449            // element and the start of the next, since the start of the first element
450            // is always things[0]. So to get the end we get element `i`.
451            let end = if let Some(end) = indices_slice.get(i) {
452                end.iule_to_usize()
453            } else {
454                // This only happens at i = self.len() - 1 = indices_slice.len() + 1 - 1
455                // = indices_slice.len(). This is the last `end`, which is always the size of
456                // `things` and thus never stored in the array
457                self.things.len()
458            };
459
460            if start > end {
461                return Err(VarZeroVecFormatError::Metadata);
462            }
463            if end > self.things.len() {
464                return Err(VarZeroVecFormatError::Metadata);
465            }
466            // Safety: start..end is a valid range in self.things
467            let bytes = unsafe { self.things.get_unchecked(start..end) };
468            T::parse_bytes(bytes).map_err(VarZeroVecFormatError::Values)?;
469            start = end;
470        }
471        Ok(())
472    }
473
474    /// Create an iterator over the Ts contained in VarZeroVecComponents
475    #[inline]
476    pub fn iter(self) -> VarZeroSliceIter<'a, T, F> {
477        VarZeroSliceIter::new(self)
478    }
479
480    #[cfg(feature = "alloc")]
481    pub fn to_vec(self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
482        self.iter().map(T::to_boxed).collect()
483    }
484
485    #[inline]
486    fn indices_slice(&self) -> &'a [F::Index] {
487        unsafe { F::Index::slice_from_bytes_unchecked(self.indices) }
488    }
489
490    // Dump a debuggable representation of this type
491    #[allow(unused)] // useful for debugging
492    #[cfg(feature = "alloc")]
493    pub(crate) fn dump(&self) -> alloc::string::String {
494        let indices = self
495            .indices_slice()
496            .iter()
497            .copied()
498            .map(IntegerULE::iule_to_usize)
499            .collect::<alloc::vec::Vec<_>>();
500        alloc::format!("VarZeroVecComponents {{ indices: {indices:?} }}")
501    }
502}
503
504/// An iterator over VarZeroSlice
505#[derive(Debug)]
506pub struct VarZeroSliceIter<'a, T: ?Sized, F = Index16> {
507    components: VarZeroVecComponents<'a, T, F>,
508    index: usize,
509    // Safety invariant: must be a valid index into the data segment of `components`, or an index at the end
510    // i.e. start_index <= components.things.len()
511    //
512    // It must be a valid index into the `things` array of components, coming from `components.indices_slice()`
513    start_index: usize,
514}
515
516impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> {
517    fn new(c: VarZeroVecComponents<'a, T, F>) -> Self {
518        Self {
519            components: c,
520            index: 0,
521            // Invariant upheld, 0 is always a valid index-or-end
522            start_index: 0,
523        }
524    }
525}
526impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> {
527    type Item = &'a T;
528
529    fn next(&mut self) -> Option<Self::Item> {
530        // Note: the indices array doesn't contain 0 or len, we need to specially handle those edges. The 0 is handled
531        // by start_index, and the len is handled by the code for `end`.
532
533        if self.index >= self.components.len() {
534            return None;
535        }
536
537        // Invariant established: self.index is in bounds for self.components.len(),
538        // which means it is in bounds for self.components.indices_slice() since that has the same length
539
540        let end = if self.index + 1 == self.components.len() {
541            // We don't store the end index since it is computable, so the last element should use self.components.things.len()
542            self.components.things.len()
543        } else {
544            // Safety: self.index was known to be in bounds from the bounds check above.
545            unsafe {
546                self.components
547                    .indices_slice()
548                    .get_unchecked(self.index)
549                    .iule_to_usize()
550            }
551        };
552        // Invariant established: end has the same invariant as self.start_index since it comes from indices_slice, which is guaranteed
553        // to only contain valid indexes
554
555        let item = unsafe {
556            // Safety: self.start_index and end both have in-range invariants, plus they are valid indices from indices_slice
557            // which means we can treat this data as a T
558            T::from_bytes_unchecked(self.components.things.get_unchecked(self.start_index..end))
559        };
560        self.index += 1;
561        // Invariant upheld: end has the same invariant as self.start_index
562        self.start_index = end;
563        Some(item)
564    }
565
566    fn size_hint(&self) -> (usize, Option<usize>) {
567        let remainder = self.components.len() - self.index;
568        (remainder, Some(remainder))
569    }
570}
571
572impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> {
573    fn len(&self) -> usize {
574        self.components.len() - self.index
575    }
576}
577
578impl<'a, T, F> VarZeroVecComponents<'a, T, F>
579where
580    T: VarULE,
581    T: ?Sized,
582    T: Ord,
583    F: VarZeroVecFormat,
584{
585    /// Binary searches a sorted `VarZeroVecComponents<T>` for the given element. For more information, see
586    /// the primitive function [`binary_search`](slice::binary_search).
587    pub fn binary_search(&self, needle: &T) -> Result<usize, usize> {
588        self.binary_search_by(|probe| probe.cmp(needle))
589    }
590
591    pub fn binary_search_in_range(
592        &self,
593        needle: &T,
594        range: Range<usize>,
595    ) -> Option<Result<usize, usize>> {
596        self.binary_search_in_range_by(|probe| probe.cmp(needle), range)
597    }
598}
599
600impl<'a, T, F> VarZeroVecComponents<'a, T, F>
601where
602    T: VarULE,
603    T: ?Sized,
604    F: VarZeroVecFormat,
605{
606    /// Binary searches a sorted `VarZeroVecComponents<T>` for the given predicate. For more information, see
607    /// the primitive function [`binary_search_by`](slice::binary_search_by).
608    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
609        // Safety: 0 and len are in range
610        unsafe { self.binary_search_in_range_unchecked(predicate, 0..self.len()) }
611    }
612
613    // Binary search within a range.
614    // Values returned are relative to the range start!
615    pub fn binary_search_in_range_by(
616        &self,
617        predicate: impl FnMut(&T) -> Ordering,
618        range: Range<usize>,
619    ) -> Option<Result<usize, usize>> {
620        if range.end > self.len() {
621            return None;
622        }
623        if range.end < range.start {
624            return None;
625        }
626        // Safety: We bounds checked above: end is in-bounds or len, and start is <= end
627        let range_absolute =
628            unsafe { self.binary_search_in_range_unchecked(predicate, range.clone()) };
629        // The values returned are relative to the range start
630        Some(
631            range_absolute
632                .map(|o| o - range.start)
633                .map_err(|e| e - range.start),
634        )
635    }
636
637    /// Safety: range must be in range for the slice (start <= len, end <= len, start <= end)
638    unsafe fn binary_search_in_range_unchecked(
639        &self,
640        mut predicate: impl FnMut(&T) -> Ordering,
641        range: Range<usize>,
642    ) -> Result<usize, usize> {
643        // Function invariant: size is always end - start
644        let mut start = range.start;
645        let mut end = range.end;
646        let mut size;
647
648        // Loop invariant: 0 <= start < end <= len
649        // This invariant is initialized by the function safety invariants and the loop condition
650        while start < end {
651            size = end - start;
652            // This establishes mid < end (which implies mid < len)
653            // size is end - start. start + size is end (which is <= len).
654            // mid = start + size/2 will be less than end
655            let mid = start + size / 2;
656
657            // Safety: mid is < end <= len, so in-range
658            let cmp = predicate(self.get_unchecked(mid));
659
660            match cmp {
661                Ordering::Less => {
662                    // This retains the loop invariant since it
663                    // increments start, and we already have 0 <= start
664                    // start < end is enforced by the loop condition
665                    start = mid + 1;
666                }
667                Ordering::Greater => {
668                    // mid < end, so this decreases end.
669                    // This means end <= len is still true, and
670                    // end > start is enforced by the loop condition
671                    end = mid;
672                }
673                Ordering::Equal => return Ok(mid),
674            }
675        }
676        Err(start)
677    }
678}
679
680/// Collects the bytes for a VarZeroSlice into a Vec.
681#[cfg(feature = "alloc")]
682pub fn get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<alloc::vec::Vec<u8>>
683where
684    T: VarULE + ?Sized,
685    A: EncodeAsVarULE<T>,
686    F: VarZeroVecFormat,
687{
688    debug_assert!(!elements.is_empty());
689    let len = compute_serializable_len::<T, A, F>(elements)?;
690    debug_assert!(
691        len >= F::Len::SIZE as u32,
692        "Must have at least F::Len::SIZE bytes to hold the length of the vector"
693    );
694    let mut output = alloc::vec![0u8; len as usize];
695    write_serializable_bytes::<T, A, F>(elements, &mut output);
696    Some(output)
697}
698
699/// Writes the bytes for a VarZeroLengthlessSlice into an output buffer.
700/// Usable for a VarZeroSlice if you first write the length bytes.
701///
702/// Every byte in the buffer will be initialized after calling this function.
703///
704/// # Panics
705///
706/// Panics if the buffer is not exactly the correct length.
707pub fn write_serializable_bytes_without_length<T, A, F>(elements: &[A], output: &mut [u8])
708where
709    T: VarULE + ?Sized,
710    A: EncodeAsVarULE<T>,
711    F: VarZeroVecFormat,
712{
713    assert!(elements.len() <= F::Len::MAX_VALUE as usize);
714    if elements.is_empty() {
715        return;
716    }
717
718    // idx_offset = offset from the start of the buffer for the next index
719    let mut idx_offset: usize = 0;
720    // first_dat_offset = offset from the start of the buffer of the first data block
721    let first_dat_offset: usize = idx_offset + (elements.len() - 1) * F::Index::SIZE;
722    // dat_offset = offset from the start of the buffer of the next data block
723    let mut dat_offset: usize = first_dat_offset;
724
725    for (i, element) in elements.iter().enumerate() {
726        let element_len = element.encode_var_ule_len();
727
728        // The first index is always 0. We don't write it, or update the idx offset.
729        if i != 0 {
730            let idx_limit = idx_offset + F::Index::SIZE;
731            #[expect(clippy::indexing_slicing)] // Function contract allows panicky behavior
732            let idx_slice = &mut output[idx_offset..idx_limit];
733            // VZV expects data offsets to be stored relative to the first data block
734            let idx = dat_offset - first_dat_offset;
735            assert!(idx <= F::Index::MAX_VALUE as usize);
736            #[expect(clippy::expect_used)] // this function is explicitly panicky
737            let bytes_to_write = F::Index::iule_from_usize(idx).expect(F::Index::TOO_LARGE_ERROR);
738            idx_slice.copy_from_slice(ULE::slice_as_bytes(&[bytes_to_write]));
739
740            idx_offset = idx_limit;
741        }
742
743        let dat_limit = dat_offset + element_len;
744        #[expect(clippy::indexing_slicing)] // Function contract allows panicky behavior
745        let dat_slice = &mut output[dat_offset..dat_limit];
746        element.encode_var_ule_write(dat_slice);
747        debug_assert_eq!(T::validate_bytes(dat_slice), Ok(()));
748        dat_offset = dat_limit;
749    }
750
751    debug_assert_eq!(idx_offset, F::Index::SIZE * (elements.len() - 1));
752    assert_eq!(dat_offset, output.len());
753}
754
755/// Writes the bytes for a VarZeroSlice into an output buffer.
756///
757/// Every byte in the buffer will be initialized after calling this function.
758///
759/// # Panics
760///
761/// Panics if the buffer is not exactly the correct length.
762pub fn write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8])
763where
764    T: VarULE + ?Sized,
765    A: EncodeAsVarULE<T>,
766    F: VarZeroVecFormat,
767{
768    if elements.is_empty() {
769        return;
770    }
771    assert!(elements.len() <= F::Len::MAX_VALUE as usize);
772    #[expect(clippy::expect_used)] // This function is explicitly panicky
773    let num_elements_ule = F::Len::iule_from_usize(elements.len()).expect(F::Len::TOO_LARGE_ERROR);
774    #[expect(clippy::indexing_slicing)] // Function contract allows panicky behavior
775    output[0..F::Len::SIZE].copy_from_slice(ULE::slice_as_bytes(&[num_elements_ule]));
776
777    #[expect(clippy::indexing_slicing)] // Function contract allows panicky behavior
778    write_serializable_bytes_without_length::<T, A, F>(elements, &mut output[F::Len::SIZE..]);
779}
780
781pub fn compute_serializable_len_without_length<T, A, F>(elements: &[A]) -> Option<u32>
782where
783    T: VarULE + ?Sized,
784    A: EncodeAsVarULE<T>,
785    F: VarZeroVecFormat,
786{
787    let elements_len = elements.len();
788    let Some(elements_len_minus_one) = elements_len.checked_sub(1) else {
789        // Empty vec is optimized to an empty byte representation
790        return Some(0);
791    };
792    let idx_len: u32 = u32::try_from(elements_len_minus_one)
793        .ok()?
794        .checked_mul(F::Index::SIZE as u32)?;
795    let data_len: u32 = elements
796        .iter()
797        .map(|v| u32::try_from(v.encode_var_ule_len()).ok())
798        .try_fold(0u32, |s, v| s.checked_add(v?))?;
799    let ret = idx_len.checked_add(data_len);
800    if let Some(r) = ret {
801        if r >= F::Index::MAX_VALUE {
802            return None;
803        }
804    }
805    ret
806}
807
808pub fn compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32>
809where
810    T: VarULE + ?Sized,
811    A: EncodeAsVarULE<T>,
812    F: VarZeroVecFormat,
813{
814    compute_serializable_len_without_length::<T, A, F>(elements).map(|x| x + F::Len::SIZE as u32)
815}