zerovec/varzerovec/
slice.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::{VarZeroSliceIter, VarZeroVecComponents};
6use super::vec::VarZeroVecInner;
7use super::*;
8use crate::ule::*;
9use core::cmp::{Ord, Ordering, PartialOrd};
10use core::fmt;
11use core::marker::PhantomData;
12use core::mem;
13
14use core::ops::Index;
15use core::ops::Range;
16
17/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
18/// where `T` is not `Sized`.
19///
20/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
21/// owned data and as such is ideal for deserialization since most human readable
22/// serialization formats cannot unconditionally deserialize zero-copy.
23///
24/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
25/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
26/// using `VarZeroVec<ZeroSlice<T>>`.
27///
28/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
29/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
30///
31/// This type can be nested within itself to allow for multi-level nested `Vec`s.
32///
33/// # Examples
34///
35/// ## Nested Slices
36///
37/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
38///
39/// ```rust
40/// use zerovec::{VarZeroSlice, VarZeroVec};
41/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
42/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
43/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
44/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
45/// let strings_12 = vec![&*strings_1, &*strings_2];
46/// let strings_34 = vec![&*strings_3, &*strings_4];
47/// let all_strings = vec![strings_12, strings_34];
48///
49/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
50/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
51/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
52/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
53/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
54/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
55/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
56///
57/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
58///     .iter()
59///     .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
60///         v.iter()
61///             .map(|x: &VarZeroSlice<_>| {
62///                 x.as_varzerovec()
63///                     .iter()
64///                     .map(|s| s.to_owned())
65///                     .collect::<Vec<String>>()
66///             })
67///             .collect::<Vec<_>>()
68///     })
69///     .collect::<Vec<_>>();
70/// assert_eq!(reconstructed, all_strings);
71///
72/// let bytes = vzv_all.as_bytes();
73/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
74///     VarZeroVec::parse_bytes(bytes).unwrap();
75/// assert_eq!(vzv_from_bytes, vzv_all);
76/// ```
77///
78/// ## Iterate over Windows
79///
80/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
81/// [core::slice::Windows], this behavior can be easily modeled using an iterator:
82///
83/// ```
84/// use zerovec::VarZeroVec;
85///
86/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
87/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
88///
89/// let mut it = vzv.iter().peekable();
90/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
91///     // Evaluate (x, y) here.
92/// #   pairs.push((x, y));
93/// }
94/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
95/// ```
96//
97// safety invariant: The slice MUST be one which parses to
98// a valid VarZeroVecComponents<T>
99#[repr(transparent)]
100pub struct VarZeroSlice<T: ?Sized, F = Index16> {
101    marker: PhantomData<(F, T)>,
102    /// The original slice this was constructed from
103    entire_slice: [u8],
104}
105
106impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
107    /// Construct a new empty VarZeroSlice
108    pub const fn new_empty() -> &'static Self {
109        // The empty VZV is special-cased to the empty slice
110        unsafe { mem::transmute(&[] as &[u8]) }
111    }
112
113    /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
114    #[inline]
115    pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
116        unsafe {
117            // safety: VarZeroSlice is guaranteed to parse here
118            VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
119        }
120    }
121
122    /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
123    ///
124    /// # Safety
125    ///
126    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
127    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
128        // self is really just a wrapper around a byte slice
129        mem::transmute(bytes)
130    }
131
132    /// Get the number of elements in this slice
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// # use zerovec::VarZeroVec;
138    ///
139    /// let strings = vec!["foo", "bar", "baz", "quux"];
140    /// let vec = VarZeroVec::<str>::from(&strings);
141    ///
142    /// assert_eq!(vec.len(), 4);
143    /// ```
144    pub fn len(&self) -> usize {
145        self.as_components().len()
146    }
147
148    /// Returns `true` if the slice contains no elements.
149    ///
150    /// # Examples
151    ///
152    /// ```
153    /// # use zerovec::VarZeroVec;
154    ///
155    /// let strings: Vec<String> = vec![];
156    /// let vec = VarZeroVec::<str>::from(&strings);
157    ///
158    /// assert!(vec.is_empty());
159    /// ```
160    pub fn is_empty(&self) -> bool {
161        self.as_components().is_empty()
162    }
163
164    /// Obtain an iterator over this slice's elements
165    ///
166    /// # Example
167    ///
168    /// ```rust
169    /// # use zerovec::VarZeroVec;
170    ///
171    /// let strings = vec!["foo", "bar", "baz", "quux"];
172    /// let vec = VarZeroVec::<str>::from(&strings);
173    ///
174    /// let mut iter_results: Vec<&str> = vec.iter().collect();
175    /// assert_eq!(iter_results[0], "foo");
176    /// assert_eq!(iter_results[1], "bar");
177    /// assert_eq!(iter_results[2], "baz");
178    /// assert_eq!(iter_results[3], "quux");
179    /// ```
180    pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
181        self.as_components().iter()
182    }
183
184    /// Get one of this slice's elements, returning `None` if the index is out of bounds
185    ///
186    /// # Example
187    ///
188    /// ```rust
189    /// # use zerovec::VarZeroVec;
190    ///
191    /// let strings = vec!["foo", "bar", "baz", "quux"];
192    /// let vec = VarZeroVec::<str>::from(&strings);
193    ///
194    /// let mut iter_results: Vec<&str> = vec.iter().collect();
195    /// assert_eq!(vec.get(0), Some("foo"));
196    /// assert_eq!(vec.get(1), Some("bar"));
197    /// assert_eq!(vec.get(2), Some("baz"));
198    /// assert_eq!(vec.get(3), Some("quux"));
199    /// assert_eq!(vec.get(4), None);
200    /// ```
201    pub fn get(&self, idx: usize) -> Option<&T> {
202        self.as_components().get(idx)
203    }
204
205    /// Get one of this slice's elements
206    ///
207    /// # Safety
208    ///
209    /// `index` must be in range
210    ///
211    /// # Example
212    ///
213    /// ```rust
214    /// # use zerovec::VarZeroVec;
215    ///
216    /// let strings = vec!["foo", "bar", "baz", "quux"];
217    /// let vec = VarZeroVec::<str>::from(&strings);
218    ///
219    /// let mut iter_results: Vec<&str> = vec.iter().collect();
220    /// unsafe {
221    ///     assert_eq!(vec.get_unchecked(0), "foo");
222    ///     assert_eq!(vec.get_unchecked(1), "bar");
223    ///     assert_eq!(vec.get_unchecked(2), "baz");
224    ///     assert_eq!(vec.get_unchecked(3), "quux");
225    /// }
226    /// ```
227    pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
228        self.as_components().get_unchecked(idx)
229    }
230
231    /// Obtain an owned `Vec<Box<T>>` out of this
232    ///
233    /// ✨ *Enabled with the `alloc` Cargo feature.*
234    #[cfg(feature = "alloc")]
235    pub fn to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
236        self.as_components().to_vec()
237    }
238
239    /// Get a reference to the entire encoded backing buffer of this slice
240    ///
241    /// The bytes can be passed back to [`Self::parse_bytes()`].
242    ///
243    /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
244    ///
245    /// # Example
246    ///
247    /// ```rust
248    /// # use zerovec::VarZeroVec;
249    ///
250    /// let strings = vec!["foo", "bar", "baz"];
251    /// let vzv = VarZeroVec::<str>::from(&strings);
252    ///
253    /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap());
254    /// ```
255    #[inline]
256    pub const fn as_bytes(&self) -> &[u8] {
257        &self.entire_slice
258    }
259
260    /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
261    ///
262    /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
263    /// it is more efficient to perform this conversion first
264    pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
265        VarZeroVec(VarZeroVecInner::Borrowed(self))
266    }
267
268    /// Parse a VarZeroSlice from a slice of the appropriate format
269    ///
270    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
271    pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> {
272        <Self as VarULE>::parse_bytes(slice)
273    }
274}
275
276impl<T, F> VarZeroSlice<T, F>
277where
278    T: VarULE,
279    T: ?Sized,
280    T: Ord,
281    F: VarZeroVecFormat,
282{
283    /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
284    /// the standard library function [`binary_search`].
285    ///
286    /// # Example
287    ///
288    /// ```
289    /// # use zerovec::VarZeroVec;
290    ///
291    /// let strings = vec!["a", "b", "f", "g"];
292    /// let vec = VarZeroVec::<str>::from(&strings);
293    ///
294    /// assert_eq!(vec.binary_search("f"), Ok(2));
295    /// assert_eq!(vec.binary_search("e"), Err(2));
296    /// ```
297    ///
298    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
299    #[inline]
300    pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
301        self.as_components().binary_search(x)
302    }
303
304    /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
305    ///
306    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
307    /// to the behavior of the standard library function [`binary_search`].
308    ///
309    /// The index is returned relative to the start of the range.
310    ///
311    /// # Example
312    ///
313    /// ```
314    /// # use zerovec::VarZeroVec;
315    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
316    /// let vec = VarZeroVec::<str>::from(&strings);
317    ///
318    /// // Same behavior as binary_search when the range covers the whole slice:
319    /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
320    /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
321    ///
322    /// // Will not look outside of the range:
323    /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
324    /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
325    ///
326    /// // Will return indices relative to the start of the range:
327    /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
328    /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
329    ///
330    /// // Will return `None` if the range is out of bounds:
331    /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
332    /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
333    /// ```
334    ///
335    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
336    #[inline]
337    pub fn binary_search_in_range(
338        &self,
339        x: &T,
340        range: Range<usize>,
341    ) -> Option<Result<usize, usize>> {
342        self.as_components().binary_search_in_range(x, range)
343    }
344}
345
346impl<T, F> VarZeroSlice<T, F>
347where
348    T: VarULE,
349    T: ?Sized,
350    F: VarZeroVecFormat,
351{
352    /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
353    /// the standard library function [`binary_search_by`].
354    ///
355    /// # Example
356    ///
357    /// ```
358    /// # use zerovec::VarZeroVec;
359    /// let strings = vec!["a", "b", "f", "g"];
360    /// let vec = VarZeroVec::<str>::from(&strings);
361    ///
362    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
363    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
364    /// ```
365    ///
366    /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
367    #[inline]
368    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
369        self.as_components().binary_search_by(predicate)
370    }
371
372    /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
373    ///
374    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
375    /// to the behavior of the standard library function [`binary_search`].
376    ///
377    /// The index is returned relative to the start of the range.
378    ///
379    /// # Example
380    ///
381    /// ```
382    /// # use zerovec::VarZeroVec;
383    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
384    /// let vec = VarZeroVec::<str>::from(&strings);
385    ///
386    /// // Same behavior as binary_search when the range covers the whole slice:
387    /// assert_eq!(
388    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
389    ///     Some(Ok(3))
390    /// );
391    /// assert_eq!(
392    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
393    ///     Some(Err(4))
394    /// );
395    ///
396    /// // Will not look outside of the range:
397    /// assert_eq!(
398    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
399    ///     Some(Err(1))
400    /// );
401    /// assert_eq!(
402    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
403    ///     Some(Err(0))
404    /// );
405    ///
406    /// // Will return indices relative to the start of the range:
407    /// assert_eq!(
408    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
409    ///     Some(Ok(2))
410    /// );
411    /// assert_eq!(
412    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
413    ///     Some(Err(3))
414    /// );
415    ///
416    /// // Will return `None` if the range is out of bounds:
417    /// assert_eq!(
418    ///     vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
419    ///     None
420    /// );
421    /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
422    /// ```
423    ///
424    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
425    pub fn binary_search_in_range_by(
426        &self,
427        predicate: impl FnMut(&T) -> Ordering,
428        range: Range<usize>,
429    ) -> Option<Result<usize, usize>> {
430        self.as_components()
431            .binary_search_in_range_by(predicate, range)
432    }
433}
434// Safety (based on the safety checklist on the VarULE trait):
435//  1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
436//     `[u8]` slice which satisfies this invariant)
437//  2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
438//     `[u8]` slice which satisfies this invariant)
439//  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
440//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
441//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
442//  6. `as_bytes()` is equivalent to a regular transmute of the underlying data
443//  7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
444unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
445    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
446        let _: VarZeroVecComponents<T, F> =
447            VarZeroVecComponents::parse_bytes(bytes).map_err(|_| UleError::parse::<Self>())?;
448        Ok(())
449    }
450
451    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
452        // self is really just a wrapper around a byte slice
453        mem::transmute(bytes)
454    }
455
456    fn as_bytes(&self) -> &[u8] {
457        &self.entire_slice
458    }
459}
460
461impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
462    type Output = T;
463    fn index(&self, index: usize) -> &Self::Output {
464        #[expect(clippy::panic)] // documented
465        match self.get(index) {
466            Some(x) => x,
467            None => {
    ::core::panicking::panic_fmt(format_args!("index out of bounds: the len is {0} but the index is {1}",
            self.len(), index));
}panic!(
468                "index out of bounds: the len is {} but the index is {index}",
469                self.len()
470            ),
471        }
472    }
473}
474
475impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
476where
477    T: VarULE,
478    T: ?Sized,
479    T: PartialEq,
480    F: VarZeroVecFormat,
481{
482    #[inline]
483    fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
484        // VarULE has an API guarantee that this is equivalent
485        // to `T::VarULE::eq()`
486        self.entire_slice.eq(&other.entire_slice)
487    }
488}
489
490impl<T, F> Eq for VarZeroSlice<T, F>
491where
492    T: VarULE,
493    T: ?Sized,
494    T: Eq,
495    F: VarZeroVecFormat,
496{
497}
498
499impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
500    #[inline]
501    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
502        self.iter().partial_cmp(other.iter())
503    }
504}
505
506impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
507    #[inline]
508    fn cmp(&self, other: &Self) -> Ordering {
509        self.iter().cmp(other.iter())
510    }
511}
512
513impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
514where
515    T: fmt::Debug,
516{
517    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
518        f.debug_list().entries(self.iter()).finish()
519    }
520}
521
522impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
523    fn as_ref(&self) -> &VarZeroSlice<T, F> {
524        self
525    }
526}