Skip to main content

zerovec/varzerovec/
slice.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::{VarZeroSliceIter, VarZeroVecComponents};
6use super::vec::VarZeroVecInner;
7use super::*;
8use crate::ule::*;
9use core::cmp::{Ord, Ordering, PartialOrd};
10use core::fmt;
11use core::marker::PhantomData;
12
13use core::ops::Index;
14use core::ops::Range;
15
16#[cfg(feature = "alloc")]
17use alloc::{boxed::Box, vec::Vec};
18
19/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
20/// where `T` is not `Sized`.
21///
22/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
23/// owned data and as such is ideal for deserialization since most human readable
24/// serialization formats cannot unconditionally deserialize zero-copy.
25///
26/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
27/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
28/// using `VarZeroVec<ZeroSlice<T>>`.
29///
30/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
31/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
32///
33/// This type can be nested within itself to allow for multi-level nested `Vec`s.
34///
35/// # Examples
36///
37/// ## Nested Slices
38///
39/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
40///
41/// ```rust
42/// use zerovec::{VarZeroSlice, VarZeroVec};
43/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
44/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
45/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
46/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
47/// let strings_12 = vec![&*strings_1, &*strings_2];
48/// let strings_34 = vec![&*strings_3, &*strings_4];
49/// let all_strings = vec![strings_12, strings_34];
50///
51/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
52/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
53/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
54/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
55/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
56/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
57/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
58///
59/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
60///     .iter()
61///     .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
62///         v.iter()
63///             .map(|x: &VarZeroSlice<_>| {
64///                 x.as_varzerovec()
65///                     .iter()
66///                     .map(|s| s.to_owned())
67///                     .collect::<Vec<String>>()
68///             })
69///             .collect::<Vec<_>>()
70///     })
71///     .collect::<Vec<_>>();
72/// assert_eq!(reconstructed, all_strings);
73///
74/// let bytes = vzv_all.as_bytes();
75/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
76///     VarZeroVec::parse_bytes(bytes).unwrap();
77/// assert_eq!(vzv_from_bytes, vzv_all);
78/// ```
79///
80/// ## Iterate over Windows
81///
82/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
83/// [`core::slice::Windows`], this behavior can be easily modeled using an iterator:
84///
85/// ```
86/// use zerovec::VarZeroVec;
87///
88/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
89/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
90///
91/// let mut it = vzv.iter().peekable();
92/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
93///     // Evaluate (x, y) here.
94/// #   pairs.push((x, y));
95/// }
96/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
97/// ```
98//
99// safety invariant: The slice MUST be one which parses to
100// a valid VarZeroVecComponents<T>
101#[repr(transparent)]
102pub struct VarZeroSlice<T: ?Sized, F = Index16> {
103    marker: PhantomData<(F, T)>,
104    /// The original slice this was constructed from
105    entire_slice: [u8],
106}
107
108impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
109    /// Construct a new empty [`VarZeroSlice`]
110    pub const fn new_empty() -> &'static Self {
111        // The empty VZV is special-cased to the empty slice
112        unsafe { &*(&[] as *const [u8] as *const Self) }
113    }
114
115    /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
116    #[inline]
117    pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
118        unsafe {
119            // safety: VarZeroSlice is guaranteed to parse here
120            VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
121        }
122    }
123
124    /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
125    ///
126    /// # Safety
127    ///
128    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
129    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
130        // self is really just a wrapper around a byte slice
131        &*(bytes as *const [u8] as *const Self)
132    }
133
134    /// Get the number of elements in this slice
135    ///
136    /// # Example
137    ///
138    /// ```rust
139    /// # use zerovec::VarZeroVec;
140    ///
141    /// let strings = vec!["foo", "bar", "baz", "quux"];
142    /// let vec = VarZeroVec::<str>::from(&strings);
143    ///
144    /// assert_eq!(vec.len(), 4);
145    /// ```
146    pub fn len(&self) -> usize {
147        self.as_components().len()
148    }
149
150    /// Returns `true` if the slice contains no elements.
151    ///
152    /// # Examples
153    ///
154    /// ```
155    /// # use zerovec::VarZeroVec;
156    ///
157    /// let strings: Vec<String> = vec![];
158    /// let vec = VarZeroVec::<str>::from(&strings);
159    ///
160    /// assert!(vec.is_empty());
161    /// ```
162    pub fn is_empty(&self) -> bool {
163        self.as_components().is_empty()
164    }
165
166    /// Obtain an iterator over this slice's elements
167    ///
168    /// # Example
169    ///
170    /// ```rust
171    /// # use zerovec::VarZeroVec;
172    ///
173    /// let strings = vec!["foo", "bar", "baz", "quux"];
174    /// let vec = VarZeroVec::<str>::from(&strings);
175    ///
176    /// let mut iter_results: Vec<&str> = vec.iter().collect();
177    /// assert_eq!(iter_results[0], "foo");
178    /// assert_eq!(iter_results[1], "bar");
179    /// assert_eq!(iter_results[2], "baz");
180    /// assert_eq!(iter_results[3], "quux");
181    /// ```
182    pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
183        self.as_components().iter()
184    }
185
186    /// Get one of this slice's elements, returning `None` if the index is out of bounds
187    ///
188    /// # Example
189    ///
190    /// ```rust
191    /// # use zerovec::VarZeroVec;
192    ///
193    /// let strings = vec!["foo", "bar", "baz", "quux"];
194    /// let vec = VarZeroVec::<str>::from(&strings);
195    ///
196    /// let mut iter_results: Vec<&str> = vec.iter().collect();
197    /// assert_eq!(vec.get(0), Some("foo"));
198    /// assert_eq!(vec.get(1), Some("bar"));
199    /// assert_eq!(vec.get(2), Some("baz"));
200    /// assert_eq!(vec.get(3), Some("quux"));
201    /// assert_eq!(vec.get(4), None);
202    /// ```
203    pub fn get(&self, idx: usize) -> Option<&T> {
204        self.as_components().get(idx)
205    }
206
207    /// Get one of this slice's elements
208    ///
209    /// # Safety
210    ///
211    /// `index` must be in range
212    ///
213    /// # Example
214    ///
215    /// ```rust
216    /// # use zerovec::VarZeroVec;
217    ///
218    /// let strings = vec!["foo", "bar", "baz", "quux"];
219    /// let vec = VarZeroVec::<str>::from(&strings);
220    ///
221    /// let mut iter_results: Vec<&str> = vec.iter().collect();
222    /// unsafe {
223    ///     assert_eq!(vec.get_unchecked(0), "foo");
224    ///     assert_eq!(vec.get_unchecked(1), "bar");
225    ///     assert_eq!(vec.get_unchecked(2), "baz");
226    ///     assert_eq!(vec.get_unchecked(3), "quux");
227    /// }
228    /// ```
229    pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
230        self.as_components().get_unchecked(idx)
231    }
232
233    /// Obtain an owned `Vec<Box<T>>` out of this
234    ///
235    /// ✨ *Enabled with the `alloc` Cargo feature.*
236    #[cfg(feature = "alloc")]
237    pub fn to_vec(&self) -> Vec<Box<T>> {
238        self.as_components().to_vec()
239    }
240
241    /// Get a reference to the entire encoded backing buffer of this slice
242    ///
243    /// The bytes can be passed back to [`Self::parse_bytes()`].
244    ///
245    /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
246    ///
247    /// # Example
248    ///
249    /// ```rust
250    /// # use zerovec::VarZeroVec;
251    ///
252    /// let strings = vec!["foo", "bar", "baz"];
253    /// let vzv = VarZeroVec::<str>::from(&strings);
254    ///
255    /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap());
256    /// ```
257    #[inline]
258    pub const fn as_bytes(&self) -> &[u8] {
259        &self.entire_slice
260    }
261
262    /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
263    ///
264    /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
265    /// it is more efficient to perform this conversion first
266    pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
267        VarZeroVec(VarZeroVecInner::Borrowed(self))
268    }
269
270    /// Parse a [`VarZeroSlice`] from a slice of the appropriate format
271    ///
272    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
273    pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> {
274        <Self as VarULE>::parse_bytes(slice)
275    }
276}
277
278impl<T, F> VarZeroSlice<T, F>
279where
280    T: VarULE,
281    T: ?Sized,
282    T: Ord,
283    F: VarZeroVecFormat,
284{
285    /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
286    /// the standard library function [`binary_search`].
287    ///
288    /// # Example
289    ///
290    /// ```
291    /// # use zerovec::VarZeroVec;
292    ///
293    /// let strings = vec!["a", "b", "f", "g"];
294    /// let vec = VarZeroVec::<str>::from(&strings);
295    ///
296    /// assert_eq!(vec.binary_search("f"), Ok(2));
297    /// assert_eq!(vec.binary_search("e"), Err(2));
298    /// ```
299    ///
300    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
301    #[inline]
302    pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
303        self.as_components().binary_search(x)
304    }
305
306    /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
307    ///
308    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
309    /// to the behavior of the standard library function [`binary_search`].
310    ///
311    /// The index is returned relative to the start of the range.
312    ///
313    /// # Example
314    ///
315    /// ```
316    /// # use zerovec::VarZeroVec;
317    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
318    /// let vec = VarZeroVec::<str>::from(&strings);
319    ///
320    /// // Same behavior as binary_search when the range covers the whole slice:
321    /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
322    /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
323    ///
324    /// // Will not look outside of the range:
325    /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
326    /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
327    ///
328    /// // Will return indices relative to the start of the range:
329    /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
330    /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
331    ///
332    /// // Will return `None` if the range is out of bounds:
333    /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
334    /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
335    /// ```
336    ///
337    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
338    #[inline]
339    pub fn binary_search_in_range(
340        &self,
341        x: &T,
342        range: Range<usize>,
343    ) -> Option<Result<usize, usize>> {
344        self.as_components().binary_search_in_range(x, range)
345    }
346}
347
348impl<T, F> VarZeroSlice<T, F>
349where
350    T: VarULE,
351    T: ?Sized,
352    F: VarZeroVecFormat,
353{
354    /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
355    /// the standard library function [`binary_search_by`].
356    ///
357    /// # Example
358    ///
359    /// ```
360    /// # use zerovec::VarZeroVec;
361    /// let strings = vec!["a", "b", "f", "g"];
362    /// let vec = VarZeroVec::<str>::from(&strings);
363    ///
364    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
365    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
366    /// ```
367    ///
368    /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
369    #[inline]
370    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
371        self.as_components().binary_search_by(predicate)
372    }
373
374    /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
375    ///
376    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
377    /// to the behavior of the standard library function [`binary_search`].
378    ///
379    /// The index is returned relative to the start of the range.
380    ///
381    /// # Example
382    ///
383    /// ```
384    /// # use zerovec::VarZeroVec;
385    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
386    /// let vec = VarZeroVec::<str>::from(&strings);
387    ///
388    /// // Same behavior as binary_search when the range covers the whole slice:
389    /// assert_eq!(
390    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
391    ///     Some(Ok(3))
392    /// );
393    /// assert_eq!(
394    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
395    ///     Some(Err(4))
396    /// );
397    ///
398    /// // Will not look outside of the range:
399    /// assert_eq!(
400    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
401    ///     Some(Err(1))
402    /// );
403    /// assert_eq!(
404    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
405    ///     Some(Err(0))
406    /// );
407    ///
408    /// // Will return indices relative to the start of the range:
409    /// assert_eq!(
410    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
411    ///     Some(Ok(2))
412    /// );
413    /// assert_eq!(
414    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
415    ///     Some(Err(3))
416    /// );
417    ///
418    /// // Will return `None` if the range is out of bounds:
419    /// assert_eq!(
420    ///     vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
421    ///     None
422    /// );
423    /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
424    /// ```
425    ///
426    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
427    pub fn binary_search_in_range_by(
428        &self,
429        predicate: impl FnMut(&T) -> Ordering,
430        range: Range<usize>,
431    ) -> Option<Result<usize, usize>> {
432        self.as_components()
433            .binary_search_in_range_by(predicate, range)
434    }
435}
436// Safety (based on the safety checklist on the VarULE trait):
437//  1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
438//     `[u8]` slice which satisfies this invariant)
439//  2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
440//     `[u8]` slice which satisfies this invariant)
441//  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
442//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
443//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
444//  6. `as_bytes()` is equivalent to a regular transmute of the underlying data
445//  7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
446unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
447    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
448        let _ = VarZeroVecComponents::<T, F>::parse_bytes(bytes)
449            .map_err(|_| UleError::parse::<Self>())?;
450        Ok(())
451    }
452
453    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
454        // self is really just a wrapper around a byte slice
455        &*(bytes as *const [u8] as *const Self)
456    }
457
458    fn as_bytes(&self) -> &[u8] {
459        &self.entire_slice
460    }
461}
462
463impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
464    type Output = T;
465    fn index(&self, index: usize) -> &Self::Output {
466        #[expect(clippy::panic)] // documented
467        match self.get(index) {
468            Some(x) => x,
469            None => {
    ::core::panicking::panic_fmt(format_args!("index out of bounds: the len is {0} but the index is {1}",
            self.len(), index));
}panic!(
470                "index out of bounds: the len is {} but the index is {index}",
471                self.len()
472            ),
473        }
474    }
475}
476
477impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
478where
479    T: VarULE,
480    T: ?Sized,
481    T: PartialEq,
482    F: VarZeroVecFormat,
483{
484    #[inline]
485    fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
486        // VarULE has an API guarantee that this is equivalent
487        // to `T::VarULE::eq()`
488        self.entire_slice.eq(&other.entire_slice)
489    }
490}
491
492impl<T, F> Eq for VarZeroSlice<T, F>
493where
494    T: VarULE,
495    T: ?Sized,
496    T: Eq,
497    F: VarZeroVecFormat,
498{
499}
500
501impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
502    #[inline]
503    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
504        self.iter().partial_cmp(other.iter())
505    }
506}
507
508impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
509    #[inline]
510    fn cmp(&self, other: &Self) -> Ordering {
511        self.iter().cmp(other.iter())
512    }
513}
514
515impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
516where
517    T: fmt::Debug,
518{
519    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
520        f.debug_list().entries(self.iter()).finish()
521    }
522}
523
524impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
525    fn as_ref(&self) -> &VarZeroSlice<T, F> {
526        self
527    }
528}