Skip to main content

zerovec/varzerovec/
vec.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::ule::*;
6
7use core::cmp::{Ord, Ordering, PartialOrd};
8use core::fmt;
9use core::ops::Deref;
10
11use super::*;
12#[cfg(feature = "alloc")]
13use alloc::vec::Vec;
14
15/// A zero-copy, byte-aligned vector for variable-width types.
16///
17/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
18/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
19/// where `T`'s data is variable-length (e.g. `String`)
20///
21/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
22/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
23/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
24/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
25///
26/// For example, here are some owned types and their zero-copy equivalents:
27///
28/// - `Vec<String>`: `VarZeroVec<'a, str>`
29/// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>`
30/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
31/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
32///
33/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
34///
35/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
36///
37/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
38/// owned data (and then mutated!) but can also borrow from some buffer.
39///
40/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
41/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
42///
43/// # Bytes and Equality
44///
45/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
46/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
47/// across major SemVer releases.
48///
49/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
50/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
51/// [`VarZeroVec<T>`] or vice-versa.
52///
53/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
54/// under consideration, with different options along the space-time spectrum. See
55/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
56///
57/// # Example
58///
59/// ```rust
60/// use zerovec::VarZeroVec;
61///
62/// // The little-endian bytes correspond to the list of strings.
63/// let strings = vec!["w", "ω", "文", "𑄃"];
64///
65/// #[derive(serde::Serialize, serde::Deserialize)]
66/// struct Data<'a> {
67///     #[serde(borrow)]
68///     strings: VarZeroVec<'a, str>,
69/// }
70///
71/// let data = Data {
72///     strings: VarZeroVec::from(&strings),
73/// };
74///
75/// let bincode_bytes =
76///     bincode::serialize(&data).expect("Serialization should be successful");
77///
78/// // Will deserialize without allocations
79/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
80///     .expect("Deserialization should be successful");
81///
82/// assert_eq!(deserialized.strings.get(2), Some("文"));
83/// assert_eq!(deserialized.strings, &*strings);
84/// ```
85///
86/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
87///
88/// ```rust
89/// use zerovec::VarZeroVec;
90/// use zerovec::ZeroSlice;
91///
92/// // The structured list correspond to the list of integers.
93/// let numbers: &[&[u32]] = &[
94///     &[12, 25, 38],
95///     &[39179, 100],
96///     &[42, 55555],
97///     &[12345, 54321, 9],
98/// ];
99///
100/// #[derive(serde::Serialize, serde::Deserialize)]
101/// struct Data<'a> {
102///     #[serde(borrow)]
103///     vecs: VarZeroVec<'a, ZeroSlice<u32>>,
104/// }
105///
106/// let data = Data {
107///     vecs: VarZeroVec::from(numbers),
108/// };
109///
110/// let bincode_bytes =
111///     bincode::serialize(&data).expect("Serialization should be successful");
112///
113/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
114///     .expect("Deserialization should be successful");
115///
116/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
117/// assert_eq!(deserialized.vecs[1], *numbers[1]);
118/// ```
119///
120/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
121/// for more information.
122///
123/// # How it Works
124///
125/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
126/// serialize to a specially formatted list of bytes. The format is:
127///
128/// -  2 bytes for `length` (interpreted as a little-endian u16)
129/// - `2 * (length - 1)` bytes of `indices` (interpreted as little-endian u16s)
130/// - Remaining bytes for actual `data`
131///
132/// The format is tweakable by setting the `F` parameter, by default it uses u16 indices and lengths but other
133/// `VarZeroVecFormat` types can set other sizes.
134///
135/// Each element in the `indices` array points to the ending index of its corresponding
136/// data part in the `data` list. The starting index can be calculated from the ending index
137/// of the next element (or 0 for the first element). The last ending index, not stored in the array, is
138/// the length of the `data` segment.
139///
140/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
141///
142/// [`ule`]: crate::ule
143pub struct VarZeroVec<'a, T: ?Sized, F = Index16>(pub(crate) VarZeroVecInner<'a, T, F>);
144
145pub(crate) enum VarZeroVecInner<'a, T: ?Sized, F = Index16> {
146    #[cfg(feature = "alloc")]
147    Owned(VarZeroVecOwned<T, F>),
148    Borrowed(&'a VarZeroSlice<T, F>),
149}
150
151impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
152    fn clone(&self) -> Self {
153        match self.0 {
154            #[cfg(feature = "alloc")]
155            VarZeroVecInner::Owned(ref o) => o.clone().into(),
156            VarZeroVecInner::Borrowed(b) => b.into(),
157        }
158    }
159}
160
161impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F>
162where
163    T: fmt::Debug,
164{
165    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166        VarZeroSlice::fmt(self, f)
167    }
168}
169
170#[cfg(feature = "alloc")]
171impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
172    #[inline]
173    fn from(other: VarZeroVecOwned<T, F>) -> Self {
174        Self(VarZeroVecInner::Owned(other))
175    }
176}
177
178impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
179    fn from(other: &'a VarZeroSlice<T, F>) -> Self {
180        Self(VarZeroVecInner::Borrowed(other))
181    }
182}
183
184#[cfg(feature = "alloc")]
185impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>>
186    for VarZeroVecOwned<T, F>
187{
188    #[inline]
189    fn from(other: VarZeroVec<'a, T, F>) -> Self {
190        match other.0 {
191            VarZeroVecInner::Owned(o) => o,
192            VarZeroVecInner::Borrowed(b) => b.into(),
193        }
194    }
195}
196
197impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Default for VarZeroVec<'_, T, F> {
198    #[inline]
199    fn default() -> Self {
200        Self::new()
201    }
202}
203
204impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> {
205    type Target = VarZeroSlice<T, F>;
206    fn deref(&self) -> &VarZeroSlice<T, F> {
207        self.as_slice()
208    }
209}
210
211impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
212    /// Creates a new, empty `VarZeroVec<T>`.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// use zerovec::VarZeroVec;
218    ///
219    /// let vzv: VarZeroVec<str> = VarZeroVec::new();
220    /// assert!(vzv.is_empty());
221    /// ```
222    #[inline]
223    pub const fn new() -> Self {
224        Self(VarZeroVecInner::Borrowed(VarZeroSlice::new_empty()))
225    }
226
227    /// Parse a [`VarZeroVec`] from a slice of the appropriate format
228    ///
229    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
230    ///
231    /// # Example
232    ///
233    /// ```rust
234    /// # use zerovec::VarZeroVec;
235    ///
236    /// let strings = vec!["foo", "bar", "baz", "quux"];
237    /// let vec = VarZeroVec::<str>::from(&strings);
238    ///
239    /// assert_eq!(&vec[0], "foo");
240    /// assert_eq!(&vec[1], "bar");
241    /// assert_eq!(&vec[2], "baz");
242    /// assert_eq!(&vec[3], "quux");
243    /// ```
244    pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, UleError> {
245        let borrowed = VarZeroSlice::<T, F>::parse_bytes(slice)?;
246
247        Ok(Self(VarZeroVecInner::Borrowed(borrowed)))
248    }
249
250    /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
251    ///
252    /// # Safety
253    ///
254    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
255    pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
256        Self(VarZeroVecInner::Borrowed(core::mem::transmute::<
257            &[u8],
258            &VarZeroSlice<T, F>,
259        >(bytes)))
260    }
261
262    /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
263    ///
264    /// ✨ *Enabled with the `alloc` Cargo feature.*
265    ///
266    /// # Example
267    ///
268    /// ```rust,ignore
269    /// # use zerovec::VarZeroVec;
270    /// let strings = vec!["foo", "bar", "baz", "quux"];
271    /// let mut vec = VarZeroVec::<str>::from(&strings);
272    ///
273    /// assert_eq!(vec.len(), 4);
274    /// let mutvec = vec.make_mut();
275    /// mutvec.push("lorem ipsum".into());
276    /// mutvec[2] = "dolor sit".into();
277    /// assert_eq!(&vec[0], "foo");
278    /// assert_eq!(&vec[1], "bar");
279    /// assert_eq!(&vec[2], "dolor sit");
280    /// assert_eq!(&vec[3], "quux");
281    /// assert_eq!(&vec[4], "lorem ipsum");
282    /// ```
283    //
284    // This function is crate-public for now since we don't yet want to stabilize
285    // the internal implementation details
286    #[cfg(feature = "alloc")]
287    pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
288        match self.0 {
289            VarZeroVecInner::Owned(ref mut vec) => vec,
290            VarZeroVecInner::Borrowed(slice) => {
291                let new_self = VarZeroVecOwned::from_slice(slice);
292                *self = new_self.into();
293                // recursion is limited since we are guaranteed to hit the Owned branch
294                self.make_mut()
295            }
296        }
297    }
298
299    /// Converts a borrowed [`VarZeroVec`] to an owned [`VarZeroVec`]. No-op if already owned.
300    ///
301    /// ✨ *Enabled with the `alloc` Cargo feature.*
302    ///
303    /// # Example
304    ///
305    /// ```
306    /// # use zerovec::VarZeroVec;
307    ///
308    /// let strings = vec!["foo", "bar", "baz", "quux"];
309    /// let vec = VarZeroVec::<str>::from(&strings);
310    ///
311    /// assert_eq!(vec.len(), 4);
312    /// // has 'static lifetime
313    /// let owned = vec.into_owned();
314    /// ```
315    #[cfg(feature = "alloc")]
316    pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> {
317        self.make_mut();
318        match self.0 {
319            VarZeroVecInner::Owned(vec) => vec.into(),
320            _ => unreachable!(),
321        }
322    }
323
324    /// Obtain this [`VarZeroVec`] as a [`VarZeroSlice`]
325    pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
326        match self.0 {
327            #[cfg(feature = "alloc")]
328            VarZeroVecInner::Owned(ref owned) => owned,
329            VarZeroVecInner::Borrowed(b) => b,
330        }
331    }
332
333    /// Takes the byte vector representing the encoded data of this [`VarZeroVec`]. If borrowed,
334    /// this function allocates a byte vector and copies the borrowed bytes into it.
335    ///
336    /// The bytes can be passed back to [`Self::parse_bytes()`].
337    ///
338    /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
339    ///
340    /// ✨ *Enabled with the `alloc` Cargo feature.*
341    ///
342    /// # Example
343    ///
344    /// ```rust
345    /// # use zerovec::VarZeroVec;
346    ///
347    /// let strings = vec!["foo", "bar", "baz"];
348    /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
349    ///
350    /// let mut borrowed: VarZeroVec<str> =
351    ///     VarZeroVec::parse_bytes(&bytes).unwrap();
352    /// assert_eq!(borrowed, &*strings);
353    /// ```
354    #[cfg(feature = "alloc")]
355    pub fn into_bytes(self) -> Vec<u8> {
356        match self.0 {
357            VarZeroVecInner::Owned(vec) => vec.into_bytes(),
358            VarZeroVecInner::Borrowed(vec) => vec.as_bytes().to_vec(),
359        }
360    }
361
362    /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
363    /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
364    /// be used to force it into an owned type
365    pub fn is_owned(&self) -> bool {
366        match self.0 {
367            #[cfg(feature = "alloc")]
368            VarZeroVecInner::Owned(..) => true,
369            VarZeroVecInner::Borrowed(..) => false,
370        }
371    }
372
373    #[doc(hidden)]
374    pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
375        self.as_slice().as_components()
376    }
377}
378
379#[cfg(feature = "alloc")]
380impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F>
381where
382    T: VarULE + ?Sized,
383    A: EncodeAsVarULE<T>,
384    F: VarZeroVecFormat,
385{
386    #[inline]
387    fn from(elements: &Vec<A>) -> Self {
388        Self::from(elements.as_slice())
389    }
390}
391
392#[cfg(feature = "alloc")]
393impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
394where
395    T: VarULE + ?Sized,
396    A: EncodeAsVarULE<T>,
397    F: VarZeroVecFormat,
398{
399    #[inline]
400    fn from(elements: &[A]) -> Self {
401        if elements.is_empty() {
402            VarZeroSlice::new_empty().into()
403        } else {
404            #[expect(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
405            VarZeroVecOwned::try_from_elements(elements).unwrap().into()
406        }
407    }
408}
409
410#[cfg(feature = "alloc")]
411impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
412where
413    T: VarULE + ?Sized,
414    A: EncodeAsVarULE<T>,
415    F: VarZeroVecFormat,
416{
417    #[inline]
418    fn from(elements: &[A; N]) -> Self {
419        Self::from(elements.as_slice())
420    }
421}
422
423impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
424where
425    T: VarULE,
426    T: ?Sized,
427    T: PartialEq,
428    F: VarZeroVecFormat,
429{
430    #[inline]
431    fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
432        // VZV::from_elements used to produce a non-canonical representation of the
433        // empty VZV, so we cannot use byte equality for empty vecs.
434        if self.is_empty() || other.is_empty() {
435            return self.is_empty() && other.is_empty();
436        }
437        // VarULE has an API guarantee that byte equality is semantic equality.
438        // For non-empty VZVs, there's only a single metadata representation,
439        // so this guarantee extends to the whole VZV representation.
440        self.as_bytes().eq(other.as_bytes())
441    }
442}
443
444impl<'a, T, F> Eq for VarZeroVec<'a, T, F>
445where
446    T: VarULE,
447    T: ?Sized,
448    T: Eq,
449    F: VarZeroVecFormat,
450{
451}
452
453impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
454where
455    T: VarULE + ?Sized,
456    T: PartialEq,
457    A: AsRef<T>,
458    F: VarZeroVecFormat,
459{
460    #[inline]
461    fn eq(&self, other: &&[A]) -> bool {
462        self.iter().eq(other.iter().map(|t| t.as_ref()))
463    }
464}
465
466impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
467where
468    T: VarULE + ?Sized,
469    T: PartialEq,
470    A: AsRef<T>,
471    F: VarZeroVecFormat,
472{
473    #[inline]
474    fn eq(&self, other: &[A; N]) -> bool {
475        self.iter().eq(other.iter().map(|t| t.as_ref()))
476    }
477}
478
479impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> {
480    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
481        self.iter().partial_cmp(other.iter())
482    }
483}
484
485impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> {
486    fn cmp(&self, other: &Self) -> Ordering {
487        self.iter().cmp(other.iter())
488    }
489}
490
491#[test]
492fn assert_single_empty_representation() {
493    assert_eq!(
494        VarZeroVec::<str>::new().as_bytes(),
495        VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
496    );
497
498    use crate::map::MutableZeroVecLike;
499    let mut vzv = VarZeroVec::<str>::from(&["hello", "world"][..]);
500    assert_eq!(vzv.len(), 2);
501    assert!(!vzv.as_bytes().is_empty());
502    vzv.zvl_remove(0);
503    assert_eq!(vzv.len(), 1);
504    assert!(!vzv.as_bytes().is_empty());
505    vzv.zvl_remove(0);
506    assert_eq!(vzv.len(), 0);
507    assert!(vzv.as_bytes().is_empty());
508    vzv.zvl_insert(0, "something");
509    assert_eq!(vzv.len(), 1);
510    assert!(!vzv.as_bytes().is_empty());
511}
512
513#[test]
514fn weird_empty_representation_equality() {
515    assert_eq!(
516        VarZeroVec::<str>::parse_bytes(&[0, 0, 0, 0]).unwrap(),
517        VarZeroVec::<str>::parse_bytes(&[]).unwrap()
518    );
519}