zerovec/varzerovec/
vec.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::ule::*;
6
7use core::cmp::{Ord, Ordering, PartialOrd};
8use core::fmt;
9use core::ops::Deref;
10
11use super::*;
12
13/// A zero-copy, byte-aligned vector for variable-width types.
14///
15/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
16/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
17/// where `T`'s data is variable-length (e.g. `String`)
18///
19/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
20/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
21/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
22/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
23///
24/// For example, here are some owned types and their zero-copy equivalents:
25///
26/// - `Vec<String>`: `VarZeroVec<'a, str>`
27/// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>`
28/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
29/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
30///
31/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
32///
33/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
34///
35/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
36/// owned data (and then mutated!) but can also borrow from some buffer.
37///
38/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
39/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
40///
41/// # Bytes and Equality
42///
43/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
44/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
45/// across major SemVer releases.
46///
47/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
48/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
49/// [`VarZeroVec<T>`] or vice-versa.
50///
51/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
52/// under consideration, with different options along the space-time spectrum. See
53/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
54///
55/// # Example
56///
57/// ```rust
58/// use zerovec::VarZeroVec;
59///
60/// // The little-endian bytes correspond to the list of strings.
61/// let strings = vec!["w", "ω", "文", "𑄃"];
62///
63/// #[derive(serde::Serialize, serde::Deserialize)]
64/// struct Data<'a> {
65///     #[serde(borrow)]
66///     strings: VarZeroVec<'a, str>,
67/// }
68///
69/// let data = Data {
70///     strings: VarZeroVec::from(&strings),
71/// };
72///
73/// let bincode_bytes =
74///     bincode::serialize(&data).expect("Serialization should be successful");
75///
76/// // Will deserialize without allocations
77/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
78///     .expect("Deserialization should be successful");
79///
80/// assert_eq!(deserialized.strings.get(2), Some("文"));
81/// assert_eq!(deserialized.strings, &*strings);
82/// ```
83///
84/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
85///
86/// ```rust
87/// use zerovec::VarZeroVec;
88/// use zerovec::ZeroSlice;
89///
90/// // The structured list correspond to the list of integers.
91/// let numbers: &[&[u32]] = &[
92///     &[12, 25, 38],
93///     &[39179, 100],
94///     &[42, 55555],
95///     &[12345, 54321, 9],
96/// ];
97///
98/// #[derive(serde::Serialize, serde::Deserialize)]
99/// struct Data<'a> {
100///     #[serde(borrow)]
101///     vecs: VarZeroVec<'a, ZeroSlice<u32>>,
102/// }
103///
104/// let data = Data {
105///     vecs: VarZeroVec::from(numbers),
106/// };
107///
108/// let bincode_bytes =
109///     bincode::serialize(&data).expect("Serialization should be successful");
110///
111/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
112///     .expect("Deserialization should be successful");
113///
114/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
115/// assert_eq!(deserialized.vecs[1], *numbers[1]);
116/// ```
117///
118/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
119/// for more information.
120///
121/// # How it Works
122///
123/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
124/// serialize to a specially formatted list of bytes. The format is:
125///
126/// -  2 bytes for `length` (interpreted as a little-endian u16)
127/// - `2 * (length - 1)` bytes of `indices` (interpreted as little-endian u16s)
128/// - Remaining bytes for actual `data`
129///
130/// The format is tweakable by setting the `F` parameter, by default it uses u16 indices and lengths but other
131/// `VarZeroVecFormat` types can set other sizes.
132///
133/// Each element in the `indices` array points to the ending index of its corresponding
134/// data part in the `data` list. The starting index can be calculated from the ending index
135/// of the next element (or 0 for the first element). The last ending index, not stored in the array, is
136/// the length of the `data` segment.
137///
138/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
139///
140/// [`ule`]: crate::ule
141pub struct VarZeroVec<'a, T: ?Sized, F = Index16>(pub(crate) VarZeroVecInner<'a, T, F>);
142
143pub(crate) enum VarZeroVecInner<'a, T: ?Sized, F = Index16> {
144    #[cfg(feature = "alloc")]
145    Owned(VarZeroVecOwned<T, F>),
146    Borrowed(&'a VarZeroSlice<T, F>),
147}
148
149impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
150    fn clone(&self) -> Self {
151        match self.0 {
152            #[cfg(feature = "alloc")]
153            VarZeroVecInner::Owned(ref o) => o.clone().into(),
154            VarZeroVecInner::Borrowed(b) => b.into(),
155        }
156    }
157}
158
159impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F>
160where
161    T: fmt::Debug,
162{
163    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
164        VarZeroSlice::fmt(self, f)
165    }
166}
167
168#[cfg(feature = "alloc")]
169impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
170    #[inline]
171    fn from(other: VarZeroVecOwned<T, F>) -> Self {
172        Self(VarZeroVecInner::Owned(other))
173    }
174}
175
176impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
177    fn from(other: &'a VarZeroSlice<T, F>) -> Self {
178        Self(VarZeroVecInner::Borrowed(other))
179    }
180}
181
182#[cfg(feature = "alloc")]
183impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>>
184    for VarZeroVecOwned<T, F>
185{
186    #[inline]
187    fn from(other: VarZeroVec<'a, T, F>) -> Self {
188        match other.0 {
189            VarZeroVecInner::Owned(o) => o,
190            VarZeroVecInner::Borrowed(b) => b.into(),
191        }
192    }
193}
194
195impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Default for VarZeroVec<'_, T, F> {
196    #[inline]
197    fn default() -> Self {
198        Self::new()
199    }
200}
201
202impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> {
203    type Target = VarZeroSlice<T, F>;
204    fn deref(&self) -> &VarZeroSlice<T, F> {
205        self.as_slice()
206    }
207}
208
209impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
210    /// Creates a new, empty `VarZeroVec<T>`.
211    ///
212    /// # Examples
213    ///
214    /// ```
215    /// use zerovec::VarZeroVec;
216    ///
217    /// let vzv: VarZeroVec<str> = VarZeroVec::new();
218    /// assert!(vzv.is_empty());
219    /// ```
220    #[inline]
221    pub const fn new() -> Self {
222        Self(VarZeroVecInner::Borrowed(VarZeroSlice::new_empty()))
223    }
224
225    /// Parse a VarZeroVec from a slice of the appropriate format
226    ///
227    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
228    ///
229    /// # Example
230    ///
231    /// ```rust
232    /// # use zerovec::VarZeroVec;
233    ///
234    /// let strings = vec!["foo", "bar", "baz", "quux"];
235    /// let vec = VarZeroVec::<str>::from(&strings);
236    ///
237    /// assert_eq!(&vec[0], "foo");
238    /// assert_eq!(&vec[1], "bar");
239    /// assert_eq!(&vec[2], "baz");
240    /// assert_eq!(&vec[3], "quux");
241    /// ```
242    pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, UleError> {
243        let borrowed = VarZeroSlice::<T, F>::parse_bytes(slice)?;
244
245        Ok(Self(VarZeroVecInner::Borrowed(borrowed)))
246    }
247
248    /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
249    ///
250    /// # Safety
251    ///
252    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
253    pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
254        Self(VarZeroVecInner::Borrowed(core::mem::transmute::<
255            &[u8],
256            &VarZeroSlice<T, F>,
257        >(bytes)))
258    }
259
260    /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
261    ///
262    /// ✨ *Enabled with the `alloc` Cargo feature.*
263    ///
264    /// # Example
265    ///
266    /// ```rust,ignore
267    /// # use zerovec::VarZeroVec;
268    /// let strings = vec!["foo", "bar", "baz", "quux"];
269    /// let mut vec = VarZeroVec::<str>::from(&strings);
270    ///
271    /// assert_eq!(vec.len(), 4);
272    /// let mutvec = vec.make_mut();
273    /// mutvec.push("lorem ipsum".into());
274    /// mutvec[2] = "dolor sit".into();
275    /// assert_eq!(&vec[0], "foo");
276    /// assert_eq!(&vec[1], "bar");
277    /// assert_eq!(&vec[2], "dolor sit");
278    /// assert_eq!(&vec[3], "quux");
279    /// assert_eq!(&vec[4], "lorem ipsum");
280    /// ```
281    //
282    // This function is crate-public for now since we don't yet want to stabilize
283    // the internal implementation details
284    #[cfg(feature = "alloc")]
285    pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
286        match self.0 {
287            VarZeroVecInner::Owned(ref mut vec) => vec,
288            VarZeroVecInner::Borrowed(slice) => {
289                let new_self = VarZeroVecOwned::from_slice(slice);
290                *self = new_self.into();
291                // recursion is limited since we are guaranteed to hit the Owned branch
292                self.make_mut()
293            }
294        }
295    }
296
297    /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned.
298    ///
299    /// ✨ *Enabled with the `alloc` Cargo feature.*
300    ///
301    /// # Example
302    ///
303    /// ```
304    /// # use zerovec::VarZeroVec;
305    ///
306    /// let strings = vec!["foo", "bar", "baz", "quux"];
307    /// let vec = VarZeroVec::<str>::from(&strings);
308    ///
309    /// assert_eq!(vec.len(), 4);
310    /// // has 'static lifetime
311    /// let owned = vec.into_owned();
312    /// ```
313    #[cfg(feature = "alloc")]
314    pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> {
315        self.make_mut();
316        match self.0 {
317            VarZeroVecInner::Owned(vec) => vec.into(),
318            _ => unreachable!(),
319        }
320    }
321
322    /// Obtain this `VarZeroVec` as a [`VarZeroSlice`]
323    pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
324        match self.0 {
325            #[cfg(feature = "alloc")]
326            VarZeroVecInner::Owned(ref owned) => owned,
327            VarZeroVecInner::Borrowed(b) => b,
328        }
329    }
330
331    /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed,
332    /// this function allocates a byte vector and copies the borrowed bytes into it.
333    ///
334    /// The bytes can be passed back to [`Self::parse_bytes()`].
335    ///
336    /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
337    ///
338    /// ✨ *Enabled with the `alloc` Cargo feature.*
339    ///
340    /// # Example
341    ///
342    /// ```rust
343    /// # use zerovec::VarZeroVec;
344    ///
345    /// let strings = vec!["foo", "bar", "baz"];
346    /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
347    ///
348    /// let mut borrowed: VarZeroVec<str> =
349    ///     VarZeroVec::parse_bytes(&bytes).unwrap();
350    /// assert_eq!(borrowed, &*strings);
351    /// ```
352    #[cfg(feature = "alloc")]
353    pub fn into_bytes(self) -> alloc::vec::Vec<u8> {
354        match self.0 {
355            VarZeroVecInner::Owned(vec) => vec.into_bytes(),
356            VarZeroVecInner::Borrowed(vec) => vec.as_bytes().to_vec(),
357        }
358    }
359
360    /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
361    /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
362    /// be used to force it into an owned type
363    pub fn is_owned(&self) -> bool {
364        match self.0 {
365            #[cfg(feature = "alloc")]
366            VarZeroVecInner::Owned(..) => true,
367            VarZeroVecInner::Borrowed(..) => false,
368        }
369    }
370
371    #[doc(hidden)]
372    pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
373        self.as_slice().as_components()
374    }
375}
376
377#[cfg(feature = "alloc")]
378impl<A, T, F> From<&alloc::vec::Vec<A>> for VarZeroVec<'static, T, F>
379where
380    T: VarULE + ?Sized,
381    A: EncodeAsVarULE<T>,
382    F: VarZeroVecFormat,
383{
384    #[inline]
385    fn from(elements: &alloc::vec::Vec<A>) -> Self {
386        Self::from(elements.as_slice())
387    }
388}
389
390#[cfg(feature = "alloc")]
391impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
392where
393    T: VarULE + ?Sized,
394    A: EncodeAsVarULE<T>,
395    F: VarZeroVecFormat,
396{
397    #[inline]
398    fn from(elements: &[A]) -> Self {
399        if elements.is_empty() {
400            VarZeroSlice::new_empty().into()
401        } else {
402            #[expect(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
403            VarZeroVecOwned::try_from_elements(elements).unwrap().into()
404        }
405    }
406}
407
408#[cfg(feature = "alloc")]
409impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
410where
411    T: VarULE + ?Sized,
412    A: EncodeAsVarULE<T>,
413    F: VarZeroVecFormat,
414{
415    #[inline]
416    fn from(elements: &[A; N]) -> Self {
417        Self::from(elements.as_slice())
418    }
419}
420
421impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
422where
423    T: VarULE,
424    T: ?Sized,
425    T: PartialEq,
426    F: VarZeroVecFormat,
427{
428    #[inline]
429    fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
430        // VZV::from_elements used to produce a non-canonical representation of the
431        // empty VZV, so we cannot use byte equality for empty vecs.
432        if self.is_empty() || other.is_empty() {
433            return self.is_empty() && other.is_empty();
434        }
435        // VarULE has an API guarantee that byte equality is semantic equality.
436        // For non-empty VZVs, there's only a single metadata representation,
437        // so this guarantee extends to the whole VZV representation.
438        self.as_bytes().eq(other.as_bytes())
439    }
440}
441
442impl<'a, T, F> Eq for VarZeroVec<'a, T, F>
443where
444    T: VarULE,
445    T: ?Sized,
446    T: Eq,
447    F: VarZeroVecFormat,
448{
449}
450
451impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
452where
453    T: VarULE + ?Sized,
454    T: PartialEq,
455    A: AsRef<T>,
456    F: VarZeroVecFormat,
457{
458    #[inline]
459    fn eq(&self, other: &&[A]) -> bool {
460        self.iter().eq(other.iter().map(|t| t.as_ref()))
461    }
462}
463
464impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
465where
466    T: VarULE + ?Sized,
467    T: PartialEq,
468    A: AsRef<T>,
469    F: VarZeroVecFormat,
470{
471    #[inline]
472    fn eq(&self, other: &[A; N]) -> bool {
473        self.iter().eq(other.iter().map(|t| t.as_ref()))
474    }
475}
476
477impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> {
478    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
479        self.iter().partial_cmp(other.iter())
480    }
481}
482
483impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> {
484    fn cmp(&self, other: &Self) -> Ordering {
485        self.iter().cmp(other.iter())
486    }
487}
488
489#[test]
490fn assert_single_empty_representation() {
491    assert_eq!(
492        VarZeroVec::<str>::new().as_bytes(),
493        VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
494    );
495
496    use crate::map::MutableZeroVecLike;
497    let mut vzv = VarZeroVec::<str>::from(&["hello", "world"][..]);
498    assert_eq!(vzv.len(), 2);
499    assert!(!vzv.as_bytes().is_empty());
500    vzv.zvl_remove(0);
501    assert_eq!(vzv.len(), 1);
502    assert!(!vzv.as_bytes().is_empty());
503    vzv.zvl_remove(0);
504    assert_eq!(vzv.len(), 0);
505    assert!(vzv.as_bytes().is_empty());
506    vzv.zvl_insert(0, "something");
507    assert_eq!(vzv.len(), 1);
508    assert!(!vzv.as_bytes().is_empty());
509}
510
511#[test]
512fn weird_empty_representation_equality() {
513    assert_eq!(
514        VarZeroVec::<str>::parse_bytes(&[0, 0, 0, 0]).unwrap(),
515        VarZeroVec::<str>::parse_bytes(&[]).unwrap()
516    );
517}