zerovec/varzerovec/vec.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::ule::*;
6
7use alloc::vec::Vec;
8use core::cmp::{Ord, Ordering, PartialOrd};
9use core::fmt;
10use core::ops::Deref;
11
12use super::*;
13
14/// A zero-copy, byte-aligned vector for variable-width types.
15///
16/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
17/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
18/// where `T`'s data is variable-length (e.g. `String`)
19///
20/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
21/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
22/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
23/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
24///
25/// For example, here are some owned types and their zero-copy equivalents:
26///
27/// - `Vec<String>`: `VarZeroVec<'a, str>`
28/// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>`
29/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
30/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
31///
32/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
33///
34/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
35///
36/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
37/// owned data (and then mutated!) but can also borrow from some buffer.
38///
39/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
40/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
41///
42/// # Bytes and Equality
43///
44/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
45/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
46/// across major SemVer releases.
47///
48/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
49/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
50/// [`VarZeroVec<T>`] or vice-versa.
51///
52/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
53/// under consideration, with different options along the space-time spectrum. See
54/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
55///
56/// # Example
57///
58/// ```rust
59/// # use zerovec::ule::ZeroVecError;
60/// use zerovec::VarZeroVec;
61///
62/// // The little-endian bytes correspond to the list of strings.
63/// let strings = vec!["w", "ω", "文", "𑄃"];
64///
65/// #[derive(serde::Serialize, serde::Deserialize)]
66/// struct Data<'a> {
67/// #[serde(borrow)]
68/// strings: VarZeroVec<'a, str>,
69/// }
70///
71/// let data = Data {
72/// strings: VarZeroVec::from(&strings),
73/// };
74///
75/// let bincode_bytes =
76/// bincode::serialize(&data).expect("Serialization should be successful");
77///
78/// // Will deserialize without allocations
79/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
80/// .expect("Deserialization should be successful");
81///
82/// assert_eq!(deserialized.strings.get(2), Some("文"));
83/// assert_eq!(deserialized.strings, &*strings);
84/// # Ok::<(), ZeroVecError>(())
85/// ```
86///
87/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
88///
89/// ```rust
90/// # use zerovec::ule::ZeroVecError;
91/// use zerovec::VarZeroVec;
92/// use zerovec::ZeroSlice;
93///
94/// // The structured list correspond to the list of integers.
95/// let numbers: &[&[u32]] = &[
96/// &[12, 25, 38],
97/// &[39179, 100],
98/// &[42, 55555],
99/// &[12345, 54321, 9],
100/// ];
101///
102/// #[derive(serde::Serialize, serde::Deserialize)]
103/// struct Data<'a> {
104/// #[serde(borrow)]
105/// vecs: VarZeroVec<'a, ZeroSlice<u32>>,
106/// }
107///
108/// let data = Data {
109/// vecs: VarZeroVec::from(numbers),
110/// };
111///
112/// let bincode_bytes =
113/// bincode::serialize(&data).expect("Serialization should be successful");
114///
115/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
116/// .expect("Deserialization should be successful");
117///
118/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
119/// assert_eq!(deserialized.vecs[1], *numbers[1]);
120///
121/// # Ok::<(), ZeroVecError>(())
122/// ```
123///
124/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
125/// for more information.
126///
127/// # How it Works
128///
129/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
130/// serialize to a specially formatted list of bytes. The format is:
131///
132/// - 4 bytes for `length` (interpreted as a little-endian u32)
133/// - `4 * length` bytes of `indices` (interpreted as little-endian u32)
134/// - Remaining bytes for actual `data`
135///
136/// Each element in the `indices` array points to the starting index of its corresponding
137/// data part in the `data` list. The ending index can be calculated from the starting index
138/// of the next element (or the length of the slice if dealing with the last element).
139///
140/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
141///
142/// [`ule`]: crate::ule
143#[non_exhaustive]
144pub enum VarZeroVec<'a, T: ?Sized, F = Index16> {
145 /// An allocated VarZeroVec, allowing for mutations.
146 ///
147 /// # Examples
148 ///
149 /// ```
150 /// use zerovec::VarZeroVec;
151 ///
152 /// let mut vzv = VarZeroVec::<str>::default();
153 /// vzv.make_mut().push("foo");
154 /// vzv.make_mut().push("bar");
155 /// assert!(matches!(vzv, VarZeroVec::Owned(_)));
156 /// ```
157 Owned(VarZeroVecOwned<T, F>),
158 /// A borrowed VarZeroVec, requiring no allocations.
159 ///
160 /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned.
161 ///
162 /// # Examples
163 ///
164 /// ```
165 /// use zerovec::VarZeroVec;
166 ///
167 /// let bytes = &[
168 /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240,
169 /// 145, 132, 131,
170 /// ];
171 ///
172 /// let vzv: VarZeroVec<str> = VarZeroVec::parse_byte_slice(bytes).unwrap();
173 /// assert!(matches!(vzv, VarZeroVec::Borrowed(_)));
174 /// ```
175 Borrowed(&'a VarZeroSlice<T, F>),
176}
177
178impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
179 fn clone(&self) -> Self {
180 match *self {
181 VarZeroVec::Owned(ref o) => o.clone().into(),
182 VarZeroVec::Borrowed(b) => b.into(),
183 }
184 }
185}
186
187impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F>
188where
189 T: fmt::Debug,
190{
191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192 VarZeroSlice::fmt(self, f)
193 }
194}
195
196impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
197 #[inline]
198 fn from(other: VarZeroVecOwned<T, F>) -> Self {
199 VarZeroVec::Owned(other)
200 }
201}
202
203impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
204 fn from(other: &'a VarZeroSlice<T, F>) -> Self {
205 VarZeroVec::Borrowed(other)
206 }
207}
208
209impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>>
210 for VarZeroVecOwned<T, F>
211{
212 #[inline]
213 fn from(other: VarZeroVec<'a, T, F>) -> Self {
214 match other {
215 VarZeroVec::Owned(o) => o,
216 VarZeroVec::Borrowed(b) => b.into(),
217 }
218 }
219}
220
221impl<T: VarULE + ?Sized> Default for VarZeroVec<'_, T> {
222 #[inline]
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
228impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> {
229 type Target = VarZeroSlice<T, F>;
230 fn deref(&self) -> &VarZeroSlice<T, F> {
231 self.as_slice()
232 }
233}
234
235impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
236 /// Creates a new, empty `VarZeroVec<T>`.
237 ///
238 /// # Examples
239 ///
240 /// ```
241 /// use zerovec::VarZeroVec;
242 ///
243 /// let vzv: VarZeroVec<str> = VarZeroVec::new();
244 /// assert!(vzv.is_empty());
245 /// ```
246 #[inline]
247 pub const fn new() -> Self {
248 Self::Borrowed(VarZeroSlice::new_empty())
249 }
250
251 /// Parse a VarZeroVec from a slice of the appropriate format
252 ///
253 /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
254 ///
255 /// # Example
256 ///
257 /// ```rust
258 /// # use zerovec::ule::ZeroVecError;
259 /// # use zerovec::VarZeroVec;
260 ///
261 /// let strings = vec!["foo", "bar", "baz", "quux"];
262 /// let vec = VarZeroVec::<str>::from(&strings);
263 ///
264 /// assert_eq!(&vec[0], "foo");
265 /// assert_eq!(&vec[1], "bar");
266 /// assert_eq!(&vec[2], "baz");
267 /// assert_eq!(&vec[3], "quux");
268 /// # Ok::<(), ZeroVecError>(())
269 /// ```
270 pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> {
271 let borrowed = VarZeroSlice::<T, F>::parse_byte_slice(slice)?;
272
273 Ok(VarZeroVec::Borrowed(borrowed))
274 }
275
276 /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
277 ///
278 /// # Safety
279 ///
280 /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
281 pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
282 Self::Borrowed(core::mem::transmute::<&[u8], &VarZeroSlice<T, F>>(bytes))
283 }
284
285 /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
286 ///
287 ///
288 /// # Example
289 ///
290 /// ```rust,ignore
291 /// # use zerovec::ule::ZeroVecError;
292 /// # use zerovec::VarZeroVec;
293 ///
294 /// let strings = vec!["foo", "bar", "baz", "quux"];
295 /// let mut vec = VarZeroVec::<str>::from(&strings);
296 ///
297 /// assert_eq!(vec.len(), 4);
298 /// let mutvec = vec.make_mut();
299 /// mutvec.push("lorem ipsum".into());
300 /// mutvec[2] = "dolor sit".into();
301 /// assert_eq!(&vec[0], "foo");
302 /// assert_eq!(&vec[1], "bar");
303 /// assert_eq!(&vec[2], "dolor sit");
304 /// assert_eq!(&vec[3], "quux");
305 /// assert_eq!(&vec[4], "lorem ipsum");
306 /// # Ok::<(), ZeroVecError>(())
307 /// ```
308 //
309 // This function is crate-public for now since we don't yet want to stabilize
310 // the internal implementation details
311 pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
312 match self {
313 VarZeroVec::Owned(ref mut vec) => vec,
314 VarZeroVec::Borrowed(slice) => {
315 let new_self = VarZeroVecOwned::from_slice(slice);
316 *self = new_self.into();
317 // recursion is limited since we are guaranteed to hit the Owned branch
318 self.make_mut()
319 }
320 }
321 }
322
323 /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned.
324 ///
325 /// # Example
326 ///
327 /// ```
328 /// # use zerovec::ule::ZeroVecError;
329 /// # use zerovec::VarZeroVec;
330 ///
331 /// let strings = vec!["foo", "bar", "baz", "quux"];
332 /// let vec = VarZeroVec::<str>::from(&strings);
333 ///
334 /// assert_eq!(vec.len(), 4);
335 /// // has 'static lifetime
336 /// let owned = vec.into_owned();
337 /// # Ok::<(), ZeroVecError>(())
338 /// ```
339 pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> {
340 self.make_mut();
341 match self {
342 VarZeroVec::Owned(vec) => vec.into(),
343 _ => unreachable!(),
344 }
345 }
346
347 /// Obtain this `VarZeroVec` as a [`VarZeroSlice`]
348 pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
349 match *self {
350 VarZeroVec::Owned(ref owned) => owned,
351 VarZeroVec::Borrowed(b) => b,
352 }
353 }
354
355 /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed,
356 /// this function allocates a byte vector and copies the borrowed bytes into it.
357 ///
358 /// The bytes can be passed back to [`Self::parse_byte_slice()`].
359 ///
360 /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
361 ///
362 /// # Example
363 ///
364 /// ```rust
365 /// # use zerovec::ule::ZeroVecError;
366 /// # use zerovec::VarZeroVec;
367 ///
368 /// let strings = vec!["foo", "bar", "baz"];
369 /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
370 ///
371 /// let mut borrowed: VarZeroVec<str> = VarZeroVec::parse_byte_slice(&bytes)?;
372 /// assert_eq!(borrowed, &*strings);
373 ///
374 /// # Ok::<(), ZeroVecError>(())
375 /// ```
376 pub fn into_bytes(self) -> Vec<u8> {
377 match self {
378 VarZeroVec::Owned(vec) => vec.into_bytes(),
379 VarZeroVec::Borrowed(vec) => vec.as_bytes().to_vec(),
380 }
381 }
382
383 /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
384 /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
385 /// be used to force it into an owned type
386 pub fn is_owned(&self) -> bool {
387 match self {
388 VarZeroVec::Owned(..) => true,
389 VarZeroVec::Borrowed(..) => false,
390 }
391 }
392
393 #[cfg(feature = "bench")]
394 #[doc(hidden)]
395 pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
396 self.as_slice().as_components()
397 }
398}
399
400impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F>
401where
402 T: VarULE + ?Sized,
403 A: EncodeAsVarULE<T>,
404 F: VarZeroVecFormat,
405{
406 #[inline]
407 fn from(elements: &Vec<A>) -> Self {
408 Self::from(elements.as_slice())
409 }
410}
411
412impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
413where
414 T: VarULE + ?Sized,
415 A: EncodeAsVarULE<T>,
416 F: VarZeroVecFormat,
417{
418 #[inline]
419 fn from(elements: &[A]) -> Self {
420 if elements.is_empty() {
421 VarZeroSlice::new_empty().into()
422 } else {
423 #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
424 VarZeroVecOwned::try_from_elements(elements).unwrap().into()
425 }
426 }
427}
428
429impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
430where
431 T: VarULE + ?Sized,
432 A: EncodeAsVarULE<T>,
433 F: VarZeroVecFormat,
434{
435 #[inline]
436 fn from(elements: &[A; N]) -> Self {
437 Self::from(elements.as_slice())
438 }
439}
440
441impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
442where
443 T: VarULE,
444 T: ?Sized,
445 T: PartialEq,
446 F: VarZeroVecFormat,
447{
448 #[inline]
449 fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
450 // VZV::from_elements used to produce a non-canonical representation of the
451 // empty VZV, so we cannot use byte equality for empty vecs.
452 if self.is_empty() || other.is_empty() {
453 return self.is_empty() && other.is_empty();
454 }
455 // VarULE has an API guarantee that byte equality is semantic equality.
456 // For non-empty VZVs, there's only a single metadata representation,
457 // so this guarantee extends to the whole VZV representation.
458 self.as_bytes().eq(other.as_bytes())
459 }
460}
461
462impl<'a, T, F> Eq for VarZeroVec<'a, T, F>
463where
464 T: VarULE,
465 T: ?Sized,
466 T: Eq,
467 F: VarZeroVecFormat,
468{
469}
470
471impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
472where
473 T: VarULE + ?Sized,
474 T: PartialEq,
475 A: AsRef<T>,
476 F: VarZeroVecFormat,
477{
478 #[inline]
479 fn eq(&self, other: &&[A]) -> bool {
480 self.iter().eq(other.iter().map(|t| t.as_ref()))
481 }
482}
483
484impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
485where
486 T: VarULE + ?Sized,
487 T: PartialEq,
488 A: AsRef<T>,
489 F: VarZeroVecFormat,
490{
491 #[inline]
492 fn eq(&self, other: &[A; N]) -> bool {
493 self.iter().eq(other.iter().map(|t| t.as_ref()))
494 }
495}
496
497impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> {
498 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
499 self.iter().partial_cmp(other.iter())
500 }
501}
502
503impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> {
504 fn cmp(&self, other: &Self) -> Ordering {
505 self.iter().cmp(other.iter())
506 }
507}
508
509#[test]
510fn assert_single_empty_representation() {
511 assert_eq!(
512 VarZeroVec::<str>::new().as_bytes(),
513 VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
514 );
515}
516
517#[test]
518fn weird_empty_representation_equality() {
519 assert_eq!(
520 VarZeroVec::<str>::parse_byte_slice(&[0, 0, 0, 0]).unwrap(),
521 VarZeroVec::<str>::parse_byte_slice(&[]).unwrap()
522 );
523}