zerovec/varzerovec/slice.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::{VarZeroSliceIter, VarZeroVecComponents};
6use super::vec::VarZeroVecInner;
7use super::*;
8use crate::ule::*;
9use core::cmp::{Ord, Ordering, PartialOrd};
10use core::fmt;
11use core::marker::PhantomData;
12
13use core::ops::Index;
14use core::ops::Range;
15
16#[cfg(feature = "alloc")]
17use alloc::{boxed::Box, vec::Vec};
18
19/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
20/// where `T` is not `Sized`.
21///
22/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
23/// owned data and as such is ideal for deserialization since most human readable
24/// serialization formats cannot unconditionally deserialize zero-copy.
25///
26/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
27/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
28/// using `VarZeroVec<ZeroSlice<T>>`.
29///
30/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
31/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
32///
33/// This type can be nested within itself to allow for multi-level nested `Vec`s.
34///
35/// # Examples
36///
37/// ## Nested Slices
38///
39/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
40///
41/// ```rust
42/// use zerovec::{VarZeroSlice, VarZeroVec};
43/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
44/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
45/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
46/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
47/// let strings_12 = vec![&*strings_1, &*strings_2];
48/// let strings_34 = vec![&*strings_3, &*strings_4];
49/// let all_strings = vec![strings_12, strings_34];
50///
51/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
52/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
53/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
54/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
55/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
56/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
57/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
58///
59/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
60/// .iter()
61/// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
62/// v.iter()
63/// .map(|x: &VarZeroSlice<_>| {
64/// x.as_varzerovec()
65/// .iter()
66/// .map(|s| s.to_owned())
67/// .collect::<Vec<String>>()
68/// })
69/// .collect::<Vec<_>>()
70/// })
71/// .collect::<Vec<_>>();
72/// assert_eq!(reconstructed, all_strings);
73///
74/// let bytes = vzv_all.as_bytes();
75/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
76/// VarZeroVec::parse_bytes(bytes).unwrap();
77/// assert_eq!(vzv_from_bytes, vzv_all);
78/// ```
79///
80/// ## Iterate over Windows
81///
82/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
83/// [`core::slice::Windows`], this behavior can be easily modeled using an iterator:
84///
85/// ```
86/// use zerovec::VarZeroVec;
87///
88/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
89/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
90///
91/// let mut it = vzv.iter().peekable();
92/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
93/// // Evaluate (x, y) here.
94/// # pairs.push((x, y));
95/// }
96/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
97/// ```
98//
99// safety invariant: The slice MUST be one which parses to
100// a valid VarZeroVecComponents<T>
101#[repr(transparent)]
102pub struct VarZeroSlice<T: ?Sized, F = Index16> {
103 marker: PhantomData<(F, T)>,
104 /// The original slice this was constructed from
105 entire_slice: [u8],
106}
107
108impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
109 /// Construct a new empty [`VarZeroSlice`]
110 pub const fn new_empty() -> &'static Self {
111 // The empty VZV is special-cased to the empty slice
112 unsafe { &*(&[] as *const [u8] as *const Self) }
113 }
114
115 /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
116 #[inline]
117 pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
118 unsafe {
119 // safety: VarZeroSlice is guaranteed to parse here
120 VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
121 }
122 }
123
124 /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
125 ///
126 /// # Safety
127 ///
128 /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
129 pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
130 // self is really just a wrapper around a byte slice
131 &*(bytes as *const [u8] as *const Self)
132 }
133
134 /// Get the number of elements in this slice
135 ///
136 /// # Example
137 ///
138 /// ```rust
139 /// # use zerovec::VarZeroVec;
140 ///
141 /// let strings = vec!["foo", "bar", "baz", "quux"];
142 /// let vec = VarZeroVec::<str>::from(&strings);
143 ///
144 /// assert_eq!(vec.len(), 4);
145 /// ```
146 pub fn len(&self) -> usize {
147 self.as_components().len()
148 }
149
150 /// Returns `true` if the slice contains no elements.
151 ///
152 /// # Examples
153 ///
154 /// ```
155 /// # use zerovec::VarZeroVec;
156 ///
157 /// let strings: Vec<String> = vec![];
158 /// let vec = VarZeroVec::<str>::from(&strings);
159 ///
160 /// assert!(vec.is_empty());
161 /// ```
162 pub fn is_empty(&self) -> bool {
163 self.as_components().is_empty()
164 }
165
166 /// Obtain an iterator over this slice's elements
167 ///
168 /// # Example
169 ///
170 /// ```rust
171 /// # use zerovec::VarZeroVec;
172 ///
173 /// let strings = vec!["foo", "bar", "baz", "quux"];
174 /// let vec = VarZeroVec::<str>::from(&strings);
175 ///
176 /// let mut iter_results: Vec<&str> = vec.iter().collect();
177 /// assert_eq!(iter_results[0], "foo");
178 /// assert_eq!(iter_results[1], "bar");
179 /// assert_eq!(iter_results[2], "baz");
180 /// assert_eq!(iter_results[3], "quux");
181 /// ```
182 pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
183 self.as_components().iter()
184 }
185
186 /// Get one of this slice's elements, returning `None` if the index is out of bounds
187 ///
188 /// # Example
189 ///
190 /// ```rust
191 /// # use zerovec::VarZeroVec;
192 ///
193 /// let strings = vec!["foo", "bar", "baz", "quux"];
194 /// let vec = VarZeroVec::<str>::from(&strings);
195 ///
196 /// let mut iter_results: Vec<&str> = vec.iter().collect();
197 /// assert_eq!(vec.get(0), Some("foo"));
198 /// assert_eq!(vec.get(1), Some("bar"));
199 /// assert_eq!(vec.get(2), Some("baz"));
200 /// assert_eq!(vec.get(3), Some("quux"));
201 /// assert_eq!(vec.get(4), None);
202 /// ```
203 pub fn get(&self, idx: usize) -> Option<&T> {
204 self.as_components().get(idx)
205 }
206
207 /// Get one of this slice's elements
208 ///
209 /// # Safety
210 ///
211 /// `index` must be in range
212 ///
213 /// # Example
214 ///
215 /// ```rust
216 /// # use zerovec::VarZeroVec;
217 ///
218 /// let strings = vec!["foo", "bar", "baz", "quux"];
219 /// let vec = VarZeroVec::<str>::from(&strings);
220 ///
221 /// let mut iter_results: Vec<&str> = vec.iter().collect();
222 /// unsafe {
223 /// assert_eq!(vec.get_unchecked(0), "foo");
224 /// assert_eq!(vec.get_unchecked(1), "bar");
225 /// assert_eq!(vec.get_unchecked(2), "baz");
226 /// assert_eq!(vec.get_unchecked(3), "quux");
227 /// }
228 /// ```
229 pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
230 self.as_components().get_unchecked(idx)
231 }
232
233 /// Obtain an owned `Vec<Box<T>>` out of this
234 ///
235 /// ✨ *Enabled with the `alloc` Cargo feature.*
236 #[cfg(feature = "alloc")]
237 pub fn to_vec(&self) -> Vec<Box<T>> {
238 self.as_components().to_vec()
239 }
240
241 /// Get a reference to the entire encoded backing buffer of this slice
242 ///
243 /// The bytes can be passed back to [`Self::parse_bytes()`].
244 ///
245 /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
246 ///
247 /// # Example
248 ///
249 /// ```rust
250 /// # use zerovec::VarZeroVec;
251 ///
252 /// let strings = vec!["foo", "bar", "baz"];
253 /// let vzv = VarZeroVec::<str>::from(&strings);
254 ///
255 /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap());
256 /// ```
257 #[inline]
258 pub const fn as_bytes(&self) -> &[u8] {
259 &self.entire_slice
260 }
261
262 /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
263 ///
264 /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
265 /// it is more efficient to perform this conversion first
266 pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
267 VarZeroVec(VarZeroVecInner::Borrowed(self))
268 }
269
270 /// Parse a [`VarZeroSlice`] from a slice of the appropriate format
271 ///
272 /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
273 pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> {
274 <Self as VarULE>::parse_bytes(slice)
275 }
276}
277
278impl<T, F> VarZeroSlice<T, F>
279where
280 T: VarULE,
281 T: ?Sized,
282 T: Ord,
283 F: VarZeroVecFormat,
284{
285 /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
286 /// the standard library function [`binary_search`].
287 ///
288 /// # Example
289 ///
290 /// ```
291 /// # use zerovec::VarZeroVec;
292 ///
293 /// let strings = vec!["a", "b", "f", "g"];
294 /// let vec = VarZeroVec::<str>::from(&strings);
295 ///
296 /// assert_eq!(vec.binary_search("f"), Ok(2));
297 /// assert_eq!(vec.binary_search("e"), Err(2));
298 /// ```
299 ///
300 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
301 #[inline]
302 pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
303 self.as_components().binary_search(x)
304 }
305
306 /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
307 ///
308 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
309 /// to the behavior of the standard library function [`binary_search`].
310 ///
311 /// The index is returned relative to the start of the range.
312 ///
313 /// # Example
314 ///
315 /// ```
316 /// # use zerovec::VarZeroVec;
317 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
318 /// let vec = VarZeroVec::<str>::from(&strings);
319 ///
320 /// // Same behavior as binary_search when the range covers the whole slice:
321 /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
322 /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
323 ///
324 /// // Will not look outside of the range:
325 /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
326 /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
327 ///
328 /// // Will return indices relative to the start of the range:
329 /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
330 /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
331 ///
332 /// // Will return `None` if the range is out of bounds:
333 /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
334 /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
335 /// ```
336 ///
337 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
338 #[inline]
339 pub fn binary_search_in_range(
340 &self,
341 x: &T,
342 range: Range<usize>,
343 ) -> Option<Result<usize, usize>> {
344 self.as_components().binary_search_in_range(x, range)
345 }
346}
347
348impl<T, F> VarZeroSlice<T, F>
349where
350 T: VarULE,
351 T: ?Sized,
352 F: VarZeroVecFormat,
353{
354 /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
355 /// the standard library function [`binary_search_by`].
356 ///
357 /// # Example
358 ///
359 /// ```
360 /// # use zerovec::VarZeroVec;
361 /// let strings = vec!["a", "b", "f", "g"];
362 /// let vec = VarZeroVec::<str>::from(&strings);
363 ///
364 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
365 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
366 /// ```
367 ///
368 /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
369 #[inline]
370 pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
371 self.as_components().binary_search_by(predicate)
372 }
373
374 /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
375 ///
376 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
377 /// to the behavior of the standard library function [`binary_search`].
378 ///
379 /// The index is returned relative to the start of the range.
380 ///
381 /// # Example
382 ///
383 /// ```
384 /// # use zerovec::VarZeroVec;
385 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
386 /// let vec = VarZeroVec::<str>::from(&strings);
387 ///
388 /// // Same behavior as binary_search when the range covers the whole slice:
389 /// assert_eq!(
390 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
391 /// Some(Ok(3))
392 /// );
393 /// assert_eq!(
394 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
395 /// Some(Err(4))
396 /// );
397 ///
398 /// // Will not look outside of the range:
399 /// assert_eq!(
400 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
401 /// Some(Err(1))
402 /// );
403 /// assert_eq!(
404 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
405 /// Some(Err(0))
406 /// );
407 ///
408 /// // Will return indices relative to the start of the range:
409 /// assert_eq!(
410 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
411 /// Some(Ok(2))
412 /// );
413 /// assert_eq!(
414 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
415 /// Some(Err(3))
416 /// );
417 ///
418 /// // Will return `None` if the range is out of bounds:
419 /// assert_eq!(
420 /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
421 /// None
422 /// );
423 /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
424 /// ```
425 ///
426 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
427 pub fn binary_search_in_range_by(
428 &self,
429 predicate: impl FnMut(&T) -> Ordering,
430 range: Range<usize>,
431 ) -> Option<Result<usize, usize>> {
432 self.as_components()
433 .binary_search_in_range_by(predicate, range)
434 }
435}
436// Safety (based on the safety checklist on the VarULE trait):
437// 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
438// `[u8]` slice which satisfies this invariant)
439// 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
440// `[u8]` slice which satisfies this invariant)
441// 3. The impl of `validate_bytes()` returns an error if any byte is not valid.
442// 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
443// 5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
444// 6. `as_bytes()` is equivalent to a regular transmute of the underlying data
445// 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
446unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
447 fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
448 let _ = VarZeroVecComponents::<T, F>::parse_bytes(bytes)
449 .map_err(|_| UleError::parse::<Self>())?;
450 Ok(())
451 }
452
453 unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
454 // self is really just a wrapper around a byte slice
455 &*(bytes as *const [u8] as *const Self)
456 }
457
458 fn as_bytes(&self) -> &[u8] {
459 &self.entire_slice
460 }
461}
462
463impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
464 type Output = T;
465 fn index(&self, index: usize) -> &Self::Output {
466 #[expect(clippy::panic)] // documented
467 match self.get(index) {
468 Some(x) => x,
469 None => panic!(
470 "index out of bounds: the len is {} but the index is {index}",
471 self.len()
472 ),
473 }
474 }
475}
476
477impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
478where
479 T: VarULE,
480 T: ?Sized,
481 T: PartialEq,
482 F: VarZeroVecFormat,
483{
484 #[inline]
485 fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
486 // VarULE has an API guarantee that this is equivalent
487 // to `T::VarULE::eq()`
488 self.entire_slice.eq(&other.entire_slice)
489 }
490}
491
492impl<T, F> Eq for VarZeroSlice<T, F>
493where
494 T: VarULE,
495 T: ?Sized,
496 T: Eq,
497 F: VarZeroVecFormat,
498{
499}
500
501impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
502 #[inline]
503 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
504 self.iter().partial_cmp(other.iter())
505 }
506}
507
508impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
509 #[inline]
510 fn cmp(&self, other: &Self) -> Ordering {
511 self.iter().cmp(other.iter())
512 }
513}
514
515impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
516where
517 T: fmt::Debug,
518{
519 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
520 f.debug_list().entries(self.iter()).finish()
521 }
522}
523
524impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
525 fn as_ref(&self) -> &VarZeroSlice<T, F> {
526 self
527 }
528}