zerovec/varzerovec/slice.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::{VarZeroSliceIter, VarZeroVecComponents};
6use super::vec::VarZeroVecInner;
7use super::*;
8use crate::ule::*;
9use core::cmp::{Ord, Ordering, PartialOrd};
10use core::fmt;
11use core::marker::PhantomData;
12use core::mem;
13
14use core::ops::Index;
15use core::ops::Range;
16
17/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
18/// where `T` is not `Sized`.
19///
20/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
21/// owned data and as such is ideal for deserialization since most human readable
22/// serialization formats cannot unconditionally deserialize zero-copy.
23///
24/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
25/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
26/// using `VarZeroVec<ZeroSlice<T>>`.
27///
28/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
29/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
30///
31/// This type can be nested within itself to allow for multi-level nested `Vec`s.
32///
33/// # Examples
34///
35/// ## Nested Slices
36///
37/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
38///
39/// ```rust
40/// use zerovec::{VarZeroSlice, VarZeroVec};
41/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
42/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
43/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
44/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
45/// let strings_12 = vec![&*strings_1, &*strings_2];
46/// let strings_34 = vec![&*strings_3, &*strings_4];
47/// let all_strings = vec![strings_12, strings_34];
48///
49/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
50/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
51/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
52/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
53/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
54/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
55/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
56///
57/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
58/// .iter()
59/// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
60/// v.iter()
61/// .map(|x: &VarZeroSlice<_>| {
62/// x.as_varzerovec()
63/// .iter()
64/// .map(|s| s.to_owned())
65/// .collect::<Vec<String>>()
66/// })
67/// .collect::<Vec<_>>()
68/// })
69/// .collect::<Vec<_>>();
70/// assert_eq!(reconstructed, all_strings);
71///
72/// let bytes = vzv_all.as_bytes();
73/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
74/// VarZeroVec::parse_bytes(bytes).unwrap();
75/// assert_eq!(vzv_from_bytes, vzv_all);
76/// ```
77///
78/// ## Iterate over Windows
79///
80/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
81/// [core::slice::Windows], this behavior can be easily modeled using an iterator:
82///
83/// ```
84/// use zerovec::VarZeroVec;
85///
86/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
87/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
88///
89/// let mut it = vzv.iter().peekable();
90/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
91/// // Evaluate (x, y) here.
92/// # pairs.push((x, y));
93/// }
94/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
95/// ```
96//
97// safety invariant: The slice MUST be one which parses to
98// a valid VarZeroVecComponents<T>
99#[repr(transparent)]
100pub struct VarZeroSlice<T: ?Sized, F = Index16> {
101 marker: PhantomData<(F, T)>,
102 /// The original slice this was constructed from
103 entire_slice: [u8],
104}
105
106impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
107 /// Construct a new empty VarZeroSlice
108 pub const fn new_empty() -> &'static Self {
109 // The empty VZV is special-cased to the empty slice
110 unsafe { mem::transmute(&[] as &[u8]) }
111 }
112
113 /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
114 #[inline]
115 pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
116 unsafe {
117 // safety: VarZeroSlice is guaranteed to parse here
118 VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
119 }
120 }
121
122 /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
123 ///
124 /// # Safety
125 ///
126 /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
127 pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
128 // self is really just a wrapper around a byte slice
129 mem::transmute(bytes)
130 }
131
132 /// Get the number of elements in this slice
133 ///
134 /// # Example
135 ///
136 /// ```rust
137 /// # use zerovec::VarZeroVec;
138 ///
139 /// let strings = vec!["foo", "bar", "baz", "quux"];
140 /// let vec = VarZeroVec::<str>::from(&strings);
141 ///
142 /// assert_eq!(vec.len(), 4);
143 /// ```
144 pub fn len(&self) -> usize {
145 self.as_components().len()
146 }
147
148 /// Returns `true` if the slice contains no elements.
149 ///
150 /// # Examples
151 ///
152 /// ```
153 /// # use zerovec::VarZeroVec;
154 ///
155 /// let strings: Vec<String> = vec![];
156 /// let vec = VarZeroVec::<str>::from(&strings);
157 ///
158 /// assert!(vec.is_empty());
159 /// ```
160 pub fn is_empty(&self) -> bool {
161 self.as_components().is_empty()
162 }
163
164 /// Obtain an iterator over this slice's elements
165 ///
166 /// # Example
167 ///
168 /// ```rust
169 /// # use zerovec::VarZeroVec;
170 ///
171 /// let strings = vec!["foo", "bar", "baz", "quux"];
172 /// let vec = VarZeroVec::<str>::from(&strings);
173 ///
174 /// let mut iter_results: Vec<&str> = vec.iter().collect();
175 /// assert_eq!(iter_results[0], "foo");
176 /// assert_eq!(iter_results[1], "bar");
177 /// assert_eq!(iter_results[2], "baz");
178 /// assert_eq!(iter_results[3], "quux");
179 /// ```
180 pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
181 self.as_components().iter()
182 }
183
184 /// Get one of this slice's elements, returning `None` if the index is out of bounds
185 ///
186 /// # Example
187 ///
188 /// ```rust
189 /// # use zerovec::VarZeroVec;
190 ///
191 /// let strings = vec!["foo", "bar", "baz", "quux"];
192 /// let vec = VarZeroVec::<str>::from(&strings);
193 ///
194 /// let mut iter_results: Vec<&str> = vec.iter().collect();
195 /// assert_eq!(vec.get(0), Some("foo"));
196 /// assert_eq!(vec.get(1), Some("bar"));
197 /// assert_eq!(vec.get(2), Some("baz"));
198 /// assert_eq!(vec.get(3), Some("quux"));
199 /// assert_eq!(vec.get(4), None);
200 /// ```
201 pub fn get(&self, idx: usize) -> Option<&T> {
202 self.as_components().get(idx)
203 }
204
205 /// Get one of this slice's elements
206 ///
207 /// # Safety
208 ///
209 /// `index` must be in range
210 ///
211 /// # Example
212 ///
213 /// ```rust
214 /// # use zerovec::VarZeroVec;
215 ///
216 /// let strings = vec!["foo", "bar", "baz", "quux"];
217 /// let vec = VarZeroVec::<str>::from(&strings);
218 ///
219 /// let mut iter_results: Vec<&str> = vec.iter().collect();
220 /// unsafe {
221 /// assert_eq!(vec.get_unchecked(0), "foo");
222 /// assert_eq!(vec.get_unchecked(1), "bar");
223 /// assert_eq!(vec.get_unchecked(2), "baz");
224 /// assert_eq!(vec.get_unchecked(3), "quux");
225 /// }
226 /// ```
227 pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
228 self.as_components().get_unchecked(idx)
229 }
230
231 /// Obtain an owned `Vec<Box<T>>` out of this
232 ///
233 /// ✨ *Enabled with the `alloc` Cargo feature.*
234 #[cfg(feature = "alloc")]
235 pub fn to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
236 self.as_components().to_vec()
237 }
238
239 /// Get a reference to the entire encoded backing buffer of this slice
240 ///
241 /// The bytes can be passed back to [`Self::parse_bytes()`].
242 ///
243 /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
244 ///
245 /// # Example
246 ///
247 /// ```rust
248 /// # use zerovec::VarZeroVec;
249 ///
250 /// let strings = vec!["foo", "bar", "baz"];
251 /// let vzv = VarZeroVec::<str>::from(&strings);
252 ///
253 /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap());
254 /// ```
255 #[inline]
256 pub const fn as_bytes(&self) -> &[u8] {
257 &self.entire_slice
258 }
259
260 /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
261 ///
262 /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
263 /// it is more efficient to perform this conversion first
264 pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
265 VarZeroVec(VarZeroVecInner::Borrowed(self))
266 }
267
268 /// Parse a VarZeroSlice from a slice of the appropriate format
269 ///
270 /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
271 pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> {
272 <Self as VarULE>::parse_bytes(slice)
273 }
274}
275
276impl<T, F> VarZeroSlice<T, F>
277where
278 T: VarULE,
279 T: ?Sized,
280 T: Ord,
281 F: VarZeroVecFormat,
282{
283 /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
284 /// the standard library function [`binary_search`].
285 ///
286 /// # Example
287 ///
288 /// ```
289 /// # use zerovec::VarZeroVec;
290 ///
291 /// let strings = vec!["a", "b", "f", "g"];
292 /// let vec = VarZeroVec::<str>::from(&strings);
293 ///
294 /// assert_eq!(vec.binary_search("f"), Ok(2));
295 /// assert_eq!(vec.binary_search("e"), Err(2));
296 /// ```
297 ///
298 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
299 #[inline]
300 pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
301 self.as_components().binary_search(x)
302 }
303
304 /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
305 ///
306 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
307 /// to the behavior of the standard library function [`binary_search`].
308 ///
309 /// The index is returned relative to the start of the range.
310 ///
311 /// # Example
312 ///
313 /// ```
314 /// # use zerovec::VarZeroVec;
315 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
316 /// let vec = VarZeroVec::<str>::from(&strings);
317 ///
318 /// // Same behavior as binary_search when the range covers the whole slice:
319 /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
320 /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
321 ///
322 /// // Will not look outside of the range:
323 /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
324 /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
325 ///
326 /// // Will return indices relative to the start of the range:
327 /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
328 /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
329 ///
330 /// // Will return `None` if the range is out of bounds:
331 /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
332 /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
333 /// ```
334 ///
335 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
336 #[inline]
337 pub fn binary_search_in_range(
338 &self,
339 x: &T,
340 range: Range<usize>,
341 ) -> Option<Result<usize, usize>> {
342 self.as_components().binary_search_in_range(x, range)
343 }
344}
345
346impl<T, F> VarZeroSlice<T, F>
347where
348 T: VarULE,
349 T: ?Sized,
350 F: VarZeroVecFormat,
351{
352 /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
353 /// the standard library function [`binary_search_by`].
354 ///
355 /// # Example
356 ///
357 /// ```
358 /// # use zerovec::VarZeroVec;
359 /// let strings = vec!["a", "b", "f", "g"];
360 /// let vec = VarZeroVec::<str>::from(&strings);
361 ///
362 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
363 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
364 /// ```
365 ///
366 /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
367 #[inline]
368 pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
369 self.as_components().binary_search_by(predicate)
370 }
371
372 /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
373 ///
374 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
375 /// to the behavior of the standard library function [`binary_search`].
376 ///
377 /// The index is returned relative to the start of the range.
378 ///
379 /// # Example
380 ///
381 /// ```
382 /// # use zerovec::VarZeroVec;
383 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
384 /// let vec = VarZeroVec::<str>::from(&strings);
385 ///
386 /// // Same behavior as binary_search when the range covers the whole slice:
387 /// assert_eq!(
388 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
389 /// Some(Ok(3))
390 /// );
391 /// assert_eq!(
392 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
393 /// Some(Err(4))
394 /// );
395 ///
396 /// // Will not look outside of the range:
397 /// assert_eq!(
398 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
399 /// Some(Err(1))
400 /// );
401 /// assert_eq!(
402 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
403 /// Some(Err(0))
404 /// );
405 ///
406 /// // Will return indices relative to the start of the range:
407 /// assert_eq!(
408 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
409 /// Some(Ok(2))
410 /// );
411 /// assert_eq!(
412 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
413 /// Some(Err(3))
414 /// );
415 ///
416 /// // Will return `None` if the range is out of bounds:
417 /// assert_eq!(
418 /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
419 /// None
420 /// );
421 /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
422 /// ```
423 ///
424 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
425 pub fn binary_search_in_range_by(
426 &self,
427 predicate: impl FnMut(&T) -> Ordering,
428 range: Range<usize>,
429 ) -> Option<Result<usize, usize>> {
430 self.as_components()
431 .binary_search_in_range_by(predicate, range)
432 }
433}
434// Safety (based on the safety checklist on the VarULE trait):
435// 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
436// `[u8]` slice which satisfies this invariant)
437// 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
438// `[u8]` slice which satisfies this invariant)
439// 3. The impl of `validate_bytes()` returns an error if any byte is not valid.
440// 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
441// 5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
442// 6. `as_bytes()` is equivalent to a regular transmute of the underlying data
443// 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
444unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
445 fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
446 let _: VarZeroVecComponents<T, F> =
447 VarZeroVecComponents::parse_bytes(bytes).map_err(|_| UleError::parse::<Self>())?;
448 Ok(())
449 }
450
451 unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
452 // self is really just a wrapper around a byte slice
453 mem::transmute(bytes)
454 }
455
456 fn as_bytes(&self) -> &[u8] {
457 &self.entire_slice
458 }
459}
460
461impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
462 type Output = T;
463 fn index(&self, index: usize) -> &Self::Output {
464 #[expect(clippy::panic)] // documented
465 match self.get(index) {
466 Some(x) => x,
467 None => panic!(
468 "index out of bounds: the len is {} but the index is {index}",
469 self.len()
470 ),
471 }
472 }
473}
474
475impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
476where
477 T: VarULE,
478 T: ?Sized,
479 T: PartialEq,
480 F: VarZeroVecFormat,
481{
482 #[inline]
483 fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
484 // VarULE has an API guarantee that this is equivalent
485 // to `T::VarULE::eq()`
486 self.entire_slice.eq(&other.entire_slice)
487 }
488}
489
490impl<T, F> Eq for VarZeroSlice<T, F>
491where
492 T: VarULE,
493 T: ?Sized,
494 T: Eq,
495 F: VarZeroVecFormat,
496{
497}
498
499impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
500 #[inline]
501 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
502 self.iter().partial_cmp(other.iter())
503 }
504}
505
506impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
507 #[inline]
508 fn cmp(&self, other: &Self) -> Ordering {
509 self.iter().cmp(other.iter())
510 }
511}
512
513impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
514where
515 T: fmt::Debug,
516{
517 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
518 f.debug_list().entries(self.iter()).finish()
519 }
520}
521
522impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
523 fn as_ref(&self) -> &VarZeroSlice<T, F> {
524 self
525 }
526}