1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
45use crate::ule::{EncodeAsVarULE, UleError, VarULE};
6#[cfg(feature = "alloc")]
7use alloc::boxed::Box;
8use core::fmt;
9use core::marker::PhantomData;
10#[cfg(feature = "alloc")]
11use core::mem::ManuallyDrop;
12use core::ops::Deref;
13use core::ptr::NonNull;
14use zerofrom::ZeroFrom;
1516/// Copy-on-write type that efficiently represents [`VarULE`] types as their bitstream representation.
17///
18/// The primary use case for [`VarULE`] types is the ability to store complex variable-length datastructures
19/// inside variable-length collections like [`crate::VarZeroVec`].
20///
21/// Underlying this ability is the fact that [`VarULE`] types can be efficiently represented as a flat
22/// bytestream.
23///
24/// In zero-copy cases, sometimes one wishes to unconditionally use this bytestream representation, for example
25/// to save stack size. A struct with five `Cow<'a, str>`s is not as stack-efficient as a single `Cow` containing
26/// the bytestream representation of, say, `Tuple5VarULE<str, str, str, str, str>`.
27///
28/// This type helps in this case: It is logically a `Cow<'a, V>`, with some optimizations, that is guaranteed
29/// to serialize as a byte stream in machine-readable scenarios.
30///
31/// During human-readable serialization, it will fall back to the serde impls on `V`, which ought to have
32/// a human-readable variant.
33pub struct VarZeroCow<'a, V: ?Sized> {
34/// Safety invariant: Contained slice must be a valid V
35 /// It may or may not have a lifetime valid for 'a, it must be valid for as long as this type is around.
36raw: RawVarZeroCow,
37 marker1: PhantomData<&'a V>,
38#[cfg(feature = "alloc")]
39marker2: PhantomData<Box<V>>,
40}
/// The untyped core of `VarZeroCow`, with `V` stripped out to simulate a dropck eyepatch
/// (that is, to prove to rustc that the destructor cannot observe V or 'a).
///
/// In effect this is a `Cow<'a, [u8]>` whose lifetime is tracked by the wrapper instead.
struct RawVarZeroCow {
    /// Pointer to the data.
    ///
    /// # Safety Invariants
    ///
    /// 1. The pointed-to slice must always be valid as a byte slice.
    /// 2. When `owned` is true, the slice may be freed.
    /// 3. VarZeroCow, the sole user of this type, additionally requires the buffer to be a
    ///    valid V.
    buf: NonNull<[u8]>,
    /// True when the buffer is a `Box<[u8]>`.
    #[cfg(feature = "alloc")]
    owned: bool,
    // Safety: no PhantomData is needed here, because the Drop impl never observes borrowed
    // data, should there be any.
}
#[cfg(feature = "alloc")]
impl Drop for RawVarZeroCow {
    /// Releases the buffer if this value owns it; a borrowed buffer is left alone.
    fn drop(&mut self) {
        // Note: borrowed data is NEVER observed here (it may already have been cleaned up by
        // the time this runs), so the borrowed case returns without touching `buf`.
        if !self.owned {
            return;
        }
        // Safety: (Invariant 2 on buf)
        // `owned` is true, so this is a valid Box<[u8]> and may be reclaimed.
        let reclaimed = unsafe { Box::<[u8]>::from_raw(self.buf.as_ptr()) };
        drop(reclaimed);
    }
}
7576// This is mostly just a `Cow<[u8]>`, safe to implement Send and Sync on
77unsafe impl Sendfor RawVarZeroCow {}
78unsafe impl Syncfor RawVarZeroCow {}
7980impl Clonefor RawVarZeroCow {
81fn clone(&self) -> Self {
82#[cfg(feature = "alloc")]
83if self.is_owned() {
84// This clones the box
85let b: Box<[u8]> = self.as_bytes().into();
86let b = ManuallyDrop::new(b);
87let buf: NonNull<[u8]> = (&**b).into();
88return Self {
89// Invariants upheld:
90 // 1 & 3: The bytes came from `self` so they're a valid value and byte slice
91 // 2: This is owned (we cloned it), so we set owned to true.
92buf,
93 owned: true,
94 };
95 }
96// Unfortunately we can't just use `new_borrowed(self.deref())` since the lifetime is shorter
97Self {
98// Invariants upheld:
99 // 1 & 3: The bytes came from `self` so they're a valid value and byte slice
100 // 2: This is borrowed (we're sharing a borrow), so we set owned to false.
101buf: self.buf,
102#[cfg(feature = "alloc")]
103owned: false,
104 }
105 }
106}
107108impl<'a, V: ?Sized> Clonefor VarZeroCow<'a, V> {
109fn clone(&self) -> Self {
110let raw = self.raw.clone();
111// Invariant upheld: raw came from a valid VarZeroCow, so it
112 // is a valid V
113unsafe { Self::from_raw(raw) }
114 }
115}
116117impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> {
118/// Construct from a slice. Errors if the slice doesn't represent a valid `V`
119pub fn parse_bytes(bytes: &'a [u8]) -> Result<Self, UleError> {
120let val = V::parse_bytes(bytes)?;
121Ok(Self::new_borrowed(val))
122 }
123124/// Construct from an owned slice. Errors if the slice doesn't represent a valid `V`
125 ///
126 /// ✨ *Enabled with the `alloc` Cargo feature.*
127#[cfg(feature = "alloc")]
128pub fn parse_owned_bytes(bytes: Box<[u8]>) -> Result<Self, UleError> {
129 V::validate_bytes(&bytes)?;
130let bytes = ManuallyDrop::new(bytes);
131let buf: NonNull<[u8]> = (&**bytes).into();
132let raw = RawVarZeroCow {
133// Invariants upheld:
134 // 1 & 3: The bytes came from `val` so they're a valid value and byte slice
135 // 2: This is owned, so we set owned to true.
136buf,
137 owned: true,
138 };
139Ok(Self {
140 raw,
141 marker1: PhantomData,
142#[cfg(feature = "alloc")]
143marker2: PhantomData,
144 })
145 }
146147/// Construct from a slice that is known to represent a valid `V`
148 ///
149 /// # Safety
150 ///
151 /// `bytes` must be a valid `V`, i.e. it must successfully pass through
152 /// `V::parse_bytes()` or `V::validate_bytes()`.
153pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
154unsafe {
155// Safety: bytes is an &T which is always non-null
156let buf: NonNull<[u8]> = NonNull::new_unchecked(bytesas *const [u8] as *mut [u8]);
157let raw = RawVarZeroCow {
158// Invariants upheld:
159 // 1 & 3: Passed upstream to caller
160 // 2: This is borrowed, so we set owned to false.
161buf,
162#[cfg(feature = "alloc")]
163owned: false,
164 };
165// Invariant passed upstream to caller
166Self::from_raw(raw)
167 }
168 }
169170/// Construct this from an [`EncodeAsVarULE`] version of the contained type
171 ///
172 /// Will always construct an owned version
173 ///
174 /// ✨ *Enabled with the `alloc` Cargo feature.*
175#[cfg(feature = "alloc")]
176pub fn from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self {
177let b = crate::ule::encode_varule_to_box(encodeable);
178Self::new_owned(b)
179 }
180181/// Construct a new borrowed version of this
182pub fn new_borrowed(val: &'a V) -> Self {
183unsafe {
184// Safety: val is a valid V, by type
185Self::from_bytes_unchecked(val.as_bytes())
186 }
187 }
188189/// Construct a new borrowed version of this
190 ///
191 /// ✨ *Enabled with the `alloc` Cargo feature.*
192#[cfg(feature = "alloc")]
193pub fn new_owned(val: Box<V>) -> Self {
194let val = ManuallyDrop::new(val);
195let buf: NonNull<[u8]> = val.as_bytes().into();
196let raw = RawVarZeroCow {
197// Invariants upheld:
198 // 1 & 3: The bytes came from `val` so they're a valid value and byte slice
199 // 2: This is owned, so we set owned to true.
200buf,
201#[cfg(feature = "alloc")]
202owned: true,
203 };
204// The bytes came from `val`, so it's a valid value
205unsafe { Self::from_raw(raw) }
206 }
207}
208209impl<'a, V: ?Sized> VarZeroCow<'a, V> {
210/// Whether or not this is owned
211pub fn is_owned(&self) -> bool {
212self.raw.is_owned()
213 }
214215/// Get the byte representation of this type
216 ///
217 /// Is also always a valid `V` and can be passed to
218 /// `V::from_bytes_unchecked()`
219pub fn as_bytes(&self) -> &[u8] {
220// The valid V invariant comes from Invariant 2
221self.raw.as_bytes()
222 }
223224/// Invariant: `raw` must wrap a valid V, either owned or borrowed for 'a
225const unsafe fn from_raw(raw: RawVarZeroCow) -> Self {
226Self {
227// Invariant passed up to caller
228raw,
229 marker1: PhantomData,
230#[cfg(feature = "alloc")]
231marker2: PhantomData,
232 }
233 }
234}
235236impl RawVarZeroCow {
237/// Whether or not this is owned
238#[inline]
239pub fn is_owned(&self) -> bool {
240#[cfg(feature = "alloc")]
241return self.owned;
242#[cfg(not(feature = "alloc"))]
243return false;
244 }
245246/// Get the byte representation of this type
247#[inline]
248pub fn as_bytes(&self) -> &[u8] {
249// Safety: Invariant 1 on self.buf
250unsafe { self.buf.as_ref() }
251 }
252}
253254impl<'a, V: VarULE + ?Sized> Dereffor VarZeroCow<'a, V> {
255type Target = V;
256fn deref(&self) -> &V {
257// Safety: From invariant 2 on self.buf
258unsafe { V::from_bytes_unchecked(self.as_bytes()) }
259 }
260}
261262impl<'a, V: VarULE + ?Sized> From<&'a V> for VarZeroCow<'a, V> {
263fn from(other: &'a V) -> Self {
264Self::new_borrowed(other)
265 }
266}
#[cfg(feature = "alloc")]
impl<'a, V: VarULE + ?Sized> From<Box<V>> for VarZeroCow<'a, V> {
    /// Wraps a box as an owned `VarZeroCow`; same as [`VarZeroCow::new_owned`].
    fn from(other: Box<V>) -> Self {
        VarZeroCow::new_owned(other)
    }
}
274275impl<'a, V: VarULE + ?Sized + fmt::Debug> fmt::Debugfor VarZeroCow<'a, V> {
276fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
277self.deref().fmt(f)
278 }
279}
280281// We need manual impls since `#[derive()]` is disallowed on packed types
282impl<'a, V: VarULE + ?Sized + PartialEq> PartialEqfor VarZeroCow<'a, V> {
283fn eq(&self, other: &Self) -> bool {
284self.deref().eq(other.deref())
285 }
286}
287288impl<'a, V: VarULE + ?Sized + Eq> Eqfor VarZeroCow<'a, V> {}
289290impl<'a, V: VarULE + ?Sized + PartialOrd> PartialOrdfor VarZeroCow<'a, V> {
291fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
292self.deref().partial_cmp(other.deref())
293 }
294}
295296impl<'a, V: VarULE + ?Sized + Ord> Ordfor VarZeroCow<'a, V> {
297fn cmp(&self, other: &Self) -> core::cmp::Ordering {
298self.deref().cmp(other.deref())
299 }
300}
301302// # Safety
303//
304// encode_var_ule_len: Produces the length of the contained bytes, which are known to be a valid V by invariant
305//
306// encode_var_ule_write: Writes the contained bytes, which are known to be a valid V by invariant
307unsafe impl<'a, V: VarULE + ?Sized> EncodeAsVarULE<V> for VarZeroCow<'a, V> {
308fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
309// unnecessary if the other two are implemented
310::core::panicking::panic("internal error: entered unreachable code")unreachable!()311 }
312313#[inline]
314fn encode_var_ule_len(&self) -> usize {
315self.as_bytes().len()
316 }
317318#[inline]
319fn encode_var_ule_write(&self, dst: &mut [u8]) {
320dst.copy_from_slice(self.as_bytes())
321 }
322}
#[cfg(feature = "serde")]
impl<'a, V: VarULE + ?Sized + serde::Serialize> serde::Serialize for VarZeroCow<'a, V> {
    /// Writes the raw bytes for machine-readable serializers, and defers to `V`'s own
    /// `Serialize` impl for human-readable ones.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.as_bytes());
        }
        let value: &V = self.deref();
        <V as serde::Serialize>::serialize(value, serializer)
    }
}
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'a, 'de: 'a, V: VarULE + ?Sized> serde::Deserialize<'de> for VarZeroCow<'a, V>
where
    Box<V>: serde::Deserialize<'de>,
{
    /// Human-readable input is deserialized through `Box<V>` and yields an owned value.
    /// Machine-readable input is taken as a borrowed byte slice, validated as a `V`, and
    /// yields a borrowed value; a validation failure becomes a custom serde error.
    fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error>
    where
        Des: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            let borrowed = <&[u8] as serde::Deserialize<'de>>::deserialize(deserializer)?;
            return Self::parse_bytes(borrowed).map_err(serde::de::Error::custom);
        }
        let boxed = <Box<V> as serde::Deserialize<'de>>::deserialize(deserializer)?;
        Ok(Self::new_owned(boxed))
    }
}
#[cfg(feature = "databake")]
impl<'a, V: VarULE + ?Sized> databake::Bake for VarZeroCow<'a, V> {
    /// Emits an expression that rebuilds this value from its baked bytes via
    /// `from_bytes_unchecked`, after registering `zerovec` with `env`.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        env.insert("zerovec");
        let baked_bytes = self.as_bytes().bake(env);
        databake::quote! {
            // Safety: Known to come from a valid V since self.as_bytes() is always a valid V
            unsafe {
                zerovec::VarZeroCow::from_bytes_unchecked(#baked_bytes)
            }
        }
    }
}
#[cfg(feature = "databake")]
impl<'a, V: VarULE + ?Sized> databake::BakeSize for VarZeroCow<'a, V> {
    /// Reports the length in bytes of the stored buffer.
    fn borrows_size(&self) -> usize {
        let bytes = self.as_bytes();
        bytes.len()
    }
}
377378impl<'a, V: VarULE + ?Sized> ZeroFrom<'a, V> for VarZeroCow<'a, V> {
379#[inline]
380fn zero_from(other: &'a V) -> Self {
381Self::new_borrowed(other)
382 }
383}
384385impl<'a, 'b, V: VarULE + ?Sized> ZeroFrom<'a, VarZeroCow<'b, V>> for VarZeroCow<'a, V> {
386#[inline]
387fn zero_from(other: &'a VarZeroCow<'b, V>) -> Self {
388Self::new_borrowed(other)
389 }
390}
#[cfg(test)]
mod tests {
    use super::VarZeroCow;
    use crate::ule::tuplevar::Tuple3VarULE;
    use crate::vecs::VarZeroSlice;

    /// Encodes a three-field tuple, reads the fields back, and (with `serde`) round-trips
    /// it through bincode and JSON.
    #[test]
    fn test_cow_roundtrip() {
        type Messy = Tuple3VarULE<str, [u8], VarZeroSlice<str>>;
        let words = vec!["one", "two", "three"];
        let source = ("hello", &b"g\xFF\xFFdbye"[..], words);
        let messy: VarZeroCow<Messy> = VarZeroCow::from_encodeable(&source);

        assert_eq!(messy.a(), "hello");
        assert_eq!(messy.b(), b"g\xFF\xFFdbye");
        assert_eq!(&messy.c()[1], "two");

        #[cfg(feature = "serde")]
        {
            let encoded = bincode::serialize(&messy).unwrap();
            let deserialized: VarZeroCow<Messy> = bincode::deserialize(&encoded).unwrap();
            assert_eq!(
                messy, deserialized,
                "Single element roundtrips with bincode"
            );
            // The bincode path is expected to borrow from `encoded` rather than allocate
            assert!(!deserialized.is_owned());

            let text = serde_json::to_string(&messy).unwrap();
            let deserialized: VarZeroCow<Messy> = serde_json::from_str(&text).unwrap();
            assert_eq!(messy, deserialized, "Single element roundtrips with serde");
        }
    }

    struct TwoCows<'a> {
        cow1: VarZeroCow<'a, str>,
        cow2: VarZeroCow<'a, str>,
    }

    /// Compile-time check of the dropck eyepatch: one field borrows from its sibling.
    #[test]
    fn test_eyepatch_works() {
        // This code should compile
        let mut pair = TwoCows {
            cow1: VarZeroCow::new_borrowed("hello"),
            cow2: VarZeroCow::new_owned("world".into()),
        };
        let reborrowed = VarZeroCow::new_borrowed(&*pair.cow2);
        pair.cow1 = reborrowed;

        // Without the eyepatch, dropck would worry that the destructor of pair.cow1 could
        // observe the data it borrowed from pair.cow2, which may already have been deleted

        // This test will fail if you add an empty `impl<'a, V: ?Sized> Drop for VarZeroCow<'a, V>`
    }
}