potential_utf/
ustr.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5#[cfg(feature = "alloc")]
6use alloc::boxed::Box;
7use core::cmp::Ordering;
8use core::fmt;
9use core::ops::Deref;
10
/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
///
/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
/// example, strings that are keys of a map don't need to ever be reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
///
/// The main advantage of this type over `[u8]` is that it serializes as a string in
/// human-readable formats like JSON.
///
/// Equality and ordering are derived from the wrapped bytes, so they are plain
/// byte-wise comparisons.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);
48
49impl fmt::Debug for PotentialUtf8 {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        // Debug as a string if possible
52        match self.try_as_str() {
53            Ok(s) => fmt::Debug::fmt(s, f),
54            Err(_) => fmt::Debug::fmt(&self.0, f),
55        }
56    }
57}
58
59impl PotentialUtf8 {
60    /// Create a [`PotentialUtf8`] from a byte slice.
61    #[inline]
62    pub const fn from_bytes(other: &[u8]) -> &Self {
63        // Safety: PotentialUtf8 is transparent over [u8]
64        unsafe { core::mem::transmute(other) }
65    }
66
67    /// Create a [`PotentialUtf8`] from a string slice.
68    #[inline]
69    pub const fn from_str(s: &str) -> &Self {
70        Self::from_bytes(s.as_bytes())
71    }
72
73    /// Create a [`PotentialUtf8`] from boxed bytes.
74    ///
75    /// ✨ *Enabled with the `alloc` Cargo feature.*
76    #[inline]
77    #[cfg(feature = "alloc")]
78    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
79        // Safety: PotentialUtf8 is transparent over [u8]
80        unsafe { core::mem::transmute(other) }
81    }
82
83    /// Create a [`PotentialUtf8`] from a boxed `str`.
84    ///
85    /// ✨ *Enabled with the `alloc` Cargo feature.*
86    #[inline]
87    #[cfg(feature = "alloc")]
88    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
89        Self::from_boxed_bytes(other.into_boxed_bytes())
90    }
91
92    /// Get the bytes from a [`PotentialUtf8].
93    #[inline]
94    pub const fn as_bytes(&self) -> &[u8] {
95        &self.0
96    }
97
98    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// use potential_utf::PotentialUtf8;
104    ///
105    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
106    ///
107    /// let b = A.try_as_str().unwrap();
108    /// assert_eq!(b, "abc");
109    /// ```
110    // Note: this is const starting in 1.63
111    #[inline]
112    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
113        core::str::from_utf8(&self.0)
114    }
115}
116
117impl<'a> From<&'a str> for &'a PotentialUtf8 {
118    #[inline]
119    fn from(other: &'a str) -> Self {
120        PotentialUtf8::from_str(other)
121    }
122}
123
124impl PartialEq<str> for PotentialUtf8 {
125    fn eq(&self, other: &str) -> bool {
126        self.eq(Self::from_str(other))
127    }
128}
129
130impl PartialOrd<str> for PotentialUtf8 {
131    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
132        self.partial_cmp(Self::from_str(other))
133    }
134}
135
136impl PartialEq<PotentialUtf8> for str {
137    fn eq(&self, other: &PotentialUtf8) -> bool {
138        PotentialUtf8::from_str(self).eq(other)
139    }
140}
141
142impl PartialOrd<PotentialUtf8> for str {
143    fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
144        PotentialUtf8::from_str(self).partial_cmp(other)
145    }
146}
147
/// Converts a boxed `str` into a boxed [`PotentialUtf8`], reusing the allocation.
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    #[inline]
    fn from(other: Box<str>) -> Self {
        PotentialUtf8::from_boxed_bytes(other.into_boxed_bytes())
    }
}
155
156impl Deref for PotentialUtf8 {
157    type Target = [u8];
158    fn deref(&self) -> &Self::Target {
159        &self.0
160    }
161}
162
/// This impl requires enabling the optional `zerovec` and `alloc` Cargo features
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    type Container = zerovec::VarZeroVec<'a, Self>;
    type Slice = zerovec::VarZeroSlice<Self>;
    type GetType = Self;
    type OwnedType = Box<Self>;
}
171
// Safety (based on the safety checklist on the VarULE trait):
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible)
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (impossible)
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
//  6. All other methods are defaulted
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    /// Always succeeds: every byte sequence is accepted.
    #[inline]
    fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
        Ok(())
    }
    /// Reinterprets `bytes` in place through the safe `from_bytes` constructor.
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        Self::from_bytes(bytes)
    }
}
192
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde_core::Serialize for PotentialUtf8 {
    /// Serializes as a string for human-readable formats and as bytes otherwise.
    ///
    /// The bytes are validated as UTF-8 before the format is inspected, so
    /// invalid UTF-8 yields a custom serializer error for both kinds of format.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde_core::Serializer,
    {
        use serde_core::ser::Error;
        let text = match self.try_as_str() {
            Ok(text) => text,
            Err(_) => return Err(S::Error::custom("invalid UTF-8 in PotentialUtf8")),
        };
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(text.as_bytes());
        }
        serializer.serialize_str(text)
    }
}
211
/// This impl requires enabling the optional `serde` and `alloc` Cargo features
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde_core::Deserialize<'de> for Box<PotentialUtf8> {
    /// Reads a boxed string from human-readable formats and boxed bytes otherwise.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Binary formats carry raw bytes; no UTF-8 validation is performed here.
            return Box::<[u8]>::deserialize(deserializer).map(PotentialUtf8::from_boxed_bytes);
        }
        Box::<str>::deserialize(deserializer).map(PotentialUtf8::from_boxed_str)
    }
}
228
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de, 'a> serde_core::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    /// Borrows a `&str` from human-readable formats and a `&[u8]` otherwise.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Binary formats carry raw bytes; no UTF-8 validation is performed here.
            return <&[u8]>::deserialize(deserializer).map(PotentialUtf8::from_bytes);
        }
        <&str>::deserialize(deserializer).map(PotentialUtf8::from_str)
    }
}
248
/// A slice of `u16` code units that is expected to be UTF-16 but does not enforce that
/// invariant.
///
/// Equality and ordering are derived from the wrapped code units, so they are plain
/// element-wise comparisons of the `u16` values.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);
253
254impl fmt::Debug for PotentialUtf16 {
255    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256        // Debug as a string if possible
257        for c in char::decode_utf16(self.0.iter().copied()) {
258            match c {
259                Ok(c) => f.write_fmt(format_args!("{0}", c))write!(f, "{c}")?,
260                Err(e) => f.write_fmt(format_args!("\\0x{0:x}", e.unpaired_surrogate()))write!(f, "\\0x{:x}", e.unpaired_surrogate())?,
261            }
262        }
263        Ok(())
264    }
265}
266
267impl PotentialUtf16 {
268    /// Create a [`PotentialUtf16`] from a u16 slice.
269    #[inline]
270    pub const fn from_slice(other: &[u16]) -> &Self {
271        // Safety: PotentialUtf16 is transparent over [u16]
272        unsafe { core::mem::transmute(other) }
273    }
274
275    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
276        char::decode_utf16(self.0.iter().copied()).map(|c| c.unwrap_or(char::REPLACEMENT_CHARACTER))
277    }
278}