tinystr/
ascii.rs

Help
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::asciibyte::AsciiByte;
6use crate::int_ops::{Aligned4, Aligned8};
7use crate::TinyStrError;
8use core::fmt;
9use core::ops::Deref;
10use core::str::{self, FromStr};
11
12#[repr(transparent)]
13#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
14pub struct TinyAsciiStr<const N: usize> {
15    bytes: [AsciiByte; N],
16}
17
18impl<const N: usize> TinyAsciiStr<N> {
19    /// Creates a `TinyAsciiStr<N>` from the given byte slice.
20    /// `bytes` may contain at most `N` non-null ASCII bytes.
21    pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> {
22        Self::from_bytes_inner(bytes, 0, bytes.len(), false)
23    }
24
25    /// Creates a `TinyAsciiStr<N>` from a byte slice, replacing invalid bytes.
26    ///
27    /// Null and non-ASCII bytes (i.e. those outside the range `0x01..=0x7F`)
28    /// will be replaced with the '?' character.
29    ///
30    /// The input slice will be truncated if its length exceeds `N`.
31    pub const fn from_bytes_lossy(bytes: &[u8]) -> Self {
32        const QUESTION: u8 = b'?';
33        let mut out = [0; N];
34        let mut i = 0;
35        // Ord is not available in const, so no `.min(N)`
36        let len = if bytes.len() > N { N } else { bytes.len() };
37
38        // Indexing is protected by the len check above
39        #[allow(clippy::indexing_slicing)]
40        while i < len {
41            let b = bytes[i];
42            if b > 0 && b < 0x80 {
43                out[i] = b;
44            } else {
45                out[i] = QUESTION;
46            }
47            i += 1;
48        }
49
50        Self {
51            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
52            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
53        }
54    }
55
56    /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
57    ///
58    /// The byte array may contain trailing NUL bytes.
59    ///
60    /// # Example
61    ///
62    /// ```
63    /// use tinystr::tinystr;
64    /// use tinystr::TinyAsciiStr;
65    ///
66    /// assert_eq!(
67    ///     TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
68    ///     Ok(tinystr!(3, "GB"))
69    /// );
70    /// assert_eq!(
71    ///     TinyAsciiStr::<3>::try_from_raw(*b"USD"),
72    ///     Ok(tinystr!(3, "USD"))
73    /// );
74    /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
75    /// ```
76    pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> {
77        Self::from_bytes_inner(&raw, 0, N, true)
78    }
79
80    /// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes),
81    /// but callable in a `const` context (which range indexing is not).
82    pub const fn from_bytes_manual_slice(
83        bytes: &[u8],
84        start: usize,
85        end: usize,
86    ) -> Result<Self, TinyStrError> {
87        Self::from_bytes_inner(bytes, start, end, false)
88    }
89
90    #[inline]
91    pub(crate) const fn from_bytes_inner(
92        bytes: &[u8],
93        start: usize,
94        end: usize,
95        allow_trailing_null: bool,
96    ) -> Result<Self, TinyStrError> {
97        let len = end - start;
98        if len > N {
99            return Err(TinyStrError::TooLarge { max: N, len });
100        }
101
102        let mut out = [0; N];
103        let mut i = 0;
104        let mut found_null = false;
105        // Indexing is protected by TinyStrError::TooLarge
106        #[allow(clippy::indexing_slicing)]
107        while i < len {
108            let b = bytes[start + i];
109
110            if b == 0 {
111                found_null = true;
112            } else if b >= 0x80 {
113                return Err(TinyStrError::NonAscii);
114            } else if found_null {
115                // Error if there are contentful bytes after null
116                return Err(TinyStrError::ContainsNull);
117            }
118            out[i] = b;
119
120            i += 1;
121        }
122
123        if !allow_trailing_null && found_null {
124            // We found some trailing nulls, error
125            return Err(TinyStrError::ContainsNull);
126        }
127
128        Ok(Self {
129            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
130            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
131        })
132    }
133
134    // TODO: This function shadows the FromStr trait. Rename?
135    #[inline]
136    pub const fn from_str(s: &str) -> Result<Self, TinyStrError> {
137        Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false)
138    }
139
140    #[inline]
141    pub const fn as_str(&self) -> &str {
142        // as_bytes is valid utf8
143        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
144    }
145
146    #[inline]
147    #[must_use]
148    pub const fn len(&self) -> usize {
149        if N <= 4 {
150            Aligned4::from_ascii_bytes(&self.bytes).len()
151        } else if N <= 8 {
152            Aligned8::from_ascii_bytes(&self.bytes).len()
153        } else {
154            let mut i = 0;
155            #[allow(clippy::indexing_slicing)] // < N is safe
156            while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
157                i += 1
158            }
159            i
160        }
161    }
162
163    #[inline]
164    #[must_use]
165    pub const fn is_empty(&self) -> bool {
166        self.bytes[0] as u8 == AsciiByte::B0 as u8
167    }
168
169    #[inline]
170    #[must_use]
171    pub const fn as_bytes(&self) -> &[u8] {
172        // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
173        // and changing the length of that slice to self.len() < N is safe.
174        unsafe {
175            core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
176        }
177    }
178
179    #[inline]
180    #[must_use]
181    pub const fn all_bytes(&self) -> &[u8; N] {
182        // SAFETY: `self.bytes` has same size as [u8; N]
183        unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
184    }
185
186    #[inline]
187    #[must_use]
188    /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
189    ///
190    /// If `M < len()` the string gets truncated, otherwise only the
191    /// memory representation changes.
192    pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
193        let mut bytes = [0; M];
194        let mut i = 0;
195        // Indexing is protected by the loop guard
196        #[allow(clippy::indexing_slicing)]
197        while i < M && i < N {
198            bytes[i] = self.bytes[i] as u8;
199            i += 1;
200        }
201        // `self.bytes` only contains ASCII bytes, with no null bytes between
202        // ASCII characters, so this also holds for `bytes`.
203        unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) }
204    }
205
206    /// # Safety
207    /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
208    /// between ASCII characters
209    #[must_use]
210    pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self {
211        Self {
212            bytes: AsciiByte::to_ascii_byte_array(&bytes),
213        }
214    }
215}
216
/// Expands to the body of an `is_ascii_*` predicate on `TinyAsciiStr<N>`.
///
/// For `N <= 4` and `N <= 8` the check is delegated to the method `$int_check`
/// of `Aligned4` / `Aligned8`. For larger `N` the bytes are tested one by one,
/// stopping at the first zero byte. The expansion contains `return false`, so
/// it has to be used inside a function that returns `bool`.
///
/// Arms (behaviour of the `N > 8` path):
/// 1. `$byte_check`: every content byte must satisfy the `u8` method.
/// 2. `!$first_reject, !$rest_reject`: byte 0 must *not* satisfy
///    `$first_reject`, later content bytes must *not* satisfy `$rest_reject`.
/// 3. `$first_require, $rest_require`: byte 0 must satisfy `$first_require`,
///    later content bytes must satisfy `$rest_require`.
///
/// NOTE(review): on the `N > 8` path, arm 3 tests byte 0 unconditionally, so an
/// empty string yields `false`, whereas arm 1 yields `true` for it — confirm
/// that this difference is intended.
macro_rules! check_is {
    ($this:ident, $int_check:ident, $byte_check:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$this.bytes).$int_check()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$this.bytes).$int_check()
        } else {
            let mut idx = 0;
            // `idx < N` is tested before every access.
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                let byte = $this.bytes[idx] as u8;
                if byte == AsciiByte::B0 as u8 {
                    break;
                }
                if !byte.$byte_check() {
                    return false;
                }
                idx += 1;
            }
            true
        }
    };
    ($this:ident, $int_check:ident, !$first_reject:ident, !$rest_reject:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$this.bytes).$int_check()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$this.bytes).$int_check()
        } else {
            // Index 0 exists because `N > 8` in this branch.
            if ($this.bytes[0] as u8).$first_reject() {
                return false;
            }
            let mut idx = 1;
            // `idx < N` is tested before every access.
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                let byte = $this.bytes[idx] as u8;
                if byte == AsciiByte::B0 as u8 {
                    break;
                }
                if byte.$rest_reject() {
                    return false;
                }
                idx += 1;
            }
            true
        }
    };
    ($this:ident, $int_check:ident, $first_require:ident, $rest_require:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$this.bytes).$int_check()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$this.bytes).$int_check()
        } else {
            // Index 0 exists because `N > 8` in this branch.
            if !($this.bytes[0] as u8).$first_require() {
                return false;
            }
            let mut idx = 1;
            // `idx < N` is tested before every access.
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                let byte = $this.bytes[idx] as u8;
                if byte == AsciiByte::B0 as u8 {
                    break;
                }
                if !byte.$rest_require() {
                    return false;
                }
                idx += 1;
            }
            true
        }
    };
}
281
282impl<const N: usize> TinyAsciiStr<N> {
283    /// Checks if the value is composed of ASCII alphabetic characters:
284    ///
285    ///  * U+0041 'A' ..= U+005A 'Z', or
286    ///  * U+0061 'a' ..= U+007A 'z'.
287    ///
288    /// # Examples
289    ///
290    /// ```
291    /// use tinystr::TinyAsciiStr;
292    ///
293    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
294    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
295    ///
296    /// assert!(s1.is_ascii_alphabetic());
297    /// assert!(!s2.is_ascii_alphabetic());
298    /// ```
299    #[inline]
300    #[must_use]
301    pub const fn is_ascii_alphabetic(&self) -> bool {
302        check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
303    }
304
305    /// Checks if the value is composed of ASCII alphanumeric characters:
306    ///
307    ///  * U+0041 'A' ..= U+005A 'Z', or
308    ///  * U+0061 'a' ..= U+007A 'z', or
309    ///  * U+0030 '0' ..= U+0039 '9'.
310    ///
311    /// # Examples
312    ///
313    /// ```
314    /// use tinystr::TinyAsciiStr;
315    ///
316    /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
317    /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
318    ///
319    /// assert!(s1.is_ascii_alphanumeric());
320    /// assert!(!s2.is_ascii_alphanumeric());
321    /// ```
322    #[inline]
323    #[must_use]
324    pub const fn is_ascii_alphanumeric(&self) -> bool {
325        check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
326    }
327
328    /// Checks if the value is composed of ASCII decimal digits:
329    ///
330    ///  * U+0030 '0' ..= U+0039 '9'.
331    ///
332    /// # Examples
333    ///
334    /// ```
335    /// use tinystr::TinyAsciiStr;
336    ///
337    /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
338    /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
339    ///
340    /// assert!(s1.is_ascii_numeric());
341    /// assert!(!s2.is_ascii_numeric());
342    /// ```
343    #[inline]
344    #[must_use]
345    pub const fn is_ascii_numeric(&self) -> bool {
346        check_is!(self, is_ascii_numeric, is_ascii_digit)
347    }
348
349    /// Checks if the value is in ASCII lower case.
350    ///
351    /// All letter characters are checked for case. Non-letter characters are ignored.
352    ///
353    /// # Examples
354    ///
355    /// ```
356    /// use tinystr::TinyAsciiStr;
357    ///
358    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
359    /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
360    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
361    ///
362    /// assert!(!s1.is_ascii_lowercase());
363    /// assert!(s2.is_ascii_lowercase());
364    /// assert!(s3.is_ascii_lowercase());
365    /// ```
366    #[inline]
367    #[must_use]
368    pub const fn is_ascii_lowercase(&self) -> bool {
369        check_is!(
370            self,
371            is_ascii_lowercase,
372            !is_ascii_uppercase,
373            !is_ascii_uppercase
374        )
375    }
376
377    /// Checks if the value is in ASCII title case.
378    ///
379    /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
380    /// Non-letter characters are ignored.
381    ///
382    /// # Examples
383    ///
384    /// ```
385    /// use tinystr::TinyAsciiStr;
386    ///
387    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
388    /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
389    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
390    ///
391    /// assert!(!s1.is_ascii_titlecase());
392    /// assert!(s2.is_ascii_titlecase());
393    /// assert!(s3.is_ascii_titlecase());
394    /// ```
395    #[inline]
396    #[must_use]
397    pub const fn is_ascii_titlecase(&self) -> bool {
398        check_is!(
399            self,
400            is_ascii_titlecase,
401            !is_ascii_lowercase,
402            !is_ascii_uppercase
403        )
404    }
405
406    /// Checks if the value is in ASCII upper case.
407    ///
408    /// All letter characters are checked for case. Non-letter characters are ignored.
409    ///
410    /// # Examples
411    ///
412    /// ```
413    /// use tinystr::TinyAsciiStr;
414    ///
415    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
416    /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
417    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
418    ///
419    /// assert!(!s1.is_ascii_uppercase());
420    /// assert!(s2.is_ascii_uppercase());
421    /// assert!(!s3.is_ascii_uppercase());
422    /// ```
423    #[inline]
424    #[must_use]
425    pub const fn is_ascii_uppercase(&self) -> bool {
426        check_is!(
427            self,
428            is_ascii_uppercase,
429            !is_ascii_lowercase,
430            !is_ascii_lowercase
431        )
432    }
433
434    /// Checks if the value is composed of ASCII alphabetic lower case characters:
435    ///
436    ///  * U+0061 'a' ..= U+007A 'z',
437    ///
438    /// # Examples
439    ///
440    /// ```
441    /// use tinystr::TinyAsciiStr;
442    ///
443    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
444    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
445    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
446    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
447    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
448    ///
449    /// assert!(!s1.is_ascii_alphabetic_lowercase());
450    /// assert!(!s2.is_ascii_alphabetic_lowercase());
451    /// assert!(!s3.is_ascii_alphabetic_lowercase());
452    /// assert!(s4.is_ascii_alphabetic_lowercase());
453    /// assert!(!s5.is_ascii_alphabetic_lowercase());
454    /// ```
455    #[inline]
456    #[must_use]
457    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
458        check_is!(
459            self,
460            is_ascii_alphabetic_lowercase,
461            is_ascii_lowercase,
462            is_ascii_lowercase
463        )
464    }
465
466    /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
467    ///
468    /// # Examples
469    ///
470    /// ```
471    /// use tinystr::TinyAsciiStr;
472    ///
473    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
474    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
475    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
476    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
477    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
478    ///
479    /// assert!(s1.is_ascii_alphabetic_titlecase());
480    /// assert!(!s2.is_ascii_alphabetic_titlecase());
481    /// assert!(!s3.is_ascii_alphabetic_titlecase());
482    /// assert!(!s4.is_ascii_alphabetic_titlecase());
483    /// assert!(!s5.is_ascii_alphabetic_titlecase());
484    /// ```
485    #[inline]
486    #[must_use]
487    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
488        check_is!(
489            self,
490            is_ascii_alphabetic_titlecase,
491            is_ascii_uppercase,
492            is_ascii_lowercase
493        )
494    }
495
496    /// Checks if the value is composed of ASCII alphabetic upper case characters:
497    ///
498    ///  * U+0041 'A' ..= U+005A 'Z',
499    ///
500    /// # Examples
501    ///
502    /// ```
503    /// use tinystr::TinyAsciiStr;
504    ///
505    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
506    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
507    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
508    /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
509    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
510    ///
511    /// assert!(!s1.is_ascii_alphabetic_uppercase());
512    /// assert!(!s2.is_ascii_alphabetic_uppercase());
513    /// assert!(!s3.is_ascii_alphabetic_uppercase());
514    /// assert!(s4.is_ascii_alphabetic_uppercase());
515    /// assert!(!s5.is_ascii_alphabetic_uppercase());
516    /// ```
517    #[inline]
518    #[must_use]
519    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
520        check_is!(
521            self,
522            is_ascii_alphabetic_uppercase,
523            is_ascii_uppercase,
524            is_ascii_uppercase
525        )
526    }
527}
528
/// Expands to the body of a `to_ascii_*` case-conversion method; evaluates to
/// the (mutated) `$this`.
///
/// * `$int_to`: conversion method of `Aligned4` / `Aligned8`, used for `N <= 8`.
/// * `$rest_to`: `u8` method applied to every content byte when `N > 8`.
/// * `$first_to` (optional): `u8` method applied to byte 0 afterwards when
///   `N > 8`.
macro_rules! to {
    ($this:ident, $int_to:ident, $rest_to:ident $(,$first_to:ident)?) => {{
        let mut idx = 0;
        if N <= 4 {
            let converted = Aligned4::from_ascii_bytes(&$this.bytes).$int_to().to_ascii_bytes();
            // In bounds: `self.bytes` has length N and `converted` has length >= N.
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                $this.bytes[idx] = converted[idx];
                idx += 1;
            }
        } else if N <= 8 {
            let converted = Aligned8::from_ascii_bytes(&$this.bytes).$int_to().to_ascii_bytes();
            // In bounds: `self.bytes` has length N and `converted` has length >= N.
            #[allow(clippy::indexing_slicing)]
            while idx < N {
                $this.bytes[idx] = converted[idx];
                idx += 1;
            }
        } else {
            // `idx < N` is tested before every access.
            #[allow(clippy::indexing_slicing)]
            while idx < N && $this.bytes[idx] as u8 != AsciiByte::B0 as u8 {
                let mapped = ($this.bytes[idx] as u8).$rest_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $this.bytes[idx] = unsafe { core::mem::transmute::<u8, AsciiByte>(mapped) };
                idx += 1;
            }
            $(
                // Index 0 exists because `N > 8` in this branch.
                let first = ($this.bytes[0] as u8).$first_to();
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $this.bytes[0] = unsafe { core::mem::transmute::<u8, AsciiByte>(first) };
            )?
        }
        $this
    }};
}
570
571impl<const N: usize> TinyAsciiStr<N> {
572    /// Converts this type to its ASCII lower case equivalent in-place.
573    ///
574    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
575    ///
576    /// # Examples
577    ///
578    /// ```
579    /// use tinystr::TinyAsciiStr;
580    ///
581    /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
582    ///
583    /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
584    /// ```
585    #[inline]
586    #[must_use]
587    pub const fn to_ascii_lowercase(mut self) -> Self {
588        to!(self, to_ascii_lowercase, to_ascii_lowercase)
589    }
590
591    /// Converts this type to its ASCII title case equivalent in-place.
592    ///
593    /// The first character is converted to ASCII uppercase; the remaining characters
594    /// are converted to ASCII lowercase.
595    ///
596    /// # Examples
597    ///
598    /// ```
599    /// use tinystr::TinyAsciiStr;
600    ///
601    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
602    ///
603    /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
604    /// ```
605    #[inline]
606    #[must_use]
607    pub const fn to_ascii_titlecase(mut self) -> Self {
608        to!(
609            self,
610            to_ascii_titlecase,
611            to_ascii_lowercase,
612            to_ascii_uppercase
613        )
614    }
615
616    /// Converts this type to its ASCII upper case equivalent in-place.
617    ///
618    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
619    ///
620    /// # Examples
621    ///
622    /// ```
623    /// use tinystr::TinyAsciiStr;
624    ///
625    /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
626    ///
627    /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
628    /// ```
629    #[inline]
630    #[must_use]
631    pub const fn to_ascii_uppercase(mut self) -> Self {
632        to!(self, to_ascii_uppercase, to_ascii_uppercase)
633    }
634}
635
636impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
637    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
638        fmt::Debug::fmt(self.as_str(), f)
639    }
640}
641
642impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
643    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
644        fmt::Display::fmt(self.as_str(), f)
645    }
646}
647
648impl<const N: usize> Deref for TinyAsciiStr<N> {
649    type Target = str;
650    #[inline]
651    fn deref(&self) -> &str {
652        self.as_str()
653    }
654}
655
656impl<const N: usize> FromStr for TinyAsciiStr<N> {
657    type Err = TinyStrError;
658    #[inline]
659    fn from_str(s: &str) -> Result<Self, Self::Err> {
660        Self::from_str(s)
661    }
662}
663
664impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
665    fn eq(&self, other: &str) -> bool {
666        self.deref() == other
667    }
668}
669
670impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
671    fn eq(&self, other: &&str) -> bool {
672        self.deref() == *other
673    }
674}
675
/// Compares the string content with an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        self.as_str() == other.as_str()
    }
}
682
/// Compares an owned `String` with the content of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        self.as_str() == other.as_str()
    }
}
689
#[cfg(test)]
mod test {
    use super::*;
    use rand::distributions::Distribution;
    use rand::distributions::Standard;
    use rand::rngs::SmallRng;
    use rand::seq::SliceRandom;
    use rand::SeedableRng;

    /// Hand-picked inputs that every operation is checked against, in addition
    /// to the random strings from `gen_strings`.
    const STRINGS: [&str; 26] = [
        "Latn",
        "laTn",
        "windows",
        "AR",
        "Hans",
        "macos",
        "AT",
        "infiniband",
        "FR",
        "en",
        "Cyrl",
        "FromIntegral",
        "NO",
        "419",
        "MacintoshOSX2019",
        "a3z",
        "A3z",
        "A3Z",
        "a3Z",
        "3A",
        "3Z",
        "3a",
        "3z",
        "@@[`{",
        "UK",
        "E12",
    ];

    /// Calls the generic function `$check` once for each capacity under test.
    macro_rules! for_each_capacity {
        ($check:ident) => {
            $check::<2>();
            $check::<3>();
            $check::<4>();
            $check::<5>();
            $check::<8>();
            $check::<16>();
        };
    }

    /// Produces `num_strings` strings of non-NUL ASCII bytes from a fixed seed,
    /// each with a length picked from `allowed_lengths`.
    fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
        let mut rng = SmallRng::seed_from_u64(2022);
        // All lengths are drawn first because producing the bytes below needs
        // the RNG a second time.
        let mut lengths = Vec::with_capacity(num_strings);
        for _ in 0..num_strings {
            lengths.push(*allowed_lengths.choose(&mut rng).unwrap());
        }
        let mut strings = Vec::with_capacity(num_strings);
        for len in lengths {
            let ascii: Vec<u8> = Standard
                .sample_iter(&mut rng)
                .filter(|b: &u8| *b > 0 && *b < 0x80)
                .take(len)
                .collect();
            strings.push(String::from_utf8(ascii).expect("All ASCII"));
        }
        strings
    }

    /// Asserts that `tinystr_f` and `reference_f` agree on every fixed and
    /// random input that fits into a `TinyAsciiStr<N>`.
    fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
    where
        F1: Fn(&str) -> T,
        F2: Fn(TinyAsciiStr<N>) -> T,
        T: core::fmt::Debug + core::cmp::PartialEq,
    {
        let fixed = STRINGS.iter().map(|fixed_str| (*fixed_str).to_owned());
        let random = gen_strings(100, &[3, 4, 5, 8, 12]);
        for s in fixed.chain(random) {
            let parsed = TinyAsciiStr::<N>::from_str(&s);
            let t = match parsed {
                Ok(t) => t,
                // Inputs longer than this capacity are simply skipped.
                Err(TinyStrError::TooLarge { .. }) => continue,
                Err(e) => panic!("{}", e),
            };
            let expected = reference_f(&s);
            let actual = tinystr_f(t);
            assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
        }
    }

    #[test]
    fn test_is_ascii_alphabetic() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_alphabetic()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_alphanumeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_alphanumeric()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_numeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_digit()),
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let lowered = TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_lowercase();
                    s == lowered.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let titled = TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_titlecase();
                    s == titled.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let uppered = TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_uppercase();
                    s == uppered.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Must be alphabetic ...
                    if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                        return false;
                    }
                    // ... and unchanged by lowercasing.
                    let lowered = TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_lowercase();
                    s == lowered.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Must be alphabetic ...
                    if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                        return false;
                    }
                    // ... and unchanged by titlecasing.
                    let titled = TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_titlecase();
                    s == titled.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Must be alphabetic ...
                    if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                        return false;
                    }
                    // ... and unchanged by uppercasing.
                    let uppered = TinyAsciiStr::<16>::from_str(s)
                        .unwrap()
                        .to_ascii_uppercase();
                    s == uppered.as_str()
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_to_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.to_ascii_lowercase(),
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_to_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let mut r = s.to_ascii_lowercase();
                    // SAFETY: the string is nonempty ASCII, and changing the
                    // case of an ASCII byte keeps it valid UTF-8.
                    unsafe { r.as_bytes_mut()[0].make_ascii_uppercase() };
                    r
                },
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn test_to_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.to_ascii_uppercase(),
                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(),
            )
        }
        for_each_capacity!(check);
    }

    #[test]
    fn lossy_constructor() {
        // (input bytes, expected content of the resulting `TinyAsciiStr<4>`)
        let cases: [(&[u8], &str); 5] = [
            (b"", ""),
            (b"oh\0o", "oh?o"),
            (b"\0", "?"),
            (b"toolong", "tool"),
            (&[b'a', 0x80, 0xFF, b'1'], "a??1"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(
                TinyAsciiStr::<4>::from_bytes_lossy(input).as_str(),
                expected
            );
        }
    }
}
tinystr/ascii.rs

tinystr/
ascii.rs