Skip to main content

idna_adapter/
lib.rs

1// Copyright The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! This crate abstracts over a Unicode back end for the [`idna`][1]
10//! crate.
11//!
12//! To work around the lack of [`global-features`][2] in Cargo, this
13//! crate allows the top level `Cargo.lock` to choose an alternative
14//! Unicode back end for the `idna` crate by pinning a version of this
15//! crate.
16//!
17//! See the [README of the latest version][3] for more details.
18//!
19//! [1]: https://docs.rs/crate/idna/latest
20//! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618
21//! [3]: https://docs.rs/crate/idna_adapter/latest
22
23#![no_std]
24
25use icu_normalizer::uts46::Uts46MapperBorrowed;
26use icu_properties::props::GeneralCategory;
27use icu_properties::CodePointMapDataBorrowed;
28
29/// Turns a joining type into a mask for comparing with multiple type at once.
30const fn joining_type_to_mask(jt: icu_properties::props::JoiningType) -> u32 {
31    1u32 << jt.to_icu4c_value()
32}
33
34/// Mask for checking for both left and dual joining.
35pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
36    joining_type_to_mask(icu_properties::props::JoiningType::LeftJoining)
37        | joining_type_to_mask(icu_properties::props::JoiningType::DualJoining),
38);
39
40/// Mask for checking for both left and dual joining.
41pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
42    joining_type_to_mask(icu_properties::props::JoiningType::RightJoining)
43        | joining_type_to_mask(icu_properties::props::JoiningType::DualJoining),
44);
45
46/// Turns a bidi class into a mask for comparing with multiple classes at once.
47const fn bidi_class_to_mask(bc: icu_properties::props::BidiClass) -> u32 {
48    1u32 << bc.to_icu4c_value()
49}
50
51/// Mask for checking if the domain is a bidi domain.
52pub const RTL_MASK: BidiClassMask = BidiClassMask(
53    bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
54        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter)
55        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicNumber),
56);
57
58/// Mask for allowable bidi classes in the first character of a label
59/// (either LTR or RTL) in a bidi domain.
60pub const FIRST_BC_MASK: BidiClassMask = BidiClassMask(
61    bidi_class_to_mask(icu_properties::props::BidiClass::LeftToRight)
62        | bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
63        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter),
64);
65
66// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
67// character in an LTR label in a bidi domain.
68pub const LAST_LTR_MASK: BidiClassMask = BidiClassMask(
69    bidi_class_to_mask(icu_properties::props::BidiClass::LeftToRight)
70        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber),
71);
72
73// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
74// character in an RTL label in a bidi domain.
75pub const LAST_RTL_MASK: BidiClassMask = BidiClassMask(
76    bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
77        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter)
78        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber)
79        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicNumber),
80);
81
82// Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain.
83pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask(
84    bidi_class_to_mask(icu_properties::props::BidiClass::LeftToRight)
85        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber)
86        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanSeparator)
87        | bidi_class_to_mask(icu_properties::props::BidiClass::CommonSeparator)
88        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanTerminator)
89        | bidi_class_to_mask(icu_properties::props::BidiClass::OtherNeutral)
90        | bidi_class_to_mask(icu_properties::props::BidiClass::BoundaryNeutral)
91        | bidi_class_to_mask(icu_properties::props::BidiClass::NonspacingMark),
92);
93
94// Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain.
95pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask(
96    bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
97        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter)
98        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicNumber)
99        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber)
100        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanSeparator)
101        | bidi_class_to_mask(icu_properties::props::BidiClass::CommonSeparator)
102        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanTerminator)
103        | bidi_class_to_mask(icu_properties::props::BidiClass::OtherNeutral)
104        | bidi_class_to_mask(icu_properties::props::BidiClass::BoundaryNeutral)
105        | bidi_class_to_mask(icu_properties::props::BidiClass::NonspacingMark),
106);
107
108/// Turns a genecal category into a mask for comparing with multiple categories at once.
109const fn general_category_to_mask(gc: GeneralCategory) -> u32 {
110    1 << (gc as u32)
111}
112
113/// Mask for the disallowed general categories of the first character in a label.
114const MARK_MASK: u32 = general_category_to_mask(GeneralCategory::NonspacingMark)
115    | general_category_to_mask(GeneralCategory::SpacingMark)
116    | general_category_to_mask(GeneralCategory::EnclosingMark);
117
118/// Value for the Joining_Type Unicode property.
119#[repr(transparent)]
120#[derive(#[automatically_derived]
impl ::core::clone::Clone for JoiningType {
    #[inline]
    fn clone(&self) -> JoiningType {
        let _:
                ::core::clone::AssertParamIsClone<icu_properties::props::JoiningType>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for JoiningType { }Copy)]
121pub struct JoiningType(icu_properties::props::JoiningType);
122
123impl JoiningType {
124    /// Returns the corresponding `JoiningTypeMask`.
125    #[inline(always)]
126    pub fn to_mask(self) -> JoiningTypeMask {
127        JoiningTypeMask(joining_type_to_mask(self.0))
128    }
129
130    // `true` iff this value is the Transparent value.
131    #[inline(always)]
132    pub fn is_transparent(self) -> bool {
133        self.0 == icu_properties::props::JoiningType::Transparent
134    }
135}
136
/// A mask representing potentially multiple `JoiningType`
/// values.
///
/// Each set bit stands for one joining type.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct JoiningTypeMask(u32);

impl JoiningTypeMask {
    /// `true` iff both masks have at least one `JoiningType` in common.
    #[inline(always)]
    pub fn intersects(self, other: JoiningTypeMask) -> bool {
        (self.0 & other.0) != 0
    }
}
150
151/// Value for the Bidi_Class Unicode property.
152#[repr(transparent)]
153#[derive(#[automatically_derived]
impl ::core::clone::Clone for BidiClass {
    #[inline]
    fn clone(&self) -> BidiClass {
        let _:
                ::core::clone::AssertParamIsClone<icu_properties::props::BidiClass>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for BidiClass { }Copy)]
154pub struct BidiClass(icu_properties::props::BidiClass);
155
156impl BidiClass {
157    /// Returns the corresponding `BidiClassMask`.
158    #[inline(always)]
159    pub fn to_mask(self) -> BidiClassMask {
160        BidiClassMask(bidi_class_to_mask(self.0))
161    }
162
163    /// `true` iff this value is Left_To_Right
164    #[inline(always)]
165    pub fn is_ltr(self) -> bool {
166        self.0 == icu_properties::props::BidiClass::LeftToRight
167    }
168
169    /// `true` iff this value is Nonspacing_Mark
170    #[inline(always)]
171    pub fn is_nonspacing_mark(self) -> bool {
172        self.0 == icu_properties::props::BidiClass::NonspacingMark
173    }
174
175    /// `true` iff this value is European_Number
176    #[inline(always)]
177    pub fn is_european_number(self) -> bool {
178        self.0 == icu_properties::props::BidiClass::EuropeanNumber
179    }
180
181    /// `true` iff this value is Arabic_Number
182    #[inline(always)]
183    pub fn is_arabic_number(self) -> bool {
184        self.0 == icu_properties::props::BidiClass::ArabicNumber
185    }
186}
187
/// A mask representing potentially multiple `BidiClass`
/// values.
///
/// Each set bit stands for one bidi class.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct BidiClassMask(u32);

impl BidiClassMask {
    /// `true` iff both masks have at least one `BidiClass` in common.
    #[inline(always)]
    pub fn intersects(self, other: BidiClassMask) -> bool {
        (self.0 & other.0) != 0
    }
}
201
202/// An adapter between a Unicode back end an the `idna` crate.
203pub struct Adapter {
204    mapper: Uts46MapperBorrowed<'static>,
205    general_category: CodePointMapDataBorrowed<'static, GeneralCategory>,
206    bidi_class: CodePointMapDataBorrowed<'static, icu_properties::props::BidiClass>,
207    joining_type: CodePointMapDataBorrowed<'static, icu_properties::props::JoiningType>,
208}
209
210#[cfg(feature = "compiled_data")]
211impl Default for Adapter {
212    fn default() -> Self {
213        Self::new()
214    }
215}
216
217impl Adapter {
218    /// Constructor using data compiled into the binary.
219    #[cfg(feature = "compiled_data")]
220    #[inline(always)]
221    pub const fn new() -> Self {
222        Self {
223            mapper: Uts46MapperBorrowed::new(),
224            general_category: icu_properties::CodePointMapData::<GeneralCategory>::new(),
225            bidi_class: icu_properties::CodePointMapData::<icu_properties::props::BidiClass>::new(),
226            joining_type:
227                icu_properties::CodePointMapData::<icu_properties::props::JoiningType>::new(),
228        }
229    }
230
231    /// `true` iff the Canonical_Combining_Class of `c` is Virama.
232    #[inline(always)]
233    pub fn is_virama(&self, c: char) -> bool {
234        self.mapper.is_virama(c)
235    }
236
237    /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark,
238    /// Spacing_Mark, or Enclosing_Mark.
239    #[inline(always)]
240    pub fn is_mark(&self, c: char) -> bool {
241        (general_category_to_mask(self.general_category.get(c)) & MARK_MASK) != 0
242    }
243
244    /// Returns the Bidi_Class of `c`.
245    #[inline(always)]
246    pub fn bidi_class(&self, c: char) -> BidiClass {
247        BidiClass(self.bidi_class.get(c))
248    }
249
250    /// Returns the Joining_Type of `c`.
251    #[inline(always)]
252    pub fn joining_type(&self, c: char) -> JoiningType {
253        JoiningType(self.joining_type.get(c))
254    }
255
256    /// See the [method of the same name in `icu_normalizer`][1] for the
257    /// exact semantics.
258    ///
259    /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize
260    #[inline(always)]
261    pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>(
262        &'delegate self,
263        iter: I,
264    ) -> impl Iterator<Item = char> + 'delegate {
265        self.mapper.map_normalize(iter)
266    }
267
268    /// See the [method of the same name in `icu_normalizer`][1] for the
269    /// exact semantics.
270    ///
271    /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate
272    #[inline(always)]
273    pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>(
274        &'delegate self,
275        iter: I,
276    ) -> impl Iterator<Item = char> + 'delegate {
277        self.mapper.normalize_validate(iter)
278    }
279}