// Copyright The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This module provides the lower-level API for UTS 46.
//!
//! [`Uts46::process`] is the core that the other convenience
//! methods build on.
//!
//! UTS 46 flags map to this API as follows:
//!
//! * _CheckHyphens_ - _true_: [`Hyphens::Check`], _false_: [`Hyphens::Allow`]; the WHATWG URL Standard sets this to _false_ for normal (non-conformance-checker) user agents.
//! * _CheckBidi_ - Always _true_; cannot be configured, since this flag is _true_ even when WHATWG URL Standard _beStrict_ is _false_.
//! * _CheckJoiners_ - Always _true_; cannot be configured, since this flag is _true_ even when WHATWG URL Standard _beStrict_ is _false_.
//! * _UseSTD3ASCIIRules_ - _true_: [`AsciiDenyList::STD3`], _false_: [`AsciiDenyList::EMPTY`]; however, the check the WHATWG URL Standard performs right after the UTS 46 invocation corresponds to [`AsciiDenyList::URL`].
//! * _Transitional_Processing_ - Always _false_ but could be implemented as a preprocessing step. This flag is deprecated and for Web purposes the transition is over in the sense that all of Firefox, Safari, and Chrome set this flag to _false_.
//! * _VerifyDnsLength_ - _true_: [`DnsLength::Verify`], _false_: [`DnsLength::Ignore`]; the WHATWG URL Standard sets this to _false_ for normal (non-conformance-checker) user agents.
//! * _IgnoreInvalidPunycode_ - Always _false_; cannot be configured. (Not yet covered by the WHATWG URL Standard, but 2 out of 3 major browsers clearly behave as if this was _false_).
2324use crate::punycode::Decoder;
25use crate::punycode::InternalCaller;
26use alloc::borrow::Cow;
27use alloc::string::String;
28use core::fmt::Write;
29use idna_adapter::*;
30use smallvec::SmallVec;
31use utf8_iter::Utf8CharsEx;
/// Upper bound on the length of Punycode decoder input.
///
/// ICU4C-compatible constraint.
/// <https://unicode-org.atlassian.net/browse/ICU-13727>
const PUNYCODE_DECODE_MAX_INPUT_LENGTH: usize = 2000;
/// Upper bound on the length of Punycode encoder input.
///
/// ICU4C-compatible constraint. (Note: ICU4C measures
/// UTF-16 and we measure UTF-32. This means that we
/// allow longer non-BMP inputs. For this implementation,
/// the denial-of-service scaling does not depend on BMP vs.
/// non-BMP: only the scalar values matter.)
///
/// <https://unicode-org.atlassian.net/browse/ICU-13727>
const PUNYCODE_ENCODE_MAX_INPUT_LENGTH: usize = 1000;
/// For keeping track of what kind of numerals have been
/// seen in an RTL label.
#[derive(Debug, PartialEq, Eq)]
enum RtlNumeralState {
    Undecided,
    European,
    Arabic,
}
/// Computes the mask for upper-case ASCII.
///
/// Bit `n` of the result is set iff byte value `n` is in `b'A'..=b'Z'`.
const fn upper_case_mask() -> u128 {
    let mut accu = 0u128;
    // Only the 26 letters contribute, so walk just that range.
    let mut b = b'A';
    while b <= b'Z' {
        accu |= 1u128 << b;
        b += 1;
    }
    accu
}
6768/// Bit set for upper-case ASCII.
69const UPPER_CASE_MASK: u128 = upper_case_mask();
/// Computes the mask for glyphless ASCII.
///
/// Bit `n` of the result is set iff byte value `n` is U+0020 SPACE or
/// below, or is U+007F DELETE.
const fn glyphless_mask() -> u128 {
    // Start with DELETE, then add everything up to and including SPACE.
    let mut accu = 1u128 << 0x7Fu8;
    let mut b = 0u8;
    while b <= b' ' {
        accu |= 1u128 << b;
        b += 1;
    }
    accu
}
8384/// Bit set for glyphless ASCII.
85const GLYPHLESS_MASK: u128 = glyphless_mask();
/// The mask for the ASCII dot: only the bit whose index is `b'.'` is set.
const DOT_MASK: u128 = 1 << b'.';
/// Computes the ASCII deny list for STD3 ASCII rules.
///
/// Every ASCII byte is denied (its bit is set) except lower-case letters,
/// digits, the hyphen, and the dot. Upper-case letters are therefore part
/// of the mask.
const fn ldh_mask() -> u128 {
    let mut accu = 0u128;
    let mut b = 0u8;
    while b < 128 {
        let allowed = matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.');
        if !allowed {
            accu |= 1u128 << b;
        }
        b += 1;
    }
    accu
}
/// The bytes `XN--` packed into a `u32` with the first byte in the lowest
/// eight bits (i.e. little-endian order).
///
/// The letters are upper-case because this value is compared against input
/// that has been masked with `PUNYCODE_PREFIX_MASK`.
const PUNYCODE_PREFIX: u32 =
    ((b'-' as u32) << 24) | ((b'-' as u32) << 16) | ((b'N' as u32) << 8) | b'X' as u32;
/// Mask applied before comparing against `PUNYCODE_PREFIX`.
///
/// `0xDF` clears bit `0x20` in the two letter positions, which maps ASCII
/// lower-case letters onto upper-case ones; the two hyphen positions are
/// compared exactly (`0xFF`).
const PUNYCODE_PREFIX_MASK: u32 = (0xFF << 24) | (0xFF << 16) | (0xDF << 8) | 0xDF;
107108fn write_punycode_label<W: Write + ?Sized>(
109 label: &[char],
110 sink: &mut W,
111) -> Result<(), ProcessingError> {
112sink.write_str("xn--")?;
113crate::punycode::encode_into::<_, _, InternalCaller>(label.iter().copied(), sink)?;
114Ok(())
115}
116117#[inline(always)]
118fn has_punycode_prefix(slice: &[u8]) -> bool {
119if slice.len() < 4 {
120return false;
121 }
122// Sadly, the optimizer doesn't figure out that more idiomatic code
123 // should compile to masking on 32-bit value.
124let a = slice[0];
125let b = slice[1];
126let c = slice[2];
127let d = slice[3];
128let u = (u32::from(d) << 24) | (u32::from(c) << 16) | (u32::from(b) << 8) | u32::from(a);
129 (u & PUNYCODE_PREFIX_MASK) == PUNYCODE_PREFIX130}
/// Returns `true` iff `start <= u <= end`, using a single comparison.
///
/// Values below `start` wrap around to large numbers on subtraction and so
/// fail the comparison. Requires `start <= end`; otherwise `end - start`
/// overflows.
#[inline(always)]
fn in_inclusive_range8(u: u8, start: u8, end: u8) -> bool {
    u.wrapping_sub(start) <= (end - start)
}
/// Returns `true` iff `start <= c <= end`, using a single comparison on the
/// scalar values.
///
/// Values below `start` wrap around on subtraction and so fail the
/// comparison. Requires `start <= end`; otherwise the subtraction of the
/// bounds overflows.
#[inline(always)]
fn in_inclusive_range_char(c: char, start: char, end: char) -> bool {
    u32::from(c).wrapping_sub(u32::from(start)) <= (u32::from(end) - u32::from(start))
}
/// Returns `true` iff `label` is empty, or starts with an ASCII lower-case
/// letter, continues with only lower-case letters, digits, and hyphens,
/// does not end with a hyphen, and does not have hyphens in both the third
/// and fourth position.
#[inline(always)]
fn is_passthrough_ascii_label(label: &[u8]) -> bool {
    match label {
        // empty
        [] => true,
        // XXX if we aren't performing _CheckHyphens_, this could
        // check for "xn--" and pass through YouTube CDN node names.
        [_, _, b'-', b'-', ..] => false,
        [first, tail @ ..] => {
            // We need to check the first and last character
            // more strictly in case this turns out to be a
            // label in a bidi domain name. This has the side
            // effect that this function only accepts labels
            // that also conform to the STD3 rules.
            //
            // XXX: If we are in the fail-fast mode (i.e. we don't need
            // to be able to overwrite anything with U+FFFD), we could
            // merely record that we've seen a digit here and error out
            // if we later discover that the domain name is a bidi
            // domain name.
            first.is_ascii_lowercase()
                // If we used LDH_MASK, we'd have to check
                // the bytes for the ASCII range anyhow.
                && tail
                    .iter()
                    .all(|&b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'-')
                && label.last() != Some(&b'-')
        }
    }
}
/// Splits `label` into an ASCII head and a tail that starts one byte before
/// the first non-ASCII byte.
///
/// * All-ASCII input: `(label, empty)`.
/// * Non-ASCII first byte: `(empty, label)`.
/// * Otherwise the last ASCII byte before the first non-ASCII byte is kept
///   in the tail.
#[inline(always)]
fn split_ascii_fast_path_prefix(label: &[u8]) -> (&[u8], &[u8]) {
    match label.iter().position(|b| !b.is_ascii()) {
        // All ASCII
        None => (label, &[]),
        // Leave one ASCII character in the suffix
        // in case it's a letter that a combining
        // character combines with. When the first byte is already
        // non-ASCII, the saturating subtraction yields an empty head.
        Some(pos) => label.split_at(pos.saturating_sub(1)),
    }
}
// Input known to be lower-case, but may contain non-ASCII.
//
// Returns U+FFFD if `c` is below U+0080 and its bit is set in `deny_list`;
// otherwise returns `c` unchanged. `checked_shl` yields `None` for scalar
// values of 128 and above, so non-ASCII always passes through.
#[inline(always)]
fn apply_ascii_deny_list_to_lower_cased_unicode(c: char, deny_list: u128) -> char {
    match 1u128.checked_shl(u32::from(c)) {
        Some(bit) if (deny_list & bit) != 0 => '\u{FFFD}',
        _ => c,
    }
}
// Input known to be ASCII, but may contain upper case ASCII.
//
// * Bit for `b` not set in `deny_list`: `b` is returned as-is.
// * Bit set and `b` is an upper-case letter: the lower-case letter is returned.
// * Bit set otherwise: U+FFFD is returned.
//
// `b` must be below 0x80; a larger value overflows the shift.
#[inline(always)]
fn apply_ascii_deny_list_to_potentially_upper_case_ascii(b: u8, deny_list: u128) -> char {
    if (deny_list & (1u128 << b)) == 0 {
        char::from(b)
    } else if b.is_ascii_uppercase() {
        char::from(b.to_ascii_lowercase())
    } else {
        '\u{FFFD}'
    }
}
/// Returns `true` iff every `char` in `label` is ASCII (vacuously `true`
/// for an empty label).
#[inline(always)]
fn is_ascii(label: &[char]) -> bool {
    label.iter().all(char::is_ascii)
}
/// Result of `classify_for_punycode`.
#[derive(PartialEq, Eq, Copy, Clone)]
enum PunycodeClassification {
    /// Every character of the label is ASCII.
    Ascii,
    /// The label has at least one non-ASCII character and no U+FFFD.
    Unicode,
    /// The label contains U+FFFD REPLACEMENT CHARACTER.
    Error,
}
246247#[inline(always)]
248fn classify_for_punycode(label: &[char]) -> PunycodeClassification {
249let mut iter = label.iter().copied();
250loop {
251if let Some(c) = iter.next() {
252if c.is_ascii() {
253continue;
254 }
255if c == '\u{FFFD}' {
256return PunycodeClassification::Error;
257 }
258for c in iter {
259if c == '\u{FFFD}' {
260return PunycodeClassification::Error;
261 }
262 }
263return PunycodeClassification::Unicode;
264 }
265return PunycodeClassification::Ascii;
266 }
267}
/// The ASCII deny list to be applied.
///
/// Internally a 128-bit set in which bit `n` being set means that the
/// ASCII character with value `n` is denied.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[repr(transparent)]
pub struct AsciiDenyList {
    bits: u128,
}
275276impl AsciiDenyList {
277/// Computes (preferably at compile time) an ASCII deny list.
278 ///
279 /// Setting `deny_glyphless` to `true` denies U+0020 SPACE and below
280 /// as well as U+007F DELETE for convenience without having to list
281 /// these characters in the `deny_list` string.
282 ///
283 /// `deny_list` is the list of ASCII characters to deny. This
284 /// list must not contain any of:
285 /// * Letters
286 /// * Digits
287 /// * Hyphen
288 /// * Dot (period / full-stop)
289 /// * Non-ASCII
290 ///
291 /// # Panics
292 ///
293 /// If the deny list contains characters listed as prohibited above.
294pub const fn new(deny_glyphless: bool, deny_list: &str) -> Self {
295let mut bits = UPPER_CASE_MASK;
296if deny_glyphless {
297bits |= GLYPHLESS_MASK;
298 }
299let mut i = 0;
300let bytes = deny_list.as_bytes();
301while i < bytes.len() {
302let b = bytes[i];
303if !(b < 0x80) { ::core::panicking::panic("ASCII deny list must be ASCII.") };assert!(b < 0x80, "ASCII deny list must be ASCII.");
304// assert_ne not yet available in const context.
305if !(b != b'.') {
::core::panicking::panic("ASCII deny list must not contain the dot.")
};assert!(b != b'.', "ASCII deny list must not contain the dot.");
306if !(b != b'-') {
::core::panicking::panic("ASCII deny list must not contain the hyphen.")
};assert!(b != b'-', "ASCII deny list must not contain the hyphen.");
307if !!((b >= b'0') && (b <= b'9')) {
::core::panicking::panic("ASCII deny list must not contain digits.")
};assert!(
308 !((b >= b'0') && (b <= b'9')),
309"ASCII deny list must not contain digits."
310);
311if !!((b >= b'a') && (b <= b'z')) {
::core::panicking::panic("ASCII deny list must not contain letters.")
};assert!(
312 !((b >= b'a') && (b <= b'z')),
313"ASCII deny list must not contain letters."
314);
315if !!((b >= b'A') && (b <= b'Z')) {
::core::panicking::panic("ASCII deny list must not contain letters.")
};assert!(
316 !((b >= b'A') && (b <= b'Z')),
317"ASCII deny list must not contain letters."
318);
319 bits |= 1u128 << b;
320 i += 1;
321 }
322Self { bits }
323 }
324325/// No ASCII deny list. This corresponds to _UseSTD3ASCIIRules=false_.
326 ///
327 /// Equivalent to `AsciiDenyList::new(false, "")`.
328 ///
329 /// Note: Not denying the space and control characters can result in
330 /// strange behavior. Without a deny list provided to the UTS 46
331 /// operation, the caller is expected perform filtering afterwards,
332 /// but it's more efficient to use `AsciiDenyList` than post-processing,
333 /// because the internals of this crate can optimize away checks in
334 /// certain cases.
335pub const EMPTY: Self = Self::new(false, "");
336337/// The STD3 deny list. This corresponds to _UseSTD3ASCIIRules=true_.
338 ///
339 /// Note that this deny list rejects the underscore, which occurs in
340 /// pseudo-hosts used by various TXT record-based protocols, and also
341 /// characters that may occurs in non-DNS naming, such as NetBIOS.
342pub const STD3: Self = Self { bits: ldh_mask() };
343344/// [Forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) from the WHATWG URL Standard.
345 ///
346 /// Equivalent to `AsciiDenyList::new(true, "%#/:<>?@[\\]^|")`.
347 ///
348 /// Note that this deny list rejects IPv6 addresses, so (as in URL
349 /// parsing) you need to check for IPv6 addresses first and not
350 /// put them through UTS 46 processing.
351pub const URL: Self = Self::new(true, "%#/:<>?@[\\]^|");
352}
/// The _CheckHyphens_ mode.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[non_exhaustive] // non_exhaustive so that further hyphen modes can be added without a breaking change
pub enum Hyphens {
    /// _CheckHyphens=false_: Do not place positional restrictions on hyphens.
    ///
    /// This mode is used by the WHATWG URL Standard for normal User Agent processing
    /// (i.e. not conformance checking).
    Allow,

    /// Prohibit hyphens in the first and last position in the label but allow in
    /// the third and fourth position.
    ///
    /// Note that this mode rejects real-world names, including some GitHub user pages.
    CheckFirstLast,

    /// _CheckHyphens=true_: Prohibit hyphens in the first, third, fourth,
    /// and last position in the label.
    ///
    /// Note that this mode rejects real-world names, including YouTube CDN nodes
    /// and some GitHub user pages.
    Check,
}
/// The UTS 46 _VerifyDNSLength_ flag.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[non_exhaustive]
pub enum DnsLength {
    /// _VerifyDNSLength=false_. (Possibly relevant for allowing non-DNS naming systems.)
    Ignore,
    /// _VerifyDNSLength=true_ with the exception that the trailing root label dot is
    /// allowed.
    VerifyAllowRootDot,
    /// _VerifyDNSLength=true_. (The trailing root label dot is not allowed.)
    Verify,
}
/// Policy for customizing behavior in case of an error.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[non_exhaustive]
pub enum ErrorPolicy {
    /// Return as early as possible without producing output in case of error.
    FailFast,
    /// In case of error, mark errors with the REPLACEMENT CHARACTER. (The output
    /// containing REPLACEMENT CHARACTERs may be shown to the user to illustrate
    /// what was wrong but must not be used for naming in a network protocol.)
    MarkErrors,
}
/// The success outcome of [`Uts46::process`]
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum ProcessingSuccess {
    /// There were no errors. The caller must consider the input to be the output.
    ///
    /// This asserts that the input can be safely passed to [`core::str::from_utf8_unchecked`].
    ///
    /// (Distinct from `WroteToSink` in order to allow `Cow` behavior to be implemented on top of
    /// [`Uts46::process`].)
    Passthrough,

    /// There were no errors. The caller must consider what was written to the sink to be the output.
    ///
    /// (Distinct from `Passthrough` in order to allow `Cow` behavior to be implemented on top of
    /// [`Uts46::process`].)
    WroteToSink,
}
/// The failure outcome of [`Uts46::process`]
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum ProcessingError {
    /// There was a validity error according to the chosen options.
    ///
    /// With [`ErrorPolicy::FailFast`], there is no output. Otherwise, output was written to the
    /// sink and the output contains at least one U+FFFD REPLACEMENT CHARACTER to denote an error.
    ValidityError,

    /// The sink emitted [`core::fmt::Error`]. The partial output written to the sink must not
    /// be used.
    SinkError,
}
434435impl From<core::fmt::Error> for ProcessingError {
436fn from(_: core::fmt::Error) -> Self {
437Self::SinkError438 }
439}
440441impl From<crate::punycode::PunycodeEncodeError> for ProcessingError {
442fn from(_: crate::punycode::PunycodeEncodeError) -> Self {
443{
::core::panicking::unreachable_display(&"Punycode overflows should not be possible due to PUNYCODE_ENCODE_MAX_INPUT_LENGTH");
};unreachable!(
444"Punycode overflows should not be possible due to PUNYCODE_ENCODE_MAX_INPUT_LENGTH"
445);
446 }
447}
// NOTE(review): the variant names suggest this classifies a label that was
// ASCII on input, with the payload being the label's bytes; confirm against
// the use sites in `process`.
#[derive(Debug, Clone, Copy)]
enum AlreadyAsciiLabel<'a> {
    MixedCaseAscii(&'a [u8]),
    MixedCasePunycode(&'a [u8]),
    Other,
}
/// Performs the _VerifyDNSLength_ check on the output of the _ToASCII_ operation.
///
/// If `allow_trailing_dot` is `true`, a single trailing root label dot is
/// allowed and is not counted towards the length; if it is `false`, a name
/// ending in a dot fails the check.
///
/// Returns `true` iff the name (without the permitted trailing dot) is at
/// most 253 bytes long and every dot-separated label is between 1 and 63
/// bytes long. The empty string therefore fails the check.
///
/// # Panics
///
/// Panics in debug mode if `domain_name` isn't ASCII.
pub fn verify_dns_length(domain_name: &str, allow_trailing_dot: bool) -> bool {
    let bytes = domain_name.as_bytes();
    debug_assert!(bytes.is_ascii());
    let without_trailing_dot = match bytes.strip_suffix(b".") {
        Some(_) if !allow_trailing_dot => return false,
        Some(stripped) => stripped,
        None => bytes,
    };
    without_trailing_dot.len() <= 253
        && without_trailing_dot
            .split(|&b| b == b'.')
            .all(|label| (1..=63).contains(&label.len()))
}
487488/// An implementation of UTS #46.
489pub struct Uts46 {
490 data: idna_adapter::Adapter,
491}
/// Equivalent to [`Uts46::new`]; only available with the `compiled_data` feature.
#[cfg(feature = "compiled_data")]
impl Default for Uts46 {
    fn default() -> Self {
        Self::new()
    }
}
499500impl Uts46 {
/// Constructor using data compiled into the binary.
#[cfg(feature = "compiled_data")]
pub const fn new() -> Self {
    Self {
        data: idna_adapter::Adapter::new(),
    }
}
// XXX Should there be an `icu_provider` feature for enabling
// a constructor for run-time data loading?
511512/// Performs the [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) operation
513 /// from UTS #46 with the options indicated.
514 ///
515 /// # Arguments
516 ///
517 /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
518 /// this method and input that is not well-formed UTF-8 is treated as an error. If you
519 /// already have a `&str`, call `.as_bytes()` on it.)
520 /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
521 /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
522 /// processing is handled via this argument. Most callers are probably the best off
523 /// by using [`AsciiDenyList::URL`] here.
524 /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best
525 /// off by using [`Hyphens::Allow`] here.
526 /// * `dns_length` - The UTS 46 _VerifyDNSLength_ flag.
527pub fn to_ascii<'a>(
528&self,
529 domain_name: &'a [u8],
530 ascii_deny_list: AsciiDenyList,
531 hyphens: Hyphens,
532 dns_length: DnsLength,
533 ) -> Result<Cow<'a, str>, crate::Errors> {
534self.to_ascii_from_cow(
535 Cow::Borrowed(domain_name),
536ascii_deny_list,
537hyphens,
538dns_length,
539 )
540 }
541542pub(crate) fn to_ascii_from_cow<'a>(
543&self,
544 domain_name: Cow<'a, [u8]>,
545 ascii_deny_list: AsciiDenyList,
546 hyphens: Hyphens,
547 dns_length: DnsLength,
548 ) -> Result<Cow<'a, str>, crate::Errors> {
549let mut s = String::new();
550match self.process(
551&domain_name,
552ascii_deny_list,
553hyphens,
554 ErrorPolicy::FailFast,
555 |_, _, _| false,
556&mut s,
557None,
558 ) {
559Ok(ProcessingSuccess::Passthrough) => {
560// SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
561let cow = match domain_name {
562 Cow::Borrowed(v) => Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(v) }),
563 Cow::Owned(v) => Cow::Owned(unsafe { String::from_utf8_unchecked(v) }),
564 };
565if dns_length != DnsLength::Ignore566 && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
567 {
568Err(crate::Errors::default())
569 } else {
570Ok(cow)
571 }
572 }
573Ok(ProcessingSuccess::WroteToSink) => {
574let cow: Cow<'_, str> = Cow::Owned(s);
575if dns_length != DnsLength::Ignore576 && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
577 {
578Err(crate::Errors::default())
579 } else {
580Ok(cow)
581 }
582 }
583Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
584Err(ProcessingError::SinkError) => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
585 }
586 }
587588/// Performs the [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode) operation
589 /// from UTS #46 according to the options given. When there
590 /// are errors, there is still output, which may be rendered user, even through
591 /// the output must not be used in networking protocols. Errors are denoted
592 /// by U+FFFD REPLACEMENT CHARACTERs in the output. (That is, if the second item of the
593 /// return tuple is `Err`, the first item of the return tuple is guaranteed to contain
594 /// at least one U+FFFD.)
595 ///
596 /// Most applications probably shouldn't use this method and should be using
597 /// [`Uts46::to_user_interface`] instead.
598 ///
599 /// # Arguments
600 ///
601 /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
602 /// this method and input that is not well-formed UTF-8 is treated as an error. If you
603 /// already have a `&str`, call `.as_bytes()` on it.)
604 /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
605 /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
606 /// processing is handled via this argument. Most callers are probably the best off
607 /// by using [`AsciiDenyList::URL`] here.
608 /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best
609 /// off by using [`Hyphens::Allow`] here.
610pub fn to_unicode<'a>(
611&self,
612 domain_name: &'a [u8],
613 ascii_deny_list: AsciiDenyList,
614 hyphens: Hyphens,
615 ) -> (Cow<'a, str>, Result<(), crate::Errors>) {
616self.to_user_interface(domain_name, ascii_deny_list, hyphens, |_, _, _| true)
617 }
618619/// Performs the [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode) operation
620 /// from UTS #46 according to options given with some
621 /// error-free Unicode labels output according to
622 /// [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) instead as decided by
623 /// application policy implemented via the `output_as_unicode` closure. The purpose
624 /// is to convert user-visible domains to the Unicode form in general but to render
625 /// potentially misleading labels as Punycode.
626 ///
627 /// This is an imperfect security mechanism, because [the Punycode form itself may be
628 /// resemble a user-recognizable name](https://www.unicode.org/reports/tr36/#TablePunycodeSpoofing).
629 /// However, since this mechanism is common practice, this API provides support for The
630 /// the mechanism.
631 ///
632 /// ASCII labels always pass through as ASCII and labels with errors always pass through
633 /// as Unicode. For non-erroneous labels that contain at least one non-ASCII character
634 /// (implies non-empty), `output_as_unicode` is called with the Unicode form of the label,
635 /// the TLD (potentially empty), and a flag indicating whether the domain name as a whole
636 /// is a bidi domain name. If the return value is `true`, the label passes through as
637 /// Unicode. If the return value is `false`, the label is converted to Punycode.
638 ///
639 /// When there are errors, there is still output, which may be rendered user, even through
640 /// the output must not be used in networking protocols. Errors are denoted by
641 /// U+FFFD REPLACEMENT CHARACTERs in the output. (That is, if the second item
642 /// of the return tuple is `Err`, the first item of the return tuple is guaranteed to contain
643 /// at least one U+FFFD.) Labels that contain errors are not converted to Punycode.
644 ///
645 /// # Arguments
646 ///
647 /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
648 /// this method and input that is not well-formed UTF-8 is treated as an error. If you
649 /// already have a `&str`, call `.as_bytes()` on it.)
650 /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
651 /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
652 /// processing is handled via this argument. Most callers are probably the best off
653 /// by using [`AsciiDenyList::URL`] here.
654 /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best
655 /// off by using [`Hyphens::Allow`] here.
656 /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode
657 /// (as opposed to Punycode). The first argument is the label for which a decision is
658 /// needed (always non-empty slice). The second argument is the TLD (potentially empty).
659 /// The third argument is `true` iff the domain name as a whole is a bidi domain name.
660 /// Only non-erroneous labels that contain at least one non-ASCII character are passed
661 /// to the closure as the first argument. The second and third argument values are
662 /// guaranteed to remain the same during a single call to `process`, and the closure
663 /// may cache computations derived from the second and third argument (hence the
664 /// `FnMut` type).
665pub fn to_user_interface<'a, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>(
666&self,
667 domain_name: &'a [u8],
668 ascii_deny_list: AsciiDenyList,
669 hyphens: Hyphens,
670 output_as_unicode: OutputUnicode,
671 ) -> (Cow<'a, str>, Result<(), crate::Errors>) {
672let mut s = String::new();
673match self.process(
674domain_name,
675ascii_deny_list,
676hyphens,
677 ErrorPolicy::MarkErrors,
678output_as_unicode,
679&mut s,
680None,
681 ) {
682// SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
683Ok(ProcessingSuccess::Passthrough) => (
684 Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) }),
685Ok(()),
686 ),
687Ok(ProcessingSuccess::WroteToSink) => (Cow::Owned(s), Ok(())),
688Err(ProcessingError::ValidityError) => (Cow::Owned(s), Err(crate::Errors::default())),
689Err(ProcessingError::SinkError) => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
690 }
691 }
692693/// The lower-level function that [`Uts46::to_ascii`], [`Uts46::to_unicode`], and
694 /// [`Uts46::to_user_interface`] are built on to allow support for output types other
695 /// than `Cow<'a, str>` (e.g. string types in a non-Rust programming language).
696 ///
697 /// # Arguments
698 ///
699 /// * `domain_name` - The input domain name as UTF-8 bytes. (The UTF-8ness is checked by
700 /// this method and input that is not well-formed UTF-8 is treated as an error. If you
701 /// already have a `&str`, call `.as_bytes()` on it.)
702 /// * `ascii_deny_list` - What ASCII deny list, if any, to apply. The UTS 46
703 /// _UseSTD3ASCIIRules_ flag or the WHATWG URL Standard forbidden domain code point
704 /// processing is handled via this argument. Most callers are probably the best off
705 /// by using [`AsciiDenyList::URL`] here.
706 /// * `hyphens` - The UTS 46 _CheckHyphens_ flag. Most callers are probably the best
707 /// off by using [`Hyphens::Allow`] here.
708 /// * `error_policy` - Whether to fail fast or to produce output that may be rendered
709 /// for the user to examine in case of errors.
710 /// * `output_as_unicode` - A closure for deciding if a label should be output as Unicode
711 /// (as opposed to Punycode). The first argument is the label for which a decision is
712 /// needed (always non-empty slice). The second argument is the TLD (potentially empty).
713 /// The third argument is `true` iff the domain name as a whole is a bidi domain name.
714 /// Only non-erroneous labels that contain at least one non-ASCII character are passed
715 /// to the closure as the first argument. The second and third argument values are
716 /// guaranteed to remain the same during a single call to `process`, and the closure
717 /// may cache computations derived from the second and third argument (hence the
718 /// `FnMut` type). To perform the _ToASCII_ operation, `|_, _, _| false` must be
719 /// passed as the closure. To perform the _ToUnicode_ operation, `|_, _, _| true` must
720 /// be passed as the closure. A more complex closure may be used to prepare a domain
721 /// name for display in a user interface so that labels are converted to the Unicode
722 /// form in general but potentially misleading labels are converted to the Punycode
723 /// form.
724 /// * `sink` - The object that receives the output (in the non-passthrough case).
725 /// * `ascii_sink` - A second sink that receives the _ToASCII_ form only if there
726 /// were no errors and `sink` received at least one character of non-ASCII output.
727 /// The purpose of this argument is to enable a user interface display form of the
728 /// domain and the _ToASCII_ form of the domain to be computed efficiently together.
729 /// This argument is useless when `output_as_unicode` always returns `false`, in
730 /// which case the _ToASCII_ form ends up in `sink` already. If `ascii_sink` receives
731 /// no output and the return value is `Ok(ProcessingSuccess::WroteToSink)`, use the
732 /// output received by `sink` also as the _ToASCII_ result.
733 ///
734 /// # Return value
735 ///
736 /// * `Ok(ProcessingSuccess::Passthrough)` - The caller must treat
737 /// `unsafe { core::str::from_utf8_unchecked(domain_name) }` as the output. (This
738 /// return value asserts that calling `core::str::from_utf8_unchecked(domain_name)`
739 /// is safe.)
740 /// * `Ok(ProcessingSuccess::WroteToSink)` - The caller must treat was was written
741 /// to `sink` as the output. If another sink was passed as `ascii_sink` but it did
742 /// not receive output, the caller must treat what was written to `sink` also as
743 /// the _ToASCII_ output. Otherwise, if `ascii_sink` received output, the caller
744 /// must treat what was written to `ascii_sink` as the _ToASCII_ output.
745 /// * `Err(ProcessingError::ValidityError)` - The input was in error and must
746 /// not be used for DNS lookup or otherwise in a network protocol. If `error_policy`
747 /// was `ErrorPolicy::MarkErrors`, the output written to `sink` may be displayed
748 /// to the user as an illustration of where the error was or the errors were.
749 /// * `Err(ProcessingError::SinkError)` - Either `sink` or `ascii_sink` returned
750 /// [`core::fmt::Error`]. The partial output written to `sink` `ascii_sink` must not
751 /// be used. If `W` never returns [`core::fmt::Error`], this method never returns
752 /// `Err(ProcessingError::SinkError)`.
753 ///
754 /// # Safety-usable invariant
755 ///
756 /// If the return value is `Ok(ProcessingSuccess::Passthrough)`, `domain_name` is
757 /// ASCII and `core::str::from_utf8_unchecked(domain_name)` is safe. (Note:
758 /// Other return values do _not_ imply that `domain_name` wasn't ASCII!)
759 ///
760 /// # Security considerations
761 ///
762 /// Showing labels whose Unicode form might mislead the user as Punycode instead is
763 /// an imperfect security mechanism, because [the Punycode form itself may be resemble
764 /// a user-recognizable name](https://www.unicode.org/reports/tr36/#TablePunycodeSpoofing).
765 /// However, since this mechanism is common practice, this API provides support for the
766 /// the mechanism.
767 ///
768 /// Punycode processing is quadratic, so to avoid denial of service, this method imposes
769 /// length limits on Punycode treating especially long inputs as being in error. These
770 /// limits are well higher than the DNS length limits and are not more restrictive than
771 /// the limits imposed by ICU4C.
772#[allow(clippy::too_many_arguments)]
773pub fn process<W: Write + ?Sized, OutputUnicode: FnMut(&[char], &[char], bool) -> bool>(
774&self,
775 domain_name: &[u8],
776 ascii_deny_list: AsciiDenyList,
777 hyphens: Hyphens,
778 error_policy: ErrorPolicy,
779mut output_as_unicode: OutputUnicode,
780 sink: &mut W,
781 ascii_sink: Option<&mut W>,
782 ) -> Result<ProcessingSuccess, ProcessingError> {
783let fail_fast = error_policy == ErrorPolicy::FailFast;
784let mut domain_buffer = SmallVec::<[char; 253]>::new();
785let mut already_punycode = SmallVec::<[AlreadyAsciiLabel; 8]>::new();
786// `process_inner` could be pasted inline here, but it's out of line in order
787 // to avoid duplicating that code when monomorphizing over `W` and `OutputUnicode`.
788let (passthrough_up_to, is_bidi, had_errors) = self.process_inner(
789domain_name,
790ascii_deny_list,
791hyphens,
792fail_fast,
793&mut domain_buffer,
794&mut already_punycode,
795 );
796if passthrough_up_to == domain_name.len() {
797if true {
if !!had_errors {
::core::panicking::panic("assertion failed: !had_errors")
};
};debug_assert!(!had_errors);
798return Ok(ProcessingSuccess::Passthrough);
799 }
800// Checked only after passthrough as a micro optimization.
801if fail_fast && had_errors {
802return Err(ProcessingError::ValidityError);
803 }
804if true {
match (&had_errors, &domain_buffer.contains(&'\u{FFFD}')) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(had_errors, domain_buffer.contains(&'\u{FFFD}'));
805let without_dot = if let Some(without_dot) = domain_buffer.strip_suffix(&['.']) {
806without_dot807 } else {
808&domain_buffer[..]
809 };
810// unwrap is OK, because we always have at least one label
811let tld = without_dot.rsplit(|c| *c == '.').next().unwrap();
812let mut had_unicode_output = false;
813let mut seen_label = false;
814let mut already_punycode_iter = already_punycode.iter();
815let mut passthrough_up_to_extended = passthrough_up_to;
816let mut flushed_prefix = false;
817for label in domain_buffer.split(|c| *c == '.') {
818// Unwrap is OK, because there are supposed to be as many items in
819 // `already_punycode` as there are labels.
820let input_punycode = *already_punycode_iter.next().unwrap();
821if seen_label {
822if flushed_prefix {
823 sink.write_char('.')?;
824 } else {
825if true {
match (&domain_name[passthrough_up_to_extended], &b'.') {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.');
826 passthrough_up_to_extended += 1;
827if passthrough_up_to_extended == domain_name.len() {
828if true {
if !!had_errors {
::core::panicking::panic("assertion failed: !had_errors")
};
};debug_assert!(!had_errors);
829return Ok(ProcessingSuccess::Passthrough);
830 }
831 }
832 }
833 seen_label = true;
834835if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode {
836if let Some(first_upper_case) =
837 mixed_case.iter().position(|c| c.is_ascii_uppercase())
838 {
839let (head, tail) = mixed_case.split_at(first_upper_case);
840let slice_to_write = if flushed_prefix {
841 head
842 } else {
843 flushed_prefix = true;
844 passthrough_up_to_extended += head.len();
845if true {
match (&passthrough_up_to_extended, &domain_name.len()) {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
846&domain_name[..passthrough_up_to_extended]
847 };
848// SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
849sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?;
850for c in tail.iter() {
851 sink.write_char(char::from(c.to_ascii_lowercase()))?;
852 }
853 } else if flushed_prefix {
854// SAFETY: `mixed_case` is known to be ASCII.
855sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
856 } else {
857 passthrough_up_to_extended += mixed_case.len();
858if passthrough_up_to_extended == domain_name.len() {
859if true {
if !!had_errors {
::core::panicking::panic("assertion failed: !had_errors")
};
};debug_assert!(!had_errors);
860return Ok(ProcessingSuccess::Passthrough);
861 }
862 }
863continue;
864 }
865866let potentially_punycode = if fail_fast {
867if true {
if !(classify_for_punycode(label) != PunycodeClassification::Error) {
::core::panicking::panic("assertion failed: classify_for_punycode(label) != PunycodeClassification::Error")
};
};debug_assert!(classify_for_punycode(label) != PunycodeClassification::Error);
868 !is_ascii(label)
869 } else {
870 classify_for_punycode(label) == PunycodeClassification::Unicode
871 };
872let passthrough = if potentially_punycode {
873let unicode = output_as_unicode(label, tld, is_bidi);
874 had_unicode_output |= unicode;
875 unicode
876 } else {
877true
878};
879if passthrough {
880if !flushed_prefix {
881 flushed_prefix = true;
882// SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
883sink.write_str(unsafe {
884 core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
885 })?;
886 }
887for c in label.iter().copied() {
888 sink.write_char(c)?;
889 }
890 } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode {
891if let Some(first_upper_case) =
892 mixed_case.iter().position(|c| c.is_ascii_uppercase())
893 {
894let (head, tail) = mixed_case.split_at(first_upper_case);
895let slice_to_write = if flushed_prefix {
896 head
897 } else {
898 flushed_prefix = true;
899 passthrough_up_to_extended += head.len();
900if true {
match (&passthrough_up_to_extended, &domain_name.len()) {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
901&domain_name[..passthrough_up_to_extended]
902 };
903// SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
904sink.write_str(unsafe { core::str::from_utf8_unchecked(slice_to_write) })?;
905for c in tail.iter() {
906 sink.write_char(char::from(c.to_ascii_lowercase()))?;
907 }
908 } else if flushed_prefix {
909// SAFETY: `mixed_case` is known to be ASCII.
910sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
911 } else {
912 passthrough_up_to_extended += mixed_case.len();
913if passthrough_up_to_extended == domain_name.len() {
914if true {
if !!had_errors {
::core::panicking::panic("assertion failed: !had_errors")
};
};debug_assert!(!had_errors);
915return Ok(ProcessingSuccess::Passthrough);
916 }
917 }
918 } else {
919if !flushed_prefix {
920 flushed_prefix = true;
921// SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
922sink.write_str(unsafe {
923 core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
924 })?;
925 }
926 write_punycode_label(label, sink)?;
927 }
928 }
929930if had_errors {
931return Err(ProcessingError::ValidityError);
932 }
933934if had_unicode_output {
935if let Some(sink) = ascii_sink {
936let mut seen_label = false;
937let mut already_punycode_iter = already_punycode.iter();
938let mut passthrough_up_to_extended = passthrough_up_to;
939let mut flushed_prefix = false;
940for label in domain_buffer.split(|c| *c == '.') {
941// Unwrap is OK, because there are supposed to be as many items in
942 // `already_punycode` as there are labels.
943let input_punycode = *already_punycode_iter.next().unwrap();
944if seen_label {
945if flushed_prefix {
946 sink.write_char('.')?;
947 } else {
948if true {
match (&domain_name[passthrough_up_to_extended], &b'.') {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(domain_name[passthrough_up_to_extended], b'.');
949 passthrough_up_to_extended += 1;
950 }
951 }
952 seen_label = true;
953954if let AlreadyAsciiLabel::MixedCaseAscii(mixed_case) = input_punycode {
955if let Some(first_upper_case) =
956 mixed_case.iter().position(|c| c.is_ascii_uppercase())
957 {
958let (head, tail) = mixed_case.split_at(first_upper_case);
959let slice_to_write = if flushed_prefix {
960 head
961 } else {
962 flushed_prefix = true;
963 passthrough_up_to_extended += head.len();
964if true {
match (&passthrough_up_to_extended, &domain_name.len()) {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
965&domain_name[..passthrough_up_to_extended]
966 };
967// SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
968sink.write_str(unsafe {
969 core::str::from_utf8_unchecked(slice_to_write)
970 })?;
971for c in tail.iter() {
972 sink.write_char(char::from(c.to_ascii_lowercase()))?;
973 }
974 } else if flushed_prefix {
975// SAFETY: `mixed_case` is known to be ASCII.
976sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
977 } else {
978 passthrough_up_to_extended += mixed_case.len();
979 }
980continue;
981 }
982983if is_ascii(label) {
984if !flushed_prefix {
985 flushed_prefix = true;
986// SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
987sink.write_str(unsafe {
988 core::str::from_utf8_unchecked(
989&domain_name[..passthrough_up_to_extended],
990 )
991 })?;
992 }
993for c in label.iter().copied() {
994 sink.write_char(c)?;
995 }
996 } else if let AlreadyAsciiLabel::MixedCasePunycode(mixed_case) = input_punycode
997 {
998if let Some(first_upper_case) =
999 mixed_case.iter().position(|c| c.is_ascii_uppercase())
1000 {
1001let (head, tail) = mixed_case.split_at(first_upper_case);
1002let slice_to_write = if flushed_prefix {
1003 head
1004 } else {
1005 flushed_prefix = true;
1006 passthrough_up_to_extended += head.len();
1007if true {
match (&passthrough_up_to_extended, &domain_name.len()) {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_ne!(passthrough_up_to_extended, domain_name.len());
1008&domain_name[..passthrough_up_to_extended]
1009 };
1010// SAFETY: `mixed_case` and `domain_name` up to `passthrough_up_to_extended` are known to be ASCII.
1011sink.write_str(unsafe {
1012 core::str::from_utf8_unchecked(slice_to_write)
1013 })?;
1014for c in tail.iter() {
1015 sink.write_char(char::from(c.to_ascii_lowercase()))?;
1016 }
1017 } else if flushed_prefix {
1018// SAFETY: `mixed_case` is known to be ASCII.
1019sink.write_str(unsafe { core::str::from_utf8_unchecked(mixed_case) })?;
1020 } else {
1021 passthrough_up_to_extended += mixed_case.len();
1022 }
1023 } else {
1024if !flushed_prefix {
1025 flushed_prefix = true;
1026// SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
1027sink.write_str(unsafe {
1028 core::str::from_utf8_unchecked(
1029&domain_name[..passthrough_up_to_extended],
1030 )
1031 })?;
1032 }
1033 write_punycode_label(label, sink)?;
1034 }
1035 }
1036if !flushed_prefix {
1037// SAFETY: `domain_name` up to `passthrough_up_to_extended` is known to be ASCII.
1038sink.write_str(unsafe {
1039 core::str::from_utf8_unchecked(&domain_name[..passthrough_up_to_extended])
1040 })?;
1041 }
1042 }
1043 }
1044Ok(ProcessingSuccess::WroteToSink)
1045 }
10461047/// The part of `process` that doesn't need to be generic over the sink.
1048#[inline(always)]
1049fn process_inner<'a>(
1050&self,
1051 domain_name: &'a [u8],
1052 ascii_deny_list: AsciiDenyList,
1053 hyphens: Hyphens,
1054 fail_fast: bool,
1055 domain_buffer: &mut SmallVec<[char; 253]>,
1056 already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>,
1057 ) -> (usize, bool, bool) {
1058// Sadly, this even faster-path ASCII tier is needed to avoid regressing
1059 // performance.
1060let mut iter = domain_name.iter();
1061let mut most_recent_label_start = iter.clone();
1062loop {
1063if let Some(&b) = iter.next() {
1064if in_inclusive_range8(b, b'a', b'z') {
1065continue;
1066 }
1067if b == b'.' {
1068most_recent_label_start = iter.clone();
1069continue;
1070 }
1071return self.process_innermost(
1072domain_name,
1073ascii_deny_list,
1074hyphens,
1075fail_fast,
1076domain_buffer,
1077already_punycode,
1078most_recent_label_start.as_slice(),
1079 );
1080 } else {
1081// Success! The whole input passes through on the fastest path!
1082return (domain_name.len(), false, false);
1083 }
1084 }
1085 }
10861087/// The part of `process` that doesn't need to be generic over the sink and
1088 /// can avoid monomorphizing in the interest of code size.
1089 /// Separating this into a different stack frame compared to `process_inner`
1090 /// improves performance in the ICU4X case.
1091#[allow(clippy::too_many_arguments)]
1092 #[inline(never)]
1093fn process_innermost<'a>(
1094&self,
1095 domain_name: &'a [u8],
1096 ascii_deny_list: AsciiDenyList,
1097 hyphens: Hyphens,
1098 fail_fast: bool,
1099 domain_buffer: &mut SmallVec<[char; 253]>,
1100 already_punycode: &mut SmallVec<[AlreadyAsciiLabel<'a>; 8]>,
1101 tail: &'a [u8],
1102 ) -> (usize, bool, bool) {
1103let deny_list = ascii_deny_list.bits;
1104let deny_list_deny_dot = deny_list | DOT_MASK;
11051106let mut had_errors = false;
11071108let mut passthrough_up_to = domain_name.len() - tail.len(); // Index into `domain_name`
1109 // 253 ASCII characters is the max length for a valid domain name
1110 // (excluding the root dot).
1111let mut current_label_start; // Index into `domain_buffer`
1112let mut seen_label = false;
1113let mut in_prefix = true;
1114for label in tail.split(|b| *b == b'.') {
1115// We check for passthrough only for the prefix. That is, if we
1116 // haven't moved on and started filling `domain_buffer`. Keeping
1117 // this stuff in one loop where the first items keep being skipped
1118 // once they have been skipped at least once instead of working
1119 // this into a fancier loop structure in order to make sure that
1120 // no item from the iterator is lost or processed twice.
1121 // Furthermore, after the passthrough fails, restarting the
1122 // normalization process after each pre-existing ASCII dot also
1123 // provides an opportunity for the processing to get back onto
1124 // an ASCII fast path that bypasses the normalizer for ASCII
1125 // after a pre-existing ASCII dot (pre-existing in the sense
1126 // of not coming from e.g. normalizing an ideographic dot).
1127if in_prefix && is_passthrough_ascii_label(label) {
1128if seen_label {
1129if true {
match (&domain_name[passthrough_up_to], &b'.') {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(domain_name[passthrough_up_to], b'.');
1130 passthrough_up_to += 1;
1131 }
1132 seen_label = true;
11331134 passthrough_up_to += label.len();
1135continue;
1136 }
1137if seen_label {
1138if in_prefix {
1139if true {
match (&domain_name[passthrough_up_to], &b'.') {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(domain_name[passthrough_up_to], b'.');
1140 passthrough_up_to += 1;
1141 } else {
1142 domain_buffer.push('.');
1143 }
1144 }
1145 seen_label = true;
1146 in_prefix = false;
1147 current_label_start = domain_buffer.len();
1148if !label.is_empty() {
1149let (ascii, non_ascii) = split_ascii_fast_path_prefix(label);
1150let non_punycode_ascii_label = if non_ascii.is_empty() {
1151if has_punycode_prefix(ascii) {
1152if (ascii.last() != Some(&b'-'))
1153 && (ascii.len() - 4 <= PUNYCODE_DECODE_MAX_INPUT_LENGTH)
1154 {
1155if let Ok(decode) =
1156 Decoder::default().decode::<u8, InternalCaller>(&ascii[4..])
1157 {
1158// 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4
1159 // characters.
1160let mut label_buffer = SmallVec::<[char; 59]>::new();
1161 label_buffer.extend(decode);
11621163if self.after_punycode_decode(
1164 domain_buffer,
1165 current_label_start,
1166&label_buffer,
1167 deny_list_deny_dot,
1168 fail_fast,
1169&mut had_errors,
1170 ) {
1171return (0, false, true);
1172 }
11731174if self.check_label(
1175 hyphens,
1176&mut domain_buffer[current_label_start..],
1177 fail_fast,
1178&mut had_errors,
1179true,
1180true,
1181 ) {
1182return (0, false, true);
1183 }
1184 } else {
1185// Punycode failed
1186if fail_fast {
1187return (0, false, true);
1188 }
1189 had_errors = true;
1190 domain_buffer.push('\u{FFFD}');
1191let mut iter = ascii.iter();
1192// Discard the first character that we replaced.
1193let _ = iter.next();
1194 domain_buffer.extend(iter.map(|c| {
1195// Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1196 // not matter.
1197apply_ascii_deny_list_to_potentially_upper_case_ascii(
1198*c, deny_list,
1199 )
1200 }));
1201 };
1202// If there were errors, we won't be trying to use this
1203 // anyway later, so it's fine to put it here unconditionally.
1204already_punycode.push(AlreadyAsciiLabel::MixedCasePunycode(label));
1205continue;
1206 } else if fail_fast {
1207return (0, false, true);
1208 }
1209// Else fall through to the complex path and rediscover error
1210 // there.
1211false
1212} else {
1213true
1214}
1215 } else {
1216false
1217};
1218for c in ascii.iter().map(|c| {
1219// Can't have dot here, so `deny_list` vs `deny_list_deny_dot` does
1220 // not matter.
1221apply_ascii_deny_list_to_potentially_upper_case_ascii(*c, deny_list)
1222 }) {
1223if c == '\u{FFFD}' {
1224if fail_fast {
1225return (0, false, true);
1226 }
1227 had_errors = true;
1228 }
1229 domain_buffer.push(c);
1230 }
1231if non_punycode_ascii_label {
1232if hyphens != Hyphens::Allow
1233 && check_hyphens(
1234&mut domain_buffer[current_label_start..],
1235 hyphens == Hyphens::CheckFirstLast,
1236 fail_fast,
1237&mut had_errors,
1238 )
1239 {
1240return (0, false, true);
1241 }
1242 already_punycode.push(if had_errors {
1243 AlreadyAsciiLabel::Other
1244 } else {
1245 AlreadyAsciiLabel::MixedCaseAscii(label)
1246 });
1247continue;
1248 }
1249 already_punycode.push(AlreadyAsciiLabel::Other);
1250let mut first_needs_combining_mark_check = ascii.is_empty();
1251let mut needs_contextj_check = !non_ascii.is_empty();
1252let mut mapping = self
1253.data
1254 .map_normalize(non_ascii.chars())
1255 .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list));
1256loop {
1257let n = mapping.next();
1258match n {
1259None | Some('.') => {
1260if domain_buffer[current_label_start..]
1261 .starts_with(&['x', 'n', '-', '-'])
1262 {
1263let mut punycode_precondition_failed = false;
1264for c in domain_buffer[current_label_start + 4..].iter_mut() {
1265if !c.is_ascii() {
1266if fail_fast {
1267return (0, false, true);
1268 }
1269 had_errors = true;
1270*c = '\u{FFFD}';
1271 punycode_precondition_failed = true;
1272 }
1273 }
12741275if let Some(last) = domain_buffer.last_mut() {
1276if *last == '-' {
1277// Either there's nothing after the "xn--" prefix
1278 // and we got the last hyphen of "xn--", or there
1279 // are no Punycode digits after the last delimiter
1280 // which would result in Punycode decode outputting
1281 // ASCII only.
1282if fail_fast {
1283return (0, false, true);
1284 }
1285 had_errors = true;
1286*last = '\u{FFFD}';
1287 punycode_precondition_failed = true;
1288 }
1289 } else {
1290::core::panicking::panic("internal error: entered unreachable code");unreachable!();
1291 }
12921293// Reject excessively long input
1294 // https://github.com/whatwg/url/issues/824
1295 // https://unicode-org.atlassian.net/browse/ICU-13727
1296if domain_buffer.len() - current_label_start - 4
1297> PUNYCODE_DECODE_MAX_INPUT_LENGTH
1298 {
1299if fail_fast {
1300return (0, false, true);
1301 }
1302 had_errors = true;
1303 domain_buffer[current_label_start
1304 + 4
1305+ PUNYCODE_DECODE_MAX_INPUT_LENGTH] = '\u{FFFD}';
1306 punycode_precondition_failed = true;
1307 }
13081309if !punycode_precondition_failed {
1310if let Ok(decode) = Decoder::default()
1311 .decode::<char, InternalCaller>(
1312&domain_buffer[current_label_start + 4..],
1313 )
1314 {
1315 first_needs_combining_mark_check = true;
1316 needs_contextj_check = true;
1317// 63 ASCII characters is the max length for a valid DNS label and xn-- takes 4
1318 // characters.
1319let mut label_buffer = SmallVec::<[char; 59]>::new();
1320 label_buffer.extend(decode);
13211322 domain_buffer.truncate(current_label_start);
1323if self.after_punycode_decode(
1324 domain_buffer,
1325 current_label_start,
1326&label_buffer,
1327 deny_list_deny_dot,
1328 fail_fast,
1329&mut had_errors,
1330 ) {
1331return (0, false, true);
1332 }
1333 } else {
1334// Punycode failed
1335if fail_fast {
1336return (0, false, true);
1337 }
1338 had_errors = true;
1339 domain_buffer[current_label_start] = '\u{FFFD}';
1340 needs_contextj_check = false; // ASCII label
1341first_needs_combining_mark_check = false;
1342 };
1343 } else {
1344 first_needs_combining_mark_check = false;
1345 needs_contextj_check = false; // Non-ASCII already turned to U+FFFD.
1346}
1347 }
1348if self.check_label(
1349 hyphens,
1350&mut domain_buffer[current_label_start..],
1351 fail_fast,
1352&mut had_errors,
1353 first_needs_combining_mark_check,
1354 needs_contextj_check,
1355 ) {
1356return (0, false, true);
1357 }
13581359if n.is_none() {
1360break;
1361 }
1362 domain_buffer.push('.');
1363 current_label_start = domain_buffer.len();
1364 first_needs_combining_mark_check = true;
1365 needs_contextj_check = true;
1366 already_punycode.push(AlreadyAsciiLabel::Other);
1367 }
1368Some(c) => {
1369if c == '\u{FFFD}' {
1370if fail_fast {
1371return (0, false, true);
1372 }
1373 had_errors = true;
1374 }
1375 domain_buffer.push(c);
1376 }
1377 }
1378 }
1379 } else {
1380// Empty label
1381already_punycode.push(AlreadyAsciiLabel::MixedCaseAscii(label));
1382 }
1383 }
13841385let is_bidi = self.is_bidi(domain_buffer);
1386if is_bidi {
1387for label in domain_buffer.split_mut(|c| *c == '.') {
1388if let Some((first, tail)) = label.split_first_mut() {
1389let first_bc = self.data.bidi_class(*first);
1390if !FIRST_BC_MASK.intersects(first_bc.to_mask()) {
1391// Neither RTL label nor LTR label
1392if fail_fast {
1393return (0, false, true);
1394 }
1395 had_errors = true;
1396*first = '\u{FFFD}';
1397continue;
1398 }
1399let is_ltr = first_bc.is_ltr();
1400// Trim NSM
1401let mut middle = tail;
1402#[allow(clippy::while_let_loop)]
1403loop {
1404if let Some((last, prior)) = middle.split_last_mut() {
1405let last_bc = self.data.bidi_class(*last);
1406if last_bc.is_nonspacing_mark() {
1407 middle = prior;
1408continue;
1409 }
1410let last_mask = if is_ltr { LAST_LTR_MASK } else { LAST_RTL_MASK };
1411if !last_mask.intersects(last_bc.to_mask()) {
1412if fail_fast {
1413return (0, false, true);
1414 }
1415 had_errors = true;
1416*last = '\u{FFFD}';
1417 }
1418if is_ltr {
1419for c in prior.iter_mut() {
1420let bc = self.data.bidi_class(*c);
1421if !MIDDLE_LTR_MASK.intersects(bc.to_mask()) {
1422if fail_fast {
1423return (0, false, true);
1424 }
1425 had_errors = true;
1426*c = '\u{FFFD}';
1427 }
1428 }
1429 } else {
1430let mut numeral_state = RtlNumeralState::Undecided;
1431for c in prior.iter_mut() {
1432let bc = self.data.bidi_class(*c);
1433if !MIDDLE_RTL_MASK.intersects(bc.to_mask()) {
1434if fail_fast {
1435return (0, false, true);
1436 }
1437 had_errors = true;
1438*c = '\u{FFFD}';
1439 } else {
1440match numeral_state {
1441 RtlNumeralState::Undecided => {
1442if bc.is_european_number() {
1443 numeral_state = RtlNumeralState::European;
1444 } else if bc.is_arabic_number() {
1445 numeral_state = RtlNumeralState::Arabic;
1446 }
1447 }
1448 RtlNumeralState::European => {
1449if bc.is_arabic_number() {
1450if fail_fast {
1451return (0, false, true);
1452 }
1453 had_errors = true;
1454*c = '\u{FFFD}';
1455 }
1456 }
1457 RtlNumeralState::Arabic => {
1458if bc.is_european_number() {
1459if fail_fast {
1460return (0, false, true);
1461 }
1462 had_errors = true;
1463*c = '\u{FFFD}';
1464 }
1465 }
1466 }
1467 }
1468 }
1469if (numeral_state == RtlNumeralState::European
1470 && last_bc.is_arabic_number())
1471 || (numeral_state == RtlNumeralState::Arabic
1472 && last_bc.is_european_number())
1473 {
1474if fail_fast {
1475return (0, false, true);
1476 }
1477 had_errors = true;
1478*last = '\u{FFFD}';
1479 }
1480 }
1481break;
1482 } else {
1483// One-character label or label where
1484 // everything after the first character
1485 // is just non-spacing marks.
1486break;
1487 }
1488 }
1489 }
1490 }
1491 }
14921493 (passthrough_up_to, is_bidi, had_errors)
1494 }
14951496#[inline(never)]
1497fn after_punycode_decode(
1498&self,
1499 domain_buffer: &mut SmallVec<[char; 253]>,
1500 current_label_start: usize,
1501 label_buffer: &[char],
1502 deny_list_deny_dot: u128,
1503 fail_fast: bool,
1504 had_errors: &mut bool,
1505 ) -> bool {
1506for c in self
1507.data
1508 .normalize_validate(label_buffer.iter().copied())
1509 .map(|c| apply_ascii_deny_list_to_lower_cased_unicode(c, deny_list_deny_dot))
1510 {
1511if c == '\u{FFFD}' {
1512if fail_fast {
1513return true;
1514 }
1515*had_errors = true;
1516 }
1517 domain_buffer.push(c);
1518 }
1519let normalized = &mut domain_buffer[current_label_start..];
1520if let Err(()) =
1521normalized1522 .iter_mut()
1523 .zip(label_buffer.iter())
1524 .try_for_each(|(norm_c, decoded_c)| {
1525if *norm_c == *decoded_c {
1526Ok(())
1527 } else {
1528// Mark the first difference
1529*norm_c = '\u{FFFD}';
1530Err(())
1531 }
1532 })
1533 {
1534if fail_fast {
1535return true;
1536 }
1537*had_errors = true;
1538 }
1539false
1540}
15411542#[inline(never)]
1543fn check_label(
1544&self,
1545 hyphens: Hyphens,
1546 mut_label: &mut [char],
1547 fail_fast: bool,
1548 had_errors: &mut bool,
1549 first_needs_combining_mark_check: bool,
1550 needs_contextj_check: bool,
1551 ) -> bool {
1552if hyphens != Hyphens::Allow1553 && check_hyphens(
1554mut_label,
1555hyphens == Hyphens::CheckFirstLast,
1556fail_fast,
1557had_errors,
1558 )
1559 {
1560return true;
1561 }
1562if first_needs_combining_mark_check {
1563if let Some(first) = mut_label.first_mut() {
1564if self.data.is_mark(*first) {
1565if fail_fast {
1566return true;
1567 }
1568*had_errors = true;
1569*first = '\u{FFFD}';
1570 }
1571 }
1572 }
1573if needs_contextj_check {
1574// ContextJ
1575for i in 0..mut_label.len() {
1576let c = mut_label[i];
1577if !in_inclusive_range_char(c, '\u{200C}', '\u{200D}') {
1578continue;
1579 }
1580let (head, joiner_and_tail) = mut_label.split_at_mut(i);
15811582if let Some((joiner, tail)) = joiner_and_tail.split_first_mut() {
1583if let Some(previous) = head.last() {
1584if self.data.is_virama(*previous) {
1585continue;
1586 }
1587 } else {
1588// No preceding character
1589if fail_fast {
1590return true;
1591 }
1592*had_errors = true;
1593*joiner = '\u{FFFD}';
1594continue;
1595 }
1596if c == '\u{200D}' {
1597// ZWJ only has the virama rule
1598if fail_fast {
1599return true;
1600 }
1601*had_errors = true;
1602*joiner = '\u{FFFD}';
1603continue;
1604 }
1605if true {
match (&c, &'\u{200C}') {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(c, '\u{200C}');
1606if !self.has_appropriately_joining_char(
1607 head.iter().rev().copied(),
1608 LEFT_OR_DUAL_JOINING_MASK,
1609 ) || !self.has_appropriately_joining_char(
1610 tail.iter().copied(),
1611 RIGHT_OR_DUAL_JOINING_MASK,
1612 ) {
1613if fail_fast {
1614return true;
1615 }
1616*had_errors = true;
1617*joiner = '\u{FFFD}';
1618 }
1619 } else {
1620if true {
if !false { ::core::panicking::panic("assertion failed: false") };
};debug_assert!(false);
1621 }
1622 }
1623 }
16241625if !is_ascii(mut_label) && mut_label.len() > PUNYCODE_ENCODE_MAX_INPUT_LENGTH {
1626// Limit quadratic behavior
1627 // https://github.com/whatwg/url/issues/824
1628 // https://unicode-org.atlassian.net/browse/ICU-13727
1629if fail_fast {
1630return true;
1631 }
1632*had_errors = true;
1633mut_label[PUNYCODE_ENCODE_MAX_INPUT_LENGTH] = '\u{FFFD}';
1634 }
1635false
1636}
16371638#[inline(always)]
1639fn has_appropriately_joining_char<I: Iterator<Item = char>>(
1640&self,
1641 iter: I,
1642 required_mask: JoiningTypeMask,
1643 ) -> bool {
1644for c in iter {
1645let jt = self.data.joining_type(c);
1646if jt.to_mask().intersects(required_mask) {
1647return true;
1648 }
1649if jt.is_transparent() {
1650continue;
1651 }
1652return false;
1653 }
1654false
1655}
16561657#[inline(always)]
1658fn is_bidi(&self, buffer: &[char]) -> bool {
1659for &c in buffer {
1660if c < '\u{0590}' {
1661// Below Hebrew
1662continue;
1663 }
1664if in_inclusive_range_char(c, '\u{0900}', '\u{FB1C}') {
1665if true {
match (&c, &'\u{200F}') {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_ne!(c, '\u{200F}'); // disallowed
1666continue;
1667 }
1668if in_inclusive_range_char(c, '\u{1F000}', '\u{3FFFF}') {
1669continue;
1670 }
1671if in_inclusive_range_char(c, '\u{FF00}', '\u{107FF}') {
1672continue;
1673 }
1674if in_inclusive_range_char(c, '\u{11000}', '\u{1E7FF}') {
1675continue;
1676 }
1677if RTL_MASK.intersects(self.data.bidi_class(c).to_mask()) {
1678return true;
1679 }
1680 }
1681false
1682}
1683}
/// Checks `mut_label` for disallowed hyphen placement.
///
/// A hyphen in the first position, a hyphen in the last position and,
/// unless `allow_third_fourth` is `true`, a `--` in the third and fourth
/// positions are errors. The checks run in that order on the label as
/// already modified by the earlier checks.
///
/// With `fail_fast` set, the first error makes the function return `true`
/// immediately without touching the label or `had_errors`. Otherwise every
/// error sets `*had_errors = true`, the offending hyphen is replaced with
/// U+FFFD, and the function returns `false`.
fn check_hyphens(
    mut_label: &mut [char],
    allow_third_fourth: bool,
    fail_fast: bool,
    had_errors: &mut bool,
) -> bool {
    const REPLACEMENT: char = '\u{FFFD}';

    // Leading hyphen.
    match mut_label.first_mut() {
        Some(leading) if *leading == '-' => {
            if fail_fast {
                return true;
            }
            *had_errors = true;
            *leading = REPLACEMENT;
        }
        _ => {}
    }

    // Trailing hyphen. For a one-character label this looks at the slot
    // that the leading check may already have replaced.
    match mut_label.last_mut() {
        Some(trailing) if *trailing == '-' => {
            if fail_fast {
                return true;
            }
            *had_errors = true;
            *trailing = REPLACEMENT;
        }
        _ => {}
    }

    if !allow_third_fourth {
        // `get_mut(2..4)` is `Some` exactly when the label has at least
        // four characters.
        if let Some(pair) = mut_label.get_mut(2..4) {
            if pair.iter().all(|&c| c == '-') {
                if fail_fast {
                    return true;
                }
                *had_errors = true;
                pair.iter_mut().for_each(|slot| *slot = REPLACEMENT);
            }
        }
    }
    false
}