idna/
deprecated.rs

1// Copyright 2013-2014 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Deprecated API for [*Unicode IDNA Compatibility Processing*
10//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
11
12#![allow(deprecated)]
13
14use alloc::borrow::Cow;
15use alloc::string::String;
16
17use crate::uts46::*;
18use crate::Errors;
19
/// Applies the character mappings that distinguish UTS 46 transitional
/// processing from nontransitional processing.
///
/// When `transitional` is `false` the input is handed back untouched so
/// that call sites do not need to branch. When it is `true`:
///
/// * `ß` and `ẞ` become `ss`,
/// * `ς` becomes `σ`,
/// * U+200C (ZWNJ) and U+200D (ZWJ) are dropped.
///
/// An allocation is made only if at least one such character is present;
/// otherwise the input is returned borrowed.
///
/// The output of this function is to be passed to [`Uts46::process`].
fn map_transitional(domain: &str, transitional: bool) -> Cow<'_, str> {
    if !transitional {
        return Cow::Borrowed(domain);
    }
    let needs_mapping = |c: char| matches!(c, 'ß' | 'ẞ' | 'ς' | '\u{200C}' | '\u{200D}');
    match domain.find(needs_mapping) {
        // Nothing to rewrite: avoid allocating.
        None => Cow::Borrowed(domain),
        Some(first) => {
            // Everything before the first affected character is copied
            // verbatim; only the remainder is examined char by char.
            let (untouched, rest) = domain.split_at(first);
            let mut mapped = String::with_capacity(domain.len());
            mapped.push_str(untouched);
            for ch in rest.chars() {
                match ch {
                    'ß' | 'ẞ' => mapped.push_str("ss"),
                    'ς' => mapped.push('σ'),
                    '\u{200C}' | '\u{200D}' => {}
                    other => mapped.push(other),
                }
            }
            Cow::Owned(mapped)
        }
    }
}
63
64/// Deprecated. Use the crate-top-level functions or [`Uts46`].
65#[derive(Default)]
66#[deprecated]
67pub struct Idna {
68    config: Config,
69}
70
71impl Idna {
72    pub fn new(config: Config) -> Self {
73        Self { config }
74    }
75
76    /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII)
77    #[allow(clippy::wrong_self_convention)] // Retain old weirdness in deprecated API
78    pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
79        let mapped = map_transitional(domain, self.config.transitional_processing);
80        match Uts46::new().process(
81            mapped.as_bytes(),
82            self.config.deny_list(),
83            self.config.hyphens(),
84            ErrorPolicy::FailFast, // Old code did not appear to expect the output to be useful in the error case.
85            |_, _, _| false,
86            out,
87            None,
88        ) {
89            Ok(ProcessingSuccess::Passthrough) => {
90                if self.config.verify_dns_length && !verify_dns_length(&mapped, true) {
91                    return Err(crate::Errors::default());
92                }
93                out.push_str(&mapped);
94                Ok(())
95            }
96            Ok(ProcessingSuccess::WroteToSink) => {
97                if self.config.verify_dns_length && !verify_dns_length(out, true) {
98                    return Err(crate::Errors::default());
99                }
100                Ok(())
101            }
102            Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
103            Err(ProcessingError::SinkError) => unreachable!(),
104        }
105    }
106
107    /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode)
108    #[allow(clippy::wrong_self_convention)] // Retain old weirdness in deprecated API
109    pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
110        let mapped = map_transitional(domain, self.config.transitional_processing);
111        match Uts46::new().process(
112            mapped.as_bytes(),
113            self.config.deny_list(),
114            self.config.hyphens(),
115            ErrorPolicy::MarkErrors,
116            |_, _, _| true,
117            out,
118            None,
119        ) {
120            Ok(ProcessingSuccess::Passthrough) => {
121                out.push_str(&mapped);
122                Ok(())
123            }
124            Ok(ProcessingSuccess::WroteToSink) => Ok(()),
125            Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
126            Err(ProcessingError::SinkError) => unreachable!(),
127        }
128    }
129}
130
/// Deprecated configuration API.
///
/// A small `Copy` bundle of boolean switches, built up with the chained
/// setter methods and consumed by `to_ascii` / `to_unicode`.
#[deprecated]
#[must_use]
#[derive(Copy, Clone)]
pub struct Config {
    /// Selects the STD3 ASCII deny list instead of an empty one.
    use_std3_ascii_rules: bool,
    /// Enables the transitional pre-mapping of the input.
    transitional_processing: bool,
    /// Makes `to_ascii` verify DNS length limits on its result.
    verify_dns_length: bool,
    /// Rejects hyphens in the first and last positions.
    check_hyphens: bool,
}
141
142/// The defaults are that of _beStrict=false_ in the [WHATWG URL Standard](https://url.spec.whatwg.org/#idna)
143impl Default for Config {
144    fn default() -> Self {
145        Config {
146            use_std3_ascii_rules: false,
147            transitional_processing: false,
148            check_hyphens: false,
149            // Only use for to_ascii, not to_unicode
150            verify_dns_length: false,
151        }
152    }
153}
154
155impl Config {
156    /// Whether to enforce STD3 or WHATWG URL Standard ASCII deny list.
157    ///
158    /// `true` for STD3, `false` for no deny list.
159    ///
160    /// Note that `true` rejects pseudo-hosts used by various TXT record-based protocols.
161    #[inline]
162    pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
163        self.use_std3_ascii_rules = value;
164        self
165    }
166
167    /// Whether to enable (deprecated) transitional processing.
168    ///
169    /// Note that Firefox, Safari, and Chrome do not use transitional
170    /// processing.
171    #[inline]
172    pub fn transitional_processing(mut self, value: bool) -> Self {
173        self.transitional_processing = value;
174        self
175    }
176
177    /// Whether the _VerifyDNSLength_ operation should be performed
178    /// by `to_ascii`.
179    ///
180    /// For compatibility with previous behavior, even when set to `true`,
181    /// the trailing root label dot is allowed contrary to the spec.
182    #[inline]
183    pub fn verify_dns_length(mut self, value: bool) -> Self {
184        self.verify_dns_length = value;
185        self
186    }
187
188    /// Whether to enforce STD3 rules for hyphen placement.
189    ///
190    /// `true` to deny hyphens in the first and last positions.
191    /// `false` to not enforce hyphen placement.
192    ///
193    /// Note that for backward compatibility this is not the same as
194    /// UTS 46 _CheckHyphens_, which also disallows hyphens in the
195    /// third and fourth positions.
196    ///
197    /// Note that `true` rejects real-world names, including some GitHub user pages.
198    #[inline]
199    pub fn check_hyphens(mut self, value: bool) -> Self {
200        self.check_hyphens = value;
201        self
202    }
203
204    /// Obsolete method retained to ease migration. The argument must be `false`.
205    ///
206    /// Panics
207    ///
208    /// If the argument is `true`.
209    #[inline]
210    #[allow(unused_mut)]
211    pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
212        assert!(!value, "IDNA 2008 rules are no longer supported");
213        self
214    }
215
216    /// Compute the deny list
217    fn deny_list(&self) -> AsciiDenyList {
218        if self.use_std3_ascii_rules {
219            AsciiDenyList::STD3
220        } else {
221            AsciiDenyList::EMPTY
222        }
223    }
224
225    /// Compute the hyphen mode
226    fn hyphens(&self) -> Hyphens {
227        if self.check_hyphens {
228            Hyphens::CheckFirstLast
229        } else {
230            Hyphens::Allow
231        }
232    }
233
234    /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII)
235    pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
236        let mut result = String::with_capacity(domain.len());
237        let mut codec = Idna::new(self);
238        codec.to_ascii(domain, &mut result).map(|()| result)
239    }
240
241    /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode)
242    pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
243        let mut codec = Idna::new(self);
244        let mut out = String::with_capacity(domain.len());
245        let result = codec.to_unicode(domain, &mut out);
246        (out, result)
247    }
248}