url/
host.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use crate::net::{Ipv4Addr, Ipv6Addr};
10use alloc::borrow::Cow;
11use alloc::borrow::ToOwned;
12use alloc::string::String;
13use alloc::vec::Vec;
14use core::cmp;
15use core::fmt::{self, Formatter};
16
17use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
18#[cfg(feature = "serde")]
19use serde_derive::{Deserialize, Serialize};
20
21use crate::parser::{ParseError, ParseResult};
22
/// Compact, string-free classification of a host.
///
/// `None` and `Domain` carry no payload, while `Ipv4` and `Ipv6` store the
/// address itself. The `From<Host<Cow<'_, str>>>` conversion in this file maps
/// an empty domain to `None` and any other domain to `Domain`.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    None,
    Domain,
    Ipv4(Ipv4Addr),
    Ipv6(Ipv6Addr),
}
31
32impl From<Host<Cow<'_, str>>> for HostInternal {
33    fn from(host: Host<Cow<'_, str>>) -> Self {
34        match host {
35            Host::Domain(ref s) if s.is_empty() => Self::None,
36            Host::Domain(_) => Self::Domain,
37            Host::Ipv4(address) => Self::Ipv4(address),
38            Host::Ipv6(address) => Self::Ipv6(address),
39        }
40    }
41}
42
43/// The host name of an URL.
44#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
45#[derive(#[automatically_derived]
impl<S: ::core::clone::Clone> ::core::clone::Clone for Host<S> {
    #[inline]
    fn clone(&self) -> Host<S> {
        match self {
            Host::Domain(__self_0) =>
                Host::Domain(::core::clone::Clone::clone(__self_0)),
            Host::Ipv4(__self_0) =>
                Host::Ipv4(::core::clone::Clone::clone(__self_0)),
            Host::Ipv6(__self_0) =>
                Host::Ipv6(::core::clone::Clone::clone(__self_0)),
        }
    }
}Clone, #[automatically_derived]
impl<S: ::core::fmt::Debug> ::core::fmt::Debug for Host<S> {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        match self {
            Host::Domain(__self_0) =>
                ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Domain",
                    &__self_0),
            Host::Ipv4(__self_0) =>
                ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Ipv4",
                    &__self_0),
            Host::Ipv6(__self_0) =>
                ::core::fmt::Formatter::debug_tuple_field1_finish(f, "Ipv6",
                    &__self_0),
        }
    }
}Debug, #[automatically_derived]
impl<S: ::core::cmp::Eq> ::core::cmp::Eq for Host<S> {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) -> () {
        let _: ::core::cmp::AssertParamIsEq<S>;
        let _: ::core::cmp::AssertParamIsEq<Ipv4Addr>;
        let _: ::core::cmp::AssertParamIsEq<Ipv6Addr>;
    }
}Eq, #[automatically_derived]
impl<S: ::core::cmp::Ord> ::core::cmp::Ord for Host<S> {
    #[inline]
    fn cmp(&self, other: &Host<S>) -> ::core::cmp::Ordering {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        match ::core::cmp::Ord::cmp(&__self_discr, &__arg1_discr) {
            ::core::cmp::Ordering::Equal =>
                match (self, other) {
                    (Host::Domain(__self_0), Host::Domain(__arg1_0)) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (Host::Ipv4(__self_0), Host::Ipv4(__arg1_0)) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (Host::Ipv6(__self_0), Host::Ipv6(__arg1_0)) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    _ => unsafe { ::core::intrinsics::unreachable() }
                },
            cmp => cmp,
        }
    }
}Ord, #[automatically_derived]
impl<S: ::core::cmp::PartialOrd> ::core::cmp::PartialOrd for Host<S> {
    #[inline]
    fn partial_cmp(&self, other: &Host<S>)
        -> ::core::option::Option<::core::cmp::Ordering> {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        match (self, other) {
            (Host::Domain(__self_0), Host::Domain(__arg1_0)) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (Host::Ipv4(__self_0), Host::Ipv4(__arg1_0)) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (Host::Ipv6(__self_0), Host::Ipv6(__arg1_0)) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            _ =>
                ::core::cmp::PartialOrd::partial_cmp(&__self_discr,
                    &__arg1_discr),
        }
    }
}PartialOrd, #[automatically_derived]
impl<S: ::core::hash::Hash> ::core::hash::Hash for Host<S> {
    #[inline]
    fn hash<__H: ::core::hash::Hasher>(&self, state: &mut __H) -> () {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        ::core::hash::Hash::hash(&__self_discr, state);
        match self {
            Host::Domain(__self_0) =>
                ::core::hash::Hash::hash(__self_0, state),
            Host::Ipv4(__self_0) => ::core::hash::Hash::hash(__self_0, state),
            Host::Ipv6(__self_0) => ::core::hash::Hash::hash(__self_0, state),
        }
    }
}Hash)]
46pub enum Host<S = String> {
47    /// A DNS domain name, as '.' dot-separated labels.
48    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
49    /// a special URL, or percent encoded for non-special URLs. Hosts for
50    /// non-special URLs are also called opaque hosts.
51    Domain(S),
52
53    /// An IPv4 address.
54    /// `Url::host_str` returns the serialization of this address,
55    /// as four decimal integers separated by `.` dots.
56    Ipv4(Ipv4Addr),
57
58    /// An IPv6 address.
59    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
60    /// in the format per [RFC 5952 *A Recommendation
61    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
62    /// lowercase hexadecimal with maximal `::` compression.
63    Ipv6(Ipv6Addr),
64}
65
66impl Host<&str> {
67    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
68    pub fn to_owned(&self) -> Host<String> {
69        match *self {
70            Host::Domain(domain) => Host::Domain(domain.to_owned()),
71            Host::Ipv4(address) => Host::Ipv4(address),
72            Host::Ipv6(address) => Host::Ipv6(address),
73        }
74    }
75}
76
77impl Host<String> {
78    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
79    ///
80    /// <https://url.spec.whatwg.org/#host-parsing>
81    pub fn parse(input: &str) -> Result<Self, ParseError> {
82        Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
83    }
84
85    /// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
86    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
87        Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())
88    }
89}
90
91impl<'a> Host<Cow<'a, str>> {
92    pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
93        if input.starts_with('[') {
94            if !input.ends_with(']') {
95                return Err(ParseError::InvalidIpv6Address);
96            }
97            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
98        }
99        let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
100        let domain: Cow<'a, [u8]> = match domain {
101            Cow::Owned(v) => Cow::Owned(v),
102            // if borrowed then we can use the original cow
103            Cow::Borrowed(_) => match input {
104                Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
105                Cow::Owned(input) => Cow::Owned(input.into_bytes()),
106            },
107        };
108
109        let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?;
110
111        if domain.is_empty() {
112            return Err(ParseError::EmptyHost);
113        }
114
115        if ends_in_a_number(&domain) {
116            let address = parse_ipv4addr(&domain)?;
117            Ok(Host::Ipv4(address))
118        } else {
119            Ok(Host::Domain(domain))
120        }
121    }
122
123    pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
124        if input.starts_with('[') {
125            if !input.ends_with(']') {
126                return Err(ParseError::InvalidIpv6Address);
127            }
128            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
129        }
130
131        let is_invalid_host_char = |c| {
132            #[allow(non_exhaustive_omitted_patterns)] match c {
    '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '<' | '>' | '?' | '@'
        | '[' | '\\' | ']' | '^' | '|' => true,
    _ => false,
}matches!(
133                c,
134                '\0' | '\t'
135                    | '\n'
136                    | '\r'
137                    | ' '
138                    | '#'
139                    | '/'
140                    | ':'
141                    | '<'
142                    | '>'
143                    | '?'
144                    | '@'
145                    | '['
146                    | '\\'
147                    | ']'
148                    | '^'
149                    | '|'
150            )
151        };
152
153        if input.find(is_invalid_host_char).is_some() {
154            return Err(ParseError::InvalidDomainCharacter);
155        }
156
157        // Call utf8_percent_encode and use the result.
158        // Note: This returns Cow::Borrowed for single-item results (either from input
159        // or from the static encoding table), and Cow::Owned for multi-item results.
160        // We cannot distinguish between "borrowed from input" vs "borrowed from static table"
161        // based on the Cow variant alone.
162        Ok(Host::Domain(
163            match utf8_percent_encode(&input, CONTROLS).into() {
164                Cow::Owned(v) => Cow::Owned(v),
165                // If we're borrowing, we need to check if it's the same as the input
166                Cow::Borrowed(v) => {
167                    if v == &*input {
168                        input // No encoding happened, reuse original
169                    } else {
170                        Cow::Owned(v.to_owned()) // Borrowed from static table, need to own it
171                    }
172                }
173            },
174        ))
175    }
176
177    pub(crate) fn into_owned(self) -> Host<String> {
178        match self {
179            Host::Domain(s) => Host::Domain(s.into_owned()),
180            Host::Ipv4(ip) => Host::Ipv4(ip),
181            Host::Ipv6(ip) => Host::Ipv6(ip),
182        }
183    }
184}
185
186impl<S: AsRef<str>> fmt::Display for Host<S> {
187    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
188        match *self {
189            Self::Domain(ref domain) => domain.as_ref().fmt(f),
190            Self::Ipv4(ref addr) => addr.fmt(f),
191            Self::Ipv6(ref addr) => {
192                f.write_str("[")?;
193                write_ipv6(addr, f)?;
194                f.write_str("]")
195            }
196        }
197    }
198}
199
200impl<S, T> PartialEq<Host<T>> for Host<S>
201where
202    S: PartialEq<T>,
203{
204    fn eq(&self, other: &Host<T>) -> bool {
205        match (self, other) {
206            (Self::Domain(a), Host::Domain(b)) => a == b,
207            (Self::Ipv4(a), Host::Ipv4(b)) => a == b,
208            (Self::Ipv6(a), Host::Ipv6(b)) => a == b,
209            (_, _) => false,
210        }
211    }
212}
213
214fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
215    let segments = addr.segments();
216    let (compress_start, compress_end) = longest_zero_sequence(&segments);
217    let mut i = 0;
218    while i < 8 {
219        if i == compress_start {
220            f.write_str(":")?;
221            if i == 0 {
222                f.write_str(":")?;
223            }
224            if compress_end < 8 {
225                i = compress_end;
226            } else {
227                break;
228            }
229        }
230        f.write_fmt(format_args!("{0:x}", segments[i as usize]))write!(f, "{:x}", segments[i as usize])?;
231        if i < 7 {
232            f.write_str(":")?;
233        }
234        i += 1;
235    }
236    Ok(())
237}
238
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
/// Find the longest run of consecutive zero pieces.
///
/// Returns `(start, end)` with `end` exclusive. When runs tie, the earliest
/// one is kept. If the longest run is shorter than two pieces, the sentinel
/// `(-1, -2)` is returned, which no piece index ever matches.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut longest = -1;
    let mut longest_length = -1;
    let mut start = -1;
    // Close the run that began at `start` (if any) at index `$end`, recording
    // it when it is strictly longer than the best run seen so far.
    macro_rules! finish_sequence(
        ($end: expr) => {
            if start >= 0 {
                let length = $end - start;
                if length > longest_length {
                    longest = start;
                    longest_length = length;
                }
            }
        };
    );
    for i in 0..8 {
        if pieces[i as usize] == 0 {
            if start < 0 {
                start = i;
            }
        } else {
            finish_sequence!(i);
            start = -1;
        }
    }
    // A run may extend to the last piece.
    finish_sequence!(8);
    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    if longest_length < 2 {
        (-1, -2)
    } else {
        (longest, longest + longest_length)
    }
}
274
275/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
276fn ends_in_a_number(input: &str) -> bool {
277    let mut parts = input.rsplit('.');
278    let last = parts.next().unwrap();
279    let last = if last.is_empty() {
280        if let Some(last) = parts.next() {
281            last
282        } else {
283            return false;
284        }
285    } else {
286        last
287    };
288    if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
289        return true;
290    }
291
292    parse_ipv4number(last).is_ok()
293}
294
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// A `0x`/`0X` prefix selects hexadecimal, a leading `0` followed by at least
/// one more character selects octal, anything else is decimal.
///
/// * `Err(())`: the input is empty or contains a character that is not a
///   digit in the selected radix.
/// * `Ok(None)`: the input is a valid number, but it overflows a `u32`.
/// * `Ok(Some(n))`: the parsed value; a bare prefix such as `0x` yields `0`.
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let (prefix_len, radix) = if input.starts_with("0x") || input.starts_with("0X") {
        (2, 16)
    } else if input.len() >= 2 && input.starts_with('0') {
        (1, 8)
    } else {
        (0, 10)
    };
    input = &input[prefix_len..];

    if input.is_empty() {
        return Ok(Some(0));
    }

    if !input.bytes().all(|byte| char::from(byte).is_digit(radix)) {
        return Err(());
    }

    // Every character was validated above, so the only way the conversion can
    // fail here is an integer overflow.
    Ok(u32::from_str_radix(input, radix).ok())
}
331
332/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
333fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
334    let mut parts: Vec<&str> = input.split('.').collect();
335    if parts.last() == Some(&"") {
336        parts.pop();
337    }
338    if parts.len() > 4 {
339        return Err(ParseError::InvalidIpv4Address);
340    }
341    let mut numbers: Vec<u32> = Vec::new();
342    for part in parts {
343        match parse_ipv4number(part) {
344            Ok(Some(n)) => numbers.push(n),
345            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
346            Err(()) => return Err(ParseError::InvalidIpv4Address),
347        };
348    }
349    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
350    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
351    if ipv4 > u32::MAX >> (8 * numbers.len() as u32) {
352        return Err(ParseError::InvalidIpv4Address);
353    }
354    if numbers.iter().any(|x| *x > 255) {
355        return Err(ParseError::InvalidIpv4Address);
356    }
357    for (counter, n) in numbers.iter().enumerate() {
358        ipv4 += n << (8 * (3 - counter as u32))
359    }
360    Ok(Ipv4Addr::from(ipv4))
361}
362
363/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
364fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
365    let input = input.as_bytes();
366    let len = input.len();
367    let mut is_ip_v4 = false;
368    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
369    let mut piece_pointer = 0;
370    let mut compress_pointer = None;
371    let mut i = 0;
372
373    if len < 2 {
374        return Err(ParseError::InvalidIpv6Address);
375    }
376
377    if input[0] == b':' {
378        if input[1] != b':' {
379            return Err(ParseError::InvalidIpv6Address);
380        }
381        i = 2;
382        piece_pointer = 1;
383        compress_pointer = Some(1);
384    }
385
386    while i < len {
387        if piece_pointer == 8 {
388            return Err(ParseError::InvalidIpv6Address);
389        }
390        if input[i] == b':' {
391            if compress_pointer.is_some() {
392                return Err(ParseError::InvalidIpv6Address);
393            }
394            i += 1;
395            piece_pointer += 1;
396            compress_pointer = Some(piece_pointer);
397            continue;
398        }
399        let start = i;
400        let end = cmp::min(len, start + 4);
401        let mut value = 0u16;
402        while i < end {
403            match (input[i] as char).to_digit(16) {
404                Some(digit) => {
405                    value = value * 0x10 + digit as u16;
406                    i += 1;
407                }
408                None => break,
409            }
410        }
411        if i < len {
412            match input[i] {
413                b'.' => {
414                    if i == start {
415                        return Err(ParseError::InvalidIpv6Address);
416                    }
417                    i = start;
418                    if piece_pointer > 6 {
419                        return Err(ParseError::InvalidIpv6Address);
420                    }
421                    is_ip_v4 = true;
422                }
423                b':' => {
424                    i += 1;
425                    if i == len {
426                        return Err(ParseError::InvalidIpv6Address);
427                    }
428                }
429                _ => return Err(ParseError::InvalidIpv6Address),
430            }
431        }
432        if is_ip_v4 {
433            break;
434        }
435        pieces[piece_pointer] = value;
436        piece_pointer += 1;
437    }
438
439    if is_ip_v4 {
440        if piece_pointer > 6 {
441            return Err(ParseError::InvalidIpv6Address);
442        }
443        let mut numbers_seen = 0;
444        while i < len {
445            if numbers_seen > 0 {
446                if numbers_seen < 4 && (i < len && input[i] == b'.') {
447                    i += 1
448                } else {
449                    return Err(ParseError::InvalidIpv6Address);
450                }
451            }
452
453            let mut ipv4_piece = None;
454            while i < len {
455                let digit = match input[i] {
456                    c @ b'0'..=b'9' => c - b'0',
457                    _ => break,
458                };
459                match ipv4_piece {
460                    None => ipv4_piece = Some(digit as u16),
461                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
462                    Some(ref mut v) => {
463                        *v = *v * 10 + digit as u16;
464                        if *v > 255 {
465                            return Err(ParseError::InvalidIpv6Address);
466                        }
467                    }
468                }
469                i += 1;
470            }
471
472            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
473                pieces[piece_pointer] * 0x100 + v
474            } else {
475                return Err(ParseError::InvalidIpv6Address);
476            };
477            numbers_seen += 1;
478
479            if numbers_seen == 2 || numbers_seen == 4 {
480                piece_pointer += 1;
481            }
482        }
483
484        if numbers_seen != 4 {
485            return Err(ParseError::InvalidIpv6Address);
486        }
487    }
488
489    if i < len {
490        return Err(ParseError::InvalidIpv6Address);
491    }
492
493    match compress_pointer {
494        Some(compress_pointer) => {
495            let mut swaps = piece_pointer - compress_pointer;
496            piece_pointer = 7;
497            while swaps > 0 {
498                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
499                swaps -= 1;
500                piece_pointer -= 1;
501            }
502        }
503        _ => {
504            if piece_pointer != 8 {
505                return Err(ParseError::InvalidIpv6Address);
506            }
507        }
508    }
509    Ok(Ipv6Addr::new(
510        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
511    ))
512}