url/
host.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use crate::net::{Ipv4Addr, Ipv6Addr};
10use alloc::borrow::Cow;
11use alloc::borrow::ToOwned;
12use alloc::string::String;
13use alloc::string::ToString;
14use alloc::vec::Vec;
15use core::cmp;
16use core::fmt::{self, Formatter};
17
18use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
19#[cfg(feature = "serde")]
20use serde::{Deserialize, Serialize};
21
22use crate::parser::{ParseError, ParseResult};
23
/// Compact, `Copy` summary of the kind of host a URL has.
///
/// Unlike [`Host`], the `Domain` variant carries no text: only IP addresses
/// are stored inline.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum HostInternal {
    /// No host. The `From<Host<String>>` conversion produces this for an empty domain.
    None,
    /// A domain host; the name itself is not stored in this type.
    Domain,
    /// An IPv4 address host.
    Ipv4(Ipv4Addr),
    /// An IPv6 address host.
    Ipv6(Ipv6Addr),
}
32
33impl From<Host<String>> for HostInternal {
34    fn from(host: Host<String>) -> HostInternal {
35        match host {
36            Host::Domain(ref s) if s.is_empty() => HostInternal::None,
37            Host::Domain(_) => HostInternal::Domain,
38            Host::Ipv4(address) => HostInternal::Ipv4(address),
39            Host::Ipv6(address) => HostInternal::Ipv6(address),
40        }
41    }
42}
43
44/// The host name of an URL.
45#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
46#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
47pub enum Host<S = String> {
48    /// A DNS domain name, as '.' dot-separated labels.
49    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
50    /// a special URL, or percent encoded for non-special URLs. Hosts for
51    /// non-special URLs are also called opaque hosts.
52    Domain(S),
53
54    /// An IPv4 address.
55    /// `Url::host_str` returns the serialization of this address,
56    /// as four decimal integers separated by `.` dots.
57    Ipv4(Ipv4Addr),
58
59    /// An IPv6 address.
60    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
61    /// in the format per [RFC 5952 *A Recommendation
62    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
63    /// lowercase hexadecimal with maximal `::` compression.
64    Ipv6(Ipv6Addr),
65}
66
67impl<'a> Host<&'a str> {
68    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
69    pub fn to_owned(&self) -> Host<String> {
70        match *self {
71            Host::Domain(domain) => Host::Domain(domain.to_owned()),
72            Host::Ipv4(address) => Host::Ipv4(address),
73            Host::Ipv6(address) => Host::Ipv6(address),
74        }
75    }
76}
77
78impl Host<String> {
79    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
80    ///
81    /// <https://url.spec.whatwg.org/#host-parsing>
82    pub fn parse(input: &str) -> Result<Self, ParseError> {
83        if input.starts_with('[') {
84            if !input.ends_with(']') {
85                return Err(ParseError::InvalidIpv6Address);
86            }
87            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
88        }
89        let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
90
91        let domain = Self::domain_to_ascii(&domain)?;
92
93        if domain.is_empty() {
94            return Err(ParseError::EmptyHost);
95        }
96
97        if ends_in_a_number(&domain) {
98            let address = parse_ipv4addr(&domain)?;
99            Ok(Host::Ipv4(address))
100        } else {
101            Ok(Host::Domain(domain.to_string()))
102        }
103    }
104
105    // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
106    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
107        if input.starts_with('[') {
108            if !input.ends_with(']') {
109                return Err(ParseError::InvalidIpv6Address);
110            }
111            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
112        }
113
114        let is_invalid_host_char = |c| {
115            matches!(
116                c,
117                '\0' | '\t'
118                    | '\n'
119                    | '\r'
120                    | ' '
121                    | '#'
122                    | '/'
123                    | ':'
124                    | '<'
125                    | '>'
126                    | '?'
127                    | '@'
128                    | '['
129                    | '\\'
130                    | ']'
131                    | '^'
132                    | '|'
133            )
134        };
135
136        if input.find(is_invalid_host_char).is_some() {
137            Err(ParseError::InvalidDomainCharacter)
138        } else {
139            Ok(Host::Domain(
140                utf8_percent_encode(input, CONTROLS).to_string(),
141            ))
142        }
143    }
144
145    /// convert domain with idna
146    fn domain_to_ascii(domain: &[u8]) -> Result<Cow<'_, str>, ParseError> {
147        idna::domain_to_ascii_cow(domain, idna::AsciiDenyList::URL).map_err(Into::into)
148    }
149}
150
151impl<S: AsRef<str>> fmt::Display for Host<S> {
152    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
153        match *self {
154            Host::Domain(ref domain) => domain.as_ref().fmt(f),
155            Host::Ipv4(ref addr) => addr.fmt(f),
156            Host::Ipv6(ref addr) => {
157                f.write_str("[")?;
158                write_ipv6(addr, f)?;
159                f.write_str("]")
160            }
161        }
162    }
163}
164
165impl<S, T> PartialEq<Host<T>> for Host<S>
166where
167    S: PartialEq<T>,
168{
169    fn eq(&self, other: &Host<T>) -> bool {
170        match (self, other) {
171            (Host::Domain(a), Host::Domain(b)) => a == b,
172            (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
173            (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
174            (_, _) => false,
175        }
176    }
177}
178
179fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
180    let segments = addr.segments();
181    let (compress_start, compress_end) = longest_zero_sequence(&segments);
182    let mut i = 0;
183    while i < 8 {
184        if i == compress_start {
185            f.write_str(":")?;
186            if i == 0 {
187                f.write_str(":")?;
188            }
189            if compress_end < 8 {
190                i = compress_end;
191            } else {
192                break;
193            }
194        }
195        write!(f, "{:x}", segments[i as usize])?;
196        if i < 7 {
197            f.write_str(":")?;
198        }
199        i += 1;
200    }
201    Ok(())
202}
203
/// Finds the run of consecutive zero pieces that should be compressed to `::`.
///
/// Returns `(start, end)` with `end` exclusive. When several runs share the
/// maximum length the first one wins. Runs shorter than two pieces are ignored,
/// in which case the sentinel `(-1, -2)` is returned.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-serializer> steps 2 and 3
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;

    let mut index: isize = 0;
    while index < 8 {
        if pieces[index as usize] != 0 {
            index += 1;
            continue;
        }
        // Found the start of a zero run; consume it entirely.
        let run_start = index;
        while index < 8 && pieces[index as usize] == 0 {
            index += 1;
        }
        let run_len = index - run_start;
        // Strict comparison keeps the earliest run on ties.
        if run_len > best_len {
            best_start = run_start;
            best_len = run_len;
        }
    }

    // Step 3 of the serializer: a lone zero is never compressed.
    if best_len >= 2 {
        (best_start, best_start + best_len)
    } else {
        (-1, -2)
    }
}
239
240/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
241fn ends_in_a_number(input: &str) -> bool {
242    let mut parts = input.rsplit('.');
243    let last = parts.next().unwrap();
244    let last = if last.is_empty() {
245        if let Some(last) = parts.next() {
246            last
247        } else {
248            return false;
249        }
250    } else {
251        last
252    };
253    if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
254        return true;
255    }
256
257    parse_ipv4number(last).is_ok()
258}
259
/// Parses one dot-separated part of an IPv4 host as a number.
///
/// A `0x`/`0X` prefix selects hexadecimal; any other leading `0` on an input
/// of two or more bytes selects octal; everything else is decimal. A bare
/// `0x`/`0X` prefix parses as zero.
///
/// Returns `Err(())` for empty input or for digits that are invalid in the
/// selected radix, `Ok(None)` for a well-formed number that overflows a `u32`,
/// and `Ok(Some(n))` otherwise.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    // The prefixes are ASCII, so slicing at 1 or 2 is always on a char boundary.
    let (digits, radix) = match input.as_bytes() {
        [b'0', b'x', ..] | [b'0', b'X', ..] => (&input[2..], 16),
        [b'0', _, ..] => (&input[1..], 8),
        _ => (input, 10),
    };

    if digits.is_empty() {
        return Ok(Some(0));
    }
    if !digits.chars().all(|c| c.is_digit(radix)) {
        return Err(());
    }

    // Every character was validated above, so the only way the conversion can
    // fail is an overflow, which is reported as `None`.
    Ok(u32::from_str_radix(digits, radix).ok())
}
296
297/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
298fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
299    let mut parts: Vec<&str> = input.split('.').collect();
300    if parts.last() == Some(&"") {
301        parts.pop();
302    }
303    if parts.len() > 4 {
304        return Err(ParseError::InvalidIpv4Address);
305    }
306    let mut numbers: Vec<u32> = Vec::new();
307    for part in parts {
308        match parse_ipv4number(part) {
309            Ok(Some(n)) => numbers.push(n),
310            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
311            Err(()) => return Err(ParseError::InvalidIpv4Address),
312        };
313    }
314    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
315    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
316    if ipv4 > u32::MAX >> (8 * numbers.len() as u32) {
317        return Err(ParseError::InvalidIpv4Address);
318    }
319    if numbers.iter().any(|x| *x > 255) {
320        return Err(ParseError::InvalidIpv4Address);
321    }
322    for (counter, n) in numbers.iter().enumerate() {
323        ipv4 += n << (8 * (3 - counter as u32))
324    }
325    Ok(Ipv4Addr::from(ipv4))
326}
327
328/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
329fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
330    let input = input.as_bytes();
331    let len = input.len();
332    let mut is_ip_v4 = false;
333    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
334    let mut piece_pointer = 0;
335    let mut compress_pointer = None;
336    let mut i = 0;
337
338    if len < 2 {
339        return Err(ParseError::InvalidIpv6Address);
340    }
341
342    if input[0] == b':' {
343        if input[1] != b':' {
344            return Err(ParseError::InvalidIpv6Address);
345        }
346        i = 2;
347        piece_pointer = 1;
348        compress_pointer = Some(1);
349    }
350
351    while i < len {
352        if piece_pointer == 8 {
353            return Err(ParseError::InvalidIpv6Address);
354        }
355        if input[i] == b':' {
356            if compress_pointer.is_some() {
357                return Err(ParseError::InvalidIpv6Address);
358            }
359            i += 1;
360            piece_pointer += 1;
361            compress_pointer = Some(piece_pointer);
362            continue;
363        }
364        let start = i;
365        let end = cmp::min(len, start + 4);
366        let mut value = 0u16;
367        while i < end {
368            match (input[i] as char).to_digit(16) {
369                Some(digit) => {
370                    value = value * 0x10 + digit as u16;
371                    i += 1;
372                }
373                None => break,
374            }
375        }
376        if i < len {
377            match input[i] {
378                b'.' => {
379                    if i == start {
380                        return Err(ParseError::InvalidIpv6Address);
381                    }
382                    i = start;
383                    if piece_pointer > 6 {
384                        return Err(ParseError::InvalidIpv6Address);
385                    }
386                    is_ip_v4 = true;
387                }
388                b':' => {
389                    i += 1;
390                    if i == len {
391                        return Err(ParseError::InvalidIpv6Address);
392                    }
393                }
394                _ => return Err(ParseError::InvalidIpv6Address),
395            }
396        }
397        if is_ip_v4 {
398            break;
399        }
400        pieces[piece_pointer] = value;
401        piece_pointer += 1;
402    }
403
404    if is_ip_v4 {
405        if piece_pointer > 6 {
406            return Err(ParseError::InvalidIpv6Address);
407        }
408        let mut numbers_seen = 0;
409        while i < len {
410            if numbers_seen > 0 {
411                if numbers_seen < 4 && (i < len && input[i] == b'.') {
412                    i += 1
413                } else {
414                    return Err(ParseError::InvalidIpv6Address);
415                }
416            }
417
418            let mut ipv4_piece = None;
419            while i < len {
420                let digit = match input[i] {
421                    c @ b'0'..=b'9' => c - b'0',
422                    _ => break,
423                };
424                match ipv4_piece {
425                    None => ipv4_piece = Some(digit as u16),
426                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
427                    Some(ref mut v) => {
428                        *v = *v * 10 + digit as u16;
429                        if *v > 255 {
430                            return Err(ParseError::InvalidIpv6Address);
431                        }
432                    }
433                }
434                i += 1;
435            }
436
437            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
438                pieces[piece_pointer] * 0x100 + v
439            } else {
440                return Err(ParseError::InvalidIpv6Address);
441            };
442            numbers_seen += 1;
443
444            if numbers_seen == 2 || numbers_seen == 4 {
445                piece_pointer += 1;
446            }
447        }
448
449        if numbers_seen != 4 {
450            return Err(ParseError::InvalidIpv6Address);
451        }
452    }
453
454    if i < len {
455        return Err(ParseError::InvalidIpv6Address);
456    }
457
458    match compress_pointer {
459        Some(compress_pointer) => {
460            let mut swaps = piece_pointer - compress_pointer;
461            piece_pointer = 7;
462            while swaps > 0 {
463                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
464                swaps -= 1;
465                piece_pointer -= 1;
466            }
467        }
468        _ => {
469            if piece_pointer != 8 {
470                return Err(ParseError::InvalidIpv6Address);
471            }
472        }
473    }
474    Ok(Ipv6Addr::new(
475        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
476    ))
477}