url/lib.rs
1// Copyright 2013-2015 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10
11rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12for the [Rust](http://rust-lang.org/) programming language.
13
14
15# URL parsing and data structures
16
17First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19```
20use url::{Url, ParseError};
21
22assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23```
24
25Let’s parse a valid URL and look at its components.
26
27```
28use url::{Url, Host, Position};
29# use url::ParseError;
30# fn run() -> Result<(), ParseError> {
31let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33)?;
34
35
36assert!(issue_list_url.scheme() == "https");
37assert!(issue_list_url.username() == "");
38assert!(issue_list_url.password() == None);
39assert!(issue_list_url.host_str() == Some("github.com"));
40assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41assert!(issue_list_url.port() == None);
42assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47assert!(issue_list_url.fragment() == None);
48assert!(!issue_list_url.cannot_be_a_base());
49# Ok(())
50# }
51# run().unwrap();
52```
53
54Some URLs are said to be *cannot-be-a-base*:
55they don’t have a username, password, host, or port,
56and their "path" is an arbitrary string rather than slash-separated segments:
57
58```
59use url::Url;
60# use url::ParseError;
61
62# fn run() -> Result<(), ParseError> {
63let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65assert!(data_url.cannot_be_a_base());
66assert!(data_url.scheme() == "data");
67assert!(data_url.path() == "text/plain,Hello");
68assert!(data_url.path_segments().is_none());
69assert!(data_url.query() == Some("World"));
70assert!(data_url.fragment() == Some(""));
71# Ok(())
72# }
73# run().unwrap();
74```
75
76## Default Features
77
78Versions `<= 2.5.2` of the crate have no default features. Versions `> 2.5.2` have the default feature 'std'.
79If you are upgrading across this boundary and you have specified `default-features = false`, then
80you will need to add the 'std' feature or the 'alloc' feature to your dependency.
81The 'std' feature has the same behavior as the previous versions. The 'alloc' feature
82provides no_std support.
83
84## Serde
85
86Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
87
88# Base URL
89
90Many contexts allow URL *references* that can be relative to a *base URL*:
91
92```html
93<link rel="stylesheet" href="../main.css">
94```
95
96Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
97
98```
99use url::{Url, ParseError};
100
101assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
102```
103
104Use the `join` method on an `Url` to use it as a base URL:
105
106```
107use url::Url;
108# use url::ParseError;
109
110# fn run() -> Result<(), ParseError> {
111let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
112let css_url = this_document.join("../main.css")?;
113assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
114# Ok(())
115# }
116# run().unwrap();
117```
118
119# Feature: `serde`
120
121If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
122[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
123[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
124See [serde documentation](https://serde.rs) for more information.
125
126```toml
127url = { version = "2", features = ["serde"] }
128```
129
130# Feature: `debugger_visualizer`
131
132If you enable the `debugger_visualizer` feature, the `url` crate will include
133a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
134for [Visual Studio](https://www.visualstudio.com/) that allows you to view
135[`Url`](struct.Url.html) objects in the debugger.
136
137This feature requires Rust 1.71 or later.
138
139```toml
140url = { version = "2", features = ["debugger_visualizer"] }
141```
142
143*/
144
145#![no_std]
146#![doc(html_root_url = "https://docs.rs/url/2.5.4")]
147#![cfg_attr(
148 feature = "debugger_visualizer",
149 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
150)]
151
152pub use form_urlencoded;
153
154// For forwards compatibility
155#[cfg(feature = "std")]
156extern crate std;
157
158#[macro_use]
159extern crate alloc;
160
161#[cfg(feature = "serde")]
162extern crate serde;
163
164use crate::host::HostInternal;
165
166use crate::net::IpAddr;
167#[cfg(feature = "std")]
168#[cfg(any(
169 unix,
170 windows,
171 target_os = "redox",
172 target_os = "wasi",
173 target_os = "hermit"
174))]
175use crate::net::{SocketAddr, ToSocketAddrs};
176use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
177use alloc::borrow::ToOwned;
178use alloc::str;
179use alloc::string::{String, ToString};
180use core::borrow::Borrow;
181use core::convert::TryFrom;
182use core::fmt::Write;
183use core::ops::{Range, RangeFrom, RangeTo};
184use core::{cmp, fmt, hash, mem};
185use percent_encoding::utf8_percent_encode;
186#[cfg(feature = "std")]
187#[cfg(any(
188 unix,
189 windows,
190 target_os = "redox",
191 target_os = "wasi",
192 target_os = "hermit"
193))]
194use std::io;
195#[cfg(feature = "std")]
196use std::path::{Path, PathBuf};
197
/// Networking types used by this crate (e.g. `IpAddr`, `SocketAddr`) when the
/// `std` feature is enabled: a wholesale re-export of `std::net`.
#[cfg(feature = "std")]
pub(crate) mod net {
    pub use std::net::*;
}
/// Networking types used by this crate when the `std` feature is disabled:
/// a wholesale re-export of `core::net`.
// NOTE(review): this used to be labelled the "nightly" version; `core::net`
// appears to be available on stable toolchains now — confirm against the MSRV.
#[cfg(not(feature = "std"))]
pub(crate) mod net {
    pub use core::net::*;
}
208
209pub use crate::host::Host;
210pub use crate::origin::{OpaqueOrigin, Origin};
211pub use crate::parser::{ParseError, SyntaxViolation};
212pub use crate::path_segments::PathSegmentsMut;
213pub use crate::slicing::Position;
214pub use form_urlencoded::EncodingOverride;
215
216mod host;
217mod origin;
218mod parser;
219mod path_segments;
220mod slicing;
221
222#[doc(hidden)]
223pub mod quirks;
224
/// A parsed URL record.
///
/// The URL is kept as one serialized string plus a set of `u32` byte offsets
/// into that string which delimit the individual components.
#[derive(Clone)]
pub struct Url {
    /// The full serialization of the URL; every offset below indexes into it.
    ///
    /// Syntax in pseudo-BNF:
    ///
    /// ```text
    /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    /// non-hierarchical = non-hierarchical-path
    /// non-hierarchical-path = /* Does not start with "/" */
    /// hierarchical = authority? hierarchical-path
    /// authority = "//" userinfo? host [ ":" port ]?
    /// userinfo = username [ ":" password ]? "@"
    /// hierarchical-path = [ "/" path-segment ]+
    /// ```
    serialization: String,

    // Components
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // `host_start..host_end` is the serialized host; both equal
    // `scheme_end + 1` when the URL has no authority.
    host_start: u32,
    host_end: u32,
    // Typed form of the host; the serialized text lives in `serialization`.
    host: HostInternal,
    // Explicit port, if one is serialized between `host_end` and `path_start`.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
250
/// Full configuration for the URL parser.
///
/// A value is obtained from [`Url::options`], refined with the builder-style
/// methods on this type, and finally consumed by [`ParseOptions::parse`].
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL against which the input is resolved, if any.
    base_url: Option<&'a Url>,
    // Encoding override handed to the parser for query strings.
    encoding_override: EncodingOverride<'a>,
    // Callback invoked for each non-fatal `SyntaxViolation`, if any.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
259
260impl<'a> ParseOptions<'a> {
261 /// Change the base URL
262 ///
263 /// See the notes of [`Url::join`] for more details about how this base is considered
264 /// when parsing.
265 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
266 self.base_url = new;
267 self
268 }
269
270 /// Override the character encoding of query strings.
271 /// This is a legacy concept only relevant for HTML.
272 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
273 self.encoding_override = new;
274 self
275 }
276
277 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
278 /// when it occurs during parsing. Note that since the provided function is
279 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
280 /// a `RefCell`, to collect the violations.
281 ///
282 /// ## Example
283 /// ```
284 /// use std::cell::RefCell;
285 /// use url::{Url, SyntaxViolation};
286 /// # use url::ParseError;
287 /// # fn run() -> Result<(), url::ParseError> {
288 /// let violations = RefCell::new(Vec::new());
289 /// let url = Url::options()
290 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
291 /// .parse("https:////example.com")?;
292 /// assert_eq!(url.as_str(), "https://example.com/");
293 /// assert_eq!(violations.into_inner(),
294 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
295 /// # Ok(())
296 /// # }
297 /// # run().unwrap();
298 /// ```
299 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
300 self.violation_fn = new;
301 self
302 }
303
304 /// Parse an URL string with the configuration so far.
305 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
306 Parser {
307 serialization: String::with_capacity(input.len()),
308 base_url: self.base_url,
309 query_encoding_override: self.encoding_override,
310 violation_fn: self.violation_fn,
311 context: Context::UrlParser,
312 }
313 .parse_url(input)
314 }
315}
316
317impl Url {
318 /// Parse an absolute URL from a string.
319 ///
320 /// # Examples
321 ///
322 /// ```rust
323 /// use url::Url;
324 /// # use url::ParseError;
325 ///
326 /// # fn run() -> Result<(), ParseError> {
327 /// let url = Url::parse("https://example.net")?;
328 /// # Ok(())
329 /// # }
330 /// # run().unwrap();
331 /// ```
332 ///
333 /// # Errors
334 ///
335 /// If the function can not parse an absolute URL from the given string,
336 /// a [`ParseError`] variant will be returned.
337 ///
338 /// [`ParseError`]: enum.ParseError.html
339 #[inline]
340 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
341 Url::options().parse(input)
342 }
343
344 /// Parse an absolute URL from a string and add params to its query string.
345 ///
346 /// Existing params are not removed.
347 ///
348 /// # Examples
349 ///
350 /// ```rust
351 /// use url::Url;
352 /// # use url::ParseError;
353 ///
354 /// # fn run() -> Result<(), ParseError> {
355 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
356 /// &[("lang", "rust"), ("browser", "servo")])?;
357 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
358 /// # Ok(())
359 /// # }
360 /// # run().unwrap();
361 /// ```
362 ///
363 /// # Errors
364 ///
365 /// If the function can not parse an absolute URL from the given string,
366 /// a [`ParseError`] variant will be returned.
367 ///
368 /// [`ParseError`]: enum.ParseError.html
369 #[inline]
370 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
371 where
372 I: IntoIterator,
373 I::Item: Borrow<(K, V)>,
374 K: AsRef<str>,
375 V: AsRef<str>,
376 {
377 let mut url = Url::options().parse(input);
378
379 if let Ok(ref mut url) = url {
380 url.query_pairs_mut().extend_pairs(iter);
381 }
382
383 url
384 }
385
386 /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
387 fn strip_trailing_spaces_from_opaque_path(&mut self) {
388 if !self.cannot_be_a_base() {
389 return;
390 }
391
392 if self.fragment_start.is_some() {
393 return;
394 }
395
396 if self.query_start.is_some() {
397 return;
398 }
399
400 let trailing_space_count = self
401 .serialization
402 .chars()
403 .rev()
404 .take_while(|c| *c == ' ')
405 .count();
406
407 let start = self.serialization.len() - trailing_space_count;
408
409 self.serialization.truncate(start);
410 }
411
412 /// Parse a string as an URL, with this URL as the base URL.
413 ///
414 /// The inverse of this is [`make_relative`].
415 ///
416 /// # Notes
417 ///
418 /// - A trailing slash is significant.
419 /// Without it, the last path component is considered to be a “file” name
420 /// to be removed to get at the “directory” that is used as the base.
421 /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
422 /// as input replaces everything in the base URL after the scheme.
423 /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
424 ///
425 /// # Examples
426 ///
427 /// ```rust
428 /// use url::Url;
429 /// # use url::ParseError;
430 ///
431 /// // Base without a trailing slash
432 /// # fn run() -> Result<(), ParseError> {
433 /// let base = Url::parse("https://example.net/a/b.html")?;
434 /// let url = base.join("c.png")?;
435 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
436 ///
437 /// // Base with a trailing slash
438 /// let base = Url::parse("https://example.net/a/b/")?;
439 /// let url = base.join("c.png")?;
440 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
441 ///
442 /// // Input as scheme relative special URL
443 /// let base = Url::parse("https://alice.com/a")?;
444 /// let url = base.join("//eve.com/b")?;
445 /// assert_eq!(url.as_str(), "https://eve.com/b");
446 ///
447 /// // Input as absolute URL
448 /// let base = Url::parse("https://alice.com/a")?;
449 /// let url = base.join("http://eve.com/b")?;
450 /// assert_eq!(url.as_str(), "http://eve.com/b"); // http instead of https
451
452 /// # Ok(())
453 /// # }
454 /// # run().unwrap();
455 /// ```
456 ///
457 /// # Errors
458 ///
459 /// If the function can not parse an URL from the given string
460 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
461 ///
462 /// [`ParseError`]: enum.ParseError.html
463 /// [`make_relative`]: #method.make_relative
464 #[inline]
465 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
466 Url::options().base_url(Some(self)).parse(input)
467 }
468
469 /// Creates a relative URL if possible, with this URL as the base URL.
470 ///
471 /// This is the inverse of [`join`].
472 ///
473 /// # Examples
474 ///
475 /// ```rust
476 /// use url::Url;
477 /// # use url::ParseError;
478 ///
479 /// # fn run() -> Result<(), ParseError> {
480 /// let base = Url::parse("https://example.net/a/b.html")?;
481 /// let url = Url::parse("https://example.net/a/c.png")?;
482 /// let relative = base.make_relative(&url);
483 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
484 ///
485 /// let base = Url::parse("https://example.net/a/b/")?;
486 /// let url = Url::parse("https://example.net/a/b/c.png")?;
487 /// let relative = base.make_relative(&url);
488 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
489 ///
490 /// let base = Url::parse("https://example.net/a/b/")?;
491 /// let url = Url::parse("https://example.net/a/d/c.png")?;
492 /// let relative = base.make_relative(&url);
493 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
494 ///
495 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
496 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
497 /// let relative = base.make_relative(&url);
498 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
499 /// # Ok(())
500 /// # }
501 /// # run().unwrap();
502 /// ```
503 ///
504 /// # Errors
505 ///
506 /// If this URL can't be a base for the given URL, `None` is returned.
507 /// This is for example the case if the scheme, host or port are not the same.
508 ///
509 /// [`join`]: #method.join
510 pub fn make_relative(&self, url: &Url) -> Option<String> {
511 if self.cannot_be_a_base() {
512 return None;
513 }
514
515 // Scheme, host and port need to be the same
516 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
517 return None;
518 }
519
520 // We ignore username/password at this point
521
522 // The path has to be transformed
523 let mut relative = String::new();
524
525 // Extract the filename of both URIs, these need to be handled separately
526 fn extract_path_filename(s: &str) -> (&str, &str) {
527 let last_slash_idx = s.rfind('/').unwrap_or(0);
528 let (path, filename) = s.split_at(last_slash_idx);
529 if filename.is_empty() {
530 (path, "")
531 } else {
532 (path, &filename[1..])
533 }
534 }
535
536 let (base_path, base_filename) = extract_path_filename(self.path());
537 let (url_path, url_filename) = extract_path_filename(url.path());
538
539 let mut base_path = base_path.split('/').peekable();
540 let mut url_path = url_path.split('/').peekable();
541
542 // Skip over the common prefix
543 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
544 base_path.next();
545 url_path.next();
546 }
547
548 // Add `..` segments for the remainder of the base path
549 for base_path_segment in base_path {
550 // Skip empty last segments
551 if base_path_segment.is_empty() {
552 break;
553 }
554
555 if !relative.is_empty() {
556 relative.push('/');
557 }
558
559 relative.push_str("..");
560 }
561
562 // Append the remainder of the other URI
563 for url_path_segment in url_path {
564 if !relative.is_empty() {
565 relative.push('/');
566 }
567
568 relative.push_str(url_path_segment);
569 }
570
571 // Add the filename if they are not the same
572 if !relative.is_empty() || base_filename != url_filename {
573 // If the URIs filename is empty this means that it was a directory
574 // so we'll have to append a '/'.
575 //
576 // Otherwise append it directly as the new filename.
577 if url_filename.is_empty() {
578 relative.push('/');
579 } else {
580 if !relative.is_empty() {
581 relative.push('/');
582 }
583 relative.push_str(url_filename);
584 }
585 }
586
587 // Query and fragment are only taken from the other URI
588 if let Some(query) = url.query() {
589 relative.push('?');
590 relative.push_str(query);
591 }
592
593 if let Some(fragment) = url.fragment() {
594 relative.push('#');
595 relative.push_str(fragment);
596 }
597
598 Some(relative)
599 }
600
601 /// Return a default `ParseOptions` that can fully configure the URL parser.
602 ///
603 /// # Examples
604 ///
605 /// Get default `ParseOptions`, then change base url
606 ///
607 /// ```rust
608 /// use url::Url;
609 /// # use url::ParseError;
610 /// # fn run() -> Result<(), ParseError> {
611 /// let options = Url::options();
612 /// let api = Url::parse("https://api.example.com")?;
613 /// let base_url = options.base_url(Some(&api));
614 /// let version_url = base_url.parse("version.json")?;
615 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
616 /// # Ok(())
617 /// # }
618 /// # run().unwrap();
619 /// ```
620 pub fn options<'a>() -> ParseOptions<'a> {
621 ParseOptions {
622 base_url: None,
623 encoding_override: None,
624 violation_fn: None,
625 }
626 }
627
628 /// Return the serialization of this URL.
629 ///
630 /// This is fast since that serialization is already stored in the `Url` struct.
631 ///
632 /// # Examples
633 ///
634 /// ```rust
635 /// use url::Url;
636 /// # use url::ParseError;
637 ///
638 /// # fn run() -> Result<(), ParseError> {
639 /// let url_str = "https://example.net/";
640 /// let url = Url::parse(url_str)?;
641 /// assert_eq!(url.as_str(), url_str);
642 /// # Ok(())
643 /// # }
644 /// # run().unwrap();
645 /// ```
646 #[inline]
647 pub fn as_str(&self) -> &str {
648 &self.serialization
649 }
650
651 /// Return the serialization of this URL.
652 ///
653 /// This consumes the `Url` and takes ownership of the `String` stored in it.
654 ///
655 /// # Examples
656 ///
657 /// ```rust
658 /// use url::Url;
659 /// # use url::ParseError;
660 ///
661 /// # fn run() -> Result<(), ParseError> {
662 /// let url_str = "https://example.net/";
663 /// let url = Url::parse(url_str)?;
664 /// assert_eq!(String::from(url), url_str);
665 /// # Ok(())
666 /// # }
667 /// # run().unwrap();
668 /// ```
669 #[inline]
670 #[deprecated(since = "2.3.0", note = "use Into<String>")]
671 pub fn into_string(self) -> String {
672 self.into()
673 }
674
    /// For internal testing, not part of the public API.
    ///
    /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns `Err` with a message
    /// describing the first one that is not met, or `Ok(())` if all hold.
    /// This is for testing rust-url itself.
    ///
    /// # Panics
    ///
    /// Two checks use `expect` instead of returning `Err`: a serialized port
    /// that does not parse as `u16`, and a serialization that fails to
    /// re-parse.
    #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // Local `assert!` / `assert_eq!` that shadow the standard macros for
        // the rest of this function: instead of panicking they return an `Err`
        // naming the failed expression and the URL under test.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                            a, b, stringify!($a), stringify!($b),
                            self.serialization))
                    }
                }
            }
        }

        // Scheme: an ASCII letter, then letters / digits / '+' / '-' / '.',
        // terminated by ':'.
        assert!(self.scheme_end >= 1);
        assert!(self.byte_at(0).is_ascii_alphabetic());
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                // The byte after the username tells which userinfo shape we have.
                match self.byte_at(self.username_end) {
                    b':' => {
                        // A password follows; the host starts after its '@'.
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // No userinfo: `username_end` sits right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The typed host must agree with its serialized text.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            // Anything between the host and the path must be ":" + port.
            if self.path_start == self.host_end {
                assert_eq!(self.port, None);
            } else {
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            if self.path().starts_with("//") {
                // special case when first path segment is empty
                assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
                assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
                assert_eq!(self.path_start, self.scheme_end + 3);
            } else {
                assert_eq!(self.path_start, self.scheme_end + 1);
            }
        }
        // Query and fragment offsets point at their delimiter, in that order.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round trip: re-parsing the serialization must give the same record.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
804
805 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
806 ///
807 /// Note: this returns an opaque origin for `file:` URLs, which causes
808 /// `url.origin() != url.origin()`.
809 ///
810 /// # Examples
811 ///
812 /// URL with `ftp` scheme:
813 ///
814 /// ```rust
815 /// use url::{Host, Origin, Url};
816 /// # use url::ParseError;
817 ///
818 /// # fn run() -> Result<(), ParseError> {
819 /// let url = Url::parse("ftp://example.com/foo")?;
820 /// assert_eq!(url.origin(),
821 /// Origin::Tuple("ftp".into(),
822 /// Host::Domain("example.com".into()),
823 /// 21));
824 /// # Ok(())
825 /// # }
826 /// # run().unwrap();
827 /// ```
828 ///
829 /// URL with `blob` scheme:
830 ///
831 /// ```rust
832 /// use url::{Host, Origin, Url};
833 /// # use url::ParseError;
834 ///
835 /// # fn run() -> Result<(), ParseError> {
836 /// let url = Url::parse("blob:https://example.com/foo")?;
837 /// assert_eq!(url.origin(),
838 /// Origin::Tuple("https".into(),
839 /// Host::Domain("example.com".into()),
840 /// 443));
841 /// # Ok(())
842 /// # }
843 /// # run().unwrap();
844 /// ```
845 ///
846 /// URL with `file` scheme:
847 ///
848 /// ```rust
849 /// use url::{Host, Origin, Url};
850 /// # use url::ParseError;
851 ///
852 /// # fn run() -> Result<(), ParseError> {
853 /// let url = Url::parse("file:///tmp/foo")?;
854 /// assert!(!url.origin().is_tuple());
855 ///
856 /// let other_url = Url::parse("file:///tmp/foo")?;
857 /// assert!(url.origin() != other_url.origin());
858 /// # Ok(())
859 /// # }
860 /// # run().unwrap();
861 /// ```
862 ///
863 /// URL with other scheme:
864 ///
865 /// ```rust
866 /// use url::{Host, Origin, Url};
867 /// # use url::ParseError;
868 ///
869 /// # fn run() -> Result<(), ParseError> {
870 /// let url = Url::parse("foo:bar")?;
871 /// assert!(!url.origin().is_tuple());
872 /// # Ok(())
873 /// # }
874 /// # run().unwrap();
875 /// ```
876 #[inline]
877 pub fn origin(&self) -> Origin {
878 origin::url_origin(self)
879 }
880
881 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
882 ///
883 /// # Examples
884 ///
885 /// ```
886 /// use url::Url;
887 /// # use url::ParseError;
888 ///
889 /// # fn run() -> Result<(), ParseError> {
890 /// let url = Url::parse("file:///tmp/foo")?;
891 /// assert_eq!(url.scheme(), "file");
892 /// # Ok(())
893 /// # }
894 /// # run().unwrap();
895 /// ```
896 #[inline]
897 pub fn scheme(&self) -> &str {
898 self.slice(..self.scheme_end)
899 }
900
901 /// Return whether the URL is special (has a special scheme)
902 ///
903 /// # Examples
904 ///
905 /// ```
906 /// use url::Url;
907 /// # use url::ParseError;
908 ///
909 /// # fn run() -> Result<(), ParseError> {
910 /// assert!(Url::parse("http:///tmp/foo")?.is_special());
911 /// assert!(Url::parse("file:///tmp/foo")?.is_special());
912 /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
913 /// # Ok(())
914 /// # }
915 /// # run().unwrap();
916 /// ```
917 pub fn is_special(&self) -> bool {
918 let scheme_type = SchemeType::from(self.scheme());
919 scheme_type.is_special()
920 }
921
922 /// Return whether the URL has an 'authority',
923 /// which can contain a username, password, host, and port number.
924 ///
925 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
926 /// or cannot-be-a-base like `data:text/plain,Stuff`.
927 ///
928 /// See also the `authority` method.
929 ///
930 /// # Examples
931 ///
932 /// ```
933 /// use url::Url;
934 /// # use url::ParseError;
935 ///
936 /// # fn run() -> Result<(), ParseError> {
937 /// let url = Url::parse("ftp://rms@example.com")?;
938 /// assert!(url.has_authority());
939 ///
940 /// let url = Url::parse("unix:/run/foo.socket")?;
941 /// assert!(!url.has_authority());
942 ///
943 /// let url = Url::parse("data:text/plain,Stuff")?;
944 /// assert!(!url.has_authority());
945 /// # Ok(())
946 /// # }
947 /// # run().unwrap();
948 /// ```
949 #[inline]
950 pub fn has_authority(&self) -> bool {
951 debug_assert!(self.byte_at(self.scheme_end) == b':');
952 self.slice(self.scheme_end..).starts_with("://")
953 }
954
955 /// Return the authority of this URL as an ASCII string.
956 ///
957 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
958 /// of a special URL, or percent encoded for non-special URLs.
959 /// IPv6 addresses are given between `[` and `]` brackets.
960 /// Ports are omitted if they match the well known port of a special URL.
961 ///
962 /// Username and password are percent-encoded.
963 ///
964 /// See also the `has_authority` method.
965 ///
966 /// # Examples
967 ///
968 /// ```
969 /// use url::Url;
970 /// # use url::ParseError;
971 ///
972 /// # fn run() -> Result<(), ParseError> {
973 /// let url = Url::parse("unix:/run/foo.socket")?;
974 /// assert_eq!(url.authority(), "");
975 /// let url = Url::parse("file:///tmp/foo")?;
976 /// assert_eq!(url.authority(), "");
977 /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
978 /// assert_eq!(url.authority(), "user:password@example.com");
979 /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
980 /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
981 /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
982 /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
983 /// # Ok(())
984 /// # }
985 /// # run().unwrap();
986 /// ```
987 pub fn authority(&self) -> &str {
988 let scheme_separator_len = "://".len() as u32;
989 if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
990 self.slice(self.scheme_end + scheme_separator_len..self.path_start)
991 } else {
992 ""
993 }
994 }
995
996 /// Return whether this URL is a cannot-be-a-base URL,
997 /// meaning that parsing a relative URL string with this URL as the base will return an error.
998 ///
999 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
1000 /// as is typically the case of `data:` and `mailto:` URLs.
1001 ///
1002 /// # Examples
1003 ///
1004 /// ```
1005 /// use url::Url;
1006 /// # use url::ParseError;
1007 ///
1008 /// # fn run() -> Result<(), ParseError> {
1009 /// let url = Url::parse("ftp://rms@example.com")?;
1010 /// assert!(!url.cannot_be_a_base());
1011 ///
1012 /// let url = Url::parse("unix:/run/foo.socket")?;
1013 /// assert!(!url.cannot_be_a_base());
1014 ///
1015 /// let url = Url::parse("data:text/plain,Stuff")?;
1016 /// assert!(url.cannot_be_a_base());
1017 /// # Ok(())
1018 /// # }
1019 /// # run().unwrap();
1020 /// ```
1021 #[inline]
1022 pub fn cannot_be_a_base(&self) -> bool {
1023 !self.slice(self.scheme_end + 1..).starts_with('/')
1024 }
1025
1026 /// Return the username for this URL (typically the empty string)
1027 /// as a percent-encoded ASCII string.
1028 ///
1029 /// # Examples
1030 ///
1031 /// ```
1032 /// use url::Url;
1033 /// # use url::ParseError;
1034 ///
1035 /// # fn run() -> Result<(), ParseError> {
1036 /// let url = Url::parse("ftp://rms@example.com")?;
1037 /// assert_eq!(url.username(), "rms");
1038 ///
1039 /// let url = Url::parse("ftp://:secret123@example.com")?;
1040 /// assert_eq!(url.username(), "");
1041 ///
1042 /// let url = Url::parse("https://example.com")?;
1043 /// assert_eq!(url.username(), "");
1044 /// # Ok(())
1045 /// # }
1046 /// # run().unwrap();
1047 /// ```
1048 pub fn username(&self) -> &str {
1049 let scheme_separator_len = "://".len() as u32;
1050 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
1051 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
1052 } else {
1053 ""
1054 }
1055 }
1056
1057 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
1058 ///
1059 /// # Examples
1060 ///
1061 /// ```
1062 /// use url::Url;
1063 /// # use url::ParseError;
1064 ///
1065 /// # fn run() -> Result<(), ParseError> {
1066 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1067 /// assert_eq!(url.password(), Some("secret123"));
1068 ///
1069 /// let url = Url::parse("ftp://:secret123@example.com")?;
1070 /// assert_eq!(url.password(), Some("secret123"));
1071 ///
1072 /// let url = Url::parse("ftp://rms@example.com")?;
1073 /// assert_eq!(url.password(), None);
1074 ///
1075 /// let url = Url::parse("https://example.com")?;
1076 /// assert_eq!(url.password(), None);
1077 /// # Ok(())
1078 /// # }
1079 /// # run().unwrap();
1080 /// ```
1081 pub fn password(&self) -> Option<&str> {
1082 // This ':' is not the one marking a port number since a host can not be empty.
1083 // (Except for file: URLs, which do not have port numbers.)
1084 if self.has_authority()
1085 && self.username_end != self.serialization.len() as u32
1086 && self.byte_at(self.username_end) == b':'
1087 {
1088 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1089 Some(self.slice(self.username_end + 1..self.host_start - 1))
1090 } else {
1091 None
1092 }
1093 }
1094
1095 /// Equivalent to `url.host().is_some()`.
1096 ///
1097 /// # Examples
1098 ///
1099 /// ```
1100 /// use url::Url;
1101 /// # use url::ParseError;
1102 ///
1103 /// # fn run() -> Result<(), ParseError> {
1104 /// let url = Url::parse("ftp://rms@example.com")?;
1105 /// assert!(url.has_host());
1106 ///
1107 /// let url = Url::parse("unix:/run/foo.socket")?;
1108 /// assert!(!url.has_host());
1109 ///
1110 /// let url = Url::parse("data:text/plain,Stuff")?;
1111 /// assert!(!url.has_host());
1112 /// # Ok(())
1113 /// # }
1114 /// # run().unwrap();
1115 /// ```
1116 pub fn has_host(&self) -> bool {
1117 !matches!(self.host, HostInternal::None)
1118 }
1119
1120 /// Return the string representation of the host (domain or IP address) for this URL, if any.
1121 ///
1122 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1123 /// of a special URL, or percent encoded for non-special URLs.
1124 /// IPv6 addresses are given between `[` and `]` brackets.
1125 ///
1126 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1127 /// don’t have a host.
1128 ///
1129 /// See also the `host` method.
1130 ///
1131 /// # Examples
1132 ///
1133 /// ```
1134 /// use url::Url;
1135 /// # use url::ParseError;
1136 ///
1137 /// # fn run() -> Result<(), ParseError> {
1138 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1139 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1140 ///
1141 /// let url = Url::parse("ftp://rms@example.com")?;
1142 /// assert_eq!(url.host_str(), Some("example.com"));
1143 ///
1144 /// let url = Url::parse("unix:/run/foo.socket")?;
1145 /// assert_eq!(url.host_str(), None);
1146 ///
1147 /// let url = Url::parse("data:text/plain,Stuff")?;
1148 /// assert_eq!(url.host_str(), None);
1149 /// # Ok(())
1150 /// # }
1151 /// # run().unwrap();
1152 /// ```
1153 pub fn host_str(&self) -> Option<&str> {
1154 if self.has_host() {
1155 Some(self.slice(self.host_start..self.host_end))
1156 } else {
1157 None
1158 }
1159 }
1160
1161 /// Return the parsed representation of the host for this URL.
1162 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1163 /// of a special URL, or percent encoded for non-special URLs.
1164 ///
1165 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1166 /// don’t have a host.
1167 ///
1168 /// See also the `host_str` method.
1169 ///
1170 /// # Examples
1171 ///
1172 /// ```
1173 /// use url::Url;
1174 /// # use url::ParseError;
1175 ///
1176 /// # fn run() -> Result<(), ParseError> {
1177 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1178 /// assert!(url.host().is_some());
1179 ///
1180 /// let url = Url::parse("ftp://rms@example.com")?;
1181 /// assert!(url.host().is_some());
1182 ///
1183 /// let url = Url::parse("unix:/run/foo.socket")?;
1184 /// assert!(url.host().is_none());
1185 ///
1186 /// let url = Url::parse("data:text/plain,Stuff")?;
1187 /// assert!(url.host().is_none());
1188 /// # Ok(())
1189 /// # }
1190 /// # run().unwrap();
1191 /// ```
1192 pub fn host(&self) -> Option<Host<&str>> {
1193 match self.host {
1194 HostInternal::None => None,
1195 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1196 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1197 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1198 }
1199 }
1200
1201 /// If this URL has a host and it is a domain name (not an IP address), return it.
1202 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1203 /// of a special URL, or percent encoded for non-special URLs.
1204 ///
1205 /// # Examples
1206 ///
1207 /// ```
1208 /// use url::Url;
1209 /// # use url::ParseError;
1210 ///
1211 /// # fn run() -> Result<(), ParseError> {
1212 /// let url = Url::parse("https://127.0.0.1/")?;
1213 /// assert_eq!(url.domain(), None);
1214 ///
1215 /// let url = Url::parse("mailto:rms@example.net")?;
1216 /// assert_eq!(url.domain(), None);
1217 ///
1218 /// let url = Url::parse("https://example.com/")?;
1219 /// assert_eq!(url.domain(), Some("example.com"));
1220 /// # Ok(())
1221 /// # }
1222 /// # run().unwrap();
1223 /// ```
1224 pub fn domain(&self) -> Option<&str> {
1225 match self.host {
1226 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1227 _ => None,
1228 }
1229 }
1230
1231 /// Return the port number for this URL, if any.
1232 ///
1233 /// Note that default port numbers are never reflected by the serialization,
1234 /// use the `port_or_known_default()` method if you want a default port number returned.
1235 ///
1236 /// # Examples
1237 ///
1238 /// ```
1239 /// use url::Url;
1240 /// # use url::ParseError;
1241 ///
1242 /// # fn run() -> Result<(), ParseError> {
1243 /// let url = Url::parse("https://example.com")?;
1244 /// assert_eq!(url.port(), None);
1245 ///
1246 /// let url = Url::parse("https://example.com:443/")?;
1247 /// assert_eq!(url.port(), None);
1248 ///
1249 /// let url = Url::parse("ssh://example.com:22")?;
1250 /// assert_eq!(url.port(), Some(22));
1251 /// # Ok(())
1252 /// # }
1253 /// # run().unwrap();
1254 /// ```
1255 #[inline]
1256 pub fn port(&self) -> Option<u16> {
1257 self.port
1258 }
1259
1260 /// Return the port number for this URL, or the default port number if it is known.
1261 ///
1262 /// This method only knows the default port number
1263 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1264 ///
1265 /// For URLs in these schemes, this method always returns `Some(_)`.
1266 /// For other schemes, it is the same as `Url::port()`.
1267 ///
1268 /// # Examples
1269 ///
1270 /// ```
1271 /// use url::Url;
1272 /// # use url::ParseError;
1273 ///
1274 /// # fn run() -> Result<(), ParseError> {
1275 /// let url = Url::parse("foo://example.com")?;
1276 /// assert_eq!(url.port_or_known_default(), None);
1277 ///
1278 /// let url = Url::parse("foo://example.com:1456")?;
1279 /// assert_eq!(url.port_or_known_default(), Some(1456));
1280 ///
1281 /// let url = Url::parse("https://example.com")?;
1282 /// assert_eq!(url.port_or_known_default(), Some(443));
1283 /// # Ok(())
1284 /// # }
1285 /// # run().unwrap();
1286 /// ```
1287 #[inline]
1288 pub fn port_or_known_default(&self) -> Option<u16> {
1289 self.port.or_else(|| parser::default_port(self.scheme()))
1290 }
1291
1292 /// Resolve a URL’s host and port number to `SocketAddr`.
1293 ///
1294 /// If the URL has the default port number of a scheme that is unknown to this library,
1295 /// `default_port_number` provides an opportunity to provide the actual port number.
1296 /// In non-example code this should be implemented either simply as `|| None`,
1297 /// or by matching on the URL’s `.scheme()`.
1298 ///
1299 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1300 ///
1301 /// # Examples
1302 ///
1303 /// ```no_run
1304 /// let url = url::Url::parse("https://example.net/").unwrap();
1305 /// let addrs = url.socket_addrs(|| None).unwrap();
1306 /// std::net::TcpStream::connect(&*addrs)
1307 /// # ;
1308 /// ```
1309 ///
1310 /// ```
1311 /// /// With application-specific known default port numbers
1312 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1313 /// url.socket_addrs(|| match url.scheme() {
1314 /// "socks5" | "socks5h" => Some(1080),
1315 /// _ => None,
1316 /// })
1317 /// }
1318 /// ```
1319 #[cfg(feature = "std")]
1320 #[cfg(any(
1321 unix,
1322 windows,
1323 target_os = "redox",
1324 target_os = "wasi",
1325 target_os = "hermit"
1326 ))]
1327 pub fn socket_addrs(
1328 &self,
1329 default_port_number: impl Fn() -> Option<u16>,
1330 ) -> io::Result<alloc::vec::Vec<SocketAddr>> {
1331 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1332 // causes borrowck issues because the return value borrows `default_port_number`:
1333 //
1334 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1335 //
1336 // > This RFC proposes that *all* type parameters are considered in scope
1337 // > for `impl Trait` in return position
1338
1339 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1340 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1341 }
1342
1343 let host = io_result(self.host(), "No host name in the URL")?;
1344 let port = io_result(
1345 self.port_or_known_default().or_else(default_port_number),
1346 "No port number in the URL",
1347 )?;
1348 Ok(match host {
1349 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1350 Host::Ipv4(ip) => vec![(ip, port).into()],
1351 Host::Ipv6(ip) => vec![(ip, port).into()],
1352 })
1353 }
1354
1355 /// Return the path for this URL, as a percent-encoded ASCII string.
1356 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1357 /// For other URLs, this starts with a '/' slash
1358 /// and continues with slash-separated path segments.
1359 ///
1360 /// # Examples
1361 ///
1362 /// ```rust
1363 /// use url::{Url, ParseError};
1364 ///
1365 /// # fn run() -> Result<(), ParseError> {
1366 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1367 /// assert_eq!(url.path(), "/api/versions");
1368 ///
1369 /// let url = Url::parse("https://example.com")?;
1370 /// assert_eq!(url.path(), "/");
1371 ///
1372 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1373 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1374 /// # Ok(())
1375 /// # }
1376 /// # run().unwrap();
1377 /// ```
1378 pub fn path(&self) -> &str {
1379 match (self.query_start, self.fragment_start) {
1380 (None, None) => self.slice(self.path_start..),
1381 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1382 self.slice(self.path_start..next_component_start)
1383 }
1384 }
1385 }
1386
1387 /// Unless this URL is cannot-be-a-base,
1388 /// return an iterator of '/' slash-separated path segments,
1389 /// each as a percent-encoded ASCII string.
1390 ///
1391 /// Return `None` for cannot-be-a-base URLs.
1392 ///
1393 /// When `Some` is returned, the iterator always contains at least one string
1394 /// (which may be empty).
1395 ///
1396 /// # Examples
1397 ///
1398 /// ```
1399 /// use url::Url;
1400 ///
1401 /// # #[cfg(feature = "std")]
1402 /// # use std::error::Error;
1403 /// # #[cfg(not(feature = "std"))]
1404 /// # use core::error::Error;
1405 ///
1406 /// # fn run() -> Result<(), Box<dyn Error>> {
1407 /// let url = Url::parse("https://example.com/foo/bar")?;
1408 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1409 /// assert_eq!(path_segments.next(), Some("foo"));
1410 /// assert_eq!(path_segments.next(), Some("bar"));
1411 /// assert_eq!(path_segments.next(), None);
1412 ///
1413 /// let url = Url::parse("https://example.com")?;
1414 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1415 /// assert_eq!(path_segments.next(), Some(""));
1416 /// assert_eq!(path_segments.next(), None);
1417 ///
1418 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1419 /// assert!(url.path_segments().is_none());
1420 ///
1421 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1422 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1423 /// assert_eq!(path_segments.next(), Some("countries"));
1424 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1425 /// # Ok(())
1426 /// # }
1427 /// # run().unwrap();
1428 /// ```
1429 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1430 let path = self.path();
1431 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1432 }
1433
1434 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1435 ///
1436 /// # Examples
1437 ///
1438 /// ```rust
1439 /// use url::Url;
1440 /// # use url::ParseError;
1441 ///
/// # fn run() -> Result<(), ParseError> {
1443 /// let url = Url::parse("https://example.com/products?page=2")?;
1444 /// let query = url.query();
1445 /// assert_eq!(query, Some("page=2"));
1446 ///
1447 /// let url = Url::parse("https://example.com/products")?;
1448 /// let query = url.query();
1449 /// assert!(query.is_none());
1450 ///
1451 /// let url = Url::parse("https://example.com/?country=español")?;
1452 /// let query = url.query();
1453 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1454 /// # Ok(())
1455 /// # }
1456 /// # run().unwrap();
1457 /// ```
1458 pub fn query(&self) -> Option<&str> {
1459 match (self.query_start, self.fragment_start) {
1460 (None, _) => None,
1461 (Some(query_start), None) => {
1462 debug_assert!(self.byte_at(query_start) == b'?');
1463 Some(self.slice(query_start + 1..))
1464 }
1465 (Some(query_start), Some(fragment_start)) => {
1466 debug_assert!(self.byte_at(query_start) == b'?');
1467 Some(self.slice(query_start + 1..fragment_start))
1468 }
1469 }
1470 }
1471
1472 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1473 /// and return an iterator of (key, value) pairs.
1474 ///
1475 /// # Examples
1476 ///
1477 /// ```rust
1478 /// use std::borrow::Cow;
1479 ///
1480 /// use url::Url;
1481 /// # use url::ParseError;
1482 ///
1483 /// # fn run() -> Result<(), ParseError> {
1484 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1485 /// let mut pairs = url.query_pairs();
1486 ///
1487 /// assert_eq!(pairs.count(), 2);
1488 ///
1489 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1490 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1491 /// # Ok(())
1492 /// # }
1493 /// # run().unwrap();
/// ```
1496 #[inline]
1497 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1498 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1499 }
1500
1501 /// Return this URL’s fragment identifier, if any.
1502 ///
1503 /// A fragment is the part of the URL after the `#` symbol.
1504 /// The fragment is optional and, if present, contains a fragment identifier
1505 /// that identifies a secondary resource, such as a section heading
1506 /// of a document.
1507 ///
/// In HTML, the fragment identifier is usually the id attribute of an element
1509 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1510 /// of a URL to the server.
1511 ///
1512 /// **Note:** the parser did *not* percent-encode this component,
1513 /// but the input may have been percent-encoded already.
1514 ///
1515 /// # Examples
1516 ///
1517 /// ```rust
1518 /// use url::Url;
1519 /// # use url::ParseError;
1520 ///
1521 /// # fn run() -> Result<(), ParseError> {
1522 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1523 ///
1524 /// assert_eq!(url.fragment(), Some("row=4"));
1525 ///
1526 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1527 ///
1528 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1529 /// # Ok(())
1530 /// # }
1531 /// # run().unwrap();
1532 /// ```
1533 pub fn fragment(&self) -> Option<&str> {
1534 self.fragment_start.map(|start| {
1535 debug_assert!(self.byte_at(start) == b'#');
1536 self.slice(start + 1..)
1537 })
1538 }
1539
1540 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1541 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1542 let result = f(&mut parser);
1543 self.serialization = parser.serialization;
1544 result
1545 }
1546
1547 /// Change this URL’s fragment identifier.
1548 ///
1549 /// # Examples
1550 ///
1551 /// ```rust
1552 /// use url::Url;
1553 /// # use url::ParseError;
1554 ///
1555 /// # fn run() -> Result<(), ParseError> {
1556 /// let mut url = Url::parse("https://example.com/data.csv")?;
1557 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
///
1559 /// url.set_fragment(Some("cell=4,1-6,2"));
1560 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1561 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1562 ///
1563 /// url.set_fragment(None);
1564 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1565 /// assert!(url.fragment().is_none());
1566 /// # Ok(())
1567 /// # }
1568 /// # run().unwrap();
1569 /// ```
1570 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1571 // Remove any previous fragment
1572 if let Some(start) = self.fragment_start {
1573 debug_assert!(self.byte_at(start) == b'#');
1574 self.serialization.truncate(start as usize);
1575 }
1576 // Write the new one
1577 if let Some(input) = fragment {
1578 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1579 self.serialization.push('#');
1580 self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1581 } else {
1582 self.fragment_start = None;
1583 self.strip_trailing_spaces_from_opaque_path();
1584 }
1585 }
1586
1587 fn take_fragment(&mut self) -> Option<String> {
1588 self.fragment_start.take().map(|start| {
1589 debug_assert!(self.byte_at(start) == b'#');
1590 let fragment = self.slice(start + 1..).to_owned();
1591 self.serialization.truncate(start as usize);
1592 fragment
1593 })
1594 }
1595
1596 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1597 if let Some(ref fragment) = fragment {
1598 assert!(self.fragment_start.is_none());
1599 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1600 self.serialization.push('#');
1601 self.serialization.push_str(fragment);
1602 }
1603 }
1604
1605 /// Change this URL’s query string. If `query` is `None`, this URL's
1606 /// query string will be cleared.
1607 ///
1608 /// # Examples
1609 ///
1610 /// ```rust
1611 /// use url::Url;
1612 /// # use url::ParseError;
1613 ///
1614 /// # fn run() -> Result<(), ParseError> {
1615 /// let mut url = Url::parse("https://example.com/products")?;
1616 /// assert_eq!(url.as_str(), "https://example.com/products");
1617 ///
1618 /// url.set_query(Some("page=2"));
1619 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1620 /// assert_eq!(url.query(), Some("page=2"));
1621 /// # Ok(())
1622 /// # }
1623 /// # run().unwrap();
1624 /// ```
1625 pub fn set_query(&mut self, query: Option<&str>) {
1626 let fragment = self.take_fragment();
1627
1628 // Remove any previous query
1629 if let Some(start) = self.query_start.take() {
1630 debug_assert!(self.byte_at(start) == b'?');
1631 self.serialization.truncate(start as usize);
1632 }
1633 // Write the new query, if any
1634 if let Some(input) = query {
1635 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1636 self.serialization.push('?');
1637 let scheme_type = SchemeType::from(self.scheme());
1638 let scheme_end = self.scheme_end;
1639 self.mutate(|parser| {
1640 let vfn = parser.violation_fn;
1641 parser.parse_query(
1642 scheme_type,
1643 scheme_end,
1644 parser::Input::new_trim_tab_and_newlines(input, vfn),
1645 )
1646 });
1647 } else {
1648 self.query_start = None;
1649 if fragment.is_none() {
1650 self.strip_trailing_spaces_from_opaque_path();
1651 }
1652 }
1653
1654 self.restore_already_parsed_fragment(fragment);
1655 }
1656
1657 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1658 /// in `application/x-www-form-urlencoded` syntax.
1659 ///
1660 /// The return value has a method-chaining API:
1661 ///
1662 /// ```rust
1663 /// # use url::{Url, ParseError};
1664 ///
1665 /// # fn run() -> Result<(), ParseError> {
1666 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1667 /// assert_eq!(url.query(), Some("lang=fr"));
1668 ///
1669 /// url.query_pairs_mut().append_pair("foo", "bar");
1670 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1671 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1672 ///
1673 /// url.query_pairs_mut()
1674 /// .clear()
1675 /// .append_pair("foo", "bar & baz")
1676 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1677 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1678 /// assert_eq!(url.as_str(),
1679 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1680 /// # Ok(())
1681 /// # }
1682 /// # run().unwrap();
1683 /// ```
1684 ///
1685 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1686 /// not `url.set_query(None)`.
1687 ///
1688 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
1689 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1690 let fragment = self.take_fragment();
1691
1692 let query_start;
1693 if let Some(start) = self.query_start {
1694 debug_assert!(self.byte_at(start) == b'?');
1695 query_start = start as usize;
1696 } else {
1697 query_start = self.serialization.len();
1698 self.query_start = Some(to_u32(query_start).unwrap());
1699 self.serialization.push('?');
1700 }
1701
1702 let query = UrlQuery {
1703 url: Some(self),
1704 fragment,
1705 };
1706 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1707 }
1708
1709 fn take_after_path(&mut self) -> String {
1710 match (self.query_start, self.fragment_start) {
1711 (Some(i), _) | (None, Some(i)) => {
1712 let after_path = self.slice(i..).to_owned();
1713 self.serialization.truncate(i as usize);
1714 after_path
1715 }
1716 (None, None) => String::new(),
1717 }
1718 }
1719
1720 /// Change this URL’s path.
1721 ///
1722 /// # Examples
1723 ///
1724 /// ```rust
1725 /// use url::Url;
1726 /// # use url::ParseError;
1727 ///
1728 /// # fn run() -> Result<(), ParseError> {
1729 /// let mut url = Url::parse("https://example.com")?;
1730 /// url.set_path("api/comments");
1731 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1732 /// assert_eq!(url.path(), "/api/comments");
1733 ///
1734 /// let mut url = Url::parse("https://example.com/api")?;
1735 /// url.set_path("data/report.csv");
1736 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1737 /// assert_eq!(url.path(), "/data/report.csv");
1738 ///
1739 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1740 /// let mut url = Url::parse("https://example.com")?;
1741 /// url.set_path("api/some comments");
1742 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1743 /// assert_eq!(url.path(), "/api/some%20comments");
1744 ///
1745 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1746 /// let mut url = Url::parse("https://example.com")?;
1747 /// url.set_path("api/some%20comments");
1748 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1749 /// assert_eq!(url.path(), "/api/some%20comments");
1750 ///
1751 /// # Ok(())
1752 /// # }
1753 /// # run().unwrap();
1754 /// ```
1755 pub fn set_path(&mut self, mut path: &str) {
1756 let after_path = self.take_after_path();
1757 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1758 let cannot_be_a_base = self.cannot_be_a_base();
1759 let scheme_type = SchemeType::from(self.scheme());
1760 self.serialization.truncate(self.path_start as usize);
1761 self.mutate(|parser| {
1762 if cannot_be_a_base {
1763 if path.starts_with('/') {
1764 parser.serialization.push_str("%2F");
1765 path = &path[1..];
1766 }
1767 parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1768 } else {
1769 let mut has_host = true; // FIXME
1770 parser.parse_path_start(
1771 scheme_type,
1772 &mut has_host,
1773 parser::Input::new_no_trim(path),
1774 );
1775 }
1776 });
1777 self.restore_after_path(old_after_path_pos, &after_path);
1778 }
1779
1780 /// Return an object with methods to manipulate this URL’s path segments.
1781 ///
1782 /// Return `Err(())` if this URL is cannot-be-a-base.
1783 #[allow(clippy::result_unit_err)]
1784 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1785 if self.cannot_be_a_base() {
1786 Err(())
1787 } else {
1788 Ok(path_segments::new(self))
1789 }
1790 }
1791
1792 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1793 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1794 let adjust = |index: &mut u32| {
1795 *index -= old_after_path_position;
1796 *index += new_after_path_position;
1797 };
1798 if let Some(ref mut index) = self.query_start {
1799 adjust(index)
1800 }
1801 if let Some(ref mut index) = self.fragment_start {
1802 adjust(index)
1803 }
1804 self.serialization.push_str(after_path)
1805 }
1806
1807 /// Change this URL’s port number.
1808 ///
1809 /// Note that default port numbers are not reflected in the serialization.
1810 ///
1811 /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1812 /// do nothing and return `Err`.
1813 ///
1814 /// # Examples
1815 ///
1816 /// ```
1817 /// use url::Url;
1818 ///
1819 /// # #[cfg(feature = "std")]
1820 /// # use std::error::Error;
1821 /// # #[cfg(not(feature = "std"))]
1822 /// # use core::error::Error;
1823 ///
1824 /// # fn run() -> Result<(), Box<dyn Error>> {
1825 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1826 ///
1827 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1828 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1829 ///
1830 /// url.set_port(None).map_err(|_| "cannot be base")?;
1831 /// assert_eq!(url.as_str(), "ssh://example.net/");
1832 /// # Ok(())
1833 /// # }
1834 /// # run().unwrap();
1835 /// ```
1836 ///
1837 /// Known default port numbers are not reflected:
1838 ///
1839 /// ```rust
1840 /// use url::Url;
1841 ///
1842 /// # #[cfg(feature = "std")]
1843 /// # use std::error::Error;
1844 /// # #[cfg(not(feature = "std"))]
1845 /// # use core::error::Error;
1846 ///
1847 /// # fn run() -> Result<(), Box<dyn Error>> {
1848 /// let mut url = Url::parse("https://example.org/")?;
1849 ///
1850 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1851 /// assert!(url.port().is_none());
1852 /// # Ok(())
1853 /// # }
1854 /// # run().unwrap();
1855 /// ```
1856 ///
1857 /// Cannot set port for cannot-be-a-base URLs:
1858 ///
1859 /// ```
1860 /// use url::Url;
1861 /// # use url::ParseError;
1862 ///
1863 /// # fn run() -> Result<(), ParseError> {
1864 /// let mut url = Url::parse("mailto:rms@example.net")?;
1865 ///
1866 /// let result = url.set_port(Some(80));
1867 /// assert!(result.is_err());
1868 ///
1869 /// let result = url.set_port(None);
1870 /// assert!(result.is_err());
1871 /// # Ok(())
1872 /// # }
1873 /// # run().unwrap();
1874 /// ```
1875 #[allow(clippy::result_unit_err)]
1876 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1877 // has_host implies !cannot_be_a_base
1878 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1879 return Err(());
1880 }
1881 if port.is_some() && port == parser::default_port(self.scheme()) {
1882 port = None
1883 }
1884 self.set_port_internal(port);
1885 Ok(())
1886 }
1887
1888 fn set_port_internal(&mut self, port: Option<u16>) {
1889 match (self.port, port) {
1890 (None, None) => {}
1891 (Some(_), None) => {
1892 self.serialization
1893 .drain(self.host_end as usize..self.path_start as usize);
1894 let offset = self.path_start - self.host_end;
1895 self.path_start = self.host_end;
1896 if let Some(ref mut index) = self.query_start {
1897 *index -= offset
1898 }
1899 if let Some(ref mut index) = self.fragment_start {
1900 *index -= offset
1901 }
1902 }
1903 (Some(old), Some(new)) if old == new => {}
1904 (_, Some(new)) => {
1905 let path_and_after = self.slice(self.path_start..).to_owned();
1906 self.serialization.truncate(self.host_end as usize);
1907 write!(&mut self.serialization, ":{}", new).unwrap();
1908 let old_path_start = self.path_start;
1909 let new_path_start = to_u32(self.serialization.len()).unwrap();
1910 self.path_start = new_path_start;
1911 let adjust = |index: &mut u32| {
1912 *index -= old_path_start;
1913 *index += new_path_start;
1914 };
1915 if let Some(ref mut index) = self.query_start {
1916 adjust(index)
1917 }
1918 if let Some(ref mut index) = self.fragment_start {
1919 adjust(index)
1920 }
1921 self.serialization.push_str(&path_and_after);
1922 }
1923 }
1924 self.port = port;
1925 }
1926
1927 /// Change this URL’s host.
1928 ///
1929 /// Removing the host (calling this with `None`)
1930 /// will also remove any username, password, and port number.
1931 ///
1932 /// # Examples
1933 ///
1934 /// Change host:
1935 ///
1936 /// ```
1937 /// use url::Url;
1938 /// # use url::ParseError;
1939 ///
1940 /// # fn run() -> Result<(), ParseError> {
1941 /// let mut url = Url::parse("https://example.net")?;
1942 /// let result = url.set_host(Some("rust-lang.org"));
1943 /// assert!(result.is_ok());
1944 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1945 /// # Ok(())
1946 /// # }
1947 /// # run().unwrap();
1948 /// ```
1949 ///
1950 /// Remove host:
1951 ///
1952 /// ```
1953 /// use url::Url;
1954 /// # use url::ParseError;
1955 ///
1956 /// # fn run() -> Result<(), ParseError> {
1957 /// let mut url = Url::parse("foo://example.net")?;
1958 /// let result = url.set_host(None);
1959 /// assert!(result.is_ok());
1960 /// assert_eq!(url.as_str(), "foo:/");
1961 /// # Ok(())
1962 /// # }
1963 /// # run().unwrap();
1964 /// ```
1965 ///
1966 /// Cannot remove host for 'special' schemes (e.g. `http`):
1967 ///
1968 /// ```
1969 /// use url::Url;
1970 /// # use url::ParseError;
1971 ///
1972 /// # fn run() -> Result<(), ParseError> {
1973 /// let mut url = Url::parse("https://example.net")?;
1974 /// let result = url.set_host(None);
1975 /// assert!(result.is_err());
1976 /// assert_eq!(url.as_str(), "https://example.net/");
1977 /// # Ok(())
1978 /// # }
1979 /// # run().unwrap();
1980 /// ```
1981 ///
1982 /// Cannot change or remove host for cannot-be-a-base URLs:
1983 ///
1984 /// ```
1985 /// use url::Url;
1986 /// # use url::ParseError;
1987 ///
1988 /// # fn run() -> Result<(), ParseError> {
1989 /// let mut url = Url::parse("mailto:rms@example.net")?;
1990 ///
1991 /// let result = url.set_host(Some("rust-lang.org"));
1992 /// assert!(result.is_err());
1993 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1994 ///
1995 /// let result = url.set_host(None);
1996 /// assert!(result.is_err());
1997 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1998 /// # Ok(())
1999 /// # }
2000 /// # run().unwrap();
2001 /// ```
2002 ///
2003 /// # Errors
2004 ///
2005 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
2006 /// a [`ParseError`] variant will be returned.
2007 ///
2008 /// [`ParseError`]: enum.ParseError.html
2009 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
2010 if self.cannot_be_a_base() {
2011 return Err(ParseError::SetHostOnCannotBeABaseUrl);
2012 }
2013
2014 let scheme_type = SchemeType::from(self.scheme());
2015
2016 if let Some(host) = host {
2017 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
2018 return Err(ParseError::EmptyHost);
2019 }
2020 let mut host_substr = host;
2021 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
2022 if !host.starts_with('[') || !host.ends_with(']') {
2023 match host.find(':') {
2024 Some(0) => {
2025 // If buffer is the empty string, validation error, return failure.
2026 return Err(ParseError::InvalidDomainCharacter);
2027 }
2028 // Let host be the result of host parsing buffer
2029 Some(colon_index) => {
2030 host_substr = &host[..colon_index];
2031 }
2032 None => {}
2033 }
2034 }
2035 if SchemeType::from(self.scheme()).is_special() {
2036 self.set_host_internal(Host::parse(host_substr)?, None);
2037 } else {
2038 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
2039 }
2040 } else if self.has_host() {
2041 if scheme_type.is_special() && !scheme_type.is_file() {
2042 return Err(ParseError::EmptyHost);
2043 } else if self.serialization.len() == self.path_start as usize {
2044 self.serialization.push('/');
2045 }
2046 debug_assert!(self.byte_at(self.scheme_end) == b':');
2047 debug_assert!(self.byte_at(self.path_start) == b'/');
2048
2049 let new_path_start = if scheme_type.is_file() {
2050 self.scheme_end + 3
2051 } else {
2052 self.scheme_end + 1
2053 };
2054
2055 self.serialization
2056 .drain(new_path_start as usize..self.path_start as usize);
2057 let offset = self.path_start - new_path_start;
2058 self.path_start = new_path_start;
2059 self.username_end = new_path_start;
2060 self.host_start = new_path_start;
2061 self.host_end = new_path_start;
2062 self.port = None;
2063 if let Some(ref mut index) = self.query_start {
2064 *index -= offset
2065 }
2066 if let Some(ref mut index) = self.fragment_start {
2067 *index -= offset
2068 }
2069 }
2070 Ok(())
2071 }
2072
2073 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
2074 fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2075 let old_suffix_pos = if opt_new_port.is_some() {
2076 self.path_start
2077 } else {
2078 self.host_end
2079 };
2080 let suffix = self.slice(old_suffix_pos..).to_owned();
2081 self.serialization.truncate(self.host_start as usize);
2082 if !self.has_authority() {
2083 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2084 debug_assert!(self.username_end == self.host_start);
2085 self.serialization.push('/');
2086 self.serialization.push('/');
2087 self.username_end += 2;
2088 self.host_start += 2;
2089 }
2090 write!(&mut self.serialization, "{}", host).unwrap();
2091 self.host_end = to_u32(self.serialization.len()).unwrap();
2092 self.host = host.into();
2093
2094 if let Some(new_port) = opt_new_port {
2095 self.port = new_port;
2096 if let Some(port) = new_port {
2097 write!(&mut self.serialization, ":{}", port).unwrap();
2098 }
2099 }
2100 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2101 self.serialization.push_str(&suffix);
2102
2103 let adjust = |index: &mut u32| {
2104 *index -= old_suffix_pos;
2105 *index += new_suffix_pos;
2106 };
2107 adjust(&mut self.path_start);
2108 if let Some(ref mut index) = self.query_start {
2109 adjust(index)
2110 }
2111 if let Some(ref mut index) = self.fragment_start {
2112 adjust(index)
2113 }
2114 }
2115
2116 /// Change this URL’s host to the given IP address.
2117 ///
2118 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2119 ///
2120 /// Compared to `Url::set_host`, this skips the host parser.
2121 ///
2122 /// # Examples
2123 ///
2124 /// ```rust
2125 /// use url::{Url, ParseError};
2126 ///
2127 /// # fn run() -> Result<(), ParseError> {
2128 /// let mut url = Url::parse("http://example.com")?;
2129 /// url.set_ip_host("127.0.0.1".parse().unwrap());
2130 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2131 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2132 /// # Ok(())
2133 /// # }
2134 /// # run().unwrap();
2135 /// ```
2136 ///
/// Cannot set an IP host on a cannot-be-a-base URL such as `mailto:`:
2138 ///
2139 /// ```rust
2140 /// use url::{Url, ParseError};
2141 ///
2142 /// # fn run() -> Result<(), ParseError> {
2143 /// let mut url = Url::parse("mailto:rms@example.com")?;
2144 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2145 ///
2146 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2147 /// assert!(result.is_err());
2148 /// # Ok(())
2149 /// # }
2150 /// # run().unwrap();
2151 /// ```
2152 ///
2153 #[allow(clippy::result_unit_err)]
2154 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2155 if self.cannot_be_a_base() {
2156 return Err(());
2157 }
2158
2159 let address = match address {
2160 IpAddr::V4(address) => Host::Ipv4(address),
2161 IpAddr::V6(address) => Host::Ipv6(address),
2162 };
2163 self.set_host_internal(address, None);
2164 Ok(())
2165 }
2166
2167 /// Change this URL’s password.
2168 ///
/// If this URL is cannot-be-a-base, does not have a host, has an empty host,
/// or uses the `file` scheme, do nothing and return `Err`.
2170 ///
2171 /// # Examples
2172 ///
2173 /// ```rust
2174 /// use url::{Url, ParseError};
2175 ///
2176 /// # fn run() -> Result<(), ParseError> {
2177 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2178 /// let result = url.set_password(Some("secret_password"));
2179 /// assert!(result.is_err());
2180 ///
2181 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2182 /// let result = url.set_password(Some("secret_password"));
2183 /// assert_eq!(url.password(), Some("secret_password"));
2184 ///
2185 /// let mut url = Url::parse("ftp://user2:@example.com")?;
2186 /// let result = url.set_password(Some("secret2"));
2187 /// assert!(result.is_ok());
2188 /// assert_eq!(url.password(), Some("secret2"));
2189 /// # Ok(())
2190 /// # }
2191 /// # run().unwrap();
2192 /// ```
2193 #[allow(clippy::result_unit_err)]
2194 pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2195 // has_host implies !cannot_be_a_base
2196 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2197 return Err(());
2198 }
2199 let password = password.unwrap_or_default();
2200 if !password.is_empty() {
2201 let host_and_after = self.slice(self.host_start..).to_owned();
2202 self.serialization.truncate(self.username_end as usize);
2203 self.serialization.push(':');
2204 self.serialization
2205 .extend(utf8_percent_encode(password, USERINFO));
2206 self.serialization.push('@');
2207
2208 let old_host_start = self.host_start;
2209 let new_host_start = to_u32(self.serialization.len()).unwrap();
2210 let adjust = |index: &mut u32| {
2211 *index -= old_host_start;
2212 *index += new_host_start;
2213 };
2214 self.host_start = new_host_start;
2215 adjust(&mut self.host_end);
2216 adjust(&mut self.path_start);
2217 if let Some(ref mut index) = self.query_start {
2218 adjust(index)
2219 }
2220 if let Some(ref mut index) = self.fragment_start {
2221 adjust(index)
2222 }
2223
2224 self.serialization.push_str(&host_and_after);
2225 } else if self.byte_at(self.username_end) == b':' {
2226 // If there is a password to remove
2227 let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2228 debug_assert!(has_username_or_password);
2229 let username_start = self.scheme_end + 3;
2230 let empty_username = username_start == self.username_end;
2231 let start = self.username_end; // Remove the ':'
2232 let end = if empty_username {
2233 self.host_start // Remove the '@' as well
2234 } else {
2235 self.host_start - 1 // Keep the '@' to separate the username from the host
2236 };
2237 self.serialization.drain(start as usize..end as usize);
2238 let offset = end - start;
2239 self.host_start -= offset;
2240 self.host_end -= offset;
2241 self.path_start -= offset;
2242 if let Some(ref mut index) = self.query_start {
2243 *index -= offset
2244 }
2245 if let Some(ref mut index) = self.fragment_start {
2246 *index -= offset
2247 }
2248 }
2249 Ok(())
2250 }
2251
2252 /// Change this URL’s username.
2253 ///
/// If this URL is cannot-be-a-base, does not have a host, has an empty host,
/// or uses the `file` scheme, do nothing and return `Err`.
///
2255 /// # Examples
2256 ///
2257 /// Cannot setup username from mailto(cannot-be-base)
2258 ///
2259 /// ```rust
2260 /// use url::{Url, ParseError};
2261 ///
2262 /// # fn run() -> Result<(), ParseError> {
2263 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2264 /// let result = url.set_username("user1");
2265 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2266 /// assert!(result.is_err());
2267 /// # Ok(())
2268 /// # }
2269 /// # run().unwrap();
2270 /// ```
2271 ///
2272 /// Setup username to user1
2273 ///
2274 /// ```rust
2275 /// use url::{Url, ParseError};
2276 ///
2277 /// # fn run() -> Result<(), ParseError> {
2278 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2279 /// let result = url.set_username("user1");
2280 /// assert!(result.is_ok());
2281 /// assert_eq!(url.username(), "user1");
2282 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2283 /// # Ok(())
2284 /// # }
2285 /// # run().unwrap();
2286 /// ```
2287 #[allow(clippy::result_unit_err)]
2288 pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2289 // has_host implies !cannot_be_a_base
2290 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2291 return Err(());
2292 }
2293 let username_start = self.scheme_end + 3;
2294 debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2295 if self.slice(username_start..self.username_end) == username {
2296 return Ok(());
2297 }
2298 let after_username = self.slice(self.username_end..).to_owned();
2299 self.serialization.truncate(username_start as usize);
2300 self.serialization
2301 .extend(utf8_percent_encode(username, USERINFO));
2302
2303 let mut removed_bytes = self.username_end;
2304 self.username_end = to_u32(self.serialization.len()).unwrap();
2305 let mut added_bytes = self.username_end;
2306
2307 let new_username_is_empty = self.username_end == username_start;
2308 match (new_username_is_empty, after_username.chars().next()) {
2309 (true, Some('@')) => {
2310 removed_bytes += 1;
2311 self.serialization.push_str(&after_username[1..]);
2312 }
2313 (false, Some('@')) | (_, Some(':')) | (true, _) => {
2314 self.serialization.push_str(&after_username);
2315 }
2316 (false, _) => {
2317 added_bytes += 1;
2318 self.serialization.push('@');
2319 self.serialization.push_str(&after_username);
2320 }
2321 }
2322
2323 let adjust = |index: &mut u32| {
2324 *index -= removed_bytes;
2325 *index += added_bytes;
2326 };
2327 adjust(&mut self.host_start);
2328 adjust(&mut self.host_end);
2329 adjust(&mut self.path_start);
2330 if let Some(ref mut index) = self.query_start {
2331 adjust(index)
2332 }
2333 if let Some(ref mut index) = self.fragment_start {
2334 adjust(index)
2335 }
2336 Ok(())
2337 }
2338
2339 /// Change this URL’s scheme.
2340 ///
2341 /// Do nothing and return `Err` under the following circumstances:
2342 ///
2343 /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2344 /// * If this URL is cannot-be-a-base and the new scheme is one of
2345 /// `http`, `https`, `ws`, `wss` or `ftp`
2346 /// * If either the old or new scheme is `http`, `https`, `ws`,
2347 /// `wss` or `ftp` and the other is not one of these
2348 /// * If the new scheme is `file` and this URL includes credentials
2349 /// or has a non-null port
2350 /// * If this URL's scheme is `file` and its host is empty or null
2351 ///
2352 /// See also [the URL specification's section on legal scheme state
2353 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2354 ///
2355 /// # Examples
2356 ///
2357 /// Change the URL’s scheme from `https` to `http`:
2358 ///
2359 /// ```
2360 /// use url::Url;
2361 /// # use url::ParseError;
2362 ///
2363 /// # fn run() -> Result<(), ParseError> {
2364 /// let mut url = Url::parse("https://example.net")?;
2365 /// let result = url.set_scheme("http");
2366 /// assert_eq!(url.as_str(), "http://example.net/");
2367 /// assert!(result.is_ok());
2368 /// # Ok(())
2369 /// # }
2370 /// # run().unwrap();
2371 /// ```
2372 /// Change the URL’s scheme from `foo` to `bar`:
2373 ///
2374 /// ```
2375 /// use url::Url;
2376 /// # use url::ParseError;
2377 ///
2378 /// # fn run() -> Result<(), ParseError> {
2379 /// let mut url = Url::parse("foo://example.net")?;
2380 /// let result = url.set_scheme("bar");
2381 /// assert_eq!(url.as_str(), "bar://example.net");
2382 /// assert!(result.is_ok());
2383 /// # Ok(())
2384 /// # }
2385 /// # run().unwrap();
2386 /// ```
2387 ///
2388 /// Cannot change URL’s scheme from `https` to `foõ`:
2389 ///
2390 /// ```
2391 /// use url::Url;
2392 /// # use url::ParseError;
2393 ///
2394 /// # fn run() -> Result<(), ParseError> {
2395 /// let mut url = Url::parse("https://example.net")?;
2396 /// let result = url.set_scheme("foõ");
2397 /// assert_eq!(url.as_str(), "https://example.net/");
2398 /// assert!(result.is_err());
2399 /// # Ok(())
2400 /// # }
2401 /// # run().unwrap();
2402 /// ```
2403 ///
2404 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2405 ///
2406 /// ```
2407 /// use url::Url;
2408 /// # use url::ParseError;
2409 ///
2410 /// # fn run() -> Result<(), ParseError> {
2411 /// let mut url = Url::parse("mailto:rms@example.net")?;
2412 /// let result = url.set_scheme("https");
2413 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2414 /// assert!(result.is_err());
2415 /// # Ok(())
2416 /// # }
2417 /// # run().unwrap();
2418 /// ```
2419 /// Cannot change the URL’s scheme from `foo` to `https`:
2420 ///
2421 /// ```
2422 /// use url::Url;
2423 /// # use url::ParseError;
2424 ///
2425 /// # fn run() -> Result<(), ParseError> {
2426 /// let mut url = Url::parse("foo://example.net")?;
2427 /// let result = url.set_scheme("https");
2428 /// assert_eq!(url.as_str(), "foo://example.net");
2429 /// assert!(result.is_err());
2430 /// # Ok(())
2431 /// # }
2432 /// # run().unwrap();
2433 /// ```
2434 /// Cannot change the URL’s scheme from `http` to `foo`:
2435 ///
2436 /// ```
2437 /// use url::Url;
2438 /// # use url::ParseError;
2439 ///
2440 /// # fn run() -> Result<(), ParseError> {
2441 /// let mut url = Url::parse("http://example.net")?;
2442 /// let result = url.set_scheme("foo");
2443 /// assert_eq!(url.as_str(), "http://example.net/");
2444 /// assert!(result.is_err());
2445 /// # Ok(())
2446 /// # }
2447 /// # run().unwrap();
2448 /// ```
2449 #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
2450 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2451 let mut parser = Parser::for_setter(String::new());
2452 let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2453 let new_scheme_type = SchemeType::from(&parser.serialization);
2454 let old_scheme_type = SchemeType::from(self.scheme());
2455 // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2456 if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2457 // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2458 (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2459 // If url includes credentials or has a non-null port, and buffer is "file", then return.
2460 // If url’s scheme is "file" and its host is an empty host or null, then return.
2461 (new_scheme_type.is_file() && self.has_authority())
2462 {
2463 return Err(());
2464 }
2465
2466 if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2467 return Err(());
2468 }
2469 let old_scheme_end = self.scheme_end;
2470 let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2471 let adjust = |index: &mut u32| {
2472 *index -= old_scheme_end;
2473 *index += new_scheme_end;
2474 };
2475
2476 self.scheme_end = new_scheme_end;
2477 adjust(&mut self.username_end);
2478 adjust(&mut self.host_start);
2479 adjust(&mut self.host_end);
2480 adjust(&mut self.path_start);
2481 if let Some(ref mut index) = self.query_start {
2482 adjust(index)
2483 }
2484 if let Some(ref mut index) = self.fragment_start {
2485 adjust(index)
2486 }
2487
2488 parser.serialization.push_str(self.slice(old_scheme_end..));
2489 self.serialization = parser.serialization;
2490
2491 // Update the port so it can be removed
2492 // If it is the scheme's default
2493 // we don't mind it silently failing
2494 // if there was no port in the first place
2495 let previous_port = self.port();
2496 let _ = self.set_port(previous_port);
2497
2498 Ok(())
2499 }
2500
2501 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2502 ///
2503 /// This returns `Err` if the given path is not absolute or,
2504 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2505 ///
2506 /// # Examples
2507 ///
2508 /// On Unix-like platforms:
2509 ///
2510 /// ```
2511 /// # if cfg!(unix) {
2512 /// use url::Url;
2513 ///
2514 /// # fn run() -> Result<(), ()> {
2515 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2516 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2517 ///
2518 /// let url = Url::from_file_path("../foo.txt");
2519 /// assert!(url.is_err());
2520 ///
2521 /// let url = Url::from_file_path("https://google.com/");
2522 /// assert!(url.is_err());
2523 /// # Ok(())
2524 /// # }
2525 /// # run().unwrap();
2526 /// # }
2527 /// ```
2528 ///
2529 /// This method is only available if the `std` Cargo feature is enabled.
#[cfg(all(
    feature = "std",
    any(
        unix,
        windows,
        target_os = "redox",
        target_os = "wasi",
        target_os = "hermit"
    )
))]
#[allow(clippy::result_unit_err)]
pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
    // Start from the fixed prefix; the helper appends host (if any) and path.
    let mut serialization = String::from("file://");
    let host_start = serialization.len() as u32;
    let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
    let url = Url {
        serialization,
        scheme_end: "file".len() as u32,
        // There is no userinfo, so the username ends where the host begins.
        username_end: host_start,
        host_start,
        host_end,
        host,
        port: None,
        // The path follows the host directly.
        path_start: host_end,
        query_start: None,
        fragment_start: None,
    };
    Ok(url)
}
2558
2559 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2560 ///
2561 /// This returns `Err` if the given path is not absolute or,
2562 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2563 ///
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
2565 /// so that the entire path is considered when using this URL as a base URL.
2566 ///
2567 /// For example:
2568 ///
2569 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2570 /// as the base URL is `file:///var/www/index.html`
2571 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2572 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2573 ///
2574 /// Note that `std::path` does not consider trailing slashes significant
2575 /// and usually does not include them (e.g. in `Path::parent()`).
2576 ///
2577 /// This method is only available if the `std` Cargo feature is enabled.
#[cfg(all(
    feature = "std",
    any(
        unix,
        windows,
        target_os = "redox",
        target_os = "wasi",
        target_os = "hermit"
    )
))]
#[allow(clippy::result_unit_err)]
pub fn from_directory_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
    Url::from_file_path(path).map(|mut url| {
        // Append the trailing slash only when the serialization lacks one.
        if !url.serialization.ends_with('/') {
            url.serialization.push('/');
        }
        url
    })
}
2596
2597 /// Serialize with Serde using the internal representation of the `Url` struct.
2598 ///
2599 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2600 /// for speed, compared to the `Deserialize` trait impl.
2601 ///
2602 /// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Exhaustive destructuring (no `..`): adding or removing a field of `Url`
    // stops this method from compiling until it is updated to match.
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    // The fields are serialized as one tuple of references, in declaration order.
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2638
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some
/// invariant-checking for speed, compared to the `Deserialize` trait impl:
/// the invariants are only checked in builds with debug assertions enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error};
    // The fields arrive as one tuple, in the order `serialize_internal` wrote them.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    if cfg!(debug_assertions) {
        // Report a failed invariant as a deserialization error.
        if let Err(reason) = url.check_invariants() {
            return Err(Error::custom(reason));
        }
    }
    Ok(url)
}
2682
2683 /// Assuming the URL is in the `file` scheme or similar,
2684 /// convert its path to an absolute `std::path::Path`.
2685 ///
2686 /// **Note:** This does not actually check the URL’s `scheme`,
2687 /// and may give nonsensical results for other schemes.
2688 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2689 ///
2690 /// ```
2691 /// # use url::Url;
2692 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2693 /// let path = url.to_file_path();
2694 /// ```
2695 ///
2696 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2697 /// `file:` URLs may have a non-local host),
2698 /// or if `Path::new_opt()` returns `None`.
2699 /// (That is, if the percent-decoded path contains a NUL byte or,
2700 /// for a Windows path, is not UTF-8.)
2701 ///
2702 /// This method is only available if the `std` Cargo feature is enabled.
#[inline]
#[cfg(all(
    feature = "std",
    any(
        unix,
        windows,
        target_os = "redox",
        target_os = "wasi",
        target_os = "hermit"
    )
))]
#[allow(clippy::result_unit_err)]
pub fn to_file_path(&self) -> Result<PathBuf, ()> {
    // Without path segments there is nothing to convert.
    let segments = match self.path_segments() {
        Some(segments) => segments,
        None => return Err(()),
    };

    let host = match self.host() {
        // No host and "localhost" are both treated as "no host".
        None | Some(Host::Domain("localhost")) => None,
        // On Windows, a `file` URL may carry a host; pass its text along.
        Some(_) if cfg!(windows) && self.scheme() == "file" => {
            Some(self.slice(self.host_start..self.host_end))
        }
        _ => return Err(()),
    };

    file_url_segments_to_pathbuf(host, segments)
}
2729
2730 // Private helper methods:
2731
2732 #[inline]
2733 fn slice<R>(&self, range: R) -> &str
2734 where
2735 R: RangeArg,
2736 {
2737 range.slice_of(&self.serialization)
2738 }
2739
2740 #[inline]
2741 fn byte_at(&self, i: u32) -> u8 {
2742 self.serialization.as_bytes()[i as usize]
2743 }
2744}
2745
2746/// Parse a string as an URL, without a base URL or encoding override.
2747impl str::FromStr for Url {
2748 type Err = ParseError;
2749
2750 #[inline]
2751 fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2752 Url::parse(input)
2753 }
2754}
2755
2756impl<'a> TryFrom<&'a str> for Url {
2757 type Error = ParseError;
2758
2759 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2760 Url::parse(s)
2761 }
2762}
2763
2764/// Display the serialization of this URL.
2765impl fmt::Display for Url {
2766 #[inline]
2767 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2768 fmt::Display::fmt(&self.serialization, formatter)
2769 }
2770}
2771
2772/// String conversion.
2773impl From<Url> for String {
2774 fn from(value: Url) -> String {
2775 value.serialization
2776 }
2777}
2778
2779/// Debug the serialization of this URL.
2780impl fmt::Debug for Url {
2781 #[inline]
2782 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2783 formatter
2784 .debug_struct("Url")
2785 .field("scheme", &self.scheme())
2786 .field("cannot_be_a_base", &self.cannot_be_a_base())
2787 .field("username", &self.username())
2788 .field("password", &self.password())
2789 .field("host", &self.host())
2790 .field("port", &self.port())
2791 .field("path", &self.path())
2792 .field("query", &self.query())
2793 .field("fragment", &self.fragment())
2794 .finish()
2795 }
2796}
2797
/// URLs compare like their serialization.
///
/// Marker impl with no methods; the comparison itself is in `PartialEq for Url`.
impl Eq for Url {}
2800
2801/// URLs compare like their serialization.
2802impl PartialEq for Url {
2803 #[inline]
2804 fn eq(&self, other: &Self) -> bool {
2805 self.serialization == other.serialization
2806 }
2807}
2808
2809/// URLs compare like their serialization.
2810impl Ord for Url {
2811 #[inline]
2812 fn cmp(&self, other: &Self) -> cmp::Ordering {
2813 self.serialization.cmp(&other.serialization)
2814 }
2815}
2816
2817/// URLs compare like their serialization.
2818impl PartialOrd for Url {
2819 #[inline]
2820 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2821 Some(self.cmp(other))
2822 }
2823}
2824
2825/// URLs hash like their serialization.
2826impl hash::Hash for Url {
2827 #[inline]
2828 fn hash<H>(&self, state: &mut H)
2829 where
2830 H: hash::Hasher,
2831 {
2832 hash::Hash::hash(&self.serialization, state)
2833 }
2834}
2835
2836/// Return the serialization of this URL.
2837impl AsRef<str> for Url {
2838 #[inline]
2839 fn as_ref(&self) -> &str {
2840 &self.serialization
2841 }
2842}
2843
/// Range types with `u32` bounds that can slice a `&str`.
///
/// Slicing panics when a bound is out of range or not on a character boundary.
trait RangeArg {
    /// Return the part of `s` selected by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (start, end) = (self.start as usize, self.end as usize);
        &s[start..end]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let start = self.start as usize;
        &s[start..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let end = self.end as usize;
        &s[..end]
    }
}
2868
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string, as returned by `as_str`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2881
/// Deserializes this URL from a `serde` stream.
///
/// A string is expected and is run through `Url::parse`; a parse failure is
/// reported as a custom deserialization error that includes the input.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Visitor};

        // Visitor that turns a string into a parsed `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    // Message is the parse error followed by the quoted input.
                    Err(err) => Err(E::custom(format!("{}: {:?}", err, s))),
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2913
/// Append the percent-encoded components of an absolute `path` to
/// `serialization`, one `/segment` per component after the root.
///
/// Returns `Err(())` for a non-absolute path. On success returns the offset at
/// which the path text starts (the length of `serialization` on entry),
/// together with `HostInternal::None`.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use parser::SPECIAL_PATH_SEGMENT;
    use percent_encoding::percent_encode;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;

    if !path.is_absolute() {
        return Err(());
    }
    let host_end = to_u32(serialization.len()).unwrap();
    let len_before_path = serialization.len();

    // skip the root component
    for component in path.components().skip(1) {
        let name = component.as_os_str();
        serialization.push('/');
        #[cfg(not(target_os = "wasi"))]
        serialization.extend(percent_encode(name.as_bytes(), SPECIAL_PATH_SEGMENT));
        #[cfg(target_os = "wasi")]
        serialization.extend(percent_encode(
            name.to_string_lossy().as_bytes(),
            SPECIAL_PATH_SEGMENT,
        ));
    }

    // Every component adds at least its '/', so an unchanged length means the
    // path was only the root. An URL’s path must not be empty.
    if serialization.len() == len_before_path {
        serialization.push('/');
    }
    Ok((host_end, HostInternal::None))
}
2954
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards both arguments unchanged to `path_to_file_url_segments_windows`
/// and returns its result.
#[cfg(all(feature = "std", windows))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2962
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Append a Windows-style absolute `path` to `serialization` as URL text.
///
/// A disk prefix (`C:`) becomes the first path segment and leaves the host
/// empty; a UNC prefix contributes the server as host and the share as first
/// segment. Any other prefix, a missing prefix, a non-absolute path, or a
/// server, share or component that is not valid Unicode yields `Err(())`.
///
/// Returns the offset at which the host text ends, and the parsed host.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_segments_windows(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use crate::parser::PATH_SEGMENT;
    use percent_encoding::percent_encode;
    use std::path::{Component, Prefix};

    if !path.is_absolute() {
        return Err(());
    }
    let mut components = path.components();

    // One past the current end: where a drive letter would sit after its '/'.
    let host_start = serialization.len() + 1;

    let prefix = match components.next() {
        Some(Component::Prefix(ref p)) => p.kind(),
        _ => return Err(()),
    };
    let (host_end, host_internal) = match prefix {
        Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
            let end = to_u32(serialization.len()).unwrap();
            serialization.push('/');
            serialization.push(letter as char);
            serialization.push(':');
            (end, HostInternal::None)
        }
        Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
            let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
            write!(serialization, "{}", host).unwrap();
            let end = to_u32(serialization.len()).unwrap();
            let internal: HostInternal = host.into();
            serialization.push('/');
            let share = share.to_str().ok_or(())?;
            serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
            (end, internal)
        }
        _ => return Err(()),
    };

    let mut path_only_has_prefix = true;
    // The root directory separator does not become a segment of its own.
    for component in components.filter(|c| *c != Component::RootDir) {
        path_only_has_prefix = false;
        // FIXME: somehow work with non-unicode?
        let segment = component.as_os_str().to_str().ok_or(())?;

        serialization.push('/');
        serialization.extend(percent_encode(segment.as_bytes(), PATH_SEGMENT));
    }

    // A windows drive letter must end with a slash.
    if serialization.len() > host_start
        && parser::is_windows_drive_letter(&serialization[host_start..])
        && path_only_has_prefix
    {
        serialization.push('/');
    }

    Ok((host_end, host_internal))
}
3029
/// Builds a filesystem path from the path segments of a `file:` URL on
/// Unix-like targets (unix, redox, wasi, hermit).
///
/// Each segment is percent-decoded and appended after a `/`. On redox the
/// result is additionally prefixed with `file:`.
///
/// # Errors
///
/// Returns `Err(())` when `host` is present, or — on wasi only — when the
/// decoded bytes are not valid UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    // A URL with a host cannot be mapped to a path here.
    if host.is_some() {
        return Err(());
    }

    let mut raw: Vec<u8> = if cfg!(target_os = "redox") {
        b"file:".to_vec()
    } else {
        Vec::new()
    };

    for piece in segments {
        raw.push(b'/');
        raw.extend(percent_decode(piece.as_bytes()));
    }

    // A windows drive letter must end with a slash.
    // The patterns require at least three bytes, the last two being an ASCII
    // letter followed by `:` or `|`.
    let ends_with_drive_letter = match raw.as_slice() {
        [_, .., letter, b':'] | [_, .., letter, b'|'] => letter.is_ascii_alphabetic(),
        _ => false,
    };
    if ends_with_drive_letter {
        raw.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&raw));
    #[cfg(target_os = "wasi")]
    let path = PathBuf::from(String::from_utf8(raw).map_err(|_| ())?);

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
3085
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Converts the optional `host` and the path `segments` of a `file:` URL
/// into a `PathBuf` by forwarding to `file_url_segments_to_pathbuf_windows`,
/// and returns that function's result unchanged.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    // The actual logic lives in a function that is compiled on every platform.
    file_url_segments_to_pathbuf_windows(host, segments)
}
3093
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Builds a Windows-style path from the host and path segments of a `file:`
/// URL.
///
/// * With a host, the path starts with `\\` followed by the host.
/// * Without a host, the first segment must be a drive letter written either
///   as `X:` or as `X%3A` / `X%3a`; it is normalized to `X:`.
///
/// Every following segment is percent-decoded and appended after a `\`.
///
/// # Errors
///
/// Returns `Err(())` when there is no host and the first segment is missing or
/// is not a drive letter in one of the accepted forms, or when a decoded
/// segment is not valid UTF-8.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use percent_encoding::percent_decode;

    let mut string = match host {
        Some(host) => r"\\".to_owned() + host,
        None => {
            let first = segments.next().ok_or(())?;
            // Both accepted spellings begin with an ASCII letter, which also
            // makes the one-byte slice below fall on a character boundary.
            let starts_with_letter = first.starts_with(parser::ascii_alpha);

            match first.as_bytes() {
                // Literal form: `C:`
                [_, b':'] if starts_with_letter => first.to_owned(),
                // Percent-encoded colon: `C%3A` or `C%3a`
                [_, b'%', b'3', b'a'] | [_, b'%', b'3', b'A'] if starts_with_letter => {
                    first[0..1].to_owned() + ":"
                }
                _ => return Err(()),
            }
        }
    };

    for piece in segments {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded = String::from_utf8(percent_decode(piece.as_bytes()).collect())
            .map_err(|_| ())?;
        string.push_str(&decoded);
    }

    let path = PathBuf::from(string);
    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );
    Ok(path)
}
3148
3149/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
3150#[derive(Debug)]
3151pub struct UrlQuery<'a> {
3152 url: Option<&'a mut Url>,
3153 fragment: Option<String>,
3154}
3155
3156// `as_mut_string` string here exposes the internal serialization of an `Url`,
3157// which should not be exposed to users.
3158// We achieve that by not giving users direct access to `UrlQuery`:
3159// * Its fields are private
3160// (and so can not be constructed with struct literal syntax outside of this crate),
3161// * It has no constructor
3162// * It is only visible (on the type level) to users in the return type of
3163// `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
3164// * `Serializer` keeps its target in a private field
3165// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
3166impl<'a> form_urlencoded::Target for UrlQuery<'a> {
3167 fn as_mut_string(&mut self) -> &mut String {
3168 &mut self.url.as_mut().unwrap().serialization
3169 }
3170
3171 fn finish(mut self) -> &'a mut Url {
3172 let url = self.url.take().unwrap();
3173 url.restore_already_parsed_fragment(self.fragment.take());
3174 url
3175 }
3176
3177 type Finished = &'a mut Url;
3178}
3179
3180impl<'a> Drop for UrlQuery<'a> {
3181 fn drop(&mut self) {
3182 if let Some(url) = self.url.take() {
3183 url.restore_already_parsed_fragment(self.fragment.take())
3184 }
3185 }
3186}