clap_lex/
lib.rs

1//! Minimal, flexible command-line parser
2//!
3//! As opposed to a declarative parser, this processes arguments as a stream of tokens.  As lexing
4//! a command-line is not context-free, we rely on the caller to decide how to interpret the
5//! arguments.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use std::path::PathBuf;
11//! use std::ffi::OsStr;
12//!
13//! type BoxedError = Box<dyn std::error::Error + Send + Sync>;
14//!
15//! #[derive(Debug)]
16//! struct Args {
17//!     paths: Vec<PathBuf>,
18//!     color: Color,
19//!     verbosity: usize,
20//! }
21//!
22//! #[derive(Debug)]
23//! enum Color {
24//!     Always,
25//!     Auto,
26//!     Never,
27//! }
28//!
29//! impl Color {
30//!     fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> {
31//!         let s = s.map(|s| s.to_str().ok_or(s));
32//!         match s {
33//!             Some(Ok("always")) | Some(Ok("")) | None => {
34//!                 Ok(Color::Always)
35//!             }
36//!             Some(Ok("auto")) => {
37//!                 Ok(Color::Auto)
38//!             }
39//!             Some(Ok("never")) => {
40//!                 Ok(Color::Never)
41//!             }
42//!             Some(invalid) => {
43//!                 Err(format!("Invalid value for `--color`, {invalid:?}").into())
44//!             }
45//!         }
46//!     }
47//! }
48//!
49//! fn parse_args(
50//!     raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>>
51//! ) -> Result<Args, BoxedError> {
52//!     let mut args = Args {
53//!         paths: Vec::new(),
54//!         color: Color::Auto,
55//!         verbosity: 0,
56//!     };
57//!
58//!     let raw = clap_lex::RawArgs::new(raw);
59//!     let mut cursor = raw.cursor();
60//!     raw.next(&mut cursor);  // Skip the bin
61//!     while let Some(arg) = raw.next(&mut cursor) {
62//!         if arg.is_escape() {
63//!             args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from));
64//!         } else if arg.is_stdio() {
65//!             args.paths.push(PathBuf::from("-"));
66//!         } else if let Some((long, value)) = arg.to_long() {
67//!             match long {
68//!                 Ok("verbose") => {
69//!                     if let Some(value) = value {
70//!                         return Err(format!("`--verbose` does not take a value, got `{value:?}`").into());
71//!                     }
72//!                     args.verbosity += 1;
73//!                 }
74//!                 Ok("color") => {
75//!                     args.color = Color::parse(value)?;
76//!                 }
77//!                 _ => {
78//!                     return Err(
//!                         format!("Unexpected flag: {}", arg.display()).into()
80//!                     );
81//!                 }
82//!             }
83//!         } else if let Some(mut shorts) = arg.to_short() {
84//!             while let Some(short) = shorts.next_flag() {
85//!                 match short {
86//!                     Ok('v') => {
87//!                         args.verbosity += 1;
88//!                     }
89//!                     Ok('c') => {
90//!                         let value = shorts.next_value_os();
91//!                         args.color = Color::parse(value)?;
92//!                     }
93//!                     Ok(c) => {
94//!                         return Err(format!("Unexpected flag: -{c}").into());
95//!                     }
96//!                     Err(e) => {
97//!                         return Err(format!("Unexpected flag: -{}", e.to_string_lossy()).into());
98//!                     }
99//!                 }
100//!             }
101//!         } else {
102//!             args.paths.push(PathBuf::from(arg.to_value_os().to_owned()));
103//!         }
104//!     }
105//!
106//!     Ok(args)
107//! }
108//!
109//! let args = parse_args(["bin", "--hello", "world"]);
110//! println!("{args:?}");
111//! ```
112
113#![cfg_attr(docsrs, feature(doc_auto_cfg))]
114#![warn(missing_docs)]
115#![warn(clippy::print_stderr)]
116#![warn(clippy::print_stdout)]
117
118mod ext;
119
120use std::ffi::OsStr;
121use std::ffi::OsString;
122
123pub use std::io::SeekFrom;
124
125pub use ext::OsStrExt;
126
/// Command-line arguments
///
/// Owns the raw arguments as [`OsString`]s. The list holds no read position of its own;
/// it is walked through an [`ArgCursor`] obtained from [`RawArgs::cursor`].
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct RawArgs {
    // The arguments, in order.
    items: Vec<OsString>,
}
132
133impl RawArgs {
134    //// Create an argument list to parse
135    ///
136    /// <div class="warning">
137    ///
138    /// **NOTE:** The argument returned will be the current binary.
139    ///
140    /// </div>
141    ///
142    /// # Example
143    ///
144    /// ```rust,no_run
145    /// # use std::path::PathBuf;
146    /// let raw = clap_lex::RawArgs::from_args();
147    /// let mut cursor = raw.cursor();
148    /// let _bin = raw.next_os(&mut cursor);
149    ///
150    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
151    /// println!("{paths:?}");
152    /// ```
153    pub fn from_args() -> Self {
154        Self::new(std::env::args_os())
155    }
156
157    //// Create an argument list to parse
158    ///
159    /// # Example
160    ///
161    /// ```rust,no_run
162    /// # use std::path::PathBuf;
163    /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
164    /// let mut cursor = raw.cursor();
165    /// let _bin = raw.next_os(&mut cursor);
166    ///
167    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
168    /// println!("{paths:?}");
169    /// ```
170    pub fn new(iter: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
171        let iter = iter.into_iter();
172        Self::from(iter)
173    }
174
175    /// Create a cursor for walking the arguments
176    ///
177    /// # Example
178    ///
179    /// ```rust,no_run
180    /// # use std::path::PathBuf;
181    /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
182    /// let mut cursor = raw.cursor();
183    /// let _bin = raw.next_os(&mut cursor);
184    ///
185    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
186    /// println!("{paths:?}");
187    /// ```
188    pub fn cursor(&self) -> ArgCursor {
189        ArgCursor::new()
190    }
191
192    /// Advance the cursor, returning the next [`ParsedArg`]
193    pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> {
194        self.next_os(cursor).map(ParsedArg::new)
195    }
196
197    /// Advance the cursor, returning a raw argument value.
198    pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
199        let next = self.items.get(cursor.cursor).map(|s| s.as_os_str());
200        cursor.cursor = cursor.cursor.saturating_add(1);
201        next
202    }
203
204    /// Return the next [`ParsedArg`]
205    pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> {
206        self.peek_os(cursor).map(ParsedArg::new)
207    }
208
209    /// Return a raw argument value.
210    pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> {
211        self.items.get(cursor.cursor).map(|s| s.as_os_str())
212    }
213
214    /// Return all remaining raw arguments, advancing the cursor to the end
215    ///
216    /// # Example
217    ///
218    /// ```rust,no_run
219    /// # use std::path::PathBuf;
220    /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
221    /// let mut cursor = raw.cursor();
222    /// let _bin = raw.next_os(&mut cursor);
223    ///
224    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
225    /// println!("{paths:?}");
226    /// ```
227    pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> {
228        let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str());
229        cursor.cursor = self.items.len();
230        remaining
231    }
232
233    /// Adjust the cursor's position
234    pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) {
235        let pos = match pos {
236            SeekFrom::Start(pos) => pos,
237            SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64,
238            SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64,
239        };
240        let pos = (pos as usize).min(self.items.len());
241        cursor.cursor = pos;
242    }
243
244    /// Inject arguments before the [`RawArgs::next`]
245    pub fn insert(
246        &mut self,
247        cursor: &ArgCursor,
248        insert_items: impl IntoIterator<Item = impl Into<OsString>>,
249    ) {
250        self.items.splice(
251            cursor.cursor..cursor.cursor,
252            insert_items.into_iter().map(Into::into),
253        );
254    }
255
256    /// Any remaining args?
257    pub fn is_end(&self, cursor: &ArgCursor) -> bool {
258        self.peek_os(cursor).is_none()
259    }
260}
261
262impl<I, T> From<I> for RawArgs
263where
264    I: Iterator<Item = T>,
265    T: Into<OsString>,
266{
267    fn from(val: I) -> Self {
268        Self {
269            items: val.map(|x| x.into()).collect(),
270        }
271    }
272}
273
/// Position within [`RawArgs`]
///
/// Obtained from [`RawArgs::cursor`] and passed back to the [`RawArgs`] methods that read
/// or move through the argument list.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    // Index into the argument list of the next argument to read.
    cursor: usize,
}

impl ArgCursor {
    // A cursor pointing at the first argument.
    fn new() -> Self {
        ArgCursor { cursor: 0 }
    }
}
285
/// Command-line Argument
///
/// A borrowed view of one raw argument. The accessor methods classify it (long flag, short
/// flags, `--` escape, `-` stdio) or hand it back unchanged as a value.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParsedArg<'s> {
    // The raw argument exactly as given, leading dashes included.
    inner: &'s OsStr,
}
291
292impl<'s> ParsedArg<'s> {
293    fn new(inner: &'s OsStr) -> Self {
294        Self { inner }
295    }
296
297    /// Argument is length of 0
298    pub fn is_empty(&self) -> bool {
299        self.inner.is_empty()
300    }
301
302    /// Does the argument look like a stdio argument (`-`)
303    pub fn is_stdio(&self) -> bool {
304        self.inner == "-"
305    }
306
307    /// Does the argument look like an argument escape (`--`)
308    pub fn is_escape(&self) -> bool {
309        self.inner == "--"
310    }
311
312    /// Does the argument look like a negative number?
313    ///
314    /// This won't parse the number in full but attempts to see if this looks
315    /// like something along the lines of `-3`, `-0.3`, or `-33.03`
316    pub fn is_negative_number(&self) -> bool {
317        self.to_value()
318            .ok()
319            .and_then(|s| Some(is_number(s.strip_prefix('-')?)))
320            .unwrap_or_default()
321    }
322
323    /// Treat as a long-flag
324    pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> {
325        let raw = self.inner;
326        let remainder = raw.strip_prefix("--")?;
327        if remainder.is_empty() {
328            debug_assert!(self.is_escape());
329            return None;
330        }
331
332        let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=") {
333            (p0, Some(p1))
334        } else {
335            (remainder, None)
336        };
337        let flag = flag.to_str().ok_or(flag);
338        Some((flag, value))
339    }
340
341    /// Can treat as a long-flag
342    pub fn is_long(&self) -> bool {
343        self.inner.starts_with("--") && !self.is_escape()
344    }
345
346    /// Treat as a short-flag
347    pub fn to_short(&self) -> Option<ShortFlags<'_>> {
348        if let Some(remainder_os) = self.inner.strip_prefix("-") {
349            if remainder_os.starts_with("-") {
350                None
351            } else if remainder_os.is_empty() {
352                debug_assert!(self.is_stdio());
353                None
354            } else {
355                Some(ShortFlags::new(remainder_os))
356            }
357        } else {
358            None
359        }
360    }
361
362    /// Can treat as a short-flag
363    pub fn is_short(&self) -> bool {
364        self.inner.starts_with("-") && !self.is_stdio() && !self.inner.starts_with("--")
365    }
366
367    /// Treat as a value
368    ///
369    /// <div class="warning">
370    ///
371    /// **NOTE:** May return a flag or an escape.
372    ///
373    /// </div>
374    pub fn to_value_os(&self) -> &OsStr {
375        self.inner
376    }
377
378    /// Treat as a value
379    ///
380    /// <div class="warning">
381    ///
382    /// **NOTE:** May return a flag or an escape.
383    ///
384    /// </div>
385    pub fn to_value(&self) -> Result<&str, &OsStr> {
386        self.inner.to_str().ok_or(self.inner)
387    }
388
389    /// Safely print an argument that may contain non-UTF8 content
390    ///
391    /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead.
392    pub fn display(&self) -> impl std::fmt::Display + '_ {
393        self.inner.to_string_lossy()
394    }
395}
396
/// Walk through short flags within a [`ParsedArg`]
///
/// Yields one flag per `char` of the valid-UTF-8 part of the argument, followed by a single
/// error item carrying whatever could not be decoded as UTF-8.
#[derive(Clone, Debug)]
pub struct ShortFlags<'s> {
    // The full flag cluster this walker was created from.
    inner: &'s OsStr,
    // Remaining flags in the leading valid-UTF-8 portion of `inner`; the indices are byte
    // offsets into `inner`.
    utf8_prefix: std::str::CharIndices<'s>,
    // Not-yet-returned tail of `inner`, starting at the first invalid UTF-8 byte, if any.
    invalid_suffix: Option<&'s OsStr>,
}
404
405impl<'s> ShortFlags<'s> {
406    fn new(inner: &'s OsStr) -> Self {
407        let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner);
408        let utf8_prefix = utf8_prefix.char_indices();
409        Self {
410            inner,
411            utf8_prefix,
412            invalid_suffix,
413        }
414    }
415
416    /// Move the iterator forward by `n` short flags
417    pub fn advance_by(&mut self, n: usize) -> Result<(), usize> {
418        for i in 0..n {
419            self.next().ok_or(i)?.map_err(|_| i)?;
420        }
421        Ok(())
422    }
423
424    /// No short flags left
425    pub fn is_empty(&self) -> bool {
426        self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty()
427    }
428
429    /// Does the short flag look like a number
430    ///
431    /// Ideally call this before doing any iterator
432    pub fn is_negative_number(&self) -> bool {
433        self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str())
434    }
435
436    /// Advance the iterator, returning the next short flag on success
437    ///
438    /// On error, returns the invalid-UTF8 value
439    pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> {
440        if let Some((_, flag)) = self.utf8_prefix.next() {
441            return Some(Ok(flag));
442        }
443
444        if let Some(suffix) = self.invalid_suffix {
445            self.invalid_suffix = None;
446            return Some(Err(suffix));
447        }
448
449        None
450    }
451
452    /// Advance the iterator, returning everything left as a value
453    pub fn next_value_os(&mut self) -> Option<&'s OsStr> {
454        if let Some((index, _)) = self.utf8_prefix.next() {
455            self.utf8_prefix = "".char_indices();
456            self.invalid_suffix = None;
457            // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary
458            let remainder = unsafe { ext::split_at(self.inner, index).1 };
459            return Some(remainder);
460        }
461
462        if let Some(suffix) = self.invalid_suffix {
463            self.invalid_suffix = None;
464            return Some(suffix);
465        }
466
467        None
468    }
469}
470
471impl<'s> Iterator for ShortFlags<'s> {
472    type Item = Result<char, &'s OsStr>;
473
474    fn next(&mut self) -> Option<Self::Item> {
475        self.next_flag()
476    }
477}
478
479fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) {
480    match b.try_str() {
481        Ok(s) => (s, None),
482        Err(err) => {
483            // SAFETY: `err.valid_up_to()`, which came from str::from_utf8(), is guaranteed
484            // to be a valid UTF8 boundary
485            let (valid, after_valid) = unsafe { ext::split_at(b, err.valid_up_to()) };
486            let valid = valid.try_str().unwrap();
487            (valid, Some(after_valid))
488        }
489    }
490}
491
/// Does `arg` look like an unsigned integer or float?
///
/// This is a shape check, not a parse. It accepts ASCII digits with an optional single dot
/// after at least one character and an optional `e`/`E` exponent, e.g. `1`, `1.`, `1.2`,
/// `1.2e10`. Any sign must be stripped by the caller. The empty string is rejected: with no
/// characters at all there is nothing number-like about it.
fn is_number(arg: &str) -> bool {
    // Without this guard the loop below never runs and the function would report `true`,
    // making a bare `-` look like a negative number to callers that strip the sign first.
    if arg.is_empty() {
        return false;
    }

    let mut seen_dot = false;
    let mut position_of_e = None;
    for (i, c) in arg.as_bytes().iter().enumerate() {
        match c {
            // Digits are always valid
            b'0'..=b'9' => {}

            // Allow a `.`, but only one, only if it comes before an
            // optional exponent, and only if it's not the first character.
            b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true,

            // Allow an exponent `e`/`E` but only at most one after the first
            // character.
            b'e' | b'E' if position_of_e.is_none() && i > 0 => position_of_e = Some(i),

            _ => return false,
        }
    }

    // Disallow `-1e` which isn't a valid float since it doesn't actually have
    // an exponent.
    match position_of_e {
        Some(i) => i != arg.len() - 1,
        None => true,
    }
}