utf8_iter/
indices.rs

1// The code in this file was adapted from the CharIndices implementation of
2// the Rust standard library at revision ab32548539ec38a939c1b58599249f3b54130026
3// (https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/library/core/src/str/iter.rs).
4//
5// Excerpt from https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/COPYRIGHT ,
6// which refers to
7// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-APACHE
8// and
9// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-MIT
10// :
11//
12// For full authorship information, see the version control history or
13// https://thanks.rust-lang.org
14//
15// Except as otherwise noted (below and/or in individual files), Rust is
16// licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or
17// <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
18// <LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option.
19
20use super::Utf8Chars;
21use core::iter::FusedIterator;
22
23/// An iterator over the [`char`]s  and their positions.
24#[derive(Clone, Debug)]
25#[must_use = "iterators are lazy and do nothing unless consumed"]
26pub struct Utf8CharIndices<'a> {
27    front_offset: usize,
28    iter: Utf8Chars<'a>,
29}
30
31impl<'a> Iterator for Utf8CharIndices<'a> {
32    type Item = (usize, char);
33
34    #[inline]
35    fn next(&mut self) -> Option<(usize, char)> {
36        let pre_len = self.as_slice().len();
37        match self.iter.next() {
38            None => None,
39            Some(ch) => {
40                let index = self.front_offset;
41                let len = self.as_slice().len();
42                self.front_offset += pre_len - len;
43                Some((index, ch))
44            }
45        }
46    }
47
48    #[inline]
49    fn count(self) -> usize {
50        self.iter.count()
51    }
52
53    #[inline]
54    fn size_hint(&self) -> (usize, Option<usize>) {
55        self.iter.size_hint()
56    }
57
58    #[inline]
59    fn last(mut self) -> Option<(usize, char)> {
60        // No need to go through the entire string.
61        self.next_back()
62    }
63}
64
65impl<'a> DoubleEndedIterator for Utf8CharIndices<'a> {
66    #[inline]
67    fn next_back(&mut self) -> Option<(usize, char)> {
68        self.iter.next_back().map(|ch| {
69            let index = self.front_offset + self.as_slice().len();
70            (index, ch)
71        })
72    }
73}
74
75impl FusedIterator for Utf8CharIndices<'_> {}
76
77impl<'a> Utf8CharIndices<'a> {
78    #[inline(always)]
79    /// Creates the iterator from a byte slice.
80    pub fn new(bytes: &'a [u8]) -> Self {
81        Utf8CharIndices::<'a> {
82            front_offset: 0,
83            iter: Utf8Chars::new(bytes),
84        }
85    }
86
87    /// Views the underlying data as a subslice of the original data.
88    ///
89    /// This has the same lifetime as the original slice, and so the
90    /// iterator can continue to be used while this exists.
91    #[must_use]
92    #[inline]
93    pub fn as_slice(&self) -> &'a [u8] {
94        self.iter.as_slice()
95    }
96
97    /// Returns the byte position of the next character, or the length
98    /// of the underlying string if there are no more characters.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// use utf8_iter::Utf8CharsEx;
104    /// let mut chars = "a楽".as_bytes().char_indices();
105    ///
106    /// assert_eq!(chars.offset(), 0);
107    /// assert_eq!(chars.next(), Some((0, 'a')));
108    ///
109    /// assert_eq!(chars.offset(), 1);
110    /// assert_eq!(chars.next(), Some((1, '楽')));
111    ///
112    /// assert_eq!(chars.offset(), 4);
113    /// assert_eq!(chars.next(), None);
114    /// ```
115    #[inline]
116    #[must_use]
117    pub fn offset(&self) -> usize {
118        self.front_offset
119    }
120}