utf8_iter/indices.rs
1// The code in this file was adapted from the CharIndices implementation of
2// the Rust standard library at revision ab32548539ec38a939c1b58599249f3b54130026
3// (https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/library/core/src/str/iter.rs).
4//
5// Excerpt from https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/COPYRIGHT ,
6// which refers to
7// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-APACHE
8// and
9// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-MIT
10// :
11//
12// For full authorship information, see the version control history or
13// https://thanks.rust-lang.org
14//
15// Except as otherwise noted (below and/or in individual files), Rust is
16// licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or
17// <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
18// <LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option.
19
20use super::Utf8Chars;
21use core::iter::FusedIterator;
22
23/// An iterator over the [`char`]s and their positions.
24#[derive(Clone, Debug)]
25#[must_use = "iterators are lazy and do nothing unless consumed"]
26pub struct Utf8CharIndices<'a> {
27 front_offset: usize,
28 iter: Utf8Chars<'a>,
29}
30
31impl<'a> Iterator for Utf8CharIndices<'a> {
32 type Item = (usize, char);
33
34 #[inline]
35 fn next(&mut self) -> Option<(usize, char)> {
36 let pre_len = self.as_slice().len();
37 match self.iter.next() {
38 None => None,
39 Some(ch) => {
40 let index = self.front_offset;
41 let len = self.as_slice().len();
42 self.front_offset += pre_len - len;
43 Some((index, ch))
44 }
45 }
46 }
47
48 #[inline]
49 fn count(self) -> usize {
50 self.iter.count()
51 }
52
53 #[inline]
54 fn size_hint(&self) -> (usize, Option<usize>) {
55 self.iter.size_hint()
56 }
57
58 #[inline]
59 fn last(mut self) -> Option<(usize, char)> {
60 // No need to go through the entire string.
61 self.next_back()
62 }
63}
64
65impl<'a> DoubleEndedIterator for Utf8CharIndices<'a> {
66 #[inline]
67 fn next_back(&mut self) -> Option<(usize, char)> {
68 self.iter.next_back().map(|ch| {
69 let index = self.front_offset + self.as_slice().len();
70 (index, ch)
71 })
72 }
73}
74
75impl FusedIterator for Utf8CharIndices<'_> {}
76
77impl<'a> Utf8CharIndices<'a> {
78 #[inline(always)]
79 /// Creates the iterator from a byte slice.
80 pub fn new(bytes: &'a [u8]) -> Self {
81 Utf8CharIndices::<'a> {
82 front_offset: 0,
83 iter: Utf8Chars::new(bytes),
84 }
85 }
86
87 /// Views the underlying data as a subslice of the original data.
88 ///
89 /// This has the same lifetime as the original slice, and so the
90 /// iterator can continue to be used while this exists.
91 #[must_use]
92 #[inline]
93 pub fn as_slice(&self) -> &'a [u8] {
94 self.iter.as_slice()
95 }
96
97 /// Returns the byte position of the next character, or the length
98 /// of the underlying string if there are no more characters.
99 ///
100 /// # Examples
101 ///
102 /// ```
103 /// use utf8_iter::Utf8CharsEx;
104 /// let mut chars = "a楽".as_bytes().char_indices();
105 ///
106 /// assert_eq!(chars.offset(), 0);
107 /// assert_eq!(chars.next(), Some((0, 'a')));
108 ///
109 /// assert_eq!(chars.offset(), 1);
110 /// assert_eq!(chars.next(), Some((1, '楽')));
111 ///
112 /// assert_eq!(chars.offset(), 4);
113 /// assert_eq!(chars.next(), None);
114 /// ```
115 #[inline]
116 #[must_use]
117 pub fn offset(&self) -> usize {
118 self.front_offset
119 }
120}