/*!
Provides architecture independent implementations of `memchr` and friends.

The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
searching for one, two or three distinct bytes, respectively, in a haystack.
Each type also has corresponding double ended iterators. These searchers
are typically slower than hand-coded vector routines accomplishing the same
task, but are also typically faster than naive scalar code. These routines
effectively work by treating a `usize` as a vector of 8-bit lanes, and thus
achieve some level of data parallelism even without explicit vector support.

The `One` searcher also provides a [`One::count`] routine for efficiently
counting the number of times a single byte occurs in a haystack. This is
useful, for example, for counting the number of lines in a haystack. This
routine exists because it is usually faster, especially with a high match
count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its
`Iterator::count` implementation to use this routine.)

Only one, two and three bytes are supported because three bytes is about
the point where one sees diminishing returns. Beyond this point and it's
probably (but not necessarily) better to just use a simple `[bool; 256]` array
or similar. However, it depends mightily on the specific work-load and the
expected match frequency.
*/
use crate::{arch::generic::memchr as generic, ext::Pointer};
/// The number of bytes in a single `usize` value.
const USIZE_BYTES: usize = (usize::BITS / 8) as usize;
/// The bits that must be zero for a `*const usize` to be properly aligned.
const USIZE_ALIGN: usize = USIZE_BYTES - 1;
/// Finds all occurrences of a single byte in a haystack.
#[derive(Clone, Copy, Debug)]
pub struct One {
    /// The needle byte itself, used for byte-at-a-time confirmation.
    s1: u8,
    /// The needle byte broadcast (splatted) into every 8-bit lane of a
    /// `usize`, used for SWAR comparisons against whole words.
    v1: usize,
}
3940impl One {
41/// The number of bytes we examine per each iteration of our search loop.
42const LOOP_BYTES: usize = 2 * USIZE_BYTES;
4344/// Create a new searcher that finds occurrences of the byte given.
45#[inline]
46pub fn new(needle: u8) -> One {
47One { s1: needle, v1: splat(needle) }
48 }
4950/// A test-only routine so that we can bundle a bunch of quickcheck
51 /// properties into a single macro. Basically, this provides a constructor
52 /// that makes it identical to most other memchr implementations, which
53 /// have fallible constructors.
54#[cfg(test)]
55pub(crate) fn try_new(needle: u8) -> Option<One> {
56Some(One::new(needle))
57 }
5859/// Return the first occurrence of the needle in the given haystack. If no
60 /// such occurrence exists, then `None` is returned.
61 ///
62 /// The occurrence is reported as an offset into `haystack`. Its maximum
63 /// value for a non-empty haystack is `haystack.len() - 1`.
64#[inline]
65pub fn find(&self, haystack: &[u8]) -> Option<usize> {
66// SAFETY: `find_raw` guarantees that if a pointer is returned, it
67 // falls within the bounds of the start and end pointers.
68unsafe {
69 generic::search_slice_with_raw(haystack, |s, e| {
70self.find_raw(s, e)
71 })
72 }
73 }
7475/// Return the last occurrence of the needle in the given haystack. If no
76 /// such occurrence exists, then `None` is returned.
77 ///
78 /// The occurrence is reported as an offset into `haystack`. Its maximum
79 /// value for a non-empty haystack is `haystack.len() - 1`.
80#[inline]
81pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
82// SAFETY: `find_raw` guarantees that if a pointer is returned, it
83 // falls within the bounds of the start and end pointers.
84unsafe {
85 generic::search_slice_with_raw(haystack, |s, e| {
86self.rfind_raw(s, e)
87 })
88 }
89 }
9091/// Counts all occurrences of this byte in the given haystack.
92#[inline]
93pub fn count(&self, haystack: &[u8]) -> usize {
94// SAFETY: All of our pointers are derived directly from a borrowed
95 // slice, which is guaranteed to be valid.
96unsafe {
97let start = haystack.as_ptr();
98let end = start.add(haystack.len());
99self.count_raw(start, end)
100 }
101 }
102103/// Like `find`, but accepts and returns raw pointers.
104 ///
105 /// When a match is found, the pointer returned is guaranteed to be
106 /// `>= start` and `< end`.
107 ///
108 /// This routine is useful if you're already using raw pointers and would
109 /// like to avoid converting back to a slice before executing a search.
110 ///
111 /// # Safety
112 ///
113 /// * Both `start` and `end` must be valid for reads.
114 /// * Both `start` and `end` must point to an initialized value.
115 /// * Both `start` and `end` must point to the same allocated object and
116 /// must either be in bounds or at most one byte past the end of the
117 /// allocated object.
118 /// * Both `start` and `end` must be _derived from_ a pointer to the same
119 /// object.
120 /// * The distance between `start` and `end` must not overflow `isize`.
121 /// * The distance being in bounds must not rely on "wrapping around" the
122 /// address space.
123 ///
124 /// Note that callers may pass a pair of pointers such that `start >= end`.
125 /// In that case, `None` will always be returned.
126#[inline]
127pub unsafe fn find_raw(
128&self,
129 start: *const u8,
130 end: *const u8,
131 ) -> Option<*const u8> {
132if start >= end {
133return None;
134 }
135let confirm = |b| self.confirm(b);
136let len = end.distance(start);
137if len < USIZE_BYTES {
138return generic::fwd_byte_by_byte(start, end, confirm);
139 }
140141// The start of the search may not be aligned to `*const usize`,
142 // so we do an unaligned load here.
143let chunk = start.cast::<usize>().read_unaligned();
144if self.has_needle(chunk) {
145return generic::fwd_byte_by_byte(start, end, confirm);
146 }
147148// And now we start our search at a guaranteed aligned position.
149 // The first iteration of the loop below will overlap with the the
150 // unaligned chunk above in cases where the search starts at an
151 // unaligned offset, but that's okay as we're only here if that
152 // above didn't find a match.
153let mut cur =
154start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
155if true {
if !(cur > start) {
::core::panicking::panic("assertion failed: cur > start")
};
};debug_assert!(cur > start);
156if len <= One::LOOP_BYTES {
157return generic::fwd_byte_by_byte(cur, end, confirm);
158 }
159if true {
if !(end.sub(One::LOOP_BYTES) >= start) {
::core::panicking::panic("assertion failed: end.sub(One::LOOP_BYTES) >= start")
};
};debug_assert!(end.sub(One::LOOP_BYTES) >= start);
160while cur <= end.sub(One::LOOP_BYTES) {
161if true {
match (&0, &(cur.as_usize() % USIZE_BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
162163let a = cur.cast::<usize>().read();
164let b = cur.add(USIZE_BYTES).cast::<usize>().read();
165if self.has_needle(a) || self.has_needle(b) {
166break;
167 }
168 cur = cur.add(One::LOOP_BYTES);
169 }
170 generic::fwd_byte_by_byte(cur, end, confirm)
171 }
172173/// Like `rfind`, but accepts and returns raw pointers.
174 ///
175 /// When a match is found, the pointer returned is guaranteed to be
176 /// `>= start` and `< end`.
177 ///
178 /// This routine is useful if you're already using raw pointers and would
179 /// like to avoid converting back to a slice before executing a search.
180 ///
181 /// # Safety
182 ///
183 /// * Both `start` and `end` must be valid for reads.
184 /// * Both `start` and `end` must point to an initialized value.
185 /// * Both `start` and `end` must point to the same allocated object and
186 /// must either be in bounds or at most one byte past the end of the
187 /// allocated object.
188 /// * Both `start` and `end` must be _derived from_ a pointer to the same
189 /// object.
190 /// * The distance between `start` and `end` must not overflow `isize`.
191 /// * The distance being in bounds must not rely on "wrapping around" the
192 /// address space.
193 ///
194 /// Note that callers may pass a pair of pointers such that `start >= end`.
195 /// In that case, `None` will always be returned.
196#[inline]
197pub unsafe fn rfind_raw(
198&self,
199 start: *const u8,
200 end: *const u8,
201 ) -> Option<*const u8> {
202if start >= end {
203return None;
204 }
205let confirm = |b| self.confirm(b);
206let len = end.distance(start);
207if len < USIZE_BYTES {
208return generic::rev_byte_by_byte(start, end, confirm);
209 }
210211let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
212if self.has_needle(chunk) {
213return generic::rev_byte_by_byte(start, end, confirm);
214 }
215216let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
217if true {
if !(start <= cur && cur <= end) {
::core::panicking::panic("assertion failed: start <= cur && cur <= end")
};
};debug_assert!(start <= cur && cur <= end);
218if len <= One::LOOP_BYTES {
219return generic::rev_byte_by_byte(start, cur, confirm);
220 }
221while cur >= start.add(One::LOOP_BYTES) {
222if true {
match (&0, &(cur.as_usize() % USIZE_BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
223224let a = cur.sub(2 * USIZE_BYTES).cast::<usize>().read();
225let b = cur.sub(1 * USIZE_BYTES).cast::<usize>().read();
226if self.has_needle(a) || self.has_needle(b) {
227break;
228 }
229 cur = cur.sub(One::LOOP_BYTES);
230 }
231 generic::rev_byte_by_byte(start, cur, confirm)
232 }
233234/// Counts all occurrences of this byte in the given haystack represented
235 /// by raw pointers.
236 ///
237 /// This routine is useful if you're already using raw pointers and would
238 /// like to avoid converting back to a slice before executing a search.
239 ///
240 /// # Safety
241 ///
242 /// * Both `start` and `end` must be valid for reads.
243 /// * Both `start` and `end` must point to an initialized value.
244 /// * Both `start` and `end` must point to the same allocated object and
245 /// must either be in bounds or at most one byte past the end of the
246 /// allocated object.
247 /// * Both `start` and `end` must be _derived from_ a pointer to the same
248 /// object.
249 /// * The distance between `start` and `end` must not overflow `isize`.
250 /// * The distance being in bounds must not rely on "wrapping around" the
251 /// address space.
252 ///
253 /// Note that callers may pass a pair of pointers such that `start >= end`.
254 /// In that case, `0` will always be returned.
255#[inline]
256pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
257if start >= end {
258return 0;
259 }
260// Sadly I couldn't get the SWAR approach to work here, so we just do
261 // one byte at a time for now. PRs to improve this are welcome.
262let mut ptr = start;
263let mut count = 0;
264while ptr < end {
265 count += (ptr.read() == self.s1) as usize;
266 ptr = ptr.offset(1);
267 }
268count269 }
270271/// Returns an iterator over all occurrences of the needle byte in the
272 /// given haystack.
273 ///
274 /// The iterator returned implements `DoubleEndedIterator`. This means it
275 /// can also be used to find occurrences in reverse order.
276pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
277OneIter { searcher: self, it: generic::Iter::new(haystack) }
278 }
279280#[inline(always)]
281fn has_needle(&self, chunk: usize) -> bool {
282has_zero_byte(self.v1 ^ chunk)
283 }
284285#[inline(always)]
286fn confirm(&self, haystack_byte: u8) -> bool {
287self.s1 == haystack_byte288 }
289}
290291/// An iterator over all occurrences of a single byte in a haystack.
292///
293/// This iterator implements `DoubleEndedIterator`, which means it can also be
294/// used to find occurrences in reverse order.
295///
296/// This iterator is created by the [`One::iter`] method.
297///
298/// The lifetime parameters are as follows:
299///
300/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
301/// * `'h` refers to the lifetime of the haystack being searched.
302#[derive(#[automatically_derived]
impl<'a, 'h> ::core::clone::Clone for OneIter<'a, 'h> {
#[inline]
fn clone(&self) -> OneIter<'a, 'h> {
OneIter {
searcher: ::core::clone::Clone::clone(&self.searcher),
it: ::core::clone::Clone::clone(&self.it),
}
}
}Clone, #[automatically_derived]
impl<'a, 'h> ::core::fmt::Debug for OneIter<'a, 'h> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "OneIter",
"searcher", &self.searcher, "it", &&self.it)
}
}Debug)]
303pub struct OneIter<'a, 'h> {
304/// The underlying memchr searcher.
305searcher: &'a One,
306/// Generic iterator implementation.
307it: generic::Iter<'h>,
308}
309310impl<'a, 'h> Iteratorfor OneIter<'a, 'h> {
311type Item = usize;
312313#[inline]
314fn next(&mut self) -> Option<usize> {
315// SAFETY: We rely on the generic iterator to provide valid start
316 // and end pointers, but we guarantee that any pointer returned by
317 // 'find_raw' falls within the bounds of the start and end pointer.
318unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
319 }
320321#[inline]
322fn count(self) -> usize {
323self.it.count(|s, e| {
324// SAFETY: We rely on our generic iterator to return valid start
325 // and end pointers.
326unsafe { self.searcher.count_raw(s, e) }
327 })
328 }
329330#[inline]
331fn size_hint(&self) -> (usize, Option<usize>) {
332self.it.size_hint()
333 }
334}
335336impl<'a, 'h> DoubleEndedIteratorfor OneIter<'a, 'h> {
337#[inline]
338fn next_back(&mut self) -> Option<usize> {
339// SAFETY: We rely on the generic iterator to provide valid start
340 // and end pointers, but we guarantee that any pointer returned by
341 // 'rfind_raw' falls within the bounds of the start and end pointer.
342unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
343 }
344}
/// Finds all occurrences of two bytes in a haystack.
///
/// That is, this reports matches of one of two possible bytes. For example,
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
/// `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub struct Two {
    /// The first needle byte, used for byte-at-a-time confirmation.
    s1: u8,
    /// The second needle byte, used for byte-at-a-time confirmation.
    s2: u8,
    /// The first needle byte splatted into every 8-bit lane of a `usize`.
    v1: usize,
    /// The second needle byte splatted into every 8-bit lane of a `usize`.
    v2: usize,
}
358359impl Two {
360/// Create a new searcher that finds occurrences of the two needle bytes
361 /// given.
362#[inline]
363pub fn new(needle1: u8, needle2: u8) -> Two {
364Two {
365 s1: needle1,
366 s2: needle2,
367 v1: splat(needle1),
368 v2: splat(needle2),
369 }
370 }
371372/// A test-only routine so that we can bundle a bunch of quickcheck
373 /// properties into a single macro. Basically, this provides a constructor
374 /// that makes it identical to most other memchr implementations, which
375 /// have fallible constructors.
376#[cfg(test)]
377pub(crate) fn try_new(needle1: u8, needle2: u8) -> Option<Two> {
378Some(Two::new(needle1, needle2))
379 }
380381/// Return the first occurrence of one of the needle bytes in the given
382 /// haystack. If no such occurrence exists, then `None` is returned.
383 ///
384 /// The occurrence is reported as an offset into `haystack`. Its maximum
385 /// value for a non-empty haystack is `haystack.len() - 1`.
386#[inline]
387pub fn find(&self, haystack: &[u8]) -> Option<usize> {
388// SAFETY: `find_raw` guarantees that if a pointer is returned, it
389 // falls within the bounds of the start and end pointers.
390unsafe {
391 generic::search_slice_with_raw(haystack, |s, e| {
392self.find_raw(s, e)
393 })
394 }
395 }
396397/// Return the last occurrence of one of the needle bytes in the given
398 /// haystack. If no such occurrence exists, then `None` is returned.
399 ///
400 /// The occurrence is reported as an offset into `haystack`. Its maximum
401 /// value for a non-empty haystack is `haystack.len() - 1`.
402#[inline]
403pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
404// SAFETY: `find_raw` guarantees that if a pointer is returned, it
405 // falls within the bounds of the start and end pointers.
406unsafe {
407 generic::search_slice_with_raw(haystack, |s, e| {
408self.rfind_raw(s, e)
409 })
410 }
411 }
412413/// Like `find`, but accepts and returns raw pointers.
414 ///
415 /// When a match is found, the pointer returned is guaranteed to be
416 /// `>= start` and `< end`.
417 ///
418 /// This routine is useful if you're already using raw pointers and would
419 /// like to avoid converting back to a slice before executing a search.
420 ///
421 /// # Safety
422 ///
423 /// * Both `start` and `end` must be valid for reads.
424 /// * Both `start` and `end` must point to an initialized value.
425 /// * Both `start` and `end` must point to the same allocated object and
426 /// must either be in bounds or at most one byte past the end of the
427 /// allocated object.
428 /// * Both `start` and `end` must be _derived from_ a pointer to the same
429 /// object.
430 /// * The distance between `start` and `end` must not overflow `isize`.
431 /// * The distance being in bounds must not rely on "wrapping around" the
432 /// address space.
433 ///
434 /// Note that callers may pass a pair of pointers such that `start >= end`.
435 /// In that case, `None` will always be returned.
436#[inline]
437pub unsafe fn find_raw(
438&self,
439 start: *const u8,
440 end: *const u8,
441 ) -> Option<*const u8> {
442if start >= end {
443return None;
444 }
445let confirm = |b| self.confirm(b);
446let len = end.distance(start);
447if len < USIZE_BYTES {
448return generic::fwd_byte_by_byte(start, end, confirm);
449 }
450451// The start of the search may not be aligned to `*const usize`,
452 // so we do an unaligned load here.
453let chunk = start.cast::<usize>().read_unaligned();
454if self.has_needle(chunk) {
455return generic::fwd_byte_by_byte(start, end, confirm);
456 }
457458// And now we start our search at a guaranteed aligned position.
459 // The first iteration of the loop below will overlap with the
460 // unaligned chunk above in cases where the search starts at an
461 // unaligned offset, but that's okay as we're only here if that
462 // above didn't find a match.
463let mut cur =
464start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
465if true {
if !(cur > start) {
::core::panicking::panic("assertion failed: cur > start")
};
};debug_assert!(cur > start);
466if true {
if !(end.sub(USIZE_BYTES) >= start) {
::core::panicking::panic("assertion failed: end.sub(USIZE_BYTES) >= start")
};
};debug_assert!(end.sub(USIZE_BYTES) >= start);
467while cur <= end.sub(USIZE_BYTES) {
468if true {
match (&0, &(cur.as_usize() % USIZE_BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
469470let chunk = cur.cast::<usize>().read();
471if self.has_needle(chunk) {
472break;
473 }
474 cur = cur.add(USIZE_BYTES);
475 }
476 generic::fwd_byte_by_byte(cur, end, confirm)
477 }
478479/// Like `rfind`, but accepts and returns raw pointers.
480 ///
481 /// When a match is found, the pointer returned is guaranteed to be
482 /// `>= start` and `< end`.
483 ///
484 /// This routine is useful if you're already using raw pointers and would
485 /// like to avoid converting back to a slice before executing a search.
486 ///
487 /// # Safety
488 ///
489 /// * Both `start` and `end` must be valid for reads.
490 /// * Both `start` and `end` must point to an initialized value.
491 /// * Both `start` and `end` must point to the same allocated object and
492 /// must either be in bounds or at most one byte past the end of the
493 /// allocated object.
494 /// * Both `start` and `end` must be _derived from_ a pointer to the same
495 /// object.
496 /// * The distance between `start` and `end` must not overflow `isize`.
497 /// * The distance being in bounds must not rely on "wrapping around" the
498 /// address space.
499 ///
500 /// Note that callers may pass a pair of pointers such that `start >= end`.
501 /// In that case, `None` will always be returned.
502#[inline]
503pub unsafe fn rfind_raw(
504&self,
505 start: *const u8,
506 end: *const u8,
507 ) -> Option<*const u8> {
508if start >= end {
509return None;
510 }
511let confirm = |b| self.confirm(b);
512let len = end.distance(start);
513if len < USIZE_BYTES {
514return generic::rev_byte_by_byte(start, end, confirm);
515 }
516517let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
518if self.has_needle(chunk) {
519return generic::rev_byte_by_byte(start, end, confirm);
520 }
521522let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
523if true {
if !(start <= cur && cur <= end) {
::core::panicking::panic("assertion failed: start <= cur && cur <= end")
};
};debug_assert!(start <= cur && cur <= end);
524while cur >= start.add(USIZE_BYTES) {
525if true {
match (&0, &(cur.as_usize() % USIZE_BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
526527let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read();
528if self.has_needle(chunk) {
529break;
530 }
531 cur = cur.sub(USIZE_BYTES);
532 }
533 generic::rev_byte_by_byte(start, cur, confirm)
534 }
535536/// Returns an iterator over all occurrences of one of the needle bytes in
537 /// the given haystack.
538 ///
539 /// The iterator returned implements `DoubleEndedIterator`. This means it
540 /// can also be used to find occurrences in reverse order.
541pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
542TwoIter { searcher: self, it: generic::Iter::new(haystack) }
543 }
544545#[inline(always)]
546fn has_needle(&self, chunk: usize) -> bool {
547has_zero_byte(self.v1 ^ chunk) || has_zero_byte(self.v2 ^ chunk)
548 }
549550#[inline(always)]
551fn confirm(&self, haystack_byte: u8) -> bool {
552self.s1 == haystack_byte || self.s2 == haystack_byte553 }
554}
555556/// An iterator over all occurrences of two possible bytes in a haystack.
557///
558/// This iterator implements `DoubleEndedIterator`, which means it can also be
559/// used to find occurrences in reverse order.
560///
561/// This iterator is created by the [`Two::iter`] method.
562///
563/// The lifetime parameters are as follows:
564///
565/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
566/// * `'h` refers to the lifetime of the haystack being searched.
567#[derive(#[automatically_derived]
impl<'a, 'h> ::core::clone::Clone for TwoIter<'a, 'h> {
#[inline]
fn clone(&self) -> TwoIter<'a, 'h> {
TwoIter {
searcher: ::core::clone::Clone::clone(&self.searcher),
it: ::core::clone::Clone::clone(&self.it),
}
}
}Clone, #[automatically_derived]
impl<'a, 'h> ::core::fmt::Debug for TwoIter<'a, 'h> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "TwoIter",
"searcher", &self.searcher, "it", &&self.it)
}
}Debug)]
568pub struct TwoIter<'a, 'h> {
569/// The underlying memchr searcher.
570searcher: &'a Two,
571/// Generic iterator implementation.
572it: generic::Iter<'h>,
573}
574575impl<'a, 'h> Iteratorfor TwoIter<'a, 'h> {
576type Item = usize;
577578#[inline]
579fn next(&mut self) -> Option<usize> {
580// SAFETY: We rely on the generic iterator to provide valid start
581 // and end pointers, but we guarantee that any pointer returned by
582 // 'find_raw' falls within the bounds of the start and end pointer.
583unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
584 }
585586#[inline]
587fn size_hint(&self) -> (usize, Option<usize>) {
588self.it.size_hint()
589 }
590}
591592impl<'a, 'h> DoubleEndedIteratorfor TwoIter<'a, 'h> {
593#[inline]
594fn next_back(&mut self) -> Option<usize> {
595// SAFETY: We rely on the generic iterator to provide valid start
596 // and end pointers, but we guarantee that any pointer returned by
597 // 'rfind_raw' falls within the bounds of the start and end pointer.
598unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
599 }
600}
/// Finds all occurrences of three bytes in a haystack.
///
/// That is, this reports matches of one of three possible bytes. For example,
/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
/// `0`, `2`, `3`, `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub struct Three {
    /// The first needle byte, used for byte-at-a-time confirmation.
    s1: u8,
    /// The second needle byte, used for byte-at-a-time confirmation.
    s2: u8,
    /// The third needle byte, used for byte-at-a-time confirmation.
    s3: u8,
    /// The first needle byte splatted into every 8-bit lane of a `usize`.
    v1: usize,
    /// The second needle byte splatted into every 8-bit lane of a `usize`.
    v2: usize,
    /// The third needle byte splatted into every 8-bit lane of a `usize`.
    v3: usize,
}
616617impl Three {
618/// Create a new searcher that finds occurrences of the three needle bytes
619 /// given.
620#[inline]
621pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Three {
622Three {
623 s1: needle1,
624 s2: needle2,
625 s3: needle3,
626 v1: splat(needle1),
627 v2: splat(needle2),
628 v3: splat(needle3),
629 }
630 }
631632/// A test-only routine so that we can bundle a bunch of quickcheck
633 /// properties into a single macro. Basically, this provides a constructor
634 /// that makes it identical to most other memchr implementations, which
635 /// have fallible constructors.
636#[cfg(test)]
637pub(crate) fn try_new(
638 needle1: u8,
639 needle2: u8,
640 needle3: u8,
641 ) -> Option<Three> {
642Some(Three::new(needle1, needle2, needle3))
643 }
644645/// Return the first occurrence of one of the needle bytes in the given
646 /// haystack. If no such occurrence exists, then `None` is returned.
647 ///
648 /// The occurrence is reported as an offset into `haystack`. Its maximum
649 /// value for a non-empty haystack is `haystack.len() - 1`.
650#[inline]
651pub fn find(&self, haystack: &[u8]) -> Option<usize> {
652// SAFETY: `find_raw` guarantees that if a pointer is returned, it
653 // falls within the bounds of the start and end pointers.
654unsafe {
655 generic::search_slice_with_raw(haystack, |s, e| {
656self.find_raw(s, e)
657 })
658 }
659 }
660661/// Return the last occurrence of one of the needle bytes in the given
662 /// haystack. If no such occurrence exists, then `None` is returned.
663 ///
664 /// The occurrence is reported as an offset into `haystack`. Its maximum
665 /// value for a non-empty haystack is `haystack.len() - 1`.
666#[inline]
667pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
668// SAFETY: `find_raw` guarantees that if a pointer is returned, it
669 // falls within the bounds of the start and end pointers.
670unsafe {
671 generic::search_slice_with_raw(haystack, |s, e| {
672self.rfind_raw(s, e)
673 })
674 }
675 }
676677/// Like `find`, but accepts and returns raw pointers.
678 ///
679 /// When a match is found, the pointer returned is guaranteed to be
680 /// `>= start` and `< end`.
681 ///
682 /// This routine is useful if you're already using raw pointers and would
683 /// like to avoid converting back to a slice before executing a search.
684 ///
685 /// # Safety
686 ///
687 /// * Both `start` and `end` must be valid for reads.
688 /// * Both `start` and `end` must point to an initialized value.
689 /// * Both `start` and `end` must point to the same allocated object and
690 /// must either be in bounds or at most one byte past the end of the
691 /// allocated object.
692 /// * Both `start` and `end` must be _derived from_ a pointer to the same
693 /// object.
694 /// * The distance between `start` and `end` must not overflow `isize`.
695 /// * The distance being in bounds must not rely on "wrapping around" the
696 /// address space.
697 ///
698 /// Note that callers may pass a pair of pointers such that `start >= end`.
699 /// In that case, `None` will always be returned.
700#[inline]
701pub unsafe fn find_raw(
702&self,
703 start: *const u8,
704 end: *const u8,
705 ) -> Option<*const u8> {
706if start >= end {
707return None;
708 }
709let confirm = |b| self.confirm(b);
710let len = end.distance(start);
711if len < USIZE_BYTES {
712return generic::fwd_byte_by_byte(start, end, confirm);
713 }
714715// The start of the search may not be aligned to `*const usize`,
716 // so we do an unaligned load here.
717let chunk = start.cast::<usize>().read_unaligned();
718if self.has_needle(chunk) {
719return generic::fwd_byte_by_byte(start, end, confirm);
720 }
721722// And now we start our search at a guaranteed aligned position.
723 // The first iteration of the loop below will overlap with the
724 // unaligned chunk above in cases where the search starts at an
725 // unaligned offset, but that's okay as we're only here if that
726 // above didn't find a match.
727let mut cur =
728start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
729if true {
if !(cur > start) {
::core::panicking::panic("assertion failed: cur > start")
};
};debug_assert!(cur > start);
730if true {
if !(end.sub(USIZE_BYTES) >= start) {
::core::panicking::panic("assertion failed: end.sub(USIZE_BYTES) >= start")
};
};debug_assert!(end.sub(USIZE_BYTES) >= start);
731while cur <= end.sub(USIZE_BYTES) {
732if true {
match (&0, &(cur.as_usize() % USIZE_BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
733734let chunk = cur.cast::<usize>().read();
735if self.has_needle(chunk) {
736break;
737 }
738 cur = cur.add(USIZE_BYTES);
739 }
740 generic::fwd_byte_by_byte(cur, end, confirm)
741 }
742743/// Like `rfind`, but accepts and returns raw pointers.
744 ///
745 /// When a match is found, the pointer returned is guaranteed to be
746 /// `>= start` and `< end`.
747 ///
748 /// This routine is useful if you're already using raw pointers and would
749 /// like to avoid converting back to a slice before executing a search.
750 ///
751 /// # Safety
752 ///
753 /// * Both `start` and `end` must be valid for reads.
754 /// * Both `start` and `end` must point to an initialized value.
755 /// * Both `start` and `end` must point to the same allocated object and
756 /// must either be in bounds or at most one byte past the end of the
757 /// allocated object.
758 /// * Both `start` and `end` must be _derived from_ a pointer to the same
759 /// object.
760 /// * The distance between `start` and `end` must not overflow `isize`.
761 /// * The distance being in bounds must not rely on "wrapping around" the
762 /// address space.
763 ///
764 /// Note that callers may pass a pair of pointers such that `start >= end`.
765 /// In that case, `None` will always be returned.
766#[inline]
767pub unsafe fn rfind_raw(
768&self,
769 start: *const u8,
770 end: *const u8,
771 ) -> Option<*const u8> {
772if start >= end {
773return None;
774 }
775let confirm = |b| self.confirm(b);
776let len = end.distance(start);
777if len < USIZE_BYTES {
778return generic::rev_byte_by_byte(start, end, confirm);
779 }
780781let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
782if self.has_needle(chunk) {
783return generic::rev_byte_by_byte(start, end, confirm);
784 }
785786let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
787if true {
if !(start <= cur && cur <= end) {
::core::panicking::panic("assertion failed: start <= cur && cur <= end")
};
};debug_assert!(start <= cur && cur <= end);
788while cur >= start.add(USIZE_BYTES) {
789if true {
match (&0, &(cur.as_usize() % USIZE_BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);
790791let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read();
792if self.has_needle(chunk) {
793break;
794 }
795 cur = cur.sub(USIZE_BYTES);
796 }
797 generic::rev_byte_by_byte(start, cur, confirm)
798 }
799800/// Returns an iterator over all occurrences of one of the needle bytes in
801 /// the given haystack.
802 ///
803 /// The iterator returned implements `DoubleEndedIterator`. This means it
804 /// can also be used to find occurrences in reverse order.
805pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
806ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
807 }
808809#[inline(always)]
810fn has_needle(&self, chunk: usize) -> bool {
811has_zero_byte(self.v1 ^ chunk)
812 || has_zero_byte(self.v2 ^ chunk)
813 || has_zero_byte(self.v3 ^ chunk)
814 }
815816#[inline(always)]
817fn confirm(&self, haystack_byte: u8) -> bool {
818self.s1 == haystack_byte819 || self.s2 == haystack_byte820 || self.s3 == haystack_byte821 }
822}
823824/// An iterator over all occurrences of three possible bytes in a haystack.
825///
826/// This iterator implements `DoubleEndedIterator`, which means it can also be
827/// used to find occurrences in reverse order.
828///
829/// This iterator is created by the [`Three::iter`] method.
830///
831/// The lifetime parameters are as follows:
832///
833/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
834/// * `'h` refers to the lifetime of the haystack being searched.
835#[derive(#[automatically_derived]
impl<'a, 'h> ::core::clone::Clone for ThreeIter<'a, 'h> {
#[inline]
fn clone(&self) -> ThreeIter<'a, 'h> {
ThreeIter {
searcher: ::core::clone::Clone::clone(&self.searcher),
it: ::core::clone::Clone::clone(&self.it),
}
}
}Clone, #[automatically_derived]
impl<'a, 'h> ::core::fmt::Debug for ThreeIter<'a, 'h> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "ThreeIter",
"searcher", &self.searcher, "it", &&self.it)
}
}Debug)]
836pub struct ThreeIter<'a, 'h> {
837/// The underlying memchr searcher.
838searcher: &'a Three,
839/// Generic iterator implementation.
840it: generic::Iter<'h>,
841}
842843impl<'a, 'h> Iteratorfor ThreeIter<'a, 'h> {
844type Item = usize;
845846#[inline]
847fn next(&mut self) -> Option<usize> {
848// SAFETY: We rely on the generic iterator to provide valid start
849 // and end pointers, but we guarantee that any pointer returned by
850 // 'find_raw' falls within the bounds of the start and end pointer.
851unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
852 }
853854#[inline]
855fn size_hint(&self) -> (usize, Option<usize>) {
856self.it.size_hint()
857 }
858}
859860impl<'a, 'h> DoubleEndedIteratorfor ThreeIter<'a, 'h> {
861#[inline]
862fn next_back(&mut self) -> Option<usize> {
863// SAFETY: We rely on the generic iterator to provide valid start
864 // and end pointers, but we guarantee that any pointer returned by
865 // 'rfind_raw' falls within the bounds of the start and end pointer.
866unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
867 }
868}
/// Return `true` if `x` contains any zero byte.
///
/// That is, this routine treats `x` as a register of 8-bit lanes and returns
/// true when any of those lanes is `0`.
///
/// From "Matters Computational" by J. Arndt.
#[inline(always)]
fn has_zero_byte(x: usize) -> bool {
    // Subtract one from every lane: a borrow reaches the lane's most
    // significant bit only if the lane started at zero. Masking with `!x`
    // discards lanes whose high bit was already set in the input.
    const LO: usize = splat(0x01);
    const HI: usize = splat(0x80);

    let borrowed = x.wrapping_sub(LO);
    (borrowed & !x & HI) != 0
}

/// Repeat the given byte into a word size number. That is, every 8 bits
/// is equivalent to the given byte. For example, if `b` is `\x4E` or
/// `01001110` in binary, then the returned value on a 32-bit system would be:
/// `01001110_01001110_01001110_01001110`.
#[inline(always)]
const fn splat(b: u8) -> usize {
    // TODO: use `usize::from` once it can be used in const context.
    // `usize::MAX / 255` is `0x0101..01`, i.e. a `1` in every lane.
    let lane = b as usize;
    lane * (usize::MAX / 255)
}
#[cfg(test)]
mod tests {
    use super::*;

    define_memchr_quickcheck!(super, try_new);

    #[test]
    fn forward_one() {
        crate::tests::memchr::Runner::new(1).forward_iter(|hay, ns| {
            Some(One::new(ns[0]).iter(hay).collect())
        })
    }

    #[test]
    fn reverse_one() {
        crate::tests::memchr::Runner::new(1).reverse_iter(|hay, ns| {
            Some(One::new(ns[0]).iter(hay).rev().collect())
        })
    }

    #[test]
    fn count_one() {
        crate::tests::memchr::Runner::new(1).count_iter(|hay, ns| {
            Some(One::new(ns[0]).iter(hay).count())
        })
    }

    #[test]
    fn forward_two() {
        crate::tests::memchr::Runner::new(2).forward_iter(|hay, ns| {
            let (n1, n2) = (ns.get(0).copied()?, ns.get(1).copied()?);
            Some(Two::new(n1, n2).iter(hay).collect())
        })
    }

    #[test]
    fn reverse_two() {
        crate::tests::memchr::Runner::new(2).reverse_iter(|hay, ns| {
            let (n1, n2) = (ns.get(0).copied()?, ns.get(1).copied()?);
            Some(Two::new(n1, n2).iter(hay).rev().collect())
        })
    }

    #[test]
    fn forward_three() {
        crate::tests::memchr::Runner::new(3).forward_iter(|hay, ns| {
            let (n1, n2, n3) =
                (ns.get(0).copied()?, ns.get(1).copied()?, ns.get(2).copied()?);
            Some(Three::new(n1, n2, n3).iter(hay).collect())
        })
    }

    #[test]
    fn reverse_three() {
        crate::tests::memchr::Runner::new(3).reverse_iter(|hay, ns| {
            let (n1, n2, n3) =
                (ns.get(0).copied()?, ns.get(1).copied()?, ns.get(2).copied()?);
            Some(Three::new(n1, n2, n3).iter(hay).rev().collect())
        })
    }

    // This was found by quickcheck in the course of refactoring this crate
    // after memchr 2.5.0.
    #[test]
    fn regression_double_ended_iterator() {
        let finder = One::new(b'a');
        let mut it = finder.iter("a".as_bytes());
        assert_eq!(Some(0), it.next());
        assert_eq!(None, it.next_back());
    }

    // This regression test was caught by ripgrep's test suite on i686 when
    // upgrading to memchr 2.6. Namely, something about the \x0B bytes here
    // screws with the SWAR counting approach I was using. This regression test
    // prompted me to remove the SWAR counting approach and just replace it
    // with a byte-at-a-time loop.
    #[test]
    fn regression_count_new_lines() {
        let hay = "01234567\x0b\n\x0b\n\x0b\n\x0b\nx";
        assert_eq!(4, One::new(b'\n').count(hay.as_bytes()));
    }

    // A test[1] that failed on some big endian targets after a perf
    // improvement was merged[2].
    //
    // At first it seemed like the test suite somehow missed the regression,
    // but in actuality, CI was not running tests with `cross` but instead with
    // `cargo` specifically. This is because those steps were using `cargo`
    // instead of `${{ env.CARGO }}`. So adding this regression test doesn't
    // really help catch that class of failure, but we add it anyway for good
    // measure.
    //
    // [1]: https://github.com/BurntSushi/memchr/issues/152
    // [2]: https://github.com/BurntSushi/memchr/pull/151
    #[test]
    fn regression_big_endian1() {
        assert_eq!(One::new(b':').find(b"1:23"), Some(1));
    }

    // Interestingly, I couldn't get `regression_big_endian1` to fail for me
    // on the `powerpc64-unknown-linux-gnu` target. But I found another case
    // through quickcheck that does.
    #[test]
    fn regression_big_endian2() {
        let data = [0, 0, 0, 0, 0, 0, 0, 0];
        assert_eq!(One::new(b'\x00').find(&data), Some(0));
    }
}