1/*!
2Generic crate-internal routines for the `memchr` family of functions.
3*/
45// What follows is a vector algorithm generic over the specific vector
6// type to detect the position of one, two or three needles in a haystack.
7// From what I know, this is a "classic" algorithm, although I don't
8// believe it has been published in any peer reviewed journal. I believe
9// it can be found in places like glibc and Go's standard library. It
10// appears to be well known and is elaborated on in more detail here:
11// https://gms.tf/stdfind-and-memchr-optimizations.html
12//
13// While the routine below is fairly long and perhaps intimidating, the basic
14// idea is actually very simple and can be expressed straight-forwardly in
15// pseudo code. The pseudo code below is written for 128 bit vectors, but the
16// actual code below works for anything that implements the Vector trait.
17//
18// needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
19// // Note: shift amount is in bytes
20//
21// while i <= haystack.len() - 16:
22// // A 16 byte vector. Each byte in chunk corresponds to a byte in
23// // the haystack.
24// chunk = haystack[i:i+16]
25// // Compare bytes in needle with bytes in chunk. The result is a 16
26// // byte chunk where each byte is 0xFF if the corresponding bytes
27// // in needle and chunk were equal, or 0x00 otherwise.
28// eqs = cmpeq(needle, chunk)
29// // Return a 32 bit integer where the most significant 16 bits
30// // are always 0 and the lower 16 bits correspond to whether the
31// // most significant bit in the correspond byte in `eqs` is set.
32// // In other words, `mask as u16` has bit i set if and only if
33// // needle[i] == chunk[i].
34// mask = movemask(eqs)
35//
36// // Mask is 0 if there is no match, and non-zero otherwise.
37// if mask != 0:
38// // trailing_zeros tells us the position of the least significant
39// // bit that is set.
40// return i + trailing_zeros(mask)
41//
42// // haystack length may not be a multiple of 16, so search the rest.
43// while i < haystack.len():
44// if haystack[i] == n1:
45// return i
46//
47// // No match found.
48// return NULL
49//
50// In fact, we could loosely translate the above code to Rust line-for-line
51// and it would be a pretty fast algorithm. But, we pull out all the stops
52// to go as fast as possible:
53//
54// 1. We use aligned loads. That is, we do some finagling to make sure our
55// primary loop not only proceeds in increments of 16 bytes, but that
56// the address of haystack's pointer that we dereference is aligned to
57// 16 bytes. 16 is a magic number here because it is the size of SSE2
58// 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
59// Therefore, to get aligned loads, our pointer's address must be evenly
60// divisible by 16.
61// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
62// kind of like loop unrolling, but we combine the equality comparisons
63// using a vector OR such that we only need to extract a single mask to
64// determine whether a match exists or not. If so, then we do some
65// book-keeping to determine the precise location but otherwise mush on.
66// 3. We use our "chunk" comparison routine in as many places as possible,
67// even if it means using unaligned loads. In particular, if haystack
68// starts with an unaligned address, then we do an unaligned load to
69// search the first 16 bytes. We then start our primary loop at the
70// smallest subsequent aligned address, which will actually overlap with
71// previously searched bytes. But we're OK with that. We do a similar
72// dance at the end of our primary loop. Finally, to avoid a
73// byte-at-a-time loop at the end, we do a final 16 byte unaligned load
74// that may overlap with a previous load. This is OK because it converts
75// a loop into a small number of very fast vector instructions. The overlap
76// is OK because we know the place where the overlap occurs does not
77// contain a match.
78//
// And that's pretty much all there is to it. Note that since the below is
80// generic and since it's meant to be inlined into routines with a
81// `#[target_feature(enable = "...")]` annotation, we must mark all routines as
82// both unsafe and `#[inline(always)]`.
83//
84// The fact that the code below is generic does somewhat inhibit us. For
// example, I've noticed that introducing an uninlineable `#[cold]` function to
86// handle the match case in the loop generates tighter assembly, but there is
87// no way to do this in the generic code below because the generic code doesn't
// know what `target_feature` annotation to apply to the uninlineable function.
89// We could make such functions part of the `Vector` trait, but we instead live
90// with the slightly sub-optimal codegen for now since it doesn't seem to have
91// a noticeable perf difference.
9293use crate::{
94ext::Pointer,
95 vector::{MoveMask, Vector},
96};
/// Finds all occurrences of a single byte in a haystack.
///
/// The needle is stored both as a scalar (`s1`, for byte-at-a-time tails)
/// and pre-splatted into a vector (`v1`, for the SIMD comparisons).
#[derive(Clone, Copy, Debug)]
pub(crate) struct One<V> {
    s1: u8,
    v1: V,
}
104105impl<V: Vector> One<V> {
106/// The number of bytes we examine per each iteration of our search loop.
107const LOOP_SIZE: usize = 4 * V::BYTES;
108109/// Create a new searcher that finds occurrences of the byte given.
110#[inline(always)]
111pub(crate) unsafe fn new(needle: u8) -> One<V> {
112One { s1: needle, v1: V::splat(needle) }
113 }
114115/// Returns the needle given to `One::new`.
116#[inline(always)]
117pub(crate) fn needle1(&self) -> u8 {
118self.s1
119 }
120121/// Return a pointer to the first occurrence of the needle in the given
122 /// haystack. If no such occurrence exists, then `None` is returned.
123 ///
124 /// When a match is found, the pointer returned is guaranteed to be
125 /// `>= start` and `< end`.
126 ///
127 /// # Safety
128 ///
129 /// * It must be the case that `start < end` and that the distance between
130 /// them is at least equal to `V::BYTES`. That is, it must always be valid
131 /// to do at least an unaligned load of `V` at `start`.
132 /// * Both `start` and `end` must be valid for reads.
133 /// * Both `start` and `end` must point to an initialized value.
134 /// * Both `start` and `end` must point to the same allocated object and
135 /// must either be in bounds or at most one byte past the end of the
136 /// allocated object.
137 /// * Both `start` and `end` must be _derived from_ a pointer to the same
138 /// object.
139 /// * The distance between `start` and `end` must not overflow `isize`.
140 /// * The distance being in bounds must not rely on "wrapping around" the
141 /// address space.
142#[inline(always)]
143pub(crate) unsafe fn find_raw(
144&self,
145 start: *const u8,
146 end: *const u8,
147 ) -> Option<*const u8> {
148// If we want to support vectors bigger than 256 bits, we probably
149 // need to move up to using a u64 for the masks used below. Currently
150 // they are 32 bits, which means we're SOL for vectors that need masks
151 // bigger than 32 bits. Overall unclear until there's a use case.
152if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
153154let topos = V::Mask::first_offset;
155let len = end.distance(start);
156if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
157 len >= V::BYTES,
158"haystack has length {}, but must be at least {}",
159 len,
160 V::BYTES
161 );
162163// Search a possibly unaligned chunk at `start`. This covers any part
164 // of the haystack prior to where aligned loads can start.
165if let Some(cur) = self.search_chunk(start, topos) {
166return Some(cur);
167 }
168// Set `cur` to the first V-aligned pointer greater than `start`.
169let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
170if true {
if !(cur > start && end.sub(V::BYTES) >= start) {
::core::panicking::panic("assertion failed: cur > start && end.sub(V::BYTES) >= start")
};
};debug_assert!(cur > start && end.sub(V::BYTES) >= start);
171if len >= Self::LOOP_SIZE {
172while cur <= end.sub(Self::LOOP_SIZE) {
173if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
174175let a = V::load_aligned(cur);
176let b = V::load_aligned(cur.add(1 * V::BYTES));
177let c = V::load_aligned(cur.add(2 * V::BYTES));
178let d = V::load_aligned(cur.add(3 * V::BYTES));
179let eqa = self.v1.cmpeq(a);
180let eqb = self.v1.cmpeq(b);
181let eqc = self.v1.cmpeq(c);
182let eqd = self.v1.cmpeq(d);
183let or1 = eqa.or(eqb);
184let or2 = eqc.or(eqd);
185let or3 = or1.or(or2);
186if or3.movemask_will_have_non_zero() {
187let mask = eqa.movemask();
188if mask.has_non_zero() {
189return Some(cur.add(topos(mask)));
190 }
191192let mask = eqb.movemask();
193if mask.has_non_zero() {
194return Some(cur.add(1 * V::BYTES).add(topos(mask)));
195 }
196197let mask = eqc.movemask();
198if mask.has_non_zero() {
199return Some(cur.add(2 * V::BYTES).add(topos(mask)));
200 }
201202let mask = eqd.movemask();
203if true {
if !mask.has_non_zero() {
::core::panicking::panic("assertion failed: mask.has_non_zero()")
};
};debug_assert!(mask.has_non_zero());
204return Some(cur.add(3 * V::BYTES).add(topos(mask)));
205 }
206 cur = cur.add(Self::LOOP_SIZE);
207 }
208 }
209// Handle any leftovers after the aligned loop above. We use unaligned
210 // loads here, but I believe we are guaranteed that they are aligned
211 // since `cur` is aligned.
212while cur <= end.sub(V::BYTES) {
213if true {
if !(end.distance(cur) >= V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) >= V::BYTES")
};
};debug_assert!(end.distance(cur) >= V::BYTES);
214if let Some(cur) = self.search_chunk(cur, topos) {
215return Some(cur);
216 }
217 cur = cur.add(V::BYTES);
218 }
219// Finally handle any remaining bytes less than the size of V. In this
220 // case, our pointer may indeed be unaligned and the load may overlap
221 // with the previous one. But that's okay since we know the previous
222 // load didn't lead to a match (otherwise we wouldn't be here).
223if cur < end {
224if true {
if !(end.distance(cur) < V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) < V::BYTES")
};
};debug_assert!(end.distance(cur) < V::BYTES);
225cur = cur.sub(V::BYTES - end.distance(cur));
226if true {
match (&end.distance(cur), &V::BYTES) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(end.distance(cur), V::BYTES);
227return self.search_chunk(cur, topos);
228 }
229None230 }
231232/// Return a pointer to the last occurrence of the needle in the given
233 /// haystack. If no such occurrence exists, then `None` is returned.
234 ///
235 /// When a match is found, the pointer returned is guaranteed to be
236 /// `>= start` and `< end`.
237 ///
238 /// # Safety
239 ///
240 /// * It must be the case that `start < end` and that the distance between
241 /// them is at least equal to `V::BYTES`. That is, it must always be valid
242 /// to do at least an unaligned load of `V` at `start`.
243 /// * Both `start` and `end` must be valid for reads.
244 /// * Both `start` and `end` must point to an initialized value.
245 /// * Both `start` and `end` must point to the same allocated object and
246 /// must either be in bounds or at most one byte past the end of the
247 /// allocated object.
248 /// * Both `start` and `end` must be _derived from_ a pointer to the same
249 /// object.
250 /// * The distance between `start` and `end` must not overflow `isize`.
251 /// * The distance being in bounds must not rely on "wrapping around" the
252 /// address space.
253#[inline(always)]
254pub(crate) unsafe fn rfind_raw(
255&self,
256 start: *const u8,
257 end: *const u8,
258 ) -> Option<*const u8> {
259// If we want to support vectors bigger than 256 bits, we probably
260 // need to move up to using a u64 for the masks used below. Currently
261 // they are 32 bits, which means we're SOL for vectors that need masks
262 // bigger than 32 bits. Overall unclear until there's a use case.
263if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
264265let topos = V::Mask::last_offset;
266let len = end.distance(start);
267if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
268 len >= V::BYTES,
269"haystack has length {}, but must be at least {}",
270 len,
271 V::BYTES
272 );
273274if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
275return Some(cur);
276 }
277let mut cur = end.sub(end.as_usize() & V::ALIGN);
278if true {
if !(start <= cur && cur <= end) {
::core::panicking::panic("assertion failed: start <= cur && cur <= end")
};
};debug_assert!(start <= cur && cur <= end);
279if len >= Self::LOOP_SIZE {
280while cur >= start.add(Self::LOOP_SIZE) {
281if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
282283 cur = cur.sub(Self::LOOP_SIZE);
284let a = V::load_aligned(cur);
285let b = V::load_aligned(cur.add(1 * V::BYTES));
286let c = V::load_aligned(cur.add(2 * V::BYTES));
287let d = V::load_aligned(cur.add(3 * V::BYTES));
288let eqa = self.v1.cmpeq(a);
289let eqb = self.v1.cmpeq(b);
290let eqc = self.v1.cmpeq(c);
291let eqd = self.v1.cmpeq(d);
292let or1 = eqa.or(eqb);
293let or2 = eqc.or(eqd);
294let or3 = or1.or(or2);
295if or3.movemask_will_have_non_zero() {
296let mask = eqd.movemask();
297if mask.has_non_zero() {
298return Some(cur.add(3 * V::BYTES).add(topos(mask)));
299 }
300301let mask = eqc.movemask();
302if mask.has_non_zero() {
303return Some(cur.add(2 * V::BYTES).add(topos(mask)));
304 }
305306let mask = eqb.movemask();
307if mask.has_non_zero() {
308return Some(cur.add(1 * V::BYTES).add(topos(mask)));
309 }
310311let mask = eqa.movemask();
312if true {
if !mask.has_non_zero() {
::core::panicking::panic("assertion failed: mask.has_non_zero()")
};
};debug_assert!(mask.has_non_zero());
313return Some(cur.add(topos(mask)));
314 }
315 }
316 }
317while cur >= start.add(V::BYTES) {
318if true {
if !(cur.distance(start) >= V::BYTES) {
::core::panicking::panic("assertion failed: cur.distance(start) >= V::BYTES")
};
};debug_assert!(cur.distance(start) >= V::BYTES);
319 cur = cur.sub(V::BYTES);
320if let Some(cur) = self.search_chunk(cur, topos) {
321return Some(cur);
322 }
323 }
324if cur > start {
325if true {
if !(cur.distance(start) < V::BYTES) {
::core::panicking::panic("assertion failed: cur.distance(start) < V::BYTES")
};
};debug_assert!(cur.distance(start) < V::BYTES);
326return self.search_chunk(start, topos);
327 }
328None329 }
330331/// Return a count of all matching bytes in the given haystack.
332 ///
333 /// # Safety
334 ///
335 /// * It must be the case that `start < end` and that the distance between
336 /// them is at least equal to `V::BYTES`. That is, it must always be valid
337 /// to do at least an unaligned load of `V` at `start`.
338 /// * Both `start` and `end` must be valid for reads.
339 /// * Both `start` and `end` must point to an initialized value.
340 /// * Both `start` and `end` must point to the same allocated object and
341 /// must either be in bounds or at most one byte past the end of the
342 /// allocated object.
343 /// * Both `start` and `end` must be _derived from_ a pointer to the same
344 /// object.
345 /// * The distance between `start` and `end` must not overflow `isize`.
346 /// * The distance being in bounds must not rely on "wrapping around" the
347 /// address space.
348#[inline(always)]
349pub(crate) unsafe fn count_raw(
350&self,
351 start: *const u8,
352 end: *const u8,
353 ) -> usize {
354if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
355356let confirm = |b| b == self.needle1();
357let len = end.distance(start);
358if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
359 len >= V::BYTES,
360"haystack has length {}, but must be at least {}",
361 len,
362 V::BYTES
363 );
364365// Set `cur` to the first V-aligned pointer greater than `start`.
366let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
367// Count any matching bytes before we start our aligned loop.
368let mut count = count_byte_by_byte(start, cur, confirm);
369if true {
if !(cur > start && end.sub(V::BYTES) >= start) {
::core::panicking::panic("assertion failed: cur > start && end.sub(V::BYTES) >= start")
};
};debug_assert!(cur > start && end.sub(V::BYTES) >= start);
370if len >= Self::LOOP_SIZE {
371while cur <= end.sub(Self::LOOP_SIZE) {
372if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
373374let a = V::load_aligned(cur);
375let b = V::load_aligned(cur.add(1 * V::BYTES));
376let c = V::load_aligned(cur.add(2 * V::BYTES));
377let d = V::load_aligned(cur.add(3 * V::BYTES));
378let eqa = self.v1.cmpeq(a);
379let eqb = self.v1.cmpeq(b);
380let eqc = self.v1.cmpeq(c);
381let eqd = self.v1.cmpeq(d);
382 count += eqa.movemask().count_ones();
383 count += eqb.movemask().count_ones();
384 count += eqc.movemask().count_ones();
385 count += eqd.movemask().count_ones();
386 cur = cur.add(Self::LOOP_SIZE);
387 }
388 }
389// Handle any leftovers after the aligned loop above. We use unaligned
390 // loads here, but I believe we are guaranteed that they are aligned
391 // since `cur` is aligned.
392while cur <= end.sub(V::BYTES) {
393if true {
if !(end.distance(cur) >= V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) >= V::BYTES")
};
};debug_assert!(end.distance(cur) >= V::BYTES);
394let chunk = V::load_unaligned(cur);
395 count += self.v1.cmpeq(chunk).movemask().count_ones();
396 cur = cur.add(V::BYTES);
397 }
398// And finally count any leftovers that weren't caught above.
399count += count_byte_by_byte(cur, end, confirm);
400count401 }
402403/// Search `V::BYTES` starting at `cur` via an unaligned load.
404 ///
405 /// `mask_to_offset` should be a function that converts a `movemask` to
406 /// an offset such that `cur.add(offset)` corresponds to a pointer to the
407 /// match location if one is found. Generally it is expected to use either
408 /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
409 /// one is implementing a forward or reverse search, respectively.
410 ///
411 /// # Safety
412 ///
413 /// `cur` must be a valid pointer and it must be valid to do an unaligned
414 /// load of size `V::BYTES` at `cur`.
415#[inline(always)]
416unsafe fn search_chunk(
417&self,
418 cur: *const u8,
419 mask_to_offset: impl Fn(V::Mask) -> usize,
420 ) -> Option<*const u8> {
421let chunk = V::load_unaligned(cur);
422let mask = self.v1.cmpeq(chunk).movemask();
423if mask.has_non_zero() {
424Some(cur.add(mask_to_offset(mask)))
425 } else {
426None427 }
428 }
429}
/// Finds all occurrences of two bytes in a haystack.
///
/// That is, this reports matches of one of two possible bytes. For example,
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
/// `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Two<V> {
    s1: u8,
    s2: u8,
    v1: V,
    v2: V,
}
443444impl<V: Vector> Two<V> {
445/// The number of bytes we examine per each iteration of our search loop.
446const LOOP_SIZE: usize = 2 * V::BYTES;
447448/// Create a new searcher that finds occurrences of the byte given.
449#[inline(always)]
450pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> {
451Two {
452 s1: needle1,
453 s2: needle2,
454 v1: V::splat(needle1),
455 v2: V::splat(needle2),
456 }
457 }
458459/// Returns the first needle given to `Two::new`.
460#[inline(always)]
461pub(crate) fn needle1(&self) -> u8 {
462self.s1
463 }
464465/// Returns the second needle given to `Two::new`.
466#[inline(always)]
467pub(crate) fn needle2(&self) -> u8 {
468self.s2
469 }
470471/// Return a pointer to the first occurrence of one of the needles in the
472 /// given haystack. If no such occurrence exists, then `None` is returned.
473 ///
474 /// When a match is found, the pointer returned is guaranteed to be
475 /// `>= start` and `< end`.
476 ///
477 /// # Safety
478 ///
479 /// * It must be the case that `start < end` and that the distance between
480 /// them is at least equal to `V::BYTES`. That is, it must always be valid
481 /// to do at least an unaligned load of `V` at `start`.
482 /// * Both `start` and `end` must be valid for reads.
483 /// * Both `start` and `end` must point to an initialized value.
484 /// * Both `start` and `end` must point to the same allocated object and
485 /// must either be in bounds or at most one byte past the end of the
486 /// allocated object.
487 /// * Both `start` and `end` must be _derived from_ a pointer to the same
488 /// object.
489 /// * The distance between `start` and `end` must not overflow `isize`.
490 /// * The distance being in bounds must not rely on "wrapping around" the
491 /// address space.
492#[inline(always)]
493pub(crate) unsafe fn find_raw(
494&self,
495 start: *const u8,
496 end: *const u8,
497 ) -> Option<*const u8> {
498// If we want to support vectors bigger than 256 bits, we probably
499 // need to move up to using a u64 for the masks used below. Currently
500 // they are 32 bits, which means we're SOL for vectors that need masks
501 // bigger than 32 bits. Overall unclear until there's a use case.
502if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
503504let topos = V::Mask::first_offset;
505let len = end.distance(start);
506if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
507 len >= V::BYTES,
508"haystack has length {}, but must be at least {}",
509 len,
510 V::BYTES
511 );
512513// Search a possibly unaligned chunk at `start`. This covers any part
514 // of the haystack prior to where aligned loads can start.
515if let Some(cur) = self.search_chunk(start, topos) {
516return Some(cur);
517 }
518// Set `cur` to the first V-aligned pointer greater than `start`.
519let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
520if true {
if !(cur > start && end.sub(V::BYTES) >= start) {
::core::panicking::panic("assertion failed: cur > start && end.sub(V::BYTES) >= start")
};
};debug_assert!(cur > start && end.sub(V::BYTES) >= start);
521if len >= Self::LOOP_SIZE {
522while cur <= end.sub(Self::LOOP_SIZE) {
523if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
524525let a = V::load_aligned(cur);
526let b = V::load_aligned(cur.add(V::BYTES));
527let eqa1 = self.v1.cmpeq(a);
528let eqb1 = self.v1.cmpeq(b);
529let eqa2 = self.v2.cmpeq(a);
530let eqb2 = self.v2.cmpeq(b);
531let or1 = eqa1.or(eqb1);
532let or2 = eqa2.or(eqb2);
533let or3 = or1.or(or2);
534if or3.movemask_will_have_non_zero() {
535let mask = eqa1.movemask().or(eqa2.movemask());
536if mask.has_non_zero() {
537return Some(cur.add(topos(mask)));
538 }
539540let mask = eqb1.movemask().or(eqb2.movemask());
541if true {
if !mask.has_non_zero() {
::core::panicking::panic("assertion failed: mask.has_non_zero()")
};
};debug_assert!(mask.has_non_zero());
542return Some(cur.add(V::BYTES).add(topos(mask)));
543 }
544 cur = cur.add(Self::LOOP_SIZE);
545 }
546 }
547// Handle any leftovers after the aligned loop above. We use unaligned
548 // loads here, but I believe we are guaranteed that they are aligned
549 // since `cur` is aligned.
550while cur <= end.sub(V::BYTES) {
551if true {
if !(end.distance(cur) >= V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) >= V::BYTES")
};
};debug_assert!(end.distance(cur) >= V::BYTES);
552if let Some(cur) = self.search_chunk(cur, topos) {
553return Some(cur);
554 }
555 cur = cur.add(V::BYTES);
556 }
557// Finally handle any remaining bytes less than the size of V. In this
558 // case, our pointer may indeed be unaligned and the load may overlap
559 // with the previous one. But that's okay since we know the previous
560 // load didn't lead to a match (otherwise we wouldn't be here).
561if cur < end {
562if true {
if !(end.distance(cur) < V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) < V::BYTES")
};
};debug_assert!(end.distance(cur) < V::BYTES);
563cur = cur.sub(V::BYTES - end.distance(cur));
564if true {
match (&end.distance(cur), &V::BYTES) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(end.distance(cur), V::BYTES);
565return self.search_chunk(cur, topos);
566 }
567None568 }
569570/// Return a pointer to the last occurrence of the needle in the given
571 /// haystack. If no such occurrence exists, then `None` is returned.
572 ///
573 /// When a match is found, the pointer returned is guaranteed to be
574 /// `>= start` and `< end`.
575 ///
576 /// # Safety
577 ///
578 /// * It must be the case that `start < end` and that the distance between
579 /// them is at least equal to `V::BYTES`. That is, it must always be valid
580 /// to do at least an unaligned load of `V` at `start`.
581 /// * Both `start` and `end` must be valid for reads.
582 /// * Both `start` and `end` must point to an initialized value.
583 /// * Both `start` and `end` must point to the same allocated object and
584 /// must either be in bounds or at most one byte past the end of the
585 /// allocated object.
586 /// * Both `start` and `end` must be _derived from_ a pointer to the same
587 /// object.
588 /// * The distance between `start` and `end` must not overflow `isize`.
589 /// * The distance being in bounds must not rely on "wrapping around" the
590 /// address space.
591#[inline(always)]
592pub(crate) unsafe fn rfind_raw(
593&self,
594 start: *const u8,
595 end: *const u8,
596 ) -> Option<*const u8> {
597// If we want to support vectors bigger than 256 bits, we probably
598 // need to move up to using a u64 for the masks used below. Currently
599 // they are 32 bits, which means we're SOL for vectors that need masks
600 // bigger than 32 bits. Overall unclear until there's a use case.
601if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
602603let topos = V::Mask::last_offset;
604let len = end.distance(start);
605if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
606 len >= V::BYTES,
607"haystack has length {}, but must be at least {}",
608 len,
609 V::BYTES
610 );
611612if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
613return Some(cur);
614 }
615let mut cur = end.sub(end.as_usize() & V::ALIGN);
616if true {
if !(start <= cur && cur <= end) {
::core::panicking::panic("assertion failed: start <= cur && cur <= end")
};
};debug_assert!(start <= cur && cur <= end);
617if len >= Self::LOOP_SIZE {
618while cur >= start.add(Self::LOOP_SIZE) {
619if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
620621 cur = cur.sub(Self::LOOP_SIZE);
622let a = V::load_aligned(cur);
623let b = V::load_aligned(cur.add(V::BYTES));
624let eqa1 = self.v1.cmpeq(a);
625let eqb1 = self.v1.cmpeq(b);
626let eqa2 = self.v2.cmpeq(a);
627let eqb2 = self.v2.cmpeq(b);
628let or1 = eqa1.or(eqb1);
629let or2 = eqa2.or(eqb2);
630let or3 = or1.or(or2);
631if or3.movemask_will_have_non_zero() {
632let mask = eqb1.movemask().or(eqb2.movemask());
633if mask.has_non_zero() {
634return Some(cur.add(V::BYTES).add(topos(mask)));
635 }
636637let mask = eqa1.movemask().or(eqa2.movemask());
638if true {
if !mask.has_non_zero() {
::core::panicking::panic("assertion failed: mask.has_non_zero()")
};
};debug_assert!(mask.has_non_zero());
639return Some(cur.add(topos(mask)));
640 }
641 }
642 }
643while cur >= start.add(V::BYTES) {
644if true {
if !(cur.distance(start) >= V::BYTES) {
::core::panicking::panic("assertion failed: cur.distance(start) >= V::BYTES")
};
};debug_assert!(cur.distance(start) >= V::BYTES);
645 cur = cur.sub(V::BYTES);
646if let Some(cur) = self.search_chunk(cur, topos) {
647return Some(cur);
648 }
649 }
650if cur > start {
651if true {
if !(cur.distance(start) < V::BYTES) {
::core::panicking::panic("assertion failed: cur.distance(start) < V::BYTES")
};
};debug_assert!(cur.distance(start) < V::BYTES);
652return self.search_chunk(start, topos);
653 }
654None655 }
656657/// Search `V::BYTES` starting at `cur` via an unaligned load.
658 ///
659 /// `mask_to_offset` should be a function that converts a `movemask` to
660 /// an offset such that `cur.add(offset)` corresponds to a pointer to the
661 /// match location if one is found. Generally it is expected to use either
662 /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
663 /// one is implementing a forward or reverse search, respectively.
664 ///
665 /// # Safety
666 ///
667 /// `cur` must be a valid pointer and it must be valid to do an unaligned
668 /// load of size `V::BYTES` at `cur`.
669#[inline(always)]
670unsafe fn search_chunk(
671&self,
672 cur: *const u8,
673 mask_to_offset: impl Fn(V::Mask) -> usize,
674 ) -> Option<*const u8> {
675let chunk = V::load_unaligned(cur);
676let eq1 = self.v1.cmpeq(chunk);
677let eq2 = self.v2.cmpeq(chunk);
678let mask = eq1.or(eq2).movemask();
679if mask.has_non_zero() {
680let mask1 = eq1.movemask();
681let mask2 = eq2.movemask();
682Some(cur.add(mask_to_offset(mask1.or(mask2))))
683 } else {
684None685 }
686 }
687}
/// Finds all occurrences of three bytes in a haystack.
///
/// That is, this reports matches of one of three possible bytes. For
/// example, searching for `a`, `b` or `o` in `afoobar` would report matches
/// at offsets `0`, `2`, `3`, `4` and `5`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Three<V> {
    s1: u8,
    s2: u8,
    s3: u8,
    v1: V,
    v2: V,
    v3: V,
}
703704impl<V: Vector> Three<V> {
705/// The number of bytes we examine per each iteration of our search loop.
706const LOOP_SIZE: usize = 2 * V::BYTES;
707708/// Create a new searcher that finds occurrences of the byte given.
709#[inline(always)]
710pub(crate) unsafe fn new(
711 needle1: u8,
712 needle2: u8,
713 needle3: u8,
714 ) -> Three<V> {
715Three {
716 s1: needle1,
717 s2: needle2,
718 s3: needle3,
719 v1: V::splat(needle1),
720 v2: V::splat(needle2),
721 v3: V::splat(needle3),
722 }
723 }
724725/// Returns the first needle given to `Three::new`.
726#[inline(always)]
727pub(crate) fn needle1(&self) -> u8 {
728self.s1
729 }
730731/// Returns the second needle given to `Three::new`.
732#[inline(always)]
733pub(crate) fn needle2(&self) -> u8 {
734self.s2
735 }
736737/// Returns the third needle given to `Three::new`.
738#[inline(always)]
739pub(crate) fn needle3(&self) -> u8 {
740self.s3
741 }
742743/// Return a pointer to the first occurrence of one of the needles in the
744 /// given haystack. If no such occurrence exists, then `None` is returned.
745 ///
746 /// When a match is found, the pointer returned is guaranteed to be
747 /// `>= start` and `< end`.
748 ///
749 /// # Safety
750 ///
751 /// * It must be the case that `start < end` and that the distance between
752 /// them is at least equal to `V::BYTES`. That is, it must always be valid
753 /// to do at least an unaligned load of `V` at `start`.
754 /// * Both `start` and `end` must be valid for reads.
755 /// * Both `start` and `end` must point to an initialized value.
756 /// * Both `start` and `end` must point to the same allocated object and
757 /// must either be in bounds or at most one byte past the end of the
758 /// allocated object.
759 /// * Both `start` and `end` must be _derived from_ a pointer to the same
760 /// object.
761 /// * The distance between `start` and `end` must not overflow `isize`.
762 /// * The distance being in bounds must not rely on "wrapping around" the
763 /// address space.
764#[inline(always)]
765pub(crate) unsafe fn find_raw(
766&self,
767 start: *const u8,
768 end: *const u8,
769 ) -> Option<*const u8> {
770// If we want to support vectors bigger than 256 bits, we probably
771 // need to move up to using a u64 for the masks used below. Currently
772 // they are 32 bits, which means we're SOL for vectors that need masks
773 // bigger than 32 bits. Overall unclear until there's a use case.
774if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
775776let topos = V::Mask::first_offset;
777let len = end.distance(start);
778if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
779 len >= V::BYTES,
780"haystack has length {}, but must be at least {}",
781 len,
782 V::BYTES
783 );
784785// Search a possibly unaligned chunk at `start`. This covers any part
786 // of the haystack prior to where aligned loads can start.
787if let Some(cur) = self.search_chunk(start, topos) {
788return Some(cur);
789 }
790// Set `cur` to the first V-aligned pointer greater than `start`.
791let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
792if true {
if !(cur > start && end.sub(V::BYTES) >= start) {
::core::panicking::panic("assertion failed: cur > start && end.sub(V::BYTES) >= start")
};
};debug_assert!(cur > start && end.sub(V::BYTES) >= start);
793if len >= Self::LOOP_SIZE {
794while cur <= end.sub(Self::LOOP_SIZE) {
795if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
796797let a = V::load_aligned(cur);
798let b = V::load_aligned(cur.add(V::BYTES));
799let eqa1 = self.v1.cmpeq(a);
800let eqb1 = self.v1.cmpeq(b);
801let eqa2 = self.v2.cmpeq(a);
802let eqb2 = self.v2.cmpeq(b);
803let eqa3 = self.v3.cmpeq(a);
804let eqb3 = self.v3.cmpeq(b);
805let or1 = eqa1.or(eqb1);
806let or2 = eqa2.or(eqb2);
807let or3 = eqa3.or(eqb3);
808let or4 = or1.or(or2);
809let or5 = or3.or(or4);
810if or5.movemask_will_have_non_zero() {
811let mask = eqa1
812 .movemask()
813 .or(eqa2.movemask())
814 .or(eqa3.movemask());
815if mask.has_non_zero() {
816return Some(cur.add(topos(mask)));
817 }
818819let mask = eqb1
820 .movemask()
821 .or(eqb2.movemask())
822 .or(eqb3.movemask());
823if true {
if !mask.has_non_zero() {
::core::panicking::panic("assertion failed: mask.has_non_zero()")
};
};debug_assert!(mask.has_non_zero());
824return Some(cur.add(V::BYTES).add(topos(mask)));
825 }
826 cur = cur.add(Self::LOOP_SIZE);
827 }
828 }
829// Handle any leftovers after the aligned loop above. We use unaligned
830 // loads here, but I believe we are guaranteed that they are aligned
831 // since `cur` is aligned.
832while cur <= end.sub(V::BYTES) {
833if true {
if !(end.distance(cur) >= V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) >= V::BYTES")
};
};debug_assert!(end.distance(cur) >= V::BYTES);
834if let Some(cur) = self.search_chunk(cur, topos) {
835return Some(cur);
836 }
837 cur = cur.add(V::BYTES);
838 }
839// Finally handle any remaining bytes less than the size of V. In this
840 // case, our pointer may indeed be unaligned and the load may overlap
841 // with the previous one. But that's okay since we know the previous
842 // load didn't lead to a match (otherwise we wouldn't be here).
843if cur < end {
844if true {
if !(end.distance(cur) < V::BYTES) {
::core::panicking::panic("assertion failed: end.distance(cur) < V::BYTES")
};
};debug_assert!(end.distance(cur) < V::BYTES);
845cur = cur.sub(V::BYTES - end.distance(cur));
846if true {
match (&end.distance(cur), &V::BYTES) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(end.distance(cur), V::BYTES);
847return self.search_chunk(cur, topos);
848 }
849None850 }
851852/// Return a pointer to the last occurrence of the needle in the given
853 /// haystack. If no such occurrence exists, then `None` is returned.
854 ///
855 /// When a match is found, the pointer returned is guaranteed to be
856 /// `>= start` and `< end`.
857 ///
858 /// # Safety
859 ///
860 /// * It must be the case that `start < end` and that the distance between
861 /// them is at least equal to `V::BYTES`. That is, it must always be valid
862 /// to do at least an unaligned load of `V` at `start`.
863 /// * Both `start` and `end` must be valid for reads.
864 /// * Both `start` and `end` must point to an initialized value.
865 /// * Both `start` and `end` must point to the same allocated object and
866 /// must either be in bounds or at most one byte past the end of the
867 /// allocated object.
868 /// * Both `start` and `end` must be _derived from_ a pointer to the same
869 /// object.
870 /// * The distance between `start` and `end` must not overflow `isize`.
871 /// * The distance being in bounds must not rely on "wrapping around" the
872 /// address space.
873#[inline(always)]
874pub(crate) unsafe fn rfind_raw(
875&self,
876 start: *const u8,
877 end: *const u8,
878 ) -> Option<*const u8> {
879// If we want to support vectors bigger than 256 bits, we probably
880 // need to move up to using a u64 for the masks used below. Currently
881 // they are 32 bits, which means we're SOL for vectors that need masks
882 // bigger than 32 bits. Overall unclear until there's a use case.
883if true {
if !(V::BYTES <= 32) {
{
::core::panicking::panic_fmt(format_args!("vector cannot be bigger than 32 bytes"));
}
};
};debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
884885let topos = V::Mask::last_offset;
886let len = end.distance(start);
887if true {
if !(len >= V::BYTES) {
{
::core::panicking::panic_fmt(format_args!("haystack has length {0}, but must be at least {1}",
len, V::BYTES));
}
};
};debug_assert!(
888 len >= V::BYTES,
889"haystack has length {}, but must be at least {}",
890 len,
891 V::BYTES
892 );
893894if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
895return Some(cur);
896 }
897let mut cur = end.sub(end.as_usize() & V::ALIGN);
898if true {
if !(start <= cur && cur <= end) {
::core::panicking::panic("assertion failed: start <= cur && cur <= end")
};
};debug_assert!(start <= cur && cur <= end);
899if len >= Self::LOOP_SIZE {
900while cur >= start.add(Self::LOOP_SIZE) {
901if true {
match (&0, &(cur.as_usize() % V::BYTES)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(0, cur.as_usize() % V::BYTES);
902903 cur = cur.sub(Self::LOOP_SIZE);
904let a = V::load_aligned(cur);
905let b = V::load_aligned(cur.add(V::BYTES));
906let eqa1 = self.v1.cmpeq(a);
907let eqb1 = self.v1.cmpeq(b);
908let eqa2 = self.v2.cmpeq(a);
909let eqb2 = self.v2.cmpeq(b);
910let eqa3 = self.v3.cmpeq(a);
911let eqb3 = self.v3.cmpeq(b);
912let or1 = eqa1.or(eqb1);
913let or2 = eqa2.or(eqb2);
914let or3 = eqa3.or(eqb3);
915let or4 = or1.or(or2);
916let or5 = or3.or(or4);
917if or5.movemask_will_have_non_zero() {
918let mask = eqb1
919 .movemask()
920 .or(eqb2.movemask())
921 .or(eqb3.movemask());
922if mask.has_non_zero() {
923return Some(cur.add(V::BYTES).add(topos(mask)));
924 }
925926let mask = eqa1
927 .movemask()
928 .or(eqa2.movemask())
929 .or(eqa3.movemask());
930if true {
if !mask.has_non_zero() {
::core::panicking::panic("assertion failed: mask.has_non_zero()")
};
};debug_assert!(mask.has_non_zero());
931return Some(cur.add(topos(mask)));
932 }
933 }
934 }
935while cur >= start.add(V::BYTES) {
936if true {
if !(cur.distance(start) >= V::BYTES) {
::core::panicking::panic("assertion failed: cur.distance(start) >= V::BYTES")
};
};debug_assert!(cur.distance(start) >= V::BYTES);
937 cur = cur.sub(V::BYTES);
938if let Some(cur) = self.search_chunk(cur, topos) {
939return Some(cur);
940 }
941 }
942if cur > start {
943if true {
if !(cur.distance(start) < V::BYTES) {
::core::panicking::panic("assertion failed: cur.distance(start) < V::BYTES")
};
};debug_assert!(cur.distance(start) < V::BYTES);
944return self.search_chunk(start, topos);
945 }
946None947 }
948949/// Search `V::BYTES` starting at `cur` via an unaligned load.
950 ///
951 /// `mask_to_offset` should be a function that converts a `movemask` to
952 /// an offset such that `cur.add(offset)` corresponds to a pointer to the
953 /// match location if one is found. Generally it is expected to use either
954 /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
955 /// one is implementing a forward or reverse search, respectively.
956 ///
957 /// # Safety
958 ///
959 /// `cur` must be a valid pointer and it must be valid to do an unaligned
960 /// load of size `V::BYTES` at `cur`.
961#[inline(always)]
962unsafe fn search_chunk(
963&self,
964 cur: *const u8,
965 mask_to_offset: impl Fn(V::Mask) -> usize,
966 ) -> Option<*const u8> {
967let chunk = V::load_unaligned(cur);
968let eq1 = self.v1.cmpeq(chunk);
969let eq2 = self.v2.cmpeq(chunk);
970let eq3 = self.v3.cmpeq(chunk);
971let mask = eq1.or(eq2).or(eq3).movemask();
972if mask.has_non_zero() {
973let mask1 = eq1.movemask();
974let mask2 = eq2.movemask();
975let mask3 = eq3.movemask();
976Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3))))
977 } else {
978None979 }
980 }
981}
/// An iterator over all occurrences of a set of bytes in a haystack.
///
/// This iterator implements the routines necessary to provide a
/// `DoubleEndedIterator` impl, which means it can also be used to find
/// occurrences in reverse order.
///
/// The lifetime parameters are as follows:
///
/// * `'h` refers to the lifetime of the haystack being searched.
///
/// This type is intended to be used to implement all iterators for the
/// `memchr` family of functions. It handles a tiny bit of marginally tricky
/// raw pointer math, but otherwise expects the caller to provide `find_raw`
/// and `rfind_raw` routines for each call of `next` and `next_back`,
/// respectively.
#[derive(Clone, Debug)]
pub(crate) struct Iter<'h> {
    /// The original starting point into the haystack. We use this to convert
    /// pointers to offsets.
    original_start: *const u8,
    /// The current starting point into the haystack. That is, where the next
    /// search will begin.
    start: *const u8,
    /// The current ending point into the haystack. That is, where the next
    /// reverse search will begin.
    end: *const u8,
    /// A marker for tracking the lifetime of the start/cur_start/cur_end
    /// pointers above, which all point into the haystack.
    haystack: core::marker::PhantomData<&'h [u8]>,
}
10131014// SAFETY: Iter contains no shared references to anything that performs any
1015// interior mutations. Also, the lifetime guarantees that Iter will not outlive
1016// the haystack.
1017unsafe impl<'h> Sendfor Iter<'h> {}
10181019// SAFETY: Iter perform no interior mutations, therefore no explicit
1020// synchronization is necessary. Also, the lifetime guarantees that Iter will
1021// not outlive the haystack.
1022unsafe impl<'h> Syncfor Iter<'h> {}
10231024impl<'h> Iter<'h> {
1025/// Create a new generic memchr iterator.
1026#[inline(always)]
1027pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> {
1028Iter {
1029 original_start: haystack.as_ptr(),
1030 start: haystack.as_ptr(),
1031 end: haystack.as_ptr().wrapping_add(haystack.len()),
1032 haystack: core::marker::PhantomData,
1033 }
1034 }
10351036/// Returns the next occurrence in the forward direction.
1037 ///
1038 /// # Safety
1039 ///
1040 /// Callers must ensure that if a pointer is returned from the closure
1041 /// provided, then it must be greater than or equal to the start pointer
1042 /// and less than the end pointer.
1043#[inline(always)]
1044pub(crate) unsafe fn next(
1045&mut self,
1046mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1047 ) -> Option<usize> {
1048// SAFETY: Pointers are derived directly from the same &[u8] haystack.
1049 // We only ever modify start/end corresponding to a matching offset
1050 // found between start and end. Thus all changes to start/end maintain
1051 // our safety requirements.
1052 //
1053 // The only other assumption we rely on is that the pointer returned
1054 // by `find_raw` satisfies `self.start <= found < self.end`, and that
1055 // safety contract is forwarded to the caller.
1056let found = find_raw(self.start, self.end)?;
1057let result = found.distance(self.original_start);
1058self.start = found.add(1);
1059Some(result)
1060 }
10611062/// Returns the number of remaining elements in this iterator.
1063#[inline(always)]
1064pub(crate) fn count(
1065self,
1066mut count_raw: impl FnMut(*const u8, *const u8) -> usize,
1067 ) -> usize {
1068// SAFETY: Pointers are derived directly from the same &[u8] haystack.
1069 // We only ever modify start/end corresponding to a matching offset
1070 // found between start and end. Thus all changes to start/end maintain
1071 // our safety requirements.
1072count_raw(self.start, self.end)
1073 }
10741075/// Returns the next occurrence in reverse.
1076 ///
1077 /// # Safety
1078 ///
1079 /// Callers must ensure that if a pointer is returned from the closure
1080 /// provided, then it must be greater than or equal to the start pointer
1081 /// and less than the end pointer.
1082#[inline(always)]
1083pub(crate) unsafe fn next_back(
1084&mut self,
1085mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1086 ) -> Option<usize> {
1087// SAFETY: Pointers are derived directly from the same &[u8] haystack.
1088 // We only ever modify start/end corresponding to a matching offset
1089 // found between start and end. Thus all changes to start/end maintain
1090 // our safety requirements.
1091 //
1092 // The only other assumption we rely on is that the pointer returned
1093 // by `rfind_raw` satisfies `self.start <= found < self.end`, and that
1094 // safety contract is forwarded to the caller.
1095let found = rfind_raw(self.start, self.end)?;
1096let result = found.distance(self.original_start);
1097self.end = found;
1098Some(result)
1099 }
11001101/// Provides an implementation of `Iterator::size_hint`.
1102#[inline(always)]
1103pub(crate) fn size_hint(&self) -> (usize, Option<usize>) {
1104 (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize())))
1105 }
1106}
11071108/// Search a slice using a function that operates on raw pointers.
1109///
1110/// Given a function to search a contiguous sequence of memory for the location
1111/// of a non-empty set of bytes, this will execute that search on a slice of
1112/// bytes. The pointer returned by the given function will be converted to an
1113/// offset relative to the starting point of the given slice. That is, if a
1114/// match is found, the offset returned by this routine is guaranteed to be a
1115/// valid index into `haystack`.
1116///
1117/// Callers may use this for a forward or reverse search.
1118///
1119/// # Safety
1120///
1121/// Callers must ensure that if a pointer is returned by `find_raw`, then the
1122/// pointer must be greater than or equal to the starting pointer and less than
1123/// the end pointer.
1124#[inline(always)]
1125pub(crate) unsafe fn search_slice_with_raw(
1126 haystack: &[u8],
1127mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1128) -> Option<usize> {
1129// SAFETY: We rely on `find_raw` to return a correct and valid pointer, but
1130 // otherwise, `start` and `end` are valid due to the guarantees provided by
1131 // a &[u8].
1132let start = haystack.as_ptr();
1133let end = start.add(haystack.len());
1134let found = find_raw(start, end)?;
1135Some(found.distance(start))
1136}
/// Performs a forward byte-at-a-time loop until either `ptr >= end_ptr` or
/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
/// returned. If the latter occurs, then the pointer at which `confirm`
/// returns `true` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start_ptr <=
/// ptr` and `ptr <= end_ptr`.
#[inline(always)]
pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> Option<*const u8> {
    debug_assert!(start <= end);
    let mut ptr = start;
    while ptr < end {
        if confirm(*ptr) {
            return Some(ptr);
        }
        ptr = ptr.offset(1);
    }
    None
}
/// Performs a reverse byte-at-a-time loop until either `ptr < start_ptr` or
/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
/// returned. If the latter occurs, then the pointer at which `confirm`
/// returns `true` is returned.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start_ptr <=
/// ptr` and `ptr <= end_ptr`.
#[inline(always)]
pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> Option<*const u8> {
    debug_assert!(start <= end);

    let mut ptr = end;
    while ptr > start {
        // Decrement first: `end` is one past the last valid byte.
        ptr = ptr.offset(-1);
        if confirm(*ptr) {
            return Some(ptr);
        }
    }
    None
}
/// Performs a forward byte-at-a-time loop until `ptr >= end_ptr` and returns
/// the number of times `confirm(*ptr)` returns `true`.
///
/// # Safety
///
/// Callers must provide valid pointers and they must satisfy `start_ptr <=
/// ptr` and `ptr <= end_ptr`.
#[inline(always)]
pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>(
    start: *const u8,
    end: *const u8,
    confirm: F,
) -> usize {
    debug_assert!(start <= end);
    let mut ptr = start;
    let mut count = 0;
    while ptr < end {
        if confirm(*ptr) {
            count += 1;
        }
        ptr = ptr.offset(1);
    }
    count
}