1use core::fmt::Debug;
23use alloc::{
4boxed::Box, collections::BTreeMap, format, sync::Arc, vec, vec::Vec,
5};
67use crate::{
8 packed::{
9ext::Pointer,
10pattern::Patterns,
11 vector::{FatVector, Vector},
12 },
13util::int::U32,
14PatternID,
15};
1617/// A match type specialized to the Teddy implementations below.
18///
19/// Essentially, instead of representing a match at byte offsets, we use
20/// raw pointers. This is because the implementations below operate on raw
21/// pointers, and so this is a more natural return type based on how the
22/// implementation works.
23///
24/// Also, the `PatternID` used here is a `u16`.
25#[derive(#[automatically_derived]
impl ::core::clone::Clone for Match {
#[inline]
fn clone(&self) -> Match {
let _: ::core::clone::AssertParamIsClone<PatternID>;
let _: ::core::clone::AssertParamIsClone<*const u8>;
let _: ::core::clone::AssertParamIsClone<*const u8>;
*self
}
}Clone, #[automatically_derived]
impl ::core::marker::Copy for Match { }Copy, #[automatically_derived]
impl ::core::fmt::Debug for Match {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field3_finish(f, "Match", "pid",
&self.pid, "start", &self.start, "end", &&self.end)
}
}Debug)]
26pub(crate) struct Match {
27 pid: PatternID,
28 start: *const u8,
29 end: *const u8,
30}
3132impl Match {
33/// Returns the ID of the pattern that matched.
34pub(crate) fn pattern(&self) -> PatternID {
35self.pid
36 }
3738/// Returns a pointer into the haystack at which the match starts.
39pub(crate) fn start(&self) -> *const u8 {
40self.start
41 }
4243/// Returns a pointer into the haystack at which the match ends.
44pub(crate) fn end(&self) -> *const u8 {
45self.end
46 }
47}
4849/// A "slim" Teddy implementation that is generic over both the vector type
50/// and the minimum length of the patterns being searched for.
51///
52/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths.
53#[derive(#[automatically_derived]
impl<V: ::core::clone::Clone, const BYTES : usize> ::core::clone::Clone for
Slim<V, BYTES> {
#[inline]
fn clone(&self) -> Slim<V, BYTES> {
Slim {
teddy: ::core::clone::Clone::clone(&self.teddy),
masks: ::core::clone::Clone::clone(&self.masks),
}
}
}Clone, #[automatically_derived]
impl<V: ::core::fmt::Debug, const BYTES : usize> ::core::fmt::Debug for
Slim<V, BYTES> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "Slim", "teddy",
&self.teddy, "masks", &&self.masks)
}
}Debug)]
54pub(crate) struct Slim<V, const BYTES: usize> {
55/// A generic data structure for doing "slim" Teddy verification.
56teddy: Teddy<8>,
57/// The masks used as inputs to the shuffle operation to generate
58 /// candidates (which are fed into the verification routines).
59masks: [Mask<V>; BYTES],
60}
6162impl<V: Vector, const BYTES: usize> Slim<V, BYTES> {
63/// Create a new "slim" Teddy searcher for the given patterns.
64 ///
65 /// # Panics
66 ///
67 /// This panics when `BYTES` is any value other than 1, 2, 3 or 4.
68 ///
69 /// # Safety
70 ///
71 /// Callers must ensure that this is okay to call in the current target for
72 /// the current CPU.
73#[inline(always)]
74pub(crate) unsafe fn new(patterns: Arc<Patterns>) -> Slim<V, BYTES> {
75if !(1 <= BYTES && BYTES <= 4) {
{
::core::panicking::panic_fmt(format_args!("only 1, 2, 3 or 4 bytes are supported"));
}
};assert!(
761 <= BYTES && BYTES <= 4,
77"only 1, 2, 3 or 4 bytes are supported"
78);
79let teddy = Teddy::new(patterns);
80let masks = SlimMaskBuilder::from_teddy(&teddy);
81Slim { teddy, masks }
82 }
8384/// Returns the approximate total amount of heap used by this type, in
85 /// units of bytes.
86#[inline(always)]
87pub(crate) fn memory_usage(&self) -> usize {
88self.teddy.memory_usage()
89 }
9091/// Returns the minimum length, in bytes, that a haystack must be in order
92 /// to use it with this searcher.
93#[inline(always)]
94pub(crate) fn minimum_len(&self) -> usize {
95 V::BYTES + (BYTES - 1)
96 }
97}
9899impl<V: Vector> Slim<V, 1> {
100/// Look for an occurrences of the patterns in this finder in the haystack
101 /// given by the `start` and `end` pointers.
102 ///
103 /// If no match could be found, then `None` is returned.
104 ///
105 /// # Safety
106 ///
107 /// The given pointers representing the haystack must be valid to read
108 /// from. They must also point to a region of memory that is at least the
109 /// minimum length required by this searcher.
110 ///
111 /// Callers must ensure that this is okay to call in the current target for
112 /// the current CPU.
113#[inline(always)]
114pub(crate) unsafe fn find(
115&self,
116 start: *const u8,
117 end: *const u8,
118 ) -> Option<Match> {
119let len = end.distance(start);
120if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
121let mut cur = start;
122while cur <= end.sub(V::BYTES) {
123if let Some(m) = self.find_one(cur, end) {
124return Some(m);
125 }
126 cur = cur.add(V::BYTES);
127 }
128if cur < end {
129cur = end.sub(V::BYTES);
130if let Some(m) = self.find_one(cur, end) {
131return Some(m);
132 }
133 }
134None135 }
136137/// Look for a match starting at the `V::BYTES` at and after `cur`. If
138 /// there isn't one, then `None` is returned.
139 ///
140 /// # Safety
141 ///
142 /// The given pointers representing the haystack must be valid to read
143 /// from. They must also point to a region of memory that is at least the
144 /// minimum length required by this searcher.
145 ///
146 /// Callers must ensure that this is okay to call in the current target for
147 /// the current CPU.
148#[inline(always)]
149unsafe fn find_one(
150&self,
151 cur: *const u8,
152 end: *const u8,
153 ) -> Option<Match> {
154let c = self.candidate(cur);
155if !c.is_zero() {
156if let Some(m) = self.teddy.verify(cur, end, c) {
157return Some(m);
158 }
159 }
160None161 }
162163/// Look for a candidate match (represented as a vector) starting at the
164 /// `V::BYTES` at and after `cur`. If there isn't one, then a vector with
165 /// all bits set to zero is returned.
166 ///
167 /// # Safety
168 ///
169 /// The given pointer representing the haystack must be valid to read
170 /// from.
171 ///
172 /// Callers must ensure that this is okay to call in the current target for
173 /// the current CPU.
174#[inline(always)]
175unsafe fn candidate(&self, cur: *const u8) -> V {
176let chunk = V::load_unaligned(cur);
177Mask::members1(chunk, self.masks)
178 }
179}
180181impl<V: Vector> Slim<V, 2> {
182/// See Slim<V, 1>::find.
183#[inline(always)]
184pub(crate) unsafe fn find(
185&self,
186 start: *const u8,
187 end: *const u8,
188 ) -> Option<Match> {
189let len = end.distance(start);
190if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
191let mut cur = start.add(1);
192let mut prev0 = V::splat(0xFF);
193while cur <= end.sub(V::BYTES) {
194if let Some(m) = self.find_one(cur, end, &mut prev0) {
195return Some(m);
196 }
197 cur = cur.add(V::BYTES);
198 }
199if cur < end {
200cur = end.sub(V::BYTES);
201prev0 = V::splat(0xFF);
202if let Some(m) = self.find_one(cur, end, &mut prev0) {
203return Some(m);
204 }
205 }
206None207 }
208209/// See Slim<V, 1>::find_one.
210#[inline(always)]
211unsafe fn find_one(
212&self,
213 cur: *const u8,
214 end: *const u8,
215 prev0: &mut V,
216 ) -> Option<Match> {
217let c = self.candidate(cur, prev0);
218if !c.is_zero() {
219if let Some(m) = self.teddy.verify(cur.sub(1), end, c) {
220return Some(m);
221 }
222 }
223None224 }
225226/// See Slim<V, 1>::candidate.
227#[inline(always)]
228unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V {
229let chunk = V::load_unaligned(cur);
230let (res0, res1) = Mask::members2(chunk, self.masks);
231let res0prev0 = res0.shift_in_one_byte(*prev0);
232let res = res0prev0.and(res1);
233*prev0 = res0;
234res235 }
236}
237238impl<V: Vector> Slim<V, 3> {
239/// See Slim<V, 1>::find.
240#[inline(always)]
241pub(crate) unsafe fn find(
242&self,
243 start: *const u8,
244 end: *const u8,
245 ) -> Option<Match> {
246let len = end.distance(start);
247if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
248let mut cur = start.add(2);
249let mut prev0 = V::splat(0xFF);
250let mut prev1 = V::splat(0xFF);
251while cur <= end.sub(V::BYTES) {
252if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
253return Some(m);
254 }
255 cur = cur.add(V::BYTES);
256 }
257if cur < end {
258cur = end.sub(V::BYTES);
259prev0 = V::splat(0xFF);
260prev1 = V::splat(0xFF);
261if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
262return Some(m);
263 }
264 }
265None266 }
267268/// See Slim<V, 1>::find_one.
269#[inline(always)]
270unsafe fn find_one(
271&self,
272 cur: *const u8,
273 end: *const u8,
274 prev0: &mut V,
275 prev1: &mut V,
276 ) -> Option<Match> {
277let c = self.candidate(cur, prev0, prev1);
278if !c.is_zero() {
279if let Some(m) = self.teddy.verify(cur.sub(2), end, c) {
280return Some(m);
281 }
282 }
283None284 }
285286/// See Slim<V, 1>::candidate.
287#[inline(always)]
288unsafe fn candidate(
289&self,
290 cur: *const u8,
291 prev0: &mut V,
292 prev1: &mut V,
293 ) -> V {
294let chunk = V::load_unaligned(cur);
295let (res0, res1, res2) = Mask::members3(chunk, self.masks);
296let res0prev0 = res0.shift_in_two_bytes(*prev0);
297let res1prev1 = res1.shift_in_one_byte(*prev1);
298let res = res0prev0.and(res1prev1).and(res2);
299*prev0 = res0;
300*prev1 = res1;
301res302 }
303}
304305impl<V: Vector> Slim<V, 4> {
306/// See Slim<V, 1>::find.
307#[inline(always)]
308pub(crate) unsafe fn find(
309&self,
310 start: *const u8,
311 end: *const u8,
312 ) -> Option<Match> {
313let len = end.distance(start);
314if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
315let mut cur = start.add(3);
316let mut prev0 = V::splat(0xFF);
317let mut prev1 = V::splat(0xFF);
318let mut prev2 = V::splat(0xFF);
319while cur <= end.sub(V::BYTES) {
320if let Some(m) =
321self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
322 {
323return Some(m);
324 }
325 cur = cur.add(V::BYTES);
326 }
327if cur < end {
328cur = end.sub(V::BYTES);
329prev0 = V::splat(0xFF);
330prev1 = V::splat(0xFF);
331prev2 = V::splat(0xFF);
332if let Some(m) =
333self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
334 {
335return Some(m);
336 }
337 }
338None339 }
340341/// See Slim<V, 1>::find_one.
342#[inline(always)]
343unsafe fn find_one(
344&self,
345 cur: *const u8,
346 end: *const u8,
347 prev0: &mut V,
348 prev1: &mut V,
349 prev2: &mut V,
350 ) -> Option<Match> {
351let c = self.candidate(cur, prev0, prev1, prev2);
352if !c.is_zero() {
353if let Some(m) = self.teddy.verify(cur.sub(3), end, c) {
354return Some(m);
355 }
356 }
357None358 }
359360/// See Slim<V, 1>::candidate.
361#[inline(always)]
362unsafe fn candidate(
363&self,
364 cur: *const u8,
365 prev0: &mut V,
366 prev1: &mut V,
367 prev2: &mut V,
368 ) -> V {
369let chunk = V::load_unaligned(cur);
370let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks);
371let res0prev0 = res0.shift_in_three_bytes(*prev0);
372let res1prev1 = res1.shift_in_two_bytes(*prev1);
373let res2prev2 = res2.shift_in_one_byte(*prev2);
374let res = res0prev0.and(res1prev1).and(res2prev2).and(res3);
375*prev0 = res0;
376*prev1 = res1;
377*prev2 = res2;
378res379 }
380}
381382/// A "fat" Teddy implementation that is generic over both the vector type
383/// and the minimum length of the patterns being searched for.
384///
385/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths.
386#[derive(#[automatically_derived]
impl<V: ::core::clone::Clone, const BYTES : usize> ::core::clone::Clone for
Fat<V, BYTES> {
#[inline]
fn clone(&self) -> Fat<V, BYTES> {
Fat {
teddy: ::core::clone::Clone::clone(&self.teddy),
masks: ::core::clone::Clone::clone(&self.masks),
}
}
}Clone, #[automatically_derived]
impl<V: ::core::fmt::Debug, const BYTES : usize> ::core::fmt::Debug for
Fat<V, BYTES> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "Fat", "teddy",
&self.teddy, "masks", &&self.masks)
}
}Debug)]
387pub(crate) struct Fat<V, const BYTES: usize> {
388/// A generic data structure for doing "fat" Teddy verification.
389teddy: Teddy<16>,
390/// The masks used as inputs to the shuffle operation to generate
391 /// candidates (which are fed into the verification routines).
392masks: [Mask<V>; BYTES],
393}
394395impl<V: FatVector, const BYTES: usize> Fat<V, BYTES> {
396/// Create a new "fat" Teddy searcher for the given patterns.
397 ///
398 /// # Panics
399 ///
400 /// This panics when `BYTES` is any value other than 1, 2, 3 or 4.
401 ///
402 /// # Safety
403 ///
404 /// Callers must ensure that this is okay to call in the current target for
405 /// the current CPU.
406#[inline(always)]
407pub(crate) unsafe fn new(patterns: Arc<Patterns>) -> Fat<V, BYTES> {
408if !(1 <= BYTES && BYTES <= 4) {
{
::core::panicking::panic_fmt(format_args!("only 1, 2, 3 or 4 bytes are supported"));
}
};assert!(
4091 <= BYTES && BYTES <= 4,
410"only 1, 2, 3 or 4 bytes are supported"
411);
412let teddy = Teddy::new(patterns);
413let masks = FatMaskBuilder::from_teddy(&teddy);
414Fat { teddy, masks }
415 }
416417/// Returns the approximate total amount of heap used by this type, in
418 /// units of bytes.
419#[inline(always)]
420pub(crate) fn memory_usage(&self) -> usize {
421self.teddy.memory_usage()
422 }
423424/// Returns the minimum length, in bytes, that a haystack must be in order
425 /// to use it with this searcher.
426#[inline(always)]
427pub(crate) fn minimum_len(&self) -> usize {
428 V::Half::BYTES + (BYTES - 1)
429 }
430}
431432impl<V: FatVector> Fat<V, 1> {
433/// Look for an occurrences of the patterns in this finder in the haystack
434 /// given by the `start` and `end` pointers.
435 ///
436 /// If no match could be found, then `None` is returned.
437 ///
438 /// # Safety
439 ///
440 /// The given pointers representing the haystack must be valid to read
441 /// from. They must also point to a region of memory that is at least the
442 /// minimum length required by this searcher.
443 ///
444 /// Callers must ensure that this is okay to call in the current target for
445 /// the current CPU.
446#[inline(always)]
447pub(crate) unsafe fn find(
448&self,
449 start: *const u8,
450 end: *const u8,
451 ) -> Option<Match> {
452let len = end.distance(start);
453if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
454let mut cur = start;
455while cur <= end.sub(V::Half::BYTES) {
456if let Some(m) = self.find_one(cur, end) {
457return Some(m);
458 }
459 cur = cur.add(V::Half::BYTES);
460 }
461if cur < end {
462cur = end.sub(V::Half::BYTES);
463if let Some(m) = self.find_one(cur, end) {
464return Some(m);
465 }
466 }
467None468 }
469470/// Look for a match starting at the `V::BYTES` at and after `cur`. If
471 /// there isn't one, then `None` is returned.
472 ///
473 /// # Safety
474 ///
475 /// The given pointers representing the haystack must be valid to read
476 /// from. They must also point to a region of memory that is at least the
477 /// minimum length required by this searcher.
478 ///
479 /// Callers must ensure that this is okay to call in the current target for
480 /// the current CPU.
481#[inline(always)]
482unsafe fn find_one(
483&self,
484 cur: *const u8,
485 end: *const u8,
486 ) -> Option<Match> {
487let c = self.candidate(cur);
488if !c.is_zero() {
489if let Some(m) = self.teddy.verify(cur, end, c) {
490return Some(m);
491 }
492 }
493None494 }
495496/// Look for a candidate match (represented as a vector) starting at the
497 /// `V::BYTES` at and after `cur`. If there isn't one, then a vector with
498 /// all bits set to zero is returned.
499 ///
500 /// # Safety
501 ///
502 /// The given pointer representing the haystack must be valid to read
503 /// from.
504 ///
505 /// Callers must ensure that this is okay to call in the current target for
506 /// the current CPU.
507#[inline(always)]
508unsafe fn candidate(&self, cur: *const u8) -> V {
509let chunk = V::load_half_unaligned(cur);
510Mask::members1(chunk, self.masks)
511 }
512}
513514impl<V: FatVector> Fat<V, 2> {
515/// See `Fat<V, 1>::find`.
516#[inline(always)]
517pub(crate) unsafe fn find(
518&self,
519 start: *const u8,
520 end: *const u8,
521 ) -> Option<Match> {
522let len = end.distance(start);
523if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
524let mut cur = start.add(1);
525let mut prev0 = V::splat(0xFF);
526while cur <= end.sub(V::Half::BYTES) {
527if let Some(m) = self.find_one(cur, end, &mut prev0) {
528return Some(m);
529 }
530 cur = cur.add(V::Half::BYTES);
531 }
532if cur < end {
533cur = end.sub(V::Half::BYTES);
534prev0 = V::splat(0xFF);
535if let Some(m) = self.find_one(cur, end, &mut prev0) {
536return Some(m);
537 }
538 }
539None540 }
541542/// See `Fat<V, 1>::find_one`.
543#[inline(always)]
544unsafe fn find_one(
545&self,
546 cur: *const u8,
547 end: *const u8,
548 prev0: &mut V,
549 ) -> Option<Match> {
550let c = self.candidate(cur, prev0);
551if !c.is_zero() {
552if let Some(m) = self.teddy.verify(cur.sub(1), end, c) {
553return Some(m);
554 }
555 }
556None557 }
558559/// See `Fat<V, 1>::candidate`.
560#[inline(always)]
561unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V {
562let chunk = V::load_half_unaligned(cur);
563let (res0, res1) = Mask::members2(chunk, self.masks);
564let res0prev0 = res0.half_shift_in_one_byte(*prev0);
565let res = res0prev0.and(res1);
566*prev0 = res0;
567res568 }
569}
570571impl<V: FatVector> Fat<V, 3> {
572/// See `Fat<V, 1>::find`.
573#[inline(always)]
574pub(crate) unsafe fn find(
575&self,
576 start: *const u8,
577 end: *const u8,
578 ) -> Option<Match> {
579let len = end.distance(start);
580if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
581let mut cur = start.add(2);
582let mut prev0 = V::splat(0xFF);
583let mut prev1 = V::splat(0xFF);
584while cur <= end.sub(V::Half::BYTES) {
585if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
586return Some(m);
587 }
588 cur = cur.add(V::Half::BYTES);
589 }
590if cur < end {
591cur = end.sub(V::Half::BYTES);
592prev0 = V::splat(0xFF);
593prev1 = V::splat(0xFF);
594if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
595return Some(m);
596 }
597 }
598None599 }
600601/// See `Fat<V, 1>::find_one`.
602#[inline(always)]
603unsafe fn find_one(
604&self,
605 cur: *const u8,
606 end: *const u8,
607 prev0: &mut V,
608 prev1: &mut V,
609 ) -> Option<Match> {
610let c = self.candidate(cur, prev0, prev1);
611if !c.is_zero() {
612if let Some(m) = self.teddy.verify(cur.sub(2), end, c) {
613return Some(m);
614 }
615 }
616None617 }
618619/// See `Fat<V, 1>::candidate`.
620#[inline(always)]
621unsafe fn candidate(
622&self,
623 cur: *const u8,
624 prev0: &mut V,
625 prev1: &mut V,
626 ) -> V {
627let chunk = V::load_half_unaligned(cur);
628let (res0, res1, res2) = Mask::members3(chunk, self.masks);
629let res0prev0 = res0.half_shift_in_two_bytes(*prev0);
630let res1prev1 = res1.half_shift_in_one_byte(*prev1);
631let res = res0prev0.and(res1prev1).and(res2);
632*prev0 = res0;
633*prev1 = res1;
634res635 }
636}
637638impl<V: FatVector> Fat<V, 4> {
639/// See `Fat<V, 1>::find`.
640#[inline(always)]
641pub(crate) unsafe fn find(
642&self,
643 start: *const u8,
644 end: *const u8,
645 ) -> Option<Match> {
646let len = end.distance(start);
647if true {
if !(len >= self.minimum_len()) {
::core::panicking::panic("assertion failed: len >= self.minimum_len()")
};
};debug_assert!(len >= self.minimum_len());
648let mut cur = start.add(3);
649let mut prev0 = V::splat(0xFF);
650let mut prev1 = V::splat(0xFF);
651let mut prev2 = V::splat(0xFF);
652while cur <= end.sub(V::Half::BYTES) {
653if let Some(m) =
654self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
655 {
656return Some(m);
657 }
658 cur = cur.add(V::Half::BYTES);
659 }
660if cur < end {
661cur = end.sub(V::Half::BYTES);
662prev0 = V::splat(0xFF);
663prev1 = V::splat(0xFF);
664prev2 = V::splat(0xFF);
665if let Some(m) =
666self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
667 {
668return Some(m);
669 }
670 }
671None672 }
673674/// See `Fat<V, 1>::find_one`.
675#[inline(always)]
676unsafe fn find_one(
677&self,
678 cur: *const u8,
679 end: *const u8,
680 prev0: &mut V,
681 prev1: &mut V,
682 prev2: &mut V,
683 ) -> Option<Match> {
684let c = self.candidate(cur, prev0, prev1, prev2);
685if !c.is_zero() {
686if let Some(m) = self.teddy.verify(cur.sub(3), end, c) {
687return Some(m);
688 }
689 }
690None691 }
692693/// See `Fat<V, 1>::candidate`.
694#[inline(always)]
695unsafe fn candidate(
696&self,
697 cur: *const u8,
698 prev0: &mut V,
699 prev1: &mut V,
700 prev2: &mut V,
701 ) -> V {
702let chunk = V::load_half_unaligned(cur);
703let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks);
704let res0prev0 = res0.half_shift_in_three_bytes(*prev0);
705let res1prev1 = res1.half_shift_in_two_bytes(*prev1);
706let res2prev2 = res2.half_shift_in_one_byte(*prev2);
707let res = res0prev0.and(res1prev1).and(res2prev2).and(res3);
708*prev0 = res0;
709*prev1 = res1;
710*prev2 = res2;
711res712 }
713}
714715/// The common elements of all "slim" and "fat" Teddy search implementations.
716///
717/// Essentially, this contains the patterns and the buckets. Namely, it
718/// contains enough to implement the verification step after candidates are
719/// identified via the shuffle masks.
720///
721/// It is generic over the number of buckets used. In general, the number of
722/// buckets is either 8 (for "slim" Teddy) or 16 (for "fat" Teddy). The generic
723/// parameter isn't really meant to be instantiated for any value other than
724/// 8 or 16, although it is technically possible. The main hiccup is that there
725/// is some bit-shifting done in the critical part of verification that could
726/// be quite expensive if `N` is not a multiple of 2.
727#[derive(#[automatically_derived]
impl<const BUCKETS : usize> ::core::clone::Clone for Teddy<BUCKETS> {
#[inline]
fn clone(&self) -> Teddy<BUCKETS> {
Teddy {
patterns: ::core::clone::Clone::clone(&self.patterns),
buckets: ::core::clone::Clone::clone(&self.buckets),
}
}
}Clone, #[automatically_derived]
impl<const BUCKETS : usize> ::core::fmt::Debug for Teddy<BUCKETS> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "Teddy",
"patterns", &self.patterns, "buckets", &&self.buckets)
}
}Debug)]
728struct Teddy<const BUCKETS: usize> {
729/// The patterns we are searching for.
730 ///
731 /// A pattern string can be found by its `PatternID`.
732patterns: Arc<Patterns>,
733/// The allocation of patterns in buckets. This only contains the IDs of
734 /// patterns. In order to do full verification, callers must provide the
735 /// actual patterns when using Teddy.
736buckets: [Vec<PatternID>; BUCKETS],
737// N.B. The above representation is very simple, but it definitely results
738 // in ping-ponging between different allocations during verification. I've
739 // tried experimenting with other representations that flatten the pattern
740 // strings into a single allocation, but it doesn't seem to help much.
741 // Probably everything is small enough to fit into cache anyway, and so the
742 // pointer chasing isn't a big deal?
743 //
744 // One other avenue I haven't explored is some kind of hashing trick
745 // that let's us do another high-confidence check before launching into
746 // `memcmp`.
747}
748749impl<const BUCKETS: usize> Teddy<BUCKETS> {
750/// Create a new generic data structure for Teddy verification.
751fn new(patterns: Arc<Patterns>) -> Teddy<BUCKETS> {
752match (&(0), &(patterns.len())) {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::Some(format_args!("Teddy requires at least one pattern")));
}
}
};assert_ne!(0, patterns.len(), "Teddy requires at least one pattern");
753match (&(0), &(patterns.minimum_len())) {
(left_val, right_val) => {
if *left_val == *right_val {
let kind = ::core::panicking::AssertKind::Ne;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::Some(format_args!("Teddy does not support zero-length patterns")));
}
}
};assert_ne!(
7540,
755 patterns.minimum_len(),
756"Teddy does not support zero-length patterns"
757);
758if !(BUCKETS == 8 || BUCKETS == 16) {
{
::core::panicking::panic_fmt(format_args!("Teddy only supports 8 or 16 buckets"));
}
};assert!(
759 BUCKETS == 8 || BUCKETS == 16,
760"Teddy only supports 8 or 16 buckets"
761);
762// MSRV(1.63): Use core::array::from_fn below instead of allocating a
763 // superfluous outer Vec. Not a big deal (especially given the BTreeMap
764 // allocation below), but nice to not do it.
765let buckets =
766 <[Vec<PatternID>; BUCKETS]>::try_from(::alloc::vec::from_elem(::alloc::vec::Vec::new(), BUCKETS)vec![vec![]; BUCKETS])
767 .unwrap();
768let mut t = Teddy { patterns, buckets };
769770let mut map: BTreeMap<Box<[u8]>, usize> = BTreeMap::new();
771for (id, pattern) in t.patterns.iter() {
772// We try to be slightly clever in how we assign patterns into
773 // buckets. Generally speaking, we want patterns with the same
774 // prefix to be in the same bucket, since it minimizes the amount
775 // of time we spend churning through buckets in the verification
776 // step.
777 //
778 // So we could assign patterns with the same N-prefix (where N is
779 // the size of the mask, which is one of {1, 2, 3}) to the same
780 // bucket. However, case insensitive searches are fairly common, so
781 // we'd for example, ideally want to treat `abc` and `ABC` as if
782 // they shared the same prefix. ASCII has the nice property that
783 // the lower 4 bits of A and a are the same, so we therefore group
784 // patterns with the same low-nybble-N-prefix into the same bucket.
785 //
786 // MOREOVER, this is actually necessary for correctness! In
787 // particular, by grouping patterns with the same prefix into the
788 // same bucket, we ensure that we preserve correct leftmost-first
789 // and leftmost-longest match semantics. In addition to the fact
790 // that `patterns.iter()` iterates in the correct order, this
791 // guarantees that all possible ambiguous matches will occur in
792 // the same bucket. The verification routine could be adjusted to
793 // support correct leftmost match semantics regardless of bucket
794 // allocation, but that results in a performance hit. It's much
795 // nicer to be able to just stop as soon as a match is found.
796let lonybs = pattern.low_nybbles(t.mask_len());
797if let Some(&bucket) = map.get(&lonybs) {
798 t.buckets[bucket].push(id);
799 } else {
800// N.B. We assign buckets in reverse because it shouldn't have
801 // any influence on performance, but it does make it harder to
802 // get leftmost match semantics accidentally correct.
803let bucket = (BUCKETS - 1) - (id.as_usize() % BUCKETS);
804 t.buckets[bucket].push(id);
805 map.insert(lonybs, bucket);
806 }
807 }
808t809 }
810811/// Verify whether there are any matches starting at or after `cur` in the
812 /// haystack. The candidate chunk given should correspond to 8-bit bitsets
813 /// for N buckets.
814 ///
815 /// # Safety
816 ///
817 /// The given pointers representing the haystack must be valid to read
818 /// from.
819#[inline(always)]
820unsafe fn verify64(
821&self,
822 cur: *const u8,
823 end: *const u8,
824mut candidate_chunk: u64,
825 ) -> Option<Match> {
826while candidate_chunk != 0 {
827let bit = candidate_chunk.trailing_zeros().as_usize();
828 candidate_chunk &= !(1 << bit);
829830let cur = cur.add(bit / BUCKETS);
831let bucket = bit % BUCKETS;
832if let Some(m) = self.verify_bucket(cur, end, bucket) {
833return Some(m);
834 }
835 }
836None837 }
838839/// Verify whether there are any matches starting at `at` in the given
840 /// `haystack` corresponding only to patterns in the given bucket.
841 ///
842 /// # Safety
843 ///
844 /// The given pointers representing the haystack must be valid to read
845 /// from.
846 ///
847 /// The bucket index must be less than or equal to `self.buckets.len()`.
848#[inline(always)]
849unsafe fn verify_bucket(
850&self,
851 cur: *const u8,
852 end: *const u8,
853 bucket: usize,
854 ) -> Option<Match> {
855if true {
if !(bucket < self.buckets.len()) {
::core::panicking::panic("assertion failed: bucket < self.buckets.len()")
};
};debug_assert!(bucket < self.buckets.len());
856// SAFETY: The caller must ensure that the bucket index is correct.
857for pid in self.buckets.get_unchecked(bucket).iter().copied() {
858// SAFETY: This is safe because we are guaranteed that every
859 // index in a Teddy bucket is a valid index into `pats`, by
860 // construction.
861if true {
if !(pid.as_usize() < self.patterns.len()) {
::core::panicking::panic("assertion failed: pid.as_usize() < self.patterns.len()")
};
};debug_assert!(pid.as_usize() < self.patterns.len());
862let pat = self.patterns.get_unchecked(pid);
863if pat.is_prefix_raw(cur, end) {
864let start = cur;
865let end = start.add(pat.len());
866return Some(Match { pid, start, end });
867 }
868 }
869None870 }
871872/// Returns the total number of masks required by the patterns in this
873 /// Teddy searcher.
874 ///
875 /// Basically, the mask length corresponds to the type of Teddy searcher
876 /// to use: a 1-byte, 2-byte, 3-byte or 4-byte searcher. The bigger the
877 /// better, typically, since searching for longer substrings usually
878 /// decreases the rate of false positives. Therefore, the number of masks
879 /// needed is the length of the shortest pattern in this searcher. If the
880 /// length of the shortest pattern (in bytes) is bigger than 4, then the
881 /// mask length is 4 since there are no Teddy searchers for more than 4
882 /// bytes.
883fn mask_len(&self) -> usize {
884 core::cmp::min(4, self.patterns.minimum_len())
885 }
886887/// Returns the approximate total amount of heap used by this type, in
888 /// units of bytes.
889fn memory_usage(&self) -> usize {
890// This is an upper bound rather than a precise accounting. No
891 // particular reason, other than it's probably very close to actual
892 // memory usage in practice.
893self.patterns.len() * core::mem::size_of::<PatternID>()
894 }
895}
896897impl Teddy<8> {
898/// Runs the verification routine for "slim" Teddy.
899 ///
900 /// The candidate given should be a collection of 8-bit bitsets (one bitset
901 /// per lane), where the ith bit is set in the jth lane if and only if the
902 /// byte occurring at `at + j` in `cur` is in the bucket `i`.
903 ///
904 /// # Safety
905 ///
906 /// Callers must ensure that this is okay to call in the current target for
907 /// the current CPU.
908 ///
909 /// The given pointers must be valid to read from.
910#[inline(always)]
911unsafe fn verify<V: Vector>(
912&self,
913mut cur: *const u8,
914 end: *const u8,
915 candidate: V,
916 ) -> Option<Match> {
917if true {
if !!candidate.is_zero() {
::core::panicking::panic("assertion failed: !candidate.is_zero()")
};
};debug_assert!(!candidate.is_zero());
918// Convert the candidate into 64-bit chunks, and then verify each of
919 // those chunks.
920candidate.for_each_64bit_lane(
921#[inline(always)]
922|_, chunk| {
923let result = self.verify64(cur, end, chunk);
924cur = cur.add(8);
925result926 },
927 )
928 }
929}
930931impl Teddy<16> {
932/// Runs the verification routine for "fat" Teddy.
933 ///
934 /// The candidate given should be a collection of 8-bit bitsets (one bitset
935 /// per lane), where the ith bit is set in the jth lane if and only if the
936 /// byte occurring at `at + (j < 16 ? j : j - 16)` in `cur` is in the
937 /// bucket `j < 16 ? i : i + 8`.
938 ///
939 /// # Safety
940 ///
941 /// Callers must ensure that this is okay to call in the current target for
942 /// the current CPU.
943 ///
944 /// The given pointers must be valid to read from.
945#[inline(always)]
946unsafe fn verify<V: FatVector>(
947&self,
948mut cur: *const u8,
949 end: *const u8,
950 candidate: V,
951 ) -> Option<Match> {
952// This is a bit tricky, but we basically want to convert our
953 // candidate, which looks like this (assuming a 256-bit vector):
954 //
955 // a31 a30 ... a17 a16 a15 a14 ... a01 a00
956 //
957 // where each a(i) is an 8-bit bitset corresponding to the activated
958 // buckets, to this
959 //
960 // a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00
961 //
962 // Namely, for Fat Teddy, the high 128-bits of the candidate correspond
963 // to the same bytes in the haystack in the low 128-bits (so we only
964 // scan 16 bytes at a time), but are for buckets 8-15 instead of 0-7.
965 //
966 // The verification routine wants to look at all potentially matching
967 // buckets before moving on to the next lane. So for example, both
968 // a16 and a00 both correspond to the first byte in our window; a00
969 // contains buckets 0-7 and a16 contains buckets 8-15. Specifically,
970 // a16 should be checked before a01. So the transformation shown above
971 // allows us to use our normal verification procedure with one small
972 // change: we treat each bitset as 16 bits instead of 8 bits.
973if true {
if !!candidate.is_zero() {
::core::panicking::panic("assertion failed: !candidate.is_zero()")
};
};debug_assert!(!candidate.is_zero());
974975// Swap the 128-bit lanes in the candidate vector.
976let swapped = candidate.swap_halves();
977// Interleave the bytes from the low 128-bit lanes, starting with
978 // cand first.
979let r1 = candidate.interleave_low_8bit_lanes(swapped);
980// Interleave the bytes from the high 128-bit lanes, starting with
981 // cand first.
982let r2 = candidate.interleave_high_8bit_lanes(swapped);
983// Now just take the 2 low 64-bit integers from both r1 and r2. We
984 // can drop the high 64-bit integers because they are a mirror image
985 // of the low 64-bit integers. All we care about are the low 128-bit
986 // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets
987 // laid out in the desired order, as described above.
988r1.for_each_low_64bit_lane(
989r2,
990#[inline(always)]
991|_, chunk| {
992let result = self.verify64(cur, end, chunk);
993cur = cur.add(4);
994result995 },
996 )
997 }
998}
9991000/// A vector generic mask for the low and high nybbles in a set of patterns.
1001/// Each 8-bit lane `j` in a vector corresponds to a bitset where the `i`th bit
1002/// is set if and only if the nybble `j` is in the bucket `i` at a particular
1003/// position.
1004///
1005/// This is slightly tweaked dependending on whether Slim or Fat Teddy is being
1006/// used. For Slim Teddy, the bitsets in the lower half are the same as the
1007/// bitsets in the higher half, so that we can search `V::BYTES` bytes at a
1008/// time. (Remember, the nybbles in the haystack are used as indices into these
1009/// masks, and 256-bit shuffles only operate on 128-bit lanes.)
1010///
1011/// For Fat Teddy, the bitsets are not repeated, but instead, the high half
1012/// bits correspond to an addition 8 buckets. So that a bitset `00100010` has
1013/// buckets 1 and 5 set if it's in the lower half, but has buckets 9 and 13 set
1014/// if it's in the higher half.
1015#[derive(#[automatically_derived]
impl<V: ::core::clone::Clone> ::core::clone::Clone for Mask<V> {
#[inline]
fn clone(&self) -> Mask<V> {
Mask {
lo: ::core::clone::Clone::clone(&self.lo),
hi: ::core::clone::Clone::clone(&self.hi),
}
}
}Clone, #[automatically_derived]
impl<V: ::core::marker::Copy> ::core::marker::Copy for Mask<V> { }Copy, #[automatically_derived]
impl<V: ::core::fmt::Debug> ::core::fmt::Debug for Mask<V> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "Mask", "lo",
&self.lo, "hi", &&self.hi)
}
}Debug)]
1016struct Mask<V> {
1017 lo: V,
1018 hi: V,
1019}
10201021impl<V: Vector> Mask<V> {
1022/// Return a candidate for Teddy (fat or slim) that is searching for 1-byte
1023 /// candidates.
1024 ///
1025 /// If a candidate is returned, it will be a collection of 8-bit bitsets
1026 /// (one bitset per lane), where the ith bit is set in the jth lane if and
1027 /// only if the byte occurring at the jth lane in `chunk` is in the bucket
1028 /// `i`. If no candidate is found, then the vector returned will have all
1029 /// lanes set to zero.
1030 ///
1031 /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
1032 /// the least significant byte corresponds to the start of the window). For
1033 /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
1034 /// the window repeated in each half of the vector.
1035 ///
1036 /// `mask1` should correspond to a low/high mask for the first byte of all
1037 /// patterns that are being searched.
1038#[inline(always)]
1039unsafe fn members1(chunk: V, masks: [Mask<V>; 1]) -> V {
1040let lomask = V::splat(0xF);
1041let hlo = chunk.and(lomask);
1042let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
1043let locand = masks[0].lo.shuffle_bytes(hlo);
1044let hicand = masks[0].hi.shuffle_bytes(hhi);
1045locand.and(hicand)
1046 }
10471048/// Return a candidate for Teddy (fat or slim) that is searching for 2-byte
1049 /// candidates.
1050 ///
1051 /// If candidates are returned, each will be a collection of 8-bit bitsets
1052 /// (one bitset per lane), where the ith bit is set in the jth lane if and
1053 /// only if the byte occurring at the jth lane in `chunk` is in the bucket
1054 /// `i`. Each candidate returned corresponds to the first and second bytes
1055 /// of the patterns being searched. If no candidate is found, then all of
1056 /// the lanes will be set to zero in at least one of the vectors returned.
1057 ///
1058 /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
1059 /// the least significant byte corresponds to the start of the window). For
1060 /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
1061 /// the window repeated in each half of the vector.
1062 ///
1063 /// The masks should correspond to the masks computed for the first and
1064 /// second bytes of all patterns that are being searched.
1065#[inline(always)]
1066unsafe fn members2(chunk: V, masks: [Mask<V>; 2]) -> (V, V) {
1067let lomask = V::splat(0xF);
1068let hlo = chunk.and(lomask);
1069let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
10701071let locand1 = masks[0].lo.shuffle_bytes(hlo);
1072let hicand1 = masks[0].hi.shuffle_bytes(hhi);
1073let cand1 = locand1.and(hicand1);
10741075let locand2 = masks[1].lo.shuffle_bytes(hlo);
1076let hicand2 = masks[1].hi.shuffle_bytes(hhi);
1077let cand2 = locand2.and(hicand2);
10781079 (cand1, cand2)
1080 }
10811082/// Return a candidate for Teddy (fat or slim) that is searching for 3-byte
1083 /// candidates.
1084 ///
1085 /// If candidates are returned, each will be a collection of 8-bit bitsets
1086 /// (one bitset per lane), where the ith bit is set in the jth lane if and
1087 /// only if the byte occurring at the jth lane in `chunk` is in the bucket
1088 /// `i`. Each candidate returned corresponds to the first, second and third
1089 /// bytes of the patterns being searched. If no candidate is found, then
1090 /// all of the lanes will be set to zero in at least one of the vectors
1091 /// returned.
1092 ///
1093 /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
1094 /// the least significant byte corresponds to the start of the window). For
1095 /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
1096 /// the window repeated in each half of the vector.
1097 ///
1098 /// The masks should correspond to the masks computed for the first, second
1099 /// and third bytes of all patterns that are being searched.
1100#[inline(always)]
1101unsafe fn members3(chunk: V, masks: [Mask<V>; 3]) -> (V, V, V) {
1102let lomask = V::splat(0xF);
1103let hlo = chunk.and(lomask);
1104let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
11051106let locand1 = masks[0].lo.shuffle_bytes(hlo);
1107let hicand1 = masks[0].hi.shuffle_bytes(hhi);
1108let cand1 = locand1.and(hicand1);
11091110let locand2 = masks[1].lo.shuffle_bytes(hlo);
1111let hicand2 = masks[1].hi.shuffle_bytes(hhi);
1112let cand2 = locand2.and(hicand2);
11131114let locand3 = masks[2].lo.shuffle_bytes(hlo);
1115let hicand3 = masks[2].hi.shuffle_bytes(hhi);
1116let cand3 = locand3.and(hicand3);
11171118 (cand1, cand2, cand3)
1119 }
11201121/// Return a candidate for Teddy (fat or slim) that is searching for 4-byte
1122 /// candidates.
1123 ///
1124 /// If candidates are returned, each will be a collection of 8-bit bitsets
1125 /// (one bitset per lane), where the ith bit is set in the jth lane if and
1126 /// only if the byte occurring at the jth lane in `chunk` is in the bucket
1127 /// `i`. Each candidate returned corresponds to the first, second, third
1128 /// and fourth bytes of the patterns being searched. If no candidate is
1129 /// found, then all of the lanes will be set to zero in at least one of the
1130 /// vectors returned.
1131 ///
1132 /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
1133 /// the least significant byte corresponds to the start of the window). For
1134 /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
1135 /// the window repeated in each half of the vector.
1136 ///
1137 /// The masks should correspond to the masks computed for the first,
1138 /// second, third and fourth bytes of all patterns that are being searched.
1139#[inline(always)]
1140unsafe fn members4(chunk: V, masks: [Mask<V>; 4]) -> (V, V, V, V) {
1141let lomask = V::splat(0xF);
1142let hlo = chunk.and(lomask);
1143let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
11441145let locand1 = masks[0].lo.shuffle_bytes(hlo);
1146let hicand1 = masks[0].hi.shuffle_bytes(hhi);
1147let cand1 = locand1.and(hicand1);
11481149let locand2 = masks[1].lo.shuffle_bytes(hlo);
1150let hicand2 = masks[1].hi.shuffle_bytes(hhi);
1151let cand2 = locand2.and(hicand2);
11521153let locand3 = masks[2].lo.shuffle_bytes(hlo);
1154let hicand3 = masks[2].hi.shuffle_bytes(hhi);
1155let cand3 = locand3.and(hicand3);
11561157let locand4 = masks[3].lo.shuffle_bytes(hlo);
1158let hicand4 = masks[3].hi.shuffle_bytes(hhi);
1159let cand4 = locand4.and(hicand4);
11601161 (cand1, cand2, cand3, cand4)
1162 }
1163}
11641165/// Represents the low and high nybble masks that will be used during
1166/// search. Each mask is 32 bytes wide, although only the first 16 bytes are
1167/// used for 128-bit vectors.
1168///
1169/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set
1170/// if and only if the corresponding nybble is in the ith bucket. The index of
1171/// the byte (0-15, inclusive) corresponds to the nybble.
1172///
1173/// Each mask is used as the target of a shuffle, where the indices for the
1174/// shuffle are taken from the haystack. AND'ing the shuffles for both the
1175/// low and high masks together also results in 8-bit bitsets, but where bit
1176/// `i` is set if and only if the correspond *byte* is in the ith bucket.
1177#[derive(#[automatically_derived]
impl ::core::clone::Clone for SlimMaskBuilder {
#[inline]
fn clone(&self) -> SlimMaskBuilder {
SlimMaskBuilder {
lo: ::core::clone::Clone::clone(&self.lo),
hi: ::core::clone::Clone::clone(&self.hi),
}
}
}Clone, #[automatically_derived]
impl ::core::default::Default for SlimMaskBuilder {
#[inline]
fn default() -> SlimMaskBuilder {
SlimMaskBuilder {
lo: ::core::default::Default::default(),
hi: ::core::default::Default::default(),
}
}
}Default)]
1178struct SlimMaskBuilder {
1179 lo: [u8; 32],
1180 hi: [u8; 32],
1181}
11821183impl SlimMaskBuilder {
1184/// Update this mask by adding the given byte to the given bucket. The
1185 /// given bucket must be in the range 0-7.
1186 ///
1187 /// # Panics
1188 ///
1189 /// When `bucket >= 8`.
1190fn add(&mut self, bucket: usize, byte: u8) {
1191if !(bucket < 8) { ::core::panicking::panic("assertion failed: bucket < 8") };assert!(bucket < 8);
11921193let bucket = u8::try_from(bucket).unwrap();
1194let byte_lo = usize::from(byte & 0xF);
1195let byte_hi = usize::from((byte >> 4) & 0xF);
1196// When using 256-bit vectors, we need to set this bucket assignment in
1197 // the low and high 128-bit portions of the mask. This allows us to
1198 // process 32 bytes at a time. Namely, AVX2 shuffles operate on each
1199 // of the 128-bit lanes, rather than the full 256-bit vector at once.
1200self.lo[byte_lo] |= 1 << bucket;
1201self.lo[byte_lo + 16] |= 1 << bucket;
1202self.hi[byte_hi] |= 1 << bucket;
1203self.hi[byte_hi + 16] |= 1 << bucket;
1204 }
12051206/// Turn this builder into a vector mask.
1207 ///
1208 /// # Panics
1209 ///
1210 /// When `V` represents a vector bigger than what `MaskBytes` can contain.
1211 ///
1212 /// # Safety
1213 ///
1214 /// Callers must ensure that this is okay to call in the current target for
1215 /// the current CPU.
1216#[inline(always)]
1217unsafe fn build<V: Vector>(&self) -> Mask<V> {
1218if !(V::BYTES <= self.lo.len()) {
::core::panicking::panic("assertion failed: V::BYTES <= self.lo.len()")
};assert!(V::BYTES <= self.lo.len());
1219if !(V::BYTES <= self.hi.len()) {
::core::panicking::panic("assertion failed: V::BYTES <= self.hi.len()")
};assert!(V::BYTES <= self.hi.len());
1220Mask {
1221 lo: V::load_unaligned(self.lo[..].as_ptr()),
1222 hi: V::load_unaligned(self.hi[..].as_ptr()),
1223 }
1224 }
12251226/// A convenience function for building `N` vector masks from a slim
1227 /// `Teddy` value.
1228 ///
1229 /// # Panics
1230 ///
1231 /// When `V` represents a vector bigger than what `MaskBytes` can contain.
1232 ///
1233 /// # Safety
1234 ///
1235 /// Callers must ensure that this is okay to call in the current target for
1236 /// the current CPU.
1237#[inline(always)]
1238unsafe fn from_teddy<const BYTES: usize, V: Vector>(
1239 teddy: &Teddy<8>,
1240 ) -> [Mask<V>; BYTES] {
1241// MSRV(1.63): Use core::array::from_fn to just build the array here
1242 // instead of creating a vector and turning it into an array.
1243let mut mask_builders = ::alloc::vec::from_elem(SlimMaskBuilder::default(), BYTES)vec![SlimMaskBuilder::default(); BYTES];
1244for (bucket_index, bucket) in teddy.buckets.iter().enumerate() {
1245for pid in bucket.iter().copied() {
1246let pat = teddy.patterns.get(pid);
1247for (i, builder) in mask_builders.iter_mut().enumerate() {
1248 builder.add(bucket_index, pat.bytes()[i]);
1249 }
1250 }
1251 }
1252let array =
1253 <[SlimMaskBuilder; BYTES]>::try_from(mask_builders).unwrap();
1254array.map(|builder| builder.build())
1255 }
1256}
12571258impl Debugfor SlimMaskBuilder {
1259fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1260let (mut parts_lo, mut parts_hi) = (::alloc::vec::Vec::new()vec![], ::alloc::vec::Vec::new()vec![]);
1261for i in 0..32 {
1262 parts_lo.push(::alloc::__export::must_use({
::alloc::fmt::format(format_args!("{0:02}: {1:08b}", i, self.lo[i]))
})format!("{:02}: {:08b}", i, self.lo[i]));
1263 parts_hi.push(::alloc::__export::must_use({
::alloc::fmt::format(format_args!("{0:02}: {1:08b}", i, self.hi[i]))
})format!("{:02}: {:08b}", i, self.hi[i]));
1264 }
1265f.debug_struct("SlimMaskBuilder")
1266 .field("lo", &parts_lo)
1267 .field("hi", &parts_hi)
1268 .finish()
1269 }
1270}
12711272/// Represents the low and high nybble masks that will be used during "fat"
1273/// Teddy search.
1274///
1275/// Each mask is 32 bytes wide, and at the time of writing, only 256-bit vectors
1276/// support fat Teddy.
1277///
1278/// A fat Teddy mask is like a slim Teddy mask, except that instead of
1279/// repeating the bitsets in the high and low 128-bits in 256-bit vectors, the
1280/// high and low 128-bit halves each represent distinct buckets. (Bringing the
1281/// total to 16 instead of 8.) This permits spreading the patterns out a bit
1282/// more and thus putting less pressure on verification to be fast.
1283///
1284/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set
1285/// if and only if the corresponding nybble is in the ith bucket. The index of
1286/// the byte (0-15, inclusive) corresponds to the nybble.
1287#[derive(#[automatically_derived]
impl ::core::clone::Clone for FatMaskBuilder {
#[inline]
fn clone(&self) -> FatMaskBuilder {
let _: ::core::clone::AssertParamIsClone<[u8; 32]>;
let _: ::core::clone::AssertParamIsClone<[u8; 32]>;
*self
}
}Clone, #[automatically_derived]
impl ::core::marker::Copy for FatMaskBuilder { }Copy, #[automatically_derived]
impl ::core::default::Default for FatMaskBuilder {
#[inline]
fn default() -> FatMaskBuilder {
FatMaskBuilder {
lo: ::core::default::Default::default(),
hi: ::core::default::Default::default(),
}
}
}Default)]
1288struct FatMaskBuilder {
1289 lo: [u8; 32],
1290 hi: [u8; 32],
1291}
12921293impl FatMaskBuilder {
1294/// Update this mask by adding the given byte to the given bucket. The
1295 /// given bucket must be in the range 0-15.
1296 ///
1297 /// # Panics
1298 ///
1299 /// When `bucket >= 16`.
1300fn add(&mut self, bucket: usize, byte: u8) {
1301if !(bucket < 16) {
::core::panicking::panic("assertion failed: bucket < 16")
};assert!(bucket < 16);
13021303let bucket = u8::try_from(bucket).unwrap();
1304let byte_lo = usize::from(byte & 0xF);
1305let byte_hi = usize::from((byte >> 4) & 0xF);
1306// Unlike slim teddy, fat teddy only works with AVX2. For fat teddy,
1307 // the high 128 bits of our mask correspond to buckets 8-15, while the
1308 // low 128 bits correspond to buckets 0-7.
1309if bucket < 8 {
1310self.lo[byte_lo] |= 1 << bucket;
1311self.hi[byte_hi] |= 1 << bucket;
1312 } else {
1313self.lo[byte_lo + 16] |= 1 << (bucket % 8);
1314self.hi[byte_hi + 16] |= 1 << (bucket % 8);
1315 }
1316 }
13171318/// Turn this builder into a vector mask.
1319 ///
1320 /// # Panics
1321 ///
1322 /// When `V` represents a vector bigger than what `MaskBytes` can contain.
1323 ///
1324 /// # Safety
1325 ///
1326 /// Callers must ensure that this is okay to call in the current target for
1327 /// the current CPU.
1328#[inline(always)]
1329unsafe fn build<V: Vector>(&self) -> Mask<V> {
1330if !(V::BYTES <= self.lo.len()) {
::core::panicking::panic("assertion failed: V::BYTES <= self.lo.len()")
};assert!(V::BYTES <= self.lo.len());
1331if !(V::BYTES <= self.hi.len()) {
::core::panicking::panic("assertion failed: V::BYTES <= self.hi.len()")
};assert!(V::BYTES <= self.hi.len());
1332Mask {
1333 lo: V::load_unaligned(self.lo[..].as_ptr()),
1334 hi: V::load_unaligned(self.hi[..].as_ptr()),
1335 }
1336 }
13371338/// A convenience function for building `N` vector masks from a fat
1339 /// `Teddy` value.
1340 ///
1341 /// # Panics
1342 ///
1343 /// When `V` represents a vector bigger than what `MaskBytes` can contain.
1344 ///
1345 /// # Safety
1346 ///
1347 /// Callers must ensure that this is okay to call in the current target for
1348 /// the current CPU.
1349#[inline(always)]
1350unsafe fn from_teddy<const BYTES: usize, V: Vector>(
1351 teddy: &Teddy<16>,
1352 ) -> [Mask<V>; BYTES] {
1353// MSRV(1.63): Use core::array::from_fn to just build the array here
1354 // instead of creating a vector and turning it into an array.
1355let mut mask_builders = ::alloc::vec::from_elem(FatMaskBuilder::default(), BYTES)vec![FatMaskBuilder::default(); BYTES];
1356for (bucket_index, bucket) in teddy.buckets.iter().enumerate() {
1357for pid in bucket.iter().copied() {
1358let pat = teddy.patterns.get(pid);
1359for (i, builder) in mask_builders.iter_mut().enumerate() {
1360 builder.add(bucket_index, pat.bytes()[i]);
1361 }
1362 }
1363 }
1364let array =
1365 <[FatMaskBuilder; BYTES]>::try_from(mask_builders).unwrap();
1366array.map(|builder| builder.build())
1367 }
1368}
13691370impl Debugfor FatMaskBuilder {
1371fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1372let (mut parts_lo, mut parts_hi) = (::alloc::vec::Vec::new()vec![], ::alloc::vec::Vec::new()vec![]);
1373for i in 0..32 {
1374 parts_lo.push(::alloc::__export::must_use({
::alloc::fmt::format(format_args!("{0:02}: {1:08b}", i, self.lo[i]))
})format!("{:02}: {:08b}", i, self.lo[i]));
1375 parts_hi.push(::alloc::__export::must_use({
::alloc::fmt::format(format_args!("{0:02}: {1:08b}", i, self.hi[i]))
})format!("{:02}: {:08b}", i, self.hi[i]));
1376 }
1377f.debug_struct("FatMaskBuilder")
1378 .field("lo", &parts_lo)
1379 .field("hi", &parts_hi)
1380 .finish()
1381 }
1382}