1use core::{
2fmt::Debug,
3 panic::{RefUnwindSafe, UnwindSafe},
4};
56use alloc::sync::Arc;
78use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match};
910/// A builder for constructing a Teddy matcher.
11///
12/// The builder primarily permits fine grained configuration of the Teddy
13/// matcher. Most options are made only available for testing/benchmarking
14/// purposes. In reality, options are automatically determined by the nature
15/// and number of patterns given to the builder.
16#[derive(#[automatically_derived]
impl ::core::clone::Clone for Builder {
#[inline]
fn clone(&self) -> Builder {
Builder {
only_fat: ::core::clone::Clone::clone(&self.only_fat),
only_256bit: ::core::clone::Clone::clone(&self.only_256bit),
heuristic_pattern_limits: ::core::clone::Clone::clone(&self.heuristic_pattern_limits),
}
}
}Clone, #[automatically_derived]
impl ::core::fmt::Debug for Builder {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field3_finish(f, "Builder",
"only_fat", &self.only_fat, "only_256bit", &self.only_256bit,
"heuristic_pattern_limits", &&self.heuristic_pattern_limits)
}
}Debug)]
17pub(crate) struct Builder {
18/// When none, this is automatically determined. Otherwise, `false` means
19 /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
20 /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
21 /// available and Fat Teddy was requested, no matcher will be built.
22only_fat: Option<bool>,
23/// When none, this is automatically determined. Otherwise, `false` means
24 /// that 128-bit vectors will be used (up to SSSE3 instructions) where as
25 /// `true` means that 256-bit vectors will be used. As with `fat`, if
26 /// 256-bit vectors are requested and they aren't available, then a
27 /// searcher will not be built.
28only_256bit: Option<bool>,
29/// When true (the default), the number of patterns will be used as a
30 /// heuristic for refusing construction of a Teddy searcher. The point here
31 /// is that too many patterns can overwhelm Teddy. But this can be disabled
32 /// in cases where the caller knows better.
33heuristic_pattern_limits: bool,
34}
3536impl Defaultfor Builder {
37fn default() -> Builder {
38Builder::new()
39 }
40}
4142impl Builder {
43/// Create a new builder for configuring a Teddy matcher.
44pub(crate) fn new() -> Builder {
45Builder {
46 only_fat: None,
47 only_256bit: None,
48 heuristic_pattern_limits: true,
49 }
50 }
5152/// Build a matcher for the set of patterns given. If a matcher could not
53 /// be built, then `None` is returned.
54 ///
55 /// Generally, a matcher isn't built if the necessary CPU features aren't
56 /// available, an unsupported target or if the searcher is believed to be
57 /// slower than standard techniques (i.e., if there are too many literals).
58pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
59self.build_imp(patterns)
60 }
6162/// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
63 /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful
64 /// for a larger set of literals.
65 ///
66 /// `None` is the default, which results in an automatic selection based
67 /// on the number of literals and available CPU features.
68pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder {
69self.only_fat = yes;
70self71 }
7273/// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
74 /// Generally, a larger vector size is better since it either permits
75 /// matching more patterns or matching more bytes in the haystack at once.
76 ///
77 /// `None` is the default, which results in an automatic selection based on
78 /// the number of literals and available CPU features.
79pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder {
80self.only_256bit = yes;
81self82 }
8384/// Request that heuristic limitations on the number of patterns be
85 /// employed. This useful to disable for benchmarking where one wants to
86 /// explore how Teddy performs on large number of patterns even if the
87 /// heuristics would otherwise refuse construction.
88 ///
89 /// This is enabled by default.
90pub(crate) fn heuristic_pattern_limits(
91&mut self,
92 yes: bool,
93 ) -> &mut Builder {
94self.heuristic_pattern_limits = yes;
95self96 }
9798fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
99let patlimit = self.heuristic_pattern_limits;
100// There's no particular reason why we limit ourselves to little endian
101 // here, but it seems likely that some parts of Teddy as they are
102 // currently written (e.g., the uses of `trailing_zeros`) are likely
103 // wrong on non-little-endian targets. Such things are likely easy to
104 // fix, but at the time of writing (2023/09/18), I actually do not know
105 // how to test this code on a big-endian target. So for now, we're
106 // conservative and just bail out.
107if !truecfg!(target_endian = "little") {
108debug!("skipping Teddy because target isn't little endian");
109return None;
110 }
111// Too many patterns will overwhelm Teddy and likely lead to slow
112 // downs, typically in the verification step.
113if patlimit && patterns.len() > 64 {
114debug!("skipping Teddy because of too many patterns");
115return None;
116 }
117118#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
119{
120use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3};
121122let mask_len = core::cmp::min(4, patterns.minimum_len());
123let beefy = patterns.len() > 32;
124let has_avx2 = self::x86_64::is_available_avx2();
125let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3();
126let use_avx2 = if self.only_256bit == Some(true) {
127if !has_avx2 {
128debug!(
129"skipping Teddy because avx2 was demanded but unavailable"
130);
131return None;
132 }
133true
134} else if self.only_256bit == Some(false) {
135if !has_ssse3 {
136debug!(
137"skipping Teddy because ssse3 was demanded but unavailable"
138);
139return None;
140 }
141false
142} else if !has_ssse3 && !has_avx2 {
143debug!(
144"skipping Teddy because ssse3 and avx2 are unavailable"
145);
146return None;
147 } else {
148has_avx2149 };
150let fat = match self.only_fat {
151None => use_avx2 && beefy,
152Some(false) => false,
153Some(true) if !use_avx2 => {
154debug!(
155"skipping Teddy because fat was demanded, but fat \
156 Teddy requires avx2 which is unavailable"
157);
158return None;
159 }
160Some(true) => true,
161 };
162// Just like for aarch64, it's possible that too many patterns will
163 // overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which
164 // helps things scale a bit more by spreading patterns over more
165 // buckets.
166 //
167 // These thresholds were determined by looking at the measurements
168 // for the rust/aho-corasick/packed/leftmost-first and
169 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
170 // benchmarks.
171if patlimit && mask_len == 1 && patterns.len() > 16 {
172debug!(
173"skipping Teddy (mask len: 1) because there are \
174 too many patterns",
175 );
176return None;
177 }
178match (mask_len, use_avx2, fat) {
179 (1, false, _) => {
180debug!("Teddy choice: 128-bit slim, 1 byte");
181 SlimSSSE3::<1>::new(&patterns)
182 }
183 (1, true, false) => {
184debug!("Teddy choice: 256-bit slim, 1 byte");
185 SlimAVX2::<1>::new(&patterns)
186 }
187 (1, true, true) => {
188debug!("Teddy choice: 256-bit fat, 1 byte");
189 FatAVX2::<1>::new(&patterns)
190 }
191 (2, false, _) => {
192debug!("Teddy choice: 128-bit slim, 2 bytes");
193 SlimSSSE3::<2>::new(&patterns)
194 }
195 (2, true, false) => {
196debug!("Teddy choice: 256-bit slim, 2 bytes");
197 SlimAVX2::<2>::new(&patterns)
198 }
199 (2, true, true) => {
200debug!("Teddy choice: 256-bit fat, 2 bytes");
201 FatAVX2::<2>::new(&patterns)
202 }
203 (3, false, _) => {
204debug!("Teddy choice: 128-bit slim, 3 bytes");
205 SlimSSSE3::<3>::new(&patterns)
206 }
207 (3, true, false) => {
208debug!("Teddy choice: 256-bit slim, 3 bytes");
209 SlimAVX2::<3>::new(&patterns)
210 }
211 (3, true, true) => {
212debug!("Teddy choice: 256-bit fat, 3 bytes");
213 FatAVX2::<3>::new(&patterns)
214 }
215 (4, false, _) => {
216debug!("Teddy choice: 128-bit slim, 4 bytes");
217 SlimSSSE3::<4>::new(&patterns)
218 }
219 (4, true, false) => {
220debug!("Teddy choice: 256-bit slim, 4 bytes");
221 SlimAVX2::<4>::new(&patterns)
222 }
223 (4, true, true) => {
224debug!("Teddy choice: 256-bit fat, 4 bytes");
225 FatAVX2::<4>::new(&patterns)
226 }
227_ => {
228debug!("no supported Teddy configuration found");
229None230 }
231 }
232 }
233#[cfg(all(
234 target_arch = "aarch64",
235 target_feature = "neon",
236 target_endian = "little"
237))]
238{
239use self::aarch64::SlimNeon;
240241let mask_len = core::cmp::min(4, patterns.minimum_len());
242if self.only_256bit == Some(true) {
243debug!(
244"skipping Teddy because 256-bits were demanded \
245 but unavailable"
246);
247return None;
248 }
249if self.only_fat == Some(true) {
250debug!(
251"skipping Teddy because fat was demanded but unavailable"
252);
253 }
254// Since we don't have Fat teddy in aarch64 (I think we'd want at
255 // least 256-bit vectors for that), we need to be careful not to
256 // allow too many patterns as it might overwhelm Teddy. Generally
257 // speaking, as the mask length goes up, the more patterns we can
258 // handle because the mask length results in fewer candidates
259 // generated.
260 //
261 // These thresholds were determined by looking at the measurements
262 // for the rust/aho-corasick/packed/leftmost-first and
263 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
264 // benchmarks.
265match mask_len {
2661 => {
267if patlimit && patterns.len() > 16 {
268debug!(
269"skipping Teddy (mask len: 1) because there are \
270 too many patterns",
271 );
272 }
273debug!("Teddy choice: 128-bit slim, 1 byte");
274 SlimNeon::<1>::new(&patterns)
275 }
2762 => {
277if patlimit && patterns.len() > 32 {
278debug!(
279"skipping Teddy (mask len: 2) because there are \
280 too many patterns",
281 );
282 }
283debug!("Teddy choice: 128-bit slim, 2 bytes");
284 SlimNeon::<2>::new(&patterns)
285 }
2863 => {
287if patlimit && patterns.len() > 48 {
288debug!(
289"skipping Teddy (mask len: 3) because there are \
290 too many patterns",
291 );
292 }
293debug!("Teddy choice: 128-bit slim, 3 bytes");
294 SlimNeon::<3>::new(&patterns)
295 }
2964 => {
297debug!("Teddy choice: 128-bit slim, 4 bytes");
298 SlimNeon::<4>::new(&patterns)
299 }
300_ => {
301debug!("no supported Teddy configuration found");
302None
303}
304 }
305 }
306#[cfg(not(any(
307 all(target_arch = "x86_64", target_feature = "sse2"),
308 all(
309 target_arch = "aarch64",
310 target_feature = "neon",
311 target_endian = "little"
312)
313 )))]
314{
315None
316}
317 }
318}
319320/// A searcher that dispatches to one of several possible Teddy variants.
321#[derive(#[automatically_derived]
impl ::core::clone::Clone for Searcher {
#[inline]
fn clone(&self) -> Searcher {
Searcher {
imp: ::core::clone::Clone::clone(&self.imp),
memory_usage: ::core::clone::Clone::clone(&self.memory_usage),
minimum_len: ::core::clone::Clone::clone(&self.minimum_len),
}
}
}Clone, #[automatically_derived]
impl ::core::fmt::Debug for Searcher {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field3_finish(f, "Searcher",
"imp", &self.imp, "memory_usage", &self.memory_usage,
"minimum_len", &&self.minimum_len)
}
}Debug)]
322pub(crate) struct Searcher {
323/// The Teddy variant we use. We use dynamic dispatch under the theory that
324 /// it results in better codegen then a enum, although this is a specious
325 /// claim.
326 ///
327 /// This `Searcher` is essentially a wrapper for a `SearcherT` trait
328 /// object. We just make `memory_usage` and `minimum_len` available without
329 /// going through dynamic dispatch.
330imp: Arc<dyn SearcherT>,
331/// Total heap memory used by the Teddy variant.
332memory_usage: usize,
333/// The minimum haystack length this searcher can handle. It is intended
334 /// for callers to use some other search routine (such as Rabin-Karp) in
335 /// cases where the haystack (or remainer of the haystack) is too short.
336minimum_len: usize,
337}
338339impl Searcher {
340/// Look for the leftmost occurrence of any pattern in this search in the
341 /// given haystack starting at the given position.
342 ///
343 /// # Panics
344 ///
345 /// This panics when `haystack[at..].len()` is less than the minimum length
346 /// for this haystack.
347#[inline(always)]
348pub(crate) fn find(
349&self,
350 haystack: &[u8],
351 at: usize,
352 ) -> Option<crate::Match> {
353// SAFETY: The Teddy implementations all require a minimum haystack
354 // length, and this is required for safety. Therefore, we assert it
355 // here in order to make this method sound.
356if !(haystack[at..].len() >= self.minimum_len) {
::core::panicking::panic("assertion failed: haystack[at..].len() >= self.minimum_len")
};assert!(haystack[at..].len() >= self.minimum_len);
357let hayptr = haystack.as_ptr();
358// SAFETY: Construction of the searcher guarantees that we are able
359 // to run it in the current environment (i.e., we won't get an AVX2
360 // searcher on a x86-64 CPU without AVX2 support). Also, the pointers
361 // are valid as they are derived directly from a borrowed slice.
362let teddym = unsafe {
363self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))?
364};
365let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize());
366let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize());
367let span = crate::Span { start, end };
368// OK because we won't permit the construction of a searcher that
369 // could report a pattern ID bigger than what can fit in the crate-wide
370 // PatternID type.
371let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize());
372let m = crate::Match::new(pid, span);
373Some(m)
374 }
375376/// Returns the approximate total amount of heap used by this type, in
377 /// units of bytes.
378#[inline(always)]
379pub(crate) fn memory_usage(&self) -> usize {
380self.memory_usage
381 }
382383/// Returns the minimum length, in bytes, that a haystack must be in order
384 /// to use it with this searcher.
385#[inline(always)]
386pub(crate) fn minimum_len(&self) -> usize {
387self.minimum_len
388 }
389}
390391/// A trait that provides dynamic dispatch over the different possible Teddy
392/// variants on the same algorithm.
393///
394/// On `x86_64` for example, it isn't known until runtime which of 12 possible
395/// variants will be used. One might use one of the four slim 128-bit vector
396/// variants, or one of the four 256-bit vector variants or even one of the
397/// four fat 256-bit vector variants.
398///
399/// Since this choice is generally made when the Teddy searcher is constructed
400/// and this choice is based on the patterns given and what the current CPU
401/// supports, it follows that there must be some kind of indirection at search
402/// time that "selects" the variant chosen at build time.
403///
404/// There are a few different ways to go about this. One approach is to use an
405/// enum. It works fine, but in my experiments, this generally results in worse
406/// codegen. Another approach, which is what we use here, is dynamic dispatch
407/// via a trait object. We basically implement this trait for each possible
408/// variant, select the variant we want at build time and convert it to a
409/// trait object for use at search time.
410///
411/// Another approach is to use function pointers and stick each of the possible
412/// variants into a union. This is essentially isomorphic to the dynamic
413/// dispatch approach, but doesn't require any allocations. Since this crate
414/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
415/// `memchr` crate does this.)
416trait SearcherT:
417Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
418{
419/// Execute a search on the given haystack (identified by `start` and `end`
420 /// raw pointers).
421 ///
422 /// # Safety
423 ///
424 /// Essentially, the `start` and `end` pointers must be valid and point
425 /// to a haystack one can read. As long as you derive them from, for
426 /// example, a `&[u8]`, they should automatically satisfy all of the safety
427 /// obligations:
428 ///
429 /// * Both `start` and `end` must be valid for reads.
430 /// * Both `start` and `end` must point to an initialized value.
431 /// * Both `start` and `end` must point to the same allocated object and
432 /// must either be in bounds or at most one byte past the end of the
433 /// allocated object.
434 /// * Both `start` and `end` must be _derived from_ a pointer to the same
435 /// object.
436 /// * The distance between `start` and `end` must not overflow `isize`.
437 /// * The distance being in bounds must not rely on "wrapping around" the
438 /// address space.
439 /// * It must be the case that `start <= end`.
440 /// * `end - start` must be greater than the minimum length for this
441 /// searcher.
442 ///
443 /// Also, it is expected that implementations of this trait will tag this
444 /// method with a `target_feature` attribute. Callers must ensure that
445 /// they are executing this method in an environment where that attribute
446 /// is valid.
447unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>;
448}
449450#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
451mod x86_64 {
452use core::arch::x86_64::{__m128i, __m256i};
453454use alloc::sync::Arc;
455456use crate::packed::{
457ext::Pointer,
458pattern::Patterns,
459 teddy::generic::{self, Match},
460 };
461462use super::{Searcher, SearcherT};
463464#[derive(#[automatically_derived]
impl<const BYTES : usize> ::core::clone::Clone for SlimSSSE3<BYTES> {
#[inline]
fn clone(&self) -> SlimSSSE3<BYTES> {
SlimSSSE3 { slim128: ::core::clone::Clone::clone(&self.slim128) }
}
}Clone, #[automatically_derived]
impl<const BYTES : usize> ::core::fmt::Debug for SlimSSSE3<BYTES> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field1_finish(f, "SlimSSSE3",
"slim128", &&self.slim128)
}
}Debug)]
465pub(super) struct SlimSSSE3<const BYTES: usize> {
466 slim128: generic::Slim<__m128i, BYTES>,
467 }
468469// Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
470macro_rules! slim_ssse3 {
471 ($len:expr) => {
472impl SlimSSSE3<$len> {
473/// Creates a new searcher using "slim" Teddy with 128-bit
474 /// vectors. If SSSE3 is not available in the current
475 /// environment, then this returns `None`.
476pub(super) fn new(
477 patterns: &Arc<Patterns>,
478 ) -> Option<Searcher> {
479if !is_available_ssse3() {
480return None;
481 }
482Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) })
483 }
484485/// Creates a new searcher using "slim" Teddy with 256-bit
486 /// vectors without checking whether SSSE3 is available or not.
487 ///
488 /// # Safety
489 ///
490 /// Callers must ensure that SSSE3 is available in the current
491 /// environment.
492#[target_feature(enable = "ssse3")]
493unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
494let slim128 = generic::Slim::<__m128i, $len>::new(
495 Arc::clone(patterns),
496 );
497let memory_usage = slim128.memory_usage();
498let minimum_len = slim128.minimum_len();
499let imp = Arc::new(SlimSSSE3 { slim128 });
500 Searcher { imp, memory_usage, minimum_len }
501 }
502 }
503504impl SearcherT for SlimSSSE3<$len> {
505#[target_feature(enable = "ssse3")]
506 #[inline]
507unsafe fn find(
508&self,
509 start: *const u8,
510 end: *const u8,
511 ) -> Option<Match> {
512// SAFETY: All obligations except for `target_feature` are
513 // passed to the caller. Our use of `target_feature` is
514 // safe because construction of this type requires that the
515 // requisite target features are available.
516self.slim128.find(start, end)
517 }
518 }
519 };
520 }
521522impl SlimSSSE3<1> {
/// Creates a new searcher using "slim" Teddy with 128-bit
/// vectors. If SSSE3 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_ssse3() { return None; }
Some(unsafe { SlimSSSE3::<1>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether SSSE3 is available or not.
///
/// # Safety
///
/// Callers must ensure that SSSE3 is available in the current
/// environment.
#[target_feature(enable = "ssse3")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 1>::new(Arc::clone(patterns));
let memory_usage = slim128.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimSSSE3 { slim128 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimSSSE3<1> {
#[target_feature(enable = "ssse3")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.slim128.find(start, end)
}
}slim_ssse3!(1);
523impl SlimSSSE3<2> {
/// Creates a new searcher using "slim" Teddy with 128-bit
/// vectors. If SSSE3 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_ssse3() { return None; }
Some(unsafe { SlimSSSE3::<2>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether SSSE3 is available or not.
///
/// # Safety
///
/// Callers must ensure that SSSE3 is available in the current
/// environment.
#[target_feature(enable = "ssse3")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 2>::new(Arc::clone(patterns));
let memory_usage = slim128.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimSSSE3 { slim128 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimSSSE3<2> {
#[target_feature(enable = "ssse3")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.slim128.find(start, end)
}
}slim_ssse3!(2);
524impl SlimSSSE3<3> {
/// Creates a new searcher using "slim" Teddy with 128-bit
/// vectors. If SSSE3 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_ssse3() { return None; }
Some(unsafe { SlimSSSE3::<3>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether SSSE3 is available or not.
///
/// # Safety
///
/// Callers must ensure that SSSE3 is available in the current
/// environment.
#[target_feature(enable = "ssse3")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 3>::new(Arc::clone(patterns));
let memory_usage = slim128.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimSSSE3 { slim128 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimSSSE3<3> {
#[target_feature(enable = "ssse3")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.slim128.find(start, end)
}
}slim_ssse3!(3);
525impl SlimSSSE3<4> {
/// Creates a new searcher using "slim" Teddy with 128-bit
/// vectors. If SSSE3 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_ssse3() { return None; }
Some(unsafe { SlimSSSE3::<4>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether SSSE3 is available or not.
///
/// # Safety
///
/// Callers must ensure that SSSE3 is available in the current
/// environment.
#[target_feature(enable = "ssse3")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 4>::new(Arc::clone(patterns));
let memory_usage = slim128.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimSSSE3 { slim128 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimSSSE3<4> {
#[target_feature(enable = "ssse3")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.slim128.find(start, end)
}
}slim_ssse3!(4);
526527#[derive(#[automatically_derived]
impl<const BYTES : usize> ::core::clone::Clone for SlimAVX2<BYTES> {
#[inline]
fn clone(&self) -> SlimAVX2<BYTES> {
SlimAVX2 {
slim128: ::core::clone::Clone::clone(&self.slim128),
slim256: ::core::clone::Clone::clone(&self.slim256),
}
}
}Clone, #[automatically_derived]
impl<const BYTES : usize> ::core::fmt::Debug for SlimAVX2<BYTES> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "SlimAVX2",
"slim128", &self.slim128, "slim256", &&self.slim256)
}
}Debug)]
528pub(super) struct SlimAVX2<const BYTES: usize> {
529 slim128: generic::Slim<__m128i, BYTES>,
530 slim256: generic::Slim<__m256i, BYTES>,
531 }
532533// Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
534macro_rules! slim_avx2 {
535 ($len:expr) => {
536impl SlimAVX2<$len> {
537/// Creates a new searcher using "slim" Teddy with 256-bit
538 /// vectors. If AVX2 is not available in the current
539 /// environment, then this returns `None`.
540pub(super) fn new(
541 patterns: &Arc<Patterns>,
542 ) -> Option<Searcher> {
543if !is_available_avx2() {
544return None;
545 }
546Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) })
547 }
548549/// Creates a new searcher using "slim" Teddy with 256-bit
550 /// vectors without checking whether AVX2 is available or not.
551 ///
552 /// # Safety
553 ///
554 /// Callers must ensure that AVX2 is available in the current
555 /// environment.
556#[target_feature(enable = "avx2")]
557unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
558let slim128 = generic::Slim::<__m128i, $len>::new(
559 Arc::clone(&patterns),
560 );
561let slim256 = generic::Slim::<__m256i, $len>::new(
562 Arc::clone(&patterns),
563 );
564let memory_usage =
565 slim128.memory_usage() + slim256.memory_usage();
566let minimum_len = slim128.minimum_len();
567let imp = Arc::new(SlimAVX2 { slim128, slim256 });
568 Searcher { imp, memory_usage, minimum_len }
569 }
570 }
571572impl SearcherT for SlimAVX2<$len> {
573#[target_feature(enable = "avx2")]
574 #[inline]
575unsafe fn find(
576&self,
577 start: *const u8,
578 end: *const u8,
579 ) -> Option<Match> {
580// SAFETY: All obligations except for `target_feature` are
581 // passed to the caller. Our use of `target_feature` is
582 // safe because construction of this type requires that the
583 // requisite target features are available.
584let len = end.distance(start);
585if len < self.slim256.minimum_len() {
586self.slim128.find(start, end)
587 } else {
588self.slim256.find(start, end)
589 }
590 }
591 }
592 };
593 }
594595impl SlimAVX2<1> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { SlimAVX2::<1>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 1>::new(Arc::clone(&patterns));
let slim256 = generic::Slim::<__m256i, 1>::new(Arc::clone(&patterns));
let memory_usage = slim128.memory_usage() + slim256.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimAVX2 { slim128, slim256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimAVX2<1> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
let len = end.distance(start);
if len < self.slim256.minimum_len() {
self.slim128.find(start, end)
} else { self.slim256.find(start, end) }
}
}slim_avx2!(1);
596impl SlimAVX2<2> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { SlimAVX2::<2>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 2>::new(Arc::clone(&patterns));
let slim256 = generic::Slim::<__m256i, 2>::new(Arc::clone(&patterns));
let memory_usage = slim128.memory_usage() + slim256.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimAVX2 { slim128, slim256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimAVX2<2> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
let len = end.distance(start);
if len < self.slim256.minimum_len() {
self.slim128.find(start, end)
} else { self.slim256.find(start, end) }
}
}slim_avx2!(2);
597impl SlimAVX2<3> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { SlimAVX2::<3>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 3>::new(Arc::clone(&patterns));
let slim256 = generic::Slim::<__m256i, 3>::new(Arc::clone(&patterns));
let memory_usage = slim128.memory_usage() + slim256.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimAVX2 { slim128, slim256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimAVX2<3> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
let len = end.distance(start);
if len < self.slim256.minimum_len() {
self.slim128.find(start, end)
} else { self.slim256.find(start, end) }
}
}slim_avx2!(3);
598impl SlimAVX2<4> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { SlimAVX2::<4>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let slim128 = generic::Slim::<__m128i, 4>::new(Arc::clone(&patterns));
let slim256 = generic::Slim::<__m256i, 4>::new(Arc::clone(&patterns));
let memory_usage = slim128.memory_usage() + slim256.memory_usage();
let minimum_len = slim128.minimum_len();
let imp = Arc::new(SlimAVX2 { slim128, slim256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for SlimAVX2<4> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
let len = end.distance(start);
if len < self.slim256.minimum_len() {
self.slim128.find(start, end)
} else { self.slim256.find(start, end) }
}
}slim_avx2!(4);
599600#[derive(#[automatically_derived]
impl<const BYTES : usize> ::core::clone::Clone for FatAVX2<BYTES> {
#[inline]
fn clone(&self) -> FatAVX2<BYTES> {
FatAVX2 { fat256: ::core::clone::Clone::clone(&self.fat256) }
}
}Clone, #[automatically_derived]
impl<const BYTES : usize> ::core::fmt::Debug for FatAVX2<BYTES> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field1_finish(f, "FatAVX2",
"fat256", &&self.fat256)
}
}Debug)]
601pub(super) struct FatAVX2<const BYTES: usize> {
602 fat256: generic::Fat<__m256i, BYTES>,
603 }
604605// Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
606macro_rules! fat_avx2 {
607 ($len:expr) => {
608impl FatAVX2<$len> {
609/// Creates a new searcher using "slim" Teddy with 256-bit
610 /// vectors. If AVX2 is not available in the current
611 /// environment, then this returns `None`.
612pub(super) fn new(
613 patterns: &Arc<Patterns>,
614 ) -> Option<Searcher> {
615if !is_available_avx2() {
616return None;
617 }
618Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) })
619 }
620621/// Creates a new searcher using "slim" Teddy with 256-bit
622 /// vectors without checking whether AVX2 is available or not.
623 ///
624 /// # Safety
625 ///
626 /// Callers must ensure that AVX2 is available in the current
627 /// environment.
628#[target_feature(enable = "avx2")]
629unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
630let fat256 = generic::Fat::<__m256i, $len>::new(
631 Arc::clone(&patterns),
632 );
633let memory_usage = fat256.memory_usage();
634let minimum_len = fat256.minimum_len();
635let imp = Arc::new(FatAVX2 { fat256 });
636 Searcher { imp, memory_usage, minimum_len }
637 }
638 }
639640impl SearcherT for FatAVX2<$len> {
641#[target_feature(enable = "avx2")]
642 #[inline]
643unsafe fn find(
644&self,
645 start: *const u8,
646 end: *const u8,
647 ) -> Option<Match> {
648// SAFETY: All obligations except for `target_feature` are
649 // passed to the caller. Our use of `target_feature` is
650 // safe because construction of this type requires that the
651 // requisite target features are available.
652self.fat256.find(start, end)
653 }
654 }
655 };
656 }
657658impl FatAVX2<1> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { FatAVX2::<1>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let fat256 = generic::Fat::<__m256i, 1>::new(Arc::clone(&patterns));
let memory_usage = fat256.memory_usage();
let minimum_len = fat256.minimum_len();
let imp = Arc::new(FatAVX2 { fat256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for FatAVX2<1> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.fat256.find(start, end)
}
}fat_avx2!(1);
659impl FatAVX2<2> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { FatAVX2::<2>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let fat256 = generic::Fat::<__m256i, 2>::new(Arc::clone(&patterns));
let memory_usage = fat256.memory_usage();
let minimum_len = fat256.minimum_len();
let imp = Arc::new(FatAVX2 { fat256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for FatAVX2<2> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.fat256.find(start, end)
}
}fat_avx2!(2);
660impl FatAVX2<3> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { FatAVX2::<3>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let fat256 = generic::Fat::<__m256i, 3>::new(Arc::clone(&patterns));
let memory_usage = fat256.memory_usage();
let minimum_len = fat256.minimum_len();
let imp = Arc::new(FatAVX2 { fat256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for FatAVX2<3> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.fat256.find(start, end)
}
}fat_avx2!(3);
661impl FatAVX2<4> {
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors. If AVX2 is not available in the current
/// environment, then this returns `None`.
pub(super) fn new(patterns: &Arc<Patterns>) -> Option<Searcher> {
if !is_available_avx2() { return None; }
Some(unsafe { FatAVX2::<4>::new_unchecked(patterns) })
}
/// Creates a new searcher using "slim" Teddy with 256-bit
/// vectors without checking whether AVX2 is available or not.
///
/// # Safety
///
/// Callers must ensure that AVX2 is available in the current
/// environment.
#[target_feature(enable = "avx2")]
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
let fat256 = generic::Fat::<__m256i, 4>::new(Arc::clone(&patterns));
let memory_usage = fat256.memory_usage();
let minimum_len = fat256.minimum_len();
let imp = Arc::new(FatAVX2 { fat256 });
Searcher { imp, memory_usage, minimum_len }
}
}
impl SearcherT for FatAVX2<4> {
#[target_feature(enable = "avx2")]
#[inline]
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match> {
self.fat256.find(start, end)
}
}fat_avx2!(4);
662663#[inline]
664pub(super) fn is_available_ssse3() -> bool {
665#[cfg(not(target_feature = "sse2"))]
666{
667false
668}
669#[cfg(target_feature = "sse2")]
670{
671#[cfg(target_feature = "ssse3")]
672{
673true
674}
675#[cfg(not(target_feature = "ssse3"))]
676{
677#[cfg(feature = "std")]
678{
679false || ::std_detect::detect::__is_feature_detected::ssse3()std::is_x86_feature_detected!("ssse3")680 }
681#[cfg(not(feature = "std"))]
682{
683false
684}
685 }
686 }
687 }
688689#[inline]
690pub(super) fn is_available_avx2() -> bool {
691#[cfg(not(target_feature = "sse2"))]
692{
693false
694}
695#[cfg(target_feature = "sse2")]
696{
697#[cfg(target_feature = "avx2")]
698{
699true
700}
701#[cfg(not(target_feature = "avx2"))]
702{
703#[cfg(feature = "std")]
704{
705false || ::std_detect::detect::__is_feature_detected::avx2()std::is_x86_feature_detected!("avx2")706 }
707#[cfg(not(feature = "std"))]
708{
709false
710}
711 }
712 }
713 }
714}
715716#[cfg(all(
717 target_arch = "aarch64",
718 target_feature = "neon",
719 target_endian = "little"
720))]
721mod aarch64 {
722use core::arch::aarch64::uint8x16_t;
723724use alloc::sync::Arc;
725726use crate::packed::{
727 pattern::Patterns,
728 teddy::generic::{self, Match},
729 };
730731use super::{Searcher, SearcherT};
732733#[derive(Clone, Debug)]
734pub(super) struct SlimNeon<const BYTES: usize> {
735 slim128: generic::Slim<uint8x16_t, BYTES>,
736 }
737738// Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
739macro_rules! slim_neon {
740 ($len:expr) => {
741impl SlimNeon<$len> {
742/// Creates a new searcher using "slim" Teddy with 128-bit
743 /// vectors. If SSSE3 is not available in the current
744 /// environment, then this returns `None`.
745pub(super) fn new(
746 patterns: &Arc<Patterns>,
747 ) -> Option<Searcher> {
748Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) })
749 }
750751/// Creates a new searcher using "slim" Teddy with 256-bit
752 /// vectors without checking whether SSSE3 is available or not.
753 ///
754 /// # Safety
755 ///
756 /// Callers must ensure that SSSE3 is available in the current
757 /// environment.
758#[target_feature(enable = "neon")]
759unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
760let slim128 = generic::Slim::<uint8x16_t, $len>::new(
761 Arc::clone(patterns),
762 );
763let memory_usage = slim128.memory_usage();
764let minimum_len = slim128.minimum_len();
765let imp = Arc::new(SlimNeon { slim128 });
766 Searcher { imp, memory_usage, minimum_len }
767 }
768 }
769770impl SearcherT for SlimNeon<$len> {
771#[target_feature(enable = "neon")]
772 #[inline]
773unsafe fn find(
774&self,
775 start: *const u8,
776 end: *const u8,
777 ) -> Option<Match> {
778// SAFETY: All obligations except for `target_feature` are
779 // passed to the caller. Our use of `target_feature` is
780 // safe because construction of this type requires that the
781 // requisite target features are available.
782self.slim128.find(start, end)
783 }
784 }
785 };
786 }
787788slim_neon!(1);
789slim_neon!(2);
790slim_neon!(3);
791slim_neon!(4);
792}