1use crate::{
2 dfa::{
3accel,
4 automaton::{Automaton, OverlappingState},
5 },
6 util::{
7prefilter::Prefilter,
8primitives::StateID,
9 search::{Anchored, HalfMatch, Input, Span},
10 },
11MatchError,
12};
1314#[inline(never)]
15pub fn find_fwd<A: Automaton + ?Sized>(
16 dfa: &A,
17 input: &Input<'_>,
18) -> Result<Option<HalfMatch>, MatchError> {
19if input.is_done() {
20return Ok(None);
21 }
22let pre = if input.get_anchored().is_anchored() {
23None24 } else {
25dfa.get_prefilter()
26 };
27// Searching with a pattern ID is always anchored, so we should never use
28 // a prefilter.
29if pre.is_some() {
30if input.get_earliest() {
31find_fwd_imp(dfa, input, pre, true)
32 } else {
33find_fwd_imp(dfa, input, pre, false)
34 }
35 } else {
36if input.get_earliest() {
37find_fwd_imp(dfa, input, None, true)
38 } else {
39find_fwd_imp(dfa, input, None, false)
40 }
41 }
42}
4344#[cfg_attr(feature = "perf-inline", inline(always))]
45fn find_fwd_imp<A: Automaton + ?Sized>(
46 dfa: &A,
47 input: &Input<'_>,
48 pre: Option<&'_ Prefilter>,
49 earliest: bool,
50) -> Result<Option<HalfMatch>, MatchError> {
51// See 'prefilter_restart' docs for explanation.
52let universal_start = dfa.universal_start_state(Anchored::No).is_some();
53let mut mat = None;
54let mut sid = init_fwd(dfa, input)?;
55let mut at = input.start();
56// This could just be a closure, but then I think it would be unsound
57 // because it would need to be safe to invoke. This way, the lack of safety
58 // is clearer in the code below.
59macro_rules! next_unchecked {
60 ($sid:expr, $at:expr) => {{
61let byte = *input.haystack().get_unchecked($at);
62 dfa.next_state_unchecked($sid, byte)
63 }};
64 }
6566if let Some(ref pre) = pre {
67let span = Span::from(at..input.end());
68// If a prefilter doesn't report false positives, then we don't need to
69 // touch the DFA at all. However, since all matches include the pattern
70 // ID, and the prefilter infrastructure doesn't report pattern IDs, we
71 // limit this optimization to cases where there is exactly one pattern.
72 // In that case, any match must be the 0th pattern.
73match pre.find(input.haystack(), span) {
74None => return Ok(mat),
75Some(ref span) => {
76at = span.start;
77if !universal_start {
78sid = prefilter_restart(dfa, &input, at)?;
79 }
80 }
81 }
82 }
83while at < input.end() {
84// SAFETY: There are two safety invariants we need to uphold here in
85 // the loops below: that 'sid' and 'prev_sid' are valid state IDs
86 // for this DFA, and that 'at' is a valid index into 'haystack'.
87 // For the former, we rely on the invariant that next_state* and
88 // start_state_forward always returns a valid state ID (given a valid
89 // state ID in the former case). For the latter safety invariant, we
90 // always guard unchecked access with a check that 'at' is less than
91 // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we
92 // ensure that 'at' is always in bounds.
93 //
94 // PERF: See a similar comment in src/hybrid/search.rs that justifies
95 // this extra work to make the search loop fast. The same reasoning and
96 // benchmarks apply here.
97let mut prev_sid;
98while at < input.end() {
99 prev_sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(sid, byte)
}next_unchecked!(sid, at) };
100if dfa.is_special_state(prev_sid) || at + 3 >= input.end() {
101 core::mem::swap(&mut prev_sid, &mut sid);
102break;
103 }
104 at += 1;
105106 sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(prev_sid, byte)
}next_unchecked!(prev_sid, at) };
107if dfa.is_special_state(sid) {
108break;
109 }
110 at += 1;
111112 prev_sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(sid, byte)
}next_unchecked!(sid, at) };
113if dfa.is_special_state(prev_sid) {
114 core::mem::swap(&mut prev_sid, &mut sid);
115break;
116 }
117 at += 1;
118119 sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(prev_sid, byte)
}next_unchecked!(prev_sid, at) };
120if dfa.is_special_state(sid) {
121break;
122 }
123 at += 1;
124 }
125if dfa.is_special_state(sid) {
126if dfa.is_start_state(sid) {
127if let Some(ref pre) = pre {
128let span = Span::from(at..input.end());
129match pre.find(input.haystack(), span) {
130None => return Ok(mat),
131Some(ref span) => {
132// We want to skip any update to 'at' below
133 // at the end of this iteration and just
134 // jump immediately back to the next state
135 // transition at the leading position of the
136 // candidate match.
137 //
138 // ... but only if we actually made progress
139 // with our prefilter, otherwise if the start
140 // state has a self-loop, we can get stuck.
141if span.start > at {
142 at = span.start;
143if !universal_start {
144 sid = prefilter_restart(dfa, &input, at)?;
145 }
146continue;
147 }
148 }
149 }
150 } else if dfa.is_accel_state(sid) {
151let needles = dfa.accelerator(sid);
152 at = accel::find_fwd(needles, input.haystack(), at + 1)
153 .unwrap_or(input.end());
154continue;
155 }
156 } else if dfa.is_match_state(sid) {
157let pattern = dfa.match_pattern(sid, 0);
158 mat = Some(HalfMatch::new(pattern, at));
159if earliest {
160return Ok(mat);
161 }
162if dfa.is_accel_state(sid) {
163let needles = dfa.accelerator(sid);
164 at = accel::find_fwd(needles, input.haystack(), at + 1)
165 .unwrap_or(input.end());
166continue;
167 }
168 } else if dfa.is_accel_state(sid) {
169let needs = dfa.accelerator(sid);
170 at = accel::find_fwd(needs, input.haystack(), at + 1)
171 .unwrap_or(input.end());
172continue;
173 } else if dfa.is_dead_state(sid) {
174return Ok(mat);
175 } else {
176// It's important that this is a debug_assert, since this can
177 // actually be tripped even if DFA::from_bytes succeeds and
178 // returns a supposedly valid DFA.
179return Err(MatchError::quit(input.haystack()[at], at));
180 }
181 }
182 at += 1;
183 }
184eoi_fwd(dfa, input, &mut sid, &mut mat)?;
185Ok(mat)
186}
187188#[inline(never)]
189pub fn find_rev<A: Automaton + ?Sized>(
190 dfa: &A,
191 input: &Input<'_>,
192) -> Result<Option<HalfMatch>, MatchError> {
193if input.is_done() {
194return Ok(None);
195 }
196if input.get_earliest() {
197find_rev_imp(dfa, input, true)
198 } else {
199find_rev_imp(dfa, input, false)
200 }
201}
202203#[cfg_attr(feature = "perf-inline", inline(always))]
204fn find_rev_imp<A: Automaton + ?Sized>(
205 dfa: &A,
206 input: &Input<'_>,
207 earliest: bool,
208) -> Result<Option<HalfMatch>, MatchError> {
209let mut mat = None;
210let mut sid = init_rev(dfa, input)?;
211// In reverse search, the loop below can't handle the case of searching an
212 // empty slice. Ideally we could write something congruent to the forward
213 // search, i.e., 'while at >= start', but 'start' might be 0. Since we use
214 // an unsigned offset, 'at >= 0' is trivially always true. We could avoid
215 // this extra case handling by using a signed offset, but Rust makes it
216 // annoying to do. So... We just handle the empty case separately.
217if input.start() == input.end() {
218eoi_rev(dfa, input, &mut sid, &mut mat)?;
219return Ok(mat);
220 }
221222let mut at = input.end() - 1;
223macro_rules! next_unchecked {
224 ($sid:expr, $at:expr) => {{
225let byte = *input.haystack().get_unchecked($at);
226 dfa.next_state_unchecked($sid, byte)
227 }};
228 }
229loop {
230// SAFETY: See comments in 'find_fwd' for a safety argument.
231let mut prev_sid;
232while at >= input.start() {
233 prev_sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(sid, byte)
}next_unchecked!(sid, at) };
234if dfa.is_special_state(prev_sid)
235 || at <= input.start().saturating_add(3)
236 {
237 core::mem::swap(&mut prev_sid, &mut sid);
238break;
239 }
240 at -= 1;
241242 sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(prev_sid, byte)
}next_unchecked!(prev_sid, at) };
243if dfa.is_special_state(sid) {
244break;
245 }
246 at -= 1;
247248 prev_sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(sid, byte)
}next_unchecked!(sid, at) };
249if dfa.is_special_state(prev_sid) {
250 core::mem::swap(&mut prev_sid, &mut sid);
251break;
252 }
253 at -= 1;
254255 sid = unsafe { {
let byte = *input.haystack().get_unchecked(at);
dfa.next_state_unchecked(prev_sid, byte)
}next_unchecked!(prev_sid, at) };
256if dfa.is_special_state(sid) {
257break;
258 }
259 at -= 1;
260 }
261if dfa.is_special_state(sid) {
262if dfa.is_start_state(sid) {
263if dfa.is_accel_state(sid) {
264let needles = dfa.accelerator(sid);
265at = accel::find_rev(needles, input.haystack(), at)
266 .map(|i| i + 1)
267 .unwrap_or(input.start());
268 }
269 } else if dfa.is_match_state(sid) {
270let pattern = dfa.match_pattern(sid, 0);
271// Since reverse searches report the beginning of a match
272 // and the beginning is inclusive (not exclusive like the
273 // end of a match), we add 1 to make it inclusive.
274mat = Some(HalfMatch::new(pattern, at + 1));
275if earliest {
276return Ok(mat);
277 }
278if dfa.is_accel_state(sid) {
279let needles = dfa.accelerator(sid);
280at = accel::find_rev(needles, input.haystack(), at)
281 .map(|i| i + 1)
282 .unwrap_or(input.start());
283 }
284 } else if dfa.is_accel_state(sid) {
285let needles = dfa.accelerator(sid);
286// If the accelerator returns nothing, why don't we quit the
287 // search? Well, if the accelerator doesn't find anything, that
288 // doesn't mean we don't have a match. It just means that we
289 // can't leave the current state given one of the 255 possible
290 // byte values. However, there might be an EOI transition. So
291 // we set 'at' to the end of the haystack, which will cause
292 // this loop to stop and fall down into the EOI transition.
293at = accel::find_rev(needles, input.haystack(), at)
294 .map(|i| i + 1)
295 .unwrap_or(input.start());
296 } else if dfa.is_dead_state(sid) {
297return Ok(mat);
298 } else {
299return Err(MatchError::quit(input.haystack()[at], at));
300 }
301 }
302if at == input.start() {
303break;
304 }
305at -= 1;
306 }
307eoi_rev(dfa, input, &mut sid, &mut mat)?;
308Ok(mat)
309}
310311#[inline(never)]
312pub fn find_overlapping_fwd<A: Automaton + ?Sized>(
313 dfa: &A,
314 input: &Input<'_>,
315 state: &mut OverlappingState,
316) -> Result<(), MatchError> {
317state.mat = None;
318if input.is_done() {
319return Ok(());
320 }
321let pre = if input.get_anchored().is_anchored() {
322None323 } else {
324dfa.get_prefilter()
325 };
326if pre.is_some() {
327find_overlapping_fwd_imp(dfa, input, pre, state)
328 } else {
329find_overlapping_fwd_imp(dfa, input, None, state)
330 }
331}
332333#[cfg_attr(feature = "perf-inline", inline(always))]
334fn find_overlapping_fwd_imp<A: Automaton + ?Sized>(
335 dfa: &A,
336 input: &Input<'_>,
337 pre: Option<&'_ Prefilter>,
338 state: &mut OverlappingState,
339) -> Result<(), MatchError> {
340// See 'prefilter_restart' docs for explanation.
341let universal_start = dfa.universal_start_state(Anchored::No).is_some();
342let mut sid = match state.id {
343None => {
344state.at = input.start();
345init_fwd(dfa, input)?
346}
347Some(sid) => {
348if let Some(match_index) = state.next_match_index {
349let match_len = dfa.match_len(sid);
350if match_index < match_len {
351state.next_match_index = Some(match_index + 1);
352let pattern = dfa.match_pattern(sid, match_index);
353state.mat = Some(HalfMatch::new(pattern, state.at));
354return Ok(());
355 }
356 }
357// Once we've reported all matches at a given position, we need to
358 // advance the search to the next position.
359state.at += 1;
360if state.at > input.end() {
361return Ok(());
362 }
363sid364 }
365 };
366367// NOTE: We don't optimize the crap out of this routine primarily because
368 // it seems like most find_overlapping searches will have higher match
369 // counts, and thus, throughput is perhaps not as important. But if you
370 // have a use case for something faster, feel free to file an issue.
371while state.at < input.end() {
372 sid = dfa.next_state(sid, input.haystack()[state.at]);
373if dfa.is_special_state(sid) {
374 state.id = Some(sid);
375if dfa.is_start_state(sid) {
376if let Some(ref pre) = pre {
377let span = Span::from(state.at..input.end());
378match pre.find(input.haystack(), span) {
379None => return Ok(()),
380Some(ref span) => {
381if span.start > state.at {
382 state.at = span.start;
383if !universal_start {
384 sid = prefilter_restart(
385 dfa, &input, state.at,
386 )?;
387 }
388continue;
389 }
390 }
391 }
392 } else if dfa.is_accel_state(sid) {
393let needles = dfa.accelerator(sid);
394 state.at = accel::find_fwd(
395 needles,
396 input.haystack(),
397 state.at + 1,
398 )
399 .unwrap_or(input.end());
400continue;
401 }
402 } else if dfa.is_match_state(sid) {
403 state.next_match_index = Some(1);
404let pattern = dfa.match_pattern(sid, 0);
405 state.mat = Some(HalfMatch::new(pattern, state.at));
406return Ok(());
407 } else if dfa.is_accel_state(sid) {
408let needs = dfa.accelerator(sid);
409// If the accelerator returns nothing, why don't we quit the
410 // search? Well, if the accelerator doesn't find anything, that
411 // doesn't mean we don't have a match. It just means that we
412 // can't leave the current state given one of the 255 possible
413 // byte values. However, there might be an EOI transition. So
414 // we set 'at' to the end of the haystack, which will cause
415 // this loop to stop and fall down into the EOI transition.
416state.at =
417 accel::find_fwd(needs, input.haystack(), state.at + 1)
418 .unwrap_or(input.end());
419continue;
420 } else if dfa.is_dead_state(sid) {
421return Ok(());
422 } else {
423return Err(MatchError::quit(
424 input.haystack()[state.at],
425 state.at,
426 ));
427 }
428 }
429 state.at += 1;
430 }
431432let result = eoi_fwd(dfa, input, &mut sid, &mut state.mat);
433state.id = Some(sid);
434if state.mat.is_some() {
435// '1' is always correct here since if we get to this point, this
436 // always corresponds to the first (index '0') match discovered at
437 // this position. So the next match to report at this position (if
438 // it exists) is at index '1'.
439state.next_match_index = Some(1);
440 }
441result442}
443444#[inline(never)]
445pub(crate) fn find_overlapping_rev<A: Automaton + ?Sized>(
446 dfa: &A,
447 input: &Input<'_>,
448 state: &mut OverlappingState,
449) -> Result<(), MatchError> {
450state.mat = None;
451if input.is_done() {
452return Ok(());
453 }
454let mut sid = match state.id {
455None => {
456let sid = init_rev(dfa, input)?;
457state.id = Some(sid);
458if input.start() == input.end() {
459state.rev_eoi = true;
460 } else {
461state.at = input.end() - 1;
462 }
463sid464 }
465Some(sid) => {
466if let Some(match_index) = state.next_match_index {
467let match_len = dfa.match_len(sid);
468if match_index < match_len {
469state.next_match_index = Some(match_index + 1);
470let pattern = dfa.match_pattern(sid, match_index);
471state.mat = Some(HalfMatch::new(pattern, state.at));
472return Ok(());
473 }
474 }
475// Once we've reported all matches at a given position, we need
476 // to advance the search to the next position. However, if we've
477 // already followed the EOI transition, then we know we're done
478 // with the search and there cannot be any more matches to report.
479if state.rev_eoi {
480return Ok(());
481 } else if state.at == input.start() {
482// At this point, we should follow the EOI transition. This
483 // will cause us the skip the main loop below and fall through
484 // to the final 'eoi_rev' transition.
485state.rev_eoi = true;
486 } else {
487// We haven't hit the end of the search yet, so move on.
488state.at -= 1;
489 }
490sid491 }
492 };
493while !state.rev_eoi {
494 sid = dfa.next_state(sid, input.haystack()[state.at]);
495if dfa.is_special_state(sid) {
496 state.id = Some(sid);
497if dfa.is_start_state(sid) {
498if dfa.is_accel_state(sid) {
499let needles = dfa.accelerator(sid);
500 state.at =
501 accel::find_rev(needles, input.haystack(), state.at)
502 .map(|i| i + 1)
503 .unwrap_or(input.start());
504 }
505 } else if dfa.is_match_state(sid) {
506 state.next_match_index = Some(1);
507let pattern = dfa.match_pattern(sid, 0);
508 state.mat = Some(HalfMatch::new(pattern, state.at + 1));
509return Ok(());
510 } else if dfa.is_accel_state(sid) {
511let needles = dfa.accelerator(sid);
512// If the accelerator returns nothing, why don't we quit the
513 // search? Well, if the accelerator doesn't find anything, that
514 // doesn't mean we don't have a match. It just means that we
515 // can't leave the current state given one of the 255 possible
516 // byte values. However, there might be an EOI transition. So
517 // we set 'at' to the end of the haystack, which will cause
518 // this loop to stop and fall down into the EOI transition.
519state.at =
520 accel::find_rev(needles, input.haystack(), state.at)
521 .map(|i| i + 1)
522 .unwrap_or(input.start());
523 } else if dfa.is_dead_state(sid) {
524return Ok(());
525 } else {
526return Err(MatchError::quit(
527 input.haystack()[state.at],
528 state.at,
529 ));
530 }
531 }
532if state.at == input.start() {
533break;
534 }
535 state.at -= 1;
536 }
537538let result = eoi_rev(dfa, input, &mut sid, &mut state.mat);
539state.rev_eoi = true;
540state.id = Some(sid);
541if state.mat.is_some() {
542// '1' is always correct here since if we get to this point, this
543 // always corresponds to the first (index '0') match discovered at
544 // this position. So the next match to report at this position (if
545 // it exists) is at index '1'.
546state.next_match_index = Some(1);
547 }
548result549}
550551#[cfg_attr(feature = "perf-inline", inline(always))]
552fn init_fwd<A: Automaton + ?Sized>(
553 dfa: &A,
554 input: &Input<'_>,
555) -> Result<StateID, MatchError> {
556let sid = dfa.start_state_forward(input)?;
557// Start states can never be match states, since all matches are delayed
558 // by 1 byte.
559if true {
if !!dfa.is_match_state(sid) {
::core::panicking::panic("assertion failed: !dfa.is_match_state(sid)")
};
};debug_assert!(!dfa.is_match_state(sid));
560Ok(sid)
561}
562563#[cfg_attr(feature = "perf-inline", inline(always))]
564fn init_rev<A: Automaton + ?Sized>(
565 dfa: &A,
566 input: &Input<'_>,
567) -> Result<StateID, MatchError> {
568let sid = dfa.start_state_reverse(input)?;
569// Start states can never be match states, since all matches are delayed
570 // by 1 byte.
571if true {
if !!dfa.is_match_state(sid) {
::core::panicking::panic("assertion failed: !dfa.is_match_state(sid)")
};
};debug_assert!(!dfa.is_match_state(sid));
572Ok(sid)
573}
574575#[cfg_attr(feature = "perf-inline", inline(always))]
576fn eoi_fwd<A: Automaton + ?Sized>(
577 dfa: &A,
578 input: &Input<'_>,
579 sid: &mut StateID,
580 mat: &mut Option<HalfMatch>,
581) -> Result<(), MatchError> {
582let sp = input.get_span();
583match input.haystack().get(sp.end) {
584Some(&b) => {
585*sid = dfa.next_state(*sid, b);
586if dfa.is_match_state(*sid) {
587let pattern = dfa.match_pattern(*sid, 0);
588*mat = Some(HalfMatch::new(pattern, sp.end));
589 } else if dfa.is_quit_state(*sid) {
590return Err(MatchError::quit(b, sp.end));
591 }
592 }
593None => {
594*sid = dfa.next_eoi_state(*sid);
595if dfa.is_match_state(*sid) {
596let pattern = dfa.match_pattern(*sid, 0);
597*mat = Some(HalfMatch::new(pattern, input.haystack().len()));
598 }
599 }
600 }
601Ok(())
602}
603604#[cfg_attr(feature = "perf-inline", inline(always))]
605fn eoi_rev<A: Automaton + ?Sized>(
606 dfa: &A,
607 input: &Input<'_>,
608 sid: &mut StateID,
609 mat: &mut Option<HalfMatch>,
610) -> Result<(), MatchError> {
611let sp = input.get_span();
612if sp.start > 0 {
613let byte = input.haystack()[sp.start - 1];
614*sid = dfa.next_state(*sid, byte);
615if dfa.is_match_state(*sid) {
616let pattern = dfa.match_pattern(*sid, 0);
617*mat = Some(HalfMatch::new(pattern, sp.start));
618 } else if dfa.is_quit_state(*sid) {
619return Err(MatchError::quit(byte, sp.start - 1));
620 }
621 } else {
622*sid = dfa.next_eoi_state(*sid);
623if dfa.is_match_state(*sid) {
624let pattern = dfa.match_pattern(*sid, 0);
625*mat = Some(HalfMatch::new(pattern, 0));
626 }
627 }
628Ok(())
629}
630631/// Re-compute the starting state that a DFA should be in after finding a
632/// prefilter candidate match at the position `at`.
633///
634/// The function with the same name has a bit more docs in hybrid/search.rs.
635#[cfg_attr(feature = "perf-inline", inline(always))]
636fn prefilter_restart<A: Automaton + ?Sized>(
637 dfa: &A,
638 input: &Input<'_>,
639 at: usize,
640) -> Result<StateID, MatchError> {
641let mut input = input.clone();
642input.set_start(at);
643init_fwd(dfa, &input)
644}