1/*!
2This module defines two bespoke forward DFA search routines. One for the lazy
3DFA and one for the fully compiled DFA. These routines differ from the normal
4ones by reporting the position at which the search terminates when a match
5*isn't* found.
67This position at which a search terminates is useful in contexts where the meta
8regex engine runs optimizations that could go quadratic if we aren't careful.
9Namely, a regex search *could* scan to the end of the haystack only to report a
10non-match. If the caller doesn't know that the search scanned to the end of the
11haystack, it might restart the search at the next literal candidate it finds
12and repeat the process.
1314Providing the caller with the position at which the search stopped provides a
15way for the caller to determine the point at which subsequent scans should not
16pass. This is principally used in the "reverse inner" optimization, which works
17like this:
18191. Look for a match of an inner literal. Say, 'Z' in '\w+Z\d+'.
202. At the spot where 'Z' matches, do a reverse anchored search from there for
21'\w+'.
223. If the reverse search matches, it corresponds to the start position of a
23(possible) match. At this point, do a forward anchored search to find the end
24position. If an end position is found, then we have a match and we know its
25bounds.
2627If the forward anchored search in (3) searches the entire rest of the haystack
28but reports a non-match, then a naive implementation of the above will continue
29back at step 1 looking for more candidates. There might still be a match to be
30found! It's possible. But we already scanned the whole haystack. So if we keep
31repeating the process, then we might wind up taking quadratic time in the size
32of the haystack, which is not great.
3334So if the forward anchored search in (3) reports the position at which it
35stops, then we can detect whether quadratic behavior might be occurring in
36steps (1) and (2). For (1), it occurs if the literal candidate found occurs
37*before* the end of the previous search in (3), since that means we're now
38going to look for another match in a place where the forward search has already
39scanned. It is *correct* to do so, but our technique has become inefficient.
40For (2), quadratic behavior occurs similarly when its reverse search extends
41past the point where the previous forward search in (3) terminated. Indeed, to
42implement (2), we use the sibling 'limited' module for ensuring our reverse
43scan doesn't go further than we want.
4445See the 'opt/reverse-inner' benchmarks in rebar for a real demonstration of
46how quadratic behavior is mitigated.
47*/
4849use crate::{meta::error::RetryFailError, HalfMatch, Input, MatchError};
5051#[cfg(feature = "dfa-build")]
52pub(crate) fn dfa_try_search_half_fwd(
53 dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
54 input: &Input<'_>,
55) -> Result<Result<HalfMatch, usize>, RetryFailError> {
56use crate::dfa::{accel, Automaton};
5758let mut mat = None;
59let mut sid = dfa.start_state_forward(input)?;
60let mut at = input.start();
61while at < input.end() {
62 sid = dfa.next_state(sid, input.haystack()[at]);
63if dfa.is_special_state(sid) {
64if dfa.is_match_state(sid) {
65let pattern = dfa.match_pattern(sid, 0);
66 mat = Some(HalfMatch::new(pattern, at));
67if input.get_earliest() {
68return Ok(mat.ok_or(at));
69 }
70if dfa.is_accel_state(sid) {
71let needs = dfa.accelerator(sid);
72 at = accel::find_fwd(needs, input.haystack(), at)
73 .unwrap_or(input.end());
74continue;
75 }
76 } else if dfa.is_accel_state(sid) {
77let needs = dfa.accelerator(sid);
78 at = accel::find_fwd(needs, input.haystack(), at)
79 .unwrap_or(input.end());
80continue;
81 } else if dfa.is_dead_state(sid) {
82return Ok(mat.ok_or(at));
83 } else if dfa.is_quit_state(sid) {
84return Err(MatchError::quit(input.haystack()[at], at).into());
85 } else {
86// Ideally we wouldn't use a DFA that specialized start states
87 // and thus 'is_start_state()' could never be true here, but in
88 // practice we reuse the DFA created for the full regex which
89 // will specialize start states whenever there is a prefilter.
90if true {
if !dfa.is_start_state(sid) {
::core::panicking::panic("assertion failed: dfa.is_start_state(sid)")
};
};debug_assert!(dfa.is_start_state(sid));
91 }
92 }
93 at += 1;
94 }
95dfa_eoi_fwd(dfa, input, &mut sid, &mut mat)?;
96Ok(mat.ok_or(at))
97}
9899#[cfg(feature = "hybrid")]
100pub(crate) fn hybrid_try_search_half_fwd(
101 dfa: &crate::hybrid::dfa::DFA,
102 cache: &mut crate::hybrid::dfa::Cache,
103 input: &Input<'_>,
104) -> Result<Result<HalfMatch, usize>, RetryFailError> {
105let mut mat = None;
106let mut sid = dfa.start_state_forward(cache, input)?;
107let mut at = input.start();
108while at < input.end() {
109 sid = dfa
110 .next_state(cache, sid, input.haystack()[at])
111 .map_err(|_| MatchError::gave_up(at))?;
112if sid.is_tagged() {
113if sid.is_match() {
114let pattern = dfa.match_pattern(cache, sid, 0);
115 mat = Some(HalfMatch::new(pattern, at));
116if input.get_earliest() {
117return Ok(mat.ok_or(at));
118 }
119 } else if sid.is_dead() {
120return Ok(mat.ok_or(at));
121 } else if sid.is_quit() {
122return Err(MatchError::quit(input.haystack()[at], at).into());
123 } else {
124// We should NEVER get an unknown state ID back from
125 // dfa.next_state().
126if true {
if !!sid.is_unknown() {
::core::panicking::panic("assertion failed: !sid.is_unknown()")
};
};debug_assert!(!sid.is_unknown());
127// Ideally we wouldn't use a lazy DFA that specialized start
128 // states and thus 'sid.is_start()' could never be true here,
129 // but in practice we reuse the lazy DFA created for the full
130 // regex which will specialize start states whenever there is
131 // a prefilter.
132if true {
if !sid.is_start() {
::core::panicking::panic("assertion failed: sid.is_start()")
};
};debug_assert!(sid.is_start());
133 }
134 }
135 at += 1;
136 }
137hybrid_eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
138Ok(mat.ok_or(at))
139}
140141#[cfg(feature = "dfa-build")]
142#[cfg_attr(feature = "perf-inline", inline(always))]
143fn dfa_eoi_fwd(
144 dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
145 input: &Input<'_>,
146 sid: &mut crate::util::primitives::StateID,
147 mat: &mut Option<HalfMatch>,
148) -> Result<(), MatchError> {
149use crate::dfa::Automaton;
150151let sp = input.get_span();
152match input.haystack().get(sp.end) {
153Some(&b) => {
154*sid = dfa.next_state(*sid, b);
155if dfa.is_match_state(*sid) {
156let pattern = dfa.match_pattern(*sid, 0);
157*mat = Some(HalfMatch::new(pattern, sp.end));
158 } else if dfa.is_quit_state(*sid) {
159return Err(MatchError::quit(b, sp.end));
160 }
161 }
162None => {
163*sid = dfa.next_eoi_state(*sid);
164if dfa.is_match_state(*sid) {
165let pattern = dfa.match_pattern(*sid, 0);
166*mat = Some(HalfMatch::new(pattern, input.haystack().len()));
167 }
168// N.B. We don't have to check 'is_quit' here because the EOI
169 // transition can never lead to a quit state.
170if true {
if !!dfa.is_quit_state(*sid) {
::core::panicking::panic("assertion failed: !dfa.is_quit_state(*sid)")
};
};debug_assert!(!dfa.is_quit_state(*sid));
171 }
172 }
173Ok(())
174}
175176#[cfg(feature = "hybrid")]
177#[cfg_attr(feature = "perf-inline", inline(always))]
178fn hybrid_eoi_fwd(
179 dfa: &crate::hybrid::dfa::DFA,
180 cache: &mut crate::hybrid::dfa::Cache,
181 input: &Input<'_>,
182 sid: &mut crate::hybrid::LazyStateID,
183 mat: &mut Option<HalfMatch>,
184) -> Result<(), MatchError> {
185let sp = input.get_span();
186match input.haystack().get(sp.end) {
187Some(&b) => {
188*sid = dfa189 .next_state(cache, *sid, b)
190 .map_err(|_| MatchError::gave_up(sp.end))?;
191if sid.is_match() {
192let pattern = dfa.match_pattern(cache, *sid, 0);
193*mat = Some(HalfMatch::new(pattern, sp.end));
194 } else if sid.is_quit() {
195return Err(MatchError::quit(b, sp.end));
196 }
197 }
198None => {
199*sid = dfa200 .next_eoi_state(cache, *sid)
201 .map_err(|_| MatchError::gave_up(input.haystack().len()))?;
202if sid.is_match() {
203let pattern = dfa.match_pattern(cache, *sid, 0);
204*mat = Some(HalfMatch::new(pattern, input.haystack().len()));
205 }
206// N.B. We don't have to check 'is_quit' here because the EOI
207 // transition can never lead to a quit state.
208if true {
if !!sid.is_quit() {
::core::panicking::panic("assertion failed: !sid.is_quit()")
};
};debug_assert!(!sid.is_quit());
209 }
210 }
211Ok(())
212}