/// <summary>
/// Runs one bounded chunk of transitions starting at <paramref name="i"/>, looking for the first
/// position after which the state is nullable.
/// </summary>
/// <typeparam name="TTransition">Transition implementation forwarded to <c>Delta</c>.</typeparam>
/// <param name="input">the input span</param>
/// <param name="j">exclusive upper bound for <paramref name="i"/> within this chunk</param>
/// <param name="i">current position; incremented after every transition that does not decide the search</param>
/// <param name="q">current state; overwritten with the successor state after each transition</param>
/// <param name="watchdog">receives <c>q.WatchDog</c> when a nullable state is reached; otherwise left untouched</param>
/// <param name="result">
/// the position at which the nullable state was reached, <c>NoMatchExists</c> when a dead end was hit,
/// or 0 when the method returns false
/// </param>
/// <returns>
/// true when the search is decided (nullable state or dead end); false when the chunk ended because
/// <paramref name="i"/> reached <paramref name="j"/> or the state is the initial state again.
/// </returns>
private bool FindFinalStatePositionDeltas<TTransition>(ReadOnlySpan<char> input, int j, ref int i, ref DfaMatchingState<TSetType> q, ref int watchdog, out int result) where TTransition : struct, ITransition
{
    while (true)
    {
        // Consume input[i].
        q = Delta<TTransition>(input, i, q);

        // Nullability is evaluated in the context of the following character's kind.
        if (q.IsNullable(GetCharKind(input, i + 1)))
        {
            watchdog = q.WatchDog;
            result = i;
            return true;
        }

        // A dead-end state can never lead to a match, so the search is over.
        if (q.IsNothing)
        {
            result = NoMatchExists;
            return true;
        }

        // Move on to the next character.
        i++;

        // Hand control back to the caller at the chunk boundary or when the initial state is re-entered.
        if (i >= j || q.IsInitialState)
        {
            result = 0;
            return false;
        }
    }
}
/// <summary>
/// Runs one bounded chunk of forward transitions starting at <paramref name="i"/>, recording in
/// <paramref name="i_end"/> the most recent position at which the state was nullable.
/// </summary>
/// <typeparam name="TTransition">Transition implementation forwarded to <c>Delta</c>.</typeparam>
/// <param name="input">the input span</param>
/// <param name="i">current position; incremented after every transition that does not end the search</param>
/// <param name="j">exclusive upper bound for <paramref name="i"/> within this chunk</param>
/// <param name="q">current state; overwritten with the successor state after each transition</param>
/// <param name="i_end">updated to the current position whenever a nullable state is reached</param>
/// <returns>
/// true when the search is finished (a lazy nullable state or a dead-end state was reached);
/// false when the chunk ended because <paramref name="i"/> reached <paramref name="j"/>.
/// </returns>
private bool FindEndPositionDeltas<TTransition>(ReadOnlySpan<char> input, ref int i, int j, ref DfaMatchingState<TSetType> q, ref int i_end) where TTransition : struct, ITransition
{
    while (true)
    {
        // Consume input[i].
        q = Delta<TTransition>(input, i, q);

        bool accepting = q.IsNullable(GetCharKind(input, i + 1));
        if (!accepting)
        {
            // A non-accepting dead-end state: the match ended where i_end was last recorded.
            if (q.IsDeadend)
            {
                return true;
            }
        }
        else
        {
            // Accepting state: remember this position as the latest candidate end.
            i_end = i;

            // A lazy state does not look for a longer match.
            if (q.IsLazy)
            {
                return true;
            }
        }

        i++;

        // Chunk exhausted without a decision.
        if (i >= j)
        {
            return false;
        }
    }
}
/// <summary>
/// Walks backwards over the input with the reverse pattern to locate the start position of a match.
/// A start position is known to exist.
/// </summary>
/// <param name="input">the input span</param>
/// <param name="i">position to start walking back from; it points at the last character of the match</param>
/// <param name="match_start_boundary">lowest position the backward walk is allowed to visit</param>
/// <returns>
/// The earliest start position recorded during the walk, or 0 when <paramref name="i"/> is -1.
/// </returns>
private int FindStartPosition(ReadOnlySpan<char> input, int i, int match_start_boundary)
{
    // Going backwards, the "previous" character is the one at i + 1; its kind selects the
    // initial state of the reverse pattern.
    uint followingKind = GetCharKind(input, i + 1);
    DfaMatchingState<TSetType> state = _reverseInitialStates[followingKind];

    if (i == -1)
    {
        Debug.Assert(state.IsNullable(GetCharKind(input, i)), "we reached the beginning of the input, thus the state q must be accepting");
        return 0;
    }

    // If the reverse initial state is already nullable here, i + 1 is a candidate start.
    // -1 marks "no start recorded yet".
    int earliestStart = state.IsNullable(GetCharKind(input, i)) ? i + 1 : -1;

    // Walk back in chunks of at most AntimirovThresholdLeeway characters; the transition mode is
    // re-read from the builder before every chunk.
    bool finished = false;
    while (!finished && i >= match_start_boundary)
    {
        int chunkLimit = Math.Max(match_start_boundary, i - AntimirovThresholdLeeway);

        if (_builder._antimirov)
        {
            finished = FindStartPositionDeltas<AntimirovTransition>(input, ref i, chunkLimit, ref state, ref earliestStart);
        }
        else
        {
            finished = FindStartPositionDeltas<BrzozowskiTransition>(input, ref i, chunkLimit, ref state, ref earliestStart);
        }
    }

    Debug.Assert(earliestStart != -1);
    return earliestStart;
}
/// <summary>
/// Runs one bounded chunk of backward transitions starting at <paramref name="i"/>, recording in
/// <paramref name="last_start"/> the most recently seen (i.e. earliest) position at which the
/// state was nullable.
/// </summary>
/// <typeparam name="TTransition">Transition implementation forwarded to <c>Delta</c>.</typeparam>
/// <param name="input">the input span</param>
/// <param name="i">current position; decremented after every transition that does not end the search</param>
/// <param name="j">the chunk stops once <paramref name="i"/> has been decremented to this value or below</param>
/// <param name="q">current state; overwritten with the successor state after each transition</param>
/// <param name="last_start">updated to the current position whenever a nullable state is reached</param>
/// <returns>
/// true when a dead-end state was reached; false when the chunk ended because
/// <paramref name="i"/> is no longer greater than <paramref name="j"/>.
/// </returns>
private bool FindStartPositionDeltas<TTransition>(ReadOnlySpan<char> input, ref int i, int j, ref DfaMatchingState<TSetType> q, ref int last_start) where TTransition : struct, ITransition
{
    while (true)
    {
        // Consume input[i] (walking backwards).
        q = Delta<TTransition>(input, i, q);

        // Dead end: the earliest start position must already have been recorded.
        if (q.IsNothing)
        {
            return true;
        }

        // Nullable in the context of the character before i: this is the earliest start seen so far.
        if (q.IsNullable(GetCharKind(input, i - 1)))
        {
            last_start = i;
        }

        i--;

        // Chunk exhausted without hitting a dead end.
        if (i <= j)
        {
            return false;
        }
    }
}
/// <summary>
/// Finds the end position of a match using the original pattern. An end position is known to exist.
/// </summary>
/// <param name="input">input span</param>
/// <param name="exclusiveEnd">exclusive end position of the search</param>
/// <param name="i">inclusive start position</param>
/// <returns>
/// The last position at which an accepting state was recorded; this is <c>i - 1</c> for an empty
/// match at the start position.
/// </returns>
private int FindEndPosition(ReadOnlySpan<char> input, int exclusiveEnd, int i)
{
    // The initial state depends on the kind of the character preceding the start position.
    uint precedingKind = GetCharKind(input, i - 1);
    DfaMatchingState<TSetType> current = _initialStates[precedingKind];

    // exclusiveEnd doubles as the "nothing recorded yet" marker (see the assert at the bottom).
    int lastAccepted = exclusiveEnd;

    if (current.IsNullable(GetCharKind(input, i)))
    {
        // The initial state already accepts, so an empty match exists.
        lastAccepted = i - 1;

        // A lazy initial state does not look for a longer match.
        if (current.IsLazy)
        {
            return lastAccepted;
        }
    }

    // Scan forward in chunks of at most AntimirovThresholdLeeway characters; the transition mode is
    // re-read from the builder before every chunk.
    bool finished = false;
    while (!finished && i < exclusiveEnd)
    {
        int chunkEnd = Math.Min(exclusiveEnd, i + AntimirovThresholdLeeway);

        if (_builder._antimirov)
        {
            finished = FindEndPositionDeltas<AntimirovTransition>(input, ref i, chunkEnd, ref current, ref lastAccepted);
        }
        else
        {
            finished = FindEndPositionDeltas<BrzozowskiTransition>(input, ref i, chunkEnd, ref current, ref lastAccepted);
        }
    }

    Debug.Assert(lastAccepted != exclusiveEnd);
    return lastAccepted;
}
/// <summary>
/// Searches forward with the dot-starred pattern for the first position at which a final state is reached.
/// Returns NoMatchExists if no match exists. Returns -1 when i=0 and the initial state is nullable.
/// </summary>
/// <param name="input">given input span</param>
/// <param name="k">input length or bounded input length</param>
/// <param name="i">start position</param>
/// <param name="timeoutOccursAt">The time at which timeout occurs, if timeouts are being checked.</param>
/// <param name="initialStateIndex">last position the initial state of <see cref="_dotStarredPattern"/> was visited</param>
/// <param name="watchdog">
/// set to -1 up front and then passed by reference to the transition helper, which may overwrite it
/// (per the original documentation: length of match when positive)
/// </param>
/// <returns>
/// The position at which a final state was reached, <c>i - 1</c> when the initial state is already
/// nullable, or <c>NoMatchExists</c>.
/// </returns>
private int FindFinalStatePosition(ReadOnlySpan<char> input, int k, int i, int timeoutOccursAt, out int initialStateIndex, out int watchdog)
{
    // The start state of the dot-star pattern depends on the kind of the preceding character.
    uint precedingKind = GetCharKind(input, i - 1);
    DfaMatchingState<TSetType> state = _dotstarredInitialStates[precedingKind];
    initialStateIndex = i;
    watchdog = -1;

    // A dead end right from the start, e.g. when the regex begins with a start anchor and the
    // preceding character kind is not Start.
    if (state.IsNothing)
    {
        return NoMatchExists;
    }

    // The initial state is nullable in this context, so at least an empty match exists.
    // An empty match ends at i - 1, which is -1 when i == 0.
    if (state.IsNullable(GetCharKind(input, i)))
    {
        return i - 1;
    }

    // Look for a match end position within input[i..k-1].
    while (i < k)
    {
        if (state.IsInitialState)
        {
            // Remember the most recent position at which the dot-star pattern was in its initial state.
            initialStateIndex = i;

            if (_findOpts is RegexFindOptimizations optimizations)
            {
                // Let the find optimizations skip ahead to the next candidate starting position.
                if (!optimizations.TryFindNextStartingPosition(input, ref i, 0, 0, k))
                {
                    return NoMatchExists;
                }

                initialStateIndex = i;

                // The position may have moved, so re-select the start state for the new preceding
                // character kind. Without anchors this yields the same state as before.
                state = _dotstarredInitialStates[GetCharKind(input, i - 1)];
                if (state.IsNothing)
                {
                    return NoMatchExists;
                }
            }
        }

        // Process a chunk of at most AntimirovThresholdLeeway characters; the transition mode is
        // re-read from the builder before every chunk.
        int chunkEnd = Math.Min(k, i + AntimirovThresholdLeeway);
        int position;
        bool finished;
        if (_builder._antimirov)
        {
            finished = FindFinalStatePositionDeltas<AntimirovTransition>(input, chunkEnd, ref i, ref state, ref watchdog, out position);
        }
        else
        {
            finished = FindFinalStatePositionDeltas<BrzozowskiTransition>(input, chunkEnd, ref i, ref state, ref watchdog, out position);
        }

        if (finished)
        {
            return position;
        }

        if (_checkTimeout)
        {
            DoCheckTimeout(timeoutOccursAt);
        }
    }

    // The whole range was scanned without reaching a final state.
    return NoMatchExists;
}