Ejemplo n.º 1
0
        /// <summary>
        /// Steps the state <paramref name="q"/> forward over <paramref name="input"/>, one character at a time,
        /// until the outcome of the search is known or the current chunk of work is used up.
        /// </summary>
        /// <typeparam name="TTransition">The transition kind forwarded to <c>Delta</c>.</typeparam>
        /// <param name="input">The input span.</param>
        /// <param name="j">Bound for <paramref name="i"/>; checked after each step, so at least one step is always taken.</param>
        /// <param name="i">Current position; advanced past every character that did not settle the search.</param>
        /// <param name="q">Current state; replaced by its successor on every step.</param>
        /// <param name="watchdog">Overwritten with <c>q.WatchDog</c> when a nullable state is reached; untouched otherwise.</param>
        /// <param name="result">
        /// The position of the character that led to a nullable state, <c>NoMatchExists</c> when a deadend was hit,
        /// or 0 when the method returns false.
        /// </param>
        /// <returns>
        /// True when <paramref name="result"/> is final; false when the caller has to call again
        /// (position reached <paramref name="j"/>, or <paramref name="q"/> is an initial state again).
        /// </returns>
        private bool FindFinalStatePositionDeltas <TTransition>(ReadOnlySpan <char> input, int j, ref int i, ref DfaMatchingState <TSetType> q, ref int watchdog, out int result) where TTransition : struct, ITransition
        {
            while (true)
            {
                // Consume input[i].
                q = Delta<TTransition>(input, i, q);

                // Nullability is judged in the context of the character that follows.
                uint followingKind = GetCharKind(input, i + 1);
                if (q.IsNullable(followingKind))
                {
                    watchdog = q.WatchDog;
                    result = i;
                    return true;
                }

                if (q.IsNothing)
                {
                    // Deadend: nothing further along the input can produce a match.
                    result = NoMatchExists;
                    return true;
                }

                i++;

                // Hand control back once the chunk is exhausted or the initial state has been re-entered.
                if (i >= j || q.IsInitialState)
                {
                    result = 0;
                    return false;
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Steps the state <paramref name="q"/> forward over <paramref name="input"/>, recording in
        /// <paramref name="i_end"/> every position at which the state is nullable.
        /// </summary>
        /// <typeparam name="TTransition">The transition kind forwarded to <c>Delta</c>.</typeparam>
        /// <param name="input">The input span.</param>
        /// <param name="i">Current position; incremented after every step that does not end the search.</param>
        /// <param name="j">Bound for <paramref name="i"/>; checked after each step, so at least one step is always taken.</param>
        /// <param name="q">Current state; replaced by its successor on every step.</param>
        /// <param name="i_end">Most recent position at which a nullable state was reached.</param>
        /// <returns>
        /// True when the search is over (a lazy nullable state or a deadend was reached);
        /// false when <paramref name="i"/> reached <paramref name="j"/> first.
        /// </returns>
        private bool FindEndPositionDeltas <TTransition>(ReadOnlySpan <char> input, ref int i, int j, ref DfaMatchingState <TSetType> q, ref int i_end) where TTransition : struct, ITransition
        {
            while (true)
            {
                q = Delta<TTransition>(input, i, q);

                bool accepting = q.IsNullable(GetCharKind(input, i + 1));
                if (accepting)
                {
                    // Remember this position as the latest known match end.
                    i_end = i;
                }

                // An accepting lazy state stops at the first end found; a non-accepting deadend
                // means the match already ended where i_end was last written.
                bool finished = accepting ? q.IsLazy : q.IsDeadend;
                if (finished)
                {
                    return true;
                }

                i++;
                if (i >= j)
                {
                    return false;
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Walks backwards through the input with the reverse pattern to locate where the match starts.
        /// A start position is known to exist.
        /// </summary>
        /// <param name="input">The input span.</param>
        /// <param name="i">Position to start walking back from; it points at the last character of the match.</param>
        /// <param name="match_start_boundary">Lowest position the backward walk is allowed to visit.</param>
        /// <returns>
        /// The last start position recorded during the walk, or 0 when <paramref name="i"/> is -1.
        /// </returns>
        private int FindStartPosition(ReadOnlySpan <char> input, int i, int match_start_boundary)
        {
            // When moving backwards the "previous" character is the one at i + 1,
            // and its kind selects the initial state of the reverse pattern.
            DfaMatchingState<TSetType> state = _reverseInitialStates[GetCharKind(input, i + 1)];

            if (i == -1)
            {
                Debug.Assert(state.IsNullable(GetCharKind(input, i)), "we reached the beginning of the input, thus the state q must be accepting");
                return 0;
            }

            // If the initial reverse state is already nullable here, i + 1 is a candidate start
            // (for example when the original pattern is a concrete word such as "abc").
            int last_start = state.IsNullable(GetCharKind(input, i)) ? i + 1 : -1;

            // Process the input in chunks until a chunk reports completion or the boundary is crossed.
            bool finished = false;
            while (!finished && i >= match_start_boundary)
            {
                int chunkLimit = Math.Max(match_start_boundary, i - AntimirovThresholdLeeway);

                finished = _builder._antimirov
                    ? FindStartPositionDeltas<AntimirovTransition>(input, ref i, chunkLimit, ref state, ref last_start)
                    : FindStartPositionDeltas<BrzozowskiTransition>(input, ref i, chunkLimit, ref state, ref last_start);
            }

            Debug.Assert(last_start != -1);
            return last_start;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Steps the state <paramref name="q"/> backwards over <paramref name="input"/>, recording in
        /// <paramref name="last_start"/> every position at which the state is nullable.
        /// </summary>
        /// <typeparam name="TTransition">The transition kind forwarded to <c>Delta</c>.</typeparam>
        /// <param name="input">The input span.</param>
        /// <param name="i">Current position; decremented after every step that does not end the search.</param>
        /// <param name="j">Bound for <paramref name="i"/>; checked after each step, so at least one step is always taken.</param>
        /// <param name="q">Current state; replaced by its successor on every step.</param>
        /// <param name="last_start">Most recent (that is, earliest in the input) position at which a nullable state was reached.</param>
        /// <returns>
        /// True when a deadend state was reached; false when <paramref name="i"/> dropped to
        /// <paramref name="j"/> or below first.
        /// </returns>
        private bool FindStartPositionDeltas <TTransition>(ReadOnlySpan <char> input, ref int i, int j, ref DfaMatchingState <TSetType> q, ref int last_start) where TTransition : struct, ITransition
        {
            for (;;)
            {
                q = Delta<TTransition>(input, i, q);

                if (q.IsNothing)
                {
                    // Deadend: the earliest start of the match has already been seen.
                    return true;
                }

                // Going backwards, the neighbouring character that provides context is at i - 1.
                uint neighbourKind = GetCharKind(input, i - 1);
                if (q.IsNullable(neighbourKind))
                {
                    // New earliest start so far. The original author notes this must happen at some point,
                    // otherwise the dot-star pattern would not have reached a final state after match_start_boundary.
                    last_start = i;
                }

                i--;
                if (i <= j)
                {
                    return false;
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs the original pattern forward to find where the match ends. An end position is known to exist.
        /// </summary>
        /// <param name="input">The input span.</param>
        /// <param name="exclusiveEnd">Exclusive end position of the search.</param>
        /// <param name="i">Inclusive start position of the search.</param>
        /// <returns>
        /// The last position recorded as a match end; this is <c>i - 1</c> (using the incoming value of
        /// <paramref name="i"/>) when only the empty match at the start was recorded.
        /// </returns>
        private int FindEndPosition(ReadOnlySpan <char> input, int exclusiveEnd, int i)
        {
            // exclusiveEnd doubles as the "nothing recorded yet" marker checked by the assert below.
            int i_end = exclusiveEnd;

            // The initial state is selected by the kind of the character preceding the start.
            DfaMatchingState<TSetType> state = _initialStates[GetCharKind(input, i - 1)];

            if (state.IsNullable(GetCharKind(input, i)))
            {
                // The initial state accepts, so an empty match exists; its end lies just before i.
                i_end = i - 1;

                if (state.IsLazy)
                {
                    // A lazy state is satisfied with the first end position.
                    return i_end;
                }
            }

            // Process the input in chunks until a chunk reports completion or the end is reached.
            bool finished = false;
            while (!finished && i < exclusiveEnd)
            {
                int chunkLimit = Math.Min(exclusiveEnd, i + AntimirovThresholdLeeway);

                finished = _builder._antimirov
                    ? FindEndPositionDeltas<AntimirovTransition>(input, ref i, chunkLimit, ref state, ref i_end)
                    : FindEndPositionDeltas<BrzozowskiTransition>(input, ref i, chunkLimit, ref state, ref i_end);
            }

            Debug.Assert(i_end != exclusiveEnd);
            return i_end;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Scans forward with the dot-starred pattern for the first position at which a final state is reached.
        /// Returns NoMatchExists if no match exists. Returns -1 when i=0 and the initial state is nullable.
        /// </summary>
        /// <param name="input">The given input span.</param>
        /// <param name="k">Input length or bounded input length.</param>
        /// <param name="i">Start position.</param>
        /// <param name="timeoutOccursAt">The time at which timeout occurs, if timeouts are being checked.</param>
        /// <param name="initialStateIndex">Last position at which the initial state of <see cref="_dotStarredPattern"/> was visited.</param>
        /// <param name="watchdog">
        /// -1 unless a final state reached during the scan supplied its <c>WatchDog</c> value
        /// (described by the original documentation as the length of the match when positive).
        /// </param>
        private int FindFinalStatePosition(ReadOnlySpan <char> input, int k, int i, int timeoutOccursAt, out int initialStateIndex, out int watchdog)
        {
            // The initial state of the dot-star pattern generally depends on the kind of the preceding character.
            DfaMatchingState<TSetType> state = _dotstarredInitialStates[GetCharKind(input, i - 1)];

            initialStateIndex = i;
            watchdog = -1;

            if (state.IsNothing)
            {
                // Deadend right from the start, e.g. the regex begins with a start anchor
                // and the preceding character kind is not Start.
                return NoMatchExists;
            }

            if (state.IsNullable(GetCharKind(input, i)))
            {
                // At least the empty match exists; it ends at i - 1, which is -1 when i == 0.
                return i - 1;
            }

            // Look for a match end position within input[i..k-1].
            while (i < k)
            {
                if (state.IsInitialState)
                {
                    // Track the most recent position at which the dot-star pattern sat in its initial state.
                    initialStateIndex = i;

                    if (_findOpts is RegexFindOptimizations findOpts)
                    {
                        // Skip ahead to the next position that could plausibly start a match.
                        bool candidateFound = findOpts.TryFindNextStartingPosition(input, ref i, 0, 0, k);
                        if (!candidateFound)
                        {
                            return NoMatchExists;
                        }

                        initialStateIndex = i;

                        // Re-select the initial state for the new preceding character kind.
                        // Without anchors this yields the same state as before.
                        state = _dotstarredInitialStates[GetCharKind(input, i - 1)];
                        if (state.IsNothing)
                        {
                            return NoMatchExists;
                        }
                    }
                }

                int chunkLimit = Math.Min(k, i + AntimirovThresholdLeeway);
                int result;
                bool settled;

                if (_builder._antimirov)
                {
                    settled = FindFinalStatePositionDeltas<AntimirovTransition>(input, chunkLimit, ref i, ref state, ref watchdog, out result);
                }
                else
                {
                    settled = FindFinalStatePositionDeltas<BrzozowskiTransition>(input, chunkLimit, ref i, ref state, ref watchdog, out result);
                }

                if (settled)
                {
                    return result;
                }

                // Timeouts are polled once per chunk rather than once per character.
                if (_checkTimeout)
                {
                    DoCheckTimeout(timeoutOccursAt);
                }
            }

            // The input was exhausted without reaching a final state.
            return NoMatchExists;
        }