Beispiel #1
0
            public DfaMatchingState <TSetType> TakeTransition(
                SymbolicRegexMatcher <TSetType> matcher, DfaMatchingState <TSetType> currentStates, int mintermId, TSetType minterm)
            {
                if (currentStates.Node.Kind != SymbolicRegexKind.Or)
                {
                    // Fall back to Brzozowski when the state is not a disjunction.
                    return(default(BrzozowskiTransition).TakeTransition(matcher, currentStates, mintermId, minterm));
                }

                SymbolicRegexBuilder <TSetType> builder = matcher._builder;

                Debug.Assert(builder._delta is not null);

                SymbolicRegexNode <TSetType> union = builder._nothing;
                uint kind = 0;

                // Produce the new list of states from the current list, considering transitions from members one at a time.
                Debug.Assert(currentStates.Node._alts is not null);
                foreach (SymbolicRegexNode <TSetType> oneState in currentStates.Node._alts)
                {
                    DfaMatchingState <TSetType> nextStates = builder.MkState(oneState, currentStates.PrevCharKind);

                    int offset = (nextStates.Id << builder._mintermsCount) | mintermId;
                    DfaMatchingState <TSetType> p = Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(nextStates, minterm, offset);

                    // Observe that if p.Node is an Or it will be flattened.
                    union = builder.MkOr2(union, p.Node);

                    // kind is just the kind of the partition.
                    kind = p.PrevCharKind;
                }

                return(builder.MkState(union, kind, true));
            }
Beispiel #2
0
        /// <summary>Constructs matcher for given symbolic regex.</summary>
        internal SymbolicRegexMatcher(SymbolicRegexNode <TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
        {
            Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

            _pattern      = sr;
            _builder      = sr._builder;
            _checkTimeout = Regex.InfiniteMatchTimeout != matchTimeout;
            _timeout      = (int)(matchTimeout.TotalMilliseconds + 0.5); // Round up, so it will be at least 1ms
            _partitions   = _builder._solver switch
            {
                BV64Algebra bv64 => bv64._classifier,
                BVAlgebra bv => bv._classifier,
                            _ => new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms),
            };

            if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
                code.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
            {
                _findOpts = code.FindOptimizations;
            }

            // Determine the number of initial states. If there's no anchor, only the default previous
            // character kind 0 is ever going to be used for all initial states.
            int statesCount = _pattern._info.ContainsSomeAnchor ? CharKind.CharKindCount : 1;

            // Create the initial states for the original pattern.
            var initialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < initialStates.Length; i++)
            {
                initialStates[i] = _builder.MkState(_pattern, i);
            }
            _initialStates = initialStates;

            // Create the dot-star pattern (a concatenation of any* with the original pattern)
            // and all of its initial states.
            _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
            var dotstarredInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < dotstarredInitialStates.Length; i++)
            {
                // Used to detect if initial state was reentered,
                // but observe that the behavior from the state may ultimately depend on the previous
                // input char e.g. possibly causing nullability of \b or \B or of a start-of-line anchor,
                // in that sense there can be several "versions" (not more than StateCount) of the initial state.
                DfaMatchingState <TSetType> state = _builder.MkState(_dotStarredPattern, i);
                state.IsInitialState       = true;
                dotstarredInitialStates[i] = state;
            }
            _dotstarredInitialStates = dotstarredInitialStates;

            // Create the reverse pattern (the original pattern in reverse order) and all of its
            // initial states.
            _reversePattern = _pattern.Reverse();
            var reverseInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < reverseInitialStates.Length; i++)
            {
                reverseInitialStates[i] = _builder.MkState(_reversePattern, i);
            }
            _reverseInitialStates = reverseInitialStates;

            // Initialize our fast-lookup for determining the character kind of ASCII characters.
            // This is only required when the pattern contains anchors, as otherwise there's only
            // ever a single kind used.
            if (_pattern._info.ContainsSomeAnchor)
            {
                var asciiCharKinds = new uint[128];
                for (int i = 0; i < asciiCharKinds.Length; i++)
                {
                    TSetType predicate2;
                    uint     charKind;

                    if (i == '\n')
                    {
                        predicate2 = _builder._newLinePredicate;
                        charKind   = CharKind.Newline;
                    }
                    else
                    {
                        predicate2 = _builder._wordLetterPredicateForAnchors;
                        charKind   = CharKind.WordLetter;
                    }

                    asciiCharKinds[i] = _builder._solver.And(GetMinterm(i), predicate2).Equals(_builder._solver.False) ? 0 : charKind;
                }
                _asciiCharKinds = asciiCharKinds;
            }
        }