Example #1
0
        /// <summary>Initializes the factory.</summary>
        public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
        {
            // RightToLeft and ECMAScript are currently not supported in conjunction with NonBacktracking.
            if ((options & (RegexOptions.RightToLeft | RegexOptions.ECMAScript)) != 0)
            {
                throw new NotSupportedException(
                          SR.Format(SR.NotSupported_NonBacktrackingConflictingOption,
                                    (options & RegexOptions.RightToLeft) != 0 ? nameof(RegexOptions.RightToLeft) : nameof(RegexOptions.ECMAScript)));
            }

            var converter = new RegexNodeToSymbolicConverter(s_unicode, culture);
            var solver    = (CharSetSolver)s_unicode._solver;
            SymbolicRegexNode <BDD> root = converter.Convert(code.Tree.Root, topLevel: true);

            _minRequiredLength = code.Tree.MinRequiredLength;

            BDD[] minterms = root.ComputeMinterms();
            if (minterms.Length > 64)
            {
                // Use BV to represent a predicate
                var algBV     = new BVAlgebra(solver, minterms);
                var builderBV = new SymbolicRegexBuilder <BV>(algBV);

                // The default constructor sets the following predicates to False; this update happens after the fact.
                // It depends on whether anchors where used in the regex whether the predicates are actually different from False.
                builderBV._wordLetterPredicateForAnchors = algBV.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors);
                builderBV._newLinePredicate = algBV.ConvertFromCharSet(solver, converter._builder._newLinePredicate);

                //Convert the BDD based AST to BV based AST
                SymbolicRegexNode <BV> rootBV = converter._builder.Transform(root, builderBV, bdd => builderBV._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <BV>(rootBV, solver, minterms, matchTimeout, culture);
            }
            else
            {
                // Use ulong to represent a predicate
                var alg64     = new BV64Algebra(solver, minterms);
                var builder64 = new SymbolicRegexBuilder <ulong>(alg64)
                {
                    // The default constructor sets the following predicates to False, this update happens after the fact
                    // It depends on whether anchors where used in the regex whether the predicates are actually different from False
                    _wordLetterPredicateForAnchors = alg64.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
                    _newLinePredicate = alg64.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
                };

                // Convert the BDD-based AST to ulong-based AST
                SymbolicRegexNode <ulong> root64 = converter._builder.Transform(root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <ulong>(root64, solver, minterms, matchTimeout, culture);
            }
        }
Example #2
0
        private SymbolicRegexRunner(RegexCode code, TimeSpan matchTimeout, CultureInfo culture)
        {
            var converter = new RegexNodeToSymbolicConverter(s_unicode, culture);
            var solver    = (CharSetSolver)s_unicode._solver;
            SymbolicRegexNode <BDD> root = converter.Convert(code.Tree.Root, topLevel: true);

            _minRequiredLength = code.Tree.MinRequiredLength;

            BDD[] minterms = root.ComputeMinterms();
            if (minterms.Length > 64)
            {
                // Use BV to represent a predicate
                var algBV     = new BVAlgebra(solver, minterms);
                var builderBV = new SymbolicRegexBuilder <BV>(algBV);

                // The default constructor sets the following predicates to False; this update happens after the fact.
                // It depends on whether anchors where used in the regex whether the predicates are actually different from False.
                builderBV._wordLetterPredicateForAnchors = algBV.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors);
                builderBV._newLinePredicate = algBV.ConvertFromCharSet(solver, converter._builder._newLinePredicate);

                //Convert the BDD based AST to BV based AST
                SymbolicRegexNode <BV> rootBV = converter._builder.Transform(root, builderBV, bdd => builderBV._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <BV>(rootBV, solver, minterms, matchTimeout, culture);
            }
            else
            {
                // Use ulong to represent a predicate
                var alg64     = new BV64Algebra(solver, minterms);
                var builder64 = new SymbolicRegexBuilder <ulong>(alg64)
                {
                    // The default constructor sets the following predicates to False, this update happens after the fact
                    // It depends on whether anchors where used in the regex whether the predicates are actually different from False
                    _wordLetterPredicateForAnchors = alg64.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
                    _newLinePredicate = alg64.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
                };

                // Convert the BDD-based AST to ulong-based AST
                SymbolicRegexNode <ulong> root64 = converter._builder.Transform(root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <ulong>(root64, solver, minterms, matchTimeout, culture);
            }
        }
Example #3
0
        /// <summary>Constructs matcher for given symbolic regex.</summary>
        internal SymbolicRegexMatcher(SymbolicRegexNode <TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
        {
            Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

            _pattern      = sr;
            _builder      = sr._builder;
            _checkTimeout = Regex.InfiniteMatchTimeout != matchTimeout;
            _timeout      = (int)(matchTimeout.TotalMilliseconds + 0.5); // Round up, so it will be at least 1ms
            _partitions   = _builder._solver switch
            {
                BV64Algebra bv64 => bv64._classifier,
                BVAlgebra bv => bv._classifier,
                            _ => new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms),
            };

            if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
                code.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
            {
                _findOpts = code.FindOptimizations;
            }

            // Determine the number of initial states. If there's no anchor, only the default previous
            // character kind 0 is ever going to be used for all initial states.
            int statesCount = _pattern._info.ContainsSomeAnchor ? CharKind.CharKindCount : 1;

            // Create the initial states for the original pattern.
            var initialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < initialStates.Length; i++)
            {
                initialStates[i] = _builder.MkState(_pattern, i);
            }
            _initialStates = initialStates;

            // Create the dot-star pattern (a concatenation of any* with the original pattern)
            // and all of its initial states.
            _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
            var dotstarredInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < dotstarredInitialStates.Length; i++)
            {
                // Used to detect if initial state was reentered,
                // but observe that the behavior from the state may ultimately depend on the previous
                // input char e.g. possibly causing nullability of \b or \B or of a start-of-line anchor,
                // in that sense there can be several "versions" (not more than StateCount) of the initial state.
                DfaMatchingState <TSetType> state = _builder.MkState(_dotStarredPattern, i);
                state.IsInitialState       = true;
                dotstarredInitialStates[i] = state;
            }
            _dotstarredInitialStates = dotstarredInitialStates;

            // Create the reverse pattern (the original pattern in reverse order) and all of its
            // initial states.
            _reversePattern = _pattern.Reverse();
            var reverseInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < reverseInitialStates.Length; i++)
            {
                reverseInitialStates[i] = _builder.MkState(_reversePattern, i);
            }
            _reverseInitialStates = reverseInitialStates;

            // Initialize our fast-lookup for determining the character kind of ASCII characters.
            // This is only required when the pattern contains anchors, as otherwise there's only
            // ever a single kind used.
            if (_pattern._info.ContainsSomeAnchor)
            {
                var asciiCharKinds = new uint[128];
                for (int i = 0; i < asciiCharKinds.Length; i++)
                {
                    TSetType predicate2;
                    uint     charKind;

                    if (i == '\n')
                    {
                        predicate2 = _builder._newLinePredicate;
                        charKind   = CharKind.Newline;
                    }
                    else
                    {
                        predicate2 = _builder._wordLetterPredicateForAnchors;
                        charKind   = CharKind.WordLetter;
                    }

                    asciiCharKinds[i] = _builder._solver.And(GetMinterm(i), predicate2).Equals(_builder._solver.False) ? 0 : charKind;
                }
                _asciiCharKinds = asciiCharKinds;
            }
        }