예제 #1
0
        /*
         * This is the only function that should be called from outside.
         * It takes a RegexTree and creates a corresponding RegexCode.
         */
        internal static RegexCode Write(RegexTree t)
        {
            RegexWriter w      = new RegexWriter();
            RegexCode   retval = w.RegexCodeFromRegexTree(t);

#if DBG
            if (t.Debug)
            {
                t.Dump();
                retval.Dump();
            }
#endif
            return(retval);
        }
예제 #2
0
        internal CachedCodeEntry(string key, Hashtable capnames, String[] capslist, RegexCode code, Hashtable caps, int capsize, ExclusiveReference runner, SharedReference repl)
#endif
        {
            _key      = key;
            _capnames = capnames;
            _capslist = capslist;

            _code    = code;
            _caps    = caps;
            _capsize = capsize;

            _runnerref = runner;
            _replref   = repl;
        }
예제 #3
0
 /*
  * Emits a one-argument operation.
  */
 internal void Emit(int op, int opd1)
 {
     if (_counting)
     {
         _count += 2;
         if (RegexCode.OpcodeBacktracks(op))
         {
             _trackcount += 1;
         }
         return;
     }
     _emitted[_curpos++] = op;
     _emitted[_curpos++] = opd1;
 }
예제 #4
0
        /// <summary>Initializes the factory.</summary>
        public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
        {
            // RightToLeft and ECMAScript are currently not supported in conjunction with NonBacktracking.
            if ((options & (RegexOptions.RightToLeft | RegexOptions.ECMAScript)) != 0)
            {
                throw new NotSupportedException(
                          SR.Format(SR.NotSupported_NonBacktrackingConflictingOption,
                                    (options & RegexOptions.RightToLeft) != 0 ? nameof(RegexOptions.RightToLeft) : nameof(RegexOptions.ECMAScript)));
            }

            var converter = new RegexNodeToSymbolicConverter(s_unicode, culture);
            var solver    = (CharSetSolver)s_unicode._solver;
            SymbolicRegexNode <BDD> root = converter.Convert(code.Tree.Root, topLevel: true);

            _minRequiredLength = code.Tree.MinRequiredLength;

            BDD[] minterms = root.ComputeMinterms();
            if (minterms.Length > 64)
            {
                // Use BV to represent a predicate
                var algBV     = new BVAlgebra(solver, minterms);
                var builderBV = new SymbolicRegexBuilder <BV>(algBV);

                // The default constructor sets the following predicates to False; this update happens after the fact.
                // It depends on whether anchors where used in the regex whether the predicates are actually different from False.
                builderBV._wordLetterPredicateForAnchors = algBV.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors);
                builderBV._newLinePredicate = algBV.ConvertFromCharSet(solver, converter._builder._newLinePredicate);

                //Convert the BDD based AST to BV based AST
                SymbolicRegexNode <BV> rootBV = converter._builder.Transform(root, builderBV, bdd => builderBV._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <BV>(rootBV, solver, minterms, matchTimeout, culture);
            }
            else
            {
                // Use ulong to represent a predicate
                var alg64     = new BV64Algebra(solver, minterms);
                var builder64 = new SymbolicRegexBuilder <ulong>(alg64)
                {
                    // The default constructor sets the following predicates to False, this update happens after the fact
                    // It depends on whether anchors where used in the regex whether the predicates are actually different from False
                    _wordLetterPredicateForAnchors = alg64.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
                    _newLinePredicate = alg64.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
                };

                // Convert the BDD-based AST to ulong-based AST
                SymbolicRegexNode <ulong> root64 = converter._builder.Transform(root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <ulong>(root64, solver, minterms, matchTimeout, culture);
            }
        }
예제 #5
0
        private SymbolicRegexRunner(RegexCode code, TimeSpan matchTimeout, CultureInfo culture)
        {
            var converter = new RegexNodeToSymbolicConverter(s_unicode, culture);
            var solver    = (CharSetSolver)s_unicode._solver;
            SymbolicRegexNode <BDD> root = converter.Convert(code.Tree.Root, topLevel: true);

            _minRequiredLength = code.Tree.MinRequiredLength;

            BDD[] minterms = root.ComputeMinterms();
            if (minterms.Length > 64)
            {
                // Use BV to represent a predicate
                var algBV     = new BVAlgebra(solver, minterms);
                var builderBV = new SymbolicRegexBuilder <BV>(algBV);

                // The default constructor sets the following predicates to False; this update happens after the fact.
                // It depends on whether anchors where used in the regex whether the predicates are actually different from False.
                builderBV._wordLetterPredicateForAnchors = algBV.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors);
                builderBV._newLinePredicate = algBV.ConvertFromCharSet(solver, converter._builder._newLinePredicate);

                //Convert the BDD based AST to BV based AST
                SymbolicRegexNode <BV> rootBV = converter._builder.Transform(root, builderBV, bdd => builderBV._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <BV>(rootBV, solver, minterms, matchTimeout, culture);
            }
            else
            {
                // Use ulong to represent a predicate
                var alg64     = new BV64Algebra(solver, minterms);
                var builder64 = new SymbolicRegexBuilder <ulong>(alg64)
                {
                    // The default constructor sets the following predicates to False, this update happens after the fact
                    // It depends on whether anchors where used in the regex whether the predicates are actually different from False
                    _wordLetterPredicateForAnchors = alg64.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
                    _newLinePredicate = alg64.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
                };

                // Convert the BDD-based AST to ulong-based AST
                SymbolicRegexNode <ulong> root64 = converter._builder.Transform(root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
                _matcher = new SymbolicRegexMatcher <ulong>(root64, solver, minterms, matchTimeout, culture);
            }
        }
예제 #6
0
        /// <summary>Constructs matcher for given symbolic regex.</summary>
        internal SymbolicRegexMatcher(SymbolicRegexNode <TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
        {
            Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

            _pattern      = sr;
            _builder      = sr._builder;
            _checkTimeout = Regex.InfiniteMatchTimeout != matchTimeout;
            _timeout      = (int)(matchTimeout.TotalMilliseconds + 0.5); // Round up, so it will be at least 1ms
            _partitions   = _builder._solver switch
            {
                BV64Algebra bv64 => bv64._classifier,
                BVAlgebra bv => bv._classifier,
                            _ => new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms),
            };

            if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
                code.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
            {
                _findOpts = code.FindOptimizations;
            }

            // Determine the number of initial states. If there's no anchor, only the default previous
            // character kind 0 is ever going to be used for all initial states.
            int statesCount = _pattern._info.ContainsSomeAnchor ? CharKind.CharKindCount : 1;

            // Create the initial states for the original pattern.
            var initialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < initialStates.Length; i++)
            {
                initialStates[i] = _builder.MkState(_pattern, i);
            }
            _initialStates = initialStates;

            // Create the dot-star pattern (a concatenation of any* with the original pattern)
            // and all of its initial states.
            _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
            var dotstarredInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < dotstarredInitialStates.Length; i++)
            {
                // Used to detect if initial state was reentered,
                // but observe that the behavior from the state may ultimately depend on the previous
                // input char e.g. possibly causing nullability of \b or \B or of a start-of-line anchor,
                // in that sense there can be several "versions" (not more than StateCount) of the initial state.
                DfaMatchingState <TSetType> state = _builder.MkState(_dotStarredPattern, i);
                state.IsInitialState       = true;
                dotstarredInitialStates[i] = state;
            }
            _dotstarredInitialStates = dotstarredInitialStates;

            // Create the reverse pattern (the original pattern in reverse order) and all of its
            // initial states.
            _reversePattern = _pattern.Reverse();
            var reverseInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < reverseInitialStates.Length; i++)
            {
                reverseInitialStates[i] = _builder.MkState(_reversePattern, i);
            }
            _reverseInitialStates = reverseInitialStates;

            // Initialize our fast-lookup for determining the character kind of ASCII characters.
            // This is only required when the pattern contains anchors, as otherwise there's only
            // ever a single kind used.
            if (_pattern._info.ContainsSomeAnchor)
            {
                var asciiCharKinds = new uint[128];
                for (int i = 0; i < asciiCharKinds.Length; i++)
                {
                    TSetType predicate2;
                    uint     charKind;

                    if (i == '\n')
                    {
                        predicate2 = _builder._newLinePredicate;
                        charKind   = CharKind.Newline;
                    }
                    else
                    {
                        predicate2 = _builder._wordLetterPredicateForAnchors;
                        charKind   = CharKind.WordLetter;
                    }

                    asciiCharKinds[i] = _builder._solver.And(GetMinterm(i), predicate2).Equals(_builder._solver.False) ? 0 : charKind;
                }
                _asciiCharKinds = asciiCharKinds;
            }
        }
예제 #7
0
 internal RegexRunnerFactory Compile(RegexCode code, RegexOptions roptions)
 {
     return(RegexCompiler.Compile(code, roptions));
 }
예제 #8
0
        private Regex(String pattern, RegexOptions options, TimeSpan matchTimeout, bool useCache)
        {
            RegexTree       tree;
            CachedCodeEntry cached     = null;
            string          cultureKey = null;

            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }
            if (options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0)
            {
                throw new ArgumentOutOfRangeException("options");
            }
            if ((options & RegexOptions.ECMAScript) != 0 &&
                (options & ~(RegexOptions.ECMAScript |
                             RegexOptions.IgnoreCase |
                             RegexOptions.Multiline |
#if !(SILVERLIGHT) || FEATURE_LEGACYNETCF
                             RegexOptions.Compiled |
#endif
                             RegexOptions.CultureInvariant
#if DBG
                             | RegexOptions.Debug
#endif
                             )) != 0)
            {
                throw new ArgumentOutOfRangeException("options");
            }

            ValidateMatchTimeout(matchTimeout);

            // Try to look up this regex in the cache.  We do this regardless of whether useCache is true since there's
            // really no reason not to.
            if ((options & RegexOptions.CultureInvariant) != 0)
            {
                cultureKey = CultureInfo.InvariantCulture.ToString(); // "English (United States)"
            }
            else
            {
                cultureKey = CultureInfo.CurrentCulture.ToString();
            }

            String key = ((int)options).ToString(NumberFormatInfo.InvariantInfo) + ":" + cultureKey + ":" + pattern;
            cached = LookupCachedAndUpdate(key);

            this.pattern  = pattern;
            this.roptions = options;

            this.internalMatchTimeout = matchTimeout;

            if (cached == null)
            {
                // Parse the input
                tree = RegexParser.Parse(pattern, roptions);

                // Extract the relevant information
                capnames = tree._capnames;
                capslist = tree._capslist;
                code     = RegexWriter.Write(tree);
                caps     = code._caps;
                capsize  = code._capsize;

                InitializeReferences();

                tree = null;
                if (useCache)
                {
                    cached = CacheCode(key);
                }
            }
            else
            {
                caps            = cached._caps;
                capnames        = cached._capnames;
                capslist        = cached._capslist;
                capsize         = cached._capsize;
                code            = cached._code;
                factory         = cached._factory;
                runnerref       = cached._runnerref;
                replref         = cached._replref;
                refsInitialized = true;
            }

#if !(SILVERLIGHT || FULL_AOT_RUNTIME)
            // if the compile option is set, then compile the code if it's not already
            if (UseOptionC() && factory == null)
            {
                factory = Compile(code, roptions);

                if (useCache && cached != null)
                {
                    cached.AddCompiled(factory);
                }
                code = null;
            }
#endif
        }
예제 #9
0
 internal void AddCompiled(RegexRunnerFactory factory)
 {
     _factory = factory;
     _code    = null;
 }
예제 #10
0
 internal CachedCodeEntry(string key, Dictionary <String, Int32> capnames, String[] capslist, RegexCode code, Dictionary <Int32, Int32> caps, int capsize, ExclusiveReference runner, SharedReference repl)
예제 #11
0
 public CachedCodeEntry(CachedCodeEntryKey key, Hashtable capnames, string[] capslist, RegexCode code,
                        Hashtable caps, int capsize, ExclusiveReference runner, WeakReference <RegexReplacement?> replref)
 {
     Key       = key;
     Capnames  = capnames;
     Capslist  = capslist;
     Code      = code;
     Caps      = caps;
     Capsize   = capsize;
     Runnerref = runner;
     ReplRef   = replref;
 }