/*
 * The single entry point meant to be used from outside this class.
 * A fresh RegexWriter turns the given RegexTree into the matching
 * RegexCode, which is handed back to the caller.
 */
internal static RegexCode Write(RegexTree t)
{
    RegexCode generated = new RegexWriter().RegexCodeFromRegexTree(t);

#if DBG
    // In DBG builds, dump both the tree and the generated code when the tree asks for it.
    if (t.Debug)
    {
        t.Dump();
        generated.Dump();
    }
#endif

    return generated;
}
/*
 * Creates a cache entry by storing every argument, unchanged, in the
 * field of the matching name (_key, _capnames, _capslist, _code, _caps,
 * _capsize, _runnerref, _replref). No validation or copying is done here.
 *
 * NOTE(review): the #endif between the signature and the body closes a
 * conditional block whose opening directive is outside this view;
 * presumably it selects between alternative signatures - confirm before
 * touching the parameter list.
 */
internal CachedCodeEntry(string key, Hashtable capnames, String[] capslist, RegexCode code, Hashtable caps, int capsize, ExclusiveReference runner, SharedReference repl) #endif { _key = key; _capnames = capnames; _capslist = capslist; _code = code; _caps = caps; _capsize = capsize; _runnerref = runner; _replref = repl; }
/*
 * Emits an operation that carries one operand.
 *
 * When _counting is set nothing is written: _count advances by two
 * (opcode + operand) and _trackcount by one if RegexCode.OpcodeBacktracks
 * reports that the opcode backtracks. Otherwise the opcode and its operand
 * are appended to _emitted at _curpos.
 */
internal void Emit(int op, int opd1)
{
    if (!_counting)
    {
        _emitted[_curpos++] = op;
        _emitted[_curpos++] = opd1;
        return;
    }

    _count += 2;

    if (RegexCode.OpcodeBacktracks(op))
    {
        _trackcount++;
    }
}
/// <summary>Initializes the factory.</summary>
/// <param name="code">Supplies the regex tree whose root is converted and whose minimum required length is recorded.</param>
/// <param name="options">Checked for options that cannot be combined with NonBacktracking.</param>
/// <param name="matchTimeout">Passed through to the matcher.</param>
/// <param name="culture">Passed to the node converter and to the matcher.</param>
/// <exception cref="NotSupportedException">
/// <paramref name="options"/> contains <see cref="RegexOptions.RightToLeft"/> or <see cref="RegexOptions.ECMAScript"/>.
/// </exception>
public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
{
    // NonBacktracking currently cannot be combined with RightToLeft or ECMAScript.
    const RegexOptions UnsupportedOptions = RegexOptions.RightToLeft | RegexOptions.ECMAScript;
    if ((options & UnsupportedOptions) != 0)
    {
        // RightToLeft is reported in preference to ECMAScript when both are present.
        string conflictingOption = (options & RegexOptions.RightToLeft) != 0
            ? nameof(RegexOptions.RightToLeft)
            : nameof(RegexOptions.ECMAScript);

        throw new NotSupportedException(
            SR.Format(SR.NotSupported_NonBacktrackingConflictingOption, conflictingOption));
    }

    var nodeConverter = new RegexNodeToSymbolicConverter(s_unicode, culture);
    var charSetSolver = (CharSetSolver)s_unicode._solver;
    SymbolicRegexNode<BDD> bddRoot = nodeConverter.Convert(code.Tree.Root, topLevel: true);

    _minRequiredLength = code.Tree.MinRequiredLength;

    BDD[] minterms = bddRoot.ComputeMinterms();

    if (minterms.Length <= 64)
    {
        // At most 64 minterms: a predicate is represented by a ulong.
        var algebra64 = new BV64Algebra(charSetSolver, minterms);
        var builder64 = new SymbolicRegexBuilder<ulong>(algebra64);

        // The builder's constructor leaves these predicates as False, so they are filled in afterwards.
        // Whether they actually differ from False depends on whether anchors were used in the regex.
        builder64._wordLetterPredicateForAnchors = algebra64.ConvertFromCharSet(charSetSolver, nodeConverter._builder._wordLetterPredicateForAnchors);
        builder64._newLinePredicate = algebra64.ConvertFromCharSet(charSetSolver, nodeConverter._builder._newLinePredicate);

        // Convert the BDD-based AST to a ulong-based AST.
        SymbolicRegexNode<ulong> root64 = nodeConverter._builder.Transform(
            bddRoot,
            builder64,
            bdd => builder64._solver.ConvertFromCharSet(charSetSolver, bdd));

        _matcher = new SymbolicRegexMatcher<ulong>(root64, charSetSolver, minterms, matchTimeout, culture);
    }
    else
    {
        // More than 64 minterms: a predicate is represented by a BV.
        var algebraBV = new BVAlgebra(charSetSolver, minterms);
        var builderBV = new SymbolicRegexBuilder<BV>(algebraBV)
        {
            // The builder's constructor leaves these predicates as False, so they are filled in afterwards.
            // Whether they actually differ from False depends on whether anchors were used in the regex.
            _wordLetterPredicateForAnchors = algebraBV.ConvertFromCharSet(charSetSolver, nodeConverter._builder._wordLetterPredicateForAnchors),
            _newLinePredicate = algebraBV.ConvertFromCharSet(charSetSolver, nodeConverter._builder._newLinePredicate)
        };

        // Convert the BDD-based AST to a BV-based AST.
        SymbolicRegexNode<BV> rootBV = nodeConverter._builder.Transform(
            bddRoot,
            builderBV,
            bdd => builderBV._solver.ConvertFromCharSet(charSetSolver, bdd));

        _matcher = new SymbolicRegexMatcher<BV>(rootBV, charSetSolver, minterms, matchTimeout, culture);
    }
}
/// <summary>
/// Builds the matcher for a regex: converts the tree's root node to a BDD-based symbolic AST,
/// computes its minterms, and then re-expresses the AST over either <see cref="BV"/> or
/// <see cref="ulong"/> predicates depending on how many minterms there are.
/// </summary>
/// <param name="code">Supplies the regex tree whose root is converted and whose minimum required length is recorded.</param>
/// <param name="matchTimeout">Passed through to the matcher.</param>
/// <param name="culture">Passed to the node converter and to the matcher.</param>
private SymbolicRegexRunner(RegexCode code, TimeSpan matchTimeout, CultureInfo culture)
{
    var symbolicConverter = new RegexNodeToSymbolicConverter(s_unicode, culture);
    var bddSolver = (CharSetSolver)s_unicode._solver;
    SymbolicRegexNode<BDD> rootBdd = symbolicConverter.Convert(code.Tree.Root, topLevel: true);

    _minRequiredLength = code.Tree.MinRequiredLength;

    BDD[] minterms = rootBdd.ComputeMinterms();
    bool fitsInUlong = minterms.Length <= 64;

    if (!fitsInUlong)
    {
        // Too many minterms for a 64-bit mask: use BV to represent a predicate.
        var bvAlgebra = new BVAlgebra(bddSolver, minterms);
        var bvBuilder = new SymbolicRegexBuilder<BV>(bvAlgebra)
        {
            // The constructor sets both predicates to False; they are updated here, after the fact.
            // Whether they end up different from False depends on whether anchors were used in the regex.
            _wordLetterPredicateForAnchors = bvAlgebra.ConvertFromCharSet(bddSolver, symbolicConverter._builder._wordLetterPredicateForAnchors),
            _newLinePredicate = bvAlgebra.ConvertFromCharSet(bddSolver, symbolicConverter._builder._newLinePredicate)
        };

        // Convert the BDD-based AST to a BV-based AST.
        SymbolicRegexNode<BV> rootBV = symbolicConverter._builder.Transform(
            rootBdd,
            bvBuilder,
            bdd => bvBuilder._solver.ConvertFromCharSet(bddSolver, bdd));

        _matcher = new SymbolicRegexMatcher<BV>(rootBV, bddSolver, minterms, matchTimeout, culture);
    }
    else
    {
        // Use ulong to represent a predicate.
        var ulongAlgebra = new BV64Algebra(bddSolver, minterms);
        var ulongBuilder = new SymbolicRegexBuilder<ulong>(ulongAlgebra);

        // The constructor sets both predicates to False; they are updated here, after the fact.
        // Whether they end up different from False depends on whether anchors were used in the regex.
        ulongBuilder._wordLetterPredicateForAnchors = ulongAlgebra.ConvertFromCharSet(bddSolver, symbolicConverter._builder._wordLetterPredicateForAnchors);
        ulongBuilder._newLinePredicate = ulongAlgebra.ConvertFromCharSet(bddSolver, symbolicConverter._builder._newLinePredicate);

        // Convert the BDD-based AST to a ulong-based AST.
        SymbolicRegexNode<ulong> root64 = symbolicConverter._builder.Transform(
            rootBdd,
            ulongBuilder,
            bdd => ulongBuilder._solver.ConvertFromCharSet(bddSolver, bdd));

        _matcher = new SymbolicRegexMatcher<ulong>(root64, bddSolver, minterms, matchTimeout, culture);
    }
}
/// <summary>Constructs matcher for given symbolic regex.</summary>
/// <param name="sr">The symbolic regex to match; its builder and solver are reused by the matcher.</param>
/// <param name="code">Source of the find optimizations that may be adopted for locating starting positions.</param>
/// <param name="css">Character set solver (not read by this constructor's body).</param>
/// <param name="minterms">Minterms used to build a classifier when the solver does not carry one.</param>
/// <param name="matchTimeout">Timeout; compared against <see cref="Regex.InfiniteMatchTimeout"/> and stored in milliseconds.</param>
/// <param name="culture">Culture (not read by this constructor's body).</param>
internal SymbolicRegexMatcher(SymbolicRegexNode<TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
{
    Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

    _pattern = sr;
    _builder = sr._builder;

    // Remember whether a timeout other than the infinite sentinel was supplied.
    _checkTimeout = matchTimeout != Regex.InfiniteMatchTimeout;

    // Add 0.5 before truncating to whole milliseconds.
    _timeout = (int)(matchTimeout.TotalMilliseconds + 0.5);

    // The bit-vector algebras expose their own classifier; any other solver gets one built from the minterms.
    if (_builder._solver is BV64Algebra algebra64)
    {
        _partitions = algebra64._classifier;
    }
    else if (_builder._solver is BVAlgebra algebraBV)
    {
        _partitions = algebraBV._classifier;
    }
    else
    {
        _partitions = new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms);
    }

    // Only adopt the find optimizations when there is something to search for and no leading anchor:
    // if there are any anchors, we're better off letting the DFA quickly determine whether there's a match.
    var findOptimizations = code.FindOptimizations;
    if (findOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
        findOptimizations.LeadingAnchor == 0)
    {
        _findOpts = findOptimizations;
    }

    // Number of initial states per pattern. Without anchors, only the default previous
    // character kind 0 is ever used for initial states.
    int statesCount = _pattern._info.ContainsSomeAnchor ? CharKind.CharKindCount : 1;

    // Builds one state per previous-character kind for the given pattern, in ascending kind order.
    DfaMatchingState<TSetType>[] BuildInitialStates(SymbolicRegexNode<TSetType> node, int count, bool markAsInitial)
    {
        var states = new DfaMatchingState<TSetType>[count];
        for (uint prevCharKind = 0; prevCharKind < states.Length; prevCharKind++)
        {
            DfaMatchingState<TSetType> state = _builder.MkState(node, prevCharKind);
            if (markAsInitial)
            {
                state.IsInitialState = true;
            }

            states[prevCharKind] = state;
        }

        return states;
    }

    // Initial states for the original pattern.
    _initialStates = BuildInitialStates(_pattern, statesCount, markAsInitial: false);

    // The dot-star pattern (any* concatenated with the original pattern) and its initial states.
    // These states are flagged so that re-entering the initial state can be detected. The behavior
    // from such a state may still depend on the previous input char (e.g. nullability of \b, \B or
    // a start-of-line anchor), so there can be several "versions" of the initial state.
    _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
    _dotstarredInitialStates = BuildInitialStates(_dotStarredPattern, statesCount, markAsInitial: true);

    // The reverse pattern (the original pattern in reverse order) and its initial states.
    _reversePattern = _pattern.Reverse();
    _reverseInitialStates = BuildInitialStates(_reversePattern, statesCount, markAsInitial: false);

    // Fast lookup of the character kind of ASCII characters. Only needed when the pattern
    // contains anchors, since otherwise a single kind is used throughout.
    if (_pattern._info.ContainsSomeAnchor)
    {
        var kindsByAsciiChar = new uint[128];
        for (int c = 0; c < kindsByAsciiChar.Length; c++)
        {
            TSetType relevantPredicate;
            uint kindWhenMember;
            if (c != '\n')
            {
                relevantPredicate = _builder._wordLetterPredicateForAnchors;
                kindWhenMember = CharKind.WordLetter;
            }
            else
            {
                relevantPredicate = _builder._newLinePredicate;
                kindWhenMember = CharKind.Newline;
            }

            // The character gets the kind only if its minterm intersects the relevant predicate.
            bool isDisjoint = _builder._solver.And(GetMinterm(c), relevantPredicate).Equals(_builder._solver.False);
            kindsByAsciiChar[c] = isDisjoint ? 0 : kindWhenMember;
        }

        _asciiCharKinds = kindsByAsciiChar;
    }
}
/// <summary>
/// Produces a runner factory for the given code by delegating to
/// <c>RegexCompiler.Compile</c> with the same arguments.
/// </summary>
/// <param name="code">The regex code to compile.</param>
/// <param name="roptions">The options forwarded to the compiler.</param>
/// <returns>Whatever <c>RegexCompiler.Compile</c> returns for these arguments.</returns>
internal RegexRunnerFactory Compile(RegexCode code, RegexOptions roptions)
    => RegexCompiler.Compile(code, roptions);
/// <summary>
/// Core constructor. Validates the arguments, then either reuses a cache entry found under the
/// (options, culture, pattern) key or parses the pattern and generates its code. When the compile
/// option is in effect and no factory is available yet, the code is also compiled.
/// </summary>
/// <param name="pattern">The regular expression pattern.</param>
/// <param name="options">The regex options; must be within the valid range, and limited to a fixed subset when ECMAScript is set.</param>
/// <param name="matchTimeout">The match timeout; checked by <c>ValidateMatchTimeout</c>.</param>
/// <param name="useCache">Whether a newly built entry (and a newly compiled factory) is added to the cache.</param>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options"/> is out of range, or combines ECMAScript with an option outside the permitted subset.
/// </exception>
private Regex(String pattern, RegexOptions options, TimeSpan matchTimeout, bool useCache)
{
    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    if (options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    // ECMAScript may only be combined with the options listed in the mask below.
    if ((options & RegexOptions.ECMAScript) != 0
     && (options & ~(RegexOptions.ECMAScript |
                     RegexOptions.IgnoreCase |
                     RegexOptions.Multiline |
#if !(SILVERLIGHT) || FEATURE_LEGACYNETCF
                     RegexOptions.Compiled |
#endif
                     RegexOptions.CultureInvariant
#if DBG
                   | RegexOptions.Debug
#endif
                                           )) != 0)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    ValidateMatchTimeout(matchTimeout);

    // Try to look up this regex in the cache. This happens regardless of useCache, which only
    // controls whether new entries are added. The culture part of the key is the culture's
    // ToString() value; for the invariant culture that is the empty string.
    string cultureKey = (options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture.ToString()
        : CultureInfo.CurrentCulture.ToString();

    String key = ((int)options).ToString(NumberFormatInfo.InvariantInfo) + ":" + cultureKey + ":" + pattern;
    CachedCodeEntry cached = LookupCachedAndUpdate(key);

    this.pattern = pattern;
    this.roptions = options;
    this.internalMatchTimeout = matchTimeout;

    if (cached == null)
    {
        // Cache miss: parse the pattern and extract the relevant information.
        RegexTree tree = RegexParser.Parse(pattern, roptions);

        capnames = tree._capnames;
        capslist = tree._capslist;
        code = RegexWriter.Write(tree);
        caps = code._caps;
        capsize = code._capsize;

        InitializeReferences();

        if (useCache)
        {
            cached = CacheCode(key);
        }
    }
    else
    {
        // Cache hit: adopt everything the entry holds, including any compiled factory.
        caps = cached._caps;
        capnames = cached._capnames;
        capslist = cached._capslist;
        capsize = cached._capsize;
        code = cached._code;
        factory = cached._factory;
        runnerref = cached._runnerref;
        replref = cached._replref;
        refsInitialized = true;
    }

#if !(SILVERLIGHT || FULL_AOT_RUNTIME)
    // If the compile option is set, compile the code unless a factory is already available.
    if (UseOptionC() && factory == null)
    {
        factory = Compile(code, roptions);

        if (useCache && cached != null)
        {
            cached.AddCompiled(factory);
        }

        // The code is not kept on this instance once a factory exists.
        code = null;
    }
#endif
}
/*
 * Records the compiled runner factory on this entry and then clears the
 * entry's reference to the code by setting _code to null.
 *
 * NOTE(review): the factory is stored before the code is cleared. If
 * entries can be read concurrently, this order presumably keeps a reader
 * from seeing the code gone while the factory is still missing - confirm
 * before reordering these two assignments.
 */
internal void AddCompiled(RegexRunnerFactory factory) { _factory = factory; _code = null; }
internal CachedCodeEntry(string key, Dictionary <String, Int32> capnames, String[] capslist, RegexCode code, Dictionary <Int32, Int32> caps, int capsize, ExclusiveReference runner, SharedReference repl)
/// <summary>
/// Creates a cache entry that holds the supplied values as-is; no validation or copying is performed.
/// </summary>
/// <param name="key">Stored in <c>Key</c>.</param>
/// <param name="capnames">Stored in <c>Capnames</c>.</param>
/// <param name="capslist">Stored in <c>Capslist</c>.</param>
/// <param name="code">Stored in <c>Code</c>.</param>
/// <param name="caps">Stored in <c>Caps</c>.</param>
/// <param name="capsize">Stored in <c>Capsize</c>.</param>
/// <param name="runner">Stored in <c>Runnerref</c>.</param>
/// <param name="replref">Stored in <c>ReplRef</c>.</param>
public CachedCodeEntry(CachedCodeEntryKey key, Hashtable capnames, string[] capslist, RegexCode code, Hashtable caps, int capsize, ExclusiveReference runner, WeakReference<RegexReplacement?> replref)
{
    // Identity and generated code.
    (Key, Code) = (key, code);

    // Capture metadata.
    (Capnames, Capslist, Caps, Capsize) = (capnames, capslist, caps, capsize);

    // Shared runner / replacement references.
    (Runnerref, ReplRef) = (runner, replref);
}