/// <summary>
/// Builds a matcher for the supplied symbolic regex: records the timeout settings, selects the
/// minterm classifier, and precomputes the initial DFA states for the pattern itself, for its
/// dot-starred form, and for its reversal.
/// </summary>
/// <param name="sr">Root node of the symbolic regex; its builder (and that builder's solver) is adopted by the matcher.</param>
/// <param name="code">Regex code whose <c>FindOptimizations</c> may be retained for locating candidate start positions.</param>
/// <param name="css">Character set solver. Not referenced by this constructor.</param>
/// <param name="minterms">Minterms handed to a new <c>MintermClassifier</c> when the solver is neither a <c>BV64Algebra</c> nor a <c>BVAlgebra</c>.</param>
/// <param name="matchTimeout">Match timeout; timeout checking is enabled unless this equals <c>Regex.InfiniteMatchTimeout</c>.</param>
/// <param name="culture">Culture. Not referenced by this constructor.</param>
internal SymbolicRegexMatcher(SymbolicRegexNode<TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
{
    Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

    _pattern = sr;
    _builder = sr._builder;

    // Timeout bookkeeping. Adding 0.5 before the truncating integer cast converts the
    // timeout to a whole number of milliseconds.
    _checkTimeout = matchTimeout != Regex.InfiniteMatchTimeout;
    _timeout = (int)(matchTimeout.TotalMilliseconds + 0.5);

    // The bit-vector algebras carry their own classifier; for any other solver
    // one is built from the supplied minterms.
    var solver = _builder._solver;
    if (solver is BV64Algebra bv64)
    {
        _partitions = bv64._classifier;
    }
    else if (solver is BVAlgebra bv)
    {
        _partitions = bv._classifier;
    }
    else
    {
        _partitions = new MintermClassifier((CharSetSolver)(object)solver, minterms);
    }

    // Keep the find optimizations only when a search mode exists and the pattern has no
    // leading anchor: with anchors present, we're better off letting the DFA quickly
    // determine whether there's a match.
    var findOptimizations = code.FindOptimizations;
    if (findOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
        findOptimizations.LeadingAnchor == 0)
    {
        _findOpts = findOptimizations;
    }

    // One initial state is needed per previous-character kind. Without anchors, only the
    // default kind 0 is ever used, so a single state suffices.
    bool hasAnchors = _pattern._info.ContainsSomeAnchor;
    int initialStateCount = hasAnchors ? CharKind.CharKindCount : 1;

    // Initial states of the pattern as written.
    var forwardStates = new DfaMatchingState<TSetType>[initialStateCount];
    for (uint kind = 0; kind < initialStateCount; kind++)
    {
        forwardStates[kind] = _builder.MkState(_pattern, kind);
    }
    _initialStates = forwardStates;

    // Initial states of the dot-starred pattern (any* concatenated with the pattern).
    _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
    var dotStarStates = new DfaMatchingState<TSetType>[initialStateCount];
    for (uint kind = 0; kind < initialStateCount; kind++)
    {
        // The flag is used to detect that the initial state was re-entered. Behavior from the
        // state may still depend on the previous input char (e.g. nullability of \b, \B, or a
        // start-of-line anchor), so there can be several "versions" of the initial state,
        // one per character kind.
        DfaMatchingState<TSetType> dotStarState = _builder.MkState(_dotStarredPattern, kind);
        dotStarState.IsInitialState = true;
        dotStarStates[kind] = dotStarState;
    }
    _dotstarredInitialStates = dotStarStates;

    // Initial states of the reversed pattern (the original pattern in reverse order).
    _reversePattern = _pattern.Reverse();
    var backwardStates = new DfaMatchingState<TSetType>[initialStateCount];
    for (uint kind = 0; kind < initialStateCount; kind++)
    {
        backwardStates[kind] = _builder.MkState(_reversePattern, kind);
    }
    _reverseInitialStates = backwardStates;

    // Fast lookup of the character kind for ASCII characters. Only required when the
    // pattern contains anchors, as otherwise there's only ever a single kind in use.
    if (hasAnchors)
    {
        // Entries start at 0 and are overwritten only when the character's minterm
        // intersects the predicate for its candidate kind.
        var kindTable = new uint[128];
        for (int c = 0; c < kindTable.Length; c++)
        {
            // '\n' is tested against the newline predicate; every other ASCII character
            // is tested against the word-letter predicate.
            TSetType kindPredicate = _builder._wordLetterPredicateForAnchors;
            uint kindIfMatched = CharKind.WordLetter;
            if (c == '\n')
            {
                kindPredicate = _builder._newLinePredicate;
                kindIfMatched = CharKind.Newline;
            }

            bool disjoint = _builder._solver.And(GetMinterm(c), kindPredicate).Equals(_builder._solver.False);
            if (!disjoint)
            {
                kindTable[c] = kindIfMatched;
            }
        }
        _asciiCharKinds = kindTable;
    }
}