/// <summary>
/// Computes the NFA transitions leaving this state for the given minterm and returns the
/// target states that are not dead ends, each paired with its effects.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <returns>a list of (target state, effects) pairs; dead-end targets are omitted</returns>
internal List<(DfaMatchingState<TSet> State, DerivativeEffect[] Effects)> NfaNextWithEffects(TSet minterm)
{
    // The minterm is passed by reference, so GetNextCharKind may replace it before it is used below.
    uint nextCharKind = GetNextCharKind(ref minterm);

    // The derivative is taken in the combined context of the previous and the next character kind.
    uint context = CharKind.Context(PrevCharKind, nextCharKind);
    List<(SymbolicRegexNode<TSet>, DerivativeEffect[])> derivatives = Node.CreateNfaDerivativeWithEffects(minterm, context);

    var transitions = new List<(DfaMatchingState<TSet> State, DerivativeEffect[] Effects)>();
    foreach ((SymbolicRegexNode<TSet> targetNode, DerivativeEffect[]? targetEffects) in derivatives)
    {
        // nextCharKind becomes the PrevCharKind of the target state. The builder is asked for the
        // state with capturing enabled (reusing an existing state or creating one with a new id).
        DfaMatchingState<TSet> target = Node._builder.CreateState(targetNode, nextCharKind, capturing: true);
        if (target.IsDeadend)
        {
            // Dead-end targets are not reported to the caller.
            continue;
        }

        transitions.Add((target, targetEffects));
    }

    return transitions;
}
/// <summary>
/// Determines whether <see cref="Node"/> is nullable in the context formed by
/// <see cref="PrevCharKind"/> of this state and the given next character kind.
/// </summary>
/// <param name="nextCharKind">kind of the next character; asserted to be one of the known character kinds</param>
/// <returns>the result of the node's nullability check for the combined context</returns>
internal bool IsNullable(uint nextCharKind)
{
    Debug.Assert(nextCharKind is 0 or CharKind.StartStop or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);

    // Nullability is evaluated against the combined (previous, next) character context.
    return Node.IsNullableFor(CharKind.Context(PrevCharKind, nextCharKind));
}
/// <summary>
/// Determines whether <see cref="Node"/> is nullable in the context formed by
/// <see cref="PrevCharKind"/> of this state and the given next character kind.
/// </summary>
/// <param name="nextCharKind">kind of the next character; asserted to be a valid character kind</param>
/// <returns>the result of the node's nullability check for the combined context</returns>
internal bool IsNullableFor(uint nextCharKind)
{
    Debug.Assert(CharKind.IsValidCharKind(nextCharKind));

    // Nullability is evaluated against the combined (previous, next) character context.
    return Node.IsNullableFor(CharKind.Context(PrevCharKind, nextCharKind));
}
/// <summary>
/// Resolves the fixed length of <see cref="Node"/> in the context formed by
/// <see cref="PrevCharKind"/> of this state and the given next character kind.
/// The node must be nullable here.
/// </summary>
/// <param name="nextCharKind">kind of the next character; asserted to be one of the known character kinds</param>
/// <returns>the value produced by the node's fixed-length resolution for the combined context</returns>
internal int FixedLength(uint nextCharKind)
{
    Debug.Assert(nextCharKind is 0 or CharKind.BeginningEnd or CharKind.Newline or CharKind.WordLetter or CharKind.NewLineS);

    // The length is resolved against the combined (previous, next) character context.
    return Node.ResolveFixedLength(CharKind.Context(PrevCharKind, nextCharKind));
}
/// <summary>Find a match.</summary>
/// <param name="isMatch">Whether to return once we know there's a match without determining where exactly it matched.</param>
/// <param name="input">The input span</param>
/// <param name="startat">The position to start search in the input span.</param>
/// <param name="end">The non-inclusive position to end the search in the input span.</param>
/// <returns>
/// <see cref="SymbolicMatch.NoMatch"/> when nothing matches, <see cref="SymbolicMatch.QuickMatch"/> when
/// <paramref name="isMatch"/> is set and a match exists, otherwise a match with its start index and length.
/// </returns>
public SymbolicMatch FindMatch(bool isMatch, ReadOnlySpan<char> input, int startat, int end)
{
    // Environment.TickCount is used for efficiency instead of Stopwatch -- as in the non-DFA case.
    // The deadline stays 0 when timeout checking is disabled.
    int timeoutOccursAt = _checkTimeout ? Environment.TickCount + (int)(_timeout + 0.5) : 0;

    if (startat == end)
    {
        // Special case: only an empty match at the end of the input is possible.
        uint kindBefore = GetCharKind(input, startat - 1);
        uint kindAfter = GetCharKind(input, startat);
        if (_pattern.IsNullableFor(CharKind.Context(kindBefore, kindAfter)))
        {
            return new SymbolicMatch(startat, 0);
        }

        return SymbolicMatch.NoMatch;
    }

    // Phase 1: find the first accepting state, starting the scan at startat.
    // The result may be -1 as a legitimate value when the initial state is nullable and startat == 0,
    // and is NoMatchExists when there is no match.
    int finalPos = FindFinalStatePosition(input, end, startat, timeoutOccursAt, out int q0A1Index, out int watchdog);
    if (finalPos == NoMatchExists)
    {
        return SymbolicMatch.NoMatch;
    }

    if (isMatch)
    {
        // The original call was IsMatch, so knowing that a match exists is enough.
        return SymbolicMatch.QuickMatch;
    }

    int matchStart;
    int matchEnd;
    if (watchdog < 0)
    {
        Debug.Assert(finalPos >= startat - 1);

        // Phase 2: walk in reverse to locate the start position of the match, unless the accepting
        // position already lies before startat, in which case the match starts at startat.
        matchStart = finalPos < startat ? startat : FindStartPosition(input, finalPos, q0A1Index);

        // Phase 3: locate the end position of the match from that start.
        matchEnd = FindEndPosition(input, end, matchStart);
    }
    else
    {
        // A non-negative watchdog gives the match length directly: the match ends at finalPos.
        matchStart = finalPos - watchdog + 1;
        matchEnd = finalPos;
    }

    return new SymbolicMatch(matchStart, matchEnd + 1 - matchStart);
}
/// <summary>
/// Computes the NFA derivative of <see cref="Node"/>, together with effects, for the given minterm
/// in the context formed by <see cref="PrevCharKind"/> and <paramref name="nextCharKind"/>.
/// </summary>
/// <param name="builder">the builder that owns <see cref="Node"/></param>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <param name="nextCharKind">kind of the next character, combined with <see cref="PrevCharKind"/> to form the context</param>
/// <returns>the transitions as pairs of the target node and a list of effects to be applied</returns>
internal List<(SymbolicRegexNode<TSet> Node, DerivativeEffect[] Effects)> NfaNextWithEffects(SymbolicRegexBuilder<TSet> builder, TSet minterm, uint nextCharKind) =>
    Node.CreateNfaDerivativeWithEffects(builder, minterm, CharKind.Context(PrevCharKind, nextCharKind));
/// <summary>
/// Computes the derivative of <see cref="Node"/>, without effects, for the given input minterm in the
/// context formed by <see cref="PrevCharKind"/> and <paramref name="nextCharKind"/>.
/// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input.
/// </summary>
/// <param name="builder">the builder that owns <see cref="Node"/></param>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <param name="nextCharKind">kind of the next character, combined with <see cref="PrevCharKind"/> to form the context</param>
/// <returns>the derivative node for the combined context</returns>
internal SymbolicRegexNode<TSet> Next(SymbolicRegexBuilder<TSet> builder, TSet minterm, uint nextCharKind) =>
    Node.CreateDerivativeWithoutEffects(builder, minterm, CharKind.Context(PrevCharKind, nextCharKind));
/// <summary>
/// Returns the fixed length that any match ending with this state must have, or -1 if there is no such
/// fixed length, <see cref="SymbolicRegexNode{TSet}.ResolveFixedLength(uint)"/>. The context is defined
/// by <see cref="PrevCharKind"/> of this state and the given nextCharKind. The node must be nullable here.
/// </summary>
/// <param name="nextCharKind">kind of the next character; asserted to be valid and to make this state nullable</param>
/// <returns>the value produced by the node's fixed-length resolution for the combined context</returns>
internal int FixedLength(uint nextCharKind)
{
    // Preconditions, checked in debug builds only.
    Debug.Assert(IsNullableFor(nextCharKind));
    Debug.Assert(CharKind.IsValidCharKind(nextCharKind));

    return Node.ResolveFixedLength(CharKind.Context(PrevCharKind, nextCharKind));
}
/// <summary>
/// Compute the target state for the given input minterm.
/// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <returns>the non-capturing state obtained from the builder for the derivative of this state's node</returns>
internal DfaMatchingState<T> Next(T minterm)
{
    // The minterm is passed by reference, so GetNextCharKind may replace it before it is used below.
    uint nextCharKind = GetNextCharKind(ref minterm);
    uint context = CharKind.Context(PrevCharKind, nextCharKind);

    // The derivative is the ordered transition of the eagerly built derivative-with-effects
    // for this minterm in the combined character context.
    SymbolicRegexNode<T> targetNode = Node
        .MkDerivativeWithEffects(eager: true)
        .TransitionOrdered(minterm, context);

    // nextCharKind becomes the PrevCharKind of the target state; the builder reuses an existing
    // state if there is one and otherwise creates a new id for it.
    return Node._builder.MkState(targetNode, nextCharKind, capturing: false);
}
/// <summary>
/// Compute the target state for the given input minterm.
/// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <returns>the non-capturing state obtained from the builder for the derivative of this state's node</returns>
internal DfaMatchingState<TSet> Next(TSet minterm)
{
    // The minterm is passed by reference, so GetNextCharKind may replace it before it is used below.
    uint nextCharKind = GetNextCharKind(ref minterm);

    // nextCharKind becomes the PrevCharKind of the target state; the builder reuses an existing
    // state if there is one and otherwise creates a new id for it. The derivative is computed in
    // the combined (previous, next) character context.
    return Node._builder.CreateState(
        Node.CreateDerivative(minterm, CharKind.Context(PrevCharKind, nextCharKind)),
        nextCharKind,
        capturing: false);
}
/// <summary>
/// Lazily enumerates the transitions of this state for the given minterm, each as a capturing
/// target state paired with its effects.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <returns>an enumeration of the transitions as pairs of the target state and a list of effects to be applied</returns>
internal IEnumerable<(DfaMatchingState<T>, List<DerivativeEffect>)> AntimirovEagerNextWithEffects(T minterm)
{
    // The minterm is passed by reference, so GetNextCharKind may replace it before it is used below.
    uint nextCharKind = GetNextCharKind(ref minterm);
    uint context = CharKind.Context(PrevCharKind, nextCharKind);

    // Transitions of the eagerly built derivative-with-effects in the combined character context.
    IEnumerable<(SymbolicRegexNode<T>, List<DerivativeEffect>)> transitions =
        Node.MkDerivativeWithEffects(eager: true).TransitionsWithEffects(minterm, context);

    foreach ((SymbolicRegexNode<T> targetNode, List<DerivativeEffect> targetEffects) in transitions)
    {
        // nextCharKind becomes the PrevCharKind of the target state; the builder reuses an existing
        // state if there is one and otherwise creates a new id for it.
        DfaMatchingState<T> target = Node._builder.MkState(targetNode, nextCharKind, capturing: true);
        yield return (target, targetEffects);
    }
}
/// <summary>
/// Compute the target state for the given input minterm.
/// If <paramref name="minterm"/> is False this means that this is \n and it is the last character of the input.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character or False corresponding to last \n</param>
/// <returns>the state obtained from the builder for the derivative of this state's node</returns>
internal DfaMatchingState<T> Next(T minterm)
{
    ICharAlgebra<T> solver = Node._builder._solver;
    T wordLetter = Node._builder._wordLetterPredicateForAnchors;
    T newLine = Node._builder._newLinePredicate;

    // Classify the next character. The branches are tried in this order; the first that applies wins.
    uint nextCharKind;
    if (solver.False.Equals(minterm))
    {
        // minterm == solver.False is used to represent the very last \n
        nextCharKind = CharKind.NewLineS;
        minterm = newLine;
    }
    else if (newLine.Equals(minterm))
    {
        // If the previous state was the start state, mark this as the very FIRST \n.
        // Essentially, this looks the same as the very last \n and is used to nullify
        // rev(\Z) in the context of a reversed automaton.
        nextCharKind = PrevCharKind == CharKind.StartStop ? CharKind.NewLineS : CharKind.Newline;
    }
    else if (solver.IsSatisfiable(solver.And(wordLetter, minterm)))
    {
        nextCharKind = CharKind.WordLetter;
    }
    else
    {
        // None of the special kinds applies.
        nextCharKind = 0;
    }

    // The derivative is computed in the combined (previous, next) character context.
    SymbolicRegexNode<T> targetNode = Node.MkDerivative(minterm, CharKind.Context(PrevCharKind, nextCharKind));

    // nextCharKind becomes the PrevCharKind of the target state; the builder reuses an existing
    // state if there is one and otherwise creates a new id for it.
    return Node._builder.MkState(targetNode, nextCharKind);
}
/// <summary>Generates up to k random strings accepted by the regex</summary>
/// <param name="k">number of generation attempts; an attempt that runs into a dead end yields nothing</param>
/// <returns>a lazily produced sequence with one string per successful attempt</returns>
public IEnumerable<string> GenerateRandomMembers(int k)
{
    for (int attempt = 0; attempt < k; attempt++)
    {
        // Input generated so far in this attempt.
        StringBuilder generated = new();

        // Initially there is no previous character.
        // Here one could also consider previous characters, for example for \b, \B, and ^ anchors,
        // and initialize the generated input accordingly.
        uint prevCharKind = CharKind.BeginningEnd;

        // Cleared in the unlikely situation that generation ends up in a dead end.
        bool succeeded = true;

        // The current set of reached states initially contains just the root.
        List<SymbolicRegexNode<S>> current = new() { _root };

        // Suffixes that would let the input end at the current point.
        List<string> endings = new();

        // Scratch list that receives the successor states of each step.
        List<SymbolicRegexNode<S>> successors = new();

        while (true)
        {
            Debug.Assert(current.Count > 0);

            if (CanBeFinal(current))
            {
                // Unconditionally final state or end of the input due to \Z anchor for example
                if (IsFinal(current) || IsFinal(current, CharKind.Context(prevCharKind, CharKind.BeginningEnd)))
                {
                    endings.Add("");
                }

                // End of line due to end-of-line anchor
                if (IsFinal(current, CharKind.Context(prevCharKind, CharKind.Newline)))
                {
                    endings.Add("\n");
                }

                // Word border due to \b or \B, followed by a word character
                if (IsFinal(current, CharKind.Context(prevCharKind, CharKind.WordLetter)))
                {
                    endings.Add(ChooseChar(_asciiWordCharacters).ToString());
                }

                // Word border due to \b or \B, followed by a non-word character
                if (IsFinal(current, CharKind.Context(prevCharKind, CharKind.General)))
                {
                    endings.Add(ChooseChar(_asciiNonWordCharacters).ToString());
                }
            }

            // Stop here based on a coin toss, which is only made when some ending is available.
            if (endings.Count > 0 && ChooseRandomlyTrueOrFalse())
            {
                // Choose some suffix that allows some anchor (if any) to be nullable
                generated.Append(Choose(endings));
                break;
            }

            SymbolicRegexNode<S> chosenState = Choose(current);
            char nextChar = '\0';
            uint nextCharKind = 0;

            // Observe that chosenState.CreateDerivative() can be a dead end
            List<(S, SymbolicRegexNode<S>?, SymbolicRegexNode<S>)> paths = new(chosenState.CreateDerivative().EnumeratePaths(_solver.True));
            if (paths.Count > 0)
            {
                // Consider a random path from the randomly chosen state and
                // select a random member of the predicate on that path.
                (S, SymbolicRegexNode<S>?, SymbolicRegexNode<S>) chosenPath = Choose(paths);
                nextChar = ChooseChar(ToBDD(chosenPath.Item1));

                // Map the character back into the corresponding character constraint of the solver
                S charPredicate = _solver.CharConstraint(nextChar);

                // Determine the character kind of the chosen character; newline is tested first.
                if (IsNewline(charPredicate))
                {
                    nextCharKind = CharKind.Newline;
                }
                else if (IsWordchar(charPredicate))
                {
                    nextCharKind = CharKind.WordLetter;
                }
                else
                {
                    nextCharKind = CharKind.General;
                }

                // Step all current states into the next set of states, using the combined context
                // of the previous and the chosen character kind.
                uint context = CharKind.Context(prevCharKind, nextCharKind);
                successors.AddRange(Step(current, charPredicate, context));
            }

            // In the case that there are no next states: stop here
            if (successors.Count == 0)
            {
                if (endings.Count > 0)
                {
                    generated.Append(Choose(endings));
                }
                else
                {
                    // Ending up here is unlikely but possible, for example for infeasible patterns such as
                    // @"no\bway" or due to a poor choice of character -- no anchor is enabled -- so this is a dead end
                    succeeded = false;
                }

                break;
            }

            generated.Append(nextChar);
            current.Clear();
            endings.Clear();

            // Swap the lists: the successors become current and the cleared list is reused as scratch.
            (current, successors) = (successors, current);
            prevCharKind = nextCharKind;
        }

        if (succeeded)
        {
            yield return generated.ToString();
        }
    }
}