/// <summary>
/// Classifies a minterm predicate into a character kind, the coarse categorization of characters
/// that is used to cheaply decide the nullability of anchors.
/// </summary>
/// <remarks>
/// A False predicate is handled as a special case that stands for the very last \n. In that case
/// <paramref name="minterm"/> is overwritten with the builder's newline predicate before returning.
/// </remarks>
/// <param name="minterm">the minterm to translate; replaced by the newline predicate when it is False</param>
/// <returns>the character kind of the minterm</returns>
private uint GetNextCharKind(ref T minterm)
{
    var builder = Node._builder;
    ICharAlgebra<T> solver = builder._solver;
    T newLine = builder._newLinePredicate;

    // minterm == solver.False is the encoding of the very last \n.
    if (solver.False.Equals(minterm))
    {
        minterm = newLine;
        return CharKind.NewLineS;
    }

    if (newLine.Equals(minterm))
    {
        // When the previous state was the start state this is the very FIRST \n.
        // It is treated the same as the very last \n, which is what nullifies
        // rev(\Z) in the context of a reversed automaton.
        if (PrevCharKind == CharKind.StartStop)
        {
            return CharKind.NewLineS;
        }

        return CharKind.Newline;
    }

    // Any overlap with the word-letter predicate makes this a word letter; everything else is general.
    T wordLetter = builder._wordLetterPredicateForAnchors;
    return solver.IsSatisfiable(solver.And(wordLetter, minterm))
        ? CharKind.WordLetter
        : CharKind.General;
}
/// <summary>
/// Initializes the sampler for the given regex node.
/// </summary>
/// <param name="root">the root node; when <paramref name="negative"/> is true its complement (built with the node's builder) is stored instead</param>
/// <param name="randomseed">seed for the internal random number generator; 0 means "no seed" and a nonzero seed is chosen at random</param>
/// <param name="negative">if true, the stored root is the negation of <paramref name="root"/></param>
public SymbolicRegexSampler(SymbolicRegexNode<S> root, int randomseed, bool negative)
{
    _root = negative ? root._builder.MkNot(root) : root;

    // Treat 0 as "no seed" and pick a seed at random instead. The seed is drawn from
    // [1, int.MaxValue) so that it can never be 0 itself: a stored RandomSeed of 0 would be
    // read as "no seed" again if it were passed back to this constructor, and therefore
    // could not reproduce the same random sequence.
    RandomSeed = randomseed == 0 ? new Random().Next(1, int.MaxValue) : randomseed;
    _random = new Random(RandomSeed);
    _solver = root._builder._solver;

    ICharAlgebra<BDD> bddSolver = SymbolicRegexRunnerFactory.s_unicode._solver;

    // ASCII word characters: A-Z, a-z, underscore and 0-9
    _asciiWordCharacters = bddSolver.Or(new BDD[]
    {
        bddSolver.RangeConstraint('A', 'Z'),
        bddSolver.RangeConstraint('a', 'z'),
        bddSolver.CharConstraint('_'),
        bddSolver.RangeConstraint('0', '9')
    });

    // Visible ASCII range for input character generation
    _ascii = bddSolver.RangeConstraint('\x20', '\x7E');

    // Visible ASCII characters that are not word characters
    _asciiNonWordCharacters = bddSolver.And(_ascii, bddSolver.Not(_asciiWordCharacters));
}
/// <summary>
/// Computes the state reached from this state on the given input minterm.
/// A <paramref name="minterm"/> equal to False stands for \n occurring as the last character of the input.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character, or False corresponding to the last \n</param>
/// <returns>the state obtained from the builder for the derivative of this state's node</returns>
internal DfaMatchingState<T> Next(T minterm)
{
    var builder = Node._builder;
    ICharAlgebra<T> solver = builder._solver;
    T newLine = builder._newLinePredicate;

    uint targetKind = 0;
    if (solver.False.Equals(minterm))
    {
        // minterm == solver.False is the encoding of the very last \n;
        // from here on the real newline predicate is used as the minterm.
        minterm = newLine;
        targetKind = CharKind.NewLineS;
    }
    else if (newLine.Equals(minterm))
    {
        // When the previous state was the start state this is the very FIRST \n.
        // It is treated the same as the very last \n, which is what nullifies
        // rev(\Z) in the context of a reversed automaton.
        if (PrevCharKind == CharKind.StartStop)
        {
            targetKind = CharKind.NewLineS;
        }
        else
        {
            targetKind = CharKind.Newline;
        }
    }
    else if (solver.IsSatisfiable(solver.And(builder._wordLetterPredicateForAnchors, minterm)))
    {
        targetKind = CharKind.WordLetter;
    }

    // Combine the previous and next character kinds into one context and
    // take the derivative of the node with respect to the minterm in that context.
    uint combinedContext = CharKind.Context(PrevCharKind, targetKind);
    SymbolicRegexNode<T> successor = Node.MkDerivative(minterm, combinedContext);

    // targetKind becomes the PrevCharKind of the target state. The builder
    // returns an existing state if there is one, otherwise it creates a new id for it.
    return builder.MkState(successor, targetKind);
}