/// <summary>
/// Initializes all predefined nodes of this builder and seeds the node caches with them.
/// Also used by the deserializer of SymbolicRegexMatcher.
/// </summary>
/// <param name="solver">character algebra stored on the builder and used to create the predefined nodes</param>
private void InitilizeFields(ICharAlgebra<S> solver)
{
    this.solver = solver;

    // Predefined nodes, created in a fixed order because later ones are built from earlier ones.
    nothing = SymbolicRegexNode<S>.MkFalse(this, solver.False);
    dot = SymbolicRegexNode<S>.MkTrue(this, solver.True);
    dotStar = SymbolicRegexNode<S>.MkDotStar(this, dot);
    newLine = SymbolicRegexNode<S>.MkNewline(this, solver.MkCharConstraint('\n'));

    // bolRegex: optional (0..1) repetition of dotStar followed by newLine
    var anyThenNewLine = SymbolicRegexNode<S>.MkConcat(this, dotStar, newLine);
    bolRegex = SymbolicRegexNode<S>.MkLoop(this, anyThenNewLine, 0, 1);

    // eolRegex: optional (0..1) repetition of newLine followed by dotStar
    var newLineThenAny = SymbolicRegexNode<S>.MkConcat(this, newLine, dotStar);
    eolRegex = SymbolicRegexNode<S>.MkLoop(this, newLineThenAny, 0, 1);

    // Singleton cache: predicate -> node
    singletonCache[solver.False] = nothing;
    singletonCache[newLine.set] = newLine;
    singletonCache[solver.True] = dot;

    // Node cache: every predefined node maps to itself
    nodeCache[nothing] = nothing;
    nodeCache[dot] = dot;
    nodeCache[dotStar] = dotStar;
    nodeCache[newLine] = newLine;
    nodeCache[bolRegex] = bolRegex;
    nodeCache[eolRegex] = eolRegex;
}
/// <summary>
/// Creates a counting automaton over <paramref name="aut"/> and builds the counter table,
/// indexed by each counter's CounterId.
/// </summary>
/// <param name="aut">underlying automaton whose move labels carry counter operations</param>
/// <param name="stateMap">map from state ids to symbolic regex nodes; stored as given</param>
/// <param name="countingStates">map from state ids to counters, or null (only defined in the monadic case)</param>
internal CountingAutomaton(
    Automaton<Tuple<Maybe<S>, Sequence<CounterOperation>>> aut,
    Dictionary<int, SymbolicRegexNode<S>> stateMap,
    Dictionary<int, ICounter> countingStates)
    : base(aut)
{
    this.countingStates = countingStates;
    this.stateMap = stateMap;
    this.solver = ((CABA<S>)Algebra).builder.solver;

    if (countingStates == null)
    {
        // No state-to-counter map: collect the distinct counters mentioned on the moves.
        var distinctCounters = new HashSet<ICounter>();
        foreach (var move in aut.GetMoves())
        {
            foreach (var operation in move.Label.Item2)
            {
                distinctCounters.Add(operation.Counter);
            }
        }

        this.counters = new ICounter[distinctCounters.Count];
        foreach (var counter in distinctCounters)
        {
            this.counters[counter.CounterId] = counter;
        }

        return;
    }

    // countingStates is only defined in the monadic case: take the counters straight from it.
    this.counters = new ICounter[countingStates.Count];
    foreach (var counter in countingStates.Values)
    {
        this.counters[counter.CounterId] = counter;
    }
}
/// <summary>
/// Creates a new incremental symbolic regex builder and its predefined regexes.
/// </summary>
/// <param name="solver">Effective Boolean algebra over S.</param>
public SymbolicRegexBuilder(ICharAlgebra<S> solver)
{
    this.solver = solver;

    epsilon = SymbolicRegex<S>.MkEpsilon(this);

    // Each singleton regex is registered in the singleton cache under its predicate.
    nothing = SymbolicRegex<S>.MkFalse(this);
    singletonCache[solver.False] = nothing;

    dot = SymbolicRegex<S>.MkTrue(this);
    singletonCache[solver.True] = dot;

    dotStar = SymbolicRegex<S>.MkDotStar(this, dot);

    // Anchors
    startAnchor = SymbolicRegex<S>.MkStartAnchor(this);
    endAnchor = SymbolicRegex<S>.MkEndAnchor(this);
    eolAnchor = SymbolicRegex<S>.MkEolAnchor(this);
    bolAnchor = SymbolicRegex<S>.MkBolAnchor(this);

    newLine = SymbolicRegex<S>.MkNewline(this);
    singletonCache[newLine.set] = newLine;

    // bolRegex: optional (0..1) repetition of dotStar followed by newLine
    var anyThenNewLine = SymbolicRegex<S>.MkConcat(this, dotStar, newLine);
    bolRegex = SymbolicRegex<S>.MkLoop(this, anyThenNewLine, 0, 1);

    // eolRegex: optional (0..1) repetition of newLine followed by dotStar
    var newLineThenAny = SymbolicRegex<S>.MkConcat(this, newLine, dotStar);
    eolRegex = SymbolicRegex<S>.MkLoop(this, newLineThenAny, 0, 1);
}
/// <summary>
/// Maps a minterm predicate to its character kind, a coarse classification of characters
/// that makes deciding the nullability of anchors cheap.
/// </summary>
/// <remarks>
/// A False predicate is a special case that stands for the very last \n; in that case
/// <paramref name="minterm"/> is replaced by the newline predicate.
/// </remarks>
/// <param name="minterm">the minterm to translate</param>
/// <returns>the character kind of the minterm</returns>
private uint GetNextCharKind(ref T minterm)
{
    ICharAlgebra<T> solver = Node._builder._solver;
    T wordLetters = Node._builder._wordLetterPredicateForAnchors;
    T newLine = Node._builder._newLinePredicate;

    // minterm == solver.False is used to represent the very last \n
    if (solver.False.Equals(minterm))
    {
        minterm = newLine;
        return CharKind.NewLineS;
    }

    if (newLine.Equals(minterm))
    {
        // If the previous state was the start state, this is the very FIRST \n.
        // It is treated like the very last \n and is used to nullify rev(\Z)
        // in the context of a reversed automaton.
        return PrevCharKind == CharKind.StartStop ? CharKind.NewLineS : CharKind.Newline;
    }

    if (solver.IsSatisfiable(solver.And(wordLetters, minterm)))
    {
        return CharKind.WordLetter;
    }

    return CharKind.General;
}
/// <summary>Pretty prints the bitvector <paramref name="bv"/> as the character set it represents.</summary>
/// <param name="bv">bitvector to print</param>
/// <returns>the text produced by the BDD algebra for the converted character set</returns>
public string PrettyPrint(ulong bv)
{
    ICharAlgebra<BDD> unicodeSolver = SymbolicRegexRunnerFactory.s_unicode._solver;
    Debug.Assert(_partition is not null && unicodeSolver is not null);

    // Convert the bitvector to a BDD first, then let the BDD algebra render it.
    BDD charSet = ConvertToCharSet(unicodeSolver, bv);
    return unicodeSolver.PrettyPrint(charSet);
}
/// <summary>
/// Creates an instance of LikePatternToAutomatonConverter for a given character solver.
/// </summary>
/// <param name="solver">character solver used by the converter and its automaton builder</param>
public LikePatternToAutomatonConverter(ICharAlgebra<S> solver)
{
    this.solver = solver;

    var likeBuilder = new RegexToAutomatonBuilder<ILikeNode, S>(solver, this.TokenToAutomaton);

    // The original comment describes this as adding implicit start and end anchors.
    // NOTE(review): confirm the meaning of isBeg/isEnd = false against RegexToAutomatonBuilder.
    likeBuilder.isBeg = false;
    likeBuilder.isEnd = false;

    this.builder = likeBuilder;
}
/// <summary>
/// Constructs a regex to symbolic finite automata converter that creates its own
/// Unicode category theory from the solver.
/// </summary>
/// <param name="solver">solver for character constraints</param>
public RegexToAutomatonConverter(ICharAlgebra<S> solver)
{
    this.solver = solver;
    categorizer = new UnicodeCategoryTheory<S>(solver);

    // Descriptions registered for the two trivial predicates.
    description.Add(solver.True, "");
    description.Add(solver.False, "[]");

    automBuilder = new RegexToAutomatonBuilder<RegexNode, S>(solver, this.ConvertNode);
}
/// <summary>
/// Constructs a regex to symbolic finite automata converter with a caller-supplied categorizer.
/// </summary>
/// <param name="solver">solver for character constraints</param>
/// <param name="categorizer">maps unicode categories to corresponding character conditions</param>
internal RegexToAutomatonConverter(ICharAlgebra<S> solver, IUnicodeCategoryTheory<S> categorizer)
{
    this.solver = solver;
    this.categorizer = categorizer;

    // Descriptions registered for the two trivial predicates.
    description.Add(solver.True, "");
    description.Add(solver.False, "[]");

    automBuilder = new RegexToAutomatonBuilder<RegexNode, S>(solver, this.ConvertNode);
}
/// <summary>Pretty prints the bitvector <paramref name="bv"/> as the character set it represents.</summary>
/// <param name="bv">bitvector to print</param>
/// <returns>the text produced by the BDD algebra for the converted character set</returns>
public string PrettyPrint(BV bv)
{
    // Uses the shared BDD solver held by SymbolicRegexRunner.
    ICharAlgebra<BDD> unicodeSolver = SymbolicRegexRunner.s_unicode._solver;
    Debug.Assert(_partition is not null && unicodeSolver is not null);

    // Convert the bitvector to a BDD first, then let the BDD algebra render it.
    BDD charSet = ConvertToCharSet(unicodeSolver, bv);
    return unicodeSolver.PrettyPrint(charSet);
}
/// <summary>
/// Constructs a regex to symbolic finite automata converter.
/// </summary>
/// <param name="solver">solver for character constraints</param>
/// <param name="categorizer">
/// maps unicode categories to corresponding character conditions;
/// when null, a UnicodeCategoryTheory over <paramref name="solver"/> is created
/// </param>
internal RegexToAutomatonConverter(ICharAlgebra<S> solver, IUnicodeCategoryTheory<S> categorizer = null)
{
    this.solver = solver;
    this.categorizer = categorizer ?? new UnicodeCategoryTheory<S>(solver);

    description.Add(solver.True, ".");
    // "[]" unfortunately does not parse as a valid regex; "[0-[0]]" was an alternative considered here.
    description.Add(solver.False, "[]");

    automBuilder = new RegexToAutomatonBuilder<RegexNode, S>(solver, this.ConvertNode);
    srBuilder = new SymbolicRegexBuilder<S>(solver);
}
/// <summary>
/// Creates a sampler for <paramref name="root"/>, or for its complement when
/// <paramref name="negative"/> is set.
/// </summary>
/// <param name="root">regex node to sample from</param>
/// <param name="randomseed">seed for the random generator; 0 means no seed, in which case one is chosen randomly</param>
/// <param name="negative">when true the sampler works on root._builder.Not(root) instead of root</param>
public SymbolicRegexSampler(SymbolicRegexNode<S> root, int randomseed, bool negative)
{
    if (negative)
    {
        _root = root._builder.Not(root);
    }
    else
    {
        _root = root;
    }

    // Treat 0 as "no seed given" and pick a random seed instead
    if (randomseed == 0)
    {
        RandomSeed = new Random().Next();
    }
    else
    {
        RandomSeed = randomseed;
    }

    _random = new Random(RandomSeed);
    _solver = root._builder._solver;

    CharSetSolver charSets = CharSetSolver.Instance;

    // A-Z, a-z, underscore and 0-9
    BDD[] wordCharacterParts =
    {
        charSets.RangeConstraint('A', 'Z'),
        charSets.RangeConstraint('a', 'z'),
        charSets.CharConstraint('_'),
        charSets.RangeConstraint('0', '9')
    };
    _asciiWordCharacters = charSets.Or(wordCharacterParts);

    // Visible ASCII range for input character generation
    _ascii = charSets.RangeConstraint('\x20', '\x7E');

    // Visible ASCII characters that are not word characters
    _asciiNonWordCharacters = charSets.And(_ascii, charSets.Not(_asciiWordCharacters));
}
/// <summary>
/// Converts the bitvector <paramref name="pred"/> into a BDD: the union of every
/// partition element whose bit is set in <paramref name="pred"/>.
/// </summary>
/// <param name="solver">BDD algebra used to form the union</param>
/// <param name="pred">bitvector in which bit i selects the i'th minterm of the partition</param>
/// <returns>the union of the selected minterms; BDD.False when nothing is selected</returns>
public BDD ConvertToCharSet(ICharAlgebra<BDD> solver, ulong pred)
{
    Debug.Assert(_partition is not null);

    BDD union = BDD.False;
    if (pred == _false)
    {
        return union;
    }

    for (int bit = 0; bit < _bits; bit++)
    {
        // include the minterm at this position when its bit is set
        ulong mask = 1UL << bit;
        if ((pred & mask) != _false)
        {
            union = solver.Or(union, _partition[bit]);
        }
    }

    return union;
}
/// <summary>
/// Converts the bitvector <paramref name="pred"/> into a BDD: the union of every
/// partition element whose bit is set in <paramref name="pred"/>.
/// </summary>
/// <param name="solver">BDD algebra that supplies False and forms the union</param>
/// <param name="pred">bitvector in which bit i selects the i'th minterm of the partition</param>
/// <returns>the union of the selected minterms; solver.False when <paramref name="pred"/> equals False</returns>
public BDD ConvertToCharSet(ICharAlgebra<BDD> solver, BV pred)
{
    Debug.Assert(_partition is not null);

    BDD union = solver.False;
    if (pred.Equals(False))
    {
        return union;
    }

    for (int bit = 0; bit < _bits; bit++)
    {
        // include the minterm at this position when its bit is set
        if (pred[bit])
        {
            union = solver.Or(union, _partition[bit]);
        }
    }

    return union;
}
/// <summary>
/// Computes the target state reached from this state on the given input minterm.
/// A False <paramref name="minterm"/> means the character is \n and it is the last character of the input.
/// </summary>
/// <param name="minterm">minterm corresponding to some input character, or False corresponding to the last \n</param>
/// <returns>the state the builder returns for the derivative and the next character kind</returns>
internal DfaMatchingState<T> Next(T minterm)
{
    ICharAlgebra<T> solver = Node._builder._solver;
    T wordLetters = Node._builder._wordLetterPredicateForAnchors;
    T newLine = Node._builder._newLinePredicate;

    uint nextCharKind;
    if (solver.False.Equals(minterm))
    {
        // minterm == solver.False is used to represent the very last \n
        nextCharKind = CharKind.NewLineS;
        minterm = newLine;
    }
    else if (newLine.Equals(minterm))
    {
        // If the previous state was the start state, this is the very FIRST \n.
        // It is treated like the very last \n and is used to nullify rev(\Z)
        // in the context of a reversed automaton.
        nextCharKind = PrevCharKind == CharKind.StartStop ? CharKind.NewLineS : CharKind.Newline;
    }
    else if (solver.IsSatisfiable(solver.And(wordLetters, minterm)))
    {
        nextCharKind = CharKind.WordLetter;
    }
    else
    {
        nextCharKind = 0;
    }

    // Derivative of the node in the combined context of the previous and next character kinds
    SymbolicRegexNode<T> derivative = Node.MkDerivative(minterm, CharKind.Context(PrevCharKind, nextCharKind));

    // nextCharKind becomes the PrevCharKind of the target state; the builder reuses
    // an existing state when there is one and otherwise creates a new id for it
    return Node._builder.MkState(derivative, nextCharKind);
}
/// <summary>Generates up to k random strings accepted by the regex</summary>
/// <remarks>
/// Makes k independent generation attempts. An attempt that ends in a dead-end yields nothing,
/// so fewer than k strings may be produced. The sequence is lazy (iterator method).
/// </remarks>
/// <param name="k">number of generation attempts</param>
/// <returns>the strings produced by the successful attempts</returns>
public IEnumerable<string> GenerateRandomMembers(int k)
{
    // NOTE(review): bddSolver is not referenced anywhere below in this method.
    ICharAlgebra<BDD> bddSolver = SymbolicRegexRunner.s_unicode._solver;
    for (int i = 0; i < k; i++)
    {
        // Holds the generated input so far
        StringBuilder input_so_far = new();
        // Initially there is no previous character
        // Here one could also consider previous characters for example for \b, \B, and ^ anchors
        // and initialize input_so_far accordingly
        uint prevCharKind = CharKind.StartStop;
        // This flag is set to false in the unlikely situation that generation ends up in a dead-end
        bool generationSucceeded = true;
        // Current set of states reached; initially contains just the root
        List<SymbolicRegexNode<S>> states = new();
        states.Add(_root);
        // Candidate suffixes that would end the string at the current position
        List<string> possible_endings = new();
        // Scratch list for the successor states; swapped with states at the end of each step
        List<SymbolicRegexNode<S>> nextStates = new();
        while (true)
        {
            Debug.Assert(states.Count > 0);
            if (CanBeFinal(states))
            {
                // Unconditionally final state or end of the input due to \Z anchor for example
                if (IsFinal(states) || IsFinal(states, CharKind.Context(prevCharKind, CharKind.StartStop)))
                {
                    possible_endings.Add("");
                }
                // End of line due to end-of-line anchor
                if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.Newline)))
                {
                    possible_endings.Add("\n");
                }
                // Related to word border due to \b or \B: final if followed by a word character
                if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.WordLetter)))
                {
                    possible_endings.Add(ChooseChar(_asciiWordCharacters).ToString());
                }
                // Related to word border due to \b or \B: final if followed by a non-word character
                if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.General)))
                {
                    possible_endings.Add(ChooseChar(_asciiNonWordCharacters).ToString());
                }
            }
            // Choose to stop here based on a coin-toss
            if (possible_endings.Count > 0 && ChooseRandomlyTrueOrFalse())
            {
                // Choose some suffix that allows some anchor (if any) to be nullable
                input_so_far.Append(Choose(possible_endings));
                break;
            }
            SymbolicRegexNode<S> state = Choose(states);
            // Placeholders; both are overwritten below when a path exists
            char c = '\0';
            uint cKind = 0;
            // Observe that state.MkDerivative() can be a deadend
            List<(S, SymbolicRegexNode<S>?, SymbolicRegexNode<S>)> paths = new(state.MkDerivative().EnumeratePaths(_solver.True));
            if (paths.Count > 0)
            {
                (S, SymbolicRegexNode<S>?, SymbolicRegexNode<S>) path = Choose(paths);
                // Consider a random path from some random state in states and
                // select a random member of the predicate on that path
                c = ChooseChar(ToBDD(path.Item1));
                // Map the character back into the corresponding character constraint of the solver
                S c_pred = _solver.CharConstraint(c);
                // Determine the character kind of c
                cKind = IsNewline(c_pred) ? CharKind.Newline : (IsWordchar(c_pred) ? CharKind.WordLetter : CharKind.General);
                // Construct the combined context of previous and c kind
                uint context = CharKind.Context(prevCharKind, cKind);
                // Step into the next set of states
                nextStates.AddRange(Step(states, c_pred, context));
            }
            // In the case that there are no next states: stop here
            if (nextStates.Count == 0)
            {
                if (possible_endings.Count > 0)
                {
                    input_so_far.Append(Choose(possible_endings));
                }
                else
                {
                    // Ending up here is unlikely but possible for example for infeasible patterns such as @"no\bway"
                    // or due to poor choice of c -- no anchor is enabled -- so this is a deadend
                    generationSucceeded = false;
                }
                break;
            }
            input_so_far.Append(c);
            // Recycle the two lists: the cleared current list becomes the next scratch list
            states.Clear();
            possible_endings.Clear();
            List<SymbolicRegexNode<S>> tmp = states;
            states = nextStates;
            nextStates = tmp;
            prevCharKind = cKind;
        }
        // Only successful attempts contribute a string to the result
        if (generationSucceeded)
        {
            yield return(input_so_far.ToString());
        }
    }
}
/// <summary>
/// Creates an instance of LikePatternToAutomatonConverter for a given character solver.
/// </summary>
/// <param name="solver">character solver used by the converter and its automaton builder</param>
public LikePatternToAutomatonConverter(ICharAlgebra<S> solver)
{
    this.solver = solver;

    // The builder calls back into TokenToAutomaton.
    builder = new RegexToAutomatonBuilder<ILikeNode, S>(solver, TokenToAutomaton);
}
/// <summary>Creates a Unicode category theory over the given character algebra.</summary>
/// <param name="solver">character algebra stored for later use</param>
public UnicodeCategoryTheory(ICharAlgebra<TPredicate> solver)
{
    _solver = solver;
}
/// <summary>
/// Converts this node to an automaton by concatenating its children with implicit anchors enabled.
/// </summary>
/// <param name="builder">automaton builder that performs the concatenation</param>
/// <param name="solver">character solver; not used by this node</param>
/// <returns>the automaton produced by the builder</returns>
public Automaton<S> ToAutomaton(RegexToAutomatonBuilder<ILikeNode, S> builder, ICharAlgebra<S> solver)
{
    var concatenation = builder.MkConcatenate(children, implicitAnchors: true);
    return concatenation;
}
/// <summary>
/// Converts this node to an automaton: a sequence with one single-character range
/// constraint per element of chars.
/// </summary>
/// <param name="builder">automaton builder that creates the sequence</param>
/// <param name="solver">character solver used to create the constraints</param>
/// <returns>the automaton produced by the builder</returns>
public Automaton<S> ToAutomaton(RegexToAutomatonBuilder<ILikeNode, S> builder, ICharAlgebra<S> solver)
{
    // Each character c becomes the range constraint [c, c].
    var constraints = chars.Select(c => solver.MkRangeConstraint(c, c)).ToArray();
    return builder.MkSeq(constraints);
}
/// <summary>
/// Creates a new symbolic regex builder: runs the parameterless constructor,
/// then initializes all fields from the solver.
/// </summary>
/// <param name="solver">Effective Boolean algebra over S.</param>
internal SymbolicRegexBuilder(ICharAlgebra<S> solver)
    : this()
{
    this.InitilizeFields(solver);
}
/// <summary>
/// Constructs a regex to symbolic finite automata converter.
/// </summary>
/// <param name="solver">solver for character constraints</param>
/// <param name="categorizer">
/// maps unicode categories to corresponding character conditions;
/// when null, a UnicodeCategoryTheory over <paramref name="solver"/> is created
/// </param>
public RegexToAutomatonConverter(ICharAlgebra<S> solver, IUnicodeCategoryTheory<S> categorizer = null)
{
    this.solver = solver;
    this.categorizer = categorizer ?? new UnicodeCategoryTheory<S>(solver);
    srBuilder = new SymbolicRegexBuilder<S>(solver);
}
/// <summary>
/// Creates a Unicode category theory over the given character algebra and
/// initializes the Unicode category definitions.
/// </summary>
/// <param name="solver">character algebra stored on the instance before initialization runs</param>
public UnicodeCategoryTheory(ICharAlgebra<PRED> solver)
{
    this.solver = solver;
    this.InitializeUnicodeCategoryDefinitions();
}
/// <summary>
/// Converts this character-set node to an automaton with a single move whose condition is
/// the disjunction of the set's characters, complemented when the node is negated.
/// </summary>
/// <remarks>An empty set yields a move on solver.False, regardless of negation.</remarks>
/// <param name="builder">automaton builder that creates the move</param>
/// <param name="solver">character solver used to create the condition</param>
/// <returns>the automaton produced by the builder</returns>
public Automaton<S> ToAutomaton(RegexToAutomatonBuilder<ILikeNode, S> builder, ICharAlgebra<S> solver)
{
    if (this.set.Length == 0)
    {
        return builder.MkSeq(solver.False);
    }

    var condition = solver.MkOr(this.set.Select(c => solver.MkCharConstraint(c)));
    if (this.negate)
    {
        condition = solver.MkNot(condition);
    }

    return builder.MkSeq(condition);
}
/// <summary>
/// Converts this character-range node to an automaton with a single move whose condition is
/// the range from start to end, complemented when the node is negated.
/// </summary>
/// <param name="builder">automaton builder that creates the move</param>
/// <param name="solver">character solver used to create the condition</param>
/// <returns>the automaton produced by the builder</returns>
public Automaton<S> ToAutomaton(RegexToAutomatonBuilder<ILikeNode, S> builder, ICharAlgebra<S> solver)
{
    var condition = solver.MkRangeConstraint(this.start, this.end);
    if (this.negate)
    {
        condition = solver.MkNot(condition);
    }

    return builder.MkSeq(condition);
}
/// <summary>
/// Converts this node to an automaton: a loop over a new LikeAny node with
/// lower bound 0 and upper bound int.MaxValue.
/// </summary>
/// <param name="builder">automaton builder that creates the loop</param>
/// <param name="solver">character solver; not used by this node</param>
/// <returns>the automaton produced by the builder</returns>
public Automaton<S> ToAutomaton(RegexToAutomatonBuilder<ILikeNode, S> builder, ICharAlgebra<S> solver)
{
    const int lowerBound = 0;
    const int upperBound = int.MaxValue;
    return builder.MkLoop(new LikeAny(), lowerBound, upperBound);
}
/// <summary>
/// Converts this node to an automaton with a single move on solver.False.
/// </summary>
/// <param name="builder">automaton builder that creates the move</param>
/// <param name="solver">character solver that supplies the False predicate</param>
/// <returns>the automaton produced by the builder</returns>
public Automaton<S> ToAutomaton(RegexToAutomatonBuilder<ILikeNode, S> builder, ICharAlgebra<S> solver)
{
    var unsatisfiable = solver.False;
    return builder.MkSeq(unsatisfiable);
}