Пример #1
0
        /// <summary>
        /// Initializes every field of this builder for the given solver. Also used by the
        /// deserializer of SymbolicRegexMatcher.
        /// </summary>
        /// <param name="solver">character algebra that supplies the predicates of the predefined nodes</param>
        private void InitilizeFields(ICharAlgebra <S> solver)
        {
            this.solver = solver;

            // Predefined nodes, created in dependency order: dotStar is built from dot,
            // and the bol/eol regexes are built from dotStar and newLine.
            nothing = SymbolicRegexNode<S>.MkFalse(this, solver.False);
            dot     = SymbolicRegexNode<S>.MkTrue(this, solver.True);
            dotStar = SymbolicRegexNode<S>.MkDotStar(this, dot);
            newLine = SymbolicRegexNode<S>.MkNewline(this, solver.MkCharConstraint('\n'));

            // Loop with bounds 0 and 1 over "dotStar followed by newLine"
            var anythingThenNewLine = SymbolicRegexNode<S>.MkConcat(this, dotStar, newLine);
            bolRegex = SymbolicRegexNode<S>.MkLoop(this, anythingThenNewLine, 0, 1);

            // Loop with bounds 0 and 1 over "newLine followed by dotStar"
            var newLineThenAnything = SymbolicRegexNode<S>.MkConcat(this, newLine, dotStar);
            eolRegex = SymbolicRegexNode<S>.MkLoop(this, newLineThenAnything, 0, 1);

            // Seed the singleton cache: predicate -> node
            singletonCache[this.solver.False] = nothing;
            singletonCache[newLine.set]       = newLine;
            singletonCache[this.solver.True]  = dot;

            // Seed the node cache: every predefined node maps to itself
            foreach (var predefined in new[] { nothing, dot, dotStar, newLine, bolRegex, eolRegex })
            {
                nodeCache[predefined] = predefined;
            }
        }
Пример #2
0
 /// <summary>
 /// Creates a counting automaton over <paramref name="aut"/> and fills the counter table,
 /// where each counter is stored at the index given by its CounterId.
 /// </summary>
 /// <param name="aut">underlying automaton whose move labels carry counter operations</param>
 /// <param name="stateMap">map from state ids to symbolic regex nodes</param>
 /// <param name="countingStates">map from state ids to counters; may be null (only defined in the monadic case)</param>
 internal CountingAutomaton(Automaton <Tuple <Maybe <S>, Sequence <CounterOperation> > > aut,
                            Dictionary <int, SymbolicRegexNode <S> > stateMap, Dictionary <int, ICounter> countingStates) : base(aut)
 {
     this.countingStates = countingStates;
     this.stateMap       = stateMap;
     solver              = ((CABA<S>)Algebra).builder.solver;

     if (countingStates == null)
     {
         // No counting-state map was supplied: collect the distinct counters
         // referenced by the counter operations on the moves.
         var distinctCounters = new HashSet<ICounter>();
         foreach (var move in aut.GetMoves())
         {
             foreach (var operation in move.Label.Item2)
             {
                 distinctCounters.Add(operation.Counter);
             }
         }

         counters = new ICounter[distinctCounters.Count];
         foreach (var counter in distinctCounters)
         {
             counters[counter.CounterId] = counter;
         }
     }
     else
     {
         // countingStates is only defined in the monadic case
         counters = new ICounter[countingStates.Count];
         foreach (var counter in countingStates.Values)
         {
             counters[counter.CounterId] = counter;
         }
     }
 }
Пример #3
0
        /// <summary>
        /// Creates a new incremental symbolic regex builder and its predefined regexes.
        /// </summary>
        /// <param name="solver">Effective Boolean algebra over S.</param>
        public SymbolicRegexBuilder(ICharAlgebra <S> solver)
        {
            this.solver = solver;

            epsilon = SymbolicRegex<S>.MkEpsilon(this);

            // The False and True regexes are registered in the singleton cache
            // under the solver's False and True predicates as soon as they exist.
            nothing = SymbolicRegex<S>.MkFalse(this);
            singletonCache[solver.False] = nothing;

            dot = SymbolicRegex<S>.MkTrue(this);
            singletonCache[solver.True] = dot;

            dotStar = SymbolicRegex<S>.MkDotStar(this, dot);

            // Anchors
            startAnchor = SymbolicRegex<S>.MkStartAnchor(this);
            endAnchor   = SymbolicRegex<S>.MkEndAnchor(this);
            eolAnchor   = SymbolicRegex<S>.MkEolAnchor(this);
            bolAnchor   = SymbolicRegex<S>.MkBolAnchor(this);

            newLine = SymbolicRegex<S>.MkNewline(this);
            singletonCache[newLine.set] = newLine;

            // Loops with bounds 0 and 1 over (dotStar, newLine) and (newLine, dotStar)
            var anythingThenNewLine = SymbolicRegex<S>.MkConcat(this, dotStar, newLine);
            bolRegex = SymbolicRegex<S>.MkLoop(this, anythingThenNewLine, 0, 1);

            var newLineThenAnything = SymbolicRegex<S>.MkConcat(this, newLine, dotStar);
            eolRegex = SymbolicRegex<S>.MkLoop(this, newLineThenAnything, 0, 1);
        }
Пример #4
0
        /// <summary>
        /// Maps a minterm predicate to its character kind, a coarse categorization of characters
        /// used for cheaply deciding the nullability of anchors.
        /// </summary>
        /// <remarks>
        /// A False predicate is a special case standing for the very last \n; in that case
        /// <paramref name="minterm"/> is overwritten with the builder's newline predicate.
        /// </remarks>
        /// <param name="minterm">the minterm to translate</param>
        /// <returns>the character kind of the minterm</returns>
        private uint GetNextCharKind(ref T minterm)
        {
            var builder = Node._builder;
            ICharAlgebra<T> algebra = builder._solver;
            T wordLetters = builder._wordLetterPredicateForAnchors;
            T newLines    = builder._newLinePredicate;

            // minterm == solver.False represents the very last \n
            if (algebra.False.Equals(minterm))
            {
                minterm = newLines;
                return CharKind.NewLineS;
            }

            if (newLines.Equals(minterm))
            {
                // When the previous state was the start state this is the very FIRST \n.
                // It is treated the same as the very last \n and is used to nullify
                // rev(\Z) in the context of a reversed automaton.
                if (PrevCharKind == CharKind.StartStop)
                {
                    return CharKind.NewLineS;
                }

                return CharKind.Newline;
            }

            // Any overlap with the word-letter predicate makes this a word letter
            if (algebra.IsSatisfiable(algebra.And(wordLetters, minterm)))
            {
                return CharKind.WordLetter;
            }

            return CharKind.General;
        }
Пример #5
0
        /// <summary>Renders the bitvector <paramref name="bv"/> as the pretty-printed character set it represents.</summary>
        /// <param name="bv">bitvector to render</param>
        /// <returns>the text produced by the BDD algebra for the converted character set</returns>
        public string PrettyPrint(ulong bv)
        {
            ICharAlgebra<BDD> bddalgebra = SymbolicRegexRunnerFactory.s_unicode._solver;
            Debug.Assert(_partition is not null && bddalgebra is not null);

            // Convert to a BDD character set first, then let the BDD algebra print it
            BDD charSet = ConvertToCharSet(bddalgebra, bv);
            return bddalgebra.PrettyPrint(charSet);
        }
Пример #6
0
 /// <summary>
 /// Creates a LikePatternToAutomatonConverter for the given character solver.
 /// </summary>
 /// <param name="solver">character solver passed on to the automaton builder</param>
 public LikePatternToAutomatonConverter(ICharAlgebra <S> solver)
 {
     this.solver = solver;

     var automatonBuilder = new RegexToAutomatonBuilder<ILikeNode, S>(solver, TokenToAutomaton);

     // Clear the builder's isBeg/isEnd flags; the original author's note ties this
     // to adding implicit start and end anchors.
     automatonBuilder.isBeg = false;
     automatonBuilder.isEnd = false;

     builder = automatonBuilder;
 }
Пример #7
0
 /// <summary>
 /// Creates a converter from regexes to symbolic finite automata, using a
 /// UnicodeCategoryTheory built from the same solver.
 /// </summary>
 /// <param name="solver">solver for character constraints</param>
 public RegexToAutomatonConverter(ICharAlgebra <S> solver)
 {
     this.solver = solver;
     categorizer = new UnicodeCategoryTheory<S>(solver);

     // Register the descriptions of the two trivial predicates
     description.Add(solver.True, "");
     description.Add(solver.False, "[]");

     // The builder calls back into ConvertNode
     automBuilder = new RegexToAutomatonBuilder<RegexNode, S>(solver, ConvertNode);
 }
Пример #8
0
 /// <summary>
 /// Creates a converter from regexes to symbolic finite automata with an explicitly supplied categorizer.
 /// </summary>
 /// <param name="solver">solver for character constraints</param>
 /// <param name="categorizer">maps unicode categories to corresponding character conditions</param>
 internal RegexToAutomatonConverter(ICharAlgebra <S> solver, IUnicodeCategoryTheory <S> categorizer)
 {
     this.solver      = solver;
     this.categorizer = categorizer;

     // Register the descriptions of the two trivial predicates
     description.Add(solver.True, "");
     description.Add(solver.False, "[]");

     // The builder calls back into ConvertNode
     automBuilder = new RegexToAutomatonBuilder<RegexNode, S>(solver, ConvertNode);
 }
Пример #9
0
        /// <summary>Renders the bitvector <paramref name="bv"/> as the pretty-printed character set it represents.</summary>
        /// <param name="bv">bitvector to render</param>
        /// <returns>the text produced by the BDD algebra for the converted character set</returns>
        public string PrettyPrint(BV bv)
        {
            // Uses the shared BDD solver
            ICharAlgebra<BDD> bddalgebra = SymbolicRegexRunner.s_unicode._solver;
            Debug.Assert(_partition is not null && bddalgebra is not null);

            // Convert to a BDD character set first, then let the BDD algebra print it
            BDD charSet = ConvertToCharSet(bddalgebra, bv);
            return bddalgebra.PrettyPrint(charSet);
        }
Пример #10
0
        //public SymbolicRegexBuilder<S> SRBuilder
        //{
        //    get
        //    {
        //        return srBuilder;
        //    }
        //}

        /// <summary>
        /// Creates a converter from regexes to symbolic finite automata.
        /// </summary>
        /// <param name="solver">solver for character constraints</param>
        /// <param name="categorizer">maps unicode categories to corresponding character conditions;
        /// when null, a UnicodeCategoryTheory over <paramref name="solver"/> is created</param>
        internal RegexToAutomatonConverter(ICharAlgebra <S> solver, IUnicodeCategoryTheory <S> categorizer = null)
        {
            this.solver      = solver;
            this.categorizer = categorizer ?? new UnicodeCategoryTheory<S>(solver);

            description.Add(solver.True, ".");
            // Note: "[]" unfortunately does not parse as a valid regex;
            // "[0-[0]]" was considered as an alternative description for False.
            description.Add(solver.False, "[]");

            // The automaton builder calls back into ConvertNode
            automBuilder = new RegexToAutomatonBuilder<RegexNode, S>(solver, ConvertNode);
            srBuilder    = new SymbolicRegexBuilder<S>(solver);
        }
Пример #11
0
        /// <summary>
        /// Creates a sampler for <paramref name="root"/>, or for its complement when
        /// <paramref name="negative"/> is true.
        /// </summary>
        /// <param name="root">regex node to sample from</param>
        /// <param name="randomseed">seed of the random generator; 0 means no seed was given and one is drawn at random</param>
        /// <param name="negative">when true, the sampler uses the builder's Not of the root</param>
        public SymbolicRegexSampler(SymbolicRegexNode <S> root, int randomseed, bool negative)
        {
            if (negative)
            {
                _root = root._builder.Not(root);
            }
            else
            {
                _root = root;
            }

            // A seed of 0 is treated as "no seed": pick one at random instead
            if (randomseed == 0)
            {
                RandomSeed = new Random().Next();
            }
            else
            {
                RandomSeed = randomseed;
            }

            _random = new Random(RandomSeed);
            _solver = root._builder._solver;

            CharSetSolver charSets = CharSetSolver.Instance;

            // ASCII word characters: A-Z, a-z, underscore and 0-9
            BDD upperCase  = charSets.RangeConstraint('A', 'Z');
            BDD lowerCase  = charSets.RangeConstraint('a', 'z');
            BDD underscore = charSets.CharConstraint('_');
            BDD digits     = charSets.RangeConstraint('0', '9');
            _asciiWordCharacters = charSets.Or(new BDD[] { upperCase, lowerCase, underscore, digits });

            // Visible ASCII range for input character generation
            _ascii = charSets.RangeConstraint('\x20', '\x7E');

            // Visible ASCII characters that are not word characters
            BDD notWordCharacters = charSets.Not(_asciiWordCharacters);
            _asciiNonWordCharacters = charSets.And(_ascii, notWordCharacters);
        }
Пример #12
0
        /// <summary>
        /// Converts a bitvector predicate into a BDD character set: the union of the partition
        /// elements whose bit is set in <paramref name="pred"/>.
        /// </summary>
        /// <param name="solver">BDD algebra used to build the union</param>
        /// <param name="pred">bitvector in which bit i selects the i'th minterm of the partition</param>
        /// <returns>the union of all selected minterms; BDD.False when none is selected</returns>
        public BDD ConvertToCharSet(ICharAlgebra <BDD> solver, ulong pred)
        {
            Debug.Assert(_partition is not null);

            BDD union = BDD.False;

            // Nothing selected: the union stays empty
            if (pred == _false)
            {
                return union;
            }

            for (int bit = 0; bit < _bits; bit++)
            {
                // add the minterm at this position when its bit is set
                ulong mask = 1UL << bit;
                if ((pred & mask) != _false)
                {
                    union = solver.Or(union, _partition[bit]);
                }
            }

            return union;
        }
Пример #13
0
        /// <summary>
        /// Converts a bitvector predicate into a BDD character set: the union of the partition
        /// elements whose bit is set in <paramref name="pred"/>.
        /// </summary>
        /// <param name="solver">BDD algebra used to build the union</param>
        /// <param name="pred">bitvector in which bit i selects the i'th minterm of the partition</param>
        /// <returns>the union of all selected minterms; the solver's False when none is selected</returns>
        public BDD ConvertToCharSet(ICharAlgebra <BDD> solver, BV pred)
        {
            Debug.Assert(_partition is not null);

            BDD union = solver.False;

            // Nothing selected: the union stays empty
            if (pred.Equals(False))
            {
                return union;
            }

            for (int bit = 0; bit < _bits; bit++)
            {
                // add the minterm at this position when its bit is set
                if (pred[bit])
                {
                    union = solver.Or(union, _partition[bit]);
                }
            }

            return union;
        }
Пример #14
0
        /// <summary>
        /// Computes the target state for the given input minterm.
        /// When <paramref name="minterm"/> is False it stands for \n as the last character of the input.
        /// </summary>
        /// <param name="minterm">minterm corresponding to some input character, or False corresponding to the last \n</param>
        /// <returns>the state the builder returns for the derivative and the next character kind</returns>
        internal DfaMatchingState <T> Next(T minterm)
        {
            var builder = Node._builder;
            ICharAlgebra<T> algebra = builder._solver;
            T wordLetters = builder._wordLetterPredicateForAnchors;
            T newLines    = builder._newLinePredicate;

            uint nextCharKind;
            if (algebra.False.Equals(minterm))
            {
                // minterm == solver.False represents the very last \n;
                // continue with the actual newline predicate
                minterm      = newLines;
                nextCharKind = CharKind.NewLineS;
            }
            else if (newLines.Equals(minterm))
            {
                // When the previous state was the start state this is the very FIRST \n.
                // It is treated the same as the very last \n and is used to nullify
                // rev(\Z) in the context of a reversed automaton.
                if (PrevCharKind == CharKind.StartStop)
                {
                    nextCharKind = CharKind.NewLineS;
                }
                else
                {
                    nextCharKind = CharKind.Newline;
                }
            }
            else if (algebra.IsSatisfiable(algebra.And(wordLetters, minterm)))
            {
                nextCharKind = CharKind.WordLetter;
            }
            else
            {
                nextCharKind = 0;
            }

            // Derivative of the node for the minterm, taken in the combined
            // context of the previous and the next character kind
            SymbolicRegexNode<T> derivative = Node.MkDerivative(minterm, CharKind.Context(PrevCharKind, nextCharKind));

            // nextCharKind becomes the PrevCharKind of the target state; the builder
            // reuses an existing state if there is one and otherwise creates a new id
            return builder.MkState(derivative, nextCharKind);
        }
Пример #15
0
        /// <summary>Generates up to k random strings accepted by the regex</summary>
        /// <remarks>
        /// The method is an iterator: strings are produced lazily, one generation attempt per
        /// requested member. An attempt that runs into a dead end yields nothing, which is why
        /// fewer than <paramref name="k"/> strings may be returned.
        /// </remarks>
        /// <param name="k">number of generation attempts</param>
        /// <returns>the successfully generated strings, at most one per attempt</returns>
        public IEnumerable <string> GenerateRandomMembers(int k)
        {
            // NOTE(review): bddSolver is not referenced again in this method -- confirm whether this access is still needed
            ICharAlgebra <BDD> bddSolver = SymbolicRegexRunner.s_unicode._solver;

            for (int i = 0; i < k; i++)
            {
                // Holds the generated input so far
                StringBuilder input_so_far = new();

                // Initially there is no previous character
                // Here one could also consider previous characters for example for \b, \B, and ^ anchors
                // and initialize input_so_far accordingly
                uint prevCharKind = CharKind.StartStop;

                // This flag is set to false in the unlikely situation that generation ends up in a dead-end
                bool generationSucceeded = true;

                // Current set of states reached initially contains just the root
                List <SymbolicRegexNode <S> > states = new();
                states.Add(_root);

                // Used for end suffixes
                List <string> possible_endings = new();

                // Buffer for the states reached by the next step; swapped with states at the end of each iteration
                List <SymbolicRegexNode <S> > nextStates = new();

                while (true)
                {
                    Debug.Assert(states.Count > 0);

                    // Collect every suffix that would let the current states accept
                    if (CanBeFinal(states))
                    {
                        // Unconditionally final state or end of the input due to \Z anchor for example
                        if (IsFinal(states) || IsFinal(states, CharKind.Context(prevCharKind, CharKind.StartStop)))
                        {
                            possible_endings.Add("");
                        }

                        // End of line due to end-of-line anchor
                        if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.Newline)))
                        {
                            possible_endings.Add("\n");
                        }

                        // Related to wordborder due to \b or \B: final when followed by a word letter
                        if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.WordLetter)))
                        {
                            possible_endings.Add(ChooseChar(_asciiWordCharacters).ToString());
                        }

                        // Related to wordborder due to \b or \B: final when followed by a general (non-word) character
                        if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.General)))
                        {
                            possible_endings.Add(ChooseChar(_asciiNonWordCharacters).ToString());
                        }
                    }

                    // Choose to stop here based on a coin-toss
                    if (possible_endings.Count > 0 && ChooseRandomlyTrueOrFalse())
                    {
                        //Choose some suffix that allows some anchor (if any) to be nullable
                        input_so_far.Append(Choose(possible_endings));
                        break;
                    }

                    SymbolicRegexNode <S> state = Choose(states);
                    char c     = '\0';
                    uint cKind = 0;
                    // Observe that state.MkDerivative() can be a deadend
                    List <(S, SymbolicRegexNode <S>?, SymbolicRegexNode <S>)> paths = new(state.MkDerivative().EnumeratePaths(_solver.True));
                    if (paths.Count > 0)
                    {
                        (S, SymbolicRegexNode <S>?, SymbolicRegexNode <S>)path = Choose(paths);
                        // Consider a random path from some random state in states and
                        // select a random member of the predicate on that path
                        c = ChooseChar(ToBDD(path.Item1));

                        // Map the character back into the corresponding character constraint of the solver
                        S c_pred = _solver.CharConstraint(c);

                        // Determine the character kind of c
                        cKind = IsNewline(c_pred) ? CharKind.Newline : (IsWordchar(c_pred) ? CharKind.WordLetter : CharKind.General);

                        // Construct the combined context of previous and c kind
                        uint context = CharKind.Context(prevCharKind, cKind);

                        // Step into the next set of states
                        nextStates.AddRange(Step(states, c_pred, context));
                    }

                    // In the case that there are no next states: stop here
                    if (nextStates.Count == 0)
                    {
                        if (possible_endings.Count > 0)
                        {
                            input_so_far.Append(Choose(possible_endings));
                        }
                        else
                        {
                            // Ending up here is unlikely but possible for example for infeasible patterns such as @"no\bway"
                            // or due to poor choice of c -- no anchor is enabled -- so this is a deadend
                            generationSucceeded = false;
                        }
                        break;
                    }

                    // Commit the chosen character and advance: the old state list is emptied and
                    // reused as the next-states buffer, so no new lists are allocated per step
                    input_so_far.Append(c);
                    states.Clear();
                    possible_endings.Clear();
                    List <SymbolicRegexNode <S> > tmp = states;
                    states       = nextStates;
                    nextStates   = tmp;
                    prevCharKind = cKind;
                }

                // Attempts that ended in a dead end produce no output
                if (generationSucceeded)
                {
                    yield return(input_so_far.ToString());
                }
            }
        }
Пример #16
0
 /// <summary>
 /// Creates a LikePatternToAutomatonConverter for the given character solver.
 /// </summary>
 /// <param name="solver">character solver passed on to the automaton builder</param>
 public LikePatternToAutomatonConverter(ICharAlgebra <S> solver)
 {
     this.solver = solver;

     // The builder calls back into TokenToAutomaton
     builder = new RegexToAutomatonBuilder<ILikeNode, S>(solver, TokenToAutomaton);
 }
Пример #17
0
 /// <summary>Creates the theory and stores the given solver.</summary>
 /// <param name="solver">character algebra kept by this instance</param>
 public UnicodeCategoryTheory(ICharAlgebra <TPredicate> solver)
 {
     _solver = solver;
 }
Пример #18
0
 /// <summary>
 /// Converts this node to an automaton by concatenating its children with implicit anchors enabled.
 /// </summary>
 /// <param name="builder">automaton builder that performs the concatenation</param>
 /// <param name="solver">character solver (not used by this node)</param>
 /// <returns>the automaton returned by the builder for the children</returns>
 public Automaton <S> ToAutomaton(RegexToAutomatonBuilder <ILikeNode, S> builder, ICharAlgebra <S> solver)
 {
     var concatenation = builder.MkConcatenate(children, implicitAnchors: true);
     return concatenation;
 }
Пример #19
0
 /// <summary>
 /// Converts this node to an automaton that is a sequence of one single-character condition per element of chars.
 /// </summary>
 /// <param name="builder">automaton builder that creates the sequence</param>
 /// <param name="solver">character solver used to create the per-character conditions</param>
 /// <returns>the automaton returned by the builder for the sequence</returns>
 public Automaton <S> ToAutomaton(RegexToAutomatonBuilder <ILikeNode, S> builder, ICharAlgebra <S> solver)
 {
     // Each character becomes the range constraint [ch, ch]
     var conditions = chars.Select(ch => solver.MkRangeConstraint(ch, ch)).ToArray();
     return builder.MkSeq(conditions);
 }
Пример #20
0
 /// <summary>
 /// Creates a new symbolic regex builder: runs the parameterless constructor,
 /// then initializes all fields for the given solver.
 /// </summary>
 /// <param name="solver">Effective Boolean algebra over S.</param>
 internal SymbolicRegexBuilder(ICharAlgebra <S> solver) : this()
 {
     this.InitilizeFields(solver);
 }
Пример #21
0
        //public SymbolicRegexBuilder<S> SRBuilder
        //{
        //    get
        //    {
        //        return srBuilder;
        //    }
        //}

        /// <summary>
        /// Creates a converter from regexes to symbolic finite automata.
        /// </summary>
        /// <param name="solver">solver for character constraints</param>
        /// <param name="categorizer">maps unicode categories to corresponding character conditions;
        /// when null, a UnicodeCategoryTheory over <paramref name="solver"/> is created</param>
        public RegexToAutomatonConverter(ICharAlgebra <S> solver, IUnicodeCategoryTheory <S> categorizer = null)
        {
            this.solver      = solver;
            this.categorizer = categorizer ?? new UnicodeCategoryTheory<S>(solver);
            srBuilder        = new SymbolicRegexBuilder<S>(solver);
        }
Пример #22
0
 /// <summary>
 /// Creates the theory for the given solver and initializes the Unicode category definitions.
 /// </summary>
 /// <param name="solver">character algebra kept by this instance</param>
 public UnicodeCategoryTheory(ICharAlgebra <PRED> solver)
 {
     // The solver is stored before the category definitions are initialized
     this.solver = solver;
     this.InitializeUnicodeCategoryDefinitions();
 }
Пример #23
0
            /// <summary>
            /// Converts this character set node to an automaton with a single move condition.
            /// </summary>
            /// <param name="builder">automaton builder that creates the result</param>
            /// <param name="solver">character solver used to build the move condition</param>
            /// <returns>the automaton returned by the builder for the (possibly negated) set condition</returns>
            public Automaton <S> ToAutomaton(RegexToAutomatonBuilder <ILikeNode, S> builder, ICharAlgebra <S> solver)
            {
                // An empty set maps to the solver's False condition, whether or not negate is set
                if (this.set.Length == 0)
                {
                    return builder.MkSeq(solver.False);
                }

                // Disjunction of the character constraints of all members of the set
                var anyMember = solver.MkOr(this.set.Select(ch => solver.MkCharConstraint(ch)));

                if (negate)
                {
                    return builder.MkSeq(solver.MkNot(anyMember));
                }

                return builder.MkSeq(anyMember);
            }
Пример #24
0
            /// <summary>
            /// Converts this character range node to an automaton with a single move condition.
            /// </summary>
            /// <param name="builder">automaton builder that creates the result</param>
            /// <param name="solver">character solver used to build the move condition</param>
            /// <returns>the automaton returned by the builder for the (possibly negated) range condition</returns>
            public Automaton <S> ToAutomaton(RegexToAutomatonBuilder <ILikeNode, S> builder, ICharAlgebra <S> solver)
            {
                var inRange = solver.MkRangeConstraint(start, end);

                // A negated node uses the complement of the range
                if (negate)
                {
                    return builder.MkSeq(solver.MkNot(inRange));
                }

                return builder.MkSeq(inRange);
            }
Пример #25
0
 /// <summary>
 /// Converts this node to an automaton: a loop over a fresh LikeAny node with bounds 0 and int.MaxValue.
 /// </summary>
 /// <param name="builder">automaton builder that creates the loop</param>
 /// <param name="solver">character solver (not used by this node)</param>
 /// <returns>the automaton returned by the builder for the loop</returns>
 public Automaton <S> ToAutomaton(RegexToAutomatonBuilder <ILikeNode, S> builder, ICharAlgebra <S> solver)
 {
     var anyNode = new LikeAny();
     return builder.MkLoop(anyNode, 0, int.MaxValue);
 }
Пример #26
0
 /// <summary>
 /// Converts this node to an automaton whose single move condition is the solver's False.
 /// </summary>
 /// <param name="builder">automaton builder that creates the result</param>
 /// <param name="solver">character solver that supplies the False condition</param>
 /// <returns>the automaton returned by the builder for the False condition</returns>
 public Automaton <S> ToAutomaton(RegexToAutomatonBuilder <ILikeNode, S> builder, ICharAlgebra <S> solver)
 {
     var unsatisfiable = solver.False;
     return builder.MkSeq(unsatisfiable);
 }