Example #1
0
        /// <summary>
        /// Classifies a minterm predicate into a character kind. Character kinds are a coarse
        /// categorization of characters used for cheaply deciding the nullability of anchors.
        /// </summary>
        /// <remarks>
        /// The solver's False predicate is a sentinel for the very last \n of the input. When it is
        /// passed in, <paramref name="minterm"/> is overwritten with the builder's newline predicate.
        /// </remarks>
        /// <param name="minterm">
        /// the minterm to classify; replaced by the newline predicate when it equals the solver's False
        /// </param>
        /// <returns>
        /// <c>CharKind.NewLineS</c>, <c>CharKind.Newline</c>, <c>CharKind.WordLetter</c>,
        /// or <c>CharKind.General</c> when none of the special cases apply
        /// </returns>
        private uint GetNextCharKind(ref T minterm)
        {
            var builder = Node._builder;
            ICharAlgebra<T> solver = builder._solver;
            T wordLetters = builder._wordLetterPredicateForAnchors;
            T newLine = builder._newLinePredicate;

            // Sentinel case: False stands for the very last \n, so substitute the real newline predicate.
            if (solver.False.Equals(minterm))
            {
                minterm = newLine;
                return CharKind.NewLineS;
            }

            if (newLine.Equals(minterm))
            {
                // A \n read directly after the start state is the very FIRST \n. It is treated
                // like the very last \n and is used to nullify rev(\Z) in the context of a
                // reversed automaton.
                if (PrevCharKind == CharKind.StartStop)
                {
                    return CharKind.NewLineS;
                }

                return CharKind.Newline;
            }

            // Any overlap with the word-letter predicate makes this a word-letter minterm;
            // everything else is a general character.
            bool overlapsWordLetters = solver.IsSatisfiable(solver.And(wordLetters, minterm));
            return overlapsWordLetters ? CharKind.WordLetter : CharKind.General;
        }
Example #2
0
        /// <summary>
        /// Creates a sampler for <paramref name="root"/>, or for the result of applying the
        /// builder's <c>MkNot</c> to it when <paramref name="negative"/> is true.
        /// </summary>
        /// <param name="root">the regex node to sample from</param>
        /// <param name="randomseed">seed for the random number generator; 0 means "pick a seed at random"</param>
        /// <param name="negative">when true, the sampler root is <c>root._builder.MkNot(root)</c></param>
        public SymbolicRegexSampler(SymbolicRegexNode <S> root, int randomseed, bool negative)
        {
            if (negative)
            {
                _root = root._builder.MkNot(root);
            }
            else
            {
                _root = root;
            }

            // A seed of 0 means no seed was supplied, so draw one from an unseeded generator.
            if (randomseed == 0)
            {
                RandomSeed = new Random().Next();
            }
            else
            {
                RandomSeed = randomseed;
            }

            _random = new Random(RandomSeed);
            _solver = root._builder._solver;

            ICharAlgebra<BDD> unicode = SymbolicRegexRunnerFactory.s_unicode._solver;

            // ASCII word characters: A-Z, a-z, underscore and 0-9.
            BDD upperCase = unicode.RangeConstraint('A', 'Z');
            BDD lowerCase = unicode.RangeConstraint('a', 'z');
            BDD underscore = unicode.CharConstraint('_');
            BDD digits = unicode.RangeConstraint('0', '9');
            BDD wordCharacters = unicode.Or(new BDD[] { upperCase, lowerCase, underscore, digits });
            _asciiWordCharacters = wordCharacters;

            // Visible ASCII range (space through tilde) for input character generation.
            BDD visibleAscii = unicode.RangeConstraint('\x20', '\x7E');
            _ascii = visibleAscii;

            // Visible ASCII characters that are not word characters.
            _asciiNonWordCharacters = unicode.And(visibleAscii, unicode.Not(wordCharacters));
        }
Example #3
0
        /// <summary>
        /// Computes the target state reached from this state on the given input minterm.
        /// A <paramref name="minterm"/> equal to the solver's False means the input character is \n
        /// and it is the last character of the input.
        /// </summary>
        /// <param name="minterm">minterm corresponding to some input character, or False for the last \n</param>
        /// <returns>the state built from the derivative of this state's node and the next character kind</returns>
        internal DfaMatchingState <T> Next(T minterm)
        {
            var builder = Node._builder;
            ICharAlgebra<T> solver = builder._solver;
            T wordLetters = builder._wordLetterPredicateForAnchors;
            T newLine = builder._newLinePredicate;

            uint targetCharKind;

            if (solver.False.Equals(minterm))
            {
                // False is the sentinel for the very last \n: derive with the real newline predicate.
                minterm = newLine;
                targetCharKind = CharKind.NewLineS;
            }
            else if (newLine.Equals(minterm))
            {
                // A \n read directly after the start state is the very FIRST \n. It is treated
                // like the very last \n and is used to nullify rev(\Z) in the context of a
                // reversed automaton.
                targetCharKind = PrevCharKind == CharKind.StartStop
                    ? CharKind.NewLineS
                    : CharKind.Newline;
            }
            else if (solver.IsSatisfiable(solver.And(wordLetters, minterm)))
            {
                targetCharKind = CharKind.WordLetter;
            }
            else
            {
                // No special categorization applies.
                targetCharKind = 0;
            }

            // Derive the node under the combined (previous, next) character context.
            SymbolicRegexNode<T> successor =
                Node.MkDerivative(minterm, CharKind.Context(PrevCharKind, targetCharKind));

            // targetCharKind becomes the PrevCharKind of the target state. The builder reuses
            // an existing state if one exists already and otherwise creates a new id for it.
            return builder.MkState(successor, targetCharKind);
        }