Beispiel #1
0
        /// <summary>Generates up to k random strings accepted by the regex</summary>
        public IEnumerable <string> GenerateRandomMembers(int k)
        {
            for (int i = 0; i < k; i++)
            {
                // Holds the generated input so far
                StringBuilder input_so_far = new();

                // Initially there is no previous character
                // Here one could also consider previous characters for example for \b, \B, and ^ anchors
                // and initialize input_so_far accordingly
                uint prevCharKind = CharKind.StartStop;

                // This flag is set to false in the unlikely situation that generation ends up in a dead-end
                bool generationSucceeded = true;

                // Current set of states reached initially contains just the root
                List <SymbolicRegexNode <S> > states = new();
                states.Add(_root);

                // Used for end suffixes
                List <string> possible_endings = new();

                List <SymbolicRegexNode <S> > nextStates = new();

                while (true)
                {
                    Debug.Assert(states.Count > 0);

                    if (CanBeFinal(states))
                    {
                        // Unconditionally final state or end of the input due to \Z anchor for example
                        if (IsFinal(states) || IsFinal(states, CharKind.Context(prevCharKind, CharKind.StartStop)))
                        {
                            possible_endings.Add("");
                        }

                        // End of line due to end-of-line anchor
                        if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.Newline)))
                        {
                            possible_endings.Add("\n");
                        }

                        // Related to wordborder due to \b or \B
                        if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.WordLetter)))
                        {
                            possible_endings.Add(ChooseChar(_asciiWordCharacters).ToString());
                        }

                        // Related to wordborder due to \b or \B
                        if (IsFinal(states, CharKind.Context(prevCharKind, CharKind.General)))
                        {
                            possible_endings.Add(ChooseChar(_asciiNonWordCharacters).ToString());
                        }
                    }

                    // Choose to stop here based on a coin-toss
                    if (possible_endings.Count > 0 && ChooseRandomlyTrueOrFalse())
                    {
                        //Choose some suffix that allows some anchor (if any) to be nullable
                        input_so_far.Append(Choose(possible_endings));
                        break;
                    }

                    SymbolicRegexNode <S> state = Choose(states);
                    char c     = '\0';
                    uint cKind = 0;
                    // Observe that state.MkDerivative() can be a deadend
                    List <(S, SymbolicRegexNode <S>?, SymbolicRegexNode <S>)> paths = new(state.MkDerivative().EnumeratePaths(_solver.True));
                    if (paths.Count > 0)
                    {
                        (S, SymbolicRegexNode <S>?, SymbolicRegexNode <S>)path = Choose(paths);
                        // Consider a random path from some random state in states and
                        // select a random member of the predicate on that path
                        c = ChooseChar(ToBDD(path.Item1));

                        // Map the character back into the corresponding character constraint of the solver
                        S c_pred = _solver.CharConstraint(c);

                        // Determine the character kind of c
                        cKind = IsNewline(c_pred) ? CharKind.Newline : (IsWordchar(c_pred) ? CharKind.WordLetter : CharKind.General);

                        // Construct the combined context of previous and c kind
                        uint context = CharKind.Context(prevCharKind, cKind);

                        // Step into the next set of states
                        nextStates.AddRange(Step(states, c_pred, context));
                    }

                    // In the case that there are no next states: stop here
                    if (nextStates.Count == 0)
                    {
                        if (possible_endings.Count > 0)
                        {
                            input_so_far.Append(Choose(possible_endings));
                        }
                        else
                        {
                            // Ending up here is unlikely but possible for example for infeasible patterns such as @"no\bway"
                            // or due to poor choice of c -- no anchor is enabled -- so this is a deadend
                            generationSucceeded = false;
                        }
                        break;
                    }

                    input_so_far.Append(c);
                    states.Clear();
                    possible_endings.Clear();
                    List <SymbolicRegexNode <S> > tmp = states;
                    states       = nextStates;
                    nextStates   = tmp;
                    prevCharKind = cKind;
                }

                if (generationSucceeded)
                {
                    yield return(input_so_far.ToString());
                }
            }
        }