Exemple #1
0
        //Child node that generation is forced to go through.
        //When this is not null the node must repeat at least once.
        private RegexNode mReservedPath;

        /// <summary>
        /// Creates a repeat node around <paramref name="refNode"/> and makes this node its parent.
        /// </summary>
        /// <param name="refNode">The node to be repeated.</param>
        /// <param name="minRepeat">Lower repeat bound.</param>
        /// <param name="maxRepeat">Upper repeat bound; -1 is the value the invalidation check treats as "no upper bound".</param>
        /// <param name="sameValue">Stored in <c>mSameValue</c> for use during generation.</param>
        public RegexRepeatNode(RegexNode refNode, int minRepeat, int maxRepeat, bool sameValue)
        {
            //a node that accepts anything from zero to infinity repeats cannot be
            //made invalid, so only the other repeat nodes are registered as invalidable
            bool acceptsAnyCount = minRepeat <= 0 && maxRepeat == -1;

            if (RegexCompiler.IsInvalidSection && !acceptsAnyCount)
            {
                RegexCompiler.InvalidableNodes.Add(this);
            }

            this.mMinRepeat    = minRepeat;
            this.mMaxRepeat    = maxRepeat;
            this.mSameValue    = sameValue;
            this.mReservedPath = null;

            //adopt the repeated node as this node's only child
            this.mRefNode        = refNode;
            this.mRefNode.Parent = this;
        }
Exemple #2
0
        /// <summary>
        /// Returns a random string, conforming to the provided regular expression pattern.
        /// </summary>
        /// <remarks>
        /// When the pattern text contains the <c>\i</c> marker, one of the nodes that were registered as
        /// invalidable during compilation is picked at random, and the generated string is then verified
        /// NOT to match the pattern (with every <c>\i</c> marker removed).
        /// The method resets and then uses the static state held by <c>RegexCompiler</c>.
        /// </remarks>
        /// <param name="regex">The regular expression, which the generated string should conform to.</param>
        /// <param name="seed">The random number generator seed.</param>
        /// <returns>A string, conforming to the provided regular expression pattern.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="regex"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// An invalid string was requested via <c>\i</c> but no node could be invalidated, or the
        /// generated string still matched the pattern.
        /// </exception>
        ///
        /// <example>
        /// The following example demonstrates how to generate a random string from a Regex pattern:
        /// <code lang="C#" >
        /// // Using a custom regex pattern...
        /// Regex emailAddress1 = new Regex(@"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@([0-9a-zA-Z][-\w]*[0-9a-zA-Z]\.)+[a-zA-Z]{2,9})$");
        /// string s1 = StringFactory.GenerateRandomString(emailAddress1, 1234);
        ///
        /// // Using a standard regex pattern...
        /// Regex emailAddress2 = CommonRegexPatterns.EmailAddress;
        /// string s2 = StringFactory.GenerateRandomString(emailAddress2, 1234);
        /// </code>
        /// </example>
        public static string GenerateRandomString(Regex regex, int seed)
        {
            if (regex == null)
            {
                throw new ArgumentNullException("regex");
            }

            //work on the pattern text; the Regex instance itself is only the carrier
            string pattern = regex.ToString();
            Random random  = new Random(seed);

            //reset the static variables
            RegexCompiler.IsInvalidSection = false;
            RegexCompiler.InvalidNode      = null;
            RegexCompiler.InvalidableNodes.Clear();

            //construct the RegEx tree
            RegexCompiler compiler = new RegexCompiler();
            RegexNode     node     = compiler.Compile(pattern);

            //search for a signal to invalidate a node
            //(ordinal search: the marker is a literal two-character token, not linguistic text)
            if (pattern.IndexOf("\\i", StringComparison.Ordinal) != -1)
            {
                //something should have been invalidated
                //select a node to invalidate
                if (RegexCompiler.InvalidableNodes.Count == 0)
                {
                    throw new ArgumentException("Asked to generate invalid: Impossible to invalidate");
                }
                RegexCompiler.InvalidNode = RegexCompiler.InvalidableNodes[random.Next(RegexCompiler.InvalidableNodes.Count)];

                //Mark REOrNodes and RERepeatNodes to ensure that the invalid node will be part of the string
                RegexCompiler.InvalidNode.ReservePath(null);
            }

            //generate and return the string
            string result = node.Generate(random);

            if (RegexCompiler.InvalidNode != null)
            {
                //confirm that the generated string is invalid (e.g. [a-z]|[^a-z] will always fail)
                //
                //The markers must be stripped from the pattern TEXT. Calling regex.Replace("\\i", "")
                //would instead run the regex over the literal input "\i".
                string validPattern = pattern.Replace("\\i", "");

                //The non-capturing group makes ^ and $ anchor the whole pattern; without it a
                //top-level alternation such as a|b would be checked as (^a)|(b$).
                Regex compare = new Regex("^(?:" + validPattern + ")$");
                if (compare.IsMatch(result))
                {
                    throw new ArgumentException(pattern + ": Did not generate invalid string: " + result);
                }
            }

            return(result);
        }
        /// <summary>
        /// Picks one character for this character set.
        /// </summary>
        /// <remarks>
        /// Normally the character is drawn from the characters the set allows. When this node is
        /// the one selected for invalidation (<c>RegexCompiler.InvalidNode</c>), it is drawn from
        /// the characters the set does NOT allow instead.
        /// </remarks>
        /// <param name="random">Source of randomness.</param>
        /// <returns>A one-character string.</returns>
        public override string Generate(Random random)
        {
            bool invalidate = (this == RegexCompiler.InvalidNode);

            RegexNode.AssertParse(mNumChoices > 0, "No valid range specified in char set");

            //number of map entries we are allowed to pick from:
            //the excluded ones when invalidating, the available ones otherwise
            int candidateCount = invalidate ? mMapSize - mNumChoices : mNumChoices;
            int remaining      = random.Next(candidateCount);

            //map value that marks a candidate entry; a positive set stores its members as 1,
            //a negative set stores the excluded characters as 1, and invalidating flips the choice
            int wanted = (mPositiveSet != invalidate) ? 1 : 0;

            //walk the map until the chosen candidate has been passed
            int position = -1;
            while (remaining >= 0)
            {
                position++;
                if (mMap[position] == wanted)
                {
                    remaining--;
                }
            }

            return(Convert.ToChar(position).ToString());
        }
Exemple #4
0
        /// <summary>
        /// Compiles one branch: the run of pieces up to the next '|', ')' or the end of the pattern.
        /// </summary>
        /// <returns>
        /// The single piece when the branch contains only one, otherwise a <c>RegexAndNode</c>
        /// holding all pieces in order.
        /// </returns>
        public RegexNode CompileBranch()
        {
            RegexNode current = CompilePiece();

            //a branch made of a single piece needs no wrapper node
            bool singlePiece = mParseDone || mCurrent == '|' || mCurrent == ')';
            if (singlePiece)
            {
                return(current);
            }

            RegexAndNode sequence = new RegexAndNode();

            while (true)
            {
                //attach the most recently compiled piece to the sequence
                sequence.Children.Add(current);
                current.Parent = sequence;

                //stop at the end of the pattern or at a branch/group delimiter
                if (mParseDone || mCurrent == '|' || mCurrent == ')')
                {
                    break;
                }

                current = CompilePiece();
            }

            return(sequence);
        }
Exemple #5
0
        /// <summary>
        /// Compiles an expression, i.e. the main body of the pattern or the content of a parenthesis:
        /// one or more branches separated by '|'.
        /// </summary>
        /// <returns>
        /// The single branch when there is no alternation, otherwise a <c>RegexOrNode</c>
        /// holding every alternative in order.
        /// </returns>
        public RegexNode CompileExpr()
        {
            RegexNode firstBranch = CompileBranch();

            //no alternation: the branch itself is the expression
            if (mCurrent != '|')
            {
                return(firstBranch);
            }

            RegexOrNode alternatives = new RegexOrNode();

            alternatives.Children.Add(firstBranch);
            firstBranch.Parent = alternatives;

            //the cursor is on a '|' here, so at least one more alternative follows
            do
            {
                NextChar();     //step over the '|'

                RegexNode alternative = CompileBranch();
                alternatives.Children.Add(alternative);
                alternative.Parent = alternatives;
            }
            while (mCurrent == '|');

            return(alternatives);
        }
Exemple #6
0
        /// <summary>
        /// Compiles a single-character escape such as \d \D \s \S \w \W \n \t, or an escaped metacharacter.
        /// </summary>
        /// <param name="c">The character that follows the backslash.</param>
        /// <returns>
        /// A text node for escaped metacharacters and control-character escapes, a character set node
        /// for the class escapes, and an empty text node for the anchors \A \Z \z.
        /// For any other character a parse failure is reported via <c>AssertParse</c>; if that call
        /// returns, the result is <c>null</c>.
        /// </returns>
        public RegexNode CompileSimpleMacro(char c)
        {
            //an escaped metacharacter simply stands for itself
            const string escapedLiterals = @"[]{}()*-+.?\|";
            if (escapedLiterals.IndexOf(c) >= 0)
            {
                return(new RegexTextNode(c.ToString()));
            }

            RegexSetNode charSet;

            switch (c)
            {
            //the lower-case class escapes build a positive set, the upper-case ones the negated set

            case 'd':     // [0-9]
            case 'D':     // [^0-9]
                charSet = new RegexSetNode(c == 'd');
                charSet.AddRange('0', '9');
                return(charSet);

            case 's':     // whitespace
            case 'S':     // anything but whitespace
                charSet = new RegexSetNode(c == 's');
                charSet.AddChars(" \r\n\f\v\t");
                return(charSet);

            case 'w':     // [a-zA-Z0-9_]
            case 'W':     // [^a-zA-Z0-9_]
                charSet = new RegexSetNode(c == 'w');
                charSet.AddRange('a', 'z');
                charSet.AddRange('A', 'Z');
                charSet.AddRange('0', '9');
                charSet.AddChars("_");
                return(charSet);

            //control-character escapes produce the character itself

            case 'f':
                return(new RegexTextNode("\f"));

            case 'n':
                return(new RegexTextNode("\n"));

            case 'r':
                return(new RegexTextNode("\r"));

            case 't':
                return(new RegexTextNode("\t"));

            case 'v':
                return(new RegexTextNode("\v"));

            //anchors contribute nothing to the generated text

            case 'A':
            case 'Z':
            case 'z':
                return(new RegexTextNode(String.Empty));

            default:
                AssertParse(false, "Invalid escape.");
                return(null);
            }
        }
Exemple #7
0
        //Compile token
        //
        //Parses exactly one atom at the current position and returns the node built for it.
        //An atom is one of: '.', a character set '[...]', a parenthesised sub expression,
        //an anchor ('^' or '$'), a backslash escape, or a run of literal text.
        //Fails via AssertParse when the pattern has already ended or when the current
        //character is a quantifier/delimiter ("|)?+*{}") that cannot start an atom.
        public RegexNode CompileAtom()
        {
            RegexNode    atom  = null;
            RegexSetNode set   = null;
            //start/end: bounds of a back reference name inside the pattern, filled by ExtractBackrefName
            int          start = 0;
            int          end   = 0;

            AssertParse(!mParseDone, "Reached end of string. No element found.");
            AssertParse(!("|)?+*{}".Contains(mCurrent.ToString())), "No element found.");

            switch (mCurrent)
            {
            case '.':     //Any single char (limited here to character codes 0 through 127)
                atom = set = new RegexSetNode(true);
                set.AddRange(Convert.ToChar(0), Convert.ToChar(127));
                NextChar();
                break;

            case '[':     //Positive or negative set
                NextChar();
                atom = CompileSet();
                break;

            case '(':             //Sub expression
                int refIndex = 0; //-2 -> don't capture, -1 -> named capture, 0-> indexed capture
                NextChar();

                //By default, subexpressions must be captured for future reference,
                if (mCurrent == '?')
                {
                    NextChar();
                    if (mCurrent == ':')     //If sub expression begins with ?: it means don't store reference
                    {
                        NextChar();
                        refIndex = -2;
                    }
                    else     //Named backreference, extract backreference name
                    {
                        ExtractBackrefName(ref start, ref end);
                        refIndex = -1;
                    }
                }     //else use indexed backreference

                //the group body is a full expression; it must be closed by ')'
                atom = new RegexSubExpressionNode(CompileExpr());
                AssertParse(mCurrent == ')', "Expected ')'");
                NextChar();

                if (refIndex == -1)     //Named backreference
                {
                    //the name is the pattern text between start and end (inclusive)
                    (atom as RegexSubExpressionNode).Name = mRegex.ToString().Substring(start, end - start + 1);
                    mNamedBackRefs.Add(atom);
                }
                else if (refIndex == 0)     //Indexed backreference
                {
                    mBackRefs.Add(atom);
                }

                break;

            case '^':
            case '$':
                //anchors produce no output, so they become an empty text node
                atom = new RegexTextNode(String.Empty);
                NextChar();
                break;

            case '\\':
                NextChar();

                //\x.., \u.. and \0..: the text node holds whatever EscapeValue() returns
                //NOTE(review): no NextChar() follows in the first three branches, so EscapeValue and
                //ExtractBackrefName presumably advance the cursor themselves - confirm
                if (Char.ToLower(mCurrent) == 'x' || Char.ToLower(mCurrent) == 'u' || mCurrent == '0')
                {
                    atom = new RegexTextNode(EscapeValue().ToString());
                }
                else if (Char.IsDigit(mCurrent))     //referencing a backreference by index
                {
                    atom = GetBackRef((int)EscapeValue());
                    AssertParse(atom != null, "Couldn't find back reference");
                    atom = new RegexSubExpressionNode(atom);
                }
                else if (mCurrent == 'k')     //referencing a backreference by name
                {
                    NextChar();
                    ExtractBackrefName(ref start, ref end);
                    atom = GetBackRef(mRegex.ToString().Substring(start, end - start + 1));
                    AssertParse(atom != null, "Couldn't find back reference");
                    atom = new RegexSubExpressionNode(atom);     //Create a copy of the referenced node
                }
                else     //every other escape (\d, \w, \n, escaped metacharacters, ...)
                {
                    atom = CompileSimpleMacro(mCurrent);
                    NextChar();
                }
                break;

            default:
                //literal text: everything from the current character up to, but not including,
                //the next special character becomes a single text node
                int closeIndex = mRegex.ToString().IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);

                if (closeIndex == -1)
                {
                    //no special character follows: the literal runs to the end of the pattern
                    mParseDone = true;
                    closeIndex = mRegex.Length - 1;
                    atom       = new RegexTextNode(mRegex.ToString().Substring(mIndex, closeIndex - mIndex + 1));
                }
                else
                {
                    atom = new RegexTextNode(mRegex.ToString().Substring(mIndex, closeIndex - mIndex));
                }

                //move the cursor directly to the special character (or to the last character at the end)
                mIndex   = closeIndex;
                mCurrent = mRegex[mIndex];
                break;
            }

            return(atom);
        }
Exemple #8
0
        /// <summary>
        /// Compiles one piece: an atom optionally followed by a quantifier (*, +, ? or {...}).
        /// </summary>
        /// <remarks>
        /// A <c>\i</c> marker in front of the atom flags the atom (and the nodes created while
        /// compiling it) as invalidable; a <c>\i</c> marker placed between the atom and its
        /// quantifier flags the repeat node itself. The static <c>RegexCompiler.IsInvalidSection</c>
        /// flag is restored to its previous value before returning.
        /// Inside braces a leading '=' (e.g. <c>{=3}</c>) sets the "same value" flag passed to the
        /// repeat node.
        /// </remarks>
        /// <returns>The atom itself, or a <c>RegexRepeatNode</c> wrapping it when a quantifier follows.</returns>
        public RegexNode CompilePiece()
        {
            RegexNode node = null;

            //store the old invalidating state for restoring after this node
            bool oldInvalidState = RegexCompiler.IsInvalidSection;

            //check if we want to invalidate the 'atom' node and subnodes
            //(the length check keeps the look-ahead inside the pattern when '\' is its last character)
            if (mCurrent == '\\' && mIndex + 1 < mRegex.Length && mRegex[mIndex + 1] == 'i') //entering invalidating nodes section
            {
                NextChar();
                NextChar();
                //invalidate the following node and subnodes
                RegexCompiler.IsInvalidSection = true;
            }

            RegexNode atom = CompileAtom();

            //revert the invalidating state
            RegexCompiler.IsInvalidSection = oldInvalidState;

            //check special case of invalidating a repeating node
            //have to confirm with "*+?{" to verify that it's not another type of node (that parses elsewhere)
            //(the length check keeps the two-character look-ahead inside the pattern when the
            //pattern ends with '\' or with "\i")
            if (mCurrent == '\\' && mIndex + 2 < mRegex.Length &&
                mRegex[mIndex + 1] == 'i' && "*+?{".Contains(mRegex[mIndex + 2].ToString()))
            {
                NextChar();
                NextChar();
                //invalidate the repeating node
                RegexCompiler.IsInvalidSection = true;
            }

            const int MAXREPEAT = -1; //value representing infinity

            switch (mCurrent)
            {
            case '*':     //zero or more repetition
                node = new RegexRepeatNode(atom, 0, MAXREPEAT, false);
                NextChar();
                break;

            case '+':     //one or more repetition
                node = new RegexRepeatNode(atom, 1, MAXREPEAT, false);
                NextChar();
                break;

            case '?':     //zero or one repetition
                node = new RegexRepeatNode(atom, 0, 1, false);
                NextChar();
                break;

            case '{':     //Min and max repetition limits defined
                int  nMin     = 0;
                int  nMax     = 0;
                bool sameChar = false;
                NextChar();

                if (mCurrent == '=')
                {
                    sameChar = true;
                    NextChar();
                }

                int closeIndex = mRegex.ToString().IndexOf('}', mIndex);
                AssertParse(closeIndex != -1, "Expected '}'");

                //text between the braces: "n", "n,m" or "n,"
                string[] repeatTokens = mRegex.ToString().Substring(mIndex, closeIndex - mIndex).
                                        Split(new char[] { ',' });

                if (repeatTokens.Length == 1)
                {
                    nMin = nMax = int.Parse(repeatTokens[0]);
                }
                else if (repeatTokens.Length == 2)
                {
                    nMin = int.Parse(repeatTokens[0]);
                    //check for {n,} case
                    if (repeatTokens[1].Length > 0)
                    {
                        nMax = int.Parse(repeatTokens[1]);
                    }
                    else
                    {
                        nMax = MAXREPEAT;     //only lower bound specified
                    }
                }
                else
                {
                    AssertParse(false, "Repeat values cannot be parsed");
                }

                //repeatTokens[1] is only inspected when nMin > nMax, which cannot happen for the
                //single-token form (nMin == nMax), so the index is always valid here
                AssertParse(nMin <= nMax || repeatTokens[1].Length == 0, "Max repeat is less than min repeat");
                mIndex = closeIndex;
                NextChar();
                node = new RegexRepeatNode(atom, nMin, nMax, sameChar);
                break;

            default:     //no quantifier: the piece is just the atom
                node = atom;
                break;
            }

            //revert invalidation after generating the repeating node
            RegexCompiler.IsInvalidSection = oldInvalidState;

            return(node);
        }
        //Child that this Or node is forced to choose during generation.
        //While this is null the child is chosen at random.
        private RegexNode mReservedPath = null;

        /// <summary>
        /// Records <paramref name="child"/> as the alternative that must be used when the string
        /// is generated, then forwards the reservation to the base implementation.
        /// </summary>
        /// <param name="child">The entry of Children that has to be taken.</param>
        public override void ReservePath(RegexNode child)
        {
            this.mReservedPath = child;

            base.ReservePath(child);
        }
Exemple #10
0
 /// <summary>
 /// Records <paramref name="child"/> as the reserved path of this repeat node, then forwards
 /// the reservation to the base implementation.
 /// </summary>
 /// <param name="child">The repeated node (mRefNode) that must be generated; once reserved, the node cannot repeat zero times.</param>
 public override void ReservePath(RegexNode child)
 {
     this.mReservedPath = child;

     base.ReservePath(child);
 }
        //Name of this subexpression; used to look it up for named backreferences
        public string Name;

        /// <summary>
        /// Wraps <paramref name="subExpr"/> in a subexpression node and makes this node its parent.
        /// </summary>
        /// <param name="subExpr">The node that forms the body of the subexpression.</param>
        public RegexSubExpressionNode(RegexNode subExpr)
        {
            this.mRefNode        = subExpr;
            this.mRefNode.Parent = this;
        }