Exemple #1
0
        //Add the chars in alphabet from start to end (both inclusive) to the set
        //start - first character of the range; must be strictly less than end
        //end   - last character of the range
        //Fails the parse assertion when start is not strictly less than end
        internal void AddRange(char start, char end)
        {
            //end is a char and therefore can never exceed char.MaxValue, only the ordering needs checking
            RENode.AssertParse(start < end, "Invalid range specified in char set");

            //mMap is indexed by character code (indices 0..mMapSize-1 as used by the loop below),
            //so the map has to grow as soon as end reaches mMapSize, not only once it exceeds it
            if (end >= mMapSize)
            {
                ExpandToUnicodeRange();
            }

            //mark the added characters and update the number of available choices
            for (int c = start; c <= end; c++)
            {
                if (mMap[c] == 0)
                {
                    mMap[c]      = 1;
                    //a marked char is one more choice for a positive set, one less for a negative set
                    mNumChoices += mPositiveSet ? 1 : -1;
                }
            }

            //check if this set still has invalid characters available
            if ((mPositiveSet && mNumChoices == mMapSize) || (!mPositiveSet && mNumChoices == 0))
            {
                //can never be invalid
                RECompiler.InvalidableNodes.Remove(this);
            }
        }
Exemple #2
0
 //Required for GenerateInvalid.
 //Walks up the RegEx tree so that every ancestor learns which of its children
 //has to be part of the generated string.
 //child - The child node that must be part of the generated string (not used at this level)
 internal virtual void ReservePath(RENode child)
 {
     //the root has no parent, nothing left to notify
     if (Parent == null)
     {
         return;
     }

     //this node is the child its parent has to keep on the path
     Parent.ReservePath(this);
 }
Exemple #3
0
        //Compile a character set (i.e expressions like [abc], [A-Z], [^abc])
        //Expects mCurrent to be the first char after the opening '['
        //A set starting with ':' is handed to CompileMacro and must be closed by ":]"
        //Returns the node representing the set; parse assertions fail on malformed sets
        internal RENode CompileSet()
        {
            if (mCurrent == ':')
            {
                NextChar();

                //search from the current position: an earlier ":]" in the pattern must not be
                //mistaken for the end of this macro
                int closeIndex = mRegex.ToString().IndexOf(":]", mIndex, StringComparison.Ordinal);
                AssertParse(closeIndex != -1, "Expected ':]'.");

                RENode macro = CompileMacro(mIndex, closeIndex - mIndex);

                //skip over the closing ":]"
                mIndex = closeIndex;
                NextChar();
                NextChar();
                return(macro);
            }

            //a leading ^ makes this a negative set
            bool      negated = (mCurrent == '^');
            RESetNode set     = new RESetNode(!negated);

            if (negated)
            {
                NextChar();
            }

            if (mCurrent == '-' || mCurrent == ']') //if - or ] are specified as the first char, escape is not required
            {
                set.AddChars(mCurrent.ToString());
                NextChar();
            }

            while ((!mParseDone) && (mCurrent != ']'))
            {
                char cStart = CompileSetChar();

                if (mCurrent == '-')
                {
                    //range of the form a-z
                    NextChar();
                    AssertParse(!mParseDone && mCurrent != ']', "End of range is not specified.");
                    char cEnd = CompileSetChar();
                    set.AddRange(cStart, cEnd);
                }
                else
                {
                    set.AddChars(cStart.ToString());
                }
            }

            AssertParse(mCurrent == ']', "Expected ']'.");
            NextChar();
            return(set);
        }
Exemple #4
0
        private RENode mReservedPath;              //The child node that has to be generated.
        //A non-null value means this node must repeat at least once

        //Creates a node repeating refNode
        //refNode   - the node being repeated; this node becomes its parent
        //minRepeat - lower bound of the repeat count
        //maxRepeat - upper bound of the repeat count, -1 meaning no upper bound
        //sameValue - stored in mSameValue
        internal RERepeatNode(RENode refNode, int minRepeat, int maxRepeat, bool sameValue)
        {
            //a repeat covering zero to infinity accepts every count, so only narrower repeats can be invalidated
            bool coversZeroToInfinity = (minRepeat <= 0) && (maxRepeat == -1);

            if (RECompiler.IsInvalidSection && !coversZeroToInfinity)
            {
                RECompiler.InvalidableNodes.Add(this);
            }

            mMinRepeat = minRepeat;
            mMaxRepeat = maxRepeat;
            mSameValue = sameValue;

            //link the repeated node into the tree
            mRefNode        = refNode;
            mRefNode.Parent = this;
        }
Exemple #5
0
        //Generate a single character from this set
        //When this node is the one selected for invalidation (RECompiler.InvalidNode), the character is
        //picked from the chars that are NOT part of the set, otherwise from the chars that are
        //random - source of randomness used to pick the character
        //Returns a one-character string; parse assertions fail when there is nothing to pick from
        internal override string Generate(Random random)
        {
            bool invalidate = (this == RECompiler.InvalidNode);

            RENode.AssertParse(mNumChoices > 0, "No valid range specified in char set");

            //number of chars to choose from: the excluded ones when invalidating, the available ones otherwise
            int candidateCount = invalidate ? mMapSize - mNumChoices : mNumChoices;

            //without this check an invalidated set with nothing excluded would seek past the end of mMap
            RENode.AssertParse(candidateCount > 0, "No invalid characters available in char set");

            //a positive set picks marked entries, a negative set unmarked ones; invalidating inverts that
            bool wantMarked = (mPositiveSet != invalidate);

            int randIndex = random.Next(candidateCount);

            int i = -1;
            while (randIndex >= 0)  //seek to the selected element
            {
                i++;
                if ((wantMarked && mMap[i] == 1) || (!wantMarked && mMap[i] == 0))
                {
                    randIndex--;
                }
            }

            return(Convert.ToChar(i).ToString());
        }
Exemple #6
0
        /// <summary>
        /// Generates a string based on the given regular expression
        /// if any nodes are prepended with \i, then one of these nodes will be chosen
        /// at random to be invalidated
        /// </summary>
        /// <param name="random">Random object to use for generation</param>
        /// <param name="regex">Regular expression used to generate the string</param>
        /// <returns>generated string</returns>
        /// <exception cref="ArgumentNullException"><paramref name="regex"/> is null</exception>
        /// <exception cref="ArgumentException">
        /// An invalid string was requested but no node can be invalidated, or the string
        /// generated for an invalidation request still matches the expression
        /// </exception>
        public static string NextString(Random random, string regex)
        {
            //fail early with a descriptive exception instead of somewhere inside the compiler
            if (regex == null)
            {
                throw new ArgumentNullException("regex");
            }

            //reset the static variables
            RECompiler.IsInvalidSection = false;
            RECompiler.InvalidNode      = null;
            RECompiler.InvalidableNodes.Clear();

            //construct the RegEx tree
            RECompiler compiler = new RECompiler();
            RENode     node     = compiler.Compile(regex);

            //search for a signal to invalidate a node
            if (regex.IndexOf("\\i", StringComparison.Ordinal) != -1)
            {
                //something should have been invalidated
                //select a node to invalidate
                if (RECompiler.InvalidableNodes.Count == 0)
                {
                    throw new ArgumentException("Asked to generate invalid: Impossible to invalidate");
                }
                RECompiler.InvalidNode = RECompiler.InvalidableNodes[random.Next(RECompiler.InvalidableNodes.Count)];

                //Mark REOrNodes and RERepeatNodes to ensure that the invalid node will be part of the string
                RECompiler.InvalidNode.ReservePath(null);
            }

            //generate and return the string
            string result = node.Generate(random);

            if (RECompiler.InvalidNode != null)
            {
                //confirm that the generated string is invalid (e.g. [a-z]|[^a-z] will always fail)
                //the \i markers are not regex syntax, strip them before handing the pattern to Regex
                Regex compare = new Regex("^" + regex.Replace("\\i", "") + "$");
                if (compare.IsMatch(result))
                {
                    throw new ArgumentException(regex + ": Did not generate invalid string: " + result);
                }
            }

            return(result);
        }
Exemple #7
0
        //Compile one branch, i.e. the sequence of pieces up to the next '|', ')' or the end of the regex
        //Returns the piece itself when the branch holds a single piece,
        //otherwise an REAndNode with all pieces as children
        internal RENode CompileBranch()
        {
            RENode pending = CompilePiece();

            //single piece: no wrapper node needed
            if (mParseDone || mCurrent == '|' || mCurrent == ')')
            {
                return(pending);
            }

            REAndNode sequence = new REAndNode();

            while (true)
            {
                //attach the piece compiled last
                sequence.Children.Add(pending);
                pending.Parent = sequence;

                //stop at the end of the branch
                if (mParseDone || mCurrent == '|' || mCurrent == ')')
                {
                    break;
                }

                pending = CompilePiece();
            }

            return(sequence);
        }
Exemple #8
0
        //Compile an expression, i.e. the main body or an expression in parentheses
        //Returns the branch itself when there is no alternation,
        //otherwise an REOrNode with every '|'-separated branch as a child
        internal RENode CompileExpr()
        {
            RENode pending = CompileBranch();

            //no alternation: the branch is the whole expression
            if (mCurrent != '|')
            {
                return(pending);
            }

            REOrNode alternation = new REOrNode();

            while (true)
            {
                //attach the branch compiled last
                alternation.Children.Add(pending);
                pending.Parent = alternation;

                if (mCurrent != '|')
                {
                    break;
                }

                //skip the '|' and compile the next alternative
                NextChar();
                pending = CompileBranch();
            }

            return(alternation);
        }
Exemple #9
0
        //Compile \d \D \s \S etc.
        //c - the character following the backslash
        //Returns a text node for escaped literals and control characters, a set node for
        //character classes, and an empty text node for the anchors \A \Z \z
        //Fails the parse assertion with "Invalid escape." for any other character
        internal RENode CompileSimpleMacro(char c)
        {
            RENode    node = null;
            RESetNode set  = null;

            //escaped special characters stand for themselves; ^ and $ are included because
            //CompileAtom treats both as special, so escaping is the only way to write them literally
            if (@"[]{}()*-+.?\|^$".IndexOf(c) >= 0)
            {
                return(new RETextNode(c.ToString()));
            }

            switch (c)
            {
            case 'd':     // [0-9]
            case 'D':     // [^0-9]
                node = set = new RESetNode(c == 'd');
                set.AddRange('0', '9');
                break;

            case 's':     // whitespace
            case 'S':     // anything but whitespace
                node = set = new RESetNode(c == 's');
                set.AddChars(" \r\n\f\v\t");
                break;

            case 'w':     // [a-zA-Z0-9_]
            case 'W':     // [^a-zA-Z0-9_]
                node = set = new RESetNode(c == 'w');
                set.AddRange('a', 'z');
                set.AddRange('A', 'Z');
                set.AddRange('0', '9');
                set.AddChars("_");
                break;

            case 'f':
                node = new RETextNode("\f");
                break;

            case 'n':
                node = new RETextNode("\n");
                break;

            case 'r':
                node = new RETextNode("\r");
                break;

            case 't':
                node = new RETextNode("\t");
                break;

            case 'v':
                node = new RETextNode("\v");
                break;

            case 'A':     // anchors contribute no characters
            case 'Z':
            case 'z':
                node = new RETextNode(String.Empty);
                break;

            default:
                AssertParse(false, "Invalid escape.");
                break;
            }

            return(node);
        }
Exemple #10
0
        //Compile a single token: '.', a set, a sub expression, an anchor, an escape or literal text
        //Expects mCurrent to be the first char of the token and leaves the parser behind the token
        //Returns the node representing the token; parse assertions fail when no token can be read
        internal RENode CompileAtom()
        {
            RENode atom      = null;
            int    nameStart = 0;
            int    nameEnd   = 0;

            AssertParse(!mParseDone, "Reached end of string. No element found.");
            AssertParse(!("|)?+*{}".Contains(mCurrent.ToString())), "No element found.");

            switch (mCurrent)
            {
            case '.':     //Any single char
            {
                RESetNode anyChar = new RESetNode(true);
                atom = anyChar;
                anyChar.AddRange(Convert.ToChar(0), Convert.ToChar(127));
                NextChar();
                break;
            }

            case '[':     //Positive or negative set
            {
                NextChar();
                atom = CompileSet();
                break;
            }

            case '(':     //Sub expression
            {
                //By default, subexpressions are captured by index for future reference
                bool namedCapture   = false;
                bool indexedCapture = true;
                NextChar();

                if (mCurrent == '?')
                {
                    NextChar();
                    indexedCapture = false;

                    if (mCurrent == ':')     //If sub expression begins with ?: it means don't store reference
                    {
                        NextChar();
                    }
                    else     //Named backreference, extract backreference name
                    {
                        ExtractBackrefName(ref nameStart, ref nameEnd);
                        namedCapture = true;
                    }
                }

                RESubExprNode subExpr = new RESubExprNode(CompileExpr());
                atom = subExpr;
                AssertParse(mCurrent == ')', "Expected ')'");
                NextChar();

                if (namedCapture)
                {
                    subExpr.Name = mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1);
                    mNamedBackRefs.Add(atom);
                }
                else if (indexedCapture)
                {
                    mBackRefs.Add(atom);
                }

                break;
            }

            case '^':     //Anchors add nothing to the generated string
            case '$':
            {
                atom = new RETextNode(String.Empty);
                NextChar();
                break;
            }

            case '\\':
            {
                NextChar();

                char lowered = Char.ToLower(mCurrent, CultureInfo.InvariantCulture);

                if (lowered == 'x' || lowered == 'u' || mCurrent == '0')
                {
                    //escaped character value
                    atom = new RETextNode(EscapeValue().ToString());
                }
                else if (Char.IsDigit(mCurrent))
                {
                    //referencing a backreference by index
                    RENode referenced = GetBackRef((int)EscapeValue());
                    AssertParse(referenced != null, "Couldn't find back reference");
                    atom = new RESubExprNode(referenced);
                }
                else if (mCurrent == 'k')
                {
                    //referencing a backreference by name
                    NextChar();
                    ExtractBackrefName(ref nameStart, ref nameEnd);
                    RENode referenced = GetBackRef(mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1));
                    AssertParse(referenced != null, "Couldn't find back reference");
                    atom = new RESubExprNode(referenced);     //Wrap the referenced node in a new sub expression
                }
                else
                {
                    atom = CompileSimpleMacro(mCurrent);
                    NextChar();
                }
                break;
            }

            default:      //Literal text up to the next special character
            {
                string pattern   = mRegex.ToString();
                int    stopIndex = pattern.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);

                if (stopIndex != -1)
                {
                    atom = new RETextNode(pattern.Substring(mIndex, stopIndex - mIndex));
                }
                else
                {
                    //no special character left, the text runs to the end of the regex
                    mParseDone = true;
                    stopIndex  = mRegex.Length - 1;
                    atom       = new RETextNode(pattern.Substring(mIndex, stopIndex - mIndex + 1));
                }

                mIndex   = stopIndex;
                mCurrent = mRegex[mIndex];
                break;
            }
            }

            return(atom);
        }
Exemple #11
0
        //Compile token followed by *+?{}
        //A leading \i marks the token (and its subnodes) as invalidable; a \i placed between the
        //token and its repeat operator marks the repeat node as invalidable
        //Returns the token itself, or an RERepeatNode wrapping it when a repeat operator follows
        internal RENode CompilePiece()
        {
            RENode node = null;

            //store the old invalidating state for restoring after this node
            bool oldInvalidState = RECompiler.IsInvalidSection;

            //check if we want to invalidate the 'atom' node and subnodes
            //the length check keeps a pattern ending in a backslash from reading past the end of the regex
            if (mCurrent == '\\' && mIndex + 1 < mRegex.Length && mRegex[mIndex + 1] == 'i') //entering invalidating nodes section
            {
                NextChar();
                NextChar();
                //invalidate the following node and subnodes
                RECompiler.IsInvalidSection = true;
            }

            RENode atom = CompileAtom();

            //revert the invalidating state
            RECompiler.IsInvalidSection = oldInvalidState;

            //check special case of invalidating a repeating node
            //have to confirm with "*+?{" to verify that it's not another type of node (that parses elsewhere)
            //the length check keeps a pattern ending in \i from reading past the end of the regex
            if (mCurrent == '\\' &&
                mIndex + 2 < mRegex.Length &&
                mRegex[mIndex + 1] == 'i' &&
                "*+?{".Contains(mRegex[mIndex + 2].ToString()))
            {
                NextChar();
                NextChar();
                //invalidate the repeating node
                RECompiler.IsInvalidSection = true;
            }

            const int MAXREPEAT = -1; //value representing infinity

            switch (mCurrent)
            {
            case '*':     //zero or more repetition
                node = new RERepeatNode(atom, 0, MAXREPEAT, false);
                NextChar();
                break;

            case '+':     //one or more repetition
                node = new RERepeatNode(atom, 1, MAXREPEAT, false);
                NextChar();
                break;

            case '?':     //zero or one repetition
                node = new RERepeatNode(atom, 0, 1, false);
                NextChar();
                break;

            case '{':     //Min and max repetition limits defined
                int  nMin     = 0;
                int  nMax     = 0;
                bool sameChar = false;
                NextChar();

                //a leading '=' is passed on as the sameValue flag of the repeat node
                if (mCurrent == '=')
                {
                    sameChar = true;
                    NextChar();
                }

                int closeIndex = mRegex.ToString().IndexOf('}', mIndex);
                AssertParse(closeIndex != -1, "Expected '}'");

                string[] repeatTokens = mRegex.ToString().Substring(mIndex, closeIndex - mIndex).
                                        Split(new char[] { ',' });

                if (repeatTokens.Length == 1)
                {
                    //{n}: exact count
                    nMin = nMax = int.Parse(repeatTokens[0], CultureInfo.InvariantCulture);
                }
                else if (repeatTokens.Length == 2)
                {
                    nMin = int.Parse(repeatTokens[0], CultureInfo.InvariantCulture);
                    //check for {n,} case
                    if (repeatTokens[1].Length > 0)
                    {
                        nMax = int.Parse(repeatTokens[1], CultureInfo.InvariantCulture);
                    }
                    else
                    {
                        nMax = MAXREPEAT;     //only lower bound specified
                    }
                }
                else
                {
                    AssertParse(false, "Repeat values cannot be parsed");
                }

                //for {n,} nMax is MAXREPEAT (-1), so the ordering check only applies when an upper bound is given
                AssertParse(nMin <= nMax || repeatTokens[1].Length == 0, "Max repeat is less than min repeat");
                mIndex = closeIndex;
                NextChar();
                node = new RERepeatNode(atom, nMin, nMax, sameChar);
                break;

            default:      //no repeat operator
                node = atom;
                break;
            }

            //revert invalidation after generating the repeating node
            RECompiler.IsInvalidSection = oldInvalidState;

            return(node);
        }
Exemple #12
0
        internal string Name; //Name identifying this subexpression, used for named backreferences

        //Wraps subExpr in a sub expression node
        //subExpr - the wrapped expression; this node becomes its parent
        internal RESubExprNode(RENode subExpr)
        {
            mRefNode = subExpr;

            //link the wrapped expression into the tree
            subExpr.Parent = this;
        }
Exemple #13
0
        private RENode mReservedPath; //The child node that this Or Node must choose
        //Chosen node is random if this is null

        //Records the child that has to be chosen when generating the string,
        //then lets the base implementation continue with the rest of the tree
        //child - The child node (in Children) that must be part of the generated string
        internal override void ReservePath(RENode child)
        {
            //this child (in Children) must be called when generating the string
            mReservedPath = child;
            base.ReservePath(child);
        }
Exemple #14
0
 //Records the child that has to be generated, then lets the base implementation
 //continue with the rest of the tree
 //child - The child node that must be part of the generated string
 internal override void ReservePath(RENode child)
 {
     //this child (mRefNode) must be called when generating the string (cannot repeat zero times)
     mReservedPath = child;
     base.ReservePath(child);
 }