Пример #1
0
        /// <summary>
        /// Produces the text represented by this node.
        /// </summary>
        /// <remarks>
        /// When this node is <c>RegexCompiler.InvalidNode</c> and its text is not empty, one randomly
        /// chosen character of the text is replaced by a different character, obtained from a
        /// negative <c>RegexSetNode</c> that excludes the original character. In every other case
        /// the text is returned unchanged.
        /// </remarks>
        /// <param name="random">Random source used to pick the position and the replacement character.</param>
        /// <returns>The node text, with one character altered when this node is the invalid node.</returns>
        public override string Generate(Random random)
        {
            //An empty text has no character to corrupt: random.Next(0) yields 0 and indexing
            //position 0 of an empty text would throw, so such a node is emitted as-is.
            bool corrupt = (this == RegexCompiler.InvalidNode) && (mNodeText.Length > 0);

            if (!corrupt)
            {
                return mNodeText.ToString();
            }

            //select a character
            int  pos      = random.Next(mNodeText.Length);
            char original = mNodeText[pos];

            //generate any other character using a negative SetNode
            RegexSetNode others = new RegexSetNode(false);
            others.AddChars(original.ToString());

            //replace the character in a copy, so the node text itself is never modified and
            //nothing has to be restored for the next call when this node is repeated
            char[] chars = mNodeText.ToString().ToCharArray();
            chars[pos] = others.Generate(random)[0];

            return new string(chars);
        }
Пример #2
0
        /// <summary>
        /// Compiles a character set (expressions like <c>[abc]</c>, <c>[A-Z]</c>, <c>[^0-9]</c>) or a
        /// <c>[:name:]</c> macro.
        /// </summary>
        /// <remarks>
        /// <c>CompileAtom</c> calls this after it has consumed the opening '['. On success the
        /// closing ']' has been consumed as well.
        /// </remarks>
        /// <returns>
        /// The node produced by <c>CompileMacro</c> for the macro form, otherwise the
        /// <c>RegexSetNode</c> describing the set.
        /// </returns>
        public RegexNode CompileSet()
        {
            if (mCurrent == ':')
            {
                //Macro form: everything up to the closing ":]" is the macro name
                NextChar();
                int closeIndex = mRegex.ToString().IndexOf(":]", mIndex, StringComparison.Ordinal);

                //Without this check an unterminated macro would pass a negative length to
                //CompileMacro and move mIndex to -1
                AssertParse(closeIndex != -1, "Expected ':]'.");

                RegexNode macro = CompileMacro(mIndex, closeIndex - mIndex);
                mIndex = closeIndex;
                NextChar();     //step over ':'
                NextChar();     //step over ']'
                return macro;
            }

            //A leading '^' makes the set negative (constructed with false)
            bool         negated = (mCurrent == '^');
            RegexSetNode set     = new RegexSetNode(!negated);

            if (negated)
            {
                NextChar();
            }

            //if - or ] are specified as the first char, escape is not required
            if (mCurrent == '-' || mCurrent == ']')
            {
                set.AddChars(mCurrent.ToString());
                NextChar();
            }

            while (!mParseDone && mCurrent != ']')
            {
                char cStart = CompileSetChar();

                if (mCurrent != '-')
                {
                    //Single character member
                    set.AddChars(cStart.ToString());
                    continue;
                }

                //Range member: cStart-cEnd
                NextChar();
                AssertParse(!mParseDone && mCurrent != ']', "End of range is not specified.");
                char cEnd = CompileSetChar();
                set.AddRange(cStart, cEnd);
            }

            AssertParse(mCurrent == ']', "Expected ']'.");
            NextChar();
            return set;
        }
Пример #3
0
        /// <summary>
        /// Compiles the character that follows a backslash: escaped metacharacters, the character
        /// class shorthands (\d \D \s \S \w \W), control character escapes (\f \n \r \t \v) and the
        /// \A \Z \z anchors.
        /// </summary>
        /// <param name="c">The character that follows the backslash.</param>
        /// <returns>
        /// A text node for literals, control characters and anchors (anchors become empty text), or
        /// a set node for the character class shorthands. For an unknown escape
        /// <c>AssertParse(false, ...)</c> is invoked; if that call returns, the result is <c>null</c>.
        /// </returns>
        public RegexNode CompileSimpleMacro(char c)
        {
            //Escaped metacharacters stand for themselves. '^' and '$' are part of the list because
            //CompileAtom treats them as metacharacters too, so "\^" and "\$" are the only way to
            //write them literally.
            if (@"[]{}()*-+.?\|^$".IndexOf(c) >= 0)
            {
                return new RegexTextNode(c.ToString());
            }

            RegexNode    node = null;
            RegexSetNode set  = null;

            switch (c)
            {
            case 'd':     // [0-9]
            case 'D':     // [^0-9]
                //The lower-case form is the positive set, the upper-case form its negation
                node = set = new RegexSetNode(c == 'd');
                set.AddRange('0', '9');
                break;

            case 's':     // whitespace
            case 'S':     // anything but whitespace
                node = set = new RegexSetNode(c == 's');
                set.AddChars(" \r\n\f\v\t");
                break;

            case 'w':     // [a-zA-Z0-9_]
            case 'W':     // [^a-zA-Z0-9_]
                node = set = new RegexSetNode(c == 'w');
                set.AddRange('a', 'z');
                set.AddRange('A', 'Z');
                set.AddRange('0', '9');
                set.AddChars("_");
                break;

            case 'f':
                node = new RegexTextNode("\f");
                break;

            case 'n':
                node = new RegexTextNode("\n");
                break;

            case 'r':
                node = new RegexTextNode("\r");
                break;

            case 't':
                node = new RegexTextNode("\t");
                break;

            case 'v':
                node = new RegexTextNode("\v");
                break;

            case 'A':
            case 'Z':
            case 'z':
                //Anchors contribute no characters to the generated text
                node = new RegexTextNode(String.Empty);
                break;

            default:
                AssertParse(false, "Invalid escape.");
                break;
            }

            return node;
        }
Пример #4
0
        /// <summary>
        /// Compiles the single token found at the current parse position.
        /// </summary>
        /// <remarks>
        /// Recognised tokens are '.', a bracketed set, a parenthesised sub expression (indexed,
        /// named or non-capturing), the '^' and '$' anchors, a backslash escape, and a run of
        /// literal text.
        /// </remarks>
        /// <returns>The node built for the token.</returns>
        public RegexNode CompileAtom()
        {
            RegexNode result    = null;
            int       nameFirst = 0;     //first index of a backreference name inside mRegex
            int       nameLast  = 0;     //last index (inclusive) of that name

            AssertParse(!mParseDone, "Reached end of string. No element found.");
            AssertParse("|)?+*{}".IndexOf(mCurrent) < 0, "No element found.");

            switch (mCurrent)
            {
            case '.':
            {
                //Any single char with a code from 0 to 127
                RegexSetNode anyChar = new RegexSetNode(true);
                anyChar.AddRange(Convert.ToChar(0), Convert.ToChar(127));
                NextChar();
                result = anyChar;
                break;
            }

            case '[':
                //Positive or negative set; CompileSet expects the '[' to be consumed already
                NextChar();
                result = CompileSet();
                break;

            case '(':
            {
                //Sub expression. Without a "(?" prefix it is captured by index; "(?:" disables
                //capturing; any other "(?" prefix is read as a named capture.
                bool captureByIndex = true;
                bool captureByName  = false;

                NextChar();

                if (mCurrent == '?')
                {
                    captureByIndex = false;
                    NextChar();

                    if (mCurrent == ':')
                    {
                        NextChar();
                    }
                    else
                    {
                        ExtractBackrefName(ref nameFirst, ref nameLast);
                        captureByName = true;
                    }
                }

                RegexSubExpressionNode group = new RegexSubExpressionNode(CompileExpr());
                result = group;
                AssertParse(mCurrent == ')', "Expected ')'");
                NextChar();

                if (captureByName)
                {
                    group.Name = mRegex.ToString().Substring(nameFirst, nameLast - nameFirst + 1);
                    mNamedBackRefs.Add(result);
                }
                else if (captureByIndex)
                {
                    mBackRefs.Add(result);
                }

                break;
            }

            case '^':
            case '$':
                //Anchors produce no text
                result = new RegexTextNode(String.Empty);
                NextChar();
                break;

            case '\\':
            {
                NextChar();

                char lowered = Char.ToLower(mCurrent);

                if (lowered == 'x' || lowered == 'u' || mCurrent == '0')
                {
                    //Character given by its escape value
                    result = new RegexTextNode(EscapeValue().ToString());
                }
                else if (Char.IsDigit(mCurrent))
                {
                    //Backreference by index, wrapped in a new sub expression node
                    RegexNode target = GetBackRef((int)EscapeValue());
                    result = target;
                    AssertParse(target != null, "Couldn't find back reference");
                    result = new RegexSubExpressionNode(target);
                }
                else if (mCurrent == 'k')
                {
                    //Backreference by name, wrapped in a new sub expression node
                    NextChar();
                    ExtractBackrefName(ref nameFirst, ref nameLast);
                    string    refName = mRegex.ToString().Substring(nameFirst, nameLast - nameFirst + 1);
                    RegexNode target  = GetBackRef(refName);
                    result = target;
                    AssertParse(target != null, "Couldn't find back reference");
                    result = new RegexSubExpressionNode(target);
                }
                else
                {
                    result = CompileSimpleMacro(mCurrent);
                    NextChar();
                }

                break;
            }

            default:
            {
                //Literal text: runs up to (not including) the next special character, or to the
                //end of the expression when there is none
                string pattern = mRegex.ToString();
                int    stop    = pattern.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);
                int    runLength;

                if (stop == -1)
                {
                    mParseDone = true;
                    stop       = mRegex.Length - 1;
                    runLength  = stop - mIndex + 1;
                }
                else
                {
                    runLength = stop - mIndex;
                }

                result   = new RegexTextNode(pattern.Substring(mIndex, runLength));
                mIndex   = stop;
                mCurrent = mRegex[mIndex];
                break;
            }
            }

            return result;
        }