Beispiel #1
0
        // Compiles the character that follows a backslash (\d \D \s \S \w \W, control
        // escapes such as \n and \t, anchors, and escaped metacharacters) into a node.
        //
        // c: the character immediately after the backslash.
        // Returns a RETextNode for literal/anchor escapes or a RESetNode for character
        // classes. For an unrecognised escape, AssertParse is called with
        // "Invalid escape." (presumably throwing); if it returns, the result is null.
        internal RENode CompileSimpleMacro(char c)
        {
            // An escaped metacharacter stands for itself. '^' and '$' are included
            // because CompileAtom treats the unescaped forms as anchors, so escaping
            // is the only way to express them as literals.
            if (@"[]{}()*-+.?\|^$".IndexOf(c) >= 0)
            {
                return new RETextNode(c.ToString());
            }

            RESetNode set;

            switch (c)
            {
                // Lower-case class letters build a positive set, upper-case letters
                // the negated set over the same characters.
                case 'd': // [0-9]
                case 'D': // [^0-9]
                    set = new RESetNode(c == 'd');
                    set.AddRange('0', '9');
                    return set;

                case 's': // whitespace
                case 'S': // anything but whitespace
                    set = new RESetNode(c == 's');
                    set.AddChars(" \r\n\f\v\t");
                    return set;

                case 'w': // [a-zA-Z0-9_]
                case 'W': // [^a-zA-Z0-9_]
                    set = new RESetNode(c == 'w');
                    set.AddRange('a', 'z');
                    set.AddRange('A', 'Z');
                    set.AddRange('0', '9');
                    set.AddChars("_");
                    return set;

                // Single control characters.
                case 'f':
                    return new RETextNode("\f");
                case 'n':
                    return new RETextNode("\n");
                case 'r':
                    return new RETextNode("\r");
                case 't':
                    return new RETextNode("\t");
                case 'v':
                    return new RETextNode("\v");

                // Anchors contribute no text to the generated string.
                case 'A':
                case 'Z':
                case 'z':
                    return new RETextNode(String.Empty);

                default:
                    AssertParse(false, "Invalid escape.");
                    return null;
            }
        }
Beispiel #2
0
        // Compiles a single atom (token) starting at the current parse position:
        // '.', a bracketed set, a parenthesised sub expression, an anchor, a
        // backslash escape / back reference, or a run of literal text.
        //
        // Returns the node built for the atom. The parser position (mIndex, mCurrent,
        // mParseDone) is advanced past the consumed input.
        // AssertParse is invoked when the input has ended, when the current character
        // cannot start an atom, when ')' is missing, or when a back reference cannot
        // be resolved.
        internal RENode CompileAtom()
        {
            int nameStart = 0;
            int nameEnd = 0;
            RENode result;

            AssertParse(!mParseDone, "Reached end of string. No element found.");
            AssertParse("|)?+*{}".IndexOf(mCurrent) < 0, "No element found.");

            char first = mCurrent;

            // '.' : any single character, modelled as the range 0..127.
            if (first == '.')
            {
                RESetNode anyChar = new RESetNode(true);
                anyChar.AddRange((char)0, (char)127);
                NextChar();
                return anyChar;
            }

            // '[' : positive or negative character set.
            if (first == '[')
            {
                NextChar();
                return CompileSet();
            }

            // '(' : sub expression, captured by index unless marked otherwise.
            if (first == '(')
            {
                bool captureByName = false;
                bool captureByIndex = true;

                NextChar();

                if (mCurrent == '?')
                {
                    NextChar();

                    if (mCurrent == ':')
                    {
                        // "(?:" -> the group is not stored for later reference.
                        NextChar();
                        captureByIndex = false;
                    }
                    else
                    {
                        // Named group: remember where the name sits in the pattern.
                        ExtractBackrefName(ref nameStart, ref nameEnd);
                        captureByName = true;
                        captureByIndex = false;
                    }
                }

                RESubExprNode group = new RESubExprNode(CompileExpr());
                AssertParse(mCurrent == ')', "Expected ')'");
                NextChar();

                if (captureByName)
                {
                    group.Name = mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1);
                    mNamedBackRefs.Add(group);
                }
                else if (captureByIndex)
                {
                    mBackRefs.Add(group);
                }

                return group;
            }

            // '^' and '$' : anchors produce no text.
            if (first == '^' || first == '$')
            {
                result = new RETextNode(String.Empty);
                NextChar();
                return result;
            }

            // '\' : numeric escape, back reference (by index or name) or simple macro.
            if (first == '\\')
            {
                NextChar();

                char lowered = Char.ToLower(mCurrent, CultureInfo.InvariantCulture);

                if (lowered == 'x' || lowered == 'u' || mCurrent == '0')
                {
                    return new RETextNode(EscapeValue().ToString());
                }

                if (Char.IsDigit(mCurrent))
                {
                    RENode byIndex = GetBackRef((int)EscapeValue());
                    AssertParse(byIndex != null, "Couldn't find back reference");
                    return new RESubExprNode(byIndex);
                }

                if (mCurrent == 'k')
                {
                    NextChar();
                    ExtractBackrefName(ref nameStart, ref nameEnd);
                    RENode byName = GetBackRef(mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1));
                    AssertParse(byName != null, "Couldn't find back reference");

                    // The referenced node is wrapped in a new sub expression node.
                    return new RESubExprNode(byName);
                }

                result = CompileSimpleMacro(mCurrent);
                NextChar();
                return result;
            }

            // Anything else: literal text up to (not including) the next special
            // character, or up to the end of the pattern when there is none.
            string pattern = mRegex.ToString();
            int stop = pattern.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);

            if (stop == -1)
            {
                mParseDone = true;
                stop = mRegex.Length - 1;
                result = new RETextNode(pattern.Substring(mIndex, stop - mIndex + 1));
            }
            else
            {
                result = new RETextNode(pattern.Substring(mIndex, stop - mIndex));
            }

            mIndex = stop;
            mCurrent = mRegex[mIndex];
            return result;
        }
Beispiel #3
0
        // Compiles a character set (i.e. expressions like [abc], [A-Z], [^0-9]) or a
        // bracketed macro of the form [:name:]. Called with the parser positioned on
        // the first character after '['.
        //
        // Returns the node for the set or macro, with the parser advanced past the
        // closing ']'.
        // AssertParse is invoked when a macro has no closing ":]", when a range has no
        // end character, or when the closing ']' is missing.
        internal RENode CompileSet()
        {
            RENode atom = null;
            char cStart, cEnd;
            RESetNode set;

            if (mCurrent == ':')
            {
                NextChar();

                // Look for the terminator from the current position onwards. Searching
                // from index 0 would match the ":]" of an earlier macro in the same
                // pattern, yielding a negative length and rewinding the parser.
                int closeIndex = mRegex.ToString().IndexOf(":]", mIndex, StringComparison.Ordinal);
                AssertParse(closeIndex != -1, "Expected ':]'.");

                // CompileMacro receives the start index and length of the macro name.
                atom = CompileMacro(mIndex, closeIndex - mIndex);

                // Jump to the ':' of ":]" and step over both terminator characters.
                mIndex = closeIndex;
                NextChar();
                NextChar();
                return atom;
            }

            if (mCurrent == '^')
            {
                // Leading '^' -> negated set.
                atom = set = new RESetNode(false);
                NextChar();
            }
            else
            {
                atom = set = new RESetNode(true);
            }

            if (mCurrent == '-' || mCurrent == ']') //if - or ] are specified as the first char, escape is not required
            {
                set.AddChars(mCurrent.ToString());
                NextChar();
            }

            while ((!mParseDone) && (mCurrent != ']'))
            {
                cStart = CompileSetChar();

                if (mCurrent == '-')
                {
                    // "a-z" style range: the character after '-' is the upper bound.
                    NextChar();
                    AssertParse(!mParseDone && mCurrent != ']', "End of range is not specified.");
                    cEnd = CompileSetChar();
                    set.AddRange(cStart, cEnd);
                }
                else
                {
                    set.AddChars(cStart.ToString());
                }
            }

            AssertParse(mCurrent == ']', "Expected ']'.");
            NextChar();
            return atom;
        }
Beispiel #4
0
        // Produces the text for this node.
        //
        // random: source of randomness used when this node is the designated
        //         invalid node.
        // Returns the node text unchanged for an ordinary node. When this node is
        // RECompiler.InvalidNode, returns the text with one randomly chosen character
        // replaced by a character generated from a negated set of that character.
        // The stored text is restored before returning.
        internal override string Generate(Random random)
        {
            bool isInvalidNode = this == RECompiler.InvalidNode;

            if (!isInvalidNode)
            {
                return mNodeText.ToString();
            }

            // Pick the position to corrupt and remember what was there.
            int index = random.Next(mNodeText.Length);
            char original = mNodeText[index];

            // A negated set holding only the original character yields some other one.
            RESetNode anythingElse = new RESetNode(false);
            anythingElse.AddChars(original.ToString());

            // Swap the replacement in just long enough to take a snapshot.
            mNodeText[index] = anythingElse.Generate(random)[0];
            string corrupted = mNodeText.ToString();

            // Put the original back so a repeated node starts clean on the next call.
            mNodeText[index] = original;

            return corrupted;
        }