Example #1
0
        // Compiles the single character that follows a backslash (\d \D \s \S etc.).
        //
        // c: the character immediately after the '\'.
        //
        // Result by category:
        //   - Regex metacharacters  [ ] { } ( ) * - + . ? \ | ^ $
        //       -> RETextNode holding that character literally.
        //   - d / D, s / S, w / W
        //       -> RESetNode filled with the digit, whitespace or word characters.
        //          The constructor gets true for the plain form (e.g. [0-9]) and
        //          false for the negated form (e.g. [^0-9]).
        //   - f, n, r, t, v
        //       -> RETextNode holding the corresponding control character.
        //   - A, Z, z (anchors)
        //       -> empty RETextNode.
        //
        // Any other character is reported through AssertParse("Invalid escape.");
        // if AssertParse returns, the method returns null.
        internal RENode CompileSimpleMacro(char c)
        {
            // '^' and '$' belong here as well: CompileAtom treats both as
            // metacharacters, so "\^" and "\$" are the only way to spell them
            // literally. Without them those escapes were rejected as invalid.
            const string escapableMetaChars = @"[]{}()*-+.?\|^$";
            const string whitespaceChars    = " \r\n\f\v\t";

            if (escapableMetaChars.IndexOf(c) >= 0)
            {
                return new RETextNode(c.ToString());
            }

            RESetNode set;

            switch (c)
            {
                case 'd':     // [0-9]
                case 'D':     // [^0-9]
                    set = new RESetNode(c == 'd');
                    set.AddRange('0', '9');
                    return set;

                case 's':     // whitespace
                case 'S':     // anything but whitespace
                    set = new RESetNode(c == 's');
                    set.AddChars(whitespaceChars);
                    return set;

                case 'w':     // [a-zA-Z0-9_]
                case 'W':     // [^a-zA-Z0-9_]
                    set = new RESetNode(c == 'w');
                    set.AddRange('a', 'z');
                    set.AddRange('A', 'Z');
                    set.AddRange('0', '9');
                    set.AddChars("_");
                    return set;

                case 'f':
                    return new RETextNode("\f");

                case 'n':
                    return new RETextNode("\n");

                case 'r':
                    return new RETextNode("\r");

                case 't':
                    return new RETextNode("\t");

                case 'v':
                    return new RETextNode("\v");

                case 'A':
                case 'Z':
                case 'z':
                    // Anchors carry no text of their own.
                    return new RETextNode(String.Empty);

                default:
                    AssertParse(false, "Invalid escape.");
                    return null;
            }
        }
Example #2
0
        // Compiles one atom (token) starting at the current parse position and
        // advances the parser past it.
        //
        // Recognised forms:
        //   .            set covering character codes 0 through 127
        //   [ ... ]      character set, delegated to CompileSet
        //   ( ... )      sub-expression; "(?:" is not recorded as a back
        //                reference, "(?" followed by a name is recorded by name,
        //                anything else is recorded by index
        //   ^  $         empty text node
        //   \ ...        \x, \u, \0 escapes, numeric or named (\k) back
        //                references, otherwise CompileSimpleMacro
        //   other        run of literal text up to the next metacharacter
        //
        // Returns the node built for the atom. Parse errors are reported through
        // AssertParse.
        internal RENode CompileAtom()
        {
            RENode atom      = null;
            int    nameStart = 0;
            int    nameEnd   = 0;

            AssertParse(!mParseDone, "Reached end of string. No element found.");
            AssertParse("|)?+*{}".IndexOf(mCurrent) < 0, "No element found.");

            // Dispatch on the character the atom starts with; the branches below
            // move mCurrent forward, so remember it first.
            char lead = mCurrent;

            if (lead == '.')
            {
                // Any single char
                RESetNode anyChar = new RESetNode(true);
                anyChar.AddRange((char)0, (char)127);
                atom = anyChar;
                NextChar();
            }
            else if (lead == '[')
            {
                // Positive or negative set
                NextChar();
                atom = CompileSet();
            }
            else if (lead == '(')
            {
                // Sub expression. Captured by index unless a "(?" prefix says otherwise.
                bool isNamed    = false;
                bool isCaptured = true;

                NextChar();

                if (mCurrent == '?')
                {
                    NextChar();

                    if (mCurrent == ':')
                    {
                        // "(?:" means: don't store a reference.
                        NextChar();
                        isCaptured = false;
                    }
                    else
                    {
                        // Named back reference: remember where the name sits in the pattern.
                        ExtractBackrefName(ref nameStart, ref nameEnd);
                        isNamed = true;
                    }
                }

                RESubExprNode group = new RESubExprNode(CompileExpr());
                atom = group;

                AssertParse(mCurrent == ')', "Expected ')'");
                NextChar();

                if (isNamed)
                {
                    group.Name = mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1);
                    mNamedBackRefs.Add(atom);
                }
                else if (isCaptured)
                {
                    mBackRefs.Add(atom);
                }
            }
            else if (lead == '^' || lead == '$')
            {
                atom = new RETextNode(String.Empty);
                NextChar();
            }
            else if (lead == '\\')
            {
                NextChar();

                char folded = Char.ToLower(mCurrent, CultureInfo.InvariantCulture);

                if (folded == 'x' || folded == 'u' || mCurrent == '0')
                {
                    atom = new RETextNode(EscapeValue().ToString());
                }
                else if (Char.IsDigit(mCurrent))
                {
                    // Back reference by index.
                    RENode referenced = GetBackRef((int)EscapeValue());
                    atom = referenced;
                    AssertParse(atom != null, "Couldn't find back reference");
                    atom = new RESubExprNode(referenced);
                }
                else if (mCurrent == 'k')
                {
                    // Back reference by name.
                    NextChar();
                    ExtractBackrefName(ref nameStart, ref nameEnd);

                    RENode referenced = GetBackRef(mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1));
                    atom = referenced;
                    AssertParse(atom != null, "Couldn't find back reference");

                    // Wrap the referenced node in a new sub-expression node.
                    atom = new RESubExprNode(referenced);
                }
                else
                {
                    atom = CompileSimpleMacro(mCurrent);
                    NextChar();
                }
            }
            else
            {
                // Literal text: take everything up to (not including) the next
                // metacharacter, or to the end of the pattern if there is none.
                //
                // NOTE(review): the whole run becomes one text node, so for a
                // pattern such as "ab*" the node returned here is "ab". Whether
                // the caller then applies '*' to the whole run or only to 'b' is
                // not visible from this method - confirm.
                string pattern = mRegex.ToString();
                int    stopAt  = pattern.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);
                int    length;

                if (stopAt == -1)
                {
                    mParseDone = true;
                    stopAt     = mRegex.Length - 1;
                    length     = stopAt - mIndex + 1;
                }
                else
                {
                    length = stopAt - mIndex;
                }

                atom = new RETextNode(pattern.Substring(mIndex, length));

                mIndex   = stopAt;
                mCurrent = mRegex[mIndex];
            }

            return atom;
        }