Пример #1
0
        /// <summary>
        /// Compiles the character that follows a backslash (\d \D \s \S \w \W,
        /// control-character escapes, escaped metacharacters, etc.) into a node.
        /// </summary>
        /// <param name="c">The character immediately after the backslash.</param>
        /// <returns>
        /// A text node for escaped metacharacters and control-character escapes,
        /// a set node for the character-class macros, or an empty text node for
        /// \A, \Z and \z. The result is null only if AssertParse returns normally
        /// for an unrecognised escape (presumably it throws instead - confirm).
        /// </returns>
        internal RENode CompileSimpleMacro(char c)
        {
            // Escaped metacharacters stand for themselves. '^' and '$' are part of
            // this set because CompileAtom gives their unescaped forms a special
            // meaning, so "\^" and "\$" must be accepted as the literal characters.
            const string escapableLiterals = @"[]{}()*-+.?\|^$";

            if (escapableLiterals.IndexOf(c) >= 0)
            {
                return new RETextNode(c.ToString());
            }

            RENode node = null;
            RESetNode set = null;

            switch (c)
            {
                case 'd': // [0-9]
                case 'D': // [^0-9]
                    // The lower-case macro builds the set with 'true', the upper-case one with 'false'.
                    node = set = new RESetNode(c == 'd');
                    set.AddRange('0', '9');
                    break;
                case 's': // whitespace
                case 'S': // non-whitespace
                    node = set = new RESetNode(c == 's');
                    set.AddChars(" \r\n\f\v\t");
                    break;
                case 'w': // [a-zA-Z0-9_]
                case 'W': // [^a-zA-Z0-9_]
                    node = set = new RESetNode(c == 'w');
                    set.AddRange('a', 'z');
                    set.AddRange('A', 'Z');
                    set.AddRange('0', '9');
                    set.AddChars("_");
                    break;
                case 'f':
                    node = new RETextNode("\f");
                    break;
                case 'n':
                    node = new RETextNode("\n");
                    break;
                case 'r':
                    node = new RETextNode("\r");
                    break;
                case 't':
                    node = new RETextNode("\t");
                    break;
                case 'v':
                    node = new RETextNode("\v");
                    break;
                case 'A':
                case 'Z':
                case 'z':
                    // These escapes contribute no text of their own.
                    node = new RETextNode(String.Empty);
                    break;
                default:
                    AssertParse(false, "Invalid escape.");
                    break;
            }

            return node;
        }
Пример #2
0
        /// <summary>
        /// Compiles the single atom that starts at the current parse position:
        /// '.', a bracketed set, a parenthesised sub expression, an anchor,
        /// a backslash escape, or a run of literal text.
        /// </summary>
        /// <returns>The node built for the atom.</returns>
        internal RENode CompileAtom()
        {
            RENode result = null;
            int nameStart = 0;
            int nameEnd = 0;

            AssertParse(!mParseDone, "Reached end of string. No element found.");

            // None of these characters can begin an atom.
            bool startsWithOperator = "|)?+*{}".IndexOf(mCurrent) >= 0;
            AssertParse(!startsWithOperator, "No element found.");

            if (mCurrent == '.')
            {
                // Any single char: a set spanning character codes 0 through 127.
                RESetNode anyChar = new RESetNode(true);
                result = anyChar;
                anyChar.AddRange('\0', '\u007F');
                NextChar();
            }
            else if (mCurrent == '[')
            {
                // Positive or negative set.
                NextChar();
                result = CompileSet();
            }
            else if (mCurrent == '(')
            {
                // Sub expression. Unless a '?' prefix says otherwise, it is
                // recorded as an indexed back reference.
                bool isNamed = false;
                bool isIndexed = true;
                NextChar();

                if (mCurrent == '?')
                {
                    isIndexed = false;
                    NextChar();

                    if (mCurrent == ':')
                    {
                        // "?:" means the sub expression is not stored for later reference.
                        NextChar();
                    }
                    else
                    {
                        // Named back reference: locate the name inside the pattern.
                        ExtractBackrefName(ref nameStart, ref nameEnd);
                        isNamed = true;
                    }
                }

                RESubExprNode group = new RESubExprNode(CompileExpr());
                result = group;
                AssertParse(mCurrent == ')', "Expected ')'");
                NextChar();

                if (isNamed)
                {
                    int nameLength = nameEnd - nameStart + 1;
                    group.Name = mRegex.ToString().Substring(nameStart, nameLength);
                    mNamedBackRefs.Add(result);
                }
                else if (isIndexed)
                {
                    mBackRefs.Add(result);
                }
            }
            else if (mCurrent == '^' || mCurrent == '$')
            {
                // Anchors are compiled to an empty text node.
                result = new RETextNode(String.Empty);
                NextChar();
            }
            else if (mCurrent == '\\')
            {
                NextChar();
                char lowered = Char.ToLower(mCurrent, CultureInfo.InvariantCulture);

                if (lowered == 'x' || lowered == 'u' || mCurrent == '0')
                {
                    // Escape introduced by x/X, u/U or 0: use the value EscapeValue yields as text.
                    result = new RETextNode(EscapeValue().ToString());
                }
                else if (Char.IsDigit(mCurrent))
                {
                    // Back reference by index; the referenced node is wrapped in a new sub expression node.
                    RENode target = GetBackRef((int)EscapeValue());
                    AssertParse(target != null, "Couldn't find back reference");
                    result = new RESubExprNode(target);
                }
                else if (mCurrent == 'k')
                {
                    // Back reference by name.
                    NextChar();
                    ExtractBackrefName(ref nameStart, ref nameEnd);
                    int nameLength = nameEnd - nameStart + 1;
                    RENode target = GetBackRef(mRegex.ToString().Substring(nameStart, nameLength));
                    AssertParse(target != null, "Couldn't find back reference");
                    result = new RESubExprNode(target);
                }
                else
                {
                    result = CompileSimpleMacro(mCurrent);
                    NextChar();
                }
            }
            else
            {
                // Literal text: runs up to (not including) the next special
                // character, or to the end of the pattern when there is none.
                string pattern = mRegex.ToString();
                char[] specials = "-*+?(){}\\[]^$.|".ToCharArray();
                int stop = pattern.IndexOfAny(specials, mIndex + 1);
                bool reachedEnd = stop < 0;

                if (reachedEnd)
                {
                    mParseDone = true;
                    stop = mRegex.Length - 1;
                }

                // At the end of the pattern the last character belongs to the run.
                int literalLength = reachedEnd ? stop - mIndex + 1 : stop - mIndex;
                result = new RETextNode(pattern.Substring(mIndex, literalLength));

                mIndex = stop;
                mCurrent = mRegex[mIndex];
            }

            return result;
        }