Exemplo n.º 1
0
 /// <summary>
 /// Initializes the instance with a freshly constructed <c>RegexCharClass</c>
 /// and records the supplied nullable flag.
 /// </summary>
 /// <param name="nullable">Value stored in the <c>_nullable</c> field.</param>
 internal RegexFC(bool nullable)
 {
     _nullable = nullable;
     _cc       = new RegexCharClass();
 }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a one-line, human-readable description of the instruction stored at
        /// <paramref name="offset"/> in the <c>_codes</c> array.
        /// </summary>
        /// <param name="offset">Index into <c>_codes</c> of the opcode to describe.</param>
        /// <returns>
        /// The six-digit offset, a '*' marker when <c>OpcodeBacktracks</c> returns true for the
        /// masked opcode (a space otherwise), the operator description, and the opcode's
        /// operands enclosed in parentheses.
        /// </returns>
        internal string OpcodeDescription(int offset)
        {
            int rawOpcode = _codes[offset];
            int op        = rawOpcode & Mask;

            var text = new StringBuilder();
            text.AppendFormat("{0:D6} ", offset);
            text.Append(OpcodeBacktracks(op) ? '*' : ' ');
            // The operator description receives the unmasked opcode.
            text.Append(OperatorDescription(rawOpcode));
            text.Append('(');

            // First operand (and, for Capturemark, the optional second one).
            switch (op)
            {
            case One:
            case Notone:
            case Onerep:
            case Notonerep:
            case Oneloop:
            case Notoneloop:
            case Onelazy:
            case Notonelazy:
                text.Append("Ch = ")
                    .Append(RegexCharClass.CharDescription((char)_codes[offset + 1]));
                break;

            case Set:
            case Setrep:
            case Setloop:
            case Setlazy:
                text.Append("Set = ")
                    .Append(RegexCharClass.SetDescription(_strings[_codes[offset + 1]]));
                break;

            case Multi:
                text.Append("String = ")
                    .Append(_strings[_codes[offset + 1]]);
                break;

            case Ref:
            case Testref:
                text.Append("Index = ")
                    .Append(_codes[offset + 1]);
                break;

            case Capturemark:
            {
                text.Append("Index = ")
                    .Append(_codes[offset + 1]);

                int unindex = _codes[offset + 2];
                if (unindex != -1)
                {
                    text.Append(", Unindex = ")
                        .Append(unindex);
                }
                break;
            }

            case Nullcount:
            case Setcount:
                text.Append("Value = ")
                    .Append(_codes[offset + 1]);
                break;

            case Goto:
            case Lazybranch:
            case Branchmark:
            case Lazybranchmark:
            case Branchcount:
            case Lazybranchcount:
                text.Append("Addr = ")
                    .Append(_codes[offset + 1]);
                break;
            }

            // Opcodes that carry a repeat count or loop limit as their second operand.
            string boundLabel = null;
            switch (op)
            {
            case Onerep:
            case Notonerep:
            case Oneloop:
            case Notoneloop:
            case Onelazy:
            case Notonelazy:
            case Setrep:
            case Setloop:
            case Setlazy:
                boundLabel = ", Rep = ";
                break;

            case Branchcount:
            case Lazybranchcount:
                boundLabel = ", Limit = ";
                break;
            }

            if (boundLabel != null)
            {
                text.Append(boundLabel);

                int bound = _codes[offset + 2];
                if (bound == int.MaxValue)
                {
                    // int.MaxValue is printed as "inf" rather than as a number.
                    text.Append("inf");
                }
                else
                {
                    text.Append(bound);
                }
            }

            text.Append(')');

            return text.ToString();
        }
 /// <summary>
 /// Forwards to <c>RegexCharClass.CharInClass</c> and returns its result.
 /// </summary>
 /// <param name="ch">Character to test.</param>
 /// <param name="charClass">Character-class string passed unchanged to <c>RegexCharClass.CharInClass</c>.</param>
 /// <returns>Whatever <c>RegexCharClass.CharInClass</c> returns for the two arguments.</returns>
 protected static bool CharInClass(char ch, string charClass)
 {
     bool isMember = RegexCharClass.CharInClass(ch, charClass);
     return isMember;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Translates an encoded character-class string into a single solver condition.
        /// </summary>
        /// <param name="ignoreCase">
        /// Forwarded to <c>solver.MkRangeConstraint</c> for every range, and to the recursive call
        /// made for a trailing subtractor class.
        /// </param>
        /// <param name="set">
        /// Encoded class string. The characters at <c>SETLENGTH</c> and <c>CATEGORYLENGTH</c> give the
        /// lengths of the range and category sections, which begin at <c>SETSTART</c>; anything after
        /// the category section is treated as a nested subtractor class.
        /// </param>
        /// <returns>
        /// The disjunction of the class members (or the conjunction of their negations when the
        /// class is negated), conjoined with the negation of the subtractor condition if one exists.
        /// </returns>
        internal S CreateConditionFromSet(bool ignoreCase, string set)
        {
            bool isNegated = RegexCharClass.IsNegated(set);

            // One entry per range, single category, or category group. For a negated class each
            // entry is stored already negated, so the entries are later AND-ed instead of OR-ed.
            var parts = new List<S>();

            // ---- ranges ----
            foreach (var interval in ComputeRanges(set))
            {
                S rangeCond = solver.MkRangeConstraint(interval.First, interval.Second, ignoreCase);
                if (isNegated)
                {
                    rangeCond = solver.MkNot(rangeCond);
                }
                parts.Add(rangeCond);
            }

            // ---- categories ----
            int rangeSectionLength    = set[SETLENGTH];
            int categorySectionLength = set[CATEGORYLENGTH];

            int categoryBegin = rangeSectionLength + SETSTART;
            int categoryEnd   = categoryBegin + categorySectionLength;
            int pos           = categoryBegin;

            while (pos < categoryEnd)
            {
                short code = (short)set[pos];
                pos++;

                if (code != 0)
                {
                    // A single category is stored as (category code + 1); a negative value
                    // denotes the negated category (e.g. \D rather than \d).
                    S single = MapCategoryCodeToCondition(Math.Abs(code) - 1);

                    // A negated category inside a negated class cancels out.
                    bool invertSingle = (code < 0) != isNegated;
                    parts.Add(invertSingle ? solver.MkNot(single) : single);
                    continue;
                }

                // A 0 opens a group "0 C1 C2 ... Cn 0" that stands for (C1 | C2 | ... | Cn).
                code = (short)set[pos];
                pos++;

                if (code == 0)
                {
                    continue; // empty group
                }

                // The sign of the first code decides whether the whole group is negated.
                bool groupNegated = code < 0;
                var groupCodes    = new HashSet<int>();

                do
                {
                    groupCodes.Add(Math.Abs(code) - 1);
                    code = (short)set[pos];
                    pos++;
                }
                while (code != 0);

                S groupCond = MapCategoryCodeSetToCondition(groupCodes);
                if (isNegated != groupNegated)
                {
                    groupCond = solver.MkNot(groupCond);
                }
                parts.Add(groupCond);
            }

            // ---- subtractor ----
            // Whatever follows the category section is a nested class whose characters are
            // excluded from this one. Subtractors can nest, e.g. [a-z-[b-g-[cd]]], which the
            // recursion handles.
            S excluded = default(S);
            if (pos < set.Length)
            {
                excluded = CreateConditionFromSet(ignoreCase, set.Substring(pos));
            }

            // ---- combine ----
            // With no parts at all, a non-negated class yields solver.True and a negated one
            // yields solver.False.
            S result;
            if (isNegated)
            {
                result = parts.Count == 0 ? solver.False : solver.MkAnd(parts);
            }
            else
            {
                result = parts.Count == 0 ? solver.True : solver.MkOr(parts);
            }

            // The subtractor lies outside the scope of the class negation, so it is applied
            // only after the (possibly negated) condition above has been formed.
            if (object.Equals(excluded, default(S)))
            {
                return result;
            }

            return solver.MkAnd(result, solver.MkNot(excluded));
        }
Exemplo n.º 5
0
    /*
     * Parses the characters that follow a '\' (the '\' itself is not included)
     * and returns the RegexNode for the kind of atom that was scanned.
     * Throws the exception built from Strings.IllegalEndEscape when no
     * characters remain.
     */
    private RegexNode ScanBackslash()
    {
        if (CharsRight() == 0)
        {
            throw MakeException(Strings.IllegalEndEscape);
        }

        char escape = RightChar();

        switch (escape)
        {
        case 'b':
        case 'B':
        case 'A':
        case 'G':
        case 'Z':
        case 'z':
            // The node type for these escapes comes straight from TypeFromCode.
            MoveRight();
            return new RegexNode(TypeFromCode(escape), _options);

        // For the shorthand classes below, UseOptionE() selects the ECMA variant.
        case 'w':
        {
            MoveRight();
            var wordClass = UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass;
            return new RegexNode(RegexNode.Set, _options, wordClass);
        }

        case 'W':
        {
            MoveRight();
            var notWordClass = UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass;
            return new RegexNode(RegexNode.Set, _options, notWordClass);
        }

        case 's':
        {
            MoveRight();
            var spaceClass = UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass;
            return new RegexNode(RegexNode.Set, _options, spaceClass);
        }

        case 'S':
        {
            MoveRight();
            var notSpaceClass = UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass;
            return new RegexNode(RegexNode.Set, _options, notSpaceClass);
        }

        case 'd':
        {
            MoveRight();
            var digitClass = UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass;
            return new RegexNode(RegexNode.Set, _options, digitClass);
        }

        case 'D':
        {
            MoveRight();
            var notDigitClass = UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass;
            return new RegexNode(RegexNode.Set, _options, notDigitClass);
        }

        case 'p':
        case 'P':
        {
            MoveRight();

            // Any escape other than lowercase 'p' (i.e. 'P') requests the inverted category.
            bool invert = escape != 'p';

            RegexCharClass propertyClass = new RegexCharClass();
            propertyClass.AddCategoryFromName(ParseProperty(), invert, UseOptionI(), _pattern);

            if (UseOptionI())
            {
                propertyClass.AddLowercase(_culture);
            }

            return new RegexNode(RegexNode.Set, _options, propertyClass.ToStringClass());
        }

        default:
            // Everything else is delegated to ScanBasicBackslash.
            return ScanBasicBackslash();
        }
    }
        /// <summary>
        /// Converts the <paramref name="set"/> / <paramref name="category"/> pair with
        /// <c>RegexCharClass.ConvertOldStringsToClass</c> and tests <paramref name="ch"/>
        /// against the resulting class string.
        /// </summary>
        /// <param name="ch">Character to test.</param>
        /// <param name="set">First argument handed to <c>ConvertOldStringsToClass</c>.</param>
        /// <param name="category">Second argument handed to <c>ConvertOldStringsToClass</c>.</param>
        /// <returns>The result of <c>RegexCharClass.CharInClass</c> for the converted class.</returns>
        protected static bool CharInSet(char ch, string set, string category)
        {
            return RegexCharClass.CharInClass(ch, RegexCharClass.ConvertOldStringsToClass(set, category));
        }
Exemplo n.º 7
0
        /// <summary>
        /// Simplifies an alternation node in place. Adjacent single-character and
        /// set branches are folded into one set, nested alternations are spliced
        /// into this node's child list, and Nothing branches are dropped:
        ///
        /// a|b|c|def|g|h -> [a-c]|def|[gh]
        /// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
        /// </summary>
        /// <returns>
        /// A new Nothing node when there are no children; otherwise the result of
        /// <c>StripEnation(Nothing)</c> after the child list has been compacted.
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            if (_children == null)
            {
                return new RegexNode(Nothing, _options);
            }

            // State describing the most recently kept child.
            bool         prevWasSetOrOne = false;
            bool         prevBlocksMerge = false;
            RegexOptions prevOptions     = 0;

            // 'read' walks every child; 'write' is the slot the next surviving child lands in.
            int read;
            int write;

            for (read = 0, write = 0; read < _children.Count; read++, write++)
            {
                RegexNode current = _children[read];

                if (write < read)
                {
                    _children[write] = current;
                }

                if (current._type == Alternate)
                {
                    // Flatten: re-parent the nested branches and splice them in right after
                    // the current position so the following iterations visit them.
                    foreach (var nested in current._children)
                    {
                        nested._next = this;
                    }

                    _children.InsertRange(read + 1, current._children);
                    write--;
                    continue;
                }

                if (current._type == Nothing)
                {
                    // Dropped: its slot will be overwritten by the next kept child.
                    write--;
                    continue;
                }

                bool currentIsSet = current._type == Set;

                if (!currentIsSet && current._type != One)
                {
                    // Any other node type ends a run of mergeable nodes.
                    prevWasSetOrOne = false;
                    prevBlocksMerge = false;
                    continue;
                }

                // Sets cannot be merged if the RightToLeft or IgnoreCase options differ,
                // or if either side is reported as not mergeable.
                RegexOptions currentOptions =
                    current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
                bool currentBlocksMerge = currentIsSet && !RegexCharClass.IsMergeable(current._str);

                bool startsNewRun = !prevWasSetOrOne
                                    || prevOptions != currentOptions
                                    || prevBlocksMerge
                                    || currentBlocksMerge;

                if (startsNewRun)
                {
                    prevWasSetOrOne = true;
                    prevBlocksMerge = currentBlocksMerge;
                    prevOptions     = currentOptions;
                    continue;
                }

                // The previous kept child is a Set or One with identical options:
                // fold the current node into it.
                write--;
                RegexNode target = _children[write];

                RegexCharClass merged;
                if (target._type == One)
                {
                    merged = new RegexCharClass();
                    merged.AddChar(target._ch);
                }
                else
                {
                    merged = RegexCharClass.Parse(target._str);
                }

                if (currentIsSet)
                {
                    merged.AddCharClass(RegexCharClass.Parse(current._str));
                }
                else
                {
                    merged.AddChar(current._ch);
                }

                target._type = Set;
                target._str  = merged.ToStringClass();
            }

            // Trim the slots left over after compaction.
            if (write < read)
            {
                _children.RemoveRange(write, read - write);
            }

            return StripEnation(Nothing);
        }
Exemplo n.º 8
0
        protected override void Go()
        {
            Goto(0);

            for (; ;)
            {
#if DEBUG
                if (runmatch.Debug)
                {
                    DumpState();
                }
#endif

                CheckTimeout();

                switch (Operator())
                {
                case RegexCode.Stop:
                    return;

                case RegexCode.Nothing:
                    break;

                case RegexCode.Goto:
                    Goto(Operand(0));
                    continue;

                case RegexCode.Testref:
                    if (!IsMatched(Operand(0)))
                    {
                        break;
                    }
                    Advance(1);
                    continue;

                case RegexCode.Lazybranch:
                    TrackPush(Textpos());
                    Advance(1);
                    continue;

                case RegexCode.Lazybranch | RegexCode.Back:
                    TrackPop();
                    Textto(TrackPeek());
                    Goto(Operand(0));
                    continue;

                case RegexCode.Setmark:
                    StackPush(Textpos());
                    TrackPush();
                    Advance();
                    continue;

                case RegexCode.Nullmark:
                    StackPush(-1);
                    TrackPush();
                    Advance();
                    continue;

                case RegexCode.Setmark | RegexCode.Back:
                case RegexCode.Nullmark | RegexCode.Back:
                    StackPop();
                    break;

                case RegexCode.Getmark:
                    StackPop();
                    TrackPush(StackPeek());
                    Textto(StackPeek());
                    Advance();
                    continue;

                case RegexCode.Getmark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    break;

                case RegexCode.Capturemark:
                    if (Operand(1) != -1 && !IsMatched(Operand(1)))
                    {
                        break;
                    }
                    StackPop();
                    if (Operand(1) != -1)
                    {
                        TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos());
                    }
                    else
                    {
                        Capture(Operand(0), StackPeek(), Textpos());
                    }
                    TrackPush(StackPeek());

                    Advance(2);

                    continue;

                case RegexCode.Capturemark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    Uncapture();
                    if (Operand(0) != -1 && Operand(1) != -1)
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Branchmark:
                {
                    int matched;
                    StackPop();

                    matched = Textpos() - StackPeek();

                    if (matched != 0)
                    {                                      // Nonempty match -> loop now
                        TrackPush(StackPeek(), Textpos()); // Save old mark, textpos
                        StackPush(Textpos());              // Make new mark
                        Goto(Operand(0));                  // Loop
                    }
                    else
                    {                                          // Empty match -> straight now
                        TrackPush2(StackPeek());               // Save old mark
                        Advance(1);                            // Straight
                    }
                    continue;
                }

                case RegexCode.Branchmark | RegexCode.Back:
                    TrackPop(2);
                    StackPop();
                    Textto(TrackPeek(1));                           // Recall position
                    TrackPush2(TrackPeek());                        // Save old mark
                    Advance(1);                                     // Straight
                    continue;

                case RegexCode.Branchmark | RegexCode.Back2:
                    TrackPop();
                    StackPush(TrackPeek());                         // Recall old mark
                    break;                                          // Backtrack

                case RegexCode.Lazybranchmark:
                {
                    // We hit this the first time through a lazy loop and after each
                    // successful match of the inner expression.  It simply continues
                    // on and doesn't loop.
                    StackPop();

                    int oldMarkPos = StackPeek();

                    if (Textpos() != oldMarkPos)
                    {                      // Nonempty match -> try to loop again by going to 'back' state
                        if (oldMarkPos != -1)
                        {
                            TrackPush(oldMarkPos, Textpos());           // Save old mark, textpos
                        }
                        else
                        {
                            TrackPush(Textpos(), Textpos());
                        }
                    }
                    else
                    {
                        // The inner expression found an empty match, so we'll go directly to 'back2' if we
                        // backtrack.  In this case, we need to push something on the stack, since back2 pops.
                        // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
                        // position associated with that empty match.
                        StackPush(oldMarkPos);

                        TrackPush2(StackPeek());                        // Save old mark
                    }
                    Advance(1);
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    // After the first time, Lazybranchmark | RegexCode.Back occurs
                    // with each iteration of the loop, and therefore with every attempted
                    // match of the inner expression.  We'll try to match the inner expression,
                    // then go back to Lazybranchmark if successful.  If the inner expression
                    // fails, we go to Lazybranchmark | RegexCode.Back2
                    int pos;

                    TrackPop(2);
                    pos = TrackPeek(1);
                    TrackPush2(TrackPeek());                        // Save old mark
                    StackPush(pos);                                 // Make new mark
                    Textto(pos);                                    // Recall position
                    Goto(Operand(0));                               // Loop
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back2:
                    // The lazy loop has failed.  We'll do a true backtrack and
                    // start over before the lazy loop.
                    StackPop();
                    TrackPop();
                    StackPush(TrackPeek());                          // Recall old mark
                    break;

                case RegexCode.Setcount:
                    StackPush(Textpos(), Operand(0));
                    TrackPush();
                    Advance(1);
                    continue;

                case RegexCode.Nullcount:
                    StackPush(-1, Operand(0));
                    TrackPush();
                    Advance(1);
                    continue;

                case RegexCode.Setcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Nullcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Branchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark    = StackPeek();
                    int count   = StackPeek(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0))
                    {                                           // Max loops or empty match -> straight now
                        TrackPush2(mark, count);                // Save old mark, count
                        Advance(2);                             // Straight
                    }
                    else
                    {                                          // Nonempty match -> count+loop now
                        TrackPush(mark);                       // remember mark
                        StackPush(Textpos(), count + 1);       // Make new mark, incr count
                        Goto(Operand(0));                      // Loop
                    }
                    continue;
                }

                case RegexCode.Branchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (= current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    if (StackPeek(1) > 0)
                    {                                              // Positive -> can go straight
                        Textto(StackPeek());                       // Zap to mark
                        TrackPush2(TrackPeek(), StackPeek(1) - 1); // Save old mark, old count
                        Advance(2);                                // Straight
                        continue;
                    }
                    StackPush(TrackPeek(), StackPeek(1) - 1);           // recall old mark, old count
                    break;

                case RegexCode.Branchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    //  1: Previous count
                    TrackPop(2);
                    StackPush(TrackPeek(), TrackPeek(1));               // Recall old mark, old count
                    break;                                              // Backtrack


                case RegexCode.Lazybranchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark  = StackPeek();
                    int count = StackPeek(1);

                    if (count < 0)
                    {                                    // Negative count -> loop now
                        TrackPush2(mark);                // Save old mark
                        StackPush(Textpos(), count + 1); // Make new mark, incr count
                        Goto(Operand(0));                // Loop
                    }
                    else
                    {                                          // Nonneg count -> straight now
                        TrackPush(mark, count, Textpos());     // Save mark, count, position
                        Advance(2);                            // Straight
                    }
                    continue;
                }

                case RegexCode.Lazybranchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Mark
                    //  1: Count
                    //  2: Textpos
                {
                    TrackPop(3);
                    int mark    = TrackPeek();
                    int textpos = TrackPeek(2);

                    if (TrackPeek(1) < Operand(1) && textpos != mark)
                    {                                         // Under limit and not empty match -> loop
                        Textto(textpos);                      // Recall position
                        StackPush(textpos, TrackPeek(1) + 1); // Make new mark, incr count
                        TrackPush2(mark);                     // Save old mark
                        Goto(Operand(0));                     // Loop
                        continue;
                    }
                    else
                    {                                                  // Max loops or empty match -> backtrack
                        StackPush(TrackPeek(), TrackPeek(1));          // Recall old mark, count
                        break;                                         // backtrack
                    }
                }

                case RegexCode.Lazybranchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (== current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    StackPush(TrackPeek(), StackPeek(1) - 1);       // Recall old mark, count
                    break;                                          // Backtrack

                case RegexCode.Setjump:
                    StackPush(Trackpos(), Crawlpos());
                    TrackPush();
                    Advance();
                    continue;

                case RegexCode.Setjump | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Backjump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());

                    while (Crawlpos() != StackPeek(1))
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Forejump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());
                    TrackPush(StackPeek(1));
                    Advance();
                    continue;

                case RegexCode.Forejump | RegexCode.Back:
                    // TrackPush:
                    //  0: Crawlpos
                    TrackPop();

                    while (Crawlpos() != TrackPeek())
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Bol:
                    if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Eol:
                    if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Boundary:
                    if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Nonboundary:
                    if (IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.ECMABoundary:
                    if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.NonECMABoundary:
                    if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Beginning:
                    if (Leftchars() > 0)
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Start:
                    if (Textpos() != Textstart())
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.EndZ:
                    if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.End:
                    if (Rightchars() > 0)
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.One:
                    if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Notone:
                    if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Set:
                    if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code._strings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Multi:
                {
                    if (!Stringmatch(_code._strings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;
                }

                case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    Advance(1);
                    continue;
                }

                case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setrep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    string set = _code._strings[Operand(0)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    string set = _code._strings[Operand(0)];
                    int    i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Oneloop | RegexCode.Back:
                case RegexCode.Notoneloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Onelazy:
                case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setlazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Onelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notonelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setlazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInClass(Forwardcharnext(), _code._strings[Operand(0)]))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.ResetMatchStart:
                    TrackPush(MatchStart());            // Enable backtracking, saving the current match start
                    SetMatchStart(Textpos());           // Set the match start to the current position in text
                    Advance();
                    continue;

                case RegexCode.ResetMatchStart | RegexCode.Back:
                    TrackPop();
                    SetMatchStart(TrackPeek());         // Restore the previously saved value as the match start
                    break;                              // Continue backtracking

                default:
                    throw new NotImplementedException(SR.UnimplementedState);
                }

BreakBackward:
                ;

                // "break Backward" comes here:
                Backtrack();
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Initializes the instance from a character-class string.
        /// </summary>
        /// <param name="charClass">Class string handed to <c>RegexCharClass.Parse</c>; the result is kept in <c>_cc</c>.</param>
        /// <param name="nullable">Value stored in <c>_nullable</c>.</param>
        /// <param name="caseInsensitive">Value stored in <c>_caseInsensitive</c>.</param>
        internal RegexFC(string charClass, bool nullable, bool caseInsensitive)
        {
            _caseInsensitive = caseInsensitive;
            _nullable        = nullable;
            _cc              = RegexCharClass.Parse(charClass);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Writes the <c>FindFirstChar</c> override through <c>Writer</c>.
        /// Depending on what is known about the pattern, the body is one of:
        /// anchor checks, a Boyer-Moore prefix scan check, an unconditional
        /// <c>return true;</c>, or a countdown loop that looks for the first
        /// character belonging to <c>FirstCharacterPrefix</c>.
        /// </summary>
        private void GenerateFindFirstChar()
        {
            // The culture field is only declared when a Boyer-Moore prefix exists.
            var boyerMooreCulture = BoyerMoorePrefix != null
                ? Writer.DeclareField($@"private static readonly CultureInfo BoyerMooreCulture = CultureInfo.GetCultureInfo(""{BoyerMoorePrefix._culture.ToString()}"");")
                : null;

            // Same predicate selects both the helper emitted up front and the method body below.
            var hasPositionalAnchor = Anchors.Beginning || Anchors.Start || Anchors.EndZ || Anchors.End;

            if (!hasPositionalAnchor && BoyerMoorePrefix != null)
            {
                GenerateBoyerMoorePrefixScan(boyerMooreCulture);
            }

            using (Writer.Method("protected override bool FindFirstChar()"))
            {
                #if DEBUG_OUTPUT
                Writer.Write($@"Debug.WriteLine("""")");
                Writer.Write($@"Debug.WriteLine($""Search range: from {{{runtextbeg}.ToString(CultureInfo.InvariantCulture)}} to {{{runtextend}.ToString(CultureInfo.InvariantCulture)}}"")");
                Writer.Write($@"Debug.WriteLine($""Firstchar search starting at {{{runtextpos}.ToString(CultureInfo.InvariantCulture)}} stopping at {{{(IsRightToLeft ? runtextbeg : runtextend)}.ToString(CultureInfo.InvariantCulture)}}"")");
                #endif

                if (hasPositionalAnchor)
                {
                    GenerateAnchorChecks(boyerMooreCulture);
                }
                else if (BoyerMoorePrefix != null)
                {
                    GenerateBoyerMoorePrefixScanCheck();
                }
                else if (FirstCharacterPrefix == null)
                {
                    // Nothing is known about the first character: every position is a candidate.
                    Writer.Write($"return true;");
                }
                else
                {
                    var culture = DeclareCulture();
                    var set     = FirstCharacterPrefix.GetValueOrDefault().Prefix;

                    if (RegexCharClass.IsSingleton(set))
                    {
                        // NOTE(review): ch is spliced between single quotes as-is; confirm the
                        // writer escapes characters such as ' and \ in the generated literal.
                        var ch = RegexCharClass.SingletonChar(set);
                        var i  = Local.Parse("i");
                        using (Writer.For($"int {i} = {Forwardchars()}; {i} > 0; {i}--"))
                        {
                            using (Writer.If($"'{ch}' == {Forwardcharnext(culture)}"))
                            {
                                Backwardnext();
                                Writer.Write($"return true;");
                            }
                        }
                    }
                    else
                    {
                        var i = Local.Parse("i");

                        // The loop condition and step go through the {i} local, exactly like the
                        // initializer and the singleton loop above, so the generated header stays
                        // consistent however the local is rendered.
                        using (Writer.For($"int {i} = {Forwardchars()}; {i} > 0; {i}--"))
                        {
                            using (Writer.If($@"{CharInClass(Forwardcharnext(culture), set)}"))
                            {
                                Backwardnext();
                                Writer.Write($"return true;");
                            }
                        }
                    }

                    // Fallback statement emitted after the scan loop.
                    Writer.Write($"return false;");
                }
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Initializes the instance with a character class built from a single character.
        /// </summary>
        /// <param name="ch">The character the class is built around.</param>
        /// <param name="not">
        /// When false the class contains only <paramref name="ch"/>; when true it
        /// contains every other character in the range '\0'..'\uFFFF'.
        /// </param>
        /// <param name="nullable">Value stored in <c>_nullable</c>.</param>
        /// <param name="caseInsensitive">Value stored in <c>_caseInsensitive</c>.</param>
        internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive)
        {
            _nullable        = nullable;
            _caseInsensitive = caseInsensitive;
            _cc              = new RegexCharClass();

            if (!not)
            {
                // Positive form: a one-character range.
                _cc.AddRange(ch, ch);
                return;
            }

            // Negated form: add the range below ch (if any) and the range above ch (if any).
            if (ch != char.MinValue)
            {
                _cc.AddRange(char.MinValue, (char)(ch - 1));
            }

            if (ch != char.MaxValue)
            {
                _cc.AddRange((char)(ch + 1), char.MaxValue);
            }
        }
Exemplo n.º 12
0
 /// <summary>
 /// Initializes the instance with a freshly constructed <c>RegexCharClass</c>.
 /// </summary>
 /// <param name="nullable">Value stored in <c>_nullable</c>.</param>
 internal RegexFC(bool nullable)
 {
     _nullable = nullable;
     _cc       = new RegexCharClass();
 }
Exemplo n.º 13
0
    /*
     * Scans \-style backreferences and character escapes.
     *
     * Backreferences are recognized only so they can be rejected: a numbered
     * reference (\1, \<1>, \k<1>) or a named one (\<name>, \'name', \k<name>)
     * raises Strings.BackRefCaptureGroupNotSupported. Anything else is re-read
     * from the saved position as a character escape and returned as a
     * RegexNode.One node (lower-cased through _culture when UseOptionI() is set).
     *
     * Throws (via MakeException) Strings.IllegalEndEscape when no characters
     * remain, and Strings.MalformedNameRef for a \k that is not followed by a
     * '<' or '\'' delimiter plus at least one more character.
     */
    private RegexNode ScanBasicBackslash()
    {
        if (CharsRight() == 0)
        {
            throw MakeException(Strings.IllegalEndEscape);
        }

        var angled  = false;
        var close   = '\0';
        var backpos = Textpos();   // remembered so the escape can be rescanned from its start

        char ch = RightChar();

        // allow \k<foo> instead of \<foo>, which is now deprecated

        if (ch == 'k')
        {
            if (CharsRight() >= 2)
            {
                MoveRight();
                ch = MoveRightGetChar();

                if (ch == '<' || ch == '\'')
                {
                    angled = true;
                    close  = (ch == '\'') ? '\'' : '>';
                }
            }

            if (!angled || CharsRight() <= 0)
            {
                throw MakeException(Strings.MalformedNameRef);
            }

            ch = RightChar();
        }

        // Delimited reference written without the leading 'k'

        else if ((ch == '<' || ch == '\'') && CharsRight() > 1)
        {
            angled = true;
            close  = (ch == '\'') ? '\'' : '>';

            MoveRight();
            ch = RightChar();
        }

        // Try to parse backreference: \<1> or \<cap>

        if (angled && ch >= '0' && ch <= '9')
        {
            _ = ScanDecimal();

            // Only a properly closed numeric reference is rejected; otherwise fall
            // through and rescan the text as a plain escape.
            if (CharsRight() > 0 && MoveRightGetChar() == close)
            {
                throw MakeException(Strings.BackRefCaptureGroupNotSupported);
            }
        }

        // Try to parse backreference or octal: \1

        else if (!angled && ch >= '1' && ch <= '9')
        {
            // The ECMAScript and non-ECMAScript paths raised the very same error,
            // so no option check is needed here.
            throw MakeException(Strings.BackRefCaptureGroupNotSupported);
        }
        else if (angled && RegexCharClass.IsWordChar(ch))
        {
            throw MakeException(Strings.BackRefCaptureGroupNotSupported);
        }

        // Not backreference: must be char code

        Textto(backpos);
        ch = ScanCharEscape();

        if (UseOptionI())
        {
            ch = _culture.TextInfo.ToLower(ch);
        }

        return new RegexNode(RegexNode.One, _options, ch);
    }
Exemplo n.º 14
0
        /// <summary>
        /// Basic optimization. Single-letter alternations can be replaced
        /// by faster set specifications, and nested alternations with no
        /// intervening operators can be flattened:
        ///
        /// a|b|c|def|g|h -> [a-c]|def|[gh]
        /// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
        /// </summary>
        /// <returns>
        /// A new <c>Nothing</c> node when this node has no children; otherwise the
        /// result of <c>StripEnation(Nothing)</c> after the child list has been
        /// rewritten in place.
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            // Combine adjacent sets/chars

            bool wasLastSet;
            bool lastNodeCannotMerge;
            RegexOptions optionsLast;
            RegexOptions optionsAt;
            int i;
            int j;
            RegexNode at;
            RegexNode prev;

            if (_children == null)
                return new RegexNode(Nothing, _options);

            wasLastSet = false;
            lastNodeCannotMerge = false;
            optionsLast = 0;

            // i is the read position and j the write position: kept children are
            // compacted toward the front of _children, and every "j--" below cancels
            // the loop's j++ so that the current child's slot is reused.
            for (i = 0, j = 0; i < _children.Count; i++, j++)
            {
                at = _children[i];

                if (j < i)
                    _children[j] = at;

                // Runs at most once: the trailing unconditional break makes this a
                // block that the inner "break" statements can leave early.
                for (; ;)
                {
                    if (at._type == Alternate)
                    {
                        // Flatten a nested alternation: re-parent its children to this
                        // node and splice them in right after the current read position,
                        // so the following iterations visit them.
                        for (int k = 0; k < at._children.Count; k++)
                            at._children[k]._next = this;

                        _children.InsertRange(i + 1, at._children);
                        j--;
                    }
                    else if (at._type == Set || at._type == One)
                    {
                        // Cannot merge sets if L or I options differ, or if either are negated.
                        optionsAt = at._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);


                        if (at._type == Set)
                        {
                            // Keep this Set as its own child (and remember its state for the
                            // next child) when there is nothing compatible to merge it into.
                            if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !RegexCharClass.IsMergeable(at._str))
                            {
                                wasLastSet = true;
                                lastNodeCannotMerge = !RegexCharClass.IsMergeable(at._str);
                                optionsLast = optionsAt;
                                break;
                            }
                        }
                        else if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge)
                        {
                            // Same decision for a One node; a One is recorded as mergeable.
                            wasLastSet = true;
                            lastNodeCannotMerge = false;
                            optionsLast = optionsAt;
                            break;
                        }


                        // The last node was a Set or a One, we're a Set or One and our options are the same.
                        // Merge the two nodes.
                        // After the decrement j addresses the previously kept child, which
                        // absorbs the current one.
                        j--;
                        prev = _children[j];

                        RegexCharClass prevCharClass;
                        if (prev._type == One)
                        {
                            prevCharClass = new RegexCharClass();
                            prevCharClass.AddChar(prev._ch);
                        }
                        else
                        {
                            prevCharClass = RegexCharClass.Parse(prev._str);
                        }

                        if (at._type == One)
                        {
                            prevCharClass.AddChar(at._ch);
                        }
                        else
                        {
                            RegexCharClass atCharClass = RegexCharClass.Parse(at._str);
                            prevCharClass.AddCharClass(atCharClass);
                        }

                        // The previous child becomes (or stays) a Set holding the union.
                        prev._type = Set;
                        prev._str = prevCharClass.ToStringClass();
                    }
                    else if (at._type == Nothing)
                    {
                        // Drop Nothing children from the list.
                        j--;
                    }
                    else
                    {
                        // Any other node type interrupts a run of mergeable Set/One nodes.
                        wasLastSet = false;
                        lastNodeCannotMerge = false;
                    }
                    break;
                }
            }

            // Trim the slots left over at the tail after compaction.
            if (j < i)
                _children.RemoveRange(j, i - j);

            // NOTE(review): StripEnation is defined elsewhere; presumably it collapses an
            // alternation with zero or one remaining children — confirm against its definition.
            return StripEnation(Nothing);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Builds a textual description of this node: the type name from
        /// <c>TypeStr</c>, one suffix per option that is set, and then
        /// type-specific details (character, capture indices, string, set,
        /// and Min/Max for the loop types).
        /// </summary>
        /// <returns>The assembled description.</returns>
        internal string Description()
        {
            var text = new StringBuilder();

            text.Append(TypeStr[_type]);

            // Option flags and the suffix each contributes, in output order.
            RegexOptions[] flags =
            {
                RegexOptions.ExplicitCapture,
                RegexOptions.IgnoreCase,
                RegexOptions.RightToLeft,
                RegexOptions.Multiline,
                RegexOptions.Singleline,
                RegexOptions.IgnorePatternWhitespace,
                RegexOptions.ECMAScript
            };
            string[] suffixes = { "-C", "-I", "-L", "-M", "-S", "-X", "-E" };

            for (int k = 0; k < flags.Length; k++)
            {
                if ((_options & flags[k]) != 0)
                {
                    text.Append(suffixes[k]);
                }
            }

            // Payload carried by the node type.
            switch (_type)
            {
            case Oneloop:
            case Notoneloop:
            case Onelazy:
            case Notonelazy:
            case One:
            case Notone:
                text.Append("(Ch = ").Append(RegexCharClass.CharDescription(_ch)).Append(")");
                break;

            case Capture:
                text.Append("(index = ").Append(_m.ToString(CultureInfo.InvariantCulture));
                text.Append(", unindex = ").Append(_n.ToString(CultureInfo.InvariantCulture)).Append(")");
                break;

            case Ref:
            case Testref:
                text.Append("(index = ").Append(_m.ToString(CultureInfo.InvariantCulture)).Append(")");
                break;

            case Multi:
                text.Append("(String = ").Append(_str).Append(")");
                break;

            case Set:
            case Setloop:
            case Setlazy:
                text.Append("(Set = ").Append(RegexCharClass.SetDescription(_str)).Append(")");
                break;
            }

            // Repetition bounds, for the loop types only.
            switch (_type)
            {
            case Oneloop:
            case Notoneloop:
            case Onelazy:
            case Notonelazy:
            case Setloop:
            case Setlazy:
            case Loop:
            case Lazyloop:
                string upperBound = _n == int.MaxValue
                    ? "inf"
                    : Convert.ToString(_n, CultureInfo.InvariantCulture);

                text.Append("(Min = ").Append(_m.ToString(CultureInfo.InvariantCulture));
                text.Append(", Max = ").Append(upperBound).Append(")");
                break;
            }

            return text.ToString();
        }
Exemplo n.º 16
0
        /// <summary>
        /// Moves <c>runtextpos</c> to the next position worth attempting a match at.
        /// Three strategies are tried in order: anchor bits in <c>_code._anchors</c>,
        /// the Boyer-Moore prefix <c>_code._bmPrefix</c>, and the first-character
        /// set <c>_code._fcPrefix</c>.
        /// </summary>
        /// <returns>
        /// True when a candidate position was found (or nothing allows ruling
        /// positions out); false when no candidate remains.
        /// </returns>
        protected override bool FindFirstChar()
        {
            var anchors = _code._anchors;

            if ((anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)) != 0)
            {
                if (_code._rightToLeft)
                {
                    // Right-to-left: End/EndZ/Start decide whether the current position is
                    // still usable; Beginning lets the scan jump straight to the front.
                    if (((anchors & RegexFCD.End) != 0 && runtextpos < runtextend) ||
                        ((anchors & RegexFCD.EndZ) != 0 && (runtextpos < runtextend - 1 ||
                                                            (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
                        ((anchors & RegexFCD.Start) != 0 && runtextpos < runtextstart))
                    {
                        runtextpos = runtextbeg;
                        return false;
                    }

                    if ((anchors & RegexFCD.Beginning) != 0 && runtextpos > runtextbeg)
                    {
                        runtextpos = runtextbeg;
                    }
                }
                else
                {
                    // Left-to-right: once past a Beginning/Start anchor point, give up.
                    if (((anchors & RegexFCD.Beginning) != 0 && runtextpos > runtextbeg) ||
                        ((anchors & RegexFCD.Start) != 0 && runtextpos > runtextstart))
                    {
                        runtextpos = runtextend;
                        return false;
                    }

                    // End anchors allow skipping ahead to the only positions that can match.
                    if ((anchors & RegexFCD.EndZ) != 0 && runtextpos < runtextend - 1)
                    {
                        runtextpos = runtextend - 1;
                    }
                    else if ((anchors & RegexFCD.End) != 0 && runtextpos < runtextend)
                    {
                        runtextpos = runtextend;
                    }
                }

                // A valid anchor position was found; a literal prefix, if any, must also match there.
                return _code._bmPrefix == null
                    || _code._bmPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
            }

            if (_code._bmPrefix != null)
            {
                runtextpos = _code._bmPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);

                if (runtextpos != -1)
                {
                    return true;
                }

                // Scan reported -1: park the position at the far end of the scan direction.
                runtextpos = _code._rightToLeft ? runtextbeg : runtextend;
                return false;
            }

            if (_code._fcPrefix == null)
            {
                return true;
            }

            _rightToLeft     = _code._rightToLeft;
            _caseInsensitive = _code._fcPrefix.CaseInsensitive;

            string firstChars = _code._fcPrefix.Prefix;

            if (RegexCharClass.IsSingleton(firstChars))
            {
                // Single-character set: compare directly instead of doing a class lookup.
                char wanted    = RegexCharClass.SingletonChar(firstChars);
                int  remaining = Forwardchars();

                while (remaining-- > 0)
                {
                    if (Forwardcharnext() == wanted)
                    {
                        // Step back so the matching character is the next one read.
                        Backwardnext();
                        return true;
                    }
                }
            }
            else
            {
                int remaining = Forwardchars();

                while (remaining-- > 0)
                {
                    if (RegexCharClass.CharInClass(Forwardcharnext(), firstChars))
                    {
                        Backwardnext();
                        return true;
                    }
                }
            }

            return false;
        }
Exemplo n.º 17
0
 /// <summary>
 /// Tests whether <paramref name="index"/> is an ECMA word boundary in <c>runtext</c>:
 /// exactly one of the characters on either side of the index is an ECMA word character.
 /// </summary>
 /// <param name="index">Position to test.</param>
 /// <param name="startpos">The character before the index is only examined when index is greater than this.</param>
 /// <param name="endpos">The character at the index is only examined when index is less than this.</param>
 /// <returns>True when the two sides differ in "is ECMA word character".</returns>
 protected bool IsECMABoundary(int index, int startpos, int endpos)
 {
     bool wordBefore = index > startpos && RegexCharClass.IsECMAWordChar(runtext[index - 1]);
     bool wordAfter  = index < endpos && RegexCharClass.IsECMAWordChar(runtext[index]);

     return wordBefore != wordAfter;
 }
Exemplo n.º 18
0
        /*
         * Reads the characters that come after a '\' (the '\' itself is not
         * included) and produces the RegexNode for that atom. Anchors and the
         * \w \s \d \p shorthand classes (and their negations) are handled
         * here; everything else is delegated to ScanBasicBackslash.
         */
        internal RegexNode ScanBackslash()
        {
            if (CharsRight() == 0)
            {
                throw MakeException(SR.IllegalEndEscape);
            }

            char ch = RightChar();

            switch (ch)
            {
                // Anchors and boundaries: the node type is derived from the letter itself.
                case 'b':
                case 'B':
                case 'A':
                case 'G':
                case 'Z':
                case 'z':
                    MoveRight();
                    return new RegexNode(TypeFromCode(ch), _options);

                // Shorthand classes: the ECMA variant is chosen when UseOptionE() is set.
                case 'w':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                                         UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass);

                case 'W':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                                         UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass);

                case 's':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                                         UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass);

                case 'S':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                                         UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass);

                case 'd':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                                         UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass);

                case 'D':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                                         UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass);

                // Unicode category: the upper-case letter requests the inverted category.
                case 'p':
                case 'P':
                {
                    MoveRight();

                    var category = new RegexCharClass();
                    category.AddCategoryFromName(ParseProperty(), ch == 'P', UseOptionI(), _pattern);

                    if (UseOptionI())
                    {
                        category.AddLowercase(_culture);
                    }

                    return new RegexNode(RegexNode.Set, _options, category.ToStringClass());
                }

                default:
                    return ScanBasicBackslash();
            }
        }
Exemplo n.º 19
0
    /*
     * Scans chars following a '(' (not counting the '('), and returns
     * a RegexNode for the type of group scanned, or null if the group
     * simply changed options (?cimsx-cimsx) or was a comment (#...).
     */
    private RegexNode ScanGroupOpen()
    {
        // Returns the node for the group being opened, or null when the construct
        // only changed options, i.e. "(?opts)". Every malformed or unsupported
        // construct is reported through MakeException.
        char ch;
        int  nodeType;
        var  close = '>';

        // just return a RegexNode if we have:
        // 1. "(" followed by nothing
        // 2. "(x" where x != ?
        // 3. "(?)"
        // (by the time the third test runs, RightChar() is already known to be '?')
        if (CharsRight() == 0 || RightChar() != '?' || (CharsRight() > 1 && RightChar(1) == ')'))
        {
            if (UseOptionN() || _ignoreNextParen)
            {
                _ignoreNextParen = false;
                return new RegexNode(RegexNode.Group, _options);
            }

            return new RegexNode(RegexNode.Capture, _options, _autocap++, -1);
        }

        // step over the '?'
        MoveRight();

        // "(?" with nothing after it
        if (CharsRight() == 0)
        {
            throw MakeException(Strings.UnrecognizedGrouping);
        }

        switch (MoveRightGetChar())
        {
        case ':':
            nodeType = RegexNode.Group;
            break;

        case '=':
            // "(?=" clears RightToLeft for the group
            _options &= ~(RegexOptions.RightToLeft);
            nodeType  = RegexNode.Require;
            break;

        case '!':
            // "(?!" clears RightToLeft for the group
            _options &= ~(RegexOptions.RightToLeft);
            nodeType  = RegexNode.Prevent;
            break;

        case '>':
            nodeType = RegexNode.Greedy;
            break;

        case '\'':
            // "(?'" is handled like "(?<", except that the "<=" / "<!" forms
            // are not valid with a quote.
            close = '\'';
            goto case '<';

        case '<':
            if (CharsRight() == 0)
            {
                throw MakeException(Strings.UnrecognizedGrouping);
            }

            switch (ch = MoveRightGetChar())
            {
            case '=':
                if (close == '\'')
                {
                    throw MakeException(Strings.UnrecognizedGrouping);
                }

                // "(?<=" sets RightToLeft for the group
                _options |= RegexOptions.RightToLeft;
                nodeType  = RegexNode.Require;
                break;

            case '!':
                if (close == '\'')
                {
                    throw MakeException(Strings.UnrecognizedGrouping);
                }

                // "(?<!" sets RightToLeft for the group
                _options |= RegexOptions.RightToLeft;
                nodeType  = RegexNode.Prevent;
                break;

            default:
                // Anything else after "(?<" / "(?'" would be a group name or number.
                // Those are rejected here, so every path below throws; no Capture
                // node is ever produced from this branch.
                MoveLeft();

                // part before '-'
                if ((ch >= '0' && ch <= '9') || RegexCharClass.IsWordChar(ch))
                {
                    throw MakeException(Strings.BackRefCaptureGroupNotSupported);
                }

                if (ch != '-')
                {
                    // bad group name - starts with something other than a word character and isn't a number
                    throw MakeException(Strings.InvalidGroupName);
                }

                // part after '-': the current character is the '-' itself, so at
                // least one more character must follow before it is safe to step
                // over the '-' and read what comes next.
                if (CharsRight() > 1)
                {
                    MoveRight();
                    ch = RightChar();

                    if ((ch >= '0' && ch <= '9') || RegexCharClass.IsWordChar(ch))
                    {
                        throw MakeException(Strings.BackRefCaptureGroupNotSupported);
                    }

                    // bad group name - starts with something other than a word character and isn't a number
                    throw MakeException(Strings.InvalidGroupName);
                }

                // pattern ends right after the '-'
                throw MakeException(Strings.UnrecognizedGrouping);
            }
            break;

        case '(':
            // alternation construct (?(...) | )

            var parenPos = Textpos();
            if (CharsRight() > 0)
            {
                ch = RightChar();

                // a condition that starts like a group number or name is treated as a backref and rejected
                if ((ch >= '0' && ch <= '9') || RegexCharClass.IsWordChar(ch))
                {
                    throw MakeException(Strings.BackRefCaptureGroupNotSupported);
                }
            }

            // not a backref
            nodeType = RegexNode.Testgroup;
            Textto(parenPos - 1);           // jump to the start of the parentheses
            _ignoreNextParen = true;        // but make sure we don't try to capture the insides

            var charsRight = CharsRight();
            if (charsRight >= 3 && RightChar(1) == '?')
            {
                var rightchar2 = RightChar(2);

                // disallow comments in the condition
                if (rightchar2 == '#')
                {
                    throw MakeException(Strings.AlternationCantHaveComment);
                }

                // disallow named capture group (?'..'..) in the condition
                if (rightchar2 == '\'')
                {
                    throw MakeException(Strings.AlternationCantCapture);
                }

                // disallow named capture group (?<..>..) in the condition, while still letting "(?<!" and "(?<=" through
                if (charsRight >= 4 && rightchar2 == '<' && RightChar(3) != '!' && RightChar(3) != '=')
                {
                    throw MakeException(Strings.AlternationCantCapture);
                }
            }

            break;

        default:
            // back up so that ScanOptions sees the character just consumed
            MoveLeft();

            nodeType = RegexNode.Group;

            // Disallow options in the children of a testgroup node
            if (_group._type != RegexNode.Testgroup)
            {
                ScanOptions();
            }

            if (CharsRight() == 0)
            {
                throw MakeException(Strings.UnrecognizedGrouping);
            }

            ch = MoveRightGetChar();
            if (ch == ')')
            {
                // "(?opts)": nothing to build, the construct only changed options
                return null;
            }

            if (ch != ':')
            {
                throw MakeException(Strings.UnrecognizedGrouping);
            }

            break;
        }

        return new RegexNode(nodeType, _options);
    }