Beispiel #1
0
        /// <summary>
        /// Creates an instance whose character class is obtained by parsing
        /// <paramref name="charClass"/> with <c>RegexCharClass.Parse</c>.
        /// </summary>
        /// <param name="charClass">Character-class string handed to <c>RegexCharClass.Parse</c>.</param>
        /// <param name="nullable">Value stored unchanged in <c>_nullable</c>.</param>
        /// <param name="caseInsensitive">Value stored unchanged in <c>_caseInsensitive</c>.</param>
        internal RegexFC(string charClass, bool nullable, bool caseInsensitive)
        {
            // Parse first so that a failure leaves no field assigned.
            this._cc = RegexCharClass.Parse(charClass);

            this._nullable = nullable;
            this._caseInsensitive = caseInsensitive;
        }
Beispiel #2
0
 /// <summary>
 /// Creates an instance whose character class starts empty and then has
 /// <paramref name="set"/> added to it through <c>AddSet</c>.
 /// </summary>
 /// <param name="set">Set string passed to <c>RegexCharClass.AddSet</c>.</param>
 /// <param name="nullable">Value stored unchanged in <c>_nullable</c>.</param>
 /// <param name="caseInsensitive">Value stored unchanged in <c>_caseInsensitive</c>.</param>
 internal RegexFC(string set, bool nullable, bool caseInsensitive)
 {
     RegexCharClass charClass = new RegexCharClass();
     charClass.AddSet(set);

     _cc = charClass;
     _nullable = nullable;
     _caseInsensitive = caseInsensitive;
 }
Beispiel #3
0
        /// <summary>
        /// Creates an instance describing either the single character
        /// <paramref name="ch"/> or, when <paramref name="not"/> is true,
        /// every character except <paramref name="ch"/>.
        /// </summary>
        /// <param name="ch">The character to include (or exclude).</param>
        /// <param name="not">True to build the complement of <paramref name="ch"/>.</param>
        /// <param name="nullable">Value stored unchanged in <c>_nullable</c>.</param>
        /// <param name="caseInsensitive">Value stored unchanged in <c>_caseInsensitive</c>.</param>
        internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive)
        {
            _cc = new RegexCharClass();

            if (!not)
            {
                // Positive case: the class is exactly { ch }.
                _cc.AddRange(ch, ch);
            }
            else
            {
                // Complement: everything below ch, then everything above it.
                // Each half is skipped when ch sits on that end of the char range.
                if (ch != char.MinValue)
                {
                    _cc.AddRange(char.MinValue, (char)(ch - 1));
                }

                if (ch != char.MaxValue)
                {
                    _cc.AddRange((char)(ch + 1), char.MaxValue);
                }
            }

            _caseInsensitive = caseInsensitive;
            _nullable = nullable;
        }
 /// <summary>
 /// Creates an instance describing either the single character
 /// <paramref name="ch"/> or, when <paramref name="not"/> is true,
 /// every character except <paramref name="ch"/>.
 /// </summary>
 /// <param name="ch">The character to include (or exclude).</param>
 /// <param name="not">True to build the complement of <paramref name="ch"/>.</param>
 /// <param name="nullable">Value stored unchanged in <c>_nullable</c>.</param>
 /// <param name="caseInsensitive">Value stored unchanged in <c>_caseInsensitive</c>.</param>
 internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive)
 {
     this._cc = new RegexCharClass();
     if (not)
     {
         // Lower half [\0, ch - 1]; empty when ch is already '\0'.
         if (ch > '\0')
         {
             this._cc.AddRange('\0', (char)(ch - 1));
         }

         // Upper half [ch + 1, \uFFFF]; empty when ch is already '\uFFFF'.
         // The upper bound is written as a char literal: an int literal such as
         // 0xffff has no implicit conversion to char, and every other AddRange
         // argument in this constructor is a char.
         if (ch < '\uFFFF')
         {
             this._cc.AddRange((char)(ch + 1), '\uFFFF');
         }
     }
     else
     {
         this._cc.AddRange(ch, ch);
     }
     this._caseInsensitive = caseInsensitive;
     this._nullable = nullable;
 }
 /// <summary>
 /// Sets the current unit to a single set node built from <paramref name="cc"/>.
 /// </summary>
 /// <param name="cc">
 /// Character class whose <c>ToSetCi</c> result and <c>Category</c> are passed to the new node.
 /// </param>
 internal void AddUnitSet(RegexCharClass cc)
 {
     // RegexNode.Set replaces the bare literal 11 so the node type is named
     // rather than encoded as a magic number.
     this._unit = new RegexNode(RegexNode.Set, this._options, cc.ToSetCi(this.UseOptionI(), this._culture), cc.Category);
 }
Beispiel #6
0
        /// <summary>
        /// Basic optimization. Single-letter alternations can be replaced
        /// by faster set specifications, and nested alternations with no
        /// intervening operators can be flattened:
        ///
        /// a|b|c|def|g|h -> [a-c]|def|[gh]
        /// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
        /// </summary>
        /// <remarks>
        /// The child list is rewritten in place. <c>i</c> is the read index and
        /// <c>j</c> the write index; whenever a child is dropped or merged into its
        /// predecessor, <c>j</c> is decremented so that the slot is reused, and the
        /// unused tail is trimmed after the loop.
        /// </remarks>
        /// <returns>
        /// A new <c>Nothing</c> node when this node has no child list; otherwise the
        /// result of <c>StripEnation(RegexNode.Nothing)</c> (that helper is not shown
        /// here, so its exact result should be confirmed there).
        /// </returns>
        internal RegexNode ReduceAlternation()
        {
            // Combine adjacent sets/chars

            bool         wasLastSet;            // previous kept child was a Set or One
            bool         lastNodeCannotMerge;   // previous kept child was a Set rejected by IsMergeable
            RegexOptions optionsLast;           // RightToLeft/IgnoreCase bits of the previous Set/One
            RegexOptions optionsAt;             // RightToLeft/IgnoreCase bits of the current child
            int          i;                     // read index into _children
            int          j;                     // write index into _children (j <= i)
            RegexNode    at;                    // child currently being examined
            RegexNode    prev;                  // child that 'at' is merged into

            // No child list at all: the alternation reduces to a Nothing node.
            if (_children == null)
            {
                return(new RegexNode(RegexNode.Nothing, _options));
            }

            wasLastSet          = false;
            lastNodeCannotMerge = false;
            optionsLast         = 0;

            for (i = 0, j = 0; i < _children.Count; i++, j++)
            {
                at = _children[i];

                // Compact: once earlier children have been dropped, shift this one down.
                if (j < i)
                {
                    _children[j] = at;
                }

                // Single-pass block; the for(;;) exists only so that 'break' can skip
                // the merge code below without a goto.
                for (; ;)
                {
                    if (at._type == Alternate)
                    {
                        // Flatten a nested alternation: re-parent its children to this
                        // node and splice them in right after the current position so
                        // that the following iterations visit them.
                        for (int k = 0; k < at._children.Count; k++)
                        {
                            at._children[k]._next = this;
                        }

                        _children.InsertRange(i + 1, at._children);
                        // Drop the nested Alternate node itself (its slot is overwritten).
                        j--;
                    }
                    else if (at._type == Set || at._type == One)
                    {
                        // Cannot merge sets if L or I options differ, or if either are negated.
                        optionsAt = at._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);


                        if (at._type == Set)
                        {
                            // Keep this Set as a separate child (and remember its state)
                            // when there is nothing compatible before it to merge into.
                            if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !RegexCharClass.IsMergeable(at._str))
                            {
                                wasLastSet          = true;
                                lastNodeCannotMerge = !RegexCharClass.IsMergeable(at._str);
                                optionsLast         = optionsAt;
                                break;
                            }
                        }
                        else if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge)
                        {
                            // Same decision for a One node; a One is always recorded as mergeable.
                            wasLastSet          = true;
                            lastNodeCannotMerge = false;
                            optionsLast         = optionsAt;
                            break;
                        }


                        // The last node was a Set or a One, we're a Set or One and our options are the same.
                        // Merge the two nodes.
                        // Step the write index back to the previous kept child; the loop's
                        // j++ then leaves j just past it, so 'at' is dropped.
                        j--;
                        prev = _children[j];

                        // Build a character class holding the previous child's contents.
                        RegexCharClass prevCharClass;
                        if (prev._type == RegexNode.One)
                        {
                            prevCharClass = new RegexCharClass();
                            prevCharClass.AddChar(prev._ch);
                        }
                        else
                        {
                            prevCharClass = RegexCharClass.Parse(prev._str);
                        }

                        // Add the current child's contents to it.
                        if (at._type == RegexNode.One)
                        {
                            prevCharClass.AddChar(at._ch);
                        }
                        else
                        {
                            RegexCharClass atCharClass = RegexCharClass.Parse(at._str);
                            prevCharClass.AddCharClass(atCharClass);
                        }

                        // The previous child becomes (or stays) a Set carrying the merged class.
                        prev._type = RegexNode.Set;
                        prev._str  = prevCharClass.ToStringClass();
                    }
                    else if (at._type == RegexNode.Nothing)
                    {
                        // A Nothing branch is dropped from the child list.
                        j--;
                    }
                    else
                    {
                        // Any other node type ends the current run of mergeable children.
                        wasLastSet          = false;
                        lastNodeCannotMerge = false;
                    }
                    break;
                }
            }

            // Trim the slots left over after compaction.
            if (j < i)
            {
                _children.RemoveRange(j, i - j);
            }

            return(StripEnation(RegexNode.Nothing));
        }
Beispiel #7
0
        /// <summary>
        /// The top-level RegexCode generator. It walks the tree depth-first and
        /// calls EmitFragment before and after each child of an interior node,
        /// and once at each leaf.
        ///
        /// The walk runs twice: the first pass counts the size of the generated
        /// code, the second pass (after <c>_emitted</c> has been allocated with
        /// <c>_count</c> entries) generates it.
        /// </summary>
        /// <param name="tree">The parsed tree to generate code for.</param>
        /// <returns>A new RegexCode built from the emitted state and the computed prefixes and anchors.</returns>
        internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
        {
            // Construct the sparse capture-number mapping only if some numbers are unused.
            int capsize;
            bool noSparseMapping = tree._capnumlist == null || tree._captop == tree._capnumlist.Length;

            if (noSparseMapping)
            {
                capsize = tree._captop;
                _caps = null;
            }
            else
            {
                capsize = tree._capnumlist.Length;
                _caps = tree._caps;

                for (int slot = 0; slot < tree._capnumlist.Length; slot++)
                {
                    _caps[tree._capnumlist[slot]] = slot;
                }
            }

            _counting = true;

            while (true)
            {
                // Second pass only: allocate the output buffer from the counted size.
                if (!_counting)
                {
                    _emitted = new int[_count];
                }

                RegexNode node = tree._root;
                int childIndex = 0;

                Emit(RegexCode.Lazybranch, 0);

                while (true)
                {
                    if (node._children == null)
                    {
                        // Leaf.
                        EmitFragment(node._type, node, 0);
                    }
                    else if (childIndex < node._children.Count)
                    {
                        // Descend into the next child, remembering which one it was.
                        EmitFragment(node._type | BeforeChild, node, childIndex);

                        node = (RegexNode)node._children[childIndex];
                        PushInt(childIndex);
                        childIndex = 0;
                        continue;
                    }

                    if (EmptyStack())
                    {
                        break;
                    }

                    // Climb back to the parent and finish the child we just left.
                    childIndex = PopInt();
                    node = node._next;

                    EmitFragment(node._type | AfterChild, node, childIndex);
                    childIndex++;
                }

                PatchJump(0, CurPos());
                Emit(RegexCode.Stop);

                if (!_counting)
                {
                    break;
                }

                _counting = false;
            }

            // If the set of possible first chars is very large,
            // don't bother scanning for it (common case: . == [^\n]).
            // NOTE(review): as written, the prefix is discarded whenever SetSize
            // returns any positive value - confirm SetSize's semantics.
            RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);

            if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0)
            {
                fcPrefix = null;
            }

            // The scan-chars prefix is not computed; null is always passed on.
            RegexPrefix scPrefix = null;
            RegexPrefix prefix = RegexFCD.Prefix(tree);
            bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

            CultureInfo culture;
            if ((tree._options & RegexOptions.CultureInvariant) != 0)
            {
                culture = CultureInfo.InvariantCulture;
            }
            else
            {
                culture = CultureInfo.CurrentCulture;
            }

            // A Boyer-Moore matcher is built only for a non-empty literal prefix.
            RegexBoyerMoore bmPrefix = null;
            if (prefix != null && prefix.Prefix.Length > 0)
            {
                bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
            }

            int anchors = RegexFCD.Anchors(tree);

            return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, scPrefix, anchors, rtl);
        }
Beispiel #8
0
 /// <summary>
 /// Creates an instance with a freshly constructed character class.
 /// </summary>
 /// <param name="nullable">Value stored unchanged in <c>_nullable</c>.</param>
 internal RegexFC(bool nullable)
 {
     this._cc = new RegexCharClass();
     this._nullable = nullable;
 }
 /// <summary>
 /// Creates a character class directly from already-built parts and marks
 /// it as canonical (<c>_canonical</c> is set to true).
 /// </summary>
 /// <param name="negate">Value stored in <c>_negate</c>.</param>
 /// <param name="ranges">Range list stored by reference in <c>_rangelist</c> (not copied).</param>
 /// <param name="categories">Builder stored by reference in <c>_categories</c> (not copied).</param>
 /// <param name="subtraction">Character class stored in <c>_subtractor</c>.</param>
 private RegexCharClass(bool negate, List<SingleRange> ranges, StringBuilder categories, RegexCharClass subtraction)
 {
     _rangelist = ranges;
     _categories = categories;
     _canonical = true;
     _negate = negate;
     _subtractor = subtraction;
 }
Beispiel #10
0
 /// <summary>
 /// Creates an instance whose character class is a new, default-constructed
 /// <c>RegexCharClass</c>.
 /// </summary>
 /// <param name="nullable">Value stored unchanged in <c>_nullable</c>.</param>
 internal RegexFC(bool nullable)
 {
     RegexCharClass emptyClass = new RegexCharClass();

     this._cc = emptyClass;
     this._nullable = nullable;
 }
Beispiel #11
0
        /// <summary>
        /// Scans the characters that follow a '\' (the '\' itself is not counted)
        /// and returns a RegexNode for the kind of atom that was scanned.
        /// </summary>
        /// <returns>
        /// An anchor/boundary node, a set node for a class escape (\w \s \d and
        /// their negations, \p/\P), or the result of ScanBasicBackslash for
        /// any other character.
        /// </returns>
        internal RegexNode ScanBackslash()
        {
            if (CharsRight() == 0)
            {
                throw MakeException(SR.IllegalEndEscape);
            }

            char escape = RightChar();

            switch (escape)
            {
                // Anchors and boundaries: the node type is derived from the escape character.
                case 'b':
                case 'B':
                case 'A':
                case 'G':
                case 'Z':
                case 'z':
                {
                    MoveRight();
                    return new RegexNode(TypeFromCode(escape), _options);
                }

                // Class escapes: each one picks the ECMA variant when UseOptionE() is true.
                case 'w':
                {
                    MoveRight();
                    return new RegexNode(
                        RegexNode.Set,
                        _options,
                        UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass);
                }

                case 'W':
                {
                    MoveRight();
                    return new RegexNode(
                        RegexNode.Set,
                        _options,
                        UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass);
                }

                case 's':
                {
                    MoveRight();
                    return new RegexNode(
                        RegexNode.Set,
                        _options,
                        UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass);
                }

                case 'S':
                {
                    MoveRight();
                    return new RegexNode(
                        RegexNode.Set,
                        _options,
                        UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass);
                }

                case 'd':
                {
                    MoveRight();
                    return new RegexNode(
                        RegexNode.Set,
                        _options,
                        UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass);
                }

                case 'D':
                {
                    MoveRight();
                    return new RegexNode(
                        RegexNode.Set,
                        _options,
                        UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass);
                }

                // \p{name} / \P{name}: build a class from the named category;
                // the upper-case form inverts it.
                case 'p':
                case 'P':
                {
                    MoveRight();

                    RegexCharClass categoryClass = new RegexCharClass();
                    categoryClass.AddCategoryFromName(ParseProperty(), escape == 'P', UseOptionI(), _pattern);

                    if (UseOptionI())
                    {
                        categoryClass.AddLowercase(_culture);
                    }

                    return new RegexNode(RegexNode.Set, _options, categoryClass.ToStringClass());
                }

                // Anything else is handled by the basic-escape scanner; note that
                // the current character has not been consumed on this path.
                default:
                {
                    return ScanBasicBackslash();
                }
            }
        }
Beispiel #12
0
 /// <summary>
 /// Sets the current unit to a single set node built from <paramref name="cc"/>.
 /// </summary>
 /// <param name="cc">
 /// Character class whose <c>ToSetCi</c> result and <c>Category</c> are passed to the new node.
 /// </param>
 internal void AddUnitSet(RegexCharClass cc)
 {
     this._unit = new RegexNode(
         RegexNode.Set,
         this._options,
         cc.ToSetCi(this.UseOptionI(), this._culture),
         cc.Category);
 }
Beispiel #13
0
        protected override void Go()
        {
            Goto(0);

            for (;;)
            {
#if DBG
                if (runmatch.Debug)
                {
                    DumpState();
                }
#endif

                switch (Operator())
                {
                case RegexCode.Stop:
                    return;

                case RegexCode.Nothing:
                    break;

                case RegexCode.Goto:
                    Goto(Operand(0));
                    continue;

                case RegexCode.Testref:
                    if (!IsMatched(Operand(0)))
                    {
                        break;
                    }
                    Advance(1);
                    continue;

                case RegexCode.Lazybranch:
                    Track(Textpos());
                    Advance(1);
                    continue;

                case RegexCode.Lazybranch | RegexCode.Back:
                    Trackframe(1);
                    Textto(Tracked(0));
                    Goto(Operand(0));
                    continue;

                case RegexCode.Setmark:
                    Stack(Textpos());
                    Track();
                    Advance();
                    continue;

                case RegexCode.Nullmark:
                    Stack(-1);
                    Track();
                    Advance();
                    continue;

                case RegexCode.Setmark | RegexCode.Back:
                case RegexCode.Nullmark | RegexCode.Back:
                    Stackframe(1);
                    break;

                case RegexCode.Getmark:
                    Stackframe(1);
                    Track(Stacked(0));
                    Textto(Stacked(0));
                    Advance();
                    continue;

                case RegexCode.Getmark | RegexCode.Back:
                    Trackframe(1);
                    Stack(Tracked(0));
                    break;

                case RegexCode.Capturemark:
                    if (Operand(1) != -1 && !IsMatched(Operand(1)))
                    {
                        break;
                    }
                    Stackframe(1);
                    if (Operand(1) != -1)
                    {
                        TransferCapture(Operand(0), Operand(1), Stacked(0), Textpos());
                    }
                    else
                    {
                        Capture(Operand(0), Stacked(0), Textpos());
                    }
                    Track(Stacked(0));

                    Advance(2);

                    /*
                     *
                     */
                    continue;

                case RegexCode.Capturemark | RegexCode.Back:
                    Trackframe(1);
                    Stack(Tracked(0));
                    Uncapture();
                    if (Operand(0) != -1 && Operand(1) != -1)
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Branchmark:
                {
                    int matched;
                    Stackframe(1);

                    matched = Textpos() - Stacked(0);

                    if (matched != 0)                           // Nonempty match -> loop now
                    {
                        Track(Stacked(0), Textpos());           // Save old mark, textpos
                        Stack(Textpos());                       // Make new mark
                        Goto(Operand(0));                       // Loop
                    }
                    else                                        // Empty match -> straight now
                    {
                        Track2(Stacked(0));                     // Save old mark
                        Advance(1);                             // Straight
                    }
                    continue;
                }

                case RegexCode.Branchmark | RegexCode.Back:
                    Trackframe(2);
                    Stackframe(1);
                    Textto(Tracked(1));                         // Recall position
                    Track2(Tracked(0));                         // Save old mark
                    Advance(1);                                 // Straight
                    continue;

                case RegexCode.Branchmark | RegexCode.Back2:
                    Trackframe(1);
                    Stack(Tracked(0));                          // Recall old mark
                    break;                                      // Backtrack

                case RegexCode.Lazybranchmark:
                {
                    int matched;
                    Stackframe(1);

                    matched = Textpos() - Stacked(0);

                    if (matched != 0)                           // Nonempty match -> next loop
                    {
                        Track(Stacked(0), Textpos());           // Save old mark, textpos
                    }
                    else                                        // Empty match -> no loop
                    {
                        Track2(Stacked(0));                     // Save old mark
                    }
                    Advance(1);
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    int pos;

                    Trackframe(2);
                    pos = Tracked(1);
                    Track2(Tracked(0));                         // Save old mark
                    Stack(pos);                                 // Make new mark
                    Textto(pos);                                // Recall position
                    Goto(Operand(0));                           // Loop
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back2:
                    Stackframe(1);
                    Trackframe(1);
                    Stack(Tracked(0));                          // Recall old mark
                    break;

                case RegexCode.Setcount:
                    Stack(Textpos(), Operand(0));
                    Track();
                    Advance(1);
                    continue;

                case RegexCode.Nullcount:
                    Stack(-1, Operand(0));
                    Track();
                    Advance(1);
                    continue;

                case RegexCode.Setcount | RegexCode.Back:
                    Stackframe(2);
                    break;

                case RegexCode.Nullcount | RegexCode.Back:
                    Stackframe(2);
                    break;

                case RegexCode.Branchcount:
                    // Stack:
                    //  0: Mark
                    //  1: Count
                {
                    Stackframe(2);
                    int mark    = Stacked(0);
                    int count   = Stacked(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0)) // Max loops or empty match -> straight now
                    {
                        Track2(mark, count);                                 // Save old mark, count
                        Advance(2);                                          // Straight
                    }
                    else                                                     // Nonempty match -> count+loop now
                    {
                        Track(mark);                                         // remember mark
                        Stack(Textpos(), count + 1);                         // Make new mark, incr count
                        Goto(Operand(0));                                    // Loop
                    }
                    continue;
                }

                case RegexCode.Branchcount | RegexCode.Back:
                    // Track:
                    //  0: Previous mark
                    // Stack:
                    //  0: Mark (= current pos, discarded)
                    //  1: Count
                    Trackframe(1);
                    Stackframe(2);
                    if (Stacked(1) > 0)                         // Positive -> can go straight
                    {
                        Textto(Stacked(0));                     // Zap to mark
                        Track2(Tracked(0), Stacked(1) - 1);     // Save old mark, old count
                        Advance(2);                             // Straight
                        continue;
                    }
                    Stack(Tracked(0), Stacked(1) - 1);          // recall old mark, old count
                    break;

                case RegexCode.Branchcount | RegexCode.Back2:
                    // Track:
                    //  0: Previous mark
                    //  1: Previous count
                    Trackframe(2);
                    Stack(Tracked(0), Tracked(1));              // Recall old mark, old count
                    break;                                      // Backtrack


                case RegexCode.Lazybranchcount:
                    // Stack:
                    //  0: Mark
                    //  1: Count
                {
                    Stackframe(2);
                    int mark  = Stacked(0);
                    int count = Stacked(1);

                    if (count < 0)                              // Negative count -> loop now
                    {
                        Track2(mark);                           // Save old mark
                        Stack(Textpos(), count + 1);            // Make new mark, incr count
                        Goto(Operand(0));                       // Loop
                    }
                    else                                        // Nonneg count -> straight now
                    {
                        Track(mark, count, Textpos());          // Save mark, count, position
                        Advance(2);                             // Straight
                    }
                    continue;
                }

                case RegexCode.Lazybranchcount | RegexCode.Back:
                    // Track:
                    //  0: Mark
                    //  1: Count
                    //  2: Textpos
                {
                    Trackframe(3);
                    int mark    = Tracked(0);
                    int textpos = Tracked(2);
                    if (Tracked(1) <= Operand(1) && textpos != mark) // Under limit and not empty match -> loop
                    {
                        Textto(textpos);                             // Recall position
                        Stack(textpos, Tracked(1) + 1);              // Make new mark, incr count
                        Track2(mark);                                // Save old mark
                        Goto(Operand(0));                            // Loop
                        continue;
                    }
                    else                                            // Max loops or empty match -> backtrack
                    {
                        Stack(Tracked(0), Tracked(1));              // Recall old mark, count
                        break;                                      // backtrack
                    }
                }

                case RegexCode.Lazybranchcount | RegexCode.Back2:
                    // Track:
                    //  0: Previous mark
                    // Stack:
                    //  0: Mark (== current pos, discarded)
                    //  1: Count
                    Trackframe(1);
                    Stackframe(2);
                    Stack(Tracked(0), Stacked(1) - 1);      // Recall old mark, count
                    break;                                  // Backtrack

                case RegexCode.Setjump:
                    Stack(Trackpos(), Crawlpos());
                    Track();
                    Advance();
                    continue;

                case RegexCode.Setjump | RegexCode.Back:
                    Stackframe(2);
                    break;

                case RegexCode.Backjump:
                    // Stack:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    Stackframe(2);
                    Trackto(Stacked(0));

                    while (Crawlpos() != Stacked(1))
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Forejump:
                    // Stack:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    Stackframe(2);
                    Trackto(Stacked(0));
                    Track(Stacked(1));
                    Advance();
                    continue;

                case RegexCode.Forejump | RegexCode.Back:
                    // Track:
                    //  0: Crawlpos
                    Trackframe(1);

                    while (Crawlpos() != Tracked(0))
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Bol:
                    if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Eol:
                    if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Boundary:
                    if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Nonboundary:
                    if (IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.ECMABoundary:
                    if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.NonECMABoundary:
                    if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Beginning:
                    if (Leftchars() > 0)
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Start:
                    if (Textpos() != Textstart())
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.EndZ:
                    if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.End:
                    if (Rightchars() > 0)
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.One:
                    if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Notone:
                    if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Set:
                    if (Forwardchars() < 1 || !RegexCharClass.CharInSet(Forwardcharnext(), runstrings[Operand(0)], runstrings[Operand(1)]))
                    {
                        break;
                    }

                    Advance(2);
                    continue;

                case RegexCode.Multi:
                {
                    if (!Stringmatch(runstrings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;
                }

                case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    Advance(1);
                    continue;
                }

                case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setrep:
                {
                    int c = Operand(2);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    String set = runstrings[Operand(0)];
                    String cat = runstrings[Operand(1)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInSet(Forwardcharnext(), set, cat))
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(3);
                    continue;
                }

                case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        Track(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        Track(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setloop:
                {
                    int c = Operand(2);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    String set = runstrings[Operand(0)];
                    String cat = runstrings[Operand(1)];
                    int    i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInSet(Forwardcharnext(), set, cat))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        Track(c - i - 1, Textpos() - Bump());
                    }

                    Advance(3);
                    continue;
                }

                case RegexCode.Oneloop | RegexCode.Back:
                case RegexCode.Notoneloop | RegexCode.Back:
                {
                    Trackframe(2);
                    int i   = Tracked(0);
                    int pos = Tracked(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        Track(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setloop | RegexCode.Back:
                {
                    Trackframe(2);
                    int i   = Tracked(0);
                    int pos = Tracked(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        Track(i - 1, pos - Bump());
                    }

                    Advance(3);
                    continue;
                }

                case RegexCode.Onelazy:
                case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        Track(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setlazy:
                {
                    int c = Operand(2);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        Track(c - 1, Textpos());
                    }

                    Advance(3);
                    continue;
                }

                case RegexCode.Onelazy | RegexCode.Back:
                {
                    Trackframe(2);
                    int pos = Tracked(1);
                    Textto(pos);

                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = Tracked(0);

                    if (i > 0)
                    {
                        Track(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notonelazy | RegexCode.Back:
                {
                    Trackframe(2);
                    int pos = Tracked(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = Tracked(0);

                    if (i > 0)
                    {
                        Track(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setlazy | RegexCode.Back:
                {
                    Trackframe(2);
                    int pos = Tracked(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInSet(Forwardcharnext(), runstrings[Operand(0)], runstrings[Operand(1)]))
                    {
                        break;
                    }

                    int i = Tracked(0);

                    if (i > 0)
                    {
                        Track(i - 1, pos + Bump());
                    }

                    Advance(3);
                    continue;
                }

                default:
                    throw new NotImplementedException(SR.GetString(SR.UnimplementedState));
                }

BreakBackward:
                ;

                // "break Backward" comes here:
                Backtrack();
            }
        }
Beispiel #14
0
        // FindFirstChar()
        //
        // Moves runtextpos to the next position worth attempting a match at, using (in order)
        // the leading-anchor flags in runanchors, the runbmPrefix matcher, and finally the
        // first-character set in runfcPrefix.
        //
        // Returns true when a candidate position was found (or when no filter applies),
        // false when the remaining input cannot contain a match.
        protected override bool FindFirstChar()
        {
            int    i;
            String set;

            // Only take the anchor path when at least one of the four anchor flags is set.
            if (0 != (runanchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)))
            {
                if (!runcode._rightToLeft)
                {
                    // Left-to-right: once past the required beginning/start position no match
                    // is possible, so park the position at the end of the text and give up.
                    if ((0 != (runanchors & RegexFCD.Beginning) && runtextpos > runtextbeg) ||
                        (0 != (runanchors & RegexFCD.Start) && runtextpos > runtextstart))
                    {
                        runtextpos = runtextend;
                        return(false);
                    }
                    // End-type anchors: jump forward to the only position(s) that can satisfy them.
                    if (0 != (runanchors & RegexFCD.EndZ) && runtextpos < runtextend - 1)
                    {
                        runtextpos = runtextend - 1;
                    }
                    else if (0 != (runanchors & RegexFCD.End) && runtextpos < runtextend)
                    {
                        runtextpos = runtextend;
                    }
                }
                else
                {
                    // Right-to-left: the roles are mirrored. Being before the required end
                    // (or, for EndZ, before the last char / on a last char that is not '\n'),
                    // or before the start position, rules out any match.
                    if ((0 != (runanchors & RegexFCD.End) && runtextpos < runtextend) ||
                        (0 != (runanchors & RegexFCD.EndZ) && (runtextpos < runtextend - 1 ||
                                                               (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
                        (0 != (runanchors & RegexFCD.Start) && runtextpos < runtextstart))
                    {
                        runtextpos = runtextbeg;
                        return(false);
                    }
                    // A beginning anchor can only be satisfied at runtextbeg, so jump there.
                    if (0 != (runanchors & RegexFCD.Beginning) && runtextpos > runtextbeg)
                    {
                        runtextpos = runtextbeg;
                    }
                }

                // With an anchor fixing the position, the prefix only needs to be verified
                // in place rather than searched for.
                if (runbmPrefix != null)
                {
                    return(runbmPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend));
                }
                // NOTE(review): with no runbmPrefix, control deliberately falls out of this
                // branch into the runfcPrefix check below instead of returning here.
            }
            else if (runbmPrefix != null)
            {
                // No anchors: let the prefix matcher search for the next occurrence.
                runtextpos = runbmPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);

                // -1 signals "not found"; park the position at the far edge for the scan direction.
                if (runtextpos == -1)
                {
                    runtextpos = (runcode._rightToLeft ? runtextbeg : runtextend);
                    return(false);
                }

                return(true);
            }

            // No first-character information: every position is a candidate.
            if (runfcPrefix == null)
            {
                return(true);
            }

            // Set the direction / case flags before scanning.
            // NOTE(review): presumably Forwardchars/Forwardcharnext/Backwardnext read these — confirm.
            runrtl = runcode._rightToLeft;
            runci  = runfcPrefix.CaseInsensitive;
            set    = runfcPrefix.Prefix;

            if (RegexCharClass.IsSingleton(set))
            {
                // Single-character set: compare directly instead of doing a set lookup.
                char ch = RegexCharClass.SingletonChar(set);

                for (i = Forwardchars(); i > 0; i--)
                {
                    if (ch == Forwardcharnext())
                    {
                        // Forwardcharnext consumed the hit; step back so the match starts on it.
                        Backwardnext();
                        return(true);
                    }
                }
            }
            else
            {
                for (i = Forwardchars(); i > 0; i--)
                {
                    if (RegexCharClass.CharInSet(Forwardcharnext(), set, String.Empty))
                    {
                        // Forwardcharnext consumed the hit; step back so the match starts on it.
                        Backwardnext();
                        return(true);
                    }
                }
            }
            // Scanned all remaining characters without finding a possible first character.
            return(false);
        }
 /// <summary>
 /// Stores <paramref name="sub"/> as this character class's subtractor,
 /// replacing whatever subtractor was previously recorded.
 /// </summary>
 /// <param name="sub">The character class to record as the subtractor.</param>
 internal void AddSubtraction(RegexCharClass sub)
 {
     _subtractor = sub;
 }
Beispiel #16
0
        // AddCharClass()
        //
        // Adds a regex char class: every range of cc is appended to our range list
        // and cc's categories are appended to our categories.
        //
        // cc: the character class whose ranges and categories are merged in.
        //
        // Ranges are appended verbatim, so we can only remain canonical when cc is
        // itself canonical and its first range begins strictly after our current last
        // range ends. Checking cc's *last* range (as was done previously) misses the
        // case where cc starts inside our ranges but extends beyond them,
        // e.g. [a-m] + [c-z].
        internal void AddCharClass(RegexCharClass cc) {
            int i;

            if (!cc._canonical) {
                // If the class being added isn't canonical, the result isn't either.
                _canonical = false;
            }
            else if (_canonical && RangeCount() > 0 && cc.RangeCount() > 0 &&
                     cc.Range(0)._first <= Range(RangeCount() - 1)._last) {
                // The incoming ranges begin at or before the end of our last range.
                _canonical = false;
            }

            for (i = 0; i < cc.RangeCount(); i += 1) {
                _rangelist.Add(cc.Range(i));
            }

            _categories.Append(cc._categories.ToString());
        }
        /*
         * AddCharClass()
         *
         * Merges another character class into this one by appending all of its
         * ranges to our range list and its categories to our categories.
         * Both classes are asserted (debug builds) to be mergeable.
         */
        internal void AddCharClass(RegexCharClass cc) {
            Debug.Assert(cc.CanMerge && this.CanMerge, "Both character classes added together must be able to merge");

            // We lose canonical form when the incoming class is not canonical, or when
            // its first range begins at or before the end of our current last range.
            if (!cc._canonical ||
                (_canonical && RangeCount() > 0 && cc.RangeCount() > 0 &&
                 cc.GetRangeAt(0)._first <= GetRangeAt(RangeCount() - 1)._last)) {
                _canonical = false;
            }

            int index = 0;
            while (index < cc.RangeCount()) {
                _rangelist.Add(cc.GetRangeAt(index));
                index++;
            }

            _categories.Append(cc._categories.ToString());
        }
Beispiel #18
0
 /// <summary>
 /// Creates a first-character descriptor backed by a freshly constructed
 /// <c>RegexCharClass</c>, recording the supplied nullable flag.
 /// </summary>
 /// <param name="nullable">Value stored in the <c>_nullable</c> field.</param>
 public RegexFC(bool nullable)
 {
     this._cc = new RegexCharClass();
     this._nullable = nullable;
 }
Beispiel #19
0
        /// <summary>
        /// Moves <c>_runtextpos</c> to the next position at which a match attempt is
        /// worthwhile, consulting the leading-anchor flags, then the <c>_bmPrefix</c>
        /// matcher, then the first-character set in <c>_fcPrefix</c>.
        /// </summary>
        /// <returns>
        /// true if a candidate position was found (or no filter applies);
        /// false if the remaining input cannot contain a match.
        /// </returns>
        protected override bool FindFirstChar()
        {
            bool wantsBeginning = (_code._anchors & RegexFCD.Beginning) != 0;
            bool wantsStart     = (_code._anchors & RegexFCD.Start) != 0;
            bool wantsEndZ      = (_code._anchors & RegexFCD.EndZ) != 0;
            bool wantsEnd       = (_code._anchors & RegexFCD.End) != 0;

            if (wantsBeginning || wantsStart || wantsEndZ || wantsEnd)
            {
                if (_code._rightToLeft)
                {
                    // Right-to-left: being before the required end (or start) position
                    // rules out any match; park at the beginning of the text.
                    if ((wantsEnd && _runtextpos < _runtextend) ||
                        (wantsEndZ && (_runtextpos < _runtextend - 1 ||
                                       (_runtextpos == _runtextend - 1 && CharAt(_runtextpos) != '\n'))) ||
                        (wantsStart && _runtextpos < _runtextstart))
                    {
                        _runtextpos = _runtextbeg;
                        return false;
                    }

                    if (wantsBeginning && _runtextpos > _runtextbeg)
                    {
                        _runtextpos = _runtextbeg;
                    }
                }
                else
                {
                    // Left-to-right: being past the required beginning/start position
                    // rules out any match; park at the end of the text.
                    if ((wantsBeginning && _runtextpos > _runtextbeg) ||
                        (wantsStart && _runtextpos > _runtextstart))
                    {
                        _runtextpos = _runtextend;
                        return false;
                    }

                    // End-type anchors: skip ahead to the only position that can satisfy them.
                    if (wantsEndZ && _runtextpos < _runtextend - 1)
                    {
                        _runtextpos = _runtextend - 1;
                    }
                    else if (wantsEnd && _runtextpos < _runtextend)
                    {
                        _runtextpos = _runtextend;
                    }
                }

                // The anchor fixed the position; a prefix (if any) only needs verifying in place.
                if (_code._bmPrefix == null)
                {
                    return true;
                }

                return _code._bmPrefix.IsMatch(_runtext, _runtextpos, _runtextbeg, _runtextend);
            }

            if (_code._bmPrefix != null)
            {
                // No anchors: search for the next occurrence of the prefix.
                _runtextpos = _code._bmPrefix.Scan(_runtext, _runtextpos, _runtextbeg, _runtextend);

                if (_runtextpos != -1)
                {
                    return true;
                }

                // Not found: park at the far edge for the current scan direction.
                _runtextpos = _code._rightToLeft ? _runtextbeg : _runtextend;
                return false;
            }

            if (_code._fcPrefix == null)
            {
                // Nothing known about the first character: every position is a candidate.
                return true;
            }

            // These flags must be set before the Forward*/Backward* helpers are used.
            _rightToLeft     = _code._rightToLeft;
            _caseInsensitive = _code._fcPrefix.CaseInsensitive;
            string firstChars = _code._fcPrefix.Prefix;

            if (RegexCharClass.IsSingleton(firstChars))
            {
                char only = RegexCharClass.SingletonChar(firstChars);

                for (int left = Forwardchars(); left > 0; left--)
                {
                    if (Forwardcharnext() == only)
                    {
                        // Undo the advance so the position rests on the character just found.
                        Backwardnext();
                        return true;
                    }
                }

                return false;
            }

            for (int left = Forwardchars(); left > 0; left--)
            {
                if (RegexCharClass.CharInClass(Forwardcharnext(), firstChars))
                {
                    // Undo the advance so the position rests on the character just found.
                    Backwardnext();
                    return true;
                }
            }

            return false;
        }
        /// <summary>
        /// Advances <c>runtextpos</c> to the next position where a match could start.
        /// Leading anchors are honoured first, then the <c>BMPrefix</c> matcher, and
        /// finally the first-character set described by <c>FCPrefix</c>.
        /// </summary>
        /// <returns>
        /// true when a candidate position exists (or nothing restricts the search);
        /// false when no further match is possible in the remaining input.
        /// </returns>
        protected override bool FindFirstChar()
        {
            bool hasBeginning = (_code.Anchors & RegexFCD.Beginning) != 0;
            bool hasStart = (_code.Anchors & RegexFCD.Start) != 0;
            bool hasEndZ = (_code.Anchors & RegexFCD.EndZ) != 0;
            bool hasEnd = (_code.Anchors & RegexFCD.End) != 0;

            if (hasBeginning || hasStart || hasEndZ || hasEnd)
            {
                if (_code.RightToLeft)
                {
                    // Scanning backwards: a position before the required end/start can never match.
                    if ((hasEnd && runtextpos < runtextend) ||
                        (hasEndZ && (runtextpos < runtextend - 1 ||
                                     (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
                        (hasStart && runtextpos < runtextstart))
                    {
                        runtextpos = runtextbeg;
                        return false;
                    }

                    if (hasBeginning && runtextpos > runtextbeg)
                    {
                        runtextpos = runtextbeg;
                    }
                }
                else
                {
                    // Scanning forwards: a position past the required beginning/start can never match.
                    if ((hasBeginning && runtextpos > runtextbeg) ||
                        (hasStart && runtextpos > runtextstart))
                    {
                        runtextpos = runtextend;
                        return false;
                    }

                    // Jump straight to the only position an end-type anchor can accept.
                    if (hasEndZ && runtextpos < runtextend - 1)
                    {
                        runtextpos = runtextend - 1;
                    }
                    else if (hasEnd && runtextpos < runtextend)
                    {
                        runtextpos = runtextend;
                    }
                }

                // Found a valid start or end anchor; verify the prefix in place when there is one.
                return _code.BMPrefix == null
                    || _code.BMPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
            }

            if (_code.BMPrefix != null)
            {
                runtextpos = _code.BMPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);

                if (runtextpos == -1)
                {
                    // Prefix not present: park at the far edge for the current direction.
                    runtextpos = _code.RightToLeft ? runtextbeg : runtextend;
                    return false;
                }

                return true;
            }

            if (_code.FCPrefix == null)
            {
                // No first-character information, so any position may start a match.
                return true;
            }

            // These flags must be set before the Forward*/Backward* helpers are used.
            _rightToLeft = _code.RightToLeft;
            _caseInsensitive = _code.FCPrefix.GetValueOrDefault().CaseInsensitive;
            string candidates = _code.FCPrefix.GetValueOrDefault().Prefix;

            if (RegexCharClass.IsSingleton(candidates))
            {
                char target = RegexCharClass.SingletonChar(candidates);
                int remaining = Forwardchars();

                while (remaining > 0)
                {
                    if (Forwardcharnext() == target)
                    {
                        // Step back so the position rests on the character just found.
                        Backwardnext();
                        return true;
                    }

                    remaining--;
                }
            }
            else
            {
                int remaining = Forwardchars();

                while (remaining > 0)
                {
                    if (RegexCharClass.CharInClass(Forwardcharnext(), candidates))
                    {
                        // Step back so the position rests on the character just found.
                        Backwardnext();
                        return true;
                    }

                    remaining--;
                }
            }

            return false;
        }
 /// <summary>
 /// Adds a regex char class: appends every range of <paramref name="cc"/> to this
 /// class's range list and appends its categories to this class's categories.
 /// </summary>
 /// <param name="cc">The character class whose ranges and categories are merged in.</param>
 /// <remarks>
 /// Ranges are appended verbatim, so this class only remains canonical when
 /// <paramref name="cc"/> is itself canonical and its first range begins strictly
 /// after our current last range ends. Comparing the two <em>last</em> ranges (the
 /// previous check) missed inputs such as [a-m] + [c-z].
 /// </remarks>
 internal void AddCharClass(RegexCharClass cc)
 {
     if (!cc._canonical)
     {
         // A non-canonical class being added makes the result non-canonical too.
         this._canonical = false;
     }
     else if ((this._canonical && (this.RangeCount() > 0)) && ((cc.RangeCount() > 0) && (cc.Range(0)._first <= this.Range(this.RangeCount() - 1)._last)))
     {
         // The incoming ranges begin at or before the end of our last range.
         this._canonical = false;
     }
     for (int i = 0; i < cc.RangeCount(); i++)
     {
         this._rangelist.Add(cc.Range(i));
     }
     this._categories.Append(cc._categories.ToString());
 }
        protected override void Go()
        {
            Goto(0);

            int advance = -1;

            for (; ;)
            {
                if (advance >= 0)
                {
                    // https://github.com/dotnet/coreclr/pull/14850#issuecomment-342256447
                    // Single common Advance call to reduce method size; and single method inline point
                    Advance(advance);
                    advance = -1;
                }
#if DEBUG
                if (runmatch.Debug)
                {
                    DumpState();
                }
#endif

                CheckTimeout();

                switch (Operator())
                {
                case RegexCode.Stop:
                    return;

                case RegexCode.Nothing:
                    break;

                case RegexCode.Goto:
                    Goto(Operand(0));
                    continue;

                case RegexCode.Testref:
                    if (!IsMatched(Operand(0)))
                    {
                        break;
                    }
                    advance = 1;
                    continue;

                case RegexCode.Lazybranch:
                    TrackPush(Textpos());
                    advance = 1;
                    continue;

                case RegexCode.Lazybranch | RegexCode.Back:
                    TrackPop();
                    Textto(TrackPeek());
                    Goto(Operand(0));
                    continue;

                case RegexCode.Setmark:
                    StackPush(Textpos());
                    TrackPush();
                    advance = 0;
                    continue;

                case RegexCode.Nullmark:
                    StackPush(-1);
                    TrackPush();
                    advance = 0;
                    continue;

                case RegexCode.Setmark | RegexCode.Back:
                case RegexCode.Nullmark | RegexCode.Back:
                    StackPop();
                    break;

                case RegexCode.Getmark:
                    StackPop();
                    TrackPush(StackPeek());
                    Textto(StackPeek());
                    advance = 0;
                    continue;

                case RegexCode.Getmark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    break;

                case RegexCode.Capturemark:
                    if (Operand(1) != -1 && !IsMatched(Operand(1)))
                    {
                        break;
                    }
                    StackPop();
                    if (Operand(1) != -1)
                    {
                        TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos());
                    }
                    else
                    {
                        Capture(Operand(0), StackPeek(), Textpos());
                    }
                    TrackPush(StackPeek());

                    advance = 2;

                    continue;

                case RegexCode.Capturemark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    Uncapture();
                    if (Operand(0) != -1 && Operand(1) != -1)
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Branchmark:
                {
                    int matched;
                    StackPop();

                    matched = Textpos() - StackPeek();

                    if (matched != 0)
                    {                                      // Nonempty match -> loop now
                        TrackPush(StackPeek(), Textpos()); // Save old mark, textpos
                        StackPush(Textpos());              // Make new mark
                        Goto(Operand(0));                  // Loop
                    }
                    else
                    {                                          // Empty match -> straight now
                        TrackPush2(StackPeek());               // Save old mark
                        advance = 1;                           // Straight
                    }
                    continue;
                }

                case RegexCode.Branchmark | RegexCode.Back:
                    TrackPop(2);
                    StackPop();
                    Textto(TrackPeek(1));                           // Recall position
                    TrackPush2(TrackPeek());                        // Save old mark
                    advance = 1;                                    // Straight
                    continue;

                case RegexCode.Branchmark | RegexCode.Back2:
                    TrackPop();
                    StackPush(TrackPeek());                         // Recall old mark
                    break;                                          // Backtrack

                case RegexCode.Lazybranchmark:
                {
                    // We hit this the first time through a lazy loop and after each
                    // successful match of the inner expression.  It simply continues
                    // on and doesn't loop.
                    StackPop();

                    int oldMarkPos = StackPeek();

                    if (Textpos() != oldMarkPos)
                    {                      // Nonempty match -> try to loop again by going to 'back' state
                        if (oldMarkPos != -1)
                        {
                            TrackPush(oldMarkPos, Textpos());           // Save old mark, textpos
                        }
                        else
                        {
                            TrackPush(Textpos(), Textpos());
                        }
                    }
                    else
                    {
                        // The inner expression found an empty match, so we'll go directly to 'back2' if we
                        // backtrack.  In this case, we need to push something on the stack, since back2 pops.
                        // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
                        // position associated with that empty match.
                        StackPush(oldMarkPos);

                        TrackPush2(StackPeek());                        // Save old mark
                    }
                    advance = 1;
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    // After the first time, Lazybranchmark | RegexCode.Back occurs
                    // with each iteration of the loop, and therefore with every attempted
                    // match of the inner expression.  We'll try to match the inner expression,
                    // then go back to Lazybranchmark if successful.  If the inner expression
                    // fails, we go to Lazybranchmark | RegexCode.Back2
                    int pos;

                    TrackPop(2);
                    pos = TrackPeek(1);
                    TrackPush2(TrackPeek());                        // Save old mark
                    StackPush(pos);                                 // Make new mark
                    Textto(pos);                                    // Recall position
                    Goto(Operand(0));                               // Loop
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back2:
                    // The lazy loop has failed.  We'll do a true backtrack and
                    // start over before the lazy loop.
                    StackPop();
                    TrackPop();
                    StackPush(TrackPeek());                          // Recall old mark
                    break;

                case RegexCode.Setcount:
                    StackPush(Textpos(), Operand(0));
                    TrackPush();
                    advance = 1;
                    continue;

                case RegexCode.Nullcount:
                    StackPush(-1, Operand(0));
                    TrackPush();
                    advance = 1;
                    continue;

                case RegexCode.Setcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Nullcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Branchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark    = StackPeek();
                    int count   = StackPeek(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0))
                    {                                           // Max loops or empty match -> straight now
                        TrackPush2(mark, count);                // Save old mark, count
                        advance = 2;                            // Straight
                    }
                    else
                    {                                          // Nonempty match -> count+loop now
                        TrackPush(mark);                       // remember mark
                        StackPush(Textpos(), count + 1);       // Make new mark, incr count
                        Goto(Operand(0));                      // Loop
                    }
                    continue;
                }

                case RegexCode.Branchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (= current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    if (StackPeek(1) > 0)
                    {                                              // Positive -> can go straight
                        Textto(StackPeek());                       // Zap to mark
                        TrackPush2(TrackPeek(), StackPeek(1) - 1); // Save old mark, old count
                        advance = 2;                               // Straight
                        continue;
                    }
                    StackPush(TrackPeek(), StackPeek(1) - 1);           // recall old mark, old count
                    break;

                case RegexCode.Branchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    //  1: Previous count
                    TrackPop(2);
                    StackPush(TrackPeek(), TrackPeek(1));               // Recall old mark, old count
                    break;                                              // Backtrack


                case RegexCode.Lazybranchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark  = StackPeek();
                    int count = StackPeek(1);

                    if (count < 0)
                    {                                    // Negative count -> loop now
                        TrackPush2(mark);                // Save old mark
                        StackPush(Textpos(), count + 1); // Make new mark, incr count
                        Goto(Operand(0));                // Loop
                    }
                    else
                    {                                          // Nonneg count -> straight now
                        TrackPush(mark, count, Textpos());     // Save mark, count, position
                        advance = 2;                           // Straight
                    }
                    continue;
                }

                case RegexCode.Lazybranchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Mark
                    //  1: Count
                    //  2: Textpos
                {
                    TrackPop(3);
                    int mark    = TrackPeek();
                    int textpos = TrackPeek(2);

                    if (TrackPeek(1) < Operand(1) && textpos != mark)
                    {                                         // Under limit and not empty match -> loop
                        Textto(textpos);                      // Recall position
                        StackPush(textpos, TrackPeek(1) + 1); // Make new mark, incr count
                        TrackPush2(mark);                     // Save old mark
                        Goto(Operand(0));                     // Loop
                        continue;
                    }
                    else
                    {                                                  // Max loops or empty match -> backtrack
                        StackPush(TrackPeek(), TrackPeek(1));          // Recall old mark, count
                        break;                                         // backtrack
                    }
                }

                case RegexCode.Lazybranchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (== current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    StackPush(TrackPeek(), StackPeek(1) - 1);       // Recall old mark, count
                    break;                                          // Backtrack

                case RegexCode.Setjump:
                    StackPush(Trackpos(), Crawlpos());
                    TrackPush();
                    advance = 0;
                    continue;

                case RegexCode.Setjump | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Backjump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());

                    while (Crawlpos() != StackPeek(1))
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Forejump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());
                    TrackPush(StackPeek(1));
                    advance = 0;
                    continue;

                case RegexCode.Forejump | RegexCode.Back:
                    // TrackPush:
                    //  0: Crawlpos
                    TrackPop();

                    while (Crawlpos() != TrackPeek())
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Bol:
                    if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Eol:
                    if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Boundary:
                    if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Nonboundary:
                    if (IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.ECMABoundary:
                    if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.NonECMABoundary:
                    if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Beginning:
                    if (Leftchars() > 0)
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Start:
                    if (Textpos() != Textstart())
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.EndZ:
                    if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.End:
                    if (Rightchars() > 0)
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.One:
                    if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    advance = 1;
                    continue;

                case RegexCode.Notone:
                    if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    advance = 1;
                    continue;

                case RegexCode.Set:
                    if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)]))
                    {
                        break;
                    }

                    advance = 1;
                    continue;

                case RegexCode.Multi:
                {
                    if (!Stringmatch(_code.Strings[Operand(0)]))
                    {
                        break;
                    }

                    advance = 1;
                    continue;
                }

                case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    advance = 1;
                    continue;
                }

                case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setrep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    string set = _code.Strings[Operand(0)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    string set = _code.Strings[Operand(0)];
                    int    i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Oneloop | RegexCode.Back:
                case RegexCode.Notoneloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Onelazy:
                case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setlazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Onelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Notonelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setlazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)]))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

                default:
                    throw NotImplemented.ByDesignWithMessage(SR.UnimplementedState);
                }

BreakBackward:
                ;

                // "break Backward" comes here:
                Backtrack();
            }
        }
Beispiel #23
0
        /*
         * This is a related computation: it takes a RegexTree and computes the
         * leading []* construct if it sees one. It's quite trivial and gives up easily.
         */
        internal static RegexPrefix ScanChars(RegexTree tree)
        {
            // Walks down from the root of the tree looking for a leading unbounded
            // single-char / not-char / set loop. Returns a RegexPrefix wrapping that
            // loop's set string, or null as soon as anything else is encountered.
            RegexNode node      = tree._root;
            RegexNode sequence  = null;  // Concatenate node whose children are currently being walked
            int       nextIndex = 0;     // index of the next child of 'sequence' to visit
            String    leadingSet;

            while (true)
            {
                leadingSet = null;

                switch (node._type)
                {
                case RegexNode.Concatenate:
                    // Remember a non-empty concatenation so its children get visited in order.
                    if (node.ChildCount() > 0)
                    {
                        sequence  = node;
                        nextIndex = 0;
                    }
                    break;

                case RegexNode.Greedy:
                case RegexNode.Capture:
                    // Descend into the single child and forget any enclosing concatenation.
                    node     = node.Child(0);
                    sequence = null;
                    continue;

                case RegexNode.Bol:
                case RegexNode.Eol:
                case RegexNode.Boundary:
#if ECMA
                case RegexNode.ECMABoundary:
#endif
                case RegexNode.Beginning:
                case RegexNode.Start:
                case RegexNode.EndZ:
                case RegexNode.End:
                case RegexNode.Empty:
                case RegexNode.Require:
                case RegexNode.Prevent:
                    // These node types are stepped over; move on to the next sibling.
                    break;

                case RegexNode.Oneloop:
                case RegexNode.Onelazy:
                    if (node._n != infinite)
                    {
                        return null;
                    }

                    leadingSet = RegexCharClass.SetFromChar(node._ch);
                    break;

                case RegexNode.Notoneloop:
                case RegexNode.Notonelazy:
                    if (node._n != infinite)
                    {
                        return null;
                    }

                    leadingSet = RegexCharClass.SetInverseFromChar(node._ch);
                    break;

                case RegexNode.Setloop:
                case RegexNode.Setlazy:
                {
                    // Give up on bounded loops and on sets carrying a non-empty _str2.
                    bool hasSecondString = node._str2 != null && node._str2.Length != 0;

                    if (node._n != infinite || hasSecondString)
                    {
                        return null;
                    }

                    leadingSet = node._str;
                    break;
                }

                default:
                    return null;
                }

                if (leadingSet != null)
                {
                    // 'node' is still the loop node that produced the set, so its
                    // options decide whether the prefix is case-insensitive.
                    bool ignoreCase = (node._options & RegexOptions.IgnoreCase) != 0;
                    return new RegexPrefix(leadingSet, ignoreCase);
                }

                // Nothing found yet: advance to the next child of the remembered
                // concatenation, or give up when there is none left.
                if (sequence == null || nextIndex >= sequence.ChildCount())
                {
                    return null;
                }

                node = sequence.Child(nextIndex);
                nextIndex++;
            }
        }
 /// <summary>
 /// Creates a RegexFC whose character class is obtained by passing
 /// <paramref name="charClass"/> to <c>RegexCharClass.Parse</c>.
 /// </summary>
 /// <param name="charClass">String form of the character class to parse.</param>
 /// <param name="nullable">Stored as-is in the nullable flag.</param>
 /// <param name="caseInsensitive">Stored as-is in the case-insensitivity flag.</param>
 internal RegexFC(string charClass, bool nullable, bool caseInsensitive)
 {
     _nullable = nullable;
     _caseInsensitive = caseInsensitive;
     _cc = RegexCharClass.Parse(charClass);
 }
 /// <summary>
 /// Simplifies this node's list of children in place: children of type 0x18 are
 /// spliced into this list, adjacent single-character / set children with matching
 /// options are merged into one set node, and children of type 0x16 are dropped.
 /// </summary>
 /// <returns>
 /// A new node of type 0x16 when there are no children; otherwise the result of
 /// <c>StripEnation(0x16)</c> on the compacted node.
 /// </returns>
 internal RegexNode ReduceAlternation()
 {
     // Numeric node-type codes used below. The names are descriptive only
     // (presumably they mirror RegexNode.One/Set/Nothing/Alternate -- TODO confirm).
     const int CharType = 9;        // node carries a single character in _ch
     const int SetType = 11;        // node carries a character-class string in _str
     const int NothingType = 0x16;  // node that is removed from the child list
     const int NestedType = 0x18;   // node whose children are hoisted into this one

     if (this._children == null)
     {
         return new RegexNode(NothingType, this._options);
     }

     bool prevIsCharOrSet = false;    // previous kept child was a char/set node
     bool prevNotMergeable = false;   // ...and it was a set that must not be merged
     RegexOptions prevOptions = RegexOptions.None;

     int read;    // index of the child being examined
     int write;   // index where the next kept child is stored

     for (read = 0, write = 0; read < this._children.Count; read++, write++)
     {
         RegexNode current = this._children[read];

         // Compact the list as we go: shift the child down over dropped slots.
         if (write < read)
         {
             this._children[write] = current;
         }

         if (current._type == NestedType)
         {
             // Re-parent the nested children and queue them right after this one;
             // the nested node itself is not kept.
             for (int k = 0; k < current._children.Count; k++)
             {
                 current._children[k]._next = this;
             }
             this._children.InsertRange(read + 1, current._children);
             write--;
         }
         else if (current._type == SetType || current._type == CharType)
         {
             bool isSet = current._type == SetType;
             RegexOptions currentOptions =
                 current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

             // Merging needs a preceding mergeable char/set with identical options;
             // a set must additionally be mergeable itself.
             bool merge = prevIsCharOrSet
                          && prevOptions == currentOptions
                          && !prevNotMergeable
                          && (!isSet || RegexCharClass.IsMergeable(current._str));

             if (!merge)
             {
                 // Start a new run with the current node as its head.
                 prevIsCharOrSet = true;
                 prevNotMergeable = isSet && !RegexCharClass.IsMergeable(current._str);
                 prevOptions = currentOptions;
             }
             else
             {
                 // Fold the current node into the previously kept one.
                 write--;
                 RegexNode previous = this._children[write];

                 RegexCharClass merged;
                 if (previous._type == CharType)
                 {
                     merged = new RegexCharClass();
                     merged.AddChar(previous._ch);
                 }
                 else
                 {
                     merged = RegexCharClass.Parse(previous._str);
                 }

                 if (isSet)
                 {
                     merged.AddCharClass(RegexCharClass.Parse(current._str));
                 }
                 else
                 {
                     merged.AddChar(current._ch);
                 }

                 previous._type = SetType;
                 previous._str = merged.ToStringClass();
             }
         }
         else if (current._type == NothingType)
         {
             // Drop the node without disturbing the current char/set run.
             write--;
         }
         else
         {
             // Any other node ends the current char/set run.
             prevIsCharOrSet = false;
             prevNotMergeable = false;
         }
     }

     // Trim the slots left over after compaction.
     if (write < read)
     {
         this._children.RemoveRange(write, read - write);
     }

     return this.StripEnation(NothingType);
 }
        protected override void Go()
        {
            this.Goto(0);
Label_0007:
            switch (this.Operator())
            {
            case 0:
            {
                int num12 = this.Operand(1);
                if (this.Forwardchars() < num12)
                {
                    goto Label_0EA3;
                }
                char ch = (char)this.Operand(0);
                while (num12-- > 0)
                {
                    if (this.Forwardcharnext() != ch)
                    {
                        goto Label_0EA3;
                    }
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 1:
            {
                int num13 = this.Operand(1);
                if (this.Forwardchars() < num13)
                {
                    goto Label_0EA3;
                }
                char ch2 = (char)this.Operand(0);
                while (num13-- > 0)
                {
                    if (this.Forwardcharnext() == ch2)
                    {
                        goto Label_0EA3;
                    }
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 2:
            {
                int num14 = this.Operand(2);
                if (this.Forwardchars() < num14)
                {
                    goto Label_0EA3;
                }
                string set      = this.runstrings[this.Operand(0)];
                string category = this.runstrings[this.Operand(1)];
                while (num14-- > 0)
                {
                    if (!RegexCharClass.CharInSet(this.Forwardcharnext(), set, category))
                    {
                        goto Label_0EA3;
                    }
                }
                this.Advance(3);
                goto Label_0007;
            }

            case 3:
            {
                int num15 = this.Operand(1);
                if (num15 > this.Forwardchars())
                {
                    num15 = this.Forwardchars();
                }
                char ch3   = (char)this.Operand(0);
                int  num16 = num15;
                while (num16 > 0)
                {
                    if (this.Forwardcharnext() != ch3)
                    {
                        this.Backwardnext();
                        break;
                    }
                    num16--;
                }
                if (num15 > num16)
                {
                    this.Track((num15 - num16) - 1, this.Textpos() - this.Bump());
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 4:
            {
                int num17 = this.Operand(1);
                if (num17 > this.Forwardchars())
                {
                    num17 = this.Forwardchars();
                }
                char ch4   = (char)this.Operand(0);
                int  num18 = num17;
                while (num18 > 0)
                {
                    if (this.Forwardcharnext() == ch4)
                    {
                        this.Backwardnext();
                        break;
                    }
                    num18--;
                }
                if (num17 > num18)
                {
                    this.Track((num17 - num18) - 1, this.Textpos() - this.Bump());
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 5:
            {
                int num19 = this.Operand(2);
                if (num19 > this.Forwardchars())
                {
                    num19 = this.Forwardchars();
                }
                string str3  = this.runstrings[this.Operand(0)];
                string str4  = this.runstrings[this.Operand(1)];
                int    num20 = num19;
                while (num20 > 0)
                {
                    if (!RegexCharClass.CharInSet(this.Forwardcharnext(), str3, str4))
                    {
                        this.Backwardnext();
                        break;
                    }
                    num20--;
                }
                if (num19 > num20)
                {
                    this.Track((num19 - num20) - 1, this.Textpos() - this.Bump());
                }
                this.Advance(3);
                goto Label_0007;
            }

            case 6:
            case 7:
            {
                int num25 = this.Operand(1);
                if (num25 > this.Forwardchars())
                {
                    num25 = this.Forwardchars();
                }
                if (num25 > 0)
                {
                    this.Track(num25 - 1, this.Textpos());
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 8:
            {
                int num26 = this.Operand(2);
                if (num26 > this.Forwardchars())
                {
                    num26 = this.Forwardchars();
                }
                if (num26 > 0)
                {
                    this.Track(num26 - 1, this.Textpos());
                }
                this.Advance(3);
                goto Label_0007;
            }

            case 9:
                if ((this.Forwardchars() < 1) || (this.Forwardcharnext() != ((char)this.Operand(0))))
                {
                    goto Label_0EA3;
                }
                this.Advance(1);
                goto Label_0007;

            case 10:
                if ((this.Forwardchars() < 1) || (this.Forwardcharnext() == ((char)this.Operand(0))))
                {
                    goto Label_0EA3;
                }
                this.Advance(1);
                goto Label_0007;

            case 11:
                if ((this.Forwardchars() < 1) || !RegexCharClass.CharInSet(this.Forwardcharnext(), this.runstrings[this.Operand(0)], this.runstrings[this.Operand(1)]))
                {
                    goto Label_0EA3;
                }
                this.Advance(2);
                goto Label_0007;

            case 12:
                if (!this.Stringmatch(this.runstrings[this.Operand(0)]))
                {
                    goto Label_0EA3;
                }
                this.Advance(1);
                goto Label_0007;

            case 13:
            {
                int cap = this.Operand(0);
                if (!base.IsMatched(cap))
                {
                    if ((base.runregex.roptions & RegexOptions.ECMAScript) == RegexOptions.None)
                    {
                        goto Label_0EA3;
                    }
                    goto Label_0A06;
                }
                if (this.Refmatch(base.MatchIndex(cap), base.MatchLength(cap)))
                {
                    goto Label_0A06;
                }
                goto Label_0EA3;
            }

            case 14:
                if ((this.Leftchars() > 0) && (this.CharAt(this.Textpos() - 1) != '\n'))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 15:
                if ((this.Rightchars() > 0) && (this.CharAt(this.Textpos()) != '\n'))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x10:
                if (!base.IsBoundary(this.Textpos(), base.runtextbeg, base.runtextend))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x11:
                if (base.IsBoundary(this.Textpos(), base.runtextbeg, base.runtextend))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x12:
                if (this.Leftchars() > 0)
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x13:
                if (this.Textpos() != this.Textstart())
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 20:
                if ((this.Rightchars() > 1) || ((this.Rightchars() == 1) && (this.CharAt(this.Textpos()) != '\n')))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x15:
                if (this.Rightchars() > 0)
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x16:
                goto Label_0EA3;

            case 0x17:
                this.Track(this.Textpos());
                this.Advance(1);
                goto Label_0007;

            case 0x18:
                this.Stackframe(1);
                if ((this.Textpos() - this.Stacked(0)) == 0)
                {
                    this.Track2(this.Stacked(0));
                    this.Advance(1);
                }
                else
                {
                    this.Track(this.Stacked(0), this.Textpos());
                    this.Stack(this.Textpos());
                    this.Goto(this.Operand(0));
                }
                goto Label_0007;

            case 0x19:
                this.Stackframe(1);
                if ((this.Textpos() - this.Stacked(0)) == 0)
                {
                    this.Track2(this.Stacked(0));
                    break;
                }
                this.Track(this.Stacked(0), this.Textpos());
                break;

            case 0x1a:
                this.Stack(-1, this.Operand(0));
                this.Track();
                this.Advance(1);
                goto Label_0007;

            case 0x1b:
                this.Stack(this.Textpos(), this.Operand(0));
                this.Track();
                this.Advance(1);
                goto Label_0007;

            case 0x1c:
            {
                this.Stackframe(2);
                int num4 = this.Stacked(0);
                int num5 = this.Stacked(1);
                int num6 = this.Textpos() - num4;
                if ((num5 < this.Operand(1)) && ((num6 != 0) || (num5 < 0)))
                {
                    this.Track(num4);
                    this.Stack(this.Textpos(), num5 + 1);
                    this.Goto(this.Operand(0));
                }
                else
                {
                    this.Track2(num4, num5);
                    this.Advance(2);
                }
                goto Label_0007;
            }

            case 0x1d:
            {
                this.Stackframe(2);
                int num7 = this.Stacked(0);
                int num8 = this.Stacked(1);
                if (num8 >= 0)
                {
                    this.Track(num7, num8, this.Textpos());
                    this.Advance(2);
                }
                else
                {
                    this.Track2(num7);
                    this.Stack(this.Textpos(), num8 + 1);
                    this.Goto(this.Operand(0));
                }
                goto Label_0007;
            }

            case 30:
                this.Stack(-1);
                this.Track();
                this.Advance();
                goto Label_0007;

            case 0x1f:
                this.Stack(this.Textpos());
                this.Track();
                this.Advance();
                goto Label_0007;

            case 0x20:
                if ((this.Operand(1) != -1) && !base.IsMatched(this.Operand(1)))
                {
                    goto Label_0EA3;
                }
                this.Stackframe(1);
                if (this.Operand(1) != -1)
                {
                    base.TransferCapture(this.Operand(0), this.Operand(1), this.Stacked(0), this.Textpos());
                }
                else
                {
                    base.Capture(this.Operand(0), this.Stacked(0), this.Textpos());
                }
                this.Track(this.Stacked(0));
                this.Advance(2);
                goto Label_0007;

            case 0x21:
                this.Stackframe(1);
                this.Track(this.Stacked(0));
                this.Textto(this.Stacked(0));
                this.Advance();
                goto Label_0007;

            case 0x22:
                this.Stack(this.Trackpos(), base.Crawlpos());
                this.Track();
                this.Advance();
                goto Label_0007;

            case 0x23:
                this.Stackframe(2);
                this.Trackto(this.Stacked(0));
                while (base.Crawlpos() != this.Stacked(1))
                {
                    base.Uncapture();
                }
                goto Label_0EA3;

            case 0x24:
                this.Stackframe(2);
                this.Trackto(this.Stacked(0));
                this.Track(this.Stacked(1));
                this.Advance();
                goto Label_0007;

            case 0x25:
                if (!base.IsMatched(this.Operand(0)))
                {
                    goto Label_0EA3;
                }
                this.Advance(1);
                goto Label_0007;

            case 0x26:
                this.Goto(this.Operand(0));
                goto Label_0007;

            case 40:
                return;

            case 0x29:
                if (!base.IsECMABoundary(this.Textpos(), base.runtextbeg, base.runtextend))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x2a:
                if (base.IsECMABoundary(this.Textpos(), base.runtextbeg, base.runtextend))
                {
                    goto Label_0EA3;
                }
                this.Advance();
                goto Label_0007;

            case 0x83:
            case 0x84:
            {
                this.Trackframe(2);
                int num21  = this.Tracked(0);
                int newpos = this.Tracked(1);
                this.Textto(newpos);
                if (num21 > 0)
                {
                    this.Track(num21 - 1, newpos - this.Bump());
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 0x85:
            {
                this.Trackframe(2);
                int num23 = this.Tracked(0);
                int num24 = this.Tracked(1);
                this.Textto(num24);
                if (num23 > 0)
                {
                    this.Track(num23 - 1, num24 - this.Bump());
                }
                this.Advance(3);
                goto Label_0007;
            }

            case 0x86:
            {
                this.Trackframe(2);
                int num27 = this.Tracked(1);
                this.Textto(num27);
                if (this.Forwardcharnext() != ((char)this.Operand(0)))
                {
                    goto Label_0EA3;
                }
                int num28 = this.Tracked(0);
                if (num28 > 0)
                {
                    this.Track(num28 - 1, num27 + this.Bump());
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 0x87:
            {
                this.Trackframe(2);
                int num29 = this.Tracked(1);
                this.Textto(num29);
                if (this.Forwardcharnext() == ((char)this.Operand(0)))
                {
                    goto Label_0EA3;
                }
                int num30 = this.Tracked(0);
                if (num30 > 0)
                {
                    this.Track(num30 - 1, num29 + this.Bump());
                }
                this.Advance(2);
                goto Label_0007;
            }

            case 0x88:
            {
                this.Trackframe(2);
                int num31 = this.Tracked(1);
                this.Textto(num31);
                if (!RegexCharClass.CharInSet(this.Forwardcharnext(), this.runstrings[this.Operand(0)], this.runstrings[this.Operand(1)]))
                {
                    goto Label_0EA3;
                }
                int num32 = this.Tracked(0);
                if (num32 > 0)
                {
                    this.Track(num32 - 1, num31 + this.Bump());
                }
                this.Advance(3);
                goto Label_0007;
            }

            case 0x97:
                this.Trackframe(1);
                this.Textto(this.Tracked(0));
                this.Goto(this.Operand(0));
                goto Label_0007;

            case 0x98:
                this.Trackframe(2);
                this.Stackframe(1);
                this.Textto(this.Tracked(1));
                this.Track2(this.Tracked(0));
                this.Advance(1);
                goto Label_0007;

            case 0x99:
            {
                this.Trackframe(2);
                int num3 = this.Tracked(1);
                this.Track2(this.Tracked(0));
                this.Stack(num3);
                this.Textto(num3);
                this.Goto(this.Operand(0));
                goto Label_0007;
            }

            case 0x9a:
                this.Stackframe(2);
                goto Label_0EA3;

            case 0x9b:
                this.Stackframe(2);
                goto Label_0EA3;

            case 0x9c:
                this.Trackframe(1);
                this.Stackframe(2);
                if (this.Stacked(1) <= 0)
                {
                    this.Stack(this.Tracked(0), this.Stacked(1) - 1);
                    goto Label_0EA3;
                }
                this.Textto(this.Stacked(0));
                this.Track2(this.Tracked(0), this.Stacked(1) - 1);
                this.Advance(2);
                goto Label_0007;

            case 0x9d:
            {
                this.Trackframe(3);
                int num9  = this.Tracked(0);
                int num10 = this.Tracked(2);
                if ((this.Tracked(1) > this.Operand(1)) || (num10 == num9))
                {
                    this.Stack(this.Tracked(0), this.Tracked(1));
                    goto Label_0EA3;
                }
                this.Textto(num10);
                this.Stack(num10, this.Tracked(1) + 1);
                this.Track2(num9);
                this.Goto(this.Operand(0));
                goto Label_0007;
            }

            case 0x9e:
            case 0x9f:
                this.Stackframe(1);
                goto Label_0EA3;

            case 160:
                this.Trackframe(1);
                this.Stack(this.Tracked(0));
                base.Uncapture();
                if ((this.Operand(0) != -1) && (this.Operand(1) != -1))
                {
                    base.Uncapture();
                }
                goto Label_0EA3;

            case 0xa1:
                this.Trackframe(1);
                this.Stack(this.Tracked(0));
                goto Label_0EA3;

            case 0xa2:
                this.Stackframe(2);
                goto Label_0EA3;

            case 0xa4:
                this.Trackframe(1);
                while (base.Crawlpos() != this.Tracked(0))
                {
                    base.Uncapture();
                }
                goto Label_0EA3;

            case 280:
                this.Trackframe(1);
                this.Stack(this.Tracked(0));
                goto Label_0EA3;

            case 0x119:
                this.Stackframe(1);
                this.Trackframe(1);
                this.Stack(this.Tracked(0));
                goto Label_0EA3;

            case 0x11c:
                this.Trackframe(2);
                this.Stack(this.Tracked(0), this.Tracked(1));
                goto Label_0EA3;

            case 0x11d:
                this.Trackframe(1);
                this.Stackframe(2);
                this.Stack(this.Tracked(0), this.Stacked(1) - 1);
                goto Label_0EA3;

            default:
                throw new Exception(RegExRes.GetString(3));
            }
            this.Advance(1);
            goto Label_0007;
Label_0A06:
            this.Advance(1);
            goto Label_0007;
Label_0EA3:
            this.Backtrack();
            goto Label_0007;
        }
Beispiel #27
0
        /// <summary>
        /// Formats the instruction stored at <paramref name="offset"/> in <c>_codes</c> as a
        /// single line of text: the offset as six digits, a '*' when
        /// <c>OpcodeBacktracks</c> returns true for the masked opcode (a space otherwise),
        /// the operator description, and the operands wrapped in parentheses.
        /// </summary>
        /// <param name="offset">Index into <c>_codes</c> of the opcode to describe.</param>
        /// <returns>The formatted description.</returns>
        internal String OpcodeDescription(int offset)
        {
            int rawOpcode = _codes[offset];
            int op = rawOpcode & Mask;
            StringBuilder text = new StringBuilder();

            text.AppendFormat("{0:D6} ", offset)
                .Append(OpcodeBacktracks(op) ? '*' : ' ')
                .Append(OperatorDescription(rawOpcode))
                .Append('(');

            // First operand, labelled by opcode family. Opcodes not listed have no operand text.
            switch (op)
            {
                case One:
                case Notone:
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                    text.Append("Ch = ")
                        .Append(RegexCharClass.CharDescription((char)_codes[offset + 1]));
                    break;

                case Set:
                case Setrep:
                case Setloop:
                case Setlazy:
                    text.Append("Set = ")
                        .Append(RegexCharClass.SetDescription(_strings[_codes[offset + 1]]));
                    break;

                case Multi:
                    text.Append("String = ")
                        .Append(_strings[_codes[offset + 1]]);
                    break;

                case Ref:
                case Testref:
                    text.Append("Index = ")
                        .Append(_codes[offset + 1]);
                    break;

                case Capturemark:
                    text.Append("Index = ")
                        .Append(_codes[offset + 1]);
                    // -1 in the second operand slot means there is no "unindex" to show.
                    if (_codes[offset + 2] != -1)
                    {
                        text.Append(", Unindex = ")
                            .Append(_codes[offset + 2]);
                    }
                    break;

                case Nullcount:
                case Setcount:
                    text.Append("Value = ")
                        .Append(_codes[offset + 1]);
                    break;

                case Goto:
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Branchcount:
                case Lazybranchcount:
                    text.Append("Addr = ")
                        .Append(_codes[offset + 1]);
                    break;
            }

            // Second operand: a count printed as "inf" when it equals Int32.MaxValue.
            string countLabel = null;
            switch (op)
            {
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case Setrep:
                case Setloop:
                case Setlazy:
                    countLabel = ", Rep = ";
                    break;

                case Branchcount:
                case Lazybranchcount:
                    countLabel = ", Limit = ";
                    break;
            }

            if (countLabel != null)
            {
                text.Append(countLabel);

                int count = _codes[offset + 2];
                if (count == Int32.MaxValue)
                {
                    text.Append("inf");
                }
                else
                {
                    text.Append(count);
                }
            }

            text.Append(')');

            return text.ToString();
        }
        /// <summary>
        /// Moves <c>runtextpos</c> to the next position worth attempting a match at.
        /// It consults, in order, the anchor bits in <c>runanchors</c>, the
        /// <c>runbmPrefix</c> scanner, and the <c>runfcPrefix</c> first-character set.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a candidate position was found, or when there is no prefix
        /// information to filter with; <c>false</c> when no candidate remains.
        /// </returns>
        protected override bool FindFirstChar()
        {
            // NOTE(review): these names follow what look like the RegexFCD anchor flags
            // (Beginning/Start/EndZ/End) — confirm against the RegexFCD constants.
            const int Beginning = 0x01;
            const int Start = 0x04;
            const int EndZ = 0x10;
            const int End = 0x20;

            int anchors = this.runanchors;

            if ((anchors & (Beginning | Start | EndZ | End)) == 0)
            {
                // No leading/trailing anchor: let the prefix scanner find the next candidate.
                if (this.runbmPrefix != null)
                {
                    int hit = this.runbmPrefix.Scan(base.runtext, base.runtextpos, base.runtextbeg, base.runtextend);
                    base.runtextpos = hit;
                    if (hit != -1)
                    {
                        return true;
                    }

                    // Scan reported -1: park the position at the far edge of the scan direction.
                    base.runtextpos = this.runcode._rightToLeft ? base.runtextbeg : base.runtextend;
                    return false;
                }
            }
            else
            {
                bool hasBeginning = (anchors & Beginning) != 0;
                bool hasStart = (anchors & Start) != 0;
                bool hasEndZ = (anchors & EndZ) != 0;
                bool hasEnd = (anchors & End) != 0;

                if (this.runcode._rightToLeft)
                {
                    bool cannotMatch =
                        (hasEnd && base.runtextpos < base.runtextend)
                        || (hasEndZ
                            && (base.runtextpos < base.runtextend - 1
                                || (base.runtextpos == base.runtextend - 1 && this.CharAt(base.runtextpos) != '\n')))
                        || (hasStart && base.runtextpos < base.runtextstart);

                    if (cannotMatch)
                    {
                        base.runtextpos = base.runtextbeg;
                        return false;
                    }

                    if (hasBeginning && base.runtextpos > base.runtextbeg)
                    {
                        base.runtextpos = base.runtextbeg;
                    }
                }
                else
                {
                    bool cannotMatch =
                        (hasBeginning && base.runtextpos > base.runtextbeg)
                        || (hasStart && base.runtextpos > base.runtextstart);

                    if (cannotMatch)
                    {
                        base.runtextpos = base.runtextend;
                        return false;
                    }

                    // Jump straight to the only position(s) an end anchor could accept.
                    if (hasEndZ && base.runtextpos < base.runtextend - 1)
                    {
                        base.runtextpos = base.runtextend - 1;
                    }
                    else if (hasEnd && base.runtextpos < base.runtextend)
                    {
                        base.runtextpos = base.runtextend;
                    }
                }

                if (this.runbmPrefix != null)
                {
                    return this.runbmPrefix.IsMatch(base.runtext, base.runtextpos, base.runtextbeg, base.runtextend);
                }
            }

            if (this.runfcPrefix == null)
            {
                return true;
            }

            this.runrtl = this.runcode._rightToLeft;
            this.runci = this.runfcPrefix.CaseInsensitive;
            string firstChars = this.runfcPrefix.Prefix;

            int remaining;
            if (RegexCharClass.IsSingleton(firstChars))
            {
                // Single-character set: compare characters directly.
                char wanted = RegexCharClass.SingletonChar(firstChars);
                remaining = this.Forwardchars();
                while (remaining > 0)
                {
                    if (this.Forwardcharnext() == wanted)
                    {
                        // Step back so the matched character is the next one consumed.
                        this.Backwardnext();
                        return true;
                    }
                    remaining--;
                }
            }
            else
            {
                remaining = this.Forwardchars();
                while (remaining > 0)
                {
                    if (RegexCharClass.CharInSet(this.Forwardcharnext(), firstChars, string.Empty))
                    {
                        this.Backwardnext();
                        return true;
                    }
                    remaining--;
                }
            }

            return false;
        }
Beispiel #29
0
        /// <summary>
        /// Builds a text description of this node: the <c>TypeStr</c> entry for
        /// <c>_type</c>, one suffix for each option flag set in <c>_options</c>, then the
        /// type-specific arguments and, for loop types, the Min/Max bounds.
        /// </summary>
        /// <returns>The description string.</returns>
        internal String Description()
        {
            // Flags and their suffixes, in the order they are emitted.
            RegexOptions[] flags =
            {
                RegexOptions.ExplicitCapture,
                RegexOptions.IgnoreCase,
                RegexOptions.RightToLeft,
                RegexOptions.Multiline,
                RegexOptions.Singleline,
                RegexOptions.IgnorePatternWhitespace,
                RegexOptions.ECMAScript
            };
            string[] suffixes = { "-C", "-I", "-L", "-M", "-S", "-X", "-E" };

            StringBuilder text = new StringBuilder();
            text.Append(TypeStr[_type]);

            for (int i = 0; i < flags.Length; i++)
            {
                if ((_options & flags[i]) != 0)
                {
                    text.Append(suffixes[i]);
                }
            }

            // Type-specific argument list.
            switch (_type)
            {
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case One:
                case Notone:
                    text.Append("(Ch = ")
                        .Append(RegexCharClass.CharDescription(_ch))
                        .Append(")");
                    break;

                case Capture:
                    text.Append("(index = ")
                        .Append(_m.ToString(CultureInfo.InvariantCulture))
                        .Append(", unindex = ")
                        .Append(_n.ToString(CultureInfo.InvariantCulture))
                        .Append(")");
                    break;

                case Ref:
                case Testref:
                    text.Append("(index = ")
                        .Append(_m.ToString(CultureInfo.InvariantCulture))
                        .Append(")");
                    break;

                case Multi:
                    text.Append("(String = ")
                        .Append(_str)
                        .Append(")");
                    break;

                case Set:
                case Setloop:
                case Setlazy:
                    text.Append("(Set = ")
                        .Append(RegexCharClass.SetDescription(_str))
                        .Append(")");
                    break;
            }

            // Loop bounds; a maximum of Int32.MaxValue is printed as "inf".
            switch (_type)
            {
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case Setloop:
                case Setlazy:
                case Loop:
                case Lazyloop:
                    string max = (_n == Int32.MaxValue) ? "inf" : _n.ToString(CultureInfo.InvariantCulture);
                    text.Append("(Min = ")
                        .Append(_m.ToString(CultureInfo.InvariantCulture))
                        .Append(", Max = ")
                        .Append(max)
                        .Append(")");
                    break;
            }

            return text.ToString();
        }
 // Creates an instance holding a newly constructed RegexCharClass and the given
 // nullable flag. _caseInsensitive is not assigned by this overload.
 internal RegexFC(bool nullable)
 {
     _nullable = nullable;
     _cc = new RegexCharClass();
 }
Beispiel #31
0
        /// <summary>
        /// Formats the instruction at <paramref name="offset"/> in <paramref name="codes"/>
        /// as one line of text: the offset as six digits, a '*' when
        /// <c>OpcodeBacktracks</c> returns true for the masked opcode (a space otherwise),
        /// the operator description, and the operands padded out to a fixed column.
        /// </summary>
        /// <param name="offset">Index into <paramref name="codes"/> of the opcode to describe.</param>
        /// <param name="codes">The instruction array; operands follow their opcode.</param>
        /// <param name="strings">String table indexed by the operand of set and multi opcodes.</param>
        /// <returns>The formatted description.</returns>
        internal static string OpcodeDescription(int offset, int[] codes, string[] strings)
        {
            int rawOpcode = codes[offset];
            int op = rawOpcode & Mask;
            var text = new StringBuilder();

            // Pads what has been written so far to 25 characters, always adding at least one space.
            void Pad() => text.Append(' ', Math.Max(1, 25 - text.Length));

            text.Append($"{offset:D6} ");
            text.Append(OpcodeBacktracks(op) ? '*' : ' ');
            text.Append(OperatorDescription(rawOpcode));

            // First operand, formatted by opcode family. Opcodes not listed have no operand text.
            switch (op)
            {
                case One:
                case Notone:
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Oneloopatomic:
                case Notoneloop:
                case Notoneloopatomic:
                case Onelazy:
                case Notonelazy:
                    Pad();
                    text.Append('\'');
                    text.Append(RegexCharClass.CharDescription((char)codes[offset + 1]));
                    text.Append('\'');
                    break;

                case Set:
                case Setrep:
                case Setloop:
                case Setloopatomic:
                case Setlazy:
                    Pad();
                    text.Append(RegexCharClass.SetDescription(strings[codes[offset + 1]]));
                    break;

                case Multi:
                    Pad();
                    text.Append('"');
                    text.Append(strings[codes[offset + 1]]);
                    text.Append('"');
                    break;

                case Ref:
                case Testref:
                    Pad();
                    text.Append("index = ");
                    text.Append(codes[offset + 1]);
                    break;

                case Capturemark:
                    Pad();
                    text.Append("index = ");
                    text.Append(codes[offset + 1]);
                    // -1 in the second operand slot means there is no "unindex" to show.
                    if (codes[offset + 2] != -1)
                    {
                        text.Append(", unindex = ");
                        text.Append(codes[offset + 2]);
                    }
                    break;

                case Nullcount:
                case Setcount:
                    Pad();
                    text.Append("value = ");
                    text.Append(codes[offset + 1]);
                    break;

                case Goto:
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Branchcount:
                case Lazybranchcount:
                    Pad();
                    text.Append("addr = ");
                    text.Append(codes[offset + 1]);
                    break;
            }

            // Second operand: a count printed as "inf" when it equals int.MaxValue.
            string countLabel = null;
            switch (op)
            {
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Oneloopatomic:
                case Notoneloop:
                case Notoneloopatomic:
                case Onelazy:
                case Notonelazy:
                case Setrep:
                case Setloop:
                case Setloopatomic:
                case Setlazy:
                    countLabel = ", rep = ";
                    break;

                case Branchcount:
                case Lazybranchcount:
                    countLabel = ", limit = ";
                    break;
            }

            if (countLabel != null)
            {
                text.Append(countLabel);

                int count = codes[offset + 2];
                if (count == int.MaxValue)
                {
                    text.Append("inf");
                }
                else
                {
                    text.Append(count);
                }
            }

            return text.ToString();
        }
 /// <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.CharInSet"]/*' />
 // Thin forwarder: passes all three arguments unchanged to RegexCharClass.CharInSet
 // and returns its result.
 protected static bool CharInSet(char ch, String set, String category)
 {
     bool isMember = RegexCharClass.CharInSet(ch, set, category);
     return isMember;
 }
 // Builds a character class directly from its parts: negation flag, range list,
 // category text and an optional subtracted class. Also sets _canonical to true.
 private RegexCharClass(bool negate, ArrayList ranges, StringBuilder categories, RegexCharClass subtraction)
 {
     _negate     = negate;
     _rangelist  = ranges;
     _categories = categories;
     _subtractor = subtraction;
     _canonical  = true;
 }
Beispiel #34
0
        /// <summary>
        /// Formats the instruction at <paramref name="opcodeOffset"/> in <c>Codes</c> as one
        /// line of text: the offset as six digits, a '~' when <c>OpcodeBacktracks</c>
        /// returns true for the masked opcode (a space otherwise), the operator name with a
        /// suffix per modifier bit that is set, and the operands padded to a fixed column.
        /// </summary>
        /// <param name="opcodeOffset">Index into <c>Codes</c> of the opcode to describe.</param>
        /// <returns>The formatted description.</returns>
        internal string DescribeInstruction(int opcodeOffset)
        {
            RegexOpcode opcode = (RegexOpcode)Codes[opcodeOffset];

            var sb = new StringBuilder();

            sb.Append($"{opcodeOffset:D6} ");
            sb.Append(OpcodeBacktracks(opcode & RegexOpcode.OperatorMask) ? '~' : ' ');
            sb.Append(opcode & RegexOpcode.OperatorMask);

            // Modifier bits are reported before they are masked off below.
            if ((opcode & RegexOpcode.CaseInsensitive) != 0)
            {
                sb.Append("-Ci");
            }
            if ((opcode & RegexOpcode.RightToLeft) != 0)
            {
                sb.Append("-Rtl");
            }
            if ((opcode & RegexOpcode.Backtracking) != 0)
            {
                sb.Append("-Back");
            }
            if ((opcode & RegexOpcode.BacktrackingSecond) != 0)
            {
                sb.Append("-Back2");
            }

            opcode &= RegexOpcode.OperatorMask;

            // First operand, formatted by opcode family. Opcodes not listed have no operand text.
            switch (opcode)
            {
            case RegexOpcode.One:
            case RegexOpcode.Onerep:
            case RegexOpcode.Oneloop:
            case RegexOpcode.Oneloopatomic:
            case RegexOpcode.Onelazy:
            case RegexOpcode.Notone:
            case RegexOpcode.Notonerep:
            case RegexOpcode.Notoneloop:
            case RegexOpcode.Notoneloopatomic:
            case RegexOpcode.Notonelazy:
                sb.Append(Indent()).Append('\'').Append(RegexCharClass.DescribeChar((char)Codes[opcodeOffset + 1])).Append('\'');
                break;

            case RegexOpcode.Set:
            case RegexOpcode.Setrep:
            case RegexOpcode.Setloop:
            case RegexOpcode.Setloopatomic:
            case RegexOpcode.Setlazy:
                sb.Append(Indent()).Append(RegexCharClass.DescribeSet(Strings[Codes[opcodeOffset + 1]]));
                break;

            case RegexOpcode.Multi:
                sb.Append(Indent()).Append('"').Append(Strings[Codes[opcodeOffset + 1]]).Append('"');
                break;

            case RegexOpcode.Backreference:
            case RegexOpcode.TestBackreference:
                sb.Append(Indent()).Append("index = ").Append(Codes[opcodeOffset + 1]);
                break;

            case RegexOpcode.Capturemark:
                sb.Append(Indent()).Append("index = ").Append(Codes[opcodeOffset + 1]);
                // -1 in the second operand slot means there is no "unindex" to show.
                if (Codes[opcodeOffset + 2] != -1)
                {
                    sb.Append(", unindex = ").Append(Codes[opcodeOffset + 2]);
                }
                break;

            case RegexOpcode.Nullcount:
            case RegexOpcode.Setcount:
                sb.Append(Indent()).Append("value = ").Append(Codes[opcodeOffset + 1]);
                break;

            case RegexOpcode.Goto:
            case RegexOpcode.Lazybranch:
            case RegexOpcode.Branchmark:
            case RegexOpcode.Lazybranchmark:
            case RegexOpcode.Branchcount:
            case RegexOpcode.Lazybranchcount:
                sb.Append(Indent()).Append("addr = ").Append(Codes[opcodeOffset + 1]);
                break;
            }

            // Second operand: a count printed as "inf" when it equals int.MaxValue.
            switch (opcode)
            {
            case RegexOpcode.Onerep:
            case RegexOpcode.Oneloop:
            case RegexOpcode.Oneloopatomic:
            case RegexOpcode.Onelazy:
            case RegexOpcode.Notonerep:
            case RegexOpcode.Notoneloop:
            case RegexOpcode.Notoneloopatomic:
            case RegexOpcode.Notonelazy:
            case RegexOpcode.Setrep:
            case RegexOpcode.Setloop:
            case RegexOpcode.Setloopatomic:
            case RegexOpcode.Setlazy:
                sb.Append(", rep = ").Append(Codes[opcodeOffset + 2] == int.MaxValue ? "inf" : Codes[opcodeOffset + 2]);
                break;

            case RegexOpcode.Branchcount:
            case RegexOpcode.Lazybranchcount:
                sb.Append(", limit = ").Append(Codes[opcodeOffset + 2] == int.MaxValue ? "inf" : Codes[opcodeOffset + 2]);
                break;
            }

            return sb.ToString();

            // Padding that brings the text written so far out to 25 characters
            // (always at least one space), so operands line up across instructions.
            string Indent() => new string(' ', Math.Max(1, 25 - sb.Length));
        }
 // Stores <sub> as this class's subtractor. Asserts (via Debug.Assert) that no
 // subtractor has been stored already.
 internal void AddSubtraction(RegexCharClass sub)
 {
     Debug.Assert(null == _subtractor, "Can't add two subtractions to a char class. ");

     _subtractor = sub;
 }
        /// <summary>
        /// Walks from the root of <paramref name="tree"/> through its leading nodes looking
        /// for an unbounded (max == int.MaxValue) single-character or set loop, and returns
        /// its set string as a <c>RegexPrefix</c>.
        /// </summary>
        /// <param name="tree">The parsed tree whose <c>_root</c> is examined.</param>
        /// <returns>
        /// The prefix built from the first qualifying loop, or <c>null</c> when a
        /// non-qualifying node is met or the nodes to inspect run out.
        /// </returns>
        internal static RegexPrefix ScanChars(RegexTree tree)
        {
            // NOTE(review): the numeric case labels look like RegexNode type codes
            // (e.g. 25 = concatenation, 28/32 = single-child wrappers) — confirm against
            // the RegexNode constants before relying on the per-case comments below.
            RegexNode current = tree._root;
            RegexNode sequence = null;   // node whose children are being stepped through
            int nextChild = 0;           // index of the next child of 'sequence' to visit

            for (;;)
            {
                string set = null;

                switch (current._type)
                {
                    case 3:
                    case 6:
                        if (current._n != int.MaxValue)
                        {
                            return null;
                        }
                        set = RegexCharClass.SetFromChar(current._ch);
                        break;

                    case 4:
                    case 7:
                        if (current._n != int.MaxValue)
                        {
                            return null;
                        }
                        set = RegexCharClass.SetInverseFromChar(current._ch);
                        break;

                    case 5:
                    case 8:
                        // Only usable when the node carries no secondary string.
                        if (current._n != int.MaxValue || (current._str2 != null && current._str2.Length != 0))
                        {
                            return null;
                        }
                        set = current._str;
                        break;

                    case 14:
                    case 15:
                    case 16:
                    case 18:
                    case 19:
                    case 20:
                    case 21:
                    case 23:
                    case 30:
                    case 31:
                    case 41:
                        // These node types are stepped over; move on to the next sibling.
                        break;

                    case 25:
                        if (current.ChildCount() > 0)
                        {
                            sequence = current;
                            nextChild = 0;
                        }
                        break;

                    case 28:
                    case 32:
                        // Descend into the only child and forget any pending sibling walk.
                        current = current.Child(0);
                        sequence = null;
                        continue;

                    default:
                        return null;
                }

                if (set != null)
                {
                    bool ignoreCase = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                    return new RegexPrefix(set, ignoreCase);
                }

                if (sequence == null || nextChild >= sequence.ChildCount())
                {
                    return null;
                }

                current = sequence.Child(nextChild);
                nextChild++;
            }
        }
 // Creates an instance holding a newly constructed RegexCharClass and the given
 // nullable flag. _caseInsensitive is not assigned by this overload.
 internal RegexFC(bool nullable)
 {
     _nullable = nullable;
     _cc = new RegexCharClass();
 }
Beispiel #38
0
 // Returns true when exactly one of the two characters around <index> is an ECMA
 // word character according to RegexCharClass.IsECMAWordChar. A side that falls
 // outside [startpos, endpos) counts as "not a word character".
 protected bool IsECMABoundary(int index, int startpos, int endpos)
 {
     bool wordCharBefore = index > startpos && RegexCharClass.IsECMAWordChar(_runtext[index - 1]);
     bool wordCharAt = index < endpos && RegexCharClass.IsECMAWordChar(_runtext[index]);

     return wordCharBefore != wordCharAt;
 }
        /*
         * ReduceAlternation:
         *
         * Basic optimization. Single-letter alternations can be replaced
         * by faster set specifications, and nested alternations with no
         * intervening operators can be flattened:
         *
         * a|b|c|def|g|h -> [a-c]|def|[gh]
         * apple|(?:orange|pear)|grape -> apple|orange|pear|grape
         *
         */

        // Simplifies this alternation's child list in place: nested Alternate children are
        // flattened into this node, adjacent Set/One children with compatible options are merged
        // into a single Set, and Nothing children are dropped.
        //
        // Returns a new Nothing node when there are no children; otherwise returns whatever
        // StripEnation(RegexNode.Nothing) yields for the compacted list.
        // NOTE(review): StripEnation is defined elsewhere; presumably it collapses a zero- or
        // one-child alternation -- confirm against its definition.
        internal RegexNode ReduceAlternation() {
            // Combine adjacent sets/chars

            bool wasLastSet;            // true once the most recently kept node was a Set or One
            bool lastNodeCannotMerge;   // true when that node was a Set for which IsMergeable returned false
            RegexOptions optionsLast;   // RightToLeft/IgnoreCase bits of that node
            RegexOptions optionsAt;     // RightToLeft/IgnoreCase bits of the node being examined
            int i;                      // read cursor over _children
            int j;                      // write cursor: slot the next kept node will occupy
            RegexNode at;
            RegexNode prev;

            // No children at all: the alternation reduces to a Nothing node.
            if (_children == null)
                return new RegexNode(RegexNode.Nothing, _options);

            wasLastSet = false;
            lastNodeCannotMerge = false;
            optionsLast = 0;

            for (i = 0, j = 0; i < _children.Count; i++, j++) {
                at = _children[i];

                // Compact: once earlier nodes have been dropped or merged, slide this one down.
                if (j < i)
                    _children[j] = at;

                // Every path through this loop ends in a break, so it runs exactly once;
                // it only serves as a block that can be exited early.
                for (;;) {
                    if (at._type == Alternate) {
                        // Flatten a nested alternation: re-parent its children to this node and
                        // splice them in right after the current position so the outer loop
                        // visits them next.
                        for (int k = 0; k < at._children.Count; k++)
                            at._children[k]._next = this;

                        _children.InsertRange(i + 1, at._children);
                        // Give the slot back; the nested Alternate node itself is not kept.
                        j--;
                    }
                    else if (at._type == Set || at._type == One) {
                        // Cannot merge sets if L or I options differ, or if either are negated.
                        optionsAt = at._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);


                        if (at._type == Set) {
                            // Keep this Set as its own node (and remember its state) when there is
                            // no preceding Set/One, the options differ, the preceding node cannot
                            // be merged, or this set itself is not mergeable.
                            if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !RegexCharClass.IsMergeable(at._str)) {
                                wasLastSet = true;
                                lastNodeCannotMerge = !RegexCharClass.IsMergeable(at._str);
                                optionsLast = optionsAt;
                                break;
                            }
                        }
                        // Same test for a One node; it never sets lastNodeCannotMerge itself.
                        else if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge) {
                            wasLastSet = true;
                            lastNodeCannotMerge = false;
                            optionsLast = optionsAt;
                            break;
                        }

                        
                        // The last node was a Set or a One, we're a Set or One and our options are the same.
                        // Merge the two nodes.
                        // Step back to the previously kept node; the outer loop's j++ then leaves
                        // the current slot free to be overwritten by the next child.
                        j--;
                        prev = _children[j];
                        
                        // Start from the previous node's characters as a char class.
                        RegexCharClass prevCharClass;
                        if (prev._type == RegexNode.One) {
                            prevCharClass = new RegexCharClass();
                            prevCharClass.AddChar(prev._ch);
                        }
                        else {
                            prevCharClass = RegexCharClass.Parse(prev._str);
                        }
                        
                        // Add the current node's character or class to it.
                        if (at._type == RegexNode.One) {
                            prevCharClass.AddChar(at._ch);
                        }
                        else {
                            RegexCharClass atCharClass = RegexCharClass.Parse(at._str);
                            prevCharClass.AddCharClass(atCharClass);
                        }
                        
                        // The previous node becomes (or stays) a Set holding the combined class.
                        prev._type = RegexNode.Set;
                        prev._str  = prevCharClass.ToStringClass();
                        
                    }
                    else if (at._type == RegexNode.Nothing) {
                        // Drop Nothing branches. The merge state is left untouched here, so a
                        // Set/One on either side of a Nothing can still be merged.
                        j--;
                    }
                    else {
                        // Any other node type breaks a run of mergeable sets/chars.
                        wasLastSet = false;
                        lastNodeCannotMerge = false;
                    }
                    break;
                }
            }

            // Trim the now-unused tail left behind by dropped or merged children.
            if (j < i)
                _children.RemoveRange(j, i - j);

            return StripEnation(RegexNode.Nothing);
        }
Beispiel #40
0
        /// <summary>
        /// Tests <paramref name="ch"/> against a character class built from a set string and a
        /// category string.
        /// </summary>
        /// <param name="ch">Character to test.</param>
        /// <param name="set">Set string passed to RegexCharClass.ConvertOldStringsToClass.</param>
        /// <param name="category">Category string passed to RegexCharClass.ConvertOldStringsToClass.</param>
        /// <returns>The result of RegexCharClass.CharInClass for the converted class string.</returns>
        protected static bool CharInSet(char ch, String set, String category)
        {
            // Convert the (set, category) pair to a single class string and test membership in one step.
            return RegexCharClass.CharInClass(ch, RegexCharClass.ConvertOldStringsToClass(set, category));
        }
 /// <summary>
 /// Builds a new character class and adds the named category to it.
 /// </summary>
 /// <param name="categoryName">Category name forwarded to AddCategoryFromName.</param>
 /// <param name="invert">Forwarded to AddCategoryFromName unchanged.</param>
 /// <param name="caseInsensitive">Forwarded to AddCategoryFromName unchanged.</param>
 /// <param name="pattern">Forwarded to AddCategoryFromName unchanged.</param>
 /// <returns>The newly created character class.</returns>
 internal static RegexCharClass CreateFromCategory(string categoryName, bool invert, bool caseInsensitive, string pattern)
 {
     RegexCharClass categoryClass = new RegexCharClass();

     // All category handling is delegated to the class itself.
     categoryClass.AddCategoryFromName(categoryName, invert, caseInsensitive, pattern);

     return categoryClass;
 }
Beispiel #42
0
 /// <summary>
 /// Tests whether <paramref name="ch"/> is matched by the class string <paramref name="charClass"/>.
 /// </summary>
 /// <param name="ch">Character to test.</param>
 /// <param name="charClass">Class string handed to RegexCharClass.CharInClass.</param>
 /// <returns>Whatever RegexCharClass.CharInClass reports for the pair.</returns>
 protected static bool CharInClass(char ch, String charClass)
 {
     // Pure forwarding wrapper around the RegexCharClass helper.
     bool isMember = RegexCharClass.CharInClass(ch, charClass);
     return isMember;
 }
        /// <summary>
        /// Scans the character following a backslash and builds the node for it: a
        /// character-class node for \s \S \w \W \d \D \p \P, a typed node for
        /// \A \B \G \Z \b \z, and otherwise whatever ScanBasicBackslash() returns.
        /// </summary>
        /// <returns>The node built for the escape.</returns>
        /// <remarks>
        /// Throws the exception built by MakeException when no characters remain.
        /// The handled cases consume the escape character with MoveRight(); the fallback
        /// path leaves it for ScanBasicBackslash().
        /// NOTE(review): node type 11 is presumably the Set node type -- confirm against RegexNode.
        /// </remarks>
        internal RegexNode ScanBackslash()
        {
            if (CharsRight() == 0)
            {
                throw MakeException(SR.GetString("IllegalEndEscape"));
            }

            char escape = RightChar();
            string setString;

            switch (escape)
            {
                // Escapes that map straight to a node type via TypeFromCode.
                case 'A':
                case 'B':
                case 'G':
                case 'Z':
                case 'b':
                case 'z':
                    MoveRight();
                    return new RegexNode(TypeFromCode(escape), _options);

                // Class escapes: when UseOptionE() is true a literal class string is used,
                // otherwise the corresponding RegexCharClass constant.
                case 'w':
                    MoveRight();
                    setString = UseOptionE() ? "\0\n\00:A[_`a{İı" : RegexCharClass.WordClass;
                    break;

                case 'W':
                    MoveRight();
                    setString = UseOptionE() ? "\x0001\n\00:A[_`a{İı" : RegexCharClass.NotWordClass;
                    break;

                case 's':
                    MoveRight();
                    setString = UseOptionE() ? "\0\x0004\0\t\x000e !" : RegexCharClass.SpaceClass;
                    break;

                case 'S':
                    MoveRight();
                    setString = UseOptionE() ? "\x0001\x0004\0\t\x000e !" : RegexCharClass.NotSpaceClass;
                    break;

                case 'd':
                    MoveRight();
                    setString = UseOptionE() ? "\0\x0002\00:" : RegexCharClass.DigitClass;
                    break;

                case 'D':
                    MoveRight();
                    setString = UseOptionE() ? "\x0001\x0002\00:" : RegexCharClass.NotDigitClass;
                    break;

                // \p{name} / \P{name}: build the class from the parsed property name;
                // anything other than lowercase 'p' requests the inverted category.
                case 'p':
                case 'P':
                {
                    MoveRight();
                    RegexCharClass propertyClass = new RegexCharClass();
                    propertyClass.AddCategoryFromName(ParseProperty(), escape != 'p', UseOptionI(), _pattern);
                    if (UseOptionI())
                    {
                        propertyClass.AddLowercase(_culture);
                    }
                    setString = propertyClass.ToStringClass();
                    break;
                }

                // Everything else is handled by the basic scanner; nothing is consumed here.
                default:
                    return ScanBasicBackslash();
            }

            return new RegexNode(11, _options, setString);
        }
Beispiel #44
0
 /// <summary>
 /// Reports whether <paramref name="index"/> sits between a word character and a
 /// non-word character (or a range edge) in the run text.
 /// </summary>
 /// <param name="index">Position to test.</param>
 /// <param name="startpos">Lower bound; the character before index is only examined when index is greater than this.</param>
 /// <param name="endpos">Upper bound; the character at index is only examined when index is less than this.</param>
 /// <returns>True when exactly one of the two neighbouring positions holds a word character.</returns>
 protected bool IsBoundary(int index, int startpos, int endpos)
 {
     // A side that falls outside [startpos, endpos) counts as "not a word character".
     bool wordCharBefore = (index > startpos) && RegexCharClass.IsWordChar(this.runtext[index - 1]);
     bool wordCharAfter = (index < endpos) && RegexCharClass.IsWordChar(this.runtext[index]);

     return wordCharBefore != wordCharAfter;
 }