コード例 #1
0
ファイル: CharClass.cs プロジェクト: zyj0021/Cyjb.Compilers
        /// <summary>
        /// Gets every character that is a member of the given character class.
        /// </summary>
        /// <param name="charClass">The character class whose members are to be enumerated.</param>
        /// <returns>A set holding all characters contained in the character class.</returns>
        private static CharSet GetCharClassSet(string charClass)
        {
            CharSet result = new CharSet();

            // Subtractions and Unicode categories cannot be read off the range list,
            // so every possible char value is probed individually.
            if (RegexCharClass.IsSubtraction(charClass) || RegexCharClass.ContainsCategory(charClass))
            {
                for (int code = 0; code <= char.MaxValue; code++)
                {
                    char candidate = (char)code;
                    if (RegexCharClass.CharInClass(candidate, charClass))
                    {
                        result.Add(candidate);
                    }
                }
                return result;
            }

            // Fast path: the range string is consumed as [start, end) pairs; a trailing
            // unpaired start is treated as running through char.MaxValue.
            const int limit = char.MaxValue + 1;
            string ranges = RegexCharClass.GetCharClassRanges(charClass);

            if (RegexCharClass.IsNegated(charClass))
            {
                // Negated class: collect the gaps between the listed ranges.
                int gapStart = 0;
                for (int idx = 0; idx < ranges.Length; idx += 2)
                {
                    int gapEnd = ranges[idx];
                    for (int code = gapStart; code < gapEnd; code++)
                    {
                        result.Add((char)code);
                    }
                    gapStart = idx + 1 < ranges.Length ? ranges[idx + 1] : limit;
                }

                // Whatever follows the last listed range also belongs to the negated class.
                for (int code = gapStart; code < limit; code++)
                {
                    result.Add((char)code);
                }
            }
            else
            {
                // Plain class: collect the listed ranges themselves.
                for (int idx = 0; idx < ranges.Length; idx += 2)
                {
                    int first = ranges[idx];
                    int stop = idx + 1 < ranges.Length ? ranges[idx + 1] : limit;
                    for (int code = first; code < stop; code++)
                    {
                        result.Add((char)code);
                    }
                }
            }

            return result;
        }
コード例 #2
0
        /// <summary>
        /// Main interpreter loop. Starts at code position 0 and dispatches on
        /// <c>Operator()</c> once per iteration until <c>RegexCode.Stop</c> is reached.
        /// </summary>
        /// <remarks>
        /// Control-flow convention inside the switch:
        /// <c>continue</c> goes straight to the next loop iteration (the case has already
        /// called <c>Advance</c> or <c>Goto</c>), whereas <c>break</c> leaves the switch and
        /// falls through to the <c>Backtrack()</c> call at the bottom of the loop.
        /// Cases nested inside an inner loop use <c>goto BreakBackward</c> for the same effect.
        /// Cases combined with <c>RegexCode.Back</c> / <c>RegexCode.Back2</c> undo or retry
        /// the work of the plain opcode (presumably they are reached via <c>Backtrack()</c> -
        /// confirm against its implementation). An opcode with no case throws
        /// <see cref="NotImplementedException"/>.
        /// </remarks>
        protected override void Go()
        {
            Goto(0);

            for (;;)
            {
#if DBG
                if (runmatch.Debug)
                {
                    DumpState();
                }
#endif

                // Called once per dispatched opcode.
                CheckTimeout();

                switch (Operator())
                {
                case RegexCode.Stop:
                    return;

                case RegexCode.Nothing:
                    // No Advance here: 'break' sends this opcode to Backtrack().
                    break;

                case RegexCode.Goto:
                    Goto(Operand(0));
                    continue;

                case RegexCode.Testref:
                    if (!IsMatched(Operand(0)))
                    {
                        break;
                    }
                    Advance(1);
                    continue;

                case RegexCode.Lazybranch:
                    TrackPush(Textpos());
                    Advance(1);
                    continue;

                case RegexCode.Lazybranch | RegexCode.Back:
                    TrackPop();
                    Textto(TrackPeek());
                    Goto(Operand(0));
                    continue;

                case RegexCode.Setmark:
                    StackPush(Textpos());
                    TrackPush();
                    Advance();
                    continue;

                case RegexCode.Nullmark:
                    StackPush(-1);
                    TrackPush();
                    Advance();
                    continue;

                case RegexCode.Setmark | RegexCode.Back:
                case RegexCode.Nullmark | RegexCode.Back:
                    StackPop();
                    break;

                case RegexCode.Getmark:
                    StackPop();
                    TrackPush(StackPeek());
                    Textto(StackPeek());
                    Advance();
                    continue;

                case RegexCode.Getmark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    break;

                case RegexCode.Capturemark:
                    // Operand(1) == -1 means there is no second group to test/transfer from.
                    if (Operand(1) != -1 && !IsMatched(Operand(1)))
                    {
                        break;
                    }
                    StackPop();
                    if (Operand(1) != -1)
                    {
                        TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos());
                    }
                    else
                    {
                        Capture(Operand(0), StackPeek(), Textpos());
                    }
                    TrackPush(StackPeek());

                    Advance(2);

                    continue;

                case RegexCode.Capturemark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    Uncapture();
                    // When both operands name a group, a second Uncapture() is issued.
                    if (Operand(0) != -1 && Operand(1) != -1)
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Branchmark:
                {
                    int matched;
                    StackPop();

                    matched = Textpos() - StackPeek();

                    if (matched != 0)                               // Nonempty match -> loop now
                    {
                        TrackPush(StackPeek(), Textpos());          // Save old mark, textpos
                        StackPush(Textpos());                       // Make new mark
                        Goto(Operand(0));                           // Loop
                    }
                    else                                            // Empty match -> straight now
                    {
                        TrackPush2(StackPeek());                    // Save old mark
                        Advance(1);                                 // Straight
                    }
                    continue;
                }

                case RegexCode.Branchmark | RegexCode.Back:
                    TrackPop(2);
                    StackPop();
                    Textto(TrackPeek(1));                           // Recall position
                    TrackPush2(TrackPeek());                        // Save old mark
                    Advance(1);                                     // Straight
                    continue;

                case RegexCode.Branchmark | RegexCode.Back2:
                    TrackPop();
                    StackPush(TrackPeek());                         // Recall old mark
                    break;                                          // Backtrack

                case RegexCode.Lazybranchmark:
                {
                    // We hit this the first time through a lazy loop and after each
                    // successful match of the inner expression.  It simply continues
                    // on and doesn't loop.
                    StackPop();

                    int oldMarkPos = StackPeek();

                    if (Textpos() != oldMarkPos)                        // Nonempty match -> try to loop again by going to 'back' state
                    {
                        if (oldMarkPos != -1)
                        {
                            TrackPush(oldMarkPos, Textpos());           // Save old mark, textpos
                        }
                        else
                        {
                            TrackPush(Textpos(), Textpos());
                        }
                    }
                    else
                    {
                        // The inner expression found an empty match, so we'll go directly to 'back2' if we
                        // backtrack.  In this case, we need to push something on the stack, since back2 pops.
                        // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
                        // position associated with that empty match.
                        StackPush(oldMarkPos);

                        TrackPush2(StackPeek());                        // Save old mark
                    }
                    Advance(1);
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    // After the first time, Lazybranchmark | RegexCode.Back occurs
                    // with each iteration of the loop, and therefore with every attempted
                    // match of the inner expression.  We'll try to match the inner expression,
                    // then go back to Lazybranchmark if successful.  If the inner expression
                    // fails, we go to Lazybranchmark | RegexCode.Back2
                    int pos;

                    TrackPop(2);
                    pos = TrackPeek(1);
                    TrackPush2(TrackPeek());                        // Save old mark
                    StackPush(pos);                                 // Make new mark
                    Textto(pos);                                    // Recall position
                    Goto(Operand(0));                               // Loop
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back2:
                    // The lazy loop has failed.  We'll do a true backtrack and
                    // start over before the lazy loop.
                    StackPop();
                    TrackPop();
                    StackPush(TrackPeek());                          // Recall old mark
                    break;

                case RegexCode.Setcount:
                    StackPush(Textpos(), Operand(0));
                    TrackPush();
                    Advance(1);
                    continue;

                case RegexCode.Nullcount:
                    StackPush(-1, Operand(0));
                    TrackPush();
                    Advance(1);
                    continue;

                case RegexCode.Setcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Nullcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Branchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark    = StackPeek();
                    int count   = StackPeek(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0)) // Max loops or empty match -> straight now
                    {
                        TrackPush2(mark, count);                             // Save old mark, count
                        Advance(2);                                          // Straight
                    }
                    else                                                     // Nonempty match -> count+loop now
                    {
                        TrackPush(mark);                                     // remember mark
                        StackPush(Textpos(), count + 1);                     // Make new mark, incr count
                        Goto(Operand(0));                                    // Loop
                    }
                    continue;
                }

                case RegexCode.Branchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (= current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    if (StackPeek(1) > 0)                               // Positive -> can go straight
                    {
                        Textto(StackPeek());                            // Zap to mark
                        TrackPush2(TrackPeek(), StackPeek(1) - 1);      // Save old mark, old count
                        Advance(2);                                     // Straight
                        continue;
                    }
                    StackPush(TrackPeek(), StackPeek(1) - 1);           // recall old mark, old count
                    break;

                case RegexCode.Branchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    //  1: Previous count
                    TrackPop(2);
                    StackPush(TrackPeek(), TrackPeek(1));               // Recall old mark, old count
                    break;                                              // Backtrack


                case RegexCode.Lazybranchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark  = StackPeek();
                    int count = StackPeek(1);

                    if (count < 0)                                  // Negative count -> loop now
                    {
                        TrackPush2(mark);                           // Save old mark
                        StackPush(Textpos(), count + 1);            // Make new mark, incr count
                        Goto(Operand(0));                           // Loop
                    }
                    else                                            // Nonneg count -> straight now
                    {
                        TrackPush(mark, count, Textpos());          // Save mark, count, position
                        Advance(2);                                 // Straight
                    }
                    continue;
                }

                case RegexCode.Lazybranchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Mark
                    //  1: Count
                    //  2: Textpos
                {
                    TrackPop(3);
                    int mark    = TrackPeek();
                    int textpos = TrackPeek(2);

                    if (TrackPeek(1) < Operand(1) && textpos != mark)       // Under limit and not empty match -> loop
                    {
                        Textto(textpos);                                    // Recall position
                        StackPush(textpos, TrackPeek(1) + 1);               // Make new mark, incr count
                        TrackPush2(mark);                                   // Save old mark
                        Goto(Operand(0));                                   // Loop
                        continue;
                    }
                    else                                                    // Max loops or empty match -> backtrack
                    {
                        StackPush(TrackPeek(), TrackPeek(1));               // Recall old mark, count
                        break;                                              // backtrack
                    }
                }

                case RegexCode.Lazybranchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (== current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    StackPush(TrackPeek(), StackPeek(1) - 1);       // Recall old mark, count
                    break;                                          // Backtrack

                case RegexCode.Setjump:
                    StackPush(Trackpos(), Crawlpos());
                    TrackPush();
                    Advance();
                    continue;

                case RegexCode.Setjump | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Backjump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());

                    // Undo captures until the crawl position matches the saved one.
                    while (Crawlpos() != StackPeek(1))
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Forejump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());
                    TrackPush(StackPeek(1));
                    Advance();
                    continue;

                case RegexCode.Forejump | RegexCode.Back:
                    // TrackPush:
                    //  0: Crawlpos
                    TrackPop();

                    // Undo captures until the crawl position matches the saved one.
                    while (Crawlpos() != TrackPeek())
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Bol:
                    // Succeeds when nothing is to the left or the previous char is '\n'.
                    if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Eol:
                    // Succeeds when nothing is to the right or the current char is '\n'.
                    if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Boundary:
                    if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Nonboundary:
                    if (IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.ECMABoundary:
                    if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.NonECMABoundary:
                    if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Beginning:
                    if (Leftchars() > 0)
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.Start:
                    if (Textpos() != Textstart())
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.EndZ:
                    // Succeeds at the very end, or when exactly one '\n' remains.
                    if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.End:
                    if (Rightchars() > 0)
                    {
                        break;
                    }
                    Advance();
                    continue;

                case RegexCode.One:
                    if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Notone:
                    if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Set:
                    if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), runstrings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;

                case RegexCode.Multi:
                {
                    if (!Stringmatch(runstrings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;
                }

                case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        // An unmatched group only passes when the ECMAScript option is set.
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    Advance(1);
                    continue;
                }

                case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            // A plain 'break' would only exit this while loop, hence the goto.
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            // A plain 'break' would only exit this while loop, hence the goto.
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setrep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    String set = runstrings[Operand(0)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            // A plain 'break' would only exit this while loop, hence the goto.
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    // Clamp the repeat count to the number of chars available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    // c - i chars were consumed; record backtrack state only if any were.
                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    // Clamp the repeat count to the number of chars available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    // c - i chars were consumed; record backtrack state only if any were.
                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setloop:
                {
                    int c = Operand(1);

                    // Clamp the repeat count to the number of chars available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    String set = runstrings[Operand(0)];
                    int    i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    // c - i chars were consumed; record backtrack state only if any were.
                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Oneloop | RegexCode.Back:
                case RegexCode.Notoneloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    // If more give-back steps remain, re-arm with one fewer and the next position.
                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    // If more give-back steps remain, re-arm with one fewer and the next position.
                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Onelazy:
                case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    // Consume nothing now; only record how many chars may be taken later.
                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setlazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    // Consume nothing now; only record how many chars may be taken later.
                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Onelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Notonelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                case RegexCode.Setlazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInClass(Forwardcharnext(), runstrings[Operand(0)]))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

                default:
                    // No handler exists for this opcode value.
                    throw new NotImplementedException();
                }

BreakBackward:
                ;

                // "break Backward" comes here:
                Backtrack();
            }
        }
コード例 #3
0
        // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        // !!!! This function must be kept synchronized with GenerateFindFirstChar !!!!
        // !!!! in RegexCompiler.cs                                                !!!!
        // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        /// <summary>
        /// Moves <c>runtextpos</c> to the next position at which a match attempt is worthwhile.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a candidate position was found (or no prefilter applies);
        /// <c>false</c> when no candidate position remains.
        /// </returns>
        protected override bool FindFirstChar()
        {
            // Decode the anchor flags once; each is a pure bit test on runanchors.
            bool wantsBeginning = (runanchors & RegexFCD.Beginning) != 0;
            bool wantsStart     = (runanchors & RegexFCD.Start) != 0;
            bool wantsEndZ      = (runanchors & RegexFCD.EndZ) != 0;
            bool wantsEnd       = (runanchors & RegexFCD.End) != 0;

            // Case 1: the pattern carries a leading or trailing anchor.
            if (wantsBeginning || wantsStart || wantsEndZ || wantsEnd)
            {
                if (runcode._rightToLeft)
                {
                    bool anchorUnreachable =
                        (wantsEnd && runtextpos < runtextend) ||
                        (wantsEndZ && (runtextpos < runtextend - 1 ||
                                       (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
                        (wantsStart && runtextpos < runtextstart);

                    if (anchorUnreachable)
                    {
                        runtextpos = runtextbeg;
                        return false;
                    }

                    // Jump directly to the only position a Beginning anchor can accept.
                    if (wantsBeginning && runtextpos > runtextbeg)
                    {
                        runtextpos = runtextbeg;
                    }
                }
                else
                {
                    bool anchorUnreachable =
                        (wantsBeginning && runtextpos > runtextbeg) ||
                        (wantsStart && runtextpos > runtextstart);

                    if (anchorUnreachable)
                    {
                        runtextpos = runtextend;
                        return false;
                    }

                    // Jump directly to the earliest position an end anchor can accept.
                    if (wantsEndZ && runtextpos < runtextend - 1)
                    {
                        runtextpos = runtextend - 1;
                    }
                    else if (wantsEnd && runtextpos < runtextend)
                    {
                        runtextpos = runtextend;
                    }
                }

                // The anchor is satisfiable here; a prefix matcher, if present, has the final say.
                if (runbmPrefix == null)
                {
                    return true;
                }

                return runbmPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
            }

            // Case 2: no anchor, but a prefix matcher can scan for the next candidate.
            if (runbmPrefix != null)
            {
                runtextpos = runbmPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);

                if (runtextpos != -1)
                {
                    return true;
                }

                // Scan reported -1: park the position at the far edge for the scan direction.
                runtextpos = runcode._rightToLeft ? runtextbeg : runtextend;
                return false;
            }

            // Case 3: nothing to prefilter with, so every position is a candidate.
            if (runfcPrefix == null)
            {
                return true;
            }

            // Case 4: scan for the first character belonging to the first-char set.
            runrtl = runcode._rightToLeft;
            runci  = runfcPrefix.CaseInsensitive;
            String firstChars = runfcPrefix.Prefix;

            if (RegexCharClass.IsSingleton(firstChars))
            {
                // A single-character set allows a plain comparison instead of a class lookup.
                char only = RegexCharClass.SingletonChar(firstChars);

                for (int remaining = Forwardchars(); remaining > 0; remaining--)
                {
                    if (Forwardcharnext() == only)
                    {
                        Backwardnext();
                        return true;
                    }
                }
            }
            else
            {
                for (int remaining = Forwardchars(); remaining > 0; remaining--)
                {
                    if (RegexCharClass.CharInClass(Forwardcharnext(), firstChars))
                    {
                        Backwardnext();
                        return true;
                    }
                }
            }

            return false;
        }
コード例 #4
0
ファイル: RegexRunner.cs プロジェクト: zcf7822/monodevelop
 /// <summary>
 /// Tests whether a character belongs to a character class by delegating to
 /// <c>RegexCharClass.CharInClass</c>.
 /// </summary>
 /// <param name="ch">The character to test.</param>
 /// <param name="charClass">The character class string to test against.</param>
 /// <returns>The result reported by <c>RegexCharClass.CharInClass</c>.</returns>
 protected static bool CharInClass(char ch, String charClass)
 {
     bool isMember = RegexCharClass.CharInClass(ch, charClass);
     return isMember;
 }
コード例 #5
0
ファイル: RegexRunner.cs プロジェクト: zcf7822/monodevelop
        /// <summary>
        /// Tests whether a character belongs to a class described by a set string plus a
        /// category string. The pair is first converted to a single class string with
        /// <c>RegexCharClass.ConvertOldStringsToClass</c>.
        /// </summary>
        /// <param name="ch">The character to test.</param>
        /// <param name="set">The set string passed to the converter.</param>
        /// <param name="category">The category string passed to the converter.</param>
        /// <returns>The result reported by <c>RegexCharClass.CharInClass</c>.</returns>
        protected static bool CharInSet(char ch, String set, String category)
        {
            return RegexCharClass.CharInClass(ch, RegexCharClass.ConvertOldStringsToClass(set, category));
        }
コード例 #6
0
        /// <summary>
        /// Main dispatch loop of the interpreter. Starts at code position 0 and
        /// repeatedly switches on the value returned by <c>Operator()</c> until
        /// <c>RegexCode.Stop</c> is seen, at which point the method returns.
        /// </summary>
        /// <remarks>
        /// Control-flow convention used by every case below:
        /// a case that ends in <c>continue</c> goes straight to the next loop
        /// iteration (after either calling <c>Goto</c> or setting <c>advance</c>);
        /// a case that ends in <c>break</c> (or jumps to <c>BreakBackward</c>)
        /// leaves the switch and calls <c>Backtrack()</c> before the next iteration.
        /// Opcodes combined with <c>RegexCode.Back</c> / <c>RegexCode.Back2</c> are the
        /// handlers that undo or retry the corresponding forward opcode.
        /// </remarks>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>Operator()</c> returns a value that no case handles.
        /// </exception>
        protected override void Go()
        {
            Goto(0);

            // Pending argument for Advance(); -1 means no Advance call is pending.
            int advance = -1;

            while (true)
            {
                if (advance >= 0)
                {
                    // https://github.com/dotnet/coreclr/pull/14850#issuecomment-342256447
                    // Single common Advance call to reduce method size; and single method inline point
                    Advance(advance);
                    advance = -1;
                }
#if DEBUG
                if (runmatch.Debug)
                {
                    DumpState();
                }
#endif

                // NOTE(review): the timeout check is commented out here — confirm this is intentional.
                //CheckTimeout();

                switch (Operator())
                {
                case RegexCode.Stop:
                    return;

                case RegexCode.Nothing:
                    break;

                case RegexCode.Goto:
                    Goto(Operand(0));
                    continue;

                case RegexCode.Testref:
                    if (!IsMatched(Operand(0)))
                    {
                        break;
                    }
                    advance = 1;
                    continue;

                case RegexCode.Lazybranch:
                    TrackPush(Textpos());
                    advance = 1;
                    continue;

                case RegexCode.Lazybranch | RegexCode.Back:
                    TrackPop();
                    Textto(TrackPeek());
                    Goto(Operand(0));
                    continue;

                case RegexCode.Setmark:
                    StackPush(Textpos());
                    TrackPush();
                    advance = 0;
                    continue;

                case RegexCode.Nullmark:
                    StackPush(-1);
                    TrackPush();
                    advance = 0;
                    continue;

                case RegexCode.Setmark | RegexCode.Back:
                case RegexCode.Nullmark | RegexCode.Back:
                    StackPop();
                    break;

                case RegexCode.Getmark:
                    StackPop();
                    TrackPush(StackPeek());
                    Textto(StackPeek());
                    advance = 0;
                    continue;

                case RegexCode.Getmark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    break;

                case RegexCode.Capturemark:
                    // Operand(1) == -1 means there is no second group involved; otherwise
                    // that group must already be matched or we backtrack.
                    if (Operand(1) != -1 && !IsMatched(Operand(1)))
                    {
                        break;
                    }
                    StackPop();
                    if (Operand(1) != -1)
                    {
                        TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos());
                    }
                    else if (_callStack != null && _callStack.captureNum == Operand(0))
                    {
                        // Successful return from a subroutine
                        int returnPos = _callStack.returnPos;
                        PopCallFrame();
                        Goto(returnPos);
                        continue;
                    }
                    else
                    {
                        Capture(Operand(0), StackPeek(), Textpos());
                    }
                    TrackPush(StackPeek());

                    advance = 2;

                    continue;

                case RegexCode.Capturemark | RegexCode.Back:
                    TrackPop();
                    StackPush(TrackPeek());
                    Uncapture();
                    if (Operand(0) != -1 && Operand(1) != -1)
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Branchmark:
                {
                    int matched;
                    StackPop();

                    matched = Textpos() - StackPeek();

                    if (matched != 0)
                    {                                      // Nonempty match -> loop now
                        TrackPush(StackPeek(), Textpos()); // Save old mark, textpos
                        StackPush(Textpos());              // Make new mark
                        Goto(Operand(0));                  // Loop
                    }
                    else
                    {                                          // Empty match -> straight now
                        TrackPush2(StackPeek());               // Save old mark
                        advance = 1;                           // Straight
                    }
                    continue;
                }

                case RegexCode.Branchmark | RegexCode.Back:
                    TrackPop(2);
                    StackPop();
                    Textto(TrackPeek(1));                           // Recall position
                    TrackPush2(TrackPeek());                        // Save old mark
                    advance = 1;                                    // Straight
                    continue;

                case RegexCode.Branchmark | RegexCode.Back2:
                    TrackPop();
                    StackPush(TrackPeek());                         // Recall old mark
                    break;                                          // Backtrack

                case RegexCode.Lazybranchmark:
                {
                    // We hit this the first time through a lazy loop and after each
                    // successful match of the inner expression.  It simply continues
                    // on and doesn't loop.
                    StackPop();

                    int oldMarkPos = StackPeek();

                    if (Textpos() != oldMarkPos)
                    {                      // Nonempty match -> try to loop again by going to 'back' state
                        if (oldMarkPos != -1)
                        {
                            TrackPush(oldMarkPos, Textpos());           // Save old mark, textpos
                        }
                        else
                        {
                            TrackPush(Textpos(), Textpos());
                        }
                    }
                    else
                    {
                        // The inner expression found an empty match, so we'll go directly to 'back2' if we
                        // backtrack.  In this case, we need to push something on the stack, since back2 pops.
                        // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
                        // position associated with that empty match.
                        StackPush(oldMarkPos);

                        TrackPush2(StackPeek());                        // Save old mark
                    }
                    advance = 1;
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    // After the first time, Lazybranchmark | RegexCode.Back occurs
                    // with each iteration of the loop, and therefore with every attempted
                    // match of the inner expression.  We'll try to match the inner expression,
                    // then go back to Lazybranchmark if successful.  If the inner expression
                    // fails, we go to Lazybranchmark | RegexCode.Back2
                    int pos;

                    TrackPop(2);
                    pos = TrackPeek(1);
                    TrackPush2(TrackPeek());                        // Save old mark
                    StackPush(pos);                                 // Make new mark
                    Textto(pos);                                    // Recall position
                    Goto(Operand(0));                               // Loop
                    continue;
                }

                case RegexCode.Lazybranchmark | RegexCode.Back2:
                    // The lazy loop has failed.  We'll do a true backtrack and
                    // start over before the lazy loop.
                    StackPop();
                    TrackPop();
                    StackPush(TrackPeek());                          // Recall old mark
                    break;

                case RegexCode.Setcount:
                    StackPush(Textpos(), Operand(0));
                    TrackPush();
                    advance = 1;
                    continue;

                case RegexCode.Nullcount:
                    StackPush(-1, Operand(0));
                    TrackPush();
                    advance = 1;
                    continue;

                case RegexCode.Setcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Nullcount | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Branchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark    = StackPeek();
                    int count   = StackPeek(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0))
                    {                                           // Max loops or empty match -> straight now
                        TrackPush2(mark, count);                // Save old mark, count
                        advance = 2;                            // Straight
                    }
                    else
                    {                                          // Nonempty match -> count+loop now
                        TrackPush(mark);                       // remember mark
                        StackPush(Textpos(), count + 1);       // Make new mark, incr count
                        Goto(Operand(0));                      // Loop
                    }
                    continue;
                }

                case RegexCode.Branchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (= current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    if (StackPeek(1) > 0)
                    {                                              // Positive -> can go straight
                        Textto(StackPeek());                       // Zap to mark
                        TrackPush2(TrackPeek(), StackPeek(1) - 1); // Save old mark, old count
                        advance = 2;                               // Straight
                        continue;
                    }
                    StackPush(TrackPeek(), StackPeek(1) - 1);           // recall old mark, old count
                    break;

                case RegexCode.Branchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    //  1: Previous count
                    TrackPop(2);
                    StackPush(TrackPeek(), TrackPeek(1));               // Recall old mark, old count
                    break;                                              // Backtrack


                case RegexCode.Lazybranchcount:
                    // StackPush:
                    //  0: Mark
                    //  1: Count
                {
                    StackPop(2);
                    int mark  = StackPeek();
                    int count = StackPeek(1);

                    if (count < 0)
                    {                                    // Negative count -> loop now
                        TrackPush2(mark);                // Save old mark
                        StackPush(Textpos(), count + 1); // Make new mark, incr count
                        Goto(Operand(0));                // Loop
                    }
                    else
                    {                                          // Nonneg count -> straight now
                        TrackPush(mark, count, Textpos());     // Save mark, count, position
                        advance = 2;                           // Straight
                    }
                    continue;
                }

                case RegexCode.Lazybranchcount | RegexCode.Back:
                    // TrackPush:
                    //  0: Mark
                    //  1: Count
                    //  2: Textpos
                {
                    TrackPop(3);
                    int mark    = TrackPeek();
                    int textpos = TrackPeek(2);

                    if (TrackPeek(1) < Operand(1) && textpos != mark)
                    {                                         // Under limit and not empty match -> loop
                        Textto(textpos);                      // Recall position
                        StackPush(textpos, TrackPeek(1) + 1); // Make new mark, incr count
                        TrackPush2(mark);                     // Save old mark
                        Goto(Operand(0));                     // Loop
                        continue;
                    }
                    else
                    {                                                  // Max loops or empty match -> backtrack
                        StackPush(TrackPeek(), TrackPeek(1));          // Recall old mark, count
                        break;                                         // backtrack
                    }
                }

                case RegexCode.Lazybranchcount | RegexCode.Back2:
                    // TrackPush:
                    //  0: Previous mark
                    // StackPush:
                    //  0: Mark (== current pos, discarded)
                    //  1: Count
                    TrackPop();
                    StackPop(2);
                    StackPush(TrackPeek(), StackPeek(1) - 1);       // Recall old mark, count
                    break;                                          // Backtrack

                case RegexCode.Setjump:
                    StackPush(Trackpos(), Crawlpos());
                    TrackPush();
                    advance = 0;
                    continue;

                case RegexCode.Setjump | RegexCode.Back:
                    StackPop(2);
                    break;

                case RegexCode.Backjump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());

                    // Undo captures until the crawl position matches the saved one.
                    while (Crawlpos() != StackPeek(1))
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Forejump:
                    // StackPush:
                    //  0: Saved trackpos
                    //  1: Crawlpos
                    StackPop(2);
                    Trackto(StackPeek());
                    TrackPush(StackPeek(1));
                    advance = 0;
                    continue;

                case RegexCode.Forejump | RegexCode.Back:
                    // TrackPush:
                    //  0: Crawlpos
                    TrackPop();

                    // Undo captures until the crawl position matches the saved one.
                    while (Crawlpos() != TrackPeek())
                    {
                        Uncapture();
                    }

                    break;

                case RegexCode.Bol:
                    // Fails unless there are no characters to the left or the previous character is '\n'.
                    if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Eol:
                    // Fails unless there are no characters to the right or the current character is '\n'.
                    if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Boundary:
                    if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Nonboundary:
                    if (IsBoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.ECMABoundary:
                    if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.NonECMABoundary:
                    if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Beginning:
                    if (Leftchars() > 0)
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.Start:
                    if (Textpos() != Textstart())
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.EndZ:
                    // Succeeds only with nothing to the right, or exactly one remaining character that is '\n'.
                    if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.End:
                    if (Rightchars() > 0)
                    {
                        break;
                    }
                    advance = 0;
                    continue;

                case RegexCode.One:
                    if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    advance = 1;
                    continue;

                case RegexCode.Notone:
                    if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    advance = 1;
                    continue;

                case RegexCode.Set:
                    if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code._strings[Operand(0)]))
                    {
                        break;
                    }

                    advance = 1;
                    continue;

                case RegexCode.Multi:
                {
                    if (!Stringmatch(_code._strings[Operand(0)]))
                    {
                        break;
                    }

                    advance = 1;
                    continue;
                }

                case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        // An unmatched group only lets the reference succeed when the ECMAScript option is set.
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    advance = 1;
                    continue;
                }

                case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            // A plain 'break' would only leave the while loop, hence the goto.
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setrep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    string set = _code._strings[Operand(0)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    // Clamp the repeat count to the number of characters available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    // c - i characters were consumed; record a backtrack entry if at least one was.
                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int  i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    string set = _code._strings[Operand(0)];
                    int    i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Oneloop | RegexCode.Back:
                case RegexCode.Notoneloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i   = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Onelazy:
                case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    // Nothing is consumed here; the Back handler takes one character per backtrack.
                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setlazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Onelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Notonelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.Setlazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInClass(Forwardcharnext(), _code._strings[Operand(0)]))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

                case RegexCode.ResetMatchStart:
                    TrackPush(MatchStart());            // Enable backtracking, saving the current match start
                    SetMatchStart(Textpos());           // Set the match start to the current position in text
                    advance = 0;
                    continue;

                case RegexCode.ResetMatchStart | RegexCode.Back:
                    TrackPop();
                    SetMatchStart(TrackPeek());         // Restore the previously saved value as the match start
                    break;                              // Continue backtracking

                case RegexCode.CallSubroutine:
                {
                    int captureNum = Operand(0);
                    PushCallFrame(captureNum, returnPos: _codepos + 2);
                    TrackPush();                                                // This TrackPush will be forgotten after it returns or backtracks
                    Goto(_code._capPositions[captureNum]);
                    continue;
                }

                case RegexCode.CallSubroutine | RegexCode.Back:
                    PopCallFrame();                                     // It must have been from the called subroutine
                    break;

                default:
                    throw new NotImplementedException(SR.UnimplementedState);
                }

                // Target of the explicit gotos in the *rep cases; a 'break' out of the
                // switch also falls through to here.
BreakBackward:
                ;

                // "break Backward" comes here:
                Backtrack();
            }
        }
コード例 #7
0
 /// <summary>
 /// Tests whether a character belongs to a character class by delegating to
 /// <c>RegexCharClass.CharInClass</c>, forwarding the runner's
 /// <c>charInClassOptimize</c> member as the third argument.
 /// </summary>
 /// <param name="ch">The character to test.</param>
 /// <param name="charClass">The character class string passed through to <c>RegexCharClass</c>.</param>
 /// <param name="runner">The runner whose <c>charInClassOptimize</c> member is forwarded; it is dereferenced unconditionally.</param>
 /// <returns>The result reported by <c>RegexCharClass.CharInClass</c>.</returns>
 protected static bool CharInClass(char ch, String charClass, RegexRunner runner)
 {
     var optimize = runner.charInClassOptimize;
     return RegexCharClass.CharInClass(ch, charClass, optimize);
 }
コード例 #8
0
        /// <summary>
        /// Tests whether a character matches a set/category pair. The pair is first
        /// converted with <c>RegexCharClass.ConvertOldStringsToClass</c>; the resulting
        /// class string is then checked with <c>RegexCharClass.CharInClass</c>, which also
        /// receives the runner's <c>charInClassOptimize</c> member.
        /// </summary>
        /// <param name="ch">The character to test.</param>
        /// <param name="set">The set string handed to the converter.</param>
        /// <param name="category">The category string handed to the converter.</param>
        /// <param name="runner">The runner whose <c>charInClassOptimize</c> member is forwarded; it is dereferenced unconditionally.</param>
        /// <returns>The result reported by <c>RegexCharClass.CharInClass</c> for the converted class.</returns>
        protected static bool CharInSet(char ch, String set, String category, RegexRunner runner)
        {
            return RegexCharClass.CharInClass(
                ch,
                RegexCharClass.ConvertOldStringsToClass(set, category),
                runner.charInClassOptimize);
        }