/// <summary>
/// Collects every character that belongs to the given character class.
/// </summary>
/// <param name="charClass">The character class whose members are enumerated.</param>
/// <returns>A set containing all characters matched by <paramref name="charClass"/>.</returns>
private static CharSet GetCharClassSet(string charClass)
{
    CharSet result = new CharSet();

    bool needsFullScan = RegexCharClass.IsSubtraction(charClass)
        || RegexCharClass.ContainsCategory(charClass);
    if (needsFullScan)
    {
        // Subtractions and Unicode categories are not expanded from the range list here,
        // so every UTF-16 code unit is tested individually.
        int code = 0;
        while (code <= char.MaxValue)
        {
            char candidate = (char)code;
            if (RegexCharClass.CharInClass(candidate, charClass))
            {
                result.Add(candidate);
            }
            code++;
        }
        return result;
    }

    // Faster path: only plain ranges are involved. The range string is read as
    // alternating entries: a range start (inclusive) followed by a range end (exclusive).
    // A trailing start without an end runs up to and including char.MaxValue.
    string ranges = RegexCharClass.GetCharClassRanges(charClass);
    int upperBound = char.MaxValue + 1;

    if (!RegexCharClass.IsNegated(charClass))
    {
        for (int index = 0; index < ranges.Length; index += 2)
        {
            int first = ranges[index];
            int limit = (index + 1 < ranges.Length) ? ranges[index + 1] : upperBound;
            for (int code = first; code < limit; code++)
            {
                result.Add((char)code);
            }
        }
        return result;
    }

    // Negated class: add the gaps between the listed ranges instead of the ranges themselves.
    int gapStart = 0;
    for (int index = 0; index < ranges.Length; index += 2)
    {
        int gapEnd = ranges[index];
        for (int code = gapStart; code < gapEnd; code++)
        {
            result.Add((char)code);
        }
        gapStart = (index + 1 < ranges.Length) ? ranges[index + 1] : upperBound;
    }

    // Everything after the last listed range also belongs to the negated class.
    for (int code = gapStart; code <= char.MaxValue; code++)
    {
        result.Add((char)code);
    }
    return result;
}
/// <summary>
/// Runs the interpreter's main dispatch loop: starting at code position 0 it repeatedly
/// reads the current opcode via <c>Operator()</c> and executes the matching case until
/// <c>RegexCode.Stop</c> is reached.
/// </summary>
/// <remarks>
/// Inside the switch, <c>continue</c> proceeds to the next opcode (after an explicit
/// <c>Advance</c> or <c>Goto</c>), while <c>break</c> or <c>goto BreakBackward</c> leaves
/// the switch and calls <c>Backtrack()</c>. Cases combined with <c>RegexCode.Back</c> or
/// <c>RegexCode.Back2</c> appear to be the handlers executed when backtracking re-enters
/// an opcode; that wiring lives in <c>Backtrack()</c>, which is not shown here.
/// </remarks>
protected override void Go()
{
    Goto(0);

    for (;;)
    {
#if DBG
        if (runmatch.Debug)
        {
            DumpState();
        }
#endif

        // Checked once per dispatched opcode.
        CheckTimeout();

        switch (Operator())
        {
            case RegexCode.Stop:
                return;

            case RegexCode.Nothing:
                break;

            case RegexCode.Goto:
                Goto(Operand(0));
                continue;

            case RegexCode.Testref:
                if (!IsMatched(Operand(0)))
                {
                    break;
                }
                Advance(1);
                continue;

            case RegexCode.Lazybranch:
                // Remember the text position so the Back handler can restore it.
                TrackPush(Textpos());
                Advance(1);
                continue;

            case RegexCode.Lazybranch | RegexCode.Back:
                TrackPop();
                Textto(TrackPeek());
                Goto(Operand(0));
                continue;

            case RegexCode.Setmark:
                StackPush(Textpos());
                TrackPush();
                Advance();
                continue;

            case RegexCode.Nullmark:
                StackPush(-1);
                TrackPush();
                Advance();
                continue;

            case RegexCode.Setmark | RegexCode.Back:
            case RegexCode.Nullmark | RegexCode.Back:
                // Undo the StackPush done by Setmark/Nullmark, then keep backtracking.
                StackPop();
                break;

            case RegexCode.Getmark:
                StackPop();
                TrackPush(StackPeek());
                Textto(StackPeek());
                Advance();
                continue;

            case RegexCode.Getmark | RegexCode.Back:
                TrackPop();
                StackPush(TrackPeek());
                break;

            case RegexCode.Capturemark:
                // Operand(1) == -1 means there is no second group to test or transfer from.
                if (Operand(1) != -1 && !IsMatched(Operand(1)))
                {
                    break;
                }
                StackPop();
                if (Operand(1) != -1)
                {
                    TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos());
                }
                else
                {
                    Capture(Operand(0), StackPeek(), Textpos());
                }
                TrackPush(StackPeek());

                Advance(2);

                continue;

            case RegexCode.Capturemark | RegexCode.Back:
                TrackPop();
                StackPush(TrackPeek());
                Uncapture();
                // A second Uncapture() is issued when both operands are set.
                if (Operand(0) != -1 && Operand(1) != -1)
                {
                    Uncapture();
                }

                break;

            case RegexCode.Branchmark:
                {
                    int matched;
                    StackPop();

                    matched = Textpos() - StackPeek();

                    if (matched != 0)                       // Nonempty match -> loop now
                    {
                        TrackPush(StackPeek(), Textpos());  // Save old mark, textpos
                        StackPush(Textpos());               // Make new mark
                        Goto(Operand(0));                   // Loop
                    }
                    else                                    // Empty match -> straight now
                    {
                        TrackPush2(StackPeek());            // Save old mark
                        Advance(1);                         // Straight
                    }
                    continue;
                }

            case RegexCode.Branchmark | RegexCode.Back:
                TrackPop(2);
                StackPop();
                Textto(TrackPeek(1));                       // Recall position
                TrackPush2(TrackPeek());                    // Save old mark
                Advance(1);                                 // Straight
                continue;

            case RegexCode.Branchmark | RegexCode.Back2:
                TrackPop();
                StackPush(TrackPeek());                     // Recall old mark
                break;                                      // Backtrack

            case RegexCode.Lazybranchmark:
                {
                    // We hit this the first time through a lazy loop and after each
                    // successful match of the inner expression. It simply continues
                    // on and doesn't loop.
                    StackPop();

                    int oldMarkPos = StackPeek();

                    if (Textpos() != oldMarkPos)            // Nonempty match -> try to loop again by going to 'back' state
                    {
                        if (oldMarkPos != -1)
                        {
                            TrackPush(oldMarkPos, Textpos());   // Save old mark, textpos
                        }
                        else
                        {
                            TrackPush(Textpos(), Textpos());
                        }
                    }
                    else
                    {
                        // The inner expression found an empty match, so we'll go directly to 'back2' if we
                        // backtrack. In this case, we need to push something on the stack, since back2 pops.
                        // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
                        // position associated with that empty match.
                        StackPush(oldMarkPos);

                        TrackPush2(StackPeek());                // Save old mark
                    }
                    Advance(1);
                    continue;
                }

            case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    // After the first time, Lazybranchmark | RegexCode.Back occurs
                    // with each iteration of the loop, and therefore with every attempted
                    // match of the inner expression. We'll try to match the inner expression,
                    // then go back to Lazybranchmark if successful. If the inner expression
                    // fails, we go to Lazybranchmark | RegexCode.Back2
                    int pos;

                    TrackPop(2);
                    pos = TrackPeek(1);
                    TrackPush2(TrackPeek());                // Save old mark
                    StackPush(pos);                         // Make new mark
                    Textto(pos);                            // Recall position
                    Goto(Operand(0));                       // Loop
                    continue;
                }

            case RegexCode.Lazybranchmark | RegexCode.Back2:
                // The lazy loop has failed. We'll do a true backtrack and
                // start over before the lazy loop.
                StackPop();
                TrackPop();
                StackPush(TrackPeek());                      // Recall old mark
                break;

            case RegexCode.Setcount:
                StackPush(Textpos(), Operand(0));
                TrackPush();
                Advance(1);
                continue;

            case RegexCode.Nullcount:
                StackPush(-1, Operand(0));
                TrackPush();
                Advance(1);
                continue;

            case RegexCode.Setcount | RegexCode.Back:
                StackPop(2);
                break;

            case RegexCode.Nullcount | RegexCode.Back:
                StackPop(2);
                break;

            case RegexCode.Branchcount:
                // StackPush:
                //  0: Mark
                //  1: Count
                {
                    StackPop(2);
                    int mark = StackPeek();
                    int count = StackPeek(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0))    // Max loops or empty match -> straight now
                    {
                        TrackPush2(mark, count);            // Save old mark, count
                        Advance(2);                         // Straight
                    }
                    else                                    // Nonempty match -> count+loop now
                    {
                        TrackPush(mark);                    // remember mark
                        StackPush(Textpos(), count + 1);    // Make new mark, incr count
                        Goto(Operand(0));                   // Loop
                    }
                    continue;
                }

            case RegexCode.Branchcount | RegexCode.Back:
                // TrackPush:
                //  0: Previous mark
                // StackPush:
                //  0: Mark (= current pos, discarded)
                //  1: Count
                TrackPop();
                StackPop(2);
                if (StackPeek(1) > 0)                       // Positive -> can go straight
                {
                    Textto(StackPeek());                    // Zap to mark
                    TrackPush2(TrackPeek(), StackPeek(1) - 1);  // Save old mark, old count
                    Advance(2);                             // Straight
                    continue;
                }
                StackPush(TrackPeek(), StackPeek(1) - 1);   // recall old mark, old count
                break;

            case RegexCode.Branchcount | RegexCode.Back2:
                // TrackPush:
                //  0: Previous mark
                //  1: Previous count
                TrackPop(2);
                StackPush(TrackPeek(), TrackPeek(1));       // Recall old mark, old count
                break;                                      // Backtrack

            case RegexCode.Lazybranchcount:
                // StackPush:
                //  0: Mark
                //  1: Count
                {
                    StackPop(2);
                    int mark = StackPeek();
                    int count = StackPeek(1);

                    if (count < 0)                          // Negative count -> loop now
                    {
                        TrackPush2(mark);                   // Save old mark
                        StackPush(Textpos(), count + 1);    // Make new mark, incr count
                        Goto(Operand(0));                   // Loop
                    }
                    else                                    // Nonneg count -> straight now
                    {
                        TrackPush(mark, count, Textpos());  // Save mark, count, position
                        Advance(2);                         // Straight
                    }
                    continue;
                }

            case RegexCode.Lazybranchcount | RegexCode.Back:
                // TrackPush:
                //  0: Mark
                //  1: Count
                //  2: Textpos
                {
                    TrackPop(3);
                    int mark = TrackPeek();
                    int textpos = TrackPeek(2);

                    if (TrackPeek(1) < Operand(1) && textpos != mark)   // Under limit and not empty match -> loop
                    {
                        Textto(textpos);                        // Recall position
                        StackPush(textpos, TrackPeek(1) + 1);   // Make new mark, incr count
                        TrackPush2(mark);                       // Save old mark
                        Goto(Operand(0));                       // Loop
                        continue;
                    }
                    else                                        // Max loops or empty match -> backtrack
                    {
                        StackPush(TrackPeek(), TrackPeek(1));   // Recall old mark, count
                        break;                                  // backtrack
                    }
                }

            case RegexCode.Lazybranchcount | RegexCode.Back2:
                // TrackPush:
                //  0: Previous mark
                // StackPush:
                //  0: Mark (== current pos, discarded)
                //  1: Count
                TrackPop();
                StackPop(2);
                StackPush(TrackPeek(), StackPeek(1) - 1);   // Recall old mark, count
                break;                                      // Backtrack

            case RegexCode.Setjump:
                StackPush(Trackpos(), Crawlpos());
                TrackPush();
                Advance();
                continue;

            case RegexCode.Setjump | RegexCode.Back:
                StackPop(2);
                break;

            case RegexCode.Backjump:
                // StackPush:
                //  0: Saved trackpos
                //  1: Crawlpos
                StackPop(2);
                Trackto(StackPeek());

                // Undo captures until the crawl position matches the saved one.
                while (Crawlpos() != StackPeek(1))
                {
                    Uncapture();
                }

                break;

            case RegexCode.Forejump:
                // StackPush:
                //  0: Saved trackpos
                //  1: Crawlpos
                StackPop(2);
                Trackto(StackPeek());
                TrackPush(StackPeek(1));
                Advance();
                continue;

            case RegexCode.Forejump | RegexCode.Back:
                // TrackPush:
                //  0: Crawlpos
                TrackPop();

                while (Crawlpos() != TrackPeek())
                {
                    Uncapture();
                }

                break;

            case RegexCode.Bol:
                // Fails unless at the left edge or directly after a '\n'.
                if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Eol:
                // Fails unless at the right edge or directly before a '\n'.
                if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Boundary:
                if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Nonboundary:
                if (IsBoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.ECMABoundary:
                if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.NonECMABoundary:
                if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Beginning:
                if (Leftchars() > 0)
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Start:
                if (Textpos() != Textstart())
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.EndZ:
                // Succeeds at the very end, or when the only remaining character is '\n'.
                if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.End:
                if (Rightchars() > 0)
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.One:
                if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                {
                    break;
                }

                Advance(1);
                continue;

            case RegexCode.Notone:
                if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                {
                    break;
                }

                Advance(1);
                continue;

            case RegexCode.Set:
                if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), runstrings[Operand(0)]))
                {
                    break;
                }

                Advance(1);
                continue;

            case RegexCode.Multi:
                {
                    if (!Stringmatch(runstrings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;
                }

            case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        // An unmatched group only lets the reference succeed when the
                        // ECMAScript option is set.
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    Advance(1);
                    continue;
                }

            case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    // goto is needed here: a plain break would only leave the while loop.
                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setrep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    String set = runstrings[Operand(0)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    // Never try to consume more characters than are available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            // Step back over the mismatching character; this break
                            // leaves only the for loop, not the switch.
                            Backwardnext();
                            break;
                        }
                    }

                    // c - i characters were consumed; record state for the Back handler
                    // only if at least one was.
                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    String set = runstrings[Operand(0)];
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Oneloop | RegexCode.Back:
            case RegexCode.Notoneloop | RegexCode.Back:
                {
                    // Restore the recorded position and, if the counter is still positive,
                    // push a new entry one step further back.
                    TrackPop(2);
                    int i = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Onelazy:
            case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    // Consume nothing now; only record how many characters the Back
                    // handler may still try, and from where.
                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setlazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Onelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    // Try to consume one more character; give up if it does not match.
                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Notonelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setlazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInClass(Forwardcharnext(), runstrings[Operand(0)]))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

            default:
                throw new NotImplementedException();
        }

    BreakBackward:
        ;

        // "break Backward" comes here:
        Backtrack();
    }
}
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// !!!! This function must be kept synchronized with GenerateFindFirstChar !!!!
// !!!! in RegexCompiler.cs                                                !!!!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

/// <summary>
/// Moves <c>runtextpos</c> to the next position at which a match attempt is worthwhile.
/// </summary>
/// <returns>
/// <c>true</c> when a candidate position was found (or no prefilter applies);
/// <c>false</c> when the anchors, the <c>runbmPrefix</c> scan or the first-character
/// scan rule out any further candidate.
/// </returns>
protected override bool FindFirstChar()
{
    bool anchoredBeginning = (runanchors & RegexFCD.Beginning) != 0;
    bool anchoredStart = (runanchors & RegexFCD.Start) != 0;
    bool anchoredEndZ = (runanchors & RegexFCD.EndZ) != 0;
    bool anchoredEnd = (runanchors & RegexFCD.End) != 0;

    // Case 1: at least one leading/trailing anchor restricts where a match may start.
    if (anchoredBeginning || anchoredStart || anchoredEndZ || anchoredEnd)
    {
        if (runcode._rightToLeft)
        {
            bool impossible;
            if (anchoredEnd && runtextpos < runtextend)
            {
                impossible = true;
            }
            else if (anchoredEndZ
                && (runtextpos < runtextend - 1
                    || (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n')))
            {
                impossible = true;
            }
            else
            {
                impossible = anchoredStart && runtextpos < runtextstart;
            }

            if (impossible)
            {
                runtextpos = runtextbeg;
                return false;
            }

            if (anchoredBeginning && runtextpos > runtextbeg)
            {
                runtextpos = runtextbeg;
            }
        }
        else
        {
            bool pastBeginning = anchoredBeginning && runtextpos > runtextbeg;
            if (pastBeginning || (anchoredStart && runtextpos > runtextstart))
            {
                runtextpos = runtextend;
                return false;
            }

            // Jump straight to the only position(s) an end anchor can accept.
            if (anchoredEndZ && runtextpos < runtextend - 1)
            {
                runtextpos = runtextend - 1;
            }
            else if (anchoredEnd && runtextpos < runtextend)
            {
                runtextpos = runtextend;
            }
        }

        // A valid anchor position was found; additionally verify the prefix if there is one.
        return runbmPrefix == null
            || runbmPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
    }

    // Case 2: no anchors, but a prefix scanner is available.
    if (runbmPrefix != null)
    {
        runtextpos = runbmPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);
        if (runtextpos != -1)
        {
            return true;
        }

        runtextpos = runcode._rightToLeft ? runtextbeg : runtextend;
        return false;
    }

    // Case 3: nothing to filter on, so every position is a candidate.
    if (runfcPrefix == null)
    {
        return true;
    }

    // Case 4: scan for a character belonging to the first-character set.
    runrtl = runcode._rightToLeft;
    runci = runfcPrefix.CaseInsensitive;
    String firstChars = runfcPrefix.Prefix;

    int remaining = Forwardchars();
    if (RegexCharClass.IsSingleton(firstChars))
    {
        char wanted = RegexCharClass.SingletonChar(firstChars);
        while (remaining > 0)
        {
            if (Forwardcharnext() == wanted)
            {
                // Step back so the match attempt starts on the character just found.
                Backwardnext();
                return true;
            }
            remaining--;
        }
    }
    else
    {
        while (remaining > 0)
        {
            if (RegexCharClass.CharInClass(Forwardcharnext(), firstChars))
            {
                Backwardnext();
                return true;
            }
            remaining--;
        }
    }

    return false;
}
/// <summary>
/// Tests whether a character belongs to a character class by forwarding to
/// <c>RegexCharClass.CharInClass</c>.
/// </summary>
/// <param name="ch">The character to test.</param>
/// <param name="charClass">The character class string to test against.</param>
/// <returns>The result reported by <c>RegexCharClass.CharInClass</c>.</returns>
protected static bool CharInClass(char ch, String charClass)
{
    bool isMember = RegexCharClass.CharInClass(ch, charClass);
    return isMember;
}
/// <summary>
/// Tests whether a character belongs to a class given in the old set/category form.
/// The two strings are first converted with
/// <c>RegexCharClass.ConvertOldStringsToClass</c> and the result is then checked with
/// <c>RegexCharClass.CharInClass</c>.
/// </summary>
/// <param name="ch">The character to test.</param>
/// <param name="set">The old-style set string.</param>
/// <param name="category">The old-style category string.</param>
/// <returns>The result reported by <c>RegexCharClass.CharInClass</c> for the converted class.</returns>
protected static bool CharInSet(char ch, String set, String category)
{
    return RegexCharClass.CharInClass(
        ch,
        RegexCharClass.ConvertOldStringsToClass(set, category));
}
/// <summary>
/// Runs the interpreter's main dispatch loop: starting at code position 0 it repeatedly
/// reads the current opcode via <c>Operator()</c> and executes the matching case until
/// <c>RegexCode.Stop</c> is reached.
/// </summary>
/// <remarks>
/// Instead of calling <c>Advance</c> in every case, a case stores the number of operands
/// to skip in the local <c>advance</c> and uses <c>continue</c>; the single
/// <c>Advance(advance)</c> call at the top of the loop then performs the step. Cases that
/// jump with <c>Goto</c> leave <c>advance</c> at -1 so no extra step is taken.
/// <c>break</c> or <c>goto BreakBackward</c> leaves the switch and calls
/// <c>Backtrack()</c>. Cases combined with <c>RegexCode.Back</c> or
/// <c>RegexCode.Back2</c> appear to be the handlers executed when backtracking re-enters
/// an opcode; that wiring lives in <c>Backtrack()</c>, which is not shown here.
/// </remarks>
protected override void Go()
{
    Goto(0);

    // -1 means "no pending advance"; values >= 0 are applied at the top of the next iteration.
    int advance = -1;
    while (true)
    {
        if (advance >= 0)
        {
            // https://github.com/dotnet/coreclr/pull/14850#issuecomment-342256447
            // Single common Advance call to reduce method size; and single method inline point
            Advance(advance);
            advance = -1;
        }
#if DEBUG
        if (runmatch.Debug)
        {
            DumpState();
        }
#endif

        // NOTE(review): the timeout check is commented out, so nothing in this loop
        // calls CheckTimeout(); confirm that this is intentional.
        //CheckTimeout();

        switch (Operator())
        {
            case RegexCode.Stop:
                return;

            case RegexCode.Nothing:
                break;

            case RegexCode.Goto:
                Goto(Operand(0));
                continue;

            case RegexCode.Testref:
                if (!IsMatched(Operand(0)))
                {
                    break;
                }
                advance = 1;
                continue;

            case RegexCode.Lazybranch:
                // Remember the text position so the Back handler can restore it.
                TrackPush(Textpos());
                advance = 1;
                continue;

            case RegexCode.Lazybranch | RegexCode.Back:
                TrackPop();
                Textto(TrackPeek());
                Goto(Operand(0));
                continue;

            case RegexCode.Setmark:
                StackPush(Textpos());
                TrackPush();
                advance = 0;
                continue;

            case RegexCode.Nullmark:
                StackPush(-1);
                TrackPush();
                advance = 0;
                continue;

            case RegexCode.Setmark | RegexCode.Back:
            case RegexCode.Nullmark | RegexCode.Back:
                // Undo the StackPush done by Setmark/Nullmark, then keep backtracking.
                StackPop();
                break;

            case RegexCode.Getmark:
                StackPop();
                TrackPush(StackPeek());
                Textto(StackPeek());
                advance = 0;
                continue;

            case RegexCode.Getmark | RegexCode.Back:
                TrackPop();
                StackPush(TrackPeek());
                break;

            case RegexCode.Capturemark:
                // Operand(1) == -1 means there is no second group to test or transfer from.
                if (Operand(1) != -1 && !IsMatched(Operand(1)))
                {
                    break;
                }
                StackPop();
                if (Operand(1) != -1)
                {
                    TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos());
                }
                else if (_callStack != null && _callStack.captureNum == Operand(0))
                {
                    // Successful return from a subroutine
                    // No capture is recorded and nothing is pushed on the track on this
                    // path; execution resumes at the position saved in the call frame.
                    int returnPos = _callStack.returnPos;
                    PopCallFrame();
                    Goto(returnPos);
                    continue;
                }
                else
                {
                    Capture(Operand(0), StackPeek(), Textpos());
                }
                TrackPush(StackPeek());

                advance = 2;

                continue;

            case RegexCode.Capturemark | RegexCode.Back:
                TrackPop();
                StackPush(TrackPeek());
                Uncapture();
                // A second Uncapture() is issued when both operands are set.
                if (Operand(0) != -1 && Operand(1) != -1)
                {
                    Uncapture();
                }

                break;

            case RegexCode.Branchmark:
                {
                    int matched;
                    StackPop();

                    matched = Textpos() - StackPeek();

                    if (matched != 0)
                    {                                       // Nonempty match -> loop now
                        TrackPush(StackPeek(), Textpos());  // Save old mark, textpos
                        StackPush(Textpos());               // Make new mark
                        Goto(Operand(0));                   // Loop
                    }
                    else
                    {                                       // Empty match -> straight now
                        TrackPush2(StackPeek());            // Save old mark
                        advance = 1;                        // Straight
                    }
                    continue;
                }

            case RegexCode.Branchmark | RegexCode.Back:
                TrackPop(2);
                StackPop();
                Textto(TrackPeek(1));                       // Recall position
                TrackPush2(TrackPeek());                    // Save old mark
                advance = 1;                                // Straight
                continue;

            case RegexCode.Branchmark | RegexCode.Back2:
                TrackPop();
                StackPush(TrackPeek());                     // Recall old mark
                break;                                      // Backtrack

            case RegexCode.Lazybranchmark:
                {
                    // We hit this the first time through a lazy loop and after each
                    // successful match of the inner expression. It simply continues
                    // on and doesn't loop.
                    StackPop();

                    int oldMarkPos = StackPeek();

                    if (Textpos() != oldMarkPos)
                    {                                       // Nonempty match -> try to loop again by going to 'back' state
                        if (oldMarkPos != -1)
                        {
                            TrackPush(oldMarkPos, Textpos());   // Save old mark, textpos
                        }
                        else
                        {
                            TrackPush(Textpos(), Textpos());
                        }
                    }
                    else
                    {
                        // The inner expression found an empty match, so we'll go directly to 'back2' if we
                        // backtrack. In this case, we need to push something on the stack, since back2 pops.
                        // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
                        // position associated with that empty match.
                        StackPush(oldMarkPos);

                        TrackPush2(StackPeek());                // Save old mark
                    }
                    advance = 1;
                    continue;
                }

            case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    // After the first time, Lazybranchmark | RegexCode.Back occurs
                    // with each iteration of the loop, and therefore with every attempted
                    // match of the inner expression. We'll try to match the inner expression,
                    // then go back to Lazybranchmark if successful. If the inner expression
                    // fails, we go to Lazybranchmark | RegexCode.Back2
                    int pos;

                    TrackPop(2);
                    pos = TrackPeek(1);
                    TrackPush2(TrackPeek());                // Save old mark
                    StackPush(pos);                         // Make new mark
                    Textto(pos);                            // Recall position
                    Goto(Operand(0));                       // Loop
                    continue;
                }

            case RegexCode.Lazybranchmark | RegexCode.Back2:
                // The lazy loop has failed. We'll do a true backtrack and
                // start over before the lazy loop.
                StackPop();
                TrackPop();
                StackPush(TrackPeek());                      // Recall old mark
                break;

            case RegexCode.Setcount:
                StackPush(Textpos(), Operand(0));
                TrackPush();
                advance = 1;
                continue;

            case RegexCode.Nullcount:
                StackPush(-1, Operand(0));
                TrackPush();
                advance = 1;
                continue;

            case RegexCode.Setcount | RegexCode.Back:
                StackPop(2);
                break;

            case RegexCode.Nullcount | RegexCode.Back:
                StackPop(2);
                break;

            case RegexCode.Branchcount:
                // StackPush:
                //  0: Mark
                //  1: Count
                {
                    StackPop(2);
                    int mark = StackPeek();
                    int count = StackPeek(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0))
                    {                                       // Max loops or empty match -> straight now
                        TrackPush2(mark, count);            // Save old mark, count
                        advance = 2;                        // Straight
                    }
                    else
                    {                                       // Nonempty match -> count+loop now
                        TrackPush(mark);                    // remember mark
                        StackPush(Textpos(), count + 1);    // Make new mark, incr count
                        Goto(Operand(0));                   // Loop
                    }
                    continue;
                }

            case RegexCode.Branchcount | RegexCode.Back:
                // TrackPush:
                //  0: Previous mark
                // StackPush:
                //  0: Mark (= current pos, discarded)
                //  1: Count
                TrackPop();
                StackPop(2);
                if (StackPeek(1) > 0)
                {                                           // Positive -> can go straight
                    Textto(StackPeek());                    // Zap to mark
                    TrackPush2(TrackPeek(), StackPeek(1) - 1);  // Save old mark, old count
                    advance = 2;                            // Straight
                    continue;
                }
                StackPush(TrackPeek(), StackPeek(1) - 1);   // recall old mark, old count
                break;

            case RegexCode.Branchcount | RegexCode.Back2:
                // TrackPush:
                //  0: Previous mark
                //  1: Previous count
                TrackPop(2);
                StackPush(TrackPeek(), TrackPeek(1));       // Recall old mark, old count
                break;                                      // Backtrack

            case RegexCode.Lazybranchcount:
                // StackPush:
                //  0: Mark
                //  1: Count
                {
                    StackPop(2);
                    int mark = StackPeek();
                    int count = StackPeek(1);

                    if (count < 0)
                    {                                       // Negative count -> loop now
                        TrackPush2(mark);                   // Save old mark
                        StackPush(Textpos(), count + 1);    // Make new mark, incr count
                        Goto(Operand(0));                   // Loop
                    }
                    else
                    {                                       // Nonneg count -> straight now
                        TrackPush(mark, count, Textpos());  // Save mark, count, position
                        advance = 2;                        // Straight
                    }
                    continue;
                }

            case RegexCode.Lazybranchcount | RegexCode.Back:
                // TrackPush:
                //  0: Mark
                //  1: Count
                //  2: Textpos
                {
                    TrackPop(3);
                    int mark = TrackPeek();
                    int textpos = TrackPeek(2);

                    if (TrackPeek(1) < Operand(1) && textpos != mark)
                    {                                           // Under limit and not empty match -> loop
                        Textto(textpos);                        // Recall position
                        StackPush(textpos, TrackPeek(1) + 1);   // Make new mark, incr count
                        TrackPush2(mark);                       // Save old mark
                        Goto(Operand(0));                       // Loop
                        continue;
                    }
                    else
                    {                                           // Max loops or empty match -> backtrack
                        StackPush(TrackPeek(), TrackPeek(1));   // Recall old mark, count
                        break;                                  // backtrack
                    }
                }

            case RegexCode.Lazybranchcount | RegexCode.Back2:
                // TrackPush:
                //  0: Previous mark
                // StackPush:
                //  0: Mark (== current pos, discarded)
                //  1: Count
                TrackPop();
                StackPop(2);
                StackPush(TrackPeek(), StackPeek(1) - 1);   // Recall old mark, count
                break;                                      // Backtrack

            case RegexCode.Setjump:
                StackPush(Trackpos(), Crawlpos());
                TrackPush();
                advance = 0;
                continue;

            case RegexCode.Setjump | RegexCode.Back:
                StackPop(2);
                break;

            case RegexCode.Backjump:
                // StackPush:
                //  0: Saved trackpos
                //  1: Crawlpos
                StackPop(2);
                Trackto(StackPeek());

                // Undo captures until the crawl position matches the saved one.
                while (Crawlpos() != StackPeek(1))
                {
                    Uncapture();
                }

                break;

            case RegexCode.Forejump:
                // StackPush:
                //  0: Saved trackpos
                //  1: Crawlpos
                StackPop(2);
                Trackto(StackPeek());
                TrackPush(StackPeek(1));
                advance = 0;
                continue;

            case RegexCode.Forejump | RegexCode.Back:
                // TrackPush:
                //  0: Crawlpos
                TrackPop();

                while (Crawlpos() != TrackPeek())
                {
                    Uncapture();
                }

                break;

            case RegexCode.Bol:
                // Fails unless at the left edge or directly after a '\n'.
                if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.Eol:
                // Fails unless at the right edge or directly before a '\n'.
                if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.Boundary:
                if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.Nonboundary:
                if (IsBoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.ECMABoundary:
                if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.NonECMABoundary:
                if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.Beginning:
                if (Leftchars() > 0)
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.Start:
                if (Textpos() != Textstart())
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.EndZ:
                // Succeeds at the very end, or when the only remaining character is '\n'.
                if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.End:
                if (Rightchars() > 0)
                {
                    break;
                }
                advance = 0;
                continue;

            case RegexCode.One:
                if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                {
                    break;
                }

                advance = 1;
                continue;

            case RegexCode.Notone:
                if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                {
                    break;
                }

                advance = 1;
                continue;

            case RegexCode.Set:
                if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code._strings[Operand(0)]))
                {
                    break;
                }

                advance = 1;
                continue;

            case RegexCode.Multi:
                {
                    if (!Stringmatch(_code._strings[Operand(0)]))
                    {
                        break;
                    }

                    advance = 1;
                    continue;
                }

            case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        // An unmatched group only lets the reference succeed when the
                        // ECMAScript option is set.
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    advance = 1;
                    continue;
                }

            case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    // goto is needed here: a plain break would only leave the while loop.
                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Setrep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    string set = _code._strings[Operand(0)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            goto BreakBackward;
                        }
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    // Never try to consume more characters than are available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            // Step back over the mismatching character; this break
                            // leaves only the for loop, not the switch.
                            Backwardnext();
                            break;
                        }
                    }

                    // c - i characters were consumed; record state for the Back handler
                    // only if at least one was.
                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Setloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    string set = _code._strings[Operand(0)];
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        TrackPush(c - i - 1, Textpos() - Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Oneloop | RegexCode.Back:
            case RegexCode.Notoneloop | RegexCode.Back:
                {
                    // Restore the recorded position and, if the counter is still positive,
                    // push a new entry one step further back.
                    TrackPop(2);
                    int i = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Setloop | RegexCode.Back:
                {
                    TrackPop(2);
                    int i = TrackPeek();
                    int pos = TrackPeek(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos - Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Onelazy:
            case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    // Consume nothing now; only record how many characters the Back
                    // handler may still try, and from where.
                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Setlazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        TrackPush(c - 1, Textpos());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Onelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    // Try to consume one more character; give up if it does not match.
                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Notonelazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.Setlazy | RegexCode.Back:
                {
                    TrackPop(2);
                    int pos = TrackPeek(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInClass(Forwardcharnext(), _code._strings[Operand(0)]))
                    {
                        break;
                    }

                    int i = TrackPeek();

                    if (i > 0)
                    {
                        TrackPush(i - 1, pos + Bump());
                    }

                    advance = 2;
                    continue;
                }

            case RegexCode.ResetMatchStart:
                TrackPush(MatchStart());                    // Enable backtracking, saving the current match start
                SetMatchStart(Textpos());                   // Set the match start to the current position in text
                advance = 0;
                continue;

            case RegexCode.ResetMatchStart | RegexCode.Back:
                TrackPop();
                SetMatchStart(TrackPeek());                 // Restore the previously saved value as the match start
                break;                                      // Continue backtracking

            case RegexCode.CallSubroutine:
                {
                    int captureNum = Operand(0);
                    // The return position is the code position just past this opcode
                    // and its single operand.
                    PushCallFrame(captureNum, returnPos: _codepos + 2);
                    TrackPush();                            // This TrackPush will be forgotten after it returns or backtracks
                    Goto(_code._capPositions[captureNum]);
                    continue;
                }

            case RegexCode.CallSubroutine | RegexCode.Back:
                PopCallFrame();                             // It must have been from the called subroutine
                break;

            default:
                throw new NotImplementedException(SR.UnimplementedState);
        }

    BreakBackward:
        ;

        // "break Backward" comes here:
        Backtrack();
    }
}
/// <summary>
/// Tests whether a character belongs to a character class by forwarding to
/// <c>RegexCharClass.CharInClass</c>, passing along the runner's
/// <c>charInClassOptimize</c> value.
/// </summary>
/// <param name="ch">The character to test.</param>
/// <param name="charClass">The character class string to test against.</param>
/// <param name="runner">The runner whose <c>charInClassOptimize</c> member is forwarded.</param>
/// <returns>The result reported by <c>RegexCharClass.CharInClass</c>.</returns>
protected static bool CharInClass(char ch, String charClass, RegexRunner runner)
{
    bool isMember = RegexCharClass.CharInClass(ch, charClass, runner.charInClassOptimize);
    return isMember;
}
/// <summary>
/// Tests whether a character belongs to a class given in the old set/category form.
/// The two strings are first converted with
/// <c>RegexCharClass.ConvertOldStringsToClass</c>; the result is then checked with
/// <c>RegexCharClass.CharInClass</c>, passing along the runner's
/// <c>charInClassOptimize</c> value.
/// </summary>
/// <param name="ch">The character to test.</param>
/// <param name="set">The old-style set string.</param>
/// <param name="category">The old-style category string.</param>
/// <param name="runner">The runner whose <c>charInClassOptimize</c> member is forwarded.</param>
/// <returns>The result reported by <c>RegexCharClass.CharInClass</c> for the converted class.</returns>
protected static bool CharInSet(char ch, String set, String category, RegexRunner runner)
{
    // Argument order keeps the conversion ahead of the runner member access,
    // exactly as in the two-statement form.
    return RegexCharClass.CharInClass(
        ch,
        RegexCharClass.ConvertOldStringsToClass(set, category),
        runner.charInClassOptimize);
}