// Creates a first-character descriptor whose character class is obtained by
// handing the given class string to RegexCharClass.Parse.
//   charClass       - class string passed to RegexCharClass.Parse
//   nullable        - stored as-is in _nullable
//   caseInsensitive - stored as-is in _caseInsensitive
internal RegexFC(string charClass, bool nullable, bool caseInsensitive)
{
    this._nullable = nullable;
    this._caseInsensitive = caseInsensitive;
    this._cc = RegexCharClass.Parse(charClass);
}
// Creates a first-character descriptor from a set string: a fresh
// RegexCharClass is created and the set is added to it through AddSet.
//   set             - set string forwarded to RegexCharClass.AddSet
//   nullable        - stored as-is in _nullable
//   caseInsensitive - stored as-is in _caseInsensitive
internal RegexFC(string set, bool nullable, bool caseInsensitive)
{
    RegexCharClass charClass = new RegexCharClass();
    charClass.AddSet(set);

    _cc = charClass;
    _nullable = nullable;
    _caseInsensitive = caseInsensitive;
}
// Creates a first-character descriptor for a single character.
//   ch              - the character
//   not             - when false the class is exactly {ch}; when true it is
//                     every UTF-16 code unit except ch
//   nullable        - stored as-is in _nullable
//   caseInsensitive - stored as-is in _caseInsensitive
internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive)
{
    RegexCharClass charClass = new RegexCharClass();

    if (!not)
    {
        charClass.AddRange(ch, ch);
    }
    else
    {
        // Everything below ch (skipped when ch is already the lowest value) ...
        if (ch > char.MinValue)
        {
            charClass.AddRange(char.MinValue, (char)(ch - 1));
        }

        // ... and everything above ch (skipped when ch is already the highest value).
        if (ch < char.MaxValue)
        {
            charClass.AddRange((char)(ch + 1), char.MaxValue);
        }
    }

    _cc = charClass;
    _caseInsensitive = caseInsensitive;
    _nullable = nullable;
}
// Creates a first-character descriptor for a single character.
//   ch              - the character
//   not             - when false the class is exactly {ch}; when true it is
//                     every UTF-16 code unit except ch
//   nullable        - stored as-is in _nullable
//   caseInsensitive - stored as-is in _caseInsensitive
internal RegexFC(char ch, bool not, bool nullable, bool caseInsensitive)
{
    _cc = new RegexCharClass();

    if (not)
    {
        // Range below ch; skipped for '\0' because there is nothing below it.
        if (ch > '\0')
        {
            _cc.AddRange('\0', (char)(ch - 1));
        }

        // Range above ch; skipped for '\uFFFF' because there is nothing above it.
        // The upper bound is written as a char literal: an int constant such as
        // 0xffff has no implicit conversion to char, so the decompiled form
        // AddRange(..., 0xffff) does not compile against a char parameter.
        if (ch < '\uFFFF')
        {
            _cc.AddRange((char)(ch + 1), '\uFFFF');
        }
    }
    else
    {
        _cc.AddRange(ch, ch);
    }

    _caseInsensitive = caseInsensitive;
    _nullable = nullable;
}
// Sets the current unit to a single set node built from the given class.
//   cc - character class; its ToSetCi(UseOptionI(), _culture) result and its
//        Category are passed to the new node.
internal void AddUnitSet(RegexCharClass cc)
{
    // Use the named node type instead of the raw literal 11 that the
    // decompiler emitted, so the intent (a Set node) is visible and the code
    // does not silently depend on the numeric value of the opcode.
    _unit = new RegexNode(RegexNode.Set, _options, cc.ToSetCi(UseOptionI(), _culture), cc.Category);
}
/// <summary>
/// Basic optimization. Single-letter alternations can be replaced
/// by faster set specifications, and nested alternations with no
/// intervening operators can be flattened:
///
/// a|b|c|def|g|h -> [a-c]|def|[gh]
/// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
/// </summary>
/// <remarks>
/// The child list is compacted in place: <c>i</c> is the read index and
/// <c>j</c> the write index. Entries that are flattened, merged into their
/// predecessor, or dropped decrement <c>j</c>, and the unused tail
/// [j, i) is removed once the scan is finished.
/// </remarks>
/// <returns>
/// A new Nothing node when there are no children; otherwise the result of
/// <c>StripEnation(RegexNode.Nothing)</c> on this node.
/// </returns>
internal RegexNode ReduceAlternation()
{
    // Combine adjacent sets/chars

    bool wasLastSet;
    bool lastNodeCannotMerge;
    RegexOptions optionsLast;
    RegexOptions optionsAt;
    int i;
    int j;
    RegexNode at;
    RegexNode prev;

    if (_children == null)
    {
        return(new RegexNode(RegexNode.Nothing, _options));
    }

    wasLastSet = false;
    lastNodeCannotMerge = false;
    optionsLast = 0;

    for (i = 0, j = 0; i < _children.Count; i++, j++)
    {
        at = _children[i];

        // Slide the current child down into the next free slot.
        if (j < i)
        {
            _children[j] = at;
        }

        // Not a real loop: every path ends in a break, so the body runs once.
        // It exists so the "cannot merge" cases below can bail out early.
        for (; ;)
        {
            if (at._type == Alternate)
            {
                // Flatten a nested alternation: re-parent its children to this
                // node and splice them in right after the current position so
                // the outer loop visits them next. The nested node itself is
                // not kept (j--).
                for (int k = 0; k < at._children.Count; k++)
                {
                    at._children[k]._next = this;
                }

                _children.InsertRange(i + 1, at._children);
                j--;
            }
            else if (at._type == Set || at._type == One)
            {
                // Cannot merge sets if L or I options differ, or if either are negated.
                optionsAt = at._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

                if (at._type == Set)
                {
                    if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !RegexCharClass.IsMergeable(at._str))
                    {
                        // Keep this Set as its own child and remember its state
                        // for the next iteration.
                        wasLastSet = true;
                        lastNodeCannotMerge = !RegexCharClass.IsMergeable(at._str);
                        optionsLast = optionsAt;
                        break;
                    }
                }
                else if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge)
                {
                    // Keep this One as its own child; a One never sets
                    // lastNodeCannotMerge.
                    wasLastSet = true;
                    lastNodeCannotMerge = false;
                    optionsLast = optionsAt;
                    break;
                }

                // The last node was a Set or a One, we're a Set or One and our options are the same.
                // Merge the two nodes.
                j--;
                prev = _children[j];

                // Build a class for the previous child ...
                RegexCharClass prevCharClass;
                if (prev._type == RegexNode.One)
                {
                    prevCharClass = new RegexCharClass();
                    prevCharClass.AddChar(prev._ch);
                }
                else
                {
                    prevCharClass = RegexCharClass.Parse(prev._str);
                }

                // ... fold the current child into it ...
                if (at._type == RegexNode.One)
                {
                    prevCharClass.AddChar(at._ch);
                }
                else
                {
                    RegexCharClass atCharClass = RegexCharClass.Parse(at._str);
                    prevCharClass.AddCharClass(atCharClass);
                }

                // ... and turn the previous child into the combined Set.
                prev._type = RegexNode.Set;
                prev._str = prevCharClass.ToStringClass();
            }
            else if (at._type == RegexNode.Nothing)
            {
                // A Nothing branch is dropped from the alternation.
                j--;
            }
            else
            {
                // Any other node type interrupts a run of mergeable Set/One nodes.
                wasLastSet = false;
                lastNodeCannotMerge = false;
            }
            break;
        }
    }

    // Trim the slots left over after compaction.
    if (j < i)
    {
        _children.RemoveRange(j, i - j);
    }

    return(StripEnation(RegexNode.Nothing));
}
// Top-level RegexCode generator.
//
// Walks the tree depth-first and calls EmitFragment before and after each
// child of an interior node, and once for each leaf. The walk is performed
// twice: the first time with _counting set (to size the output), the second
// time with _counting cleared, after _emitted has been allocated with _count
// entries.
//
// After emission the prefix information (first-char set, literal prefix and
// its Boyer-Moore scanner, anchors) is computed and packaged together with
// the emitted code into the returned RegexCode.
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build the sparse capture-number mapping only when some numbers are unused.
    int capsize;
    bool noRemapNeeded = tree._capnumlist == null || tree._captop == tree._capnumlist.Length;
    if (noRemapNeeded)
    {
        capsize = tree._captop;
        _caps = null;
    }
    else
    {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++)
        {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }

    _counting = true;

    while (true)
    {
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode node = tree._root;
        int childIndex = 0;

        Emit(RegexCode.Lazybranch, 0);

        while (true)
        {
            if (node._children == null)
            {
                // Leaf.
                EmitFragment(node._type, node, 0);
            }
            else if (childIndex < node._children.Count)
            {
                // Descend into the next child, remembering which one it was.
                EmitFragment(node._type | BeforeChild, node, childIndex);

                node = (RegexNode)node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (EmptyStack())
            {
                break;
            }

            // Climb back to the parent and move on to its next child.
            childIndex = PopInt();
            node = node._next;

            EmitFragment(node._type | AfterChild, node, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    // if the set of possible first chars is very large,
    // don't bother scanning for it (common case: . == [^\n])
    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0)
    {
        fcPrefix = null;
    }

    // The scan-char prefix is not computed; it is always passed as null.
    RegexPrefix scPrefix = null;

    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;

    // A Boyer-Moore scanner is only built for a non-empty literal prefix.
    RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, scPrefix, anchors, rtl);
}
// Creates a first-character descriptor with a fresh, default-constructed
// character class.
//   nullable - stored as-is in _nullable
internal RegexFC(bool nullable)
{
    this._nullable = nullable;
    this._cc = new RegexCharClass();
}
// Creates a character class directly from its parts. The instance is marked
// canonical (_canonical = true); the supplied list and builder are stored by
// reference, not copied.
//   negate      - stored in _negate
//   ranges      - stored in _rangelist
//   categories  - stored in _categories
//   subtraction - stored in _subtractor
private RegexCharClass(bool negate, List<SingleRange> ranges, StringBuilder categories, RegexCharClass subtraction)
{
    _negate = negate;
    _rangelist = ranges;
    _categories = categories;
    _subtractor = subtraction;
    _canonical = true;
}
/*
 * Scans the characters that follow a '\' (the '\' itself has already been
 * consumed) and returns the RegexNode for the escape that was found.
 *
 * Anchor escapes (b B A G Z z) yield a node whose type comes from
 * TypeFromCode. Class escapes (w W s S d D) yield a Set node using either the
 * ECMA or the regular predefined class, depending on UseOptionE(). \p and \P
 * yield a Set node built from the parsed property name. Anything else is
 * delegated to ScanBasicBackslash().
 *
 * Throws the exception produced by MakeException(SR.IllegalEndEscape) when
 * no characters remain.
 */
internal RegexNode ScanBackslash()
{
    if (CharsRight() == 0)
    {
        throw MakeException(SR.IllegalEndEscape);
    }

    char escape = RightChar();
    string setClass;

    switch (escape)
    {
        case 'b':
        case 'B':
        case 'A':
        case 'G':
        case 'Z':
        case 'z':
            MoveRight();
            return new RegexNode(TypeFromCode(escape), _options);

        case 'w':
            MoveRight();
            setClass = UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass;
            break;

        case 'W':
            MoveRight();
            setClass = UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass;
            break;

        case 's':
            MoveRight();
            setClass = UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass;
            break;

        case 'S':
            MoveRight();
            setClass = UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass;
            break;

        case 'd':
            MoveRight();
            setClass = UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass;
            break;

        case 'D':
            MoveRight();
            setClass = UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass;
            break;

        case 'p':
        case 'P':
        {
            MoveRight();

            // \P (upper case) inverts the category.
            RegexCharClass propertyClass = new RegexCharClass();
            propertyClass.AddCategoryFromName(ParseProperty(), (escape != 'p'), UseOptionI(), _pattern);
            if (UseOptionI())
            {
                propertyClass.AddLowercase(_culture);
            }

            setClass = propertyClass.ToStringClass();
            break;
        }

        default:
            return ScanBasicBackslash();
    }

    return new RegexNode(RegexNode.Set, _options, setClass);
}
// Makes a Set node the current unit. The node receives the current options,
// the class's ToSetCi(UseOptionI(), _culture) result and the class's Category.
internal void AddUnitSet(RegexCharClass cc)
{
    RegexNode setNode = new RegexNode(
        RegexNode.Set,
        _options,
        cc.ToSetCi(UseOptionI(), _culture),
        cc.Category);

    _unit = setNode;
}
// Interpreter main loop.
//
// Starts at code position 0 and repeatedly dispatches on Operator(). Inside
// the switch:
//   - "continue" re-enters the loop and dispatches the next opcode directly;
//   - "break" (and "goto BreakBackward") falls out of the switch to the
//     Backtrack() call at the bottom of the loop;
//   - RegexCode.Stop returns from the method.
// Opcodes combined with RegexCode.Back / RegexCode.Back2 are the backtracking
// counterparts of the plain opcode; they begin by reading back the values the
// forward case saved (Trackframe/Stackframe followed by Tracked(n)/Stacked(n)).
// An opcode value with no case throws NotImplementedException.
protected override void Go()
{
    Goto(0);

    for (;;)
    {
#if DBG
        if (runmatch.Debug)
        {
            DumpState();
        }
#endif

        switch (Operator())
        {
            case RegexCode.Stop:
                return;

            case RegexCode.Nothing:
                break;

            case RegexCode.Goto:
                Goto(Operand(0));
                continue;

            case RegexCode.Testref:
                if (!IsMatched(Operand(0)))
                {
                    break;
                }
                Advance(1);
                continue;

            case RegexCode.Lazybranch:
                // Remember the text position so the Back case can resume from
                // it at the branch target.
                Track(Textpos());
                Advance(1);
                continue;

            case RegexCode.Lazybranch | RegexCode.Back:
                Trackframe(1);
                Textto(Tracked(0));
                Goto(Operand(0));
                continue;

            case RegexCode.Setmark:
                Stack(Textpos());
                Track();
                Advance();
                continue;

            case RegexCode.Nullmark:
                Stack(-1);
                Track();
                Advance();
                continue;

            case RegexCode.Setmark | RegexCode.Back:
            case RegexCode.Nullmark | RegexCode.Back:
                Stackframe(1);
                break;

            case RegexCode.Getmark:
                Stackframe(1);
                Track(Stacked(0));
                Textto(Stacked(0));
                Advance();
                continue;

            case RegexCode.Getmark | RegexCode.Back:
                Trackframe(1);
                Stack(Tracked(0));
                break;

            case RegexCode.Capturemark:
                // Operand(1) == -1 means a plain capture; otherwise the group
                // named by Operand(1) must already be matched and
                // TransferCapture is used.
                if (Operand(1) != -1 && !IsMatched(Operand(1)))
                {
                    break;
                }
                Stackframe(1);
                if (Operand(1) != -1)
                {
                    TransferCapture(Operand(0), Operand(1), Stacked(0), Textpos());
                }
                else
                {
                    Capture(Operand(0), Stacked(0), Textpos());
                }
                Track(Stacked(0));

                Advance(2);
                continue;

            case RegexCode.Capturemark | RegexCode.Back:
                Trackframe(1);
                Stack(Tracked(0));
                Uncapture();
                // A second Uncapture is issued when both operands are set.
                if (Operand(0) != -1 && Operand(1) != -1)
                {
                    Uncapture();
                }
                break;

            case RegexCode.Branchmark:
                {
                    int matched;
                    Stackframe(1);

                    matched = Textpos() - Stacked(0);

                    if (matched != 0)                       // Nonempty match -> loop now
                    {
                        Track(Stacked(0), Textpos());       // Save old mark, textpos
                        Stack(Textpos());                   // Make new mark
                        Goto(Operand(0));                   // Loop
                    }
                    else                                    // Empty match -> straight now
                    {
                        Track2(Stacked(0));                 // Save old mark
                        Advance(1);                         // Straight
                    }
                    continue;
                }

            case RegexCode.Branchmark | RegexCode.Back:
                Trackframe(2);
                Stackframe(1);
                Textto(Tracked(1));                         // Recall position
                Track2(Tracked(0));                         // Save old mark
                Advance(1);                                 // Straight
                continue;

            case RegexCode.Branchmark | RegexCode.Back2:
                Trackframe(1);
                Stack(Tracked(0));                          // Recall old mark
                break;                                      // Backtrack

            case RegexCode.Lazybranchmark:
                {
                    int matched;
                    Stackframe(1);

                    matched = Textpos() - Stacked(0);

                    if (matched != 0)                       // Nonempty match -> next loop
                    {
                        Track(Stacked(0), Textpos());       // Save old mark, textpos
                    }
                    else                                    // Empty match -> no loop
                    {
                        Track2(Stacked(0));                 // Save old mark
                    }
                    Advance(1);
                    continue;
                }

            case RegexCode.Lazybranchmark | RegexCode.Back:
                {
                    int pos;

                    Trackframe(2);
                    pos = Tracked(1);
                    Track2(Tracked(0));                     // Save old mark
                    Stack(pos);                             // Make new mark
                    Textto(pos);                            // Recall position
                    Goto(Operand(0));                       // Loop
                    continue;
                }

            case RegexCode.Lazybranchmark | RegexCode.Back2:
                Stackframe(1);
                Trackframe(1);
                Stack(Tracked(0));                          // Recall old mark
                break;

            case RegexCode.Setcount:
                Stack(Textpos(), Operand(0));
                Track();
                Advance(1);
                continue;

            case RegexCode.Nullcount:
                Stack(-1, Operand(0));
                Track();
                Advance(1);
                continue;

            case RegexCode.Setcount | RegexCode.Back:
                Stackframe(2);
                break;

            case RegexCode.Nullcount | RegexCode.Back:
                Stackframe(2);
                break;

            case RegexCode.Branchcount:
                // Stack:
                //  0: Mark
                //  1: Count
                {
                    Stackframe(2);
                    int mark = Stacked(0);
                    int count = Stacked(1);
                    int matched = Textpos() - mark;

                    if (count >= Operand(1) || (matched == 0 && count >= 0))    // Max loops or empty match -> straight now
                    {
                        Track2(mark, count);                // Save old mark, count
                        Advance(2);                         // Straight
                    }
                    else                                    // Nonempty match -> count+loop now
                    {
                        Track(mark);                        // remember mark
                        Stack(Textpos(), count + 1);        // Make new mark, incr count
                        Goto(Operand(0));                   // Loop
                    }
                    continue;
                }

            case RegexCode.Branchcount | RegexCode.Back:
                // Track:
                //  0: Previous mark
                // Stack:
                //  0: Mark (= current pos, discarded)
                //  1: Count
                Trackframe(1);
                Stackframe(2);
                if (Stacked(1) > 0)                         // Positive -> can go straight
                {
                    Textto(Stacked(0));                     // Zap to mark
                    Track2(Tracked(0), Stacked(1) - 1);     // Save old mark, old count
                    Advance(2);                             // Straight
                    continue;
                }
                Stack(Tracked(0), Stacked(1) - 1);          // recall old mark, old count
                break;

            case RegexCode.Branchcount | RegexCode.Back2:
                // Track:
                //  0: Previous mark
                //  1: Previous count
                Trackframe(2);
                Stack(Tracked(0), Tracked(1));              // Recall old mark, old count
                break;                                      // Backtrack

            case RegexCode.Lazybranchcount:
                // Stack:
                //  0: Mark
                //  1: Count
                {
                    Stackframe(2);
                    int mark = Stacked(0);
                    int count = Stacked(1);

                    if (count < 0)                          // Negative count -> loop now
                    {
                        Track2(mark);                       // Save old mark
                        Stack(Textpos(), count + 1);        // Make new mark, incr count
                        Goto(Operand(0));                   // Loop
                    }
                    else                                    // Nonneg count -> straight now
                    {
                        Track(mark, count, Textpos());      // Save mark, count, position
                        Advance(2);                         // Straight
                    }
                    continue;
                }

            case RegexCode.Lazybranchcount | RegexCode.Back:
                // Track:
                //  0: Mark
                //  1: Count
                //  2: Textpos
                {
                    Trackframe(3);
                    int mark = Tracked(0);
                    int textpos = Tracked(2);

                    // NOTE(review): the greedy Branchcount case stops looping once
                    // count >= Operand(1), whereas this lazy case still loops when
                    // the count equals Operand(1) ("<="). Confirm against the code
                    // emitter whether the asymmetry is intended.
                    if (Tracked(1) <= Operand(1) && textpos != mark)    // Under limit and not empty match -> loop
                    {
                        Textto(textpos);                    // Recall position
                        Stack(textpos, Tracked(1) + 1);     // Make new mark, incr count
                        Track2(mark);                       // Save old mark
                        Goto(Operand(0));                   // Loop
                        continue;
                    }
                    else                                    // Max loops or empty match -> backtrack
                    {
                        Stack(Tracked(0), Tracked(1));      // Recall old mark, count
                        break;                              // backtrack
                    }
                }

            case RegexCode.Lazybranchcount | RegexCode.Back2:
                // Track:
                //  0: Previous mark
                // Stack:
                //  0: Mark (== current pos, discarded)
                //  1: Count
                Trackframe(1);
                Stackframe(2);
                Stack(Tracked(0), Stacked(1) - 1);          // Recall old mark, count
                break;                                      // Backtrack

            case RegexCode.Setjump:
                Stack(Trackpos(), Crawlpos());
                Track();
                Advance();
                continue;

            case RegexCode.Setjump | RegexCode.Back:
                Stackframe(2);
                break;

            case RegexCode.Backjump:
                // Stack:
                //  0: Saved trackpos
                //  1: Crawlpos
                Stackframe(2);
                Trackto(Stacked(0));

                // Undo captures until the crawl position is back at the saved one.
                while (Crawlpos() != Stacked(1))
                {
                    Uncapture();
                }

                break;

            case RegexCode.Forejump:
                // Stack:
                //  0: Saved trackpos
                //  1: Crawlpos
                Stackframe(2);
                Trackto(Stacked(0));
                Track(Stacked(1));
                Advance();
                continue;

            case RegexCode.Forejump | RegexCode.Back:
                // Track:
                //  0: Crawlpos
                Trackframe(1);

                // Undo captures until the crawl position is back at the saved one.
                while (Crawlpos() != Tracked(0))
                {
                    Uncapture();
                }

                break;

            case RegexCode.Bol:
                // Fails unless at the left edge or just after a '\n'.
                if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Eol:
                // Fails unless at the right edge or just before a '\n'.
                if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Boundary:
                if (!IsBoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Nonboundary:
                if (IsBoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.ECMABoundary:
                if (!IsECMABoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.NonECMABoundary:
                if (IsECMABoundary(Textpos(), runtextbeg, runtextend))
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Beginning:
                if (Leftchars() > 0)
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.Start:
                if (Textpos() != Textstart())
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.EndZ:
                // Succeeds at the right edge, or one character before it when
                // that character is '\n'.
                if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.End:
                if (Rightchars() > 0)
                {
                    break;
                }
                Advance();
                continue;

            case RegexCode.One:
                if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0))
                {
                    break;
                }

                Advance(1);
                continue;

            case RegexCode.Notone:
                if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0))
                {
                    break;
                }

                Advance(1);
                continue;

            case RegexCode.Set:
                if (Forwardchars() < 1 || !RegexCharClass.CharInSet(Forwardcharnext(), runstrings[Operand(0)], runstrings[Operand(1)]))
                {
                    break;
                }

                Advance(2);
                continue;

            case RegexCode.Multi:
                {
                    if (!Stringmatch(runstrings[Operand(0)]))
                    {
                        break;
                    }

                    Advance(1);
                    continue;
                }

            case RegexCode.Ref:
                {
                    int capnum = Operand(0);

                    if (IsMatched(capnum))
                    {
                        if (!Refmatch(MatchIndex(capnum), MatchLength(capnum)))
                        {
                            break;
                        }
                    }
                    else
                    {
                        // A reference to an unmatched group only succeeds when
                        // the ECMAScript option is set.
                        if ((runregex.roptions & RegexOptions.ECMAScript) == 0)
                        {
                            break;
                        }
                    }

                    Advance(1);
                    continue;
                }

            case RegexCode.Onerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    // goto is needed here because a plain "break" would only
                    // leave the while loop, not the switch.
                    while (c-- > 0)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Notonerep:
                {
                    int c = Operand(1);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    char ch = (char)Operand(0);

                    while (c-- > 0)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setrep:
                {
                    int c = Operand(2);

                    if (Forwardchars() < c)
                    {
                        break;
                    }

                    String set = runstrings[Operand(0)];
                    String cat = runstrings[Operand(1)];

                    while (c-- > 0)
                    {
                        if (!RegexCharClass.CharInSet(Forwardcharnext(), set, cat))
                        {
                            goto BreakBackward;
                        }
                    }

                    Advance(3);
                    continue;
                }

            case RegexCode.Oneloop:
                {
                    int c = Operand(1);

                    // Never try to consume more than is available.
                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int i;

                    // Consume matching characters; on the first mismatch step
                    // back over it and stop. i is the number left unconsumed.
                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() != ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    // At least one character was consumed: record a backtrack
                    // point one character short of the current position.
                    if (c > i)
                    {
                        Track(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Notoneloop:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    char ch = (char)Operand(0);
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (Forwardcharnext() == ch)
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        Track(c - i - 1, Textpos() - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setloop:
                {
                    int c = Operand(2);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    String set = runstrings[Operand(0)];
                    String cat = runstrings[Operand(1)];
                    int i;

                    for (i = c; i > 0; i--)
                    {
                        if (!RegexCharClass.CharInSet(Forwardcharnext(), set, cat))
                        {
                            Backwardnext();
                            break;
                        }
                    }

                    if (c > i)
                    {
                        Track(c - i - 1, Textpos() - Bump());
                    }

                    Advance(3);
                    continue;
                }

            case RegexCode.Oneloop | RegexCode.Back:
            case RegexCode.Notoneloop | RegexCode.Back:
                {
                    // Track: 0 = remaining give-back count, 1 = position to resume at.
                    Trackframe(2);
                    int i = Tracked(0);
                    int pos = Tracked(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        Track(i - 1, pos - Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setloop | RegexCode.Back:
                {
                    Trackframe(2);
                    int i = Tracked(0);
                    int pos = Tracked(1);

                    Textto(pos);

                    if (i > 0)
                    {
                        Track(i - 1, pos - Bump());
                    }

                    Advance(3);
                    continue;
                }

            case RegexCode.Onelazy:
            case RegexCode.Notonelazy:
                {
                    int c = Operand(1);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    // Consume nothing now; record how many more characters may
                    // be taken, one at a time, by the Back case.
                    if (c > 0)
                    {
                        Track(c - 1, Textpos());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setlazy:
                {
                    int c = Operand(2);

                    if (c > Forwardchars())
                    {
                        c = Forwardchars();
                    }

                    if (c > 0)
                    {
                        Track(c - 1, Textpos());
                    }

                    Advance(3);
                    continue;
                }

            case RegexCode.Onelazy | RegexCode.Back:
                {
                    Trackframe(2);
                    int pos = Tracked(1);
                    Textto(pos);

                    if (Forwardcharnext() != (char)Operand(0))
                    {
                        break;
                    }

                    int i = Tracked(0);

                    if (i > 0)
                    {
                        Track(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Notonelazy | RegexCode.Back:
                {
                    Trackframe(2);
                    int pos = Tracked(1);
                    Textto(pos);

                    if (Forwardcharnext() == (char)Operand(0))
                    {
                        break;
                    }

                    int i = Tracked(0);

                    if (i > 0)
                    {
                        Track(i - 1, pos + Bump());
                    }

                    Advance(2);
                    continue;
                }

            case RegexCode.Setlazy | RegexCode.Back:
                {
                    Trackframe(2);
                    int pos = Tracked(1);
                    Textto(pos);

                    if (!RegexCharClass.CharInSet(Forwardcharnext(), runstrings[Operand(0)], runstrings[Operand(1)]))
                    {
                        break;
                    }

                    int i = Tracked(0);

                    if (i > 0)
                    {
                        Track(i - 1, pos + Bump());
                    }

                    Advance(3);
                    continue;
                }

            default:
                throw new NotImplementedException(SR.GetString(SR.UnimplementedState));
        }

    BreakBackward:
        ;

        // "break Backward" comes here:
        Backtrack();
    }
}
// Moves runtextpos to the next position at which a match attempt is worth
// making and reports whether such a position exists.
//
// Order of checks:
//   1. If the pattern carries a Beginning/Start/EndZ/End anchor, runtextpos is
//      validated or snapped against it; an unsatisfiable anchor parks
//      runtextpos at the far end of the scan direction and returns false. With
//      a Boyer-Moore prefix the result is that prefix's IsMatch at the
//      position; without one, control continues with step 3.
//   2. Otherwise, with a Boyer-Moore prefix, the prefix scanner finds the
//      position (or the scan fails).
//   3. With no first-char prefix every position is a candidate; otherwise the
//      text is scanned for the first character in the first-char set.
protected override bool FindFirstChar()
{
    bool anchoredAtBeginning = (runanchors & RegexFCD.Beginning) != 0;
    bool anchoredAtStart = (runanchors & RegexFCD.Start) != 0;
    bool anchoredAtEndZ = (runanchors & RegexFCD.EndZ) != 0;
    bool anchoredAtEnd = (runanchors & RegexFCD.End) != 0;

    if (anchoredAtBeginning || anchoredAtStart || anchoredAtEndZ || anchoredAtEnd)
    {
        if (runcode._rightToLeft)
        {
            bool cannotMatch =
                (anchoredAtEnd && runtextpos < runtextend) ||
                (anchoredAtEndZ && (runtextpos < runtextend - 1 ||
                                    (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
                (anchoredAtStart && runtextpos < runtextstart);

            if (cannotMatch)
            {
                runtextpos = runtextbeg;
                return false;
            }

            if (anchoredAtBeginning && runtextpos > runtextbeg)
            {
                runtextpos = runtextbeg;
            }
        }
        else
        {
            bool cannotMatch =
                (anchoredAtBeginning && runtextpos > runtextbeg) ||
                (anchoredAtStart && runtextpos > runtextstart);

            if (cannotMatch)
            {
                runtextpos = runtextend;
                return false;
            }

            if (anchoredAtEndZ && runtextpos < runtextend - 1)
            {
                runtextpos = runtextend - 1;
            }
            else if (anchoredAtEnd && runtextpos < runtextend)
            {
                runtextpos = runtextend;
            }
        }

        if (runbmPrefix != null)
        {
            return runbmPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
        }

        // No literal prefix: continue with the first-char check below.
    }
    else if (runbmPrefix != null)
    {
        runtextpos = runbmPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);

        if (runtextpos != -1)
        {
            return true;
        }

        runtextpos = runcode._rightToLeft ? runtextbeg : runtextend;
        return false;
    }

    if (runfcPrefix == null)
    {
        return true;
    }

    runrtl = runcode._rightToLeft;
    runci = runfcPrefix.CaseInsensitive;
    String set = runfcPrefix.Prefix;

    if (RegexCharClass.IsSingleton(set))
    {
        // Single-character set: compare directly.
        char target = RegexCharClass.SingletonChar(set);
        int remaining = Forwardchars();

        while (remaining > 0)
        {
            if (target == Forwardcharnext())
            {
                Backwardnext();
                return true;
            }

            remaining--;
        }
    }
    else
    {
        int remaining = Forwardchars();

        while (remaining > 0)
        {
            if (RegexCharClass.CharInSet(Forwardcharnext(), set, String.Empty))
            {
                Backwardnext();
                return true;
            }

            remaining--;
        }
    }

    return false;
}
// Stores the given class as this class's subtractor, replacing any
// previously stored one.
internal void AddSubtraction(RegexCharClass sub)
{
    _subtractor = sub;
}
// AddCharClass()
//
// Adds a regex char class: appends all of cc's ranges to this class's range
// list and appends cc's categories to this class's categories.
//
// The _canonical flag is cleared whenever the append may leave the combined
// range list out of order (presumably canonical means sorted and merged --
// confirm against the canonicalization routine):
//   * cc itself is not canonical, or
//   * cc's FIRST range starts at or before the end of this class's LAST range.
// Comparing cc's last range instead would miss cases such as
// this = [c-d], cc = [a-b][x-z], where the result [c-d][a-b][x-z] is unsorted
// even though 'z' > 'd'.
internal void AddCharClass(RegexCharClass cc)
{
    if (!cc._canonical)
    {
        // If the class being added isn't canonical, the result isn't either.
        _canonical = false;
    }
    else if (_canonical &&
             RangeCount() > 0 &&
             cc.RangeCount() > 0 &&
             cc.Range(0)._first <= Range(RangeCount() - 1)._last)
    {
        _canonical = false;
    }

    for (int i = 0; i < cc.RangeCount(); i++)
    {
        _rangelist.Add(cc.Range(i));
    }

    _categories.Append(cc._categories.ToString());
}
/*
 * AddCharClass()
 *
 * Appends every range of cc to this class's range list and cc's categories to
 * this class's categories. Both classes are asserted (debug builds) to be
 * mergeable.
 *
 * _canonical is cleared when cc is not canonical, or when both classes have
 * ranges and cc's first range starts at or before the end of this class's
 * last range.
 */
internal void AddCharClass(RegexCharClass cc)
{
    Debug.Assert(cc.CanMerge && this.CanMerge, "Both character classes added together must be able to merge" );

    bool losesCanonicalForm =
        !cc._canonical ||
        (RangeCount() > 0 &&
         cc.RangeCount() > 0 &&
         cc.GetRangeAt(0)._first <= GetRangeAt(RangeCount() - 1)._last);

    if (_canonical && losesCanonicalForm)
    {
        _canonical = false;
    }

    int index = 0;
    while (index < cc.RangeCount())
    {
        _rangelist.Add(cc.GetRangeAt(index));
        index++;
    }

    _categories.Append(cc._categories.ToString());
}
// Creates a first-character descriptor with a fresh, default-constructed
// character class.
//   nullable - stored as-is in _nullable
public RegexFC(bool nullable)
{
    this._nullable = nullable;
    this._cc = new RegexCharClass();
}
// Moves _runtextpos to the next position at which a match attempt is worth
// making and reports whether such a position exists.
//
//   * Anchored pattern (Beginning/Start/EndZ/End): _runtextpos is validated or
//     snapped against the anchor. An unsatisfiable anchor parks _runtextpos at
//     the far end of the scan direction and returns false. Otherwise the
//     result is the Boyer-Moore prefix's IsMatch at that position, or true
//     when there is no such prefix.
//   * Boyer-Moore prefix, no anchor: the prefix scanner locates the position.
//   * No first-char prefix: every position is a candidate.
//   * Otherwise the text is scanned for the first character that belongs to
//     the first-char class.
protected override bool FindFirstChar()
{
    bool anchoredAtBeginning = (_code._anchors & RegexFCD.Beginning) != 0;
    bool anchoredAtStart = (_code._anchors & RegexFCD.Start) != 0;
    bool anchoredAtEndZ = (_code._anchors & RegexFCD.EndZ) != 0;
    bool anchoredAtEnd = (_code._anchors & RegexFCD.End) != 0;

    if (anchoredAtBeginning || anchoredAtStart || anchoredAtEndZ || anchoredAtEnd)
    {
        if (_code._rightToLeft)
        {
            bool cannotMatch =
                (anchoredAtEnd && _runtextpos < _runtextend) ||
                (anchoredAtEndZ && (_runtextpos < _runtextend - 1 ||
                                    (_runtextpos == _runtextend - 1 && CharAt(_runtextpos) != '\n'))) ||
                (anchoredAtStart && _runtextpos < _runtextstart);

            if (cannotMatch)
            {
                _runtextpos = _runtextbeg;
                return false;
            }

            if (anchoredAtBeginning && _runtextpos > _runtextbeg)
            {
                _runtextpos = _runtextbeg;
            }
        }
        else
        {
            bool cannotMatch =
                (anchoredAtBeginning && _runtextpos > _runtextbeg) ||
                (anchoredAtStart && _runtextpos > _runtextstart);

            if (cannotMatch)
            {
                _runtextpos = _runtextend;
                return false;
            }

            if (anchoredAtEndZ && _runtextpos < _runtextend - 1)
            {
                _runtextpos = _runtextend - 1;
            }
            else if (anchoredAtEnd && _runtextpos < _runtextend)
            {
                _runtextpos = _runtextend;
            }
        }

        // A valid anchor position was found; only the literal prefix (if any)
        // still has to agree.
        return _code._bmPrefix == null
            || _code._bmPrefix.IsMatch(_runtext, _runtextpos, _runtextbeg, _runtextend);
    }

    if (_code._bmPrefix != null)
    {
        _runtextpos = _code._bmPrefix.Scan(_runtext, _runtextpos, _runtextbeg, _runtextend);

        if (_runtextpos != -1)
        {
            return true;
        }

        _runtextpos = _code._rightToLeft ? _runtextbeg : _runtextend;
        return false;
    }

    if (_code._fcPrefix == null)
    {
        return true;
    }

    _rightToLeft = _code._rightToLeft;
    _caseInsensitive = _code._fcPrefix.CaseInsensitive;
    String set = _code._fcPrefix.Prefix;

    if (RegexCharClass.IsSingleton(set))
    {
        // Single-character class: compare directly.
        char target = RegexCharClass.SingletonChar(set);
        int remaining = Forwardchars();

        while (remaining > 0)
        {
            if (target == Forwardcharnext())
            {
                Backwardnext();
                return true;
            }

            remaining--;
        }
    }
    else
    {
        int remaining = Forwardchars();

        while (remaining > 0)
        {
            if (RegexCharClass.CharInClass(Forwardcharnext(), set))
            {
                Backwardnext();
                return true;
            }

            remaining--;
        }
    }

    return false;
}
// Moves runtextpos to the next position at which a match attempt is worth
// making and reports whether such a position exists.
//
//   * Anchored pattern (Beginning/Start/EndZ/End): runtextpos is validated or
//     snapped against the anchor. An unsatisfiable anchor parks runtextpos at
//     the far end of the scan direction and returns false. Otherwise the
//     result is the Boyer-Moore prefix's IsMatch at that position, or true
//     when there is no such prefix.
//   * Boyer-Moore prefix, no anchor: the prefix scanner locates the position.
//   * No first-char prefix: every position is a candidate.
//   * Otherwise the text is scanned for the first character that belongs to
//     the first-char class.
protected override bool FindFirstChar()
{
    bool hasBeginning = (_code.Anchors & RegexFCD.Beginning) != 0;
    bool hasStart = (_code.Anchors & RegexFCD.Start) != 0;
    bool hasEndZ = (_code.Anchors & RegexFCD.EndZ) != 0;
    bool hasEnd = (_code.Anchors & RegexFCD.End) != 0;

    if (hasBeginning || hasStart || hasEndZ || hasEnd)
    {
        if (!_code.RightToLeft)
        {
            if ((hasBeginning && runtextpos > runtextbeg) ||
                (hasStart && runtextpos > runtextstart))
            {
                runtextpos = runtextend;
                return false;
            }

            if (hasEndZ && runtextpos < runtextend - 1)
            {
                runtextpos = runtextend - 1;
            }
            else if (hasEnd && runtextpos < runtextend)
            {
                runtextpos = runtextend;
            }
        }
        else
        {
            if ((hasEnd && runtextpos < runtextend) ||
                (hasEndZ && (runtextpos < runtextend - 1 ||
                             (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
                (hasStart && runtextpos < runtextstart))
            {
                runtextpos = runtextbeg;
                return false;
            }

            if (hasBeginning && runtextpos > runtextbeg)
            {
                runtextpos = runtextbeg;
            }
        }

        // A valid anchor position was found; only the literal prefix (if any)
        // still has to agree.
        if (_code.BMPrefix == null)
        {
            return true;
        }

        return _code.BMPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
    }

    if (_code.BMPrefix != null)
    {
        runtextpos = _code.BMPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);

        if (runtextpos == -1)
        {
            runtextpos = _code.RightToLeft ? runtextbeg : runtextend;
            return false;
        }

        return true;
    }

    if (_code.FCPrefix == null)
    {
        return true;
    }

    _rightToLeft = _code.RightToLeft;
    _caseInsensitive = _code.FCPrefix.GetValueOrDefault().CaseInsensitive;
    string set = _code.FCPrefix.GetValueOrDefault().Prefix;

    if (RegexCharClass.IsSingleton(set))
    {
        // Single-character class: compare directly.
        char target = RegexCharClass.SingletonChar(set);
        int remaining = Forwardchars();

        while (remaining > 0)
        {
            if (target == Forwardcharnext())
            {
                Backwardnext();
                return true;
            }

            remaining--;
        }
    }
    else
    {
        int remaining = Forwardchars();

        while (remaining > 0)
        {
            if (RegexCharClass.CharInClass(Forwardcharnext(), set))
            {
                Backwardnext();
                return true;
            }

            remaining--;
        }
    }

    return false;
}
// Adds a regex char class: appends all of cc's ranges to this class's range
// list and appends cc's categories to this class's categories.
//
// The _canonical flag is cleared whenever the append may leave the combined
// range list out of order (presumably canonical means sorted and merged --
// confirm against the canonicalization routine):
//   * cc itself is not canonical, or
//   * cc's FIRST range starts at or before the end of this class's LAST range.
// Comparing cc's last range instead would miss cases such as
// this = [c-d], cc = [a-b][x-z], where the result [c-d][a-b][x-z] is unsorted
// even though 'z' > 'd'.
internal void AddCharClass(RegexCharClass cc)
{
    if (!cc._canonical)
    {
        // If the class being added isn't canonical, the result isn't either.
        _canonical = false;
    }
    else if (_canonical &&
             RangeCount() > 0 &&
             cc.RangeCount() > 0 &&
             cc.Range(0)._first <= Range(RangeCount() - 1)._last)
    {
        _canonical = false;
    }

    for (int i = 0; i < cc.RangeCount(); i++)
    {
        _rangelist.Add(cc.Range(i));
    }

    _categories.Append(cc._categories.ToString());
}
/// <summary>
/// Interpreter main loop: positions the code pointer with <c>Goto(0)</c> and then dispatches on
/// <c>Operator()</c> until the <c>RegexCode.Stop</c> case returns from the method.
/// </summary>
/// <remarks>
/// Every iteration first performs a pending <c>Advance(advance)</c> (a case requests it by
/// setting <c>advance</c> to a non-negative value and using <c>continue</c>; cases that call
/// <c>Goto(...)</c> leave it at -1), then calls <c>CheckTimeout()</c> and switches on the
/// current operator.
/// A case that leaves the switch with <c>break</c>, or jumps to <c>BreakBackward</c>, falls into
/// <c>Backtrack()</c> before the next iteration.
/// Cases OR-ed with <c>RegexCode.Back</c> or <c>RegexCode.Back2</c> pop the values that the matching
/// forward case saved with <c>TrackPush</c> / <c>TrackPush2</c>; presumably <c>Backtrack()</c> is what
/// re-enters them (that helper is not visible here).
/// An operator without a case throws via
/// <c>NotImplemented.ByDesignWithMessage(SR.UnimplementedState)</c>.
/// </remarks>
protected override void Go() { Goto(0); int advance = -1; for (; ;) { if (advance >= 0) { // https://github.com/dotnet/coreclr/pull/14850#issuecomment-342256447 // Single common Advance call to reduce method size; and single method inline point Advance(advance); advance = -1; } #if DEBUG if (runmatch.Debug) { DumpState(); } #endif CheckTimeout(); switch (Operator()) { case RegexCode.Stop: return; case RegexCode.Nothing: break; case RegexCode.Goto: Goto(Operand(0)); continue; case RegexCode.Testref: if (!IsMatched(Operand(0))) { break; } advance = 1; continue; case RegexCode.Lazybranch: TrackPush(Textpos()); advance = 1; continue; case RegexCode.Lazybranch | RegexCode.Back: TrackPop(); Textto(TrackPeek()); Goto(Operand(0)); continue; case RegexCode.Setmark: StackPush(Textpos()); TrackPush(); advance = 0; continue; case RegexCode.Nullmark: StackPush(-1); TrackPush(); advance = 0; continue; case RegexCode.Setmark | RegexCode.Back: case RegexCode.Nullmark | RegexCode.Back: StackPop(); break; case RegexCode.Getmark: StackPop(); TrackPush(StackPeek()); Textto(StackPeek()); advance = 0; continue; case RegexCode.Getmark | RegexCode.Back: TrackPop(); StackPush(TrackPeek()); break; case RegexCode.Capturemark: if (Operand(1) != -1 && !IsMatched(Operand(1))) { break; } StackPop(); if (Operand(1) != -1) { TransferCapture(Operand(0), Operand(1), StackPeek(), Textpos()); } else { Capture(Operand(0), StackPeek(), Textpos()); } TrackPush(StackPeek()); advance = 2; continue; case RegexCode.Capturemark | RegexCode.Back: TrackPop(); StackPush(TrackPeek()); Uncapture(); if (Operand(0) != -1 && Operand(1) != -1) { Uncapture(); } break; case RegexCode.Branchmark: { int matched; StackPop(); matched = Textpos() - StackPeek(); if (matched != 0) { // Nonempty match -> loop now TrackPush(StackPeek(), Textpos()); // Save old mark, textpos StackPush(Textpos()); // Make new mark Goto(Operand(0)); // Loop } else { // Empty match -> straight now TrackPush2(StackPeek()); // Save old mark advance = 
1; // Straight } continue; } case RegexCode.Branchmark | RegexCode.Back: TrackPop(2); StackPop(); Textto(TrackPeek(1)); // Recall position TrackPush2(TrackPeek()); // Save old mark advance = 1; // Straight continue; case RegexCode.Branchmark | RegexCode.Back2: TrackPop(); StackPush(TrackPeek()); // Recall old mark break; // Backtrack case RegexCode.Lazybranchmark: { // We hit this the first time through a lazy loop and after each // successful match of the inner expression. It simply continues // on and doesn't loop. StackPop(); int oldMarkPos = StackPeek(); if (Textpos() != oldMarkPos) { // Nonempty match -> try to loop again by going to 'back' state if (oldMarkPos != -1) { TrackPush(oldMarkPos, Textpos()); // Save old mark, textpos } else { TrackPush(Textpos(), Textpos()); } } else { // The inner expression found an empty match, so we'll go directly to 'back2' if we // backtrack. In this case, we need to push something on the stack, since back2 pops. // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text // position associated with that empty match. StackPush(oldMarkPos); TrackPush2(StackPeek()); // Save old mark } advance = 1; continue; } case RegexCode.Lazybranchmark | RegexCode.Back: { // After the first time, Lazybranchmark | RegexCode.Back occurs // with each iteration of the loop, and therefore with every attempted // match of the inner expression. We'll try to match the inner expression, // then go back to Lazybranchmark if successful. If the inner expression // fails, we go to Lazybranchmark | RegexCode.Back2 int pos; TrackPop(2); pos = TrackPeek(1); TrackPush2(TrackPeek()); // Save old mark StackPush(pos); // Make new mark Textto(pos); // Recall position Goto(Operand(0)); // Loop continue; } case RegexCode.Lazybranchmark | RegexCode.Back2: // The lazy loop has failed. We'll do a true backtrack and // start over before the lazy loop. 
StackPop(); TrackPop(); StackPush(TrackPeek()); // Recall old mark break; case RegexCode.Setcount: StackPush(Textpos(), Operand(0)); TrackPush(); advance = 1; continue; case RegexCode.Nullcount: StackPush(-1, Operand(0)); TrackPush(); advance = 1; continue; case RegexCode.Setcount | RegexCode.Back: StackPop(2); break; case RegexCode.Nullcount | RegexCode.Back: StackPop(2); break; case RegexCode.Branchcount: // StackPush: // 0: Mark // 1: Count { StackPop(2); int mark = StackPeek(); int count = StackPeek(1); int matched = Textpos() - mark; if (count >= Operand(1) || (matched == 0 && count >= 0)) { // Max loops or empty match -> straight now TrackPush2(mark, count); // Save old mark, count advance = 2; // Straight } else { // Nonempty match -> count+loop now TrackPush(mark); // remember mark StackPush(Textpos(), count + 1); // Make new mark, incr count Goto(Operand(0)); // Loop } continue; } case RegexCode.Branchcount | RegexCode.Back: // TrackPush: // 0: Previous mark // StackPush: // 0: Mark (= current pos, discarded) // 1: Count TrackPop(); StackPop(2); if (StackPeek(1) > 0) { // Positive -> can go straight Textto(StackPeek()); // Zap to mark TrackPush2(TrackPeek(), StackPeek(1) - 1); // Save old mark, old count advance = 2; // Straight continue; } StackPush(TrackPeek(), StackPeek(1) - 1); // recall old mark, old count break; case RegexCode.Branchcount | RegexCode.Back2: // TrackPush: // 0: Previous mark // 1: Previous count TrackPop(2); StackPush(TrackPeek(), TrackPeek(1)); // Recall old mark, old count break; // Backtrack case RegexCode.Lazybranchcount: // StackPush: // 0: Mark // 1: Count { StackPop(2); int mark = StackPeek(); int count = StackPeek(1); if (count < 0) { // Negative count -> loop now TrackPush2(mark); // Save old mark StackPush(Textpos(), count + 1); // Make new mark, incr count Goto(Operand(0)); // Loop } else { // Nonneg count -> straight now TrackPush(mark, count, Textpos()); // Save mark, count, position advance = 2; // Straight } continue; } 
case RegexCode.Lazybranchcount | RegexCode.Back: // TrackPush: // 0: Mark // 1: Count // 2: Textpos { TrackPop(3); int mark = TrackPeek(); int textpos = TrackPeek(2); if (TrackPeek(1) < Operand(1) && textpos != mark) { // Under limit and not empty match -> loop Textto(textpos); // Recall position StackPush(textpos, TrackPeek(1) + 1); // Make new mark, incr count TrackPush2(mark); // Save old mark Goto(Operand(0)); // Loop continue; } else { // Max loops or empty match -> backtrack StackPush(TrackPeek(), TrackPeek(1)); // Recall old mark, count break; // backtrack } } case RegexCode.Lazybranchcount | RegexCode.Back2: // TrackPush: // 0: Previous mark // StackPush: // 0: Mark (== current pos, discarded) // 1: Count TrackPop(); StackPop(2); StackPush(TrackPeek(), StackPeek(1) - 1); // Recall old mark, count break; // Backtrack case RegexCode.Setjump: StackPush(Trackpos(), Crawlpos()); TrackPush(); advance = 0; continue; case RegexCode.Setjump | RegexCode.Back: StackPop(2); break; case RegexCode.Backjump: // StackPush: // 0: Saved trackpos // 1: Crawlpos StackPop(2); Trackto(StackPeek()); while (Crawlpos() != StackPeek(1)) { Uncapture(); } break; case RegexCode.Forejump: // StackPush: // 0: Saved trackpos // 1: Crawlpos StackPop(2); Trackto(StackPeek()); TrackPush(StackPeek(1)); advance = 0; continue; case RegexCode.Forejump | RegexCode.Back: // TrackPush: // 0: Crawlpos TrackPop(); while (Crawlpos() != TrackPeek()) { Uncapture(); } break; case RegexCode.Bol: if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n') { break; } advance = 0; continue; case RegexCode.Eol: if (Rightchars() > 0 && CharAt(Textpos()) != '\n') { break; } advance = 0; continue; case RegexCode.Boundary: if (!IsBoundary(Textpos(), runtextbeg, runtextend)) { break; } advance = 0; continue; case RegexCode.Nonboundary: if (IsBoundary(Textpos(), runtextbeg, runtextend)) { break; } advance = 0; continue; case RegexCode.ECMABoundary: if (!IsECMABoundary(Textpos(), runtextbeg, runtextend)) { break; } 
advance = 0; continue; case RegexCode.NonECMABoundary: if (IsECMABoundary(Textpos(), runtextbeg, runtextend)) { break; } advance = 0; continue; case RegexCode.Beginning: if (Leftchars() > 0) { break; } advance = 0; continue; case RegexCode.Start: if (Textpos() != Textstart()) { break; } advance = 0; continue; case RegexCode.EndZ: if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n') { break; } advance = 0; continue; case RegexCode.End: if (Rightchars() > 0) { break; } advance = 0; continue; case RegexCode.One: if (Forwardchars() < 1 || Forwardcharnext() != (char)Operand(0)) { break; } advance = 1; continue; case RegexCode.Notone: if (Forwardchars() < 1 || Forwardcharnext() == (char)Operand(0)) { break; } advance = 1; continue; case RegexCode.Set: if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)])) { break; } advance = 1; continue; case RegexCode.Multi: { if (!Stringmatch(_code.Strings[Operand(0)])) { break; } advance = 1; continue; } case RegexCode.Ref: { int capnum = Operand(0); if (IsMatched(capnum)) { if (!Refmatch(MatchIndex(capnum), MatchLength(capnum))) { break; } } else { if ((runregex.roptions & RegexOptions.ECMAScript) == 0) { break; } } advance = 1; continue; } case RegexCode.Onerep: { int c = Operand(1); if (Forwardchars() < c) { break; } char ch = (char)Operand(0); while (c-- > 0) { if (Forwardcharnext() != ch) { goto BreakBackward; } } advance = 2; continue; } case RegexCode.Notonerep: { int c = Operand(1); if (Forwardchars() < c) { break; } char ch = (char)Operand(0); while (c-- > 0) { if (Forwardcharnext() == ch) { goto BreakBackward; } } advance = 2; continue; } case RegexCode.Setrep: { int c = Operand(1); if (Forwardchars() < c) { break; } string set = _code.Strings[Operand(0)]; while (c-- > 0) { if (!RegexCharClass.CharInClass(Forwardcharnext(), set)) { goto BreakBackward; } } advance = 2; continue; } case RegexCode.Oneloop: { int c = Operand(1); if (c > Forwardchars()) { c = 
Forwardchars(); } char ch = (char)Operand(0); int i; for (i = c; i > 0; i--) { if (Forwardcharnext() != ch) { Backwardnext(); break; } } if (c > i) { TrackPush(c - i - 1, Textpos() - Bump()); } advance = 2; continue; } case RegexCode.Notoneloop: { int c = Operand(1); if (c > Forwardchars()) { c = Forwardchars(); } char ch = (char)Operand(0); int i; for (i = c; i > 0; i--) { if (Forwardcharnext() == ch) { Backwardnext(); break; } } if (c > i) { TrackPush(c - i - 1, Textpos() - Bump()); } advance = 2; continue; } case RegexCode.Setloop: { int c = Operand(1); if (c > Forwardchars()) { c = Forwardchars(); } string set = _code.Strings[Operand(0)]; int i; for (i = c; i > 0; i--) { if (!RegexCharClass.CharInClass(Forwardcharnext(), set)) { Backwardnext(); break; } } if (c > i) { TrackPush(c - i - 1, Textpos() - Bump()); } advance = 2; continue; } case RegexCode.Oneloop | RegexCode.Back: case RegexCode.Notoneloop | RegexCode.Back: { TrackPop(2); int i = TrackPeek(); int pos = TrackPeek(1); Textto(pos); if (i > 0) { TrackPush(i - 1, pos - Bump()); } advance = 2; continue; } case RegexCode.Setloop | RegexCode.Back: { TrackPop(2); int i = TrackPeek(); int pos = TrackPeek(1); Textto(pos); if (i > 0) { TrackPush(i - 1, pos - Bump()); } advance = 2; continue; } case RegexCode.Onelazy: case RegexCode.Notonelazy: { int c = Operand(1); if (c > Forwardchars()) { c = Forwardchars(); } if (c > 0) { TrackPush(c - 1, Textpos()); } advance = 2; continue; } case RegexCode.Setlazy: { int c = Operand(1); if (c > Forwardchars()) { c = Forwardchars(); } if (c > 0) { TrackPush(c - 1, Textpos()); } advance = 2; continue; } case RegexCode.Onelazy | RegexCode.Back: { TrackPop(2); int pos = TrackPeek(1); Textto(pos); if (Forwardcharnext() != (char)Operand(0)) { break; } int i = TrackPeek(); if (i > 0) { TrackPush(i - 1, pos + Bump()); } advance = 2; continue; } case RegexCode.Notonelazy | RegexCode.Back: { TrackPop(2); int pos = TrackPeek(1); Textto(pos); if (Forwardcharnext() == (char)Operand(0)) 
{ break; } int i = TrackPeek(); if (i > 0) { TrackPush(i - 1, pos + Bump()); } advance = 2; continue; } case RegexCode.Setlazy | RegexCode.Back: { TrackPop(2); int pos = TrackPeek(1); Textto(pos); if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)])) { break; } int i = TrackPeek(); if (i > 0) { TrackPush(i - 1, pos + Bump()); } advance = 2; continue; } default: throw NotImplemented.ByDesignWithMessage(SR.UnimplementedState); } BreakBackward: ; // "break Backward" comes here: Backtrack(); } }
/// <summary>
/// Related computation: walks a <see cref="RegexTree"/> and reports the set of a leading
/// unbounded single-character or set loop (a leading <c>[]*</c>-style construct) when it
/// sees one. It is intentionally simple and gives up on anything it does not recognize.
/// </summary>
/// <param name="tree">The tree whose root is examined.</param>
/// <returns>
/// A <see cref="RegexPrefix"/> holding the loop's set and whether the loop node carries
/// <c>RegexOptions.IgnoreCase</c>, or <c>null</c> when no such leading loop is found.
/// </returns>
internal static RegexPrefix ScanChars(RegexTree tree)
{
    RegexNode sequence = null;   // concatenation whose children are currently being walked
    int nextIndex = 0;           // index of the next child of `sequence` to visit
    RegexNode node = tree._root;

    while (true)
    {
        String leadingSet = null;

        switch (node._type)
        {
            case RegexNode.Concatenate:
                if (node.ChildCount() > 0)
                {
                    sequence = node;
                    nextIndex = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Descend into the single child and forget any enclosing concatenation.
                node = node.Child(0);
                sequence = null;
                continue;

            // These node types are stepped over; the walk continues with the next sibling.
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
#if ECMA
            case RegexNode.ECMABoundary:
#endif
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                break;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (node._n != infinite)
                {
                    return null;
                }
                leadingSet = RegexCharClass.SetFromChar(node._ch);
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
                if (node._n != infinite)
                {
                    return null;
                }
                leadingSet = RegexCharClass.SetInverseFromChar(node._ch);
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                if (node._n != infinite || (node._str2 != null && node._str2.Length != 0))
                {
                    return null;
                }
                leadingSet = node._str;
                break;

            default:
                return null;
        }

        if (leadingSet != null)
        {
            return new RegexPrefix(leadingSet, 0 != (node._options & RegexOptions.IgnoreCase));
        }

        if (sequence == null || nextIndex >= sequence.ChildCount())
        {
            return null;
        }

        node = sequence.Child(nextIndex++);
    }
}
/// <summary>
/// Initializes the instance from a character-class string, which is handed to
/// <c>RegexCharClass.Parse</c>, together with the nullable and case-insensitive flags.
/// </summary>
/// <param name="charClass">Text passed to <c>RegexCharClass.Parse</c>.</param>
/// <param name="nullable">Value stored in <c>_nullable</c>.</param>
/// <param name="caseInsensitive">Value stored in <c>_caseInsensitive</c>.</param>
internal RegexFC(string charClass, bool nullable, bool caseInsensitive)
{
    _nullable = nullable;
    _caseInsensitive = caseInsensitive;
    _cc = RegexCharClass.Parse(charClass);
}
/// <summary>
/// Simplifies an alternation in place: nested alternations are flattened into this node,
/// adjacent single-character / set alternatives with equal RightToLeft and IgnoreCase
/// options are merged into one set, and children of type 0x16 are dropped.
/// </summary>
/// <returns>
/// A new node of type 0x16 when there are no children; otherwise the result of
/// <c>StripEnation(0x16)</c>.
/// </returns>
internal RegexNode ReduceAlternation()
{
    // Node type codes used below, named after the way this method treats them.
    const int OneType = 9;
    const int SetType = 11;
    const int NothingType = 0x16;
    const int AlternateType = 0x18;

    if (_children == null)
    {
        return new RegexNode(NothingType, _options);
    }

    bool previousIsSetOrOne = false;
    bool previousCannotMerge = false;
    RegexOptions previousOptions = RegexOptions.None;

    // `read` scans the children; `write` is the slot the next surviving child goes into.
    int read = 0;
    int write = 0;

    for (; read < _children.Count; read++, write++)
    {
        RegexNode current = _children[read];
        if (write < read)
        {
            _children[write] = current;
        }

        if (current._type == AlternateType)
        {
            // Splice the nested alternation's children in right after it and drop the wrapper.
            for (int k = 0; k < current._children.Count; k++)
            {
                current._children[k]._next = this;
            }

            _children.InsertRange(read + 1, current._children);
            write--;
        }
        else if (current._type == SetType || current._type == OneType)
        {
            RegexOptions currentOptions =
                current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);

            bool mergeWithPrevious =
                previousIsSetOrOne
                && previousOptions == currentOptions
                && !previousCannotMerge
                && (current._type == OneType || RegexCharClass.IsMergeable(current._str));

            if (!mergeWithPrevious)
            {
                // Start a new run; this node becomes the candidate the next one may merge into.
                previousIsSetOrOne = true;
                previousCannotMerge =
                    current._type == SetType && !RegexCharClass.IsMergeable(current._str);
                previousOptions = currentOptions;
            }
            else
            {
                write--;
                RegexNode target = _children[write];

                RegexCharClass merged;
                if (target._type == OneType)
                {
                    merged = new RegexCharClass();
                    merged.AddChar(target._ch);
                }
                else
                {
                    merged = RegexCharClass.Parse(target._str);
                }

                if (current._type == OneType)
                {
                    merged.AddChar(current._ch);
                }
                else
                {
                    merged.AddCharClass(RegexCharClass.Parse(current._str));
                }

                target._type = SetType;
                target._str = merged.ToStringClass();
            }
        }
        else if (current._type == NothingType)
        {
            write--;
        }
        else
        {
            previousIsSetOrOne = false;
            previousCannotMerge = false;
        }
    }

    if (write < read)
    {
        _children.RemoveRange(write, read - write);
    }

    return StripEnation(NothingType);
}
/// <summary>
/// Interpreter main loop: positions the code pointer with <c>Goto(0)</c> and then repeatedly
/// dispatches on <c>Operator()</c> until opcode 40 returns from the method.
/// </summary>
/// <remarks>
/// Control flow uses three labels:
/// <c>Label_0007</c> re-enters the dispatch switch,
/// <c>Label_0A06</c> calls <c>Advance(1)</c> and re-enters it, and
/// <c>Label_0EA3</c> calls <c>Backtrack()</c> and re-enters it.
/// A case that leaves the switch with <c>break</c> also runs <c>Advance(1)</c>.
/// Opcodes are numeric in this listing; cases at 0x80 + n and 0x100 + n read back, via
/// <c>Trackframe</c>/<c>Tracked</c>, what case n saved with <c>Track</c>/<c>Track2</c>.
/// An opcode without a case throws <see cref="Exception"/> with <c>RegExRes.GetString(3)</c>.
/// </remarks>
protected override void Go()
{
    this.Goto(0);

Label_0007:
    switch (this.Operator())
    {
        case 0:
        {
            int num12 = this.Operand(1);
            if (this.Forwardchars() < num12)
            {
                goto Label_0EA3;
            }
            char ch = (char)this.Operand(0);
            while (num12-- > 0)
            {
                if (this.Forwardcharnext() != ch)
                {
                    goto Label_0EA3;
                }
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 1:
        {
            int num13 = this.Operand(1);
            if (this.Forwardchars() < num13)
            {
                goto Label_0EA3;
            }
            char ch2 = (char)this.Operand(0);
            while (num13-- > 0)
            {
                if (this.Forwardcharnext() == ch2)
                {
                    goto Label_0EA3;
                }
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 2:
        {
            int num14 = this.Operand(2);
            if (this.Forwardchars() < num14)
            {
                goto Label_0EA3;
            }
            string set = this.runstrings[this.Operand(0)];
            string category = this.runstrings[this.Operand(1)];
            while (num14-- > 0)
            {
                if (!RegexCharClass.CharInSet(this.Forwardcharnext(), set, category))
                {
                    goto Label_0EA3;
                }
            }
            this.Advance(3);
            goto Label_0007;
        }

        case 3:
        {
            int num15 = this.Operand(1);
            if (num15 > this.Forwardchars())
            {
                num15 = this.Forwardchars();
            }
            char ch3 = (char)this.Operand(0);
            int num16 = num15;
            while (num16 > 0)
            {
                if (this.Forwardcharnext() != ch3)
                {
                    this.Backwardnext();
                    break;
                }
                num16--;
            }
            if (num15 > num16)
            {
                this.Track((num15 - num16) - 1, this.Textpos() - this.Bump());
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 4:
        {
            int num17 = this.Operand(1);
            if (num17 > this.Forwardchars())
            {
                num17 = this.Forwardchars();
            }
            char ch4 = (char)this.Operand(0);
            int num18 = num17;
            while (num18 > 0)
            {
                if (this.Forwardcharnext() == ch4)
                {
                    this.Backwardnext();
                    break;
                }
                num18--;
            }
            if (num17 > num18)
            {
                this.Track((num17 - num18) - 1, this.Textpos() - this.Bump());
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 5:
        {
            int num19 = this.Operand(2);
            if (num19 > this.Forwardchars())
            {
                num19 = this.Forwardchars();
            }
            string str3 = this.runstrings[this.Operand(0)];
            string str4 = this.runstrings[this.Operand(1)];
            int num20 = num19;
            while (num20 > 0)
            {
                if (!RegexCharClass.CharInSet(this.Forwardcharnext(), str3, str4))
                {
                    this.Backwardnext();
                    break;
                }
                num20--;
            }
            if (num19 > num20)
            {
                this.Track((num19 - num20) - 1, this.Textpos() - this.Bump());
            }
            this.Advance(3);
            goto Label_0007;
        }

        case 6:
        case 7:
        {
            int num25 = this.Operand(1);
            if (num25 > this.Forwardchars())
            {
                num25 = this.Forwardchars();
            }
            if (num25 > 0)
            {
                this.Track(num25 - 1, this.Textpos());
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 8:
        {
            int num26 = this.Operand(2);
            if (num26 > this.Forwardchars())
            {
                num26 = this.Forwardchars();
            }
            if (num26 > 0)
            {
                this.Track(num26 - 1, this.Textpos());
            }
            this.Advance(3);
            goto Label_0007;
        }

        case 9:
            if ((this.Forwardchars() < 1) || (this.Forwardcharnext() != ((char)this.Operand(0))))
            {
                goto Label_0EA3;
            }
            this.Advance(1);
            goto Label_0007;

        case 10:
            if ((this.Forwardchars() < 1) || (this.Forwardcharnext() == ((char)this.Operand(0))))
            {
                goto Label_0EA3;
            }
            this.Advance(1);
            goto Label_0007;

        case 11:
            if ((this.Forwardchars() < 1) || !RegexCharClass.CharInSet(this.Forwardcharnext(), this.runstrings[this.Operand(0)], this.runstrings[this.Operand(1)]))
            {
                goto Label_0EA3;
            }
            this.Advance(2);
            goto Label_0007;

        case 12:
            if (!this.Stringmatch(this.runstrings[this.Operand(0)]))
            {
                goto Label_0EA3;
            }
            this.Advance(1);
            goto Label_0007;

        case 13:
        {
            int cap = this.Operand(0);
            if (!base.IsMatched(cap))
            {
                if ((base.runregex.roptions & RegexOptions.ECMAScript) == RegexOptions.None)
                {
                    goto Label_0EA3;
                }
                goto Label_0A06;
            }
            if (this.Refmatch(base.MatchIndex(cap), base.MatchLength(cap)))
            {
                goto Label_0A06;
            }
            goto Label_0EA3;
        }

        case 14:
            if ((this.Leftchars() > 0) && (this.CharAt(this.Textpos() - 1) != '\n'))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 15:
            if ((this.Rightchars() > 0) && (this.CharAt(this.Textpos()) != '\n'))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x10:
            if (!base.IsBoundary(this.Textpos(), base.runtextbeg, base.runtextend))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x11:
            if (base.IsBoundary(this.Textpos(), base.runtextbeg, base.runtextend))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x12:
            if (this.Leftchars() > 0)
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x13:
            if (this.Textpos() != this.Textstart())
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 20:
            if ((this.Rightchars() > 1) || ((this.Rightchars() == 1) && (this.CharAt(this.Textpos()) != '\n')))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x15:
            if (this.Rightchars() > 0)
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x16:
            goto Label_0EA3;

        case 0x17:
            this.Track(this.Textpos());
            this.Advance(1);
            goto Label_0007;

        case 0x18:
            this.Stackframe(1);
            if ((this.Textpos() - this.Stacked(0)) == 0)
            {
                this.Track2(this.Stacked(0));
                this.Advance(1);
            }
            else
            {
                this.Track(this.Stacked(0), this.Textpos());
                this.Stack(this.Textpos());
                this.Goto(this.Operand(0));
            }
            goto Label_0007;

        case 0x19:
            this.Stackframe(1);
            if ((this.Textpos() - this.Stacked(0)) == 0)
            {
                this.Track2(this.Stacked(0));
                break;
            }
            this.Track(this.Stacked(0), this.Textpos());
            break;

        case 0x1a:
            this.Stack(-1, this.Operand(0));
            this.Track();
            this.Advance(1);
            goto Label_0007;

        case 0x1b:
            this.Stack(this.Textpos(), this.Operand(0));
            this.Track();
            this.Advance(1);
            goto Label_0007;

        case 0x1c:
        {
            // Greedy counted loop: iterate again only while count < Operand(1).
            this.Stackframe(2);
            int num4 = this.Stacked(0);
            int num5 = this.Stacked(1);
            int num6 = this.Textpos() - num4;
            if ((num5 < this.Operand(1)) && ((num6 != 0) || (num5 < 0)))
            {
                this.Track(num4);
                this.Stack(this.Textpos(), num5 + 1);
                this.Goto(this.Operand(0));
            }
            else
            {
                this.Track2(num4, num5);
                this.Advance(2);
            }
            goto Label_0007;
        }

        case 0x1d:
        {
            // Lazy counted loop: saves (mark, count, text position) for case 0x9d.
            this.Stackframe(2);
            int num7 = this.Stacked(0);
            int num8 = this.Stacked(1);
            if (num8 >= 0)
            {
                this.Track(num7, num8, this.Textpos());
                this.Advance(2);
            }
            else
            {
                this.Track2(num7);
                this.Stack(this.Textpos(), num8 + 1);
                this.Goto(this.Operand(0));
            }
            goto Label_0007;
        }

        case 30:
            this.Stack(-1);
            this.Track();
            this.Advance();
            goto Label_0007;

        case 0x1f:
            this.Stack(this.Textpos());
            this.Track();
            this.Advance();
            goto Label_0007;

        case 0x20:
            if ((this.Operand(1) != -1) && !base.IsMatched(this.Operand(1)))
            {
                goto Label_0EA3;
            }
            this.Stackframe(1);
            if (this.Operand(1) != -1)
            {
                base.TransferCapture(this.Operand(0), this.Operand(1), this.Stacked(0), this.Textpos());
            }
            else
            {
                base.Capture(this.Operand(0), this.Stacked(0), this.Textpos());
            }
            this.Track(this.Stacked(0));
            this.Advance(2);
            goto Label_0007;

        case 0x21:
            this.Stackframe(1);
            this.Track(this.Stacked(0));
            this.Textto(this.Stacked(0));
            this.Advance();
            goto Label_0007;

        case 0x22:
            this.Stack(this.Trackpos(), base.Crawlpos());
            this.Track();
            this.Advance();
            goto Label_0007;

        case 0x23:
            this.Stackframe(2);
            this.Trackto(this.Stacked(0));
            while (base.Crawlpos() != this.Stacked(1))
            {
                base.Uncapture();
            }
            goto Label_0EA3;

        case 0x24:
            this.Stackframe(2);
            this.Trackto(this.Stacked(0));
            this.Track(this.Stacked(1));
            this.Advance();
            goto Label_0007;

        case 0x25:
            if (!base.IsMatched(this.Operand(0)))
            {
                goto Label_0EA3;
            }
            this.Advance(1);
            goto Label_0007;

        case 0x26:
            this.Goto(this.Operand(0));
            goto Label_0007;

        case 40:
            return;

        case 0x29:
            if (!base.IsECMABoundary(this.Textpos(), base.runtextbeg, base.runtextend))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x2a:
            if (base.IsECMABoundary(this.Textpos(), base.runtextbeg, base.runtextend))
            {
                goto Label_0EA3;
            }
            this.Advance();
            goto Label_0007;

        case 0x83:
        case 0x84:
        {
            this.Trackframe(2);
            int num21 = this.Tracked(0);
            int newpos = this.Tracked(1);
            this.Textto(newpos);
            if (num21 > 0)
            {
                this.Track(num21 - 1, newpos - this.Bump());
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 0x85:
        {
            this.Trackframe(2);
            int num23 = this.Tracked(0);
            int num24 = this.Tracked(1);
            this.Textto(num24);
            if (num23 > 0)
            {
                this.Track(num23 - 1, num24 - this.Bump());
            }
            this.Advance(3);
            goto Label_0007;
        }

        case 0x86:
        {
            this.Trackframe(2);
            int num27 = this.Tracked(1);
            this.Textto(num27);
            if (this.Forwardcharnext() != ((char)this.Operand(0)))
            {
                goto Label_0EA3;
            }
            int num28 = this.Tracked(0);
            if (num28 > 0)
            {
                this.Track(num28 - 1, num27 + this.Bump());
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 0x87:
        {
            this.Trackframe(2);
            int num29 = this.Tracked(1);
            this.Textto(num29);
            if (this.Forwardcharnext() == ((char)this.Operand(0)))
            {
                goto Label_0EA3;
            }
            int num30 = this.Tracked(0);
            if (num30 > 0)
            {
                this.Track(num30 - 1, num29 + this.Bump());
            }
            this.Advance(2);
            goto Label_0007;
        }

        case 0x88:
        {
            this.Trackframe(2);
            int num31 = this.Tracked(1);
            this.Textto(num31);
            if (!RegexCharClass.CharInSet(this.Forwardcharnext(), this.runstrings[this.Operand(0)], this.runstrings[this.Operand(1)]))
            {
                goto Label_0EA3;
            }
            int num32 = this.Tracked(0);
            if (num32 > 0)
            {
                this.Track(num32 - 1, num31 + this.Bump());
            }
            this.Advance(3);
            goto Label_0007;
        }

        case 0x97:
            this.Trackframe(1);
            this.Textto(this.Tracked(0));
            this.Goto(this.Operand(0));
            goto Label_0007;

        case 0x98:
            this.Trackframe(2);
            this.Stackframe(1);
            this.Textto(this.Tracked(1));
            this.Track2(this.Tracked(0));
            this.Advance(1);
            goto Label_0007;

        case 0x99:
        {
            this.Trackframe(2);
            int num3 = this.Tracked(1);
            this.Track2(this.Tracked(0));
            this.Stack(num3);
            this.Textto(num3);
            this.Goto(this.Operand(0));
            goto Label_0007;
        }

        case 0x9a:
            this.Stackframe(2);
            goto Label_0EA3;

        case 0x9b:
            this.Stackframe(2);
            goto Label_0EA3;

        case 0x9c:
            this.Trackframe(1);
            this.Stackframe(2);
            if (this.Stacked(1) <= 0)
            {
                this.Stack(this.Tracked(0), this.Stacked(1) - 1);
                goto Label_0EA3;
            }
            this.Textto(this.Stacked(0));
            this.Track2(this.Tracked(0), this.Stacked(1) - 1);
            this.Advance(2);
            goto Label_0007;

        case 0x9d:
        {
            // Backtracking entry of the lazy counted loop (0x1d | 0x80); reads the
            // (mark, count, text position) triple saved by case 0x1d.
            this.Trackframe(3);
            int num9 = this.Tracked(0);
            int num10 = this.Tracked(2);

            // Give up once the count has REACHED the limit, or the last pass matched nothing.
            // The test used to be `>`; that let the lazy loop run one more iteration than the
            // greedy loop in case 0x1c, which only iterates while count < Operand(1).
            if ((this.Tracked(1) >= this.Operand(1)) || (num10 == num9))
            {
                this.Stack(this.Tracked(0), this.Tracked(1));
                goto Label_0EA3;
            }
            this.Textto(num10);
            this.Stack(num10, this.Tracked(1) + 1);
            this.Track2(num9);
            this.Goto(this.Operand(0));
            goto Label_0007;
        }

        case 0x9e:
        case 0x9f:
            this.Stackframe(1);
            goto Label_0EA3;

        case 160:
            this.Trackframe(1);
            this.Stack(this.Tracked(0));
            base.Uncapture();
            if ((this.Operand(0) != -1) && (this.Operand(1) != -1))
            {
                base.Uncapture();
            }
            goto Label_0EA3;

        case 0xa1:
            this.Trackframe(1);
            this.Stack(this.Tracked(0));
            goto Label_0EA3;

        case 0xa2:
            this.Stackframe(2);
            goto Label_0EA3;

        case 0xa4:
            this.Trackframe(1);
            while (base.Crawlpos() != this.Tracked(0))
            {
                base.Uncapture();
            }
            goto Label_0EA3;

        case 280:
            this.Trackframe(1);
            this.Stack(this.Tracked(0));
            goto Label_0EA3;

        case 0x119:
            this.Stackframe(1);
            this.Trackframe(1);
            this.Stack(this.Tracked(0));
            goto Label_0EA3;

        case 0x11c:
            this.Trackframe(2);
            this.Stack(this.Tracked(0), this.Tracked(1));
            goto Label_0EA3;

        case 0x11d:
            this.Trackframe(1);
            this.Stackframe(2);
            this.Stack(this.Tracked(0), this.Stacked(1) - 1);
            goto Label_0EA3;

        default:
            throw new Exception(RegExRes.GetString(3));
    }

    // Reached only by cases that `break` out of the switch (case 0x19).
    this.Advance(1);
    goto Label_0007;

Label_0A06:
    // Success path shared by the branches of case 13.
    this.Advance(1);
    goto Label_0007;

Label_0EA3:
    // Failure path: every failing case jumps here.
    this.Backtrack();
    goto Label_0007;
}
/// <summary>
/// Builds a one-line textual description of the instruction stored at
/// <paramref name="offset"/> in <c>_codes</c>.
/// </summary>
/// <param name="offset">Index into <c>_codes</c> of the opcode to describe.</param>
/// <returns>
/// The six-digit offset, a <c>*</c> or blank from <c>OpcodeBacktracks</c>, the operator
/// description, and the operands in parentheses.
/// </returns>
internal String OpcodeDescription(int offset)
{
    int rawOpcode = _codes[offset];
    int opcode = rawOpcode & Mask;

    StringBuilder text = new StringBuilder();
    text.AppendFormat("{0:D6} ", offset);
    text.Append(OpcodeBacktracks(opcode) ? '*' : ' ');
    text.Append(OperatorDescription(rawOpcode));
    text.Append('(');

    // First operand; it is only read for opcodes that are listed here.
    switch (opcode)
    {
        case One:
        case Notone:
        case Onerep:
        case Notonerep:
        case Oneloop:
        case Notoneloop:
        case Onelazy:
        case Notonelazy:
            text.Append("Ch = ").Append(RegexCharClass.CharDescription((char)_codes[offset + 1]));
            break;

        case Set:
        case Setrep:
        case Setloop:
        case Setlazy:
            text.Append("Set = ").Append(RegexCharClass.SetDescription(_strings[_codes[offset + 1]]));
            break;

        case Multi:
            text.Append("String = ").Append(_strings[_codes[offset + 1]]);
            break;

        case Ref:
        case Testref:
            text.Append("Index = ").Append(_codes[offset + 1]);
            break;

        case Capturemark:
            text.Append("Index = ").Append(_codes[offset + 1]);
            if (_codes[offset + 2] != -1)
            {
                text.Append(", Unindex = ").Append(_codes[offset + 2]);
            }
            break;

        case Nullcount:
        case Setcount:
            text.Append("Value = ").Append(_codes[offset + 1]);
            break;

        case Goto:
        case Lazybranch:
        case Branchmark:
        case Lazybranchmark:
        case Branchcount:
        case Lazybranchcount:
            text.Append("Addr = ").Append(_codes[offset + 1]);
            break;
    }

    // Second operand: a repeat count or a loop limit, printed as "inf" when it is Int32.MaxValue.
    string countLabel = null;
    switch (opcode)
    {
        case Onerep:
        case Notonerep:
        case Oneloop:
        case Notoneloop:
        case Onelazy:
        case Notonelazy:
        case Setrep:
        case Setloop:
        case Setlazy:
            countLabel = ", Rep = ";
            break;

        case Branchcount:
        case Lazybranchcount:
            countLabel = ", Limit = ";
            break;
    }

    if (countLabel != null)
    {
        text.Append(countLabel);
        if (_codes[offset + 2] == Int32.MaxValue)
        {
            text.Append("inf");
        }
        else
        {
            text.Append(_codes[offset + 2]);
        }
    }

    text.Append(')');
    return text.ToString();
}
/// <summary>
/// Moves <c>runtextpos</c> to the next position at which a match attempt is
/// worthwhile, using the anchor bits in <c>runanchors</c>, the
/// <c>runbmPrefix</c> scanner and the <c>runfcPrefix</c> first-character set.
/// </summary>
/// <returns>
/// true when a candidate position was found (or no filtering information is
/// available); false when no candidate position remains.
/// </returns>
protected override bool FindFirstChar()
{
    // Anchor bits, named after the text bound each one is compared against below.
    bool bitBeg = (runanchors & 1) != 0;
    bool bitStart = (runanchors & 4) != 0;
    bool bitEndMinusOne = (runanchors & 0x10) != 0;
    bool bitEnd = (runanchors & 0x20) != 0;

    if (bitBeg || bitStart || bitEndMinusOne || bitEnd)
    {
        if (runcode._rightToLeft)
        {
            bool cannotMatch =
                (bitEnd && runtextpos < runtextend)
                || (bitEndMinusOne
                    && (runtextpos < runtextend - 1
                        || (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n')))
                || (bitStart && runtextpos < runtextstart);

            if (cannotMatch)
            {
                runtextpos = runtextbeg;
                return false;
            }

            if (bitBeg && runtextpos > runtextbeg)
            {
                runtextpos = runtextbeg;
            }
        }
        else
        {
            bool cannotMatch =
                (bitBeg && runtextpos > runtextbeg)
                || (bitStart && runtextpos > runtextstart);

            if (cannotMatch)
            {
                runtextpos = runtextend;
                return false;
            }

            // Jump straight to the only positions the end-style bits allow.
            if (bitEndMinusOne && runtextpos < runtextend - 1)
            {
                runtextpos = runtextend - 1;
            }
            else if (bitEnd && runtextpos < runtextend)
            {
                runtextpos = runtextend;
            }
        }

        if (runbmPrefix != null)
        {
            return runbmPrefix.IsMatch(runtext, runtextpos, runtextbeg, runtextend);
        }
    }
    else if (runbmPrefix != null)
    {
        runtextpos = runbmPrefix.Scan(runtext, runtextpos, runtextbeg, runtextend);
        if (runtextpos != -1)
        {
            return true;
        }

        // Scan reported -1: park the position at the far bound for the scan direction.
        runtextpos = runcode._rightToLeft ? runtextbeg : runtextend;
        return false;
    }

    if (runfcPrefix == null)
    {
        return true;
    }

    runrtl = runcode._rightToLeft;
    runci = runfcPrefix.CaseInsensitive;
    string prefix = runfcPrefix.Prefix;

    if (RegexCharClass.IsSingleton(prefix))
    {
        char only = RegexCharClass.SingletonChar(prefix);
        int remaining = Forwardchars();
        while (remaining > 0)
        {
            if (only == Forwardcharnext())
            {
                // Step back so the position sits on the matching character.
                Backwardnext();
                return true;
            }
            remaining--;
        }
    }
    else
    {
        int remaining = Forwardchars();
        while (remaining > 0)
        {
            if (RegexCharClass.CharInSet(Forwardcharnext(), prefix, string.Empty))
            {
                Backwardnext();
                return true;
            }
            remaining--;
        }
    }

    return false;
}
/// <summary>
/// Builds a textual description of this node: its type name from
/// <c>TypeStr</c>, one "-X" style suffix per option flag that is set, and the
/// type-specific arguments (character, set, string, capture indices, loop bounds).
/// </summary>
/// <returns>The description string.</returns>
internal String Description()
{
    StringBuilder text = new StringBuilder();
    text.Append(TypeStr[_type]);

    // Option flags and their suffixes, in the order they are emitted.
    RegexOptions[] flags =
    {
        RegexOptions.ExplicitCapture,
        RegexOptions.IgnoreCase,
        RegexOptions.RightToLeft,
        RegexOptions.Multiline,
        RegexOptions.Singleline,
        RegexOptions.IgnorePatternWhitespace,
        RegexOptions.ECMAScript
    };
    string[] suffixes = { "-C", "-I", "-L", "-M", "-S", "-X", "-E" };

    for (int f = 0; f < flags.Length; f++)
    {
        if ((_options & flags[f]) != 0)
        {
            text.Append(suffixes[f]);
        }
    }

    switch (_type)
    {
        case Oneloop:
        case Notoneloop:
        case Onelazy:
        case Notonelazy:
        case One:
        case Notone:
            text.Append("(Ch = ").Append(RegexCharClass.CharDescription(_ch)).Append(")");
            break;

        case Capture:
            text.Append("(index = ").Append(_m.ToString(CultureInfo.InvariantCulture));
            text.Append(", unindex = ").Append(_n.ToString(CultureInfo.InvariantCulture)).Append(")");
            break;

        case Ref:
        case Testref:
            text.Append("(index = ").Append(_m.ToString(CultureInfo.InvariantCulture)).Append(")");
            break;

        case Multi:
            text.Append("(String = ").Append(_str).Append(")");
            break;

        case Set:
        case Setloop:
        case Setlazy:
            text.Append("(Set = ").Append(RegexCharClass.SetDescription(_str)).Append(")");
            break;
    }

    switch (_type)
    {
        case Oneloop:
        case Notoneloop:
        case Onelazy:
        case Notonelazy:
        case Setloop:
        case Setlazy:
        case Loop:
        case Lazyloop:
            // An upper bound of Int32.MaxValue is shown as "inf".
            string maxText = _n == Int32.MaxValue
                ? "inf"
                : Convert.ToString(_n, CultureInfo.InvariantCulture);
            text.Append("(Min = ").Append(_m.ToString(CultureInfo.InvariantCulture));
            text.Append(", Max = ").Append(maxText).Append(")");
            break;
    }

    return text.ToString();
}
/// <summary>
/// Creates an instance holding a newly constructed <c>RegexCharClass</c> and
/// the supplied nullable flag.
/// </summary>
/// <param name="nullable">Value stored in <c>_nullable</c>.</param>
internal RegexFC(bool nullable)
{
    _nullable = nullable;
    _cc = new RegexCharClass();
}
/// <summary>
/// Formats the instruction at <paramref name="offset"/> in
/// <paramref name="codes"/> as a single line: the offset padded to six digits,
/// a '*' when <c>OpcodeBacktracks</c> reports true for the masked opcode, the
/// operator description, and the operands padded out to a common column.
/// </summary>
/// <param name="offset">Index of the opcode within <paramref name="codes"/>.</param>
/// <param name="codes">The instruction stream.</param>
/// <param name="strings">String table indexed by set and multi operands.</param>
/// <returns>The formatted description.</returns>
internal static string OpcodeDescription(int offset, int[] codes, string[] strings)
{
    var line = new StringBuilder();
    int rawOpcode = codes[offset];
    int op = rawOpcode & Mask;

    line.Append(offset.ToString("D6")).Append(' ');
    line.Append(OpcodeBacktracks(op) ? '*' : ' ');
    line.Append(OperatorDescription(rawOpcode));

    switch (op)
    {
        case One:
        case Notone:
        case Onerep:
        case Notonerep:
        case Oneloop:
        case Oneloopatomic:
        case Notoneloop:
        case Notoneloopatomic:
        case Onelazy:
        case Notonelazy:
            Padded().Append('\'').Append(RegexCharClass.CharDescription((char)codes[offset + 1])).Append('\'');
            break;

        case Set:
        case Setrep:
        case Setloop:
        case Setloopatomic:
        case Setlazy:
            Padded().Append(RegexCharClass.SetDescription(strings[codes[offset + 1]]));
            break;

        case Multi:
            Padded().Append('"').Append(strings[codes[offset + 1]]).Append('"');
            break;

        case Ref:
        case Testref:
            Padded().Append("index = ").Append(codes[offset + 1]);
            break;

        case Capturemark:
            Padded().Append("index = ").Append(codes[offset + 1]);
            if (codes[offset + 2] != -1)
            {
                line.Append(", unindex = ").Append(codes[offset + 2]);
            }
            break;

        case Nullcount:
        case Setcount:
            Padded().Append("value = ").Append(codes[offset + 1]);
            break;

        case Goto:
        case Lazybranch:
        case Branchmark:
        case Lazybranchmark:
        case Branchcount:
        case Lazybranchcount:
            Padded().Append("addr = ").Append(codes[offset + 1]);
            break;
    }

    switch (op)
    {
        case Onerep:
        case Notonerep:
        case Oneloop:
        case Oneloopatomic:
        case Notoneloop:
        case Notoneloopatomic:
        case Onelazy:
        case Notonelazy:
        case Setrep:
        case Setloop:
        case Setloopatomic:
        case Setlazy:
            AppendBound(", rep = ");
            break;

        case Branchcount:
        case Lazybranchcount:
            AppendBound(", limit = ");
            break;
    }

    return line.ToString();

    // Appends spaces so the text reaches column 25 (always at least one space)
    // and returns the builder for chaining.
    StringBuilder Padded() => line.Append(new string(' ', Math.Max(1, 25 - line.Length)));

    // Appends the label followed by the second operand, shown as "inf" when it is int.MaxValue.
    void AppendBound(string label)
    {
        line.Append(label);
        int bound = codes[offset + 2];
        if (bound == int.MaxValue)
        {
            line.Append("inf");
        }
        else
        {
            line.Append(bound);
        }
    }
}
/// <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.CharInSet"]/*' />
// Forwards to RegexCharClass.CharInSet with the same three arguments and
// returns its result unchanged.
protected static bool CharInSet(char ch, String set, String category)
{
    bool isMember = RegexCharClass.CharInSet(ch, set, category);
    return isMember;
}
/// <summary>
/// Creates a character class from already-built parts and marks it canonical.
/// </summary>
/// <param name="negate">Value stored in <c>_negate</c>.</param>
/// <param name="ranges">Range list stored in <c>_rangelist</c> (kept by reference, not copied).</param>
/// <param name="categories">Builder stored in <c>_categories</c> (kept by reference, not copied).</param>
/// <param name="subtraction">Character class stored in <c>_subtractor</c>.</param>
private RegexCharClass(bool negate, ArrayList ranges, StringBuilder categories, RegexCharClass subtraction)
{
    _negate = negate;
    _rangelist = ranges;
    _categories = categories;
    _subtractor = subtraction;
    _canonical = true;
}
/// <summary>
/// Formats the instruction at <paramref name="opcodeOffset"/> in <c>Codes</c>
/// as a single line: the offset padded to six digits, a '~' when
/// <c>OpcodeBacktracks</c> reports true for the masked opcode (a space
/// otherwise), the operator name, a suffix for each modifier flag that is
/// set, and the operands padded out to a common column.
/// </summary>
/// <param name="opcodeOffset">Index of the opcode within <c>Codes</c>.</param>
/// <returns>The formatted description.</returns>
internal string DescribeInstruction(int opcodeOffset)
{
    RegexOpcode opcode = (RegexOpcode)Codes[opcodeOffset];

    var sb = new StringBuilder();
    sb.Append($"{opcodeOffset:D6} ");
    sb.Append(OpcodeBacktracks(opcode & RegexOpcode.OperatorMask) ? '~' : ' ');
    sb.Append(opcode & RegexOpcode.OperatorMask);

    // Modifier flags carried in the bits outside the operator mask.
    if ((opcode & RegexOpcode.CaseInsensitive) != 0)
    {
        sb.Append("-Ci");
    }
    if ((opcode & RegexOpcode.RightToLeft) != 0)
    {
        sb.Append("-Rtl");
    }
    if ((opcode & RegexOpcode.Backtracking) != 0)
    {
        sb.Append("-Back");
    }
    if ((opcode & RegexOpcode.BacktrackingSecond) != 0)
    {
        sb.Append("-Back2");
    }

    opcode &= RegexOpcode.OperatorMask;

    // First operand.
    switch (opcode)
    {
        case RegexOpcode.One:
        case RegexOpcode.Onerep:
        case RegexOpcode.Oneloop:
        case RegexOpcode.Oneloopatomic:
        case RegexOpcode.Onelazy:
        case RegexOpcode.Notone:
        case RegexOpcode.Notonerep:
        case RegexOpcode.Notoneloop:
        case RegexOpcode.Notoneloopatomic:
        case RegexOpcode.Notonelazy:
            sb.Append(Indent()).Append('\'').Append(RegexCharClass.DescribeChar((char)Codes[opcodeOffset + 1])).Append('\'');
            break;

        case RegexOpcode.Set:
        case RegexOpcode.Setrep:
        case RegexOpcode.Setloop:
        case RegexOpcode.Setloopatomic:
        case RegexOpcode.Setlazy:
            sb.Append(Indent()).Append(RegexCharClass.DescribeSet(Strings[Codes[opcodeOffset + 1]]));
            break;

        case RegexOpcode.Multi:
            sb.Append(Indent()).Append('"').Append(Strings[Codes[opcodeOffset + 1]]).Append('"');
            break;

        case RegexOpcode.Backreference:
        case RegexOpcode.TestBackreference:
            sb.Append(Indent()).Append("index = ").Append(Codes[opcodeOffset + 1]);
            break;

        case RegexOpcode.Capturemark:
            sb.Append(Indent()).Append("index = ").Append(Codes[opcodeOffset + 1]);
            if (Codes[opcodeOffset + 2] != -1)
            {
                sb.Append(", unindex = ").Append(Codes[opcodeOffset + 2]);
            }
            break;

        case RegexOpcode.Nullcount:
        case RegexOpcode.Setcount:
            sb.Append(Indent()).Append("value = ").Append(Codes[opcodeOffset + 1]);
            break;

        case RegexOpcode.Goto:
        case RegexOpcode.Lazybranch:
        case RegexOpcode.Branchmark:
        case RegexOpcode.Lazybranchmark:
        case RegexOpcode.Branchcount:
        case RegexOpcode.Lazybranchcount:
            sb.Append(Indent()).Append("addr = ").Append(Codes[opcodeOffset + 1]);
            break;
    }

    // Second operand: a count shown as "inf" when it equals int.MaxValue.
    switch (opcode)
    {
        case RegexOpcode.Onerep:
        case RegexOpcode.Oneloop:
        case RegexOpcode.Oneloopatomic:
        case RegexOpcode.Onelazy:
        case RegexOpcode.Notonerep:
        case RegexOpcode.Notoneloop:
        case RegexOpcode.Notoneloopatomic:
        case RegexOpcode.Notonelazy:
        case RegexOpcode.Setrep:
        case RegexOpcode.Setloop:
        case RegexOpcode.Setloopatomic:
        case RegexOpcode.Setlazy:
            sb.Append(", rep = ").Append(Codes[opcodeOffset + 2] == int.MaxValue ? "inf" : Codes[opcodeOffset + 2]);
            break;

        case RegexOpcode.Branchcount:
        case RegexOpcode.Lazybranchcount:
            sb.Append(", limit = ").Append(Codes[opcodeOffset + 2] == int.MaxValue ? "inf" : Codes[opcodeOffset + 2]);
            break;
    }

    return sb.ToString();

    // Spaces that pad the text built so far out to column 25 (always at least one space).
    string Indent() => new string(' ', Math.Max(1, 25 - sb.Length));
}
/// <summary>
/// Stores <paramref name="sub"/> in <c>_subtractor</c>. A debug assertion
/// checks that no subtractor was set beforehand.
/// </summary>
/// <param name="sub">The character class to store as the subtractor.</param>
internal void AddSubtraction(RegexCharClass sub)
{
    Debug.Assert(null == _subtractor, "Can't add two subtractions to a char class. ");

    _subtractor = sub;
}
/// <summary>
/// Walks the tree from its root looking for a leading loop node (types 3-8)
/// whose <c>_n</c> is int.MaxValue, and returns the character set that loop
/// matches as a <c>RegexPrefix</c>.
/// </summary>
/// <param name="tree">The tree whose <c>_root</c> is scanned.</param>
/// <returns>
/// The prefix built from the first qualifying loop, or null when the walk
/// reaches a node type it does not handle, a loop that does not qualify, or
/// the end of the children of the last type-25 node it entered.
/// </returns>
internal static RegexPrefix ScanChars(RegexTree tree)
{
    RegexNode current = tree._root;
    RegexNode container = null;   // last type-25 node entered; its children are visited in order
    int nextChild = 0;
    string foundSet = null;
    bool ignoreCase = false;

    for (;;)
    {
        switch (current._type)
        {
            case 3:
            case 6:
                if (current._n != int.MaxValue)
                {
                    return null;
                }
                foundSet = RegexCharClass.SetFromChar(current._ch);
                ignoreCase = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                break;

            case 4:
            case 7:
                if (current._n != int.MaxValue)
                {
                    return null;
                }
                foundSet = RegexCharClass.SetInverseFromChar(current._ch);
                ignoreCase = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                break;

            case 5:
            case 8:
                // Requires an unbounded loop and an absent or empty _str2.
                if (current._n != int.MaxValue || (current._str2 != null && current._str2.Length != 0))
                {
                    return null;
                }
                foundSet = current._str;
                ignoreCase = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                break;

            case 14:
            case 15:
            case 16:
            case 18:
            case 19:
            case 20:
            case 21:
            case 23:
            case 30:
            case 31:
            case 41:
                // These node types are stepped over without affecting the result.
                break;

            case 25:
                if (current.ChildCount() > 0)
                {
                    container = current;
                    nextChild = 0;
                }
                break;

            case 28:
            case 32:
                // Descend into the single child and forget any pending container.
                current = current.Child(0);
                container = null;
                continue;

            default:
                return null;
        }

        if (foundSet != null)
        {
            return new RegexPrefix(foundSet, ignoreCase);
        }

        if (container == null || nextChild >= container.ChildCount())
        {
            return null;
        }

        current = container.Child(nextChild++);
    }
}
/// <summary>
/// Reports whether <paramref name="index"/> lies between a character for which
/// <c>RegexCharClass.IsECMAWordChar</c> is true and one for which it is false.
/// Positions outside [<paramref name="startpos"/>, <paramref name="endpos"/>)
/// count as non-word.
/// </summary>
/// <param name="index">Position in <c>_runtext</c> to test.</param>
/// <param name="startpos">Lower bound; the character before it is not examined.</param>
/// <param name="endpos">Upper bound; the character at it is not examined.</param>
/// <returns>true when exactly one side of the position is a word character.</returns>
protected bool IsECMABoundary(int index, int startpos, int endpos)
{
    bool wordBefore = index > startpos && RegexCharClass.IsECMAWordChar(_runtext[index - 1]);
    bool wordAfter = index < endpos && RegexCharClass.IsECMAWordChar(_runtext[index]);

    return wordBefore != wordAfter;
}
/// <summary>
/// Basic optimization. Single-letter alternations can be replaced
/// by faster set specifications, and nested alternations with no
/// intervening operators can be flattened:
///
/// a|b|c|def|g|h -> [a-c]|def|[gh]
/// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
/// </summary>
/// <returns>
/// A new Nothing node when this node has no children; otherwise the result of
/// <c>StripEnation(RegexNode.Nothing)</c> after the child list has been compacted.
/// </returns>
internal RegexNode ReduceAlternation()
{
    if (_children == null)
    {
        return new RegexNode(RegexNode.Nothing, _options);
    }

    // State describing the previous surviving child, used to decide whether
    // the current Set/One child may be folded into it.
    bool wasLastSet = false;
    bool lastNodeCannotMerge = false;
    RegexOptions optionsLast = 0;

    // i reads the child list; j is the write position of the compacted list.
    int i;
    int j;

    for (i = 0, j = 0; i < _children.Count; i++, j++)
    {
        RegexNode at = _children[i];

        if (j < i)
        {
            _children[j] = at;
        }

        if (at._type == Alternate)
        {
            // Flatten: splice the nested alternation's children in right after
            // this slot and drop the nested node itself.
            foreach (RegexNode nested in at._children)
            {
                nested._next = this;
            }

            _children.InsertRange(i + 1, at._children);
            j--;
            continue;
        }

        if (at._type == Set || at._type == One)
        {
            // Cannot merge sets if L or I options differ, or if either are negated.
            RegexOptions optionsAt = at._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
            bool cannotJoinPrevious = !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge;

            if (at._type == Set)
            {
                if (cannotJoinPrevious || !RegexCharClass.IsMergeable(at._str))
                {
                    wasLastSet = true;
                    lastNodeCannotMerge = !RegexCharClass.IsMergeable(at._str);
                    optionsLast = optionsAt;
                    continue;
                }
            }
            else if (cannotJoinPrevious)
            {
                wasLastSet = true;
                lastNodeCannotMerge = false;
                optionsLast = optionsAt;
                continue;
            }

            // The last node was a Set or a One, we're a Set or One and our options are the same.
            // Merge the two nodes into the previous slot.
            j--;
            RegexNode prev = _children[j];

            RegexCharClass merged;
            if (prev._type == RegexNode.One)
            {
                merged = new RegexCharClass();
                merged.AddChar(prev._ch);
            }
            else
            {
                merged = RegexCharClass.Parse(prev._str);
            }

            if (at._type == RegexNode.One)
            {
                merged.AddChar(at._ch);
            }
            else
            {
                merged.AddCharClass(RegexCharClass.Parse(at._str));
            }

            prev._type = RegexNode.Set;
            prev._str = merged.ToStringClass();
            continue;
        }

        if (at._type == RegexNode.Nothing)
        {
            // Nothing nodes are dropped; the merge state is left untouched.
            j--;
            continue;
        }

        // Any other node type ends the current run of mergeable nodes.
        wasLastSet = false;
        lastNodeCannotMerge = false;
    }

    // Trim the slots left over after compaction.
    if (j < i)
    {
        _children.RemoveRange(j, i - j);
    }

    return StripEnation(RegexNode.Nothing);
}
/// <summary>
/// Tests <paramref name="ch"/> against a set given as a set/category string
/// pair: the pair is converted with
/// <c>RegexCharClass.ConvertOldStringsToClass</c> and the result is passed to
/// <c>RegexCharClass.CharInClass</c>.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <param name="set">Set string passed to the conversion.</param>
/// <param name="category">Category string passed to the conversion.</param>
/// <returns>The result of <c>RegexCharClass.CharInClass</c>.</returns>
protected static bool CharInSet(char ch, String set, String category)
{
    return RegexCharClass.CharInClass(ch, RegexCharClass.ConvertOldStringsToClass(set, category));
}
/// <summary>
/// Creates a new <c>RegexCharClass</c> and adds the named category to it via
/// <c>AddCategoryFromName</c>, forwarding all four arguments.
/// </summary>
/// <param name="categoryName">Category name forwarded to <c>AddCategoryFromName</c>.</param>
/// <param name="invert">Inversion flag forwarded to <c>AddCategoryFromName</c>.</param>
/// <param name="caseInsensitive">Case-insensitivity flag forwarded to <c>AddCategoryFromName</c>.</param>
/// <param name="pattern">Pattern text forwarded to <c>AddCategoryFromName</c>.</param>
/// <returns>The newly created character class.</returns>
internal static RegexCharClass CreateFromCategory(string categoryName, bool invert, bool caseInsensitive, string pattern)
{
    RegexCharClass result = new RegexCharClass();
    result.AddCategoryFromName(categoryName, invert, caseInsensitive, pattern);

    return result;
}
/// <summary>
/// Forwards to <c>RegexCharClass.CharInClass</c> and returns its result unchanged.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <param name="charClass">Character-class string to test against.</param>
protected static bool CharInClass(char ch, String charClass)
    => RegexCharClass.CharInClass(ch, charClass);
/// <summary>
/// Handles the character following a backslash. Anchor letters
/// (A, B, G, Z, b, z) and class shorthands (d, D, s, S, w, W, p, P) are
/// consumed and turned into a node here; anything else is delegated to
/// <c>ScanBasicBackslash</c>.
/// </summary>
/// <returns>The node built for the escape.</returns>
/// <remarks>
/// Throws the exception produced by <c>MakeException</c> for
/// "IllegalEndEscape" when no characters remain.
/// </remarks>
internal RegexNode ScanBackslash()
{
    if (CharsRight() == 0)
    {
        throw MakeException(SR.GetString("IllegalEndEscape"));
    }

    char ch = RightChar();

    switch (ch)
    {
        case 'b':
        case 'B':
        case 'A':
        case 'G':
        case 'Z':
        case 'z':
            MoveRight();
            return new RegexNode(TypeFromCode(ch), _options);

        // For the shorthands below, a literal set string is used when
        // UseOptionE() is true; otherwise the RegexCharClass constant is used.
        case 'w':
            MoveRight();
            if (!UseOptionE())
            {
                return new RegexNode(11, _options, RegexCharClass.WordClass);
            }
            return new RegexNode(11, _options, "\0\n\00:A[_`a{İı");

        case 'W':
            MoveRight();
            if (!UseOptionE())
            {
                return new RegexNode(11, _options, RegexCharClass.NotWordClass);
            }
            return new RegexNode(11, _options, "\x0001\n\00:A[_`a{İı");

        case 's':
            MoveRight();
            if (!UseOptionE())
            {
                return new RegexNode(11, _options, RegexCharClass.SpaceClass);
            }
            return new RegexNode(11, _options, "\0\x0004\0\t\x000e !");

        case 'S':
            MoveRight();
            if (!UseOptionE())
            {
                return new RegexNode(11, _options, RegexCharClass.NotSpaceClass);
            }
            return new RegexNode(11, _options, "\x0001\x0004\0\t\x000e !");

        case 'd':
            MoveRight();
            if (UseOptionE())
            {
                return new RegexNode(11, _options, "\0\x0002\00:");
            }
            return new RegexNode(11, _options, RegexCharClass.DigitClass);

        case 'D':
            MoveRight();
            if (UseOptionE())
            {
                return new RegexNode(11, _options, "\x0001\x0002\00:");
            }
            return new RegexNode(11, _options, RegexCharClass.NotDigitClass);

        case 'p':
        case 'P':
        {
            MoveRight();

            // Any letter other than lowercase 'p' asks for the inverted category.
            bool invert = ch != 'p';

            RegexCharClass propertyClass = new RegexCharClass();
            propertyClass.AddCategoryFromName(ParseProperty(), invert, UseOptionI(), _pattern);
            if (UseOptionI())
            {
                propertyClass.AddLowercase(_culture);
            }

            return new RegexNode(11, _options, propertyClass.ToStringClass());
        }
    }

    return ScanBasicBackslash();
}
/// <summary>
/// Reports whether <paramref name="index"/> lies between a character for which
/// <c>RegexCharClass.IsWordChar</c> is true and one for which it is false.
/// Positions outside [<paramref name="startpos"/>, <paramref name="endpos"/>)
/// count as non-word.
/// </summary>
/// <param name="index">Position in <c>runtext</c> to test.</param>
/// <param name="startpos">Lower bound; the character before it is not examined.</param>
/// <param name="endpos">Upper bound; the character at it is not examined.</param>
/// <returns>true when exactly one side of the position is a word character.</returns>
protected bool IsBoundary(int index, int startpos, int endpos)
{
    bool precededByWordChar = false;
    if (index > startpos)
    {
        precededByWordChar = RegexCharClass.IsWordChar(runtext[index - 1]);
    }

    bool followedByWordChar = false;
    if (index < endpos)
    {
        followedByWordChar = RegexCharClass.IsWordChar(runtext[index]);
    }

    return precededByWordChar ^ followedByWordChar;
}