/// <summary>
/// Builds a tree by storing the root node, the capture bookkeeping
/// structures and the options exactly as supplied by the caller.
/// </summary>
internal RegexTree(RegexNode root, Dictionary<int, int> caps, int[] capnumlist, int captop, Dictionary<string, int> capnames, string[] capslist, RegexOptions opts)
{
    // Root node and options.
    _root = root;
    _options = opts;

    // Capture tables keyed by number.
    _caps = caps;
    _capnumlist = capnumlist;
    _captop = captop;

    // Capture tables keyed by name.
    _capnames = capnames;
    _capslist = capslist;
}
private readonly List<int> _rules; // negative -> group #, positive -> string #

/// <summary>
/// Since RegexReplacement shares the same parser as Regex, the
/// constructor receives a RegexNode that must be a concatenation of
/// constant strings (Multi / One nodes) and backreferences (Ref nodes).
/// Adjacent literal pieces are merged into a single string; every
/// string and every backreference contributes one entry to the rule list.
/// </summary>
/// <exception cref="ArgumentException">
/// The node is not a Concatenate, or one of its children is neither a
/// Multi, a One nor a Ref node.
/// </exception>
internal RegexReplacement(string rep, RegexNode concat, Dictionary<int, int> _caps)
{
    if (concat.Type() != RegexNode.Concatenate)
    {
        throw new ArgumentException(SR.ReplacementError);
    }

    var literal = new StringBuilder();
    var literals = new List<string>();
    var ruleList = new List<int>();

    for (int index = 0; index < concat.ChildCount(); index++)
    {
        RegexNode piece = concat.Child(index);
        int kind = piece.Type();

        if (kind == RegexNode.Multi)
        {
            literal.Append(piece._str);
        }
        else if (kind == RegexNode.One)
        {
            literal.Append(piece._ch);
        }
        else if (kind == RegexNode.Ref)
        {
            // Flush the literal text gathered so far before recording the reference.
            if (literal.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(literal.ToString());
                literal.Length = 0;
            }

            int slot = piece._m;

            // Non-negative slots are translated through the caps table when one is supplied.
            if (_caps != null && slot >= 0)
            {
                slot = _caps[slot];
            }

            ruleList.Add(-Specials - 1 - slot);
        }
        else
        {
            throw new ArgumentException(SR.ReplacementError);
        }
    }

    // Trailing literal text, if any.
    if (literal.Length > 0)
    {
        ruleList.Add(literals.Count);
        literals.Add(literal.ToString());
    }

    _rep = rep;
    _strings = literals;
    _rules = ruleList;
}
/// <summary>
/// Appends the reduced form of <paramref name="newChild"/> to this node's
/// child list (created on first use with capacity 4) and points the
/// reduced child's _next at this node.
/// </summary>
internal void AddChild(RegexNode newChild)
{
    _children = _children ?? new List<RegexNode>(4);

    RegexNode reduced = newChild.Reduce();
    _children.Add(reduced);
    reduced._next = this;
}
/*
 * Finish the current quantifiable (when a quantifier is not found or is not possible):
 * the pending unit is appended unchanged to the current concatenation and then cleared.
 */
internal void AddConcatenate()
{
    RegexNode pendingUnit = _unit;

    _concatenation.AddChild(pendingUnit);
    _unit = null;
}
/*
 * Finish the current concatenation (in response to a |) and start a fresh one.
 */
internal void AddAlternate()
{
    int groupType = _group.Type();
    bool isConditional = groupType == RegexNode.Testgroup || groupType == RegexNode.Testref;

    // The | parts inside a Testgroup/Testref group go directly to the group;
    // everywhere else they become branches of the current alternation.
    RegexNode owner = isConditional ? _group : _alternation;
    owner.AddChild(_concatenation.ReverseLeft());

    _concatenation = new RegexNode(RegexNode.Concatenate, _options);
}
/*
 * Start a new round for the parser state (in response to an open paren or string start):
 * remember the opening group node and create an empty alternation and an empty
 * concatenation, both carrying the current options.
 */
internal void StartGroup(RegexNode openGroup)
{
    RegexOptions currentOptions = _options;

    _group = openGroup;
    _alternation = new RegexNode(RegexNode.Alternate, currentOptions);
    _concatenation = new RegexNode(RegexNode.Concatenate, currentOptions);
}
/*
 * FC computation and shortcut cases for each node type.
 *
 * Node types without children push one RegexFC; the AfterChild cases of
 * Concatenate / Alternate / Testref / Testgroup pop the newest RegexFC and
 * fold it into the one beneath it, recording a failed merge in _failed.
 */
private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
{
    // The node's own options are consulted only for types up to RegexNode.Ref.
    bool optionsApply = NodeType <= RegexNode.Ref;
    bool ignoreCase = optionsApply && (node._options & RegexOptions.IgnoreCase) != 0;
    bool rightToLeft = optionsApply && (node._options & RegexOptions.RightToLeft) != 0;

    switch (NodeType)
    {
        // Nothing to do for these visits.
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Alternate | BeforeChild:
        case RegexNode.Testref | BeforeChild:
        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
        case RegexNode.Capture | BeforeChild:
        case RegexNode.Capture | AfterChild:
        case RegexNode.Greedy | BeforeChild:
        case RegexNode.Greedy | AfterChild:
        case RegexNode.Require | AfterChild:
        case RegexNode.Prevent | AfterChild:
            break;

        case RegexNode.Testgroup | BeforeChild:
            // Child 0 of a Testgroup is skipped.
            if (CurIndex == 0)
            {
                SkipChild();
            }
            break;

        case RegexNode.Concatenate | AfterChild:
        {
            if (CurIndex != 0)
            {
                RegexFC latest = PopFC();
                RegexFC running = TopFC();
                _failed = !running.AddFC(latest, true);
            }

            // Once the accumulated entry is not nullable, the remaining children are skipped.
            if (!TopFC()._nullable)
            {
                _skipAllChildren = true;
            }
            break;
        }

        case RegexNode.Testgroup | AfterChild:
        {
            // Merging starts only after child index 1.
            if (CurIndex > 1)
            {
                RegexFC latest = PopFC();
                RegexFC running = TopFC();
                _failed = !running.AddFC(latest, false);
            }
            break;
        }

        case RegexNode.Alternate | AfterChild:
        case RegexNode.Testref | AfterChild:
        {
            if (CurIndex != 0)
            {
                RegexFC latest = PopFC();
                RegexFC running = TopFC();
                _failed = !running.AddFC(latest, false);
            }
            break;
        }

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            // A loop with a minimum of zero makes the top entry nullable.
            if (node._m == 0)
            {
                TopFC()._nullable = true;
            }
            break;

        case RegexNode.Require | BeforeChild:
        case RegexNode.Prevent | BeforeChild:
            SkipChild();
            PushFC(new RegexFC(true));
            break;

        case RegexNode.One:
        case RegexNode.Notone:
        {
            bool negated = NodeType == RegexNode.Notone;
            PushFC(new RegexFC(node._ch, negated, false, ignoreCase));
            break;
        }

        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        {
            bool negated = NodeType == RegexNode.Notoneloop || NodeType == RegexNode.Notonelazy;
            bool mayBeEmpty = node._m == 0;
            PushFC(new RegexFC(node._ch, negated, mayBeEmpty, ignoreCase));
            break;
        }

        case RegexNode.Multi:
        {
            string text = node._str;
            if (text.Length == 0)
            {
                PushFC(new RegexFC(true));
            }
            else
            {
                // Right-to-left nodes contribute their last character, others their first.
                char lead = rightToLeft ? text[text.Length - 1] : text[0];
                PushFC(new RegexFC(lead, false, false, ignoreCase));
            }
            break;
        }

        case RegexNode.Set:
            PushFC(new RegexFC(node._str, false, ignoreCase));
            break;

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            PushFC(new RegexFC(node._str, node._m == 0, ignoreCase));
            break;

        case RegexNode.Ref:
            PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
            break;

        // All of these push a RegexFC constructed with 'true'.
        case RegexNode.Empty:
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            PushFC(new RegexFC(true));
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
    }
}
/*
 * Resets parsing to the beginning of the pattern.
 *
 * Sets the scan position to 0, _autocap to 1 and _ignoreNextParen to false,
 * empties the saved-options stack, installs topopts as the current options
 * and drops the group stack.
 */
internal void Reset(RegexOptions topopts)
{
    _currentPos = 0;
    _autocap = 1;
    _ignoreNextParen = false;

    // Remove every saved options entry. Removing only Count - 1 of them would
    // leave the most recently pushed entry behind, so a reset after an
    // unbalanced scan would start with a stale options frame.
    _optionsStack.Clear();

    _options = topopts;
    _stack = null;
}
/*
 * Sets the current unit to an assertion of the specified type,
 * created with the options currently in effect.
 */
internal void AddUnitType(int type)
{
    RegexNode assertion = new RegexNode(type, _options);
    _unit = assertion;
}
/*
 * Sets the current unit to a single set node built from the
 * given set string and the options currently in effect.
 */
internal void AddUnitSet(string cc)
{
    RegexNode setNode = new RegexNode(RegexNode.Set, _options, cc);
    _unit = setNode;
}
/// <summary>
/// The main RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
/// <param name="nodetype">
/// The node type being visited. For interior nodes the case labels below
/// combine the type with BeforeChild or AfterChild.
/// </param>
/// <param name="node">The node whose fields (_m, _n, _ch, _str, _children, _options) drive the emitted code.</param>
/// <param name="curIndex">
/// Compared against the node's child count and used to select per-child
/// behavior; presumably the index of the child being visited (confirm against the caller).
/// </param>
/// <exception cref="ArgumentException">The node type is not handled by any case.</exception>
private void EmitFragment(int nodetype, RegexNode node, int curIndex)
{
    // Option bits OR-ed into some opcodes; only computed for types up to RegexNode.Ref.
    int bits = 0;

    if (nodetype <= RegexNode.Ref)
    {
        if (node.UseOptionR())
        {
            bits |= RegexCode.Rtl;
        }
        if ((node._options & RegexOptions.IgnoreCase) != 0)
        {
            bits |= RegexCode.Ci;
        }
    }

    switch (nodetype)
    {
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
            break;

        case RegexNode.Alternate | BeforeChild:
            // Before every alternative except the last: remember where the
            // Lazybranch is emitted (operand 0 for now) so the AfterChild
            // case can pass that position to PatchJump.
            if (curIndex < node._children.Count - 1)
            {
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }
            break;

        case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node._children.Count - 1)
                {
                    // Pop the Lazybranch position, remember the position of a
                    // new Goto, then patch the Lazybranch to the position
                    // following that Goto.
                    int LBPos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, CurPos());
                }
                else
                {
                    // After the last alternative: patch the curIndex
                    // remembered Goto positions to the current position.
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(PopInt(), CurPos());
                    }
                }
                break;
            }

        case RegexNode.Testref | BeforeChild:
            switch (curIndex)
            {
                case 0:
                    // Only before the first child: emit the Testref check on the
                    // mapped capture number, bracketed by Setjump / Forejump,
                    // with a Lazybranch whose position is remembered for patching.
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
            }
            break;

        case RegexNode.Testref | AfterChild:
            switch (curIndex)
            {
                case 0:
                    {
                        int Branchpos = PopInt();
                        PushInt(CurPos());
                        Emit(RegexCode.Goto, 0);
                        PatchJump(Branchpos, CurPos());
                        Emit(RegexCode.Forejump);
                        if (node._children.Count > 1)
                        {
                            break;
                        }
                        // else fallthrough: with a single child the Goto is patched right away
                        goto case 1;
                    }
                case 1:
                    PatchJump(PopInt(), CurPos());
                    break;
            }
            break;

        case RegexNode.Testgroup | BeforeChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;
            }
            break;

        case RegexNode.Testgroup | AfterChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;
                case 1:
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    if (node._children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough: with no third child the Goto is patched right away
                    goto case 2;
                case 2:
                    PatchJump(PopInt(), CurPos());
                    break;
            }
            break;

        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:

            // Counted form when the loop has a finite maximum or a minimum
            // above 1; mark form otherwise. The Null* opcode is chosen when
            // the minimum is 0.
            if (node._n < int.MaxValue || node._m > 1)
            {
                Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
            }
            else
            {
                Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
            }

            // With a minimum of 0, a Goto is emitted before the body and its
            // position remembered; AfterChild patches it.
            if (node._m == 0)
            {
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
            }

            // Position of the loop body, popped by AfterChild as the branch operand.
            PushInt(CurPos());
            break;

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            {
                int StartJumpPos = CurPos();
                // Zero for Loop, and the Lazyloop-minus-Loop difference for
                // Lazyloop; added to the branch opcode below.
                int Lazy = (nodetype - (RegexNode.Loop | AfterChild));

                if (node._n < int.MaxValue || node._m > 1)
                {
                    Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
                }
                else
                {
                    Emit(RegexCode.Branchmark + Lazy, PopInt());
                }

                // Patch the Goto emitted in BeforeChild to the branch instruction.
                if (node._m == 0)
                {
                    PatchJump(PopInt(), StartJumpPos);
                }
            }
            break;

        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            break;

        case RegexNode.Capture | BeforeChild:
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Setjump);

            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);

            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Forejump);

            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            PushInt(CurPos());
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(PopInt(), CurPos());
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node._type | bits, node._ch);
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            // The mandatory first _m repetitions are emitted as a rep opcode;
            // the optional remainder (if any) as the node's own loop opcode.
            if (node._m > 0)
            {
                Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                      RegexCode.Onerep : RegexCode.Notonerep) | bits, node._ch, node._m);
            }
            if (node._n > node._m)
            {
                Emit(node._type | bits, node._ch, node._n == int.MaxValue ?
                     int.MaxValue : node._n - node._m);
            }
            break;

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            // Same split as above, for set nodes.
            if (node._m > 0)
            {
                Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
            }
            if (node._n > node._m)
            {
                Emit(node._type | bits, StringCode(node._str),
                     (node._n == int.MaxValue) ? int.MaxValue : node._n - node._m);
            }
            break;

        case RegexNode.Multi:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Set:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Ref:
            Emit(node._type | bits, MapCapnum(node._m));
            break;

        // These node types are emitted as their own type value with no operands.
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node._type);
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
    }
}
/*
 * This is a related computation: it takes a RegexTree and computes the
 * leading substring if it sees one. It's quite trivial and gives up easily.
 *
 * Starting at the root, it descends through Greedy and Capture nodes, steps
 * over the zero-width node types listed below while inside a Concatenate,
 * and returns as soon as it meets a One, Multi or Oneloop/Onelazy node.
 * Anything else yields RegexPrefix.Empty.
 */
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode current = tree._root;
    RegexNode enclosingConcat = null;
    int childCursor = 0;

    while (true)
    {
        switch (current._type)
        {
            case RegexNode.Concatenate:
                // Begin walking this concatenation's children from the first one.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childCursor = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Descend into the only child and forget any enclosing concatenation.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
            {
                if (current._m <= 0)
                {
                    return RegexPrefix.Empty;
                }

                // The mandatory repetitions form the prefix.
                bool ignoreCase = 0 != (current._options & RegexOptions.IgnoreCase);
                string repeated = string.Empty.PadRight(current._m, current._ch);
                return new RegexPrefix(repeated, ignoreCase);
            }

            case RegexNode.One:
            {
                bool ignoreCase = 0 != (current._options & RegexOptions.IgnoreCase);
                return new RegexPrefix(current._ch.ToString(), ignoreCase);
            }

            case RegexNode.Multi:
            {
                bool ignoreCase = 0 != (current._options & RegexOptions.IgnoreCase);
                return new RegexPrefix(current._str, ignoreCase);
            }

            // These are stepped over; the walk continues with the next sibling.
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                break;

            default:
                return RegexPrefix.Empty;
        }

        // Advance to the next child of the enclosing concatenation, if there is one.
        bool exhausted = enclosingConcat == null || childCursor >= enclosingConcat.ChildCount();
        if (exhausted)
        {
            return RegexPrefix.Empty;
        }

        current = enclosingConcat.Child(childCursor);
        childCursor++;
    }
}
/*
 * FC computation and shortcut cases for each node type.
 *
 * Childless node types push a single RegexFC. After the children of
 * Concatenate / Alternate / Testref / Testgroup nodes, the most recent
 * RegexFC is popped and merged into the entry below it; an unsuccessful
 * merge sets _failed.
 */
private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
{
    bool caseInsensitive = false;
    bool reversed = false;

    // The node's own options are only read for types up to RegexNode.Ref.
    if (NodeType <= RegexNode.Ref)
    {
        caseInsensitive = (node._options & RegexOptions.IgnoreCase) != 0;
        reversed = (node._options & RegexOptions.RightToLeft) != 0;
    }

    switch (NodeType)
    {
        // Visits that require no work.
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Alternate | BeforeChild:
        case RegexNode.Testref | BeforeChild:
        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
        case RegexNode.Capture | BeforeChild:
        case RegexNode.Capture | AfterChild:
        case RegexNode.Greedy | BeforeChild:
        case RegexNode.Greedy | AfterChild:
        case RegexNode.Require | AfterChild:
        case RegexNode.Prevent | AfterChild:
            break;

        case RegexNode.Testgroup | BeforeChild:
            // The first child of a Testgroup is skipped.
            if (CurIndex == 0)
            {
                SkipChild();
            }
            break;

        case RegexNode.Concatenate | AfterChild:
        {
            if (CurIndex != 0)
            {
                RegexFC newest = PopFC();
                _failed = !TopFC().AddFC(newest, true);
            }

            // A non-nullable accumulated entry means later children are skipped.
            if (!TopFC()._nullable)
            {
                _skipAllChildren = true;
            }
            break;
        }

        case RegexNode.Testgroup | AfterChild:
        {
            // Merge only after children beyond index 1.
            if (CurIndex > 1)
            {
                RegexFC newest = PopFC();
                _failed = !TopFC().AddFC(newest, false);
            }
            break;
        }

        case RegexNode.Alternate | AfterChild:
        case RegexNode.Testref | AfterChild:
        {
            if (CurIndex != 0)
            {
                RegexFC newest = PopFC();
                _failed = !TopFC().AddFC(newest, false);
            }
            break;
        }

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            // A minimum of zero makes the top entry nullable.
            if (node._m == 0)
            {
                TopFC()._nullable = true;
            }
            break;

        case RegexNode.Require | BeforeChild:
        case RegexNode.Prevent | BeforeChild:
            SkipChild();
            PushFC(new RegexFC(true));
            break;

        case RegexNode.One:
            PushFC(new RegexFC(node._ch, false, false, caseInsensitive));
            break;

        case RegexNode.Notone:
            PushFC(new RegexFC(node._ch, true, false, caseInsensitive));
            break;

        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            PushFC(new RegexFC(node._ch, false, node._m == 0, caseInsensitive));
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
            PushFC(new RegexFC(node._ch, true, node._m == 0, caseInsensitive));
            break;

        case RegexNode.Multi:
        {
            int length = node._str.Length;
            if (length == 0)
            {
                PushFC(new RegexFC(true));
                break;
            }

            // Right-to-left nodes contribute their last character, others their first.
            int leadIndex = reversed ? length - 1 : 0;
            PushFC(new RegexFC(node._str[leadIndex], false, false, caseInsensitive));
            break;
        }

        case RegexNode.Set:
            PushFC(new RegexFC(node._str, false, caseInsensitive));
            break;

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            PushFC(new RegexFC(node._str, node._m == 0, caseInsensitive));
            break;

        case RegexNode.Ref:
            PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
            break;

        // Each of these pushes a RegexFC constructed with 'true'.
        case RegexNode.Empty:
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
        case RegexNode.ResetMatchStart:
            PushFC(new RegexFC(true));
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
    }
}
/// <summary>
/// Reduces <paramref name="newChild"/>, appends the result to this node's
/// children (allocating the list with capacity 4 on first use) and sets
/// the reduced child's _next to this node.
/// </summary>
internal void AddChild(RegexNode newChild)
{
    if (_children == null)
    {
        _children = new List<RegexNode>(4);
    }

    RegexNode simplified = newChild.Reduce();

    _children.Add(simplified);
    simplified._next = this;
}
/// <summary>
/// Applies a {min,max} quantifier to this node and returns the resulting node.
/// {0,0} yields a new Empty node and {1,1} yields this node unchanged.
/// One, Notone and Set nodes are converted in place through MakeRep; any
/// other node is wrapped as the child of a new Loop or Lazyloop node.
/// </summary>
internal RegexNode MakeQuantifier(bool lazy, int min, int max)
{
    if (min == 0 && max == 0)
    {
        return new RegexNode(Empty, _options);
    }

    if (min == 1 && max == 1)
    {
        return this;
    }

    bool convertsInPlace = _type == One || _type == Notone || _type == Set;
    if (convertsInPlace)
    {
        MakeRep(lazy ? Onelazy : Oneloop, min, max);
        return this;
    }

    RegexNode loop = new RegexNode(lazy ? Lazyloop : Loop, _options, min, max);
    loop.AddChild(this);
    return loop;
}
/*
 * Finish the current quantifiable (when a quantifier is found): the pending
 * unit is quantified, appended to the current concatenation, and cleared.
 */
internal void AddConcatenate(bool lazy, int min, int max)
{
    RegexNode quantified = _unit.MakeQuantifier(lazy, min, max);

    _concatenation.AddChild(quantified);
    _unit = null;
}
/*
 * Sets the current unit to a single inverse-char node. When UseOptionI()
 * is true the character is lower-cased with the parser's culture first.
 */
internal void AddUnitNotone(char ch)
{
    char stored = UseOptionI() ? _culture.TextInfo.ToLower(ch) : ch;

    _unit = new RegexNode(RegexNode.Notone, _options, stored);
}
/*
 * Add a string to the last concatenate.
 *
 * Takes cch characters of the pattern starting at pos. Nothing is added when
 * cch is 0. More than one character becomes a Multi node; otherwise a One
 * node is created from the character at pos. Text is lower-cased when
 * UseOptionI() is true and the text is not part of a replacement pattern.
 */
internal void AddConcatenate(int pos, int cch, bool isReplacement)
{
    if (cch == 0)
    {
        return;
    }

    RegexNode literalNode;

    if (cch > 1)
    {
        string text = _pattern.Substring(pos, cch);

        if (UseOptionI() && !isReplacement)
        {
            // We do the ToLower character by character for consistency.  With surrogate chars, doing
            // a ToLower on the entire string could actually change the surrogate pair.  This is more correct
            // linguistically, but since Regex doesn't support surrogates, it's more important to be
            // consistent.
            var lowered = new StringBuilder(text.Length);
            foreach (char original in text)
            {
                lowered.Append(_culture.TextInfo.ToLower(original));
            }

            text = lowered.ToString();
        }

        literalNode = new RegexNode(RegexNode.Multi, _options, text);
    }
    else
    {
        char single = _pattern[pos];

        if (UseOptionI() && !isReplacement)
        {
            single = _culture.TextInfo.ToLower(single);
        }

        literalNode = new RegexNode(RegexNode.One, _options, single);
    }

    _concatenation.AddChild(literalNode);
}
/*
 * Sets the current unit to a subtree: the given node becomes the
 * pending unit as-is.
 */
internal void AddUnitNode(RegexNode node)
{
    this._unit = node;
}
/*
 * Push the parser state (in response to an open paren).
 *
 * The current group, alternation and concatenation are chained through
 * their _next fields (concatenation -> alternation -> group -> old stack)
 * and the concatenation becomes the new top of the stack.
 */
internal void PushGroup()
{
    RegexNode group = _group;
    RegexNode alternation = _alternation;
    RegexNode concatenation = _concatenation;

    group._next = _stack;
    alternation._next = group;
    concatenation._next = alternation;

    _stack = concatenation;
}
/*
 * Finish the current group (in response to a ')' or end). The finished
 * group becomes the pending unit.
 *
 * Testgroup / Testref groups take the current concatenation directly as a
 * child; a Testref may end up with at most 2 children and a Testgroup with
 * at most 3, otherwise a TooManyAlternates exception is thrown. For any
 * other group the concatenation is added to the alternation, which in turn
 * is added to the group.
 */
internal void AddGroup()
{
    int groupType = _group.Type();
    bool isConditional = groupType == RegexNode.Testgroup || groupType == RegexNode.Testref;

    if (isConditional)
    {
        _group.AddChild(_concatenation.ReverseLeft());

        bool isTestref = _group.Type() == RegexNode.Testref;
        int allowedChildren = isTestref ? 2 : 3;

        if (_group.ChildCount() > allowedChildren)
        {
            throw MakeException(SR.TooManyAlternates);
        }
    }
    else
    {
        _alternation.AddChild(_concatenation.ReverseLeft());
        _group.AddChild(_alternation);
    }

    _unit = _group;
}
/*
 * Remember the pushed state (in response to a ')'): restores the
 * concatenation, alternation, group and stack saved through the _next chain.
 */
internal void PopGroup()
{
    _concatenation = _stack;
    _alternation = _concatenation._next;
    _group = _alternation._next;
    _stack = _group._next;

    bool needsCondition = _group.Type() == RegexNode.Testgroup && _group.ChildCount() == 0;
    if (!needsCondition)
    {
        return;
    }

    // The first () inside a Testgroup group goes directly to the group
    if (_unit == null)
    {
        throw MakeException(SR.IllegalCondition);
    }

    _group.AddChild(_unit);
    _unit = null;
}
/*
 * Simple parsing for replacement patterns.
 *
 * Alternates between runs of literal text (added as-is, flagged as
 * replacement text) and '$' constructs handed to ScanDollar, and returns
 * the concatenation that collects them.
 */
internal RegexNode ScanReplacement()
{
    _concatenation = new RegexNode(RegexNode.Concatenate, _options);

    while (true)
    {
        int remaining = CharsRight();
        if (remaining == 0)
        {
            break;
        }

        // Consume literal text up to the next '$' or the end of the pattern.
        int literalStart = Textpos();
        for (; remaining > 0 && RightChar() != '$'; remaining--)
        {
            MoveRight();
        }

        AddConcatenate(literalStart, Textpos() - literalStart, true);

        if (remaining > 0)
        {
            if (MoveRightGetChar() == '$')
            {
                AddUnitNode(ScanDollar());
            }

            AddConcatenate();
        }
    }

    return _concatenation;
}
/// <summary>
/// The main RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
/// <param name="nodetype">
/// The node type being visited. For interior nodes the case labels below
/// combine the type with BeforeChild or AfterChild.
/// </param>
/// <param name="node">The node whose fields (_m, _n, _ch, _str, _children, _options) drive the emitted code.</param>
/// <param name="curIndex">
/// Compared against the node's child count and used to select per-child
/// behavior; presumably the index of the child being visited (confirm against the caller).
/// </param>
/// <exception cref="ArgumentException">The node type is not handled by any case.</exception>
private void EmitFragment(int nodetype, RegexNode node, int curIndex)
{
    // Option bits OR-ed into some opcodes; only computed for types up to RegexNode.Ref.
    int bits = 0;

    if (nodetype <= RegexNode.Ref)
    {
        if (node.UseOptionR())
            bits |= RegexCode.Rtl;
        if ((node._options & RegexOptions.IgnoreCase) != 0)
            bits |= RegexCode.Ci;
    }

    switch (nodetype)
    {
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
            break;

        case RegexNode.Alternate | BeforeChild:
            // Before every alternative except the last: remember where the
            // Lazybranch is emitted (operand 0 for now) so the AfterChild
            // case can pass that position to PatchJump.
            if (curIndex < node._children.Count - 1)
            {
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }
            break;

        case RegexNode.Alternate | AfterChild:
            {
                if (curIndex < node._children.Count - 1)
                {
                    // Pop the Lazybranch position, remember the position of a
                    // new Goto, then patch the Lazybranch to the position
                    // following that Goto.
                    int LBPos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(LBPos, CurPos());
                }
                else
                {
                    // After the last alternative: patch the curIndex
                    // remembered Goto positions to the current position.
                    int I;
                    for (I = 0; I < curIndex; I++)
                    {
                        PatchJump(PopInt(), CurPos());
                    }
                }
                break;
            }

        case RegexNode.Testref | BeforeChild:
            switch (curIndex)
            {
                case 0:
                    // Only before the first child: emit the Testref check on the
                    // mapped capture number, bracketed by Setjump / Forejump,
                    // with a Lazybranch whose position is remembered for patching.
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
            }
            break;

        case RegexNode.Testref | AfterChild:
            switch (curIndex)
            {
                case 0:
                    {
                        int Branchpos = PopInt();
                        PushInt(CurPos());
                        Emit(RegexCode.Goto, 0);
                        PatchJump(Branchpos, CurPos());
                        Emit(RegexCode.Forejump);
                        if (node._children.Count > 1)
                            break;
                        // else fallthrough: with a single child the Goto is patched right away
                        goto case 1;
                    }
                case 1:
                    PatchJump(PopInt(), CurPos());
                    break;
            }
            break;

        case RegexNode.Testgroup | BeforeChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;
            }
            break;

        case RegexNode.Testgroup | AfterChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;
                case 1:
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);

                    if (node._children.Count > 2)
                        break;
                    // else fallthrough: with no third child the Goto is patched right away
                    goto case 2;
                case 2:
                    PatchJump(PopInt(), CurPos());
                    break;
            }
            break;

        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:

            // Counted form when the loop has a finite maximum or a minimum
            // above 1; mark form otherwise. The Null* opcode is chosen when
            // the minimum is 0.
            if (node._n < int.MaxValue || node._m > 1)
                Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
            else
                Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);

            // With a minimum of 0, a Goto is emitted before the body and its
            // position remembered; AfterChild patches it.
            if (node._m == 0)
            {
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
            }

            // Position of the loop body, popped by AfterChild as the branch operand.
            PushInt(CurPos());
            break;

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            {
                int StartJumpPos = CurPos();
                // Zero for Loop, and the Lazyloop-minus-Loop difference for
                // Lazyloop; added to the branch opcode below.
                int Lazy = (nodetype - (RegexNode.Loop | AfterChild));

                if (node._n < int.MaxValue || node._m > 1)
                    Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
                else
                    Emit(RegexCode.Branchmark + Lazy, PopInt());

                // Patch the Goto emitted in BeforeChild to the branch instruction.
                if (node._m == 0)
                    PatchJump(PopInt(), StartJumpPos);
            }
            break;

        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            break;

        case RegexNode.Capture | BeforeChild:
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Setjump);

            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);

            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Forejump);

            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            PushInt(CurPos());
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(PopInt(), CurPos());
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node._type | bits, node._ch);
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            // The mandatory first _m repetitions are emitted as a rep opcode;
            // the optional remainder (if any) as the node's own loop opcode.
            if (node._m > 0)
                Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ?
                      RegexCode.Onerep : RegexCode.Notonerep) | bits, node._ch, node._m);
            if (node._n > node._m)
                Emit(node._type | bits, node._ch, node._n == int.MaxValue ?
                     int.MaxValue : node._n - node._m);
            break;

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            // Same split as above, for set nodes.
            if (node._m > 0)
                Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
            if (node._n > node._m)
                Emit(node._type | bits, StringCode(node._str),
                     (node._n == int.MaxValue) ? int.MaxValue : node._n - node._m);
            break;

        case RegexNode.Multi:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Set:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Ref:
            Emit(node._type | bits, MapCapnum(node._m));
            break;

        // These node types are emitted as their own type value with no operands.
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node._type);
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
    }
}