/// <summary>
/// Emits the RegexCode for one visit of <paramref name="node"/>. The tree walk
/// calls this before and after each child of an interior node, and once at
/// each leaf.
/// </summary>
/// <param name="nodetype">
/// The kind of visit to emit code for: a RegexNode type, combined with
/// BeforeChild or AfterChild for interior nodes.
/// </param>
/// <param name="node">The node being visited.</param>
/// <param name="curIndex">
/// Index of the child being visited; it is compared against the node's child
/// count to tell the last child from the earlier ones.
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="nodetype"/> matches none of the handled cases.
/// </exception>
private void EmitFragment(int nodetype, RegexNode node, int curIndex)
{
    // Option bits are only folded into the opcode for node types up to and including Ref.
    int bits = 0;
    if (nodetype <= RegexNode.Ref)
    {
        bits = (node.UseOptionR() ? RegexCode.Rtl : 0)
             | ((node.Options & RegexOptions.IgnoreCase) != 0 ? RegexCode.Ci : 0);
    }

    switch (nodetype)
    {
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            // Nothing to emit for these visits.
            break;

        case RegexNode.Alternate | BeforeChild:
        {
            bool isLastBranch = curIndex >= node.Children.Count - 1;
            if (!isLastBranch)
            {
                // Remember where the Lazybranch sits so its target can be patched later.
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Lazybranch, 0);
            }

            break;
        }

        case RegexNode.Alternate | AfterChild:
        {
            if (curIndex >= node.Children.Count - 1)
            {
                // Last branch: patch one pending jump per earlier branch.
                for (int pending = curIndex; pending > 0; pending--)
                {
                    PatchJump(_intStack.Pop(), _emitted.Length);
                }

                break;
            }

            int lazyBranchPos = _intStack.Pop();
            _intStack.Append(_emitted.Length);
            Emit(RegexCode.Goto, 0);
            PatchJump(lazyBranchPos, _emitted.Length);
            break;
        }

        case RegexNode.Testref | BeforeChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Setjump);
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Lazybranch, 0);
                Emit(RegexCode.Testref, MapCapnum(node.M));
                Emit(RegexCode.Forejump);
            }

            break;

        case RegexNode.Testref | AfterChild:
            if (curIndex == 0)
            {
                int lazyBranchPos = _intStack.Pop();
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Goto, 0);
                PatchJump(lazyBranchPos, _emitted.Length);
                Emit(RegexCode.Forejump);

                // With no second child, the Goto is patched right away.
                if (node.Children.Count <= 1)
                {
                    PatchJump(_intStack.Pop(), _emitted.Length);
                }
            }
            else if (curIndex == 1)
            {
                PatchJump(_intStack.Pop(), _emitted.Length);
            }

            break;

        case RegexNode.Testgroup | BeforeChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Setjump);
                Emit(RegexCode.Setmark);
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Lazybranch, 0);
            }

            break;

        case RegexNode.Testgroup | AfterChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Getmark);
                Emit(RegexCode.Forejump);
            }
            else if (curIndex == 1)
            {
                int lazyBranchPos = _intStack.Pop();
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Goto, 0);
                PatchJump(lazyBranchPos, _emitted.Length);
                Emit(RegexCode.Getmark);
                Emit(RegexCode.Forejump);

                // With no third child, the Goto is patched right away.
                if (node.Children.Count <= 2)
                {
                    PatchJump(_intStack.Pop(), _emitted.Length);
                }
            }
            else if (curIndex == 2)
            {
                PatchJump(_intStack.Pop(), _emitted.Length);
            }

            break;

        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
        {
            bool mIsZero = node.M == 0;

            if (node.N < int.MaxValue || node.M > 1)
            {
                if (mIsZero)
                {
                    Emit(RegexCode.Nullcount, 0);
                }
                else
                {
                    Emit(RegexCode.Setcount, 1 - node.M);
                }
            }
            else
            {
                Emit(mIsZero ? RegexCode.Nullmark : RegexCode.Setmark);
            }

            if (mIsZero)
            {
                // This Goto is patched in the matching AfterChild visit.
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Goto, 0);
            }

            _intStack.Append(_emitted.Length);
            break;
        }

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
        {
            int branchOpPos = _emitted.Length;

            // Difference between the visited node type and Loop, added to the branch opcode.
            int lazyOffset = nodetype - (RegexNode.Loop | AfterChild);

            if (node.N < int.MaxValue || node.M > 1)
            {
                Emit(RegexCode.Branchcount + lazyOffset, _intStack.Pop(), node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
            }
            else
            {
                Emit(RegexCode.Branchmark + lazyOffset, _intStack.Pop());
            }

            if (node.M == 0)
            {
                PatchJump(_intStack.Pop(), branchOpPos);
            }

            break;
        }

        case RegexNode.Capture | BeforeChild:
        {
            int slot = MapCapnum(node.M);

            // Note only the first one in the case of a branch reset group.
            if (_capPositions[slot] == default)
            {
                // Note that this capture group starts here.
                _capPositions[slot] = _emitted.Length;
            }

            Emit(RegexCode.Setmark);
            break;
        }

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node.M), MapCapnum(node.N));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Setjump);
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            _intStack.Append(_emitted.Length);
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(_intStack.Pop(), _emitted.Length);
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node.NType | bits, node.Ch);
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
        {
            // The first M repetitions are emitted as a fixed-count rep opcode.
            if (node.M > 0)
            {
                bool isOneVariant = node.NType == RegexNode.Oneloop || node.NType == RegexNode.Onelazy;
                int repOpcode = isOneVariant ? RegexCode.Onerep : RegexCode.Notonerep;
                Emit(repOpcode | bits, node.Ch, node.M);
            }

            // Anything beyond M is emitted with the node's own opcode.
            if (node.N > node.M)
            {
                Emit(node.NType | bits, node.Ch, node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
            }

            break;
        }

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            if (node.M > 0)
            {
                Emit(RegexCode.Setrep | bits, StringCode(node.Str), node.M);
            }

            if (node.N > node.M)
            {
                Emit(node.NType | bits, StringCode(node.Str), (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M);
            }

            break;

        case RegexNode.Multi:
        case RegexNode.Set:
            Emit(node.NType | bits, StringCode(node.Str));
            break;

        case RegexNode.Ref:
            Emit(node.NType | bits, MapCapnum(node.M));
            break;

        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node.NType);
            break;

        case RegexNode.ResetMatchStart:
            _resetMatchStartFound = true;
            Emit(node.NType);
            break;

        case RegexNode.CallSubroutine:
            Emit(RegexCode.CallSubroutine, MapCapnum(node.M));
            break;

        case RegexNode.BacktrackingVerb:
            Emit(node.M);
            break;

        default:
            throw new ArgumentException(string.Format(SR.UnexpectedOpcode, nodetype.ToString()));
    }
}
/// <summary>
/// Emits the RegexCode for one visit of <paramref name="node"/>. The tree walk
/// calls this before and after each child of an interior node, and once at
/// each leaf.
/// </summary>
/// <param name="nodetype">
/// The kind of visit to emit code for: a RegexNode type, combined with
/// BeforeChild or AfterChild for interior nodes.
/// </param>
/// <param name="node">The node being visited.</param>
/// <param name="curIndex">
/// Index of the child being visited; it is compared against the node's child
/// count to tell the last child from the earlier ones.
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="nodetype"/> matches none of the handled cases.
/// </exception>
private void EmitFragment(int nodetype, RegexNode node, int curIndex)
{
    // Option bits are only folded into the opcode for node types up to and including Ref.
    int bits = 0;
    if (nodetype <= RegexNode.Ref)
    {
        bits = (node.UseOptionR() ? RegexCode.Rtl : 0)
             | ((node._options & RegexOptions.IgnoreCase) != 0 ? RegexCode.Ci : 0);
    }

    switch (nodetype)
    {
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            // Nothing to emit for these visits.
            break;

        case RegexNode.Alternate | BeforeChild:
        {
            bool isLastBranch = curIndex >= node._children.Count - 1;
            if (!isLastBranch)
            {
                // Remember where the Lazybranch sits so its target can be patched later.
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }

            break;
        }

        case RegexNode.Alternate | AfterChild:
        {
            if (curIndex >= node._children.Count - 1)
            {
                // Last branch: patch one pending jump per earlier branch.
                for (int pending = curIndex; pending > 0; pending--)
                {
                    PatchJump(PopInt(), CurPos());
                }

                break;
            }

            int lazyBranchPos = PopInt();
            PushInt(CurPos());
            Emit(RegexCode.Goto, 0);
            PatchJump(lazyBranchPos, CurPos());
            break;
        }

        case RegexNode.Testref | BeforeChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Setjump);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
                Emit(RegexCode.Testref, MapCapnum(node._m));
                Emit(RegexCode.Forejump);
            }

            break;

        case RegexNode.Testref | AfterChild:
            if (curIndex == 0)
            {
                int lazyBranchPos = PopInt();
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
                PatchJump(lazyBranchPos, CurPos());
                Emit(RegexCode.Forejump);

                // With no second child, the Goto is patched right away.
                if (node._children.Count <= 1)
                {
                    PatchJump(PopInt(), CurPos());
                }
            }
            else if (curIndex == 1)
            {
                PatchJump(PopInt(), CurPos());
            }

            break;

        case RegexNode.Testgroup | BeforeChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Setjump);
                Emit(RegexCode.Setmark);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }

            break;

        case RegexNode.Testgroup | AfterChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Getmark);
                Emit(RegexCode.Forejump);
            }
            else if (curIndex == 1)
            {
                int lazyBranchPos = PopInt();
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
                PatchJump(lazyBranchPos, CurPos());
                Emit(RegexCode.Getmark);
                Emit(RegexCode.Forejump);

                // With no third child, the Goto is patched right away.
                if (node._children.Count <= 2)
                {
                    PatchJump(PopInt(), CurPos());
                }
            }
            else if (curIndex == 2)
            {
                PatchJump(PopInt(), CurPos());
            }

            break;

        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
        {
            bool mIsZero = node._m == 0;

            if (node._n < int.MaxValue || node._m > 1)
            {
                if (mIsZero)
                {
                    Emit(RegexCode.Nullcount, 0);
                }
                else
                {
                    Emit(RegexCode.Setcount, 1 - node._m);
                }
            }
            else
            {
                Emit(mIsZero ? RegexCode.Nullmark : RegexCode.Setmark);
            }

            if (mIsZero)
            {
                // This Goto is patched in the matching AfterChild visit.
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
            }

            PushInt(CurPos());
            break;
        }

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
        {
            int branchOpPos = CurPos();

            // Difference between the visited node type and Loop, added to the branch opcode.
            int lazyOffset = nodetype - (RegexNode.Loop | AfterChild);

            if (node._n < int.MaxValue || node._m > 1)
            {
                Emit(RegexCode.Branchcount + lazyOffset, PopInt(), node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
            }
            else
            {
                Emit(RegexCode.Branchmark + lazyOffset, PopInt());
            }

            if (node._m == 0)
            {
                PatchJump(PopInt(), branchOpPos);
            }

            break;
        }

        case RegexNode.Capture | BeforeChild:
            // Note that this capture group starts here.
            _capPositions[MapCapnum(node._m)] = _curpos;
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Setjump);
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            PushInt(CurPos());
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(PopInt(), CurPos());
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node._type | bits, node._ch);
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
        {
            // The first _m repetitions are emitted as a fixed-count rep opcode.
            if (node._m > 0)
            {
                bool isOneVariant = node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy;
                int repOpcode = isOneVariant ? RegexCode.Onerep : RegexCode.Notonerep;
                Emit(repOpcode | bits, node._ch, node._m);
            }

            // Anything beyond _m is emitted with the node's own opcode.
            if (node._n > node._m)
            {
                Emit(node._type | bits, node._ch, node._n == int.MaxValue ? int.MaxValue : node._n - node._m);
            }

            break;
        }

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            if (node._m > 0)
            {
                Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
            }

            if (node._n > node._m)
            {
                Emit(node._type | bits, StringCode(node._str), (node._n == int.MaxValue) ? int.MaxValue : node._n - node._m);
            }

            break;

        case RegexNode.Multi:
        case RegexNode.Set:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Ref:
            Emit(node._type | bits, MapCapnum(node._m));
            break;

        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node._type);
            break;

        case RegexNode.ResetMatchStart:
            _resetMatchStartFound = true;
            Emit(node._type);
            break;

        case RegexNode.CallSubroutine:
            Emit(RegexCode.CallSubroutine, MapCapnum(node._m));
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
    }
}