/// <summary>
/// Builds the replacement tables from a parsed replacement pattern.
/// Literal characters and strings are merged into entries of <c>_strings</c>;
/// each entry of <c>_rules</c> is either a non-negative index into
/// <c>_strings</c> or a negative value encoding a group reference as
/// <c>-5 - groupNumber</c>.
/// </summary>
/// <param name="rep">The original replacement text, stored in <c>_rep</c>.</param>
/// <param name="concat">Root node; its type must be 0x19 (presumably Concatenate).</param>
/// <param name="_caps">Optional group-number remapping table; may be null.</param>
/// <exception cref="ArgumentException">
/// The root is not of type 0x19, or a child is not of type 9, 12 or 13.
/// </exception>
internal RegexReplacement(string rep, RegexNode concat, System.Collections.Generic.Dictionary<object,object> _caps)
{
    this._rep = rep;

    if (concat.Type() != 0x19)
    {
        throw new ArgumentException(RegExRes.GetString(0x25));
    }

    StringBuilder literal = new StringBuilder();
    ArrayList literals = new ArrayList();
    ArrayList ruleList = new ArrayList();

    int index = 0;
    while (index < concat.ChildCount())
    {
        RegexNode part = concat.Child(index);
        int kind = part.Type();

        if (kind == 9)
        {
            // single character
            literal.Append(part._ch);
        }
        else if (kind == 12)
        {
            // multi-character string
            literal.Append(part._str);
        }
        else if (kind == 13)
        {
            // group reference: flush any pending literal text first
            if (literal.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(literal.ToString());
                literal.Length = 0;
            }

            int group = part._m;
            if ((_caps != null) && (group >= 0))
            {
                group = (int) _caps[group];
            }

            ruleList.Add(-5 - group);
        }
        else
        {
            throw new ArgumentException(RegExRes.GetString(0x25));
        }

        index++;
    }

    // trailing literal text, if any
    if (literal.Length > 0)
    {
        ruleList.Add(literals.Count);
        literals.Add(literal.ToString());
    }

    this._strings = new string[literals.Count];
    literals.CopyTo(this._strings);

    this._rules = new int[ruleList.Count];
    int position = 0;
    foreach (object boxedRule in ruleList)
    {
        this._rules[position] = (int) boxedRule;
        position++;
    }
}
/// <summary>
/// Captures the results of parsing: the root node, the capture tables and
/// the options. Every argument is stored as-is in the matching field.
/// </summary>
internal RegexTree(RegexNode root, Hashtable caps, Object[] capnumlist, int captop, Hashtable capnames, String[] capslist, RegexOptions opts)
{
    // parse result and options
    this._root = root;
    this._options = opts;

    // capture bookkeeping
    this._caps = caps;
    this._capnumlist = capnumlist;
    this._captop = captop;
    this._capnames = capnames;
    this._capslist = capslist;
}
/*
 * The replacement pattern is parsed by the regular Regex parser, so this
 * constructor receives a RegexNode that must be a concatenation whose
 * children are constant strings, single characters and backreferences.
 * Adjacent constants are merged into one entry of _strings; _rules holds
 * either an index into _strings or a negative, encoded group reference.
 */
internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps)
{
    _rep = rep;

    if (concat.Type() != RegexNode.Concatenate)
    {
        throw new ArgumentException(SR.ReplacementError);
    }

    StringBuilder literal = new StringBuilder();
    List<String> literals = new List<String>();
    List<Int32> ruleList = new List<Int32>();

    int index = 0;
    while (index < concat.ChildCount())
    {
        RegexNode part = concat.Child(index);
        int kind = part.Type();

        if (kind == RegexNode.Multi)
        {
            literal.Append(part._str);
        }
        else if (kind == RegexNode.One)
        {
            literal.Append(part._ch);
        }
        else if (kind == RegexNode.Ref)
        {
            // flush pending constant text before recording the reference
            if (literal.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(literal.ToString());
                literal.Length = 0;
            }

            int group = part._m;
            if (_caps != null && group >= 0)
            {
                group = (int)_caps[group];
            }

            ruleList.Add(-Specials - 1 - group);
        }
        else
        {
            throw new ArgumentException(SR.ReplacementError);
        }

        index++;
    }

    // trailing constant text, if any
    if (literal.Length > 0)
    {
        ruleList.Add(literals.Count);
        literals.Add(literal.ToString());
    }

    _strings = literals;
    _rules = ruleList;
}
/// <summary>
/// Captures the results of parsing: the root node, the capture tables and
/// the options. Every argument is stored as-is in the matching field.
/// </summary>
internal RegexTree(RegexNode root, Dictionary<Int32, Int32> caps, Int32[] capnumlist, int captop, Dictionary<String, Int32> capnames, String[] capslist, RegexOptions opts)
{
    // parse result and options
    this._root = root;
    this._options = opts;

    // capture bookkeeping
    this._caps = caps;
    this._capnumlist = capnumlist;
    this._captop = captop;
    this._capnames = capnames;
    this._capslist = capslist;
}
/// <summary>
/// Captures the results of parsing: the root node, the capture tables and
/// the options. Every argument is stored as-is in the matching field.
/// </summary>
internal RegexTree(RegexNode root, Hashtable caps, int[] capnumlist, int captop, Hashtable capnames, string[] capslist, RegexOptions opts)
{
    // parse result and options
    _root = root;
    _options = opts;

    // capture bookkeeping
    _caps = caps;
    _capnumlist = capnumlist;
    _captop = captop;
    _capnames = capnames;
    _capslist = capslist;
}
private readonly List<string> _strings; // table of string constants

#endregion Fields

#region Constructors

/// <summary>
/// The replacement pattern is parsed by the regular Regex parser, so this
/// constructor receives a RegexNode that must be a concatenation whose
/// children are constant strings, single characters and backreferences.
/// Adjacent constants are merged into one entry of the string table; the
/// rule list holds either an index into that table or a negative, encoded
/// group reference.
/// </summary>
/// <exception cref="ArgumentException">
/// The root is not a Concatenate node, or a child is not Multi, One or Ref.
/// </exception>
internal RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
{
    if (concat.Type() != RegexNode.Concatenate)
    {
        throw new ArgumentException(SR.ReplacementError);
    }

    StringBuilder literal = StringBuilderCache.Acquire();
    List<string> literals = new List<string>();
    List<int> ruleList = new List<int>();

    int index = 0;
    while (index < concat.ChildCount())
    {
        RegexNode part = concat.Child(index);
        int kind = part.Type();

        if (kind == RegexNode.Multi)
        {
            literal.Append(part._str);
        }
        else if (kind == RegexNode.One)
        {
            literal.Append(part._ch);
        }
        else if (kind == RegexNode.Ref)
        {
            // flush pending constant text before recording the reference
            if (literal.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(literal.ToString());
                literal.Length = 0;
            }

            int group = part._m;
            if (_caps != null && group >= 0)
            {
                group = (int)_caps[group];
            }

            ruleList.Add(-Specials - 1 - group);
        }
        else
        {
            throw new ArgumentException(SR.ReplacementError);
        }

        index++;
    }

    // trailing constant text, if any
    if (literal.Length > 0)
    {
        ruleList.Add(literals.Count);
        literals.Add(literal.ToString());
    }

    StringBuilderCache.Release(literal);

    _rep = rep;
    _strings = literals;
    _rules = ruleList;
}
/// <summary>
/// Captures the results of parsing: the root node, the capture tables and
/// the options. Every argument is stored as-is in the matching field.
/// </summary>
internal RegexTree(RegexNode root, System.Collections.Generic.Dictionary<object,object> caps, object[] capnumlist, int captop, System.Collections.Generic.Dictionary<object,object> capnames, string[] capslist, RegexOptions opts)
{
    // parse result and options
    _root = root;
    _options = opts;

    // capture bookkeeping
    _caps = caps;
    _capnumlist = capnumlist;
    _captop = captop;
    _capnames = capnames;
    _capslist = capslist;
}
/// <summary>
/// Closes the current concatenation and attaches it (after ReverseLeft) as a
/// new branch: directly to the group when the group's type is 0x22 or 0x21,
/// otherwise to the current alternation. A fresh concatenation node of type
/// 0x19 is then started.
/// </summary>
internal void AddAlternate()
{
    RegexNode branchOwner;
    if (this._group.Type() != 0x22 && this._group.Type() != 0x21)
    {
        branchOwner = this._alternation;
    }
    else
    {
        branchOwner = this._group;
    }

    branchOwner.AddChild(this._concatenation.ReverseLeft());

    this._concatenation = new RegexNode(0x19, this._options);
}
/// <summary>
/// Builds the replacement tables from a parsed replacement pattern.
/// Literal characters and strings are merged into entries of <c>_strings</c>;
/// each entry of <c>_rules</c> is either a non-negative index into
/// <c>_strings</c> or a negative value encoding a group reference as
/// <c>-5 - groupNumber</c>.
/// </summary>
/// <param name="rep">The original replacement text, stored in <c>_rep</c>.</param>
/// <param name="concat">Root node; its type must be 0x19 (presumably Concatenate).</param>
/// <param name="_caps">Optional group-number remapping table; may be null.</param>
/// <exception cref="ArgumentException">
/// The root is not of type 0x19, or a child is not of type 9, 12 or 13.
/// </exception>
internal RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
{
    this._rep = rep;

    if (concat.Type() != 0x19)
    {
        throw new ArgumentException(SR.GetString("ReplacementError"));
    }

    StringBuilder literal = new StringBuilder();
    List<string> literals = new List<string>();
    List<int> ruleList = new List<int>();

    int index = 0;
    while (index < concat.ChildCount())
    {
        RegexNode part = concat.Child(index);
        int kind = part.Type();

        if (kind == 9)
        {
            // single character
            literal.Append(part._ch);
        }
        else if (kind == 12)
        {
            // multi-character string
            literal.Append(part._str);
        }
        else if (kind == 13)
        {
            // group reference: flush any pending literal text first
            if (literal.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(literal.ToString());
                literal.Length = 0;
            }

            int group = part._m;
            if ((_caps != null) && (group >= 0))
            {
                group = (int) _caps[group];
            }

            ruleList.Add(-5 - group);
        }
        else
        {
            throw new ArgumentException(SR.GetString("ReplacementError"));
        }

        index++;
    }

    // trailing literal text, if any
    if (literal.Length > 0)
    {
        ruleList.Add(literals.Count);
        literals.Add(literal.ToString());
    }

    this._strings = literals;
    this._rules = ruleList;
}
/// <summary>
/// Reduces <paramref name="newChild"/>, appends the reduced node to this
/// node's child list (creating the list on first use) and points the reduced
/// node's <c>_next</c> link back at this node.
/// </summary>
internal void AddChild(RegexNode newChild)
{
    // the child list is allocated lazily, on the first child
    if (_children == null)
    {
        _children = new List<RegexNode>(4);
    }

    RegexNode simplified = newChild.Reduce();

    _children.Add(simplified);
    simplified._next = this;
}
/// <summary>
/// Collapses nested repeaters into one by multiplying their bounds, as long
/// as the inner repeater is of a compatible kind and is not too "lumpy".
/// Returns a Nothing node when the combined minimum saturates at
/// int.MaxValue; otherwise returns the innermost node that was merged into.
/// </summary>
private RegexNode ReduceRep()
{
    RegexNode current = this;
    int outerType = Type();
    int minProduct = M;
    int maxProduct = N;

    while (current.ChildCount() != 0)
    {
        RegexNode inner = current.Child(0);

        // multiply reps of the same type only
        int innerType = inner.Type();
        if (innerType != outerType)
        {
            bool greedyPair = innerType >= Oneloop && innerType <= Setloop && outerType == Loop;
            bool lazyPair = innerType >= Onelazy && innerType <= Setlazy && outerType == Lazyloop;
            if (!greedyPair && !lazyPair)
            {
                break;
            }
        }

        // child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
        // [but things like (a {2,})+ are not too lumpy...]
        bool tooLumpy = (current.M == 0 && inner.M > 1) || inner.N < inner.M * 2;
        if (tooLumpy)
        {
            break;
        }

        current = inner;

        // saturate at int.MaxValue instead of overflowing
        if (current.M > 0)
        {
            minProduct = ((int.MaxValue - 1) / current.M < minProduct) ? int.MaxValue : current.M * minProduct;
            current.M = minProduct;
        }

        if (current.N > 0)
        {
            maxProduct = ((int.MaxValue - 1) / current.N < maxProduct) ? int.MaxValue : current.N * maxProduct;
            current.N = maxProduct;
        }
    }

    if (minProduct == int.MaxValue)
    {
        return new RegexNode(Nothing, Options);
    }

    return current;
}
/// <summary>
/// Walks the tree depth-first without recursion, calling CalculateFC at each
/// leaf and before (type | 0x40) and after (type | 0x80) each child of an
/// interior node. Child indices are kept on the int stack.
/// </summary>
/// <returns>
/// The RegexFC left on the FC stack, or null when <c>_failed</c> is set after
/// an after-child step or when the FC stack is empty at the end.
/// </returns>
private RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (current._children == null)
        {
            // leaf
            this.CalculateFC(current._type, current, 0);
        }
        else if ((childIndex < current._children.Count) && !this._skipAllChildren)
        {
            // before-child step
            this.CalculateFC(current._type | 0x40, current, childIndex);

            if (this._skipchild)
            {
                childIndex++;
                this._skipchild = false;
            }
            else
            {
                // descend into the child
                current = current._children[childIndex];
                this.PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        this._skipAllChildren = false;

        if (this.IntIsEmpty())
        {
            break;
        }

        // after-child step: climb back to the parent
        childIndex = this.PopInt();
        current = current._next;
        this.CalculateFC(current._type | 0x80, current, childIndex);

        if (this._failed)
        {
            return null;
        }

        childIndex++;
    }

    return this.FCIsEmpty() ? null : this.PopFC();
}
/// <summary>
/// Collapses nested repeaters into one by multiplying their bounds. The inner
/// node must have the same type as this node, or be of type 3..5 under a
/// 0x1a node, or of type 6..8 under a 0x1b node, and must pass the bounds
/// check below. Returns a new node of type 0x16 when the combined minimum
/// saturates at 0x7fffffff; otherwise returns the innermost merged node.
/// </summary>
internal RegexNode ReduceRep()
{
    RegexNode current = this;
    int outerType = this.Type();
    int minProduct = this._m;
    int maxProduct = this._n;

    while (current.ChildCount() != 0)
    {
        RegexNode inner = current.Child(0);

        int innerType = inner.Type();
        if (innerType != outerType)
        {
            bool greedyPair = (innerType >= 3) && (innerType <= 5) && (outerType == 0x1a);
            bool lazyPair = (innerType >= 6) && (innerType <= 8) && (outerType == 0x1b);
            if (!greedyPair && !lazyPair)
            {
                break;
            }
        }

        // stop when the inner bounds cannot be merged with the outer ones
        bool unmergeable = ((current._m == 0) && (inner._m > 1)) || (inner._n < (inner._m * 2));
        if (unmergeable)
        {
            break;
        }

        current = inner;

        // saturate at 0x7fffffff instead of overflowing
        if (current._m > 0)
        {
            minProduct = ((0x7ffffffe / current._m) < minProduct) ? 0x7fffffff : (current._m * minProduct);
            current._m = minProduct;
        }

        if (current._n > 0)
        {
            maxProduct = ((0x7ffffffe / current._n) < maxProduct) ? 0x7fffffff : (current._n * maxProduct);
            current._n = maxProduct;
        }
    }

    if (minProduct == 0x7fffffff)
    {
        return new RegexNode(0x16, this._options);
    }

    return current;
}
/// <summary>
/// Walks the tree depth-first without recursion, calling CalculateFC at each
/// leaf and before (type | 0x40) and after (type | 0x80) each child of an
/// interior node. Child indices are kept on the int stack.
/// </summary>
/// <returns>
/// The RegexFC left on the FC stack, or a new <c>RegexFC("\0", true, false)</c>
/// when the FC stack is empty at the end of the walk.
/// </returns>
internal RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (current._children == null)
        {
            // leaf
            this.CalculateFC(current._type, current, 0);
        }
        else if ((childIndex < current._children.Count) && !this._earlyexit)
        {
            // before-child step
            this.CalculateFC(current._type | 0x40, current, childIndex);

            if (this._skipchild)
            {
                childIndex++;
                this._skipchild = false;
            }
            else
            {
                // descend into the child
                current = (RegexNode)current._children[childIndex];
                this.PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        this._earlyexit = false;

        if (this.EmptyInt())
        {
            break;
        }

        // after-child step: climb back to the parent
        childIndex = this.PopInt();
        current = current._next;
        this.CalculateFC(current._type | 0x80, current, childIndex);
        childIndex++;
    }

    if (this.EmptyFC())
    {
        return new RegexFC("\0", true, false);
    }

    return this.PopFC();
}
/// <summary> /// The main RegexCode generator. It does a depth-first walk /// through the tree and calls EmitFragment to emits code before /// and after each child of an interior node, and at each leaf. /// </summary> private void EmitFragment(int nodetype, RegexNode node, int curIndex) { int bits = 0; if (nodetype <= RegexNode.Ref) { if (node.UseOptionR()) { bits |= RegexCode.Rtl; } if ((node.Options & RegexOptions.IgnoreCase) != 0) { bits |= RegexCode.Ci; } } switch (nodetype) { case RegexNode.Concatenate | BeforeChild: case RegexNode.Concatenate | AfterChild: case RegexNode.Empty: break; case RegexNode.Alternate | BeforeChild: if (curIndex < node.Children !.Count - 1) { _intStack.Append(_emitted.Length); Emit(RegexCode.Lazybranch, 0); } break; case RegexNode.Alternate | AfterChild: { if (curIndex < node.Children !.Count - 1) { int LBPos = _intStack.Pop(); _intStack.Append(_emitted.Length); Emit(RegexCode.Goto, 0); PatchJump(LBPos, _emitted.Length); }
/// <summary>
/// Stores the parse result, capture tables, options and culture, and builds
/// the find optimizations from <paramref name="root"/> and
/// <paramref name="options"/>. In DEBUG builds the relationships between the
/// capture tables are asserted first.
/// </summary>
internal RegexTree(RegexNode root, int captureCount, string[]? captureNames, Hashtable? captureNameToNumberMapping, Hashtable? captureNumberSparseMapping, RegexOptions options, CultureInfo? culture)
{
#if DEBUG
    // Asserts to both demonstrate and validate the relationships between the various capture data structures.
    Debug.Assert(captureNumberSparseMapping is null || captureNames is not null);
    Debug.Assert((captureNames is null) == (captureNameToNumberMapping is null));
    Debug.Assert(captureNames is null || captureCount == captureNames.Length);
    Debug.Assert(captureNumberSparseMapping is null || captureCount == captureNumberSparseMapping.Count);
    Debug.Assert(captureNameToNumberMapping is null || captureCount == captureNameToNumberMapping.Count);

    if (captureNames is not null)
    {
        Debug.Assert(captureNameToNumberMapping is not null);

        // every name must map (through the sparse table, when present) back to its own index
        int expectedIndex = 0;
        foreach (string name in captureNames)
        {
            int? mapped = captureNameToNumberMapping[name] as int?;
            Debug.Assert(mapped is not null);

            if (captureNumberSparseMapping is not null)
            {
                mapped = captureNumberSparseMapping[mapped] as int?;
                Debug.Assert(mapped is not null);
            }

            Debug.Assert(mapped == expectedIndex);
            expectedIndex++;
        }
    }
#endif

    Root = root;
    Options = options;
    Culture = culture;

    CaptureCount = captureCount;
    CaptureNames = captureNames;
    CaptureNameToNumberMapping = captureNameToNumberMapping;
    CaptureNumberSparseMapping = captureNumberSparseMapping;

    FindOptimizations = new RegexFindOptimizations(root, options);
}
/// <summary>
/// Applies a {min,max} quantifier to this node.
/// {0,0} yields a new node of type 0x17; {1,1} returns this node unchanged;
/// nodes of type 9, 10 or 11 are converted in place via MakeRep; any other
/// node is wrapped as the only child of a new loop node (0x1b when lazy,
/// 0x1a otherwise).
/// </summary>
internal RegexNode MakeQuantifier(bool lazy, int min, int max)
{
    if (min == 0 && max == 0)
    {
        return new RegexNode(0x17, this._options);
    }

    if (min == 1 && max == 1)
    {
        return this;
    }

    bool convertInPlace = this._type == 9 || this._type == 10 || this._type == 11;
    if (convertInPlace)
    {
        int repBase = lazy ? 6 : 3;
        this.MakeRep(repBase, min, max);
        return this;
    }

    int loopType = lazy ? 0x1b : 0x1a;
    RegexNode loop = new RegexNode(loopType, this._options, min, max);
    loop.AddChild(this);
    return loop;
}
/*
 * This is a related computation: it takes a RegexTree and computes the
 * leading substring if it sees one. It's quite trivial and gives up easily,
 * returning RegexPrefix.Empty whenever no literal prefix can be determined.
 *
 * For a leading one-character loop (e.g. a{5,}) the prefix is the character
 * repeated _m times. Because _m comes straight from the pattern, it can be
 * arbitrarily large (e.g. a{2000000000}); materializing such a string would
 * exhaust memory while merely constructing the regex. Repeat counts at or
 * above MaxRepeatPrefixLength therefore give up and return
 * RegexPrefix.Empty, which only disables the prefix optimization.
 */
internal static RegexPrefix Prefix(RegexTree tree)
{
    // Upper bound (exclusive) on the length of a prefix built from a repeated character.
    const int MaxRepeatPrefixLength = 1000000;

    RegexNode curNode;
    RegexNode concatNode = null;
    int nextChild = 0;

    curNode = tree._root;

    for (; ;)
    {
        switch (curNode._type)
        {
            case RegexNode.Concatenate:
                // remember the concatenation so its children can be scanned in order
                if (curNode.ChildCount() > 0)
                {
                    concatNode = curNode;
                    nextChild = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // look through the wrapper at its first child
                curNode = curNode.Child(0);
                concatNode = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (curNode._m > 0 && curNode._m < MaxRepeatPrefixLength)
                {
                    string pref = new string(curNode._ch, curNode._m);
                    return(new RegexPrefix(pref, 0 != (curNode._options & RegexOptions.IgnoreCase)));
                }
                return(RegexPrefix.Empty);

            case RegexNode.One:
                return(new RegexPrefix(curNode._ch.ToString(), 0 != (curNode._options & RegexOptions.IgnoreCase)));

            case RegexNode.Multi:
                return(new RegexPrefix(curNode._str, 0 != (curNode._options & RegexOptions.IgnoreCase)));

            // these node types are skipped; scanning moves on to the next child
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                break;

            default:
                return(RegexPrefix.Empty);
        }

        if (concatNode == null || nextChild >= concatNode.ChildCount())
        {
            return(RegexPrefix.Empty);
        }

        curNode = concatNode.Child(nextChild++);
    }
}
/// <summary>
/// Finishes the current group and makes it the current unit. For groups of
/// type 0x22 or 0x21 the pending concatenation becomes a direct child of the
/// group, and the number of children is limited (2 for type 0x21, 3
/// otherwise). For any other group the concatenation is added to the
/// alternation, which in turn becomes the group's child.
/// </summary>
internal void AddGroup()
{
    bool conditionalGroup = (this._group.Type() == 0x22) || (this._group.Type() == 0x21);

    if (!conditionalGroup)
    {
        this._alternation.AddChild(this._concatenation.ReverseLeft());
        this._group.AddChild(this._alternation);
    }
    else
    {
        this._group.AddChild(this._concatenation.ReverseLeft());

        bool tooMany = ((this._group.Type() == 0x21) && (this._group.ChildCount() > 2))
            || (this._group.ChildCount() > 3);
        if (tooMany)
        {
            throw this.MakeException(SR.GetString("TooManyAlternates"));
        }
    }

    this._unit = this._group;
}
// Stub constructor: the body is empty, so rep, concat and caps are neither validated nor stored here.
public RegexReplacement(string rep, RegexNode concat, Hashtable caps) { }
/*
 * The main RegexCode generator. It does a depth-first walk
 * through the tree and calls EmitFragment to emits code before
 * and after each child of an interior node, and at each leaf.
 *
 * nodetype is the node's type, OR'ed with BeforeChild or AfterChild when
 * the call is made around a child of an interior node. CurIndex is the
 * index of that child. Positions of jumps that still need a target are
 * kept on the int stack (PushInt/PopInt) and fixed up with PatchJump, so
 * the order of pushes and pops across the Before/After calls matters.
 *
 * Throws the exception built by MakeException for an unrecognized nodetype.
 */
internal void EmitFragment(int nodetype, RegexNode node, int CurIndex)
{
    int bits = 0;

    // Modifier bits (right-to-left, case-insensitive) are only computed for
    // node types up to RegexNode.Ref.
    if (nodetype <= RegexNode.Ref)
    {
        if (node.UseOptionR())
        {
            bits |= RegexCode.Rtl;
        }
        if ((node._options & RegexOptions.IgnoreCase) != 0)
        {
            bits |= RegexCode.Ci;
        }
    }

    switch (nodetype)
    {
        // Nothing is emitted for concatenations or empty nodes.
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
            break;

        // Before every branch except the last: emit a Lazybranch with a
        // placeholder target and remember its position.
        case RegexNode.Alternate | BeforeChild:
            if (CurIndex < node._children.Count - 1)
            {
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }
            break;

        // After a non-last branch: emit a Goto (patched later) and point the
        // branch's Lazybranch at the code that follows. After the last
        // branch: patch all the pending Gotos to the current position.
        case RegexNode.Alternate | AfterChild:
        {
            if (CurIndex < node._children.Count - 1)
            {
                int LBPos = PopInt();
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
                PatchJump(LBPos, CurPos());
            }
            else
            {
                int I;
                for (I = 0; I < CurIndex; I++)
                {
                    PatchJump(PopInt(), CurPos());
                }
            }
            break;
        }

        // Only the first child (index 0) gets a preamble.
        case RegexNode.Testref | BeforeChild:
            switch (CurIndex)
            {
                case 0:
                    Emit(RegexCode.Setjump);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node._m));
                    Emit(RegexCode.Forejump);
                    break;
            }
            break;

        case RegexNode.Testref | AfterChild:
            switch (CurIndex)
            {
                case 0:
                {
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Forejump);
                    if (node._children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    // (no second child: patch the Goto right away)
                    goto case 1;
                }
                case 1:
                    PatchJump(PopInt(), CurPos());
                    break;
            }
            break;

        // Only the first child (index 0) gets a preamble.
        case RegexNode.Testgroup | BeforeChild:
            switch (CurIndex)
            {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    PushInt(CurPos());
                    Emit(RegexCode.Lazybranch, 0);
                    break;
            }
            break;

        case RegexNode.Testgroup | AfterChild:
            switch (CurIndex)
            {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;
                case 1:
                    int Branchpos = PopInt();
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, CurPos());
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    if (node._children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    // (no third child: patch the Goto right away)
                    goto case 2;
                case 2:
                    PatchJump(PopInt(), CurPos());
                    break;
            }
            break;

        // Loops with a finite maximum or a minimum above 1 use the counting
        // opcodes; all other loops use the mark opcodes. When the minimum is
        // 0, a Goto is emitted first and its position pushed so it can be
        // patched in the AfterChild step.
        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:

            if (node._n < infinite || node._m > 1)
            {
                Emit(node._m == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node._m == 0 ? 0 : 1 - node._m);
            }
            else
            {
                Emit(node._m == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
            }

            if (node._m == 0)
            {
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
            }
            PushInt(CurPos());
            break;

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
        {
            int StartJumpPos = CurPos();
            // offset added to the branch opcode: 0 for Loop, nonzero for Lazyloop
            int Lazy = (nodetype - (RegexNode.Loop | AfterChild));

            if (node._n < infinite || node._m > 1)
            {
                Emit(RegexCode.Branchcount + Lazy, PopInt(), node._n == infinite ? infinite : node._n - node._m);
            }
            else
            {
                Emit(RegexCode.Branchmark + Lazy, PopInt());
            }

            if (node._m == 0)
            {
                PatchJump(PopInt(), StartJumpPos);
            }
        }
        break;

        // Nothing is emitted for plain groups.
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            break;

        case RegexNode.Capture | BeforeChild:
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Setjump);

            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);

            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Forejump);

            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            PushInt(CurPos());
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(PopInt(), CurPos());
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        // Leaf nodes: the node type, combined with the modifier bits, is
        // passed to Emit as the opcode.
        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node._type | bits, (int)node._ch);
            break;

        // Single-character loops: the first _m repetitions are emitted as a
        // rep opcode; whatever remains up to _n is emitted as the loop itself.
        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            if (node._m > 0)
            {
                Emit(((node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy) ? RegexCode.Onerep : RegexCode.Notonerep) | bits, (int)node._ch, node._m);
            }
            if (node._n > node._m)
            {
                Emit(node._type | bits, (int)node._ch, node._n == infinite ? infinite : node._n - node._m);
            }
            break;

        // Set loops follow the same rep-then-loop split as above.
        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            if (node._m > 0)
            {
                Emit(RegexCode.Setrep | bits, StringCode(node._str), StringCode(node._str2), node._m);
            }
            if (node._n > node._m)
            {
                Emit(node._type | bits, StringCode(node._str), StringCode(node._str2), (node._n == infinite) ? infinite : node._n - node._m);
            }
            break;

        case RegexNode.Multi:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Set:
            Emit(node._type | bits, StringCode(node._str), StringCode(node._str2));
            break;

        case RegexNode.Ref:
            Emit(node._type | bits, MapCapnum(node._m));
            break;

        // These nodes are emitted as a bare opcode with no operands.
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
#if ECMA
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
#endif
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node._type);
            break;

        default:
            throw MakeException(SR.GetString(SR.UnexpectedOpcode, nodetype.ToString()));
    }
}
/// <summary>
/// The main RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emits code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
/// <remarks>
/// Positions of jumps that still need a target are kept on
/// <c>_intStack</c> and fixed up with PatchJump, so the order of appends and
/// pops across the BeforeChild/AfterChild calls matters.
/// </remarks>
/// <param name="nodetype">The node's type, OR'ed with BeforeChild or AfterChild for calls made around a child.</param>
/// <param name="node">The node being emitted.</param>
/// <param name="curIndex">Index of the child the call is made around.</param>
/// <exception cref="ArgumentException">The node type is not recognized.</exception>
private void EmitFragment(int nodetype, RegexNode node, int curIndex)
{
    int bits = 0;

    // Modifier bits (right-to-left, case-insensitive) are only computed for
    // node types up to RegexNode.Ref.
    if (nodetype <= RegexNode.Ref)
    {
        if (node.UseOptionR())
        {
            bits |= RegexCode.Rtl;
        }
        if ((node.Options & RegexOptions.IgnoreCase) != 0)
        {
            bits |= RegexCode.Ci;
        }
    }

    switch (nodetype)
    {
        // Nothing is emitted for concatenations or empty nodes.
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
            break;

        // Before every branch except the last: emit a Lazybranch with a
        // placeholder target and remember its position.
        case RegexNode.Alternate | BeforeChild:
            if (curIndex < node.Children.Count - 1)
            {
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Lazybranch, 0);
            }
            break;

        // After a non-last branch: emit a Goto (patched later) and point the
        // branch's Lazybranch at the code that follows. After the last
        // branch: patch all the pending Gotos to the current position.
        case RegexNode.Alternate | AfterChild:
        {
            if (curIndex < node.Children.Count - 1)
            {
                int LBPos = _intStack.Pop();
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Goto, 0);
                PatchJump(LBPos, _emitted.Length);
            }
            else
            {
                int I;
                for (I = 0; I < curIndex; I++)
                {
                    PatchJump(_intStack.Pop(), _emitted.Length);
                }
            }
            break;
        }

        // Only the first child (index 0) gets a preamble.
        case RegexNode.Testref | BeforeChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Setjump);
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                    Emit(RegexCode.Testref, MapCapnum(node.M));
                    Emit(RegexCode.Forejump);
                    break;
            }
            break;

        case RegexNode.Testref | AfterChild:
            switch (curIndex)
            {
                case 0:
                {
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Forejump);
                    if (node.Children.Count > 1)
                    {
                        break;
                    }
                    // else fallthrough
                    // (no second child: patch the Goto right away)
                    goto case 1;
                }
                case 1:
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
            }
            break;

        // Only the first child (index 0) gets a preamble.
        case RegexNode.Testgroup | BeforeChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Setjump);
                    Emit(RegexCode.Setmark);
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Lazybranch, 0);
                    break;
            }
            break;

        case RegexNode.Testgroup | AfterChild:
            switch (curIndex)
            {
                case 0:
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    break;
                case 1:
                    int Branchpos = _intStack.Pop();
                    _intStack.Append(_emitted.Length);
                    Emit(RegexCode.Goto, 0);
                    PatchJump(Branchpos, _emitted.Length);
                    Emit(RegexCode.Getmark);
                    Emit(RegexCode.Forejump);
                    if (node.Children.Count > 2)
                    {
                        break;
                    }
                    // else fallthrough
                    // (no third child: patch the Goto right away)
                    goto case 2;
                case 2:
                    PatchJump(_intStack.Pop(), _emitted.Length);
                    break;
            }
            break;

        // Loops with a finite maximum or a minimum above 1 use the counting
        // opcodes; all other loops use the mark opcodes. When the minimum is
        // 0, a Goto is emitted first and its position appended so it can be
        // patched in the AfterChild step.
        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:

            if (node.N < int.MaxValue || node.M > 1)
            {
                Emit(node.M == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node.M == 0 ? 0 : 1 - node.M);
            }
            else
            {
                Emit(node.M == 0 ? RegexCode.Nullmark : RegexCode.Setmark);
            }

            if (node.M == 0)
            {
                _intStack.Append(_emitted.Length);
                Emit(RegexCode.Goto, 0);
            }
            _intStack.Append(_emitted.Length);
            break;

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
        {
            int StartJumpPos = _emitted.Length;
            // offset added to the branch opcode: 0 for Loop, nonzero for Lazyloop
            int Lazy = (nodetype - (RegexNode.Loop | AfterChild));

            if (node.N < int.MaxValue || node.M > 1)
            {
                Emit(RegexCode.Branchcount + Lazy, _intStack.Pop(), node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
            }
            else
            {
                Emit(RegexCode.Branchmark + Lazy, _intStack.Pop());
            }

            if (node.M == 0)
            {
                PatchJump(_intStack.Pop(), StartJumpPos);
            }
        }
        break;

        // Nothing is emitted for plain groups.
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            break;

        case RegexNode.Capture | BeforeChild:
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node.M), MapCapnum(node.N));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Setjump);

            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);

            // NOTE: the following line causes lookahead/lookbehind to be
            // NON-BACKTRACKING. It can be commented out with (*)
            Emit(RegexCode.Forejump);

            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            _intStack.Append(_emitted.Length);
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(_intStack.Pop(), _emitted.Length);
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        // Leaf nodes: the node type, combined with the modifier bits, is
        // passed to Emit as the opcode.
        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node.NType | bits, node.Ch);
            break;

        // Single-character loops: the first M repetitions are emitted as a
        // rep opcode; whatever remains up to N is emitted as the loop itself.
        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            if (node.M > 0)
            {
                Emit(((node.NType == RegexNode.Oneloop || node.NType == RegexNode.Onelazy) ? RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
            }
            if (node.N > node.M)
            {
                Emit(node.NType | bits, node.Ch, node.N == int.MaxValue ? int.MaxValue : node.N - node.M);
            }
            break;

        // Set loops follow the same rep-then-loop split as above.
        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            if (node.M > 0)
            {
                Emit(RegexCode.Setrep | bits, StringCode(node.Str), node.M);
            }
            if (node.N > node.M)
            {
                Emit(node.NType | bits, StringCode(node.Str), (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M);
            }
            break;

        case RegexNode.Multi:
            Emit(node.NType | bits, StringCode(node.Str));
            break;

        case RegexNode.Set:
            Emit(node.NType | bits, StringCode(node.Str));
            break;

        case RegexNode.Ref:
            Emit(node.NType | bits, MapCapnum(node.M));
            break;

        // These nodes are emitted as a bare opcode with no operands.
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node.NType);
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
    }
}
/// <summary>
/// Computes the literal text a match of <paramref name="tree"/> must start
/// with, if one can be found cheaply. The scan looks through Atomic and
/// Capture wrappers, steps over anchors, lookarounds and empty nodes inside a
/// concatenation, and stops at the first character, string or one-character
/// loop. Anything else yields <see cref="RegexPrefix.Empty"/>.
/// </summary>
public static RegexPrefix Prefix(RegexTree tree)
{
    // In release, cutoff at a length to which we can still reasonably construct a string
    // In debug, use a smaller cutoff to exercise the cutoff path in tests
    const int Cutoff =
#if DEBUG
        50;
#else
        1_000_000;
#endif

    RegexNode node = tree.Root;
    RegexNode? sequence = null;
    int nextIndex = 0;

    for (; ; )
    {
        switch (node.Type)
        {
            case RegexNode.Atomic:
            case RegexNode.Capture:
                // look through the wrapper at its first child
                node = node.Child(0);
                sequence = null;
                continue;

            case RegexNode.One:
                return new RegexPrefix(node.Ch.ToString(), 0 != (node.Options & RegexOptions.IgnoreCase));

            case RegexNode.Multi:
                return new RegexPrefix(node.Str!, 0 != (node.Options & RegexOptions.IgnoreCase));

            case RegexNode.Oneloop:
            case RegexNode.Oneloopatomic:
            case RegexNode.Onelazy:
                if (node.M <= 0 || node.M >= Cutoff)
                {
                    return RegexPrefix.Empty;
                }

                string repeated = new string(node.Ch, node.M);
                return new RegexPrefix(repeated, 0 != (node.Options & RegexOptions.IgnoreCase));

            case RegexNode.Concatenate:
                // remember the concatenation so its children can be scanned in order
                if (node.ChildCount() > 0)
                {
                    sequence = node;
                    nextIndex = 0;
                }
                break;

            // skipped; scanning moves on to the next child
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                break;

            default:
                return RegexPrefix.Empty;
        }

        if (sequence == null || nextIndex >= sequence.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        node = sequence.Child(nextIndex);
        nextIndex++;
    }
}
/*
 * Makes the current unit a Notone node for the given character. When
 * UseOptionI() is true, the character is first lower-cased with the
 * parser's culture.
 */
internal void AddUnitNotone(char ch)
{
    char unitChar = UseOptionI() ? _culture.TextInfo.ToLower(ch) : ch;

    _unit = new RegexNode(RegexNode.Notone, _options, unitChar);
}
/// <summary>
/// The top level RegexCode generator. It walks the tree depth-first without
/// recursion, calling EmitFragment at each leaf and before and after each
/// child of an interior node, then bundles the emitted codes together with
/// the prefix and anchor information into a <see cref="RegexCode"/>.
/// </summary>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    Span<int> codeBuffer = stackalloc int[EmittedSize];
    Span<int> stackBuffer = stackalloc int[IntStackSize];
    RegexWriter writer = new RegexWriter(codeBuffer, stackBuffer);

    // construct sparse capnum mapping if some numbers are unused
    int capsize;
    bool denseCaptures = tree._capnumlist == null || tree._captop == tree._capnumlist.Length;
    if (denseCaptures)
    {
        capsize = tree._captop;
        writer._caps = null;
    }
    else
    {
        capsize = tree._capnumlist.Length;
        writer._caps = tree._caps;

        int slot = 0;
        while (slot < tree._capnumlist.Length)
        {
            writer._caps[tree._capnumlist[slot]] = slot;
            slot++;
        }
    }

    RegexNode current = tree._root;
    int childIndex = 0;

    // placeholder jump at position 0; patched once the walk is complete
    writer.Emit(RegexCode.Lazybranch, 0);

    while (true)
    {
        if (current._children == null)
        {
            // leaf
            writer.EmitFragment(current._type, current, 0);
        }
        else if (childIndex < current._children.Count)
        {
            // before-child step, then descend
            writer.EmitFragment(current._type | BeforeChild, current, childIndex);

            current = current._children[childIndex];
            writer._intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (writer._intStack.Length == 0)
        {
            break;
        }

        // after-child step: climb back to the parent
        childIndex = writer._intStack.Pop();
        current = current._next;

        writer.EmitFragment(current._type | AfterChild, current, childIndex);
        childIndex++;
    }

    writer.PatchJump(0, writer._emitted.Length);
    writer.Emit(RegexCode.Stop);

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // a Boyer-Moore matcher is only built for a non-empty literal prefix
    RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);
    int[] emitted = writer._emitted.AsReadOnlySpan().ToArray();

    // Cleaning up and returning the borrowed arrays
    writer._emitted.Dispose();
    writer._intStack.Dispose();

    return new RegexCode(emitted, writer._stringTable, writer._trackCount, writer._caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
private readonly int[] _rules; // negative -> group #, positive -> string #

/// <summary>
/// The replacement pattern is parsed by the regular Regex parser, so this
/// constructor receives a RegexNode that must be a concatenation whose
/// children are constant strings, single characters and backreferences.
/// Adjacent constants are merged into one entry of the string table; the
/// rule table holds either an index into the string table or a negative,
/// encoded group reference.
/// </summary>
/// <exception cref="ArgumentException">
/// The root is not a Concatenate node, or a child is not Multi, One or Ref.
/// </exception>
public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
{
    if (concat.Type != RegexNode.Concatenate)
    {
        throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError);
    }

    Span<char> literalBuffer = stackalloc char[256];
    var literal = new ValueStringBuilder(literalBuffer);

    FourStackStrings inlineStrings = default;
    var literals = new ValueListBuilder<string>(MemoryMarshal.CreateSpan(ref inlineStrings.Item1!, 4));
    var ruleBuilder = new ValueListBuilder<int>(stackalloc int[64]);

    int total = concat.ChildCount();
    for (int index = 0; index < total; index++)
    {
        RegexNode part = concat.Child(index);
        int kind = part.Type;

        if (kind == RegexNode.Multi)
        {
            literal.Append(part.Str!);
        }
        else if (kind == RegexNode.One)
        {
            literal.Append(part.Ch);
        }
        else if (kind == RegexNode.Ref)
        {
            // flush pending constant text, then start a fresh builder over the same buffer
            if (literal.Length > 0)
            {
                ruleBuilder.Append(literals.Length);
                literals.Append(literal.ToString());
                literal = new ValueStringBuilder(literalBuffer);
            }

            int group = part.M;
            if (_caps != null && group >= 0)
            {
                group = (int)_caps[group]!;
            }

            ruleBuilder.Append(-Specials - 1 - group);
        }
        else
        {
            throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError);
        }
    }

    // trailing constant text, if any
    if (literal.Length > 0)
    {
        ruleBuilder.Append(literals.Length);
        literals.Append(literal.ToString());
    }

    Pattern = rep;
    _strings = literals.AsSpan().ToArray();
    _rules = ruleBuilder.AsSpan().ToArray();

    ruleBuilder.Dispose();
}
/* * Since RegexReplacement shares the same parser as Regex, * the constructor takes a RegexNode which is a concatenation * of constant strings and backreferences. */ #if SILVERLIGHT internal RegexReplacement(String rep, RegexNode concat, Dictionary <Int32, Int32> _caps) {
/// <summary>Computes the leading substring in <paramref name="tree"/>.</summary>
/// <remarks>It's quite trivial and gives up easily, in which case an empty string is returned.</remarks>
/// <param name="tree">The tree whose <c>Root</c> is walked.</param>
/// <returns>
/// The prefix text together with whether the node that produced it has
/// <see cref="RegexOptions.IgnoreCase"/> set; <c>(string.Empty, false)</c> when no prefix is found.
/// </returns>
public static (string Prefix, bool CaseInsensitive) ComputeLeadingSubstring(RegexTree tree)
{
    RegexNode curNode = tree.Root;
    RegexNode?concatNode = null; // innermost non-empty concatenation currently being stepped through
    int nextChild = 0;           // index of the next child of concatNode to examine

    while (true)
    {
        switch (curNode.Type)
        {
            case RegexNode.Concatenate:
                // Start stepping through this concatenation's children (done after the switch).
                if (curNode.ChildCount() > 0) { concatNode = curNode; nextChild = 0; }
                break;

            case RegexNode.Atomic:
            case RegexNode.Capture:
                // Look through the wrapper at its single child; any enclosing concatenation is forgotten.
                curNode = curNode.Child(0);
                concatNode = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Oneloopatomic:
            case RegexNode.Onelazy:
                // In release, cutoff at a length to which we can still reasonably construct a string and Boyer-Moore search.
                // In debug, use a smaller cutoff to exercise the cutoff path in tests
                const int Cutoff =
#if DEBUG
                50;
#else
                RegexBoyerMoore.MaxLimit;
#endif
                // Only the guaranteed minimum repetitions (M) contribute to the prefix.
                if (curNode.M > 0 && curNode.M < Cutoff) { return(new string(curNode.Ch, curNode.M), (curNode.Options & RegexOptions.IgnoreCase) != 0); }
                return(string.Empty, false);

            case RegexNode.One:
                return(curNode.Ch.ToString(), (curNode.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return(curNode.Str !, (curNode.Options & RegexOptions.IgnoreCase) != 0);

            // These node types are skipped: the walk moves on to the next sibling in the concatenation.
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                break;

            default:
                // Anything else: give up.
                return(string.Empty, false);
        }

        // Advance to the next child of the current concatenation, or give up if there is none.
        if (concatNode == null || nextChild >= concatNode.ChildCount()) { return(string.Empty, false); }
        curNode = concatNode.Child(nextChild++);
    }
}
/// <summary>
/// Top-level code generator. Walks the tree depth-first, calling EmitFragment
/// before and after each child of an interior node and once at each leaf.
/// The walk is performed twice: a first pass with <c>_counting</c> set, then a
/// second pass after <c>_emitted</c> has been allocated with <c>_count</c> entries.
/// </summary>
/// <param name="tree">The parsed expression tree.</param>
/// <returns>The RegexCode assembled from the emitted code and prefix/anchor data.</returns>
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build the sparse capture-number mapping only if some numbers are unused.
    int capSize;
    bool capsAreDense = (tree._capnumlist == null) || (tree._captop == tree._capnumlist.Length);
    if (capsAreDense)
    {
        capSize = tree._captop;
        _caps = null;
    }
    else
    {
        capSize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++)
        {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }

    _counting = true;
    while (true)
    {
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode current = tree._root;
        int childIndex = 0;

        // Leading instruction at position 0; its operand is patched once the walk is done.
        Emit(0x17, 0);

        while (true)
        {
            if (current._children == null)
            {
                EmitFragment(current._type, current, 0);
            }
            else if (childIndex < current._children.Count)
            {
                // 0x40 marks "before child".
                EmitFragment(current._type | 0x40, current, childIndex);
                current = (RegexNode)current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (EmptyStack())
            {
                break;
            }

            // Return to the parent; 0x80 marks "after child".
            childIndex = PopInt();
            current = current._next;
            EmitFragment(current._type | 0x80, current, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(40);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0)
    {
        fcPrefix = null;
    }

    RegexPrefix scPrefix = null;
    RegexPrefix literalPrefix = RegexFCD.Prefix(tree);
    bool rightToLeft = (tree._options & RegexOptions.RightToLeft) != RegexOptions.None;
    CultureInfo culture = ((tree._options & RegexOptions.CultureInvariant) != RegexOptions.None)
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;

    RegexBoyerMoore moore = (literalPrefix != null && literalPrefix.Prefix.Length > 0)
        ? new RegexBoyerMoore(literalPrefix.Prefix, literalPrefix.CaseInsensitive, rightToLeft, culture)
        : null;

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capSize, moore, fcPrefix, scPrefix, RegexFCD.Anchors(tree), rightToLeft);
}
/// <summary>
/// Emits the code fragment for one step of the depth-first tree walk.
/// </summary>
/// <param name="nodetype">
/// The node's type for a leaf, or the node's type OR-ed with 0x40 ("before
/// child") or 0x80 ("after child") for an interior node, as passed by
/// RegexCodeFromRegexTree.
/// </param>
/// <param name="node">The node being visited.</param>
/// <param name="CurIndex">Index of the child about to be / just visited (0 for leaves).</param>
/// <remarks>
/// Opcode numbers are kept as decompiled. 0x17 is the opcode that
/// RegexCodeFromRegexTree emits at position 0 and later back-patches; the
/// symbolic names of the remaining opcodes are not visible here.
/// </remarks>
internal void EmitFragment(int nodetype, RegexNode node, int CurIndex)
{
    // Option bits are only folded into the opcode for leaf node types (<= 13).
    int bits = 0;
    if (nodetype <= 13)
    {
        if (node.UseOptionR())
        {
            bits |= 0x40;
        }

        if ((node._options & RegexOptions.IgnoreCase) != RegexOptions.None)
        {
            bits |= 0x200;
        }
    }

    switch (nodetype)
    {
        // Single-character loops: emit the mandatory _m repetitions first,
        // then the loop opcode for the remaining (_n - _m) optional ones.
        case 3:
        case 4:
        case 6:
        case 7:
            if (node._m > 0)
            {
                this.Emit((((node._type == 3) || (node._type == 6)) ? 0 : 1) | bits, node._ch, node._m);
            }

            if (node._n > node._m)
            {
                this.Emit(node._type | bits, node._ch, (node._n == 0x7fffffff) ? 0x7fffffff : (node._n - node._m));
            }

            return;

        // Set loops: same split between mandatory and optional repetitions.
        case 5:
        case 8:
            if (node._m > 0)
            {
                this.Emit(2 | bits, this.StringCode(node._str), this.StringCode(node._str2), node._m);
            }

            if (node._n > node._m)
            {
                this.Emit(node._type | bits, this.StringCode(node._str), this.StringCode(node._str2), (node._n == 0x7fffffff) ? 0x7fffffff : (node._n - node._m));
            }

            return;

        case 9:
        case 10:
            this.Emit(node._type | bits, node._ch);
            return;

        case 11:
            this.Emit(node._type | bits, this.StringCode(node._str), this.StringCode(node._str2));
            return;

        case 12:
            this.Emit(node._type | bits, this.StringCode(node._str));
            return;

        case 13:
            this.Emit(node._type | bits, this.MapCapnum(node._m));
            return;

        // Operand-less leaves: the node type doubles as the opcode.
        case 14:
        case 15:
        case 0x10:
        case 0x11:
        case 0x12:
        case 0x13:
        case 20:
        case 0x15:
        case 0x16:
        case 0x29:
        case 0x2a:
            this.Emit(node._type);
            return;

        // Nothing to emit.
        case 0x17:
        case 0x59:
        case 0x5d:
        case 0x99:
        case 0x9d:
            return;

        // 0x18 (alternation), before child: every branch but the last starts
        // with a 0x17 instruction whose position is remembered for patching.
        case 0x58:
            if (CurIndex < (node._children.Count - 1))
            {
                this.PushInt(this.CurPos());
                this.Emit(0x17, 0);
            }

            return;

        case 90:
        case 0x5b:
            if ((node._n >= 0x7fffffff) && (node._m <= 1))
            {
                this.Emit((node._m == 0) ? 30 : 0x1f);
            }
            else
            {
                this.Emit((node._m == 0) ? 0x1a : 0x1b, (node._m == 0) ? 0 : (1 - node._m));
            }

            if (node._m == 0)
            {
                this.PushInt(this.CurPos());
                this.Emit(0x26, 0);
            }

            this.PushInt(this.CurPos());
            return;

        case 0x5c:
            this.Emit(0x1f);
            return;

        case 0x5e:
            this.Emit(0x22);
            this.Emit(0x1f);
            return;

        case 0x5f:
            this.Emit(0x22);
            this.PushInt(this.CurPos());
            this.Emit(0x17, 0);
            return;

        case 0x60:
            this.Emit(0x22);
            return;

        case 0x61:
            if (CurIndex == 0)
            {
                this.Emit(0x22);
                this.PushInt(this.CurPos());
                this.Emit(0x17, 0);
                this.Emit(0x25, this.MapCapnum(node._m));
                this.Emit(0x24);
            }

            return;

        case 0x62:
            if (CurIndex == 0)
            {
                this.Emit(0x22);
                this.Emit(0x1f);
                this.PushInt(this.CurPos());
                this.Emit(0x17, 0);
            }

            return;

        // 0x18 (alternation), after child.
        case 0x98:
        {
            if (CurIndex >= (node._children.Count - 1))
            {
                // Last branch: patch the pending 0x26 jump of every earlier branch to here.
                for (int i = 0; i < CurIndex; i++)
                {
                    this.PatchJump(this.PopInt(), this.CurPos());
                }

                return;
            }

            // Not the last branch: emit a 0x26 jump (patched later) and point
            // this branch's leading 0x17 at the start of the next branch.
            int branchPos = this.PopInt();
            this.PushInt(this.CurPos());
            this.Emit(0x26, 0);
            this.PatchJump(branchPos, this.CurPos());
            return;
        }

        case 0x9a:
        case 0x9b:
        {
            int jumpDest = this.CurPos();
            int lazyOffset = nodetype - 0x9a;
            if ((node._n >= 0x7fffffff) && (node._m <= 1))
            {
                this.Emit(0x18 + lazyOffset, this.PopInt());
            }
            else
            {
                this.Emit(0x1c + lazyOffset, this.PopInt(), (node._n == 0x7fffffff) ? 0x7fffffff : (node._n - node._m));
            }

            if (node._m == 0)
            {
                this.PatchJump(this.PopInt(), jumpDest);
            }

            return;
        }

        case 0x9c:
            this.Emit(0x20, this.MapCapnum(node._m), this.MapCapnum(node._n));
            return;

        case 0x9e:
            this.Emit(0x21);
            this.Emit(0x24);
            return;

        case 0x9f:
            this.Emit(0x23);
            this.PatchJump(this.PopInt(), this.CurPos());
            this.Emit(0x24);
            return;

        case 160:
            this.Emit(0x24);
            return;

        // After child of the node type whose "before child" case is 0x61.
        // After child 0 a 0x26 jump over the alternative is pushed; it must be
        // patched either immediately (no second child) or after child 1.
        // The decompiled form used `break`, which only left the inner switch
        // and returned, so the pushed jump was never popped or patched.
        case 0xa1:
            if (CurIndex == 0)
            {
                int branchPos = this.PopInt();
                this.PushInt(this.CurPos());
                this.Emit(0x26, 0);
                this.PatchJump(branchPos, this.CurPos());
                this.Emit(0x24);
                if (node._children.Count > 1)
                {
                    return;
                }

                // No second child: resolve the jump right away.
                this.PatchJump(this.PopInt(), this.CurPos());
            }
            else if (CurIndex == 1)
            {
                this.PatchJump(this.PopInt(), this.CurPos());
            }

            return;

        // After child of the node type whose "before child" case is 0x62.
        // Same scheme, shifted by one because child 0 is the test expression.
        case 0xa2:
            if (CurIndex == 0)
            {
                this.Emit(0x21);
                this.Emit(0x24);
            }
            else if (CurIndex == 1)
            {
                int branchPos = this.PopInt();
                this.PushInt(this.CurPos());
                this.Emit(0x26, 0);
                this.PatchJump(branchPos, this.CurPos());
                this.Emit(0x21);
                this.Emit(0x24);
                if (node._children.Count > 2)
                {
                    return;
                }

                // No third child: resolve the jump right away.
                this.PatchJump(this.PopInt(), this.CurPos());
            }
            else if (CurIndex == 2)
            {
                this.PatchJump(this.PopInt(), this.CurPos());
            }

            return;

        default:
            throw MakeException(RegExRes.GetString(4, nodetype.ToString()));
    }
}
/// <summary>
/// Sets the current unit to a single-character node (type 9). When the
/// ignore-case option is in effect the character is lower-cased with the
/// parser's culture first.
/// </summary>
internal void AddUnitOne(char ch)
{
    char unitChar = UseOptionI() ? char.ToLower(ch, _culture) : ch;
    _unit = new RegexNode(9, _options, unitChar);
}
/*
 * Makes a single Set node, built from the given set string and the
 * current options, the current unit.
 */
internal void AddUnitSet(string cc)
{
    RegexNode setNode = new RegexNode(RegexNode.Set, _options, cc);
    _unit = setNode;
}
/// <summary>
/// Sets the current unit to a new node of the given type carrying the current options.
/// </summary>
internal void AddUnitType(int type)
{
    RegexNode typedNode = new RegexNode(type, _options);
    _unit = typedNode;
}
/*
 * Closes the current group (called for a ')' or at the end of input)
 * and makes it the current unit.
 */
internal void AddGroup()
{
    bool isConditional = _group.Type() == RegexNode.Testgroup || _group.Type() == RegexNode.Testref;

    if (!isConditional)
    {
        // Ordinary group: the finished concatenation becomes one more branch
        // of the alternation, which in turn becomes the group's child.
        _alternation.AddChild(_concatenation.ReverseLeft());
        _group.AddChild(_alternation);
    }
    else
    {
        // Conditional group: branches are attached directly to the group.
        _group.AddChild(_concatenation.ReverseLeft());

        bool tooManyForTestref = _group.Type() == RegexNode.Testref && _group.ChildCount() > 2;
        if (tooManyForTestref || _group.ChildCount() > 3)
        {
            throw MakeException(SR.TooManyAlternates);
        }
    }

    _unit = _group;
}
/// <summary>
/// First-character computation for one step of the tree walk: depending on
/// <paramref name="NodeType"/> it pushes a new RegexFC, merges the top two
/// RegexFC entries, marks the top entry nullable, or asks for children to be skipped.
/// </summary>
/// <param name="NodeType">
/// Numeric node type. Values above 0x40 / 0x80 are interior-node types combined
/// with a before-child / after-child marker (compare the symbolic
/// <c>RegexNode.X | BeforeChild</c> / <c>| AfterChild</c> labels used by the
/// non-decompiled CalculateFC variants in this file).
/// </param>
/// <param name="node">The node being visited.</param>
/// <param name="CurIndex">Index of the child being visited.</param>
/// <exception cref="ArgumentException">The node type is not handled by the switch.</exception>
private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
{
    bool caseInsensitive = false;
    bool flag2 = false; // set when the node has RegexOptions.RightToLeft

    // Options are only inspected for leaf node types (<= 13).
    if (NodeType <= 13)
    {
        if ((node._options & RegexOptions.IgnoreCase) != RegexOptions.None) { caseInsensitive = true; }
        if ((node._options & RegexOptions.RightToLeft) != RegexOptions.None) { flag2 = true; }
    }

    switch (NodeType)
    {
        // Single-character loops; nullable when the minimum count is 0.
        case 3:
        case 6:
            this.PushFC(new RegexFC(node._ch, false, node._m == 0, caseInsensitive));
            return;

        // Same, with the second (bool) constructor argument set to true.
        case 4:
        case 7:
            this.PushFC(new RegexFC(node._ch, true, node._m == 0, caseInsensitive));
            return;

        // Set loops.
        case 5:
        case 8:
            this.PushFC(new RegexFC(node._str, node._m == 0, caseInsensitive));
            return;

        // Single character (9) and its type-10 counterpart.
        case 9:
        case 10:
            this.PushFC(new RegexFC(node._ch, NodeType == 10, false, caseInsensitive));
            return;

        case 11:
            this.PushFC(new RegexFC(node._str, false, caseInsensitive));
            return;

        // String: the first character, or the last one when right-to-left;
        // an empty string pushes RegexFC(true).
        case 12:
            if (node._str.Length != 0)
            {
                if (!flag2)
                {
                    this.PushFC(new RegexFC(node._str[0], false, false, caseInsensitive));
                    return;
                }
                this.PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, caseInsensitive));
                return;
            }
            this.PushFC(new RegexFC(true));
            return;

        case 13:
            this.PushFC(new RegexFC("\0\x0001\0\0", true, false));
            return;

        case 14:
        case 15:
        case 0x10:
        case 0x11:
        case 0x12:
        case 0x13:
        case 20:
        case 0x15:
        case 0x16:
        case 0x29:
        case 0x2a:
            this.PushFC(new RegexFC(true));
            return;

        case 0x17:
            this.PushFC(new RegexFC(true));
            return;

        // No first-character work for these before/after-child steps.
        case 0x58:
        case 0x59:
        case 90:
        case 0x5b:
        case 0x5c:
        case 0x5d:
        case 0x60:
        case 0x61:
        case 0x9c:
        case 0x9d:
        case 0x9e:
        case 0x9f:
        case 160:
            return;

        // The child is skipped and RegexFC(true) is pushed in its place.
        case 0x5e:
        case 0x5f:
            this.SkipChild();
            this.PushFC(new RegexFC(true));
            return;

        // Only the first child is skipped.
        case 0x62:
            if (CurIndex == 0) { this.SkipChild(); }
            return;

        // Merge the child's FC into the accumulated one (AddFC second argument false).
        case 0x98:
        case 0xa1:
            if (CurIndex != 0)
            {
                RegexFC fc = this.PopFC();
                RegexFC xfc6 = this.TopFC();
                this._failed = !xfc6.AddFC(fc, false);
            }
            return;

        // 0x19 (concatenation), after child: merge with AddFC(..., true), and stop
        // visiting further children once the accumulated FC is not nullable.
        case 0x99:
            if (CurIndex != 0)
            {
                RegexFC xfc = this.PopFC();
                RegexFC xfc2 = this.TopFC();
                this._failed = !xfc2.AddFC(xfc, true);
            }
            if (!this.TopFC()._nullable) { this._skipAllChildren = true; }
            return;

        // A minimum count of 0 makes the accumulated FC nullable.
        case 0x9a:
        case 0x9b:
            if (node._m == 0) { this.TopFC()._nullable = true; }
            return;

        // Merge only from the third child onward (CurIndex > 1).
        case 0xa2:
            if (CurIndex > 1)
            {
                RegexFC xfc3 = this.PopFC();
                RegexFC xfc4 = this.TopFC();
                this._failed = !xfc4.AddFC(xfc3, false);
            }
            return;
    }

    throw new ArgumentException(SR.GetString("UnexpectedOpcode", new object[] { NodeType.ToString(CultureInfo.CurrentCulture) }));
}
/*
 * Simple parser for replacement patterns: splits the input into runs of
 * literal text and '$' constructs, appending each to a fresh Concatenate
 * node, which is returned.
 */
internal RegexNode ScanReplacement()
{
    _concatenation = new RegexNode(RegexNode.Concatenate, _options);

    int remaining;
    while ((remaining = CharsRight()) != 0)
    {
        // Consume literal text up to the next '$' (or the end of input).
        int literalStart = Textpos();
        while (remaining > 0 && RightChar() != '$')
        {
            MoveRight();
            remaining--;
        }

        AddConcatenate(literalStart, Textpos() - literalStart, true);

        if (remaining <= 0)
        {
            continue;
        }

        // Stopped at a '$': let ScanDollar produce the unit for it.
        if (MoveRightGetChar() == '$')
        {
            AddUnitNode(ScanDollar());
        }

        AddConcatenate();
    }

    return _concatenation;
}
/// <summary>
/// The top level RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
/// <param name="tree">The parsed expression tree.</param>
/// <returns>The RegexCode assembled from the emitted code and prefix/anchor data.</returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // A sparse capnum mapping is only needed when some capture numbers are unused.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    RegexNode current = tree.Root;
    int childIndex = 0;

    // Leading lazy branch at position 0; patched below once the end is known.
    Emit(RegexCode.Lazybranch, 0);

    while (true)
    {
        if (current.Children != null && childIndex < current.Children.Count)
        {
            // Descend into the next child.
            EmitFragment(current.NType | BeforeChild, current, childIndex);
            current = current.Children[childIndex];
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (current.Children == null)
        {
            // Leaf.
            EmitFragment(current.NType, current, 0);
        }

        if (_intStack.Length == 0)
        {
            break;
        }

        // Return to the parent and move on to its next child.
        childIndex = _intStack.Pop();
        current = current.Next;
        EmitFragment(current.NType | AfterChild, current, childIndex);
        childIndex++;
    }

    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);

    RegexPrefix?fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;

    RegexBoyerMoore bmPrefix = prefix.Prefix.Length > 0
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);
    int[] emitted = _emitted.AsSpan().ToArray();

    return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/// <summary>
/// Saves the current group / alternation / concatenation triple on the parser
/// stack by chaining the three nodes through <c>_next</c> on top of the
/// previous stack head.
/// </summary>
internal void PushGroup()
{
    RegexNode previousTop = _stack;

    // Chain: concatenation -> alternation -> group -> previous stack head.
    _group._next = previousTop;
    _alternation._next = _group;
    _concatenation._next = _alternation;

    _stack = _concatenation;
}
/// <summary> /// The top level RegexCode generator. It does a depth-first walk /// through the tree and calls EmitFragment to emit code before /// and after each child of an interior node and at each leaf. /// It also computes various information about the tree, such as /// prefix data to help with optimizations. /// </summary> public RegexCode RegexCodeFromRegexTree(RegexTree tree) { // Construct sparse capnum mapping if some numbers are unused. int capsize; if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length) { capsize = tree.CapTop; _caps = null; } else { capsize = tree.CapNumList.Length; _caps = tree.Caps; for (int i = 0; i < tree.CapNumList.Length; i++) { _caps[tree.CapNumList[i]] = i; } } // Every written code begins with a lazy branch. This will be back-patched // to point to the ending Stop after the whole expression has been written. Emit(RegexCode.Lazybranch, 0); // Emit every node. RegexNode curNode = tree.Root; int curChild = 0; while (true) { int curNodeChildCount = curNode.ChildCount(); if (curNodeChildCount == 0) { EmitFragment(curNode.Type, curNode, 0); } else if (curChild < curNodeChildCount) { EmitFragment(curNode.Type | BeforeChild, curNode, curChild); curNode = curNode.Child(curChild); _intStack.Append(curChild); curChild = 0; continue; } if (_intStack.Length == 0) { break; } curChild = _intStack.Pop(); curNode = curNode.Next !; EmitFragment(curNode.Type | AfterChild, curNode, curChild); curChild++; } // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array. PatchJump(0, _emitted.Length); Emit(RegexCode.Stop); int[] emitted = _emitted.AsSpan().ToArray(); bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0; bool compiled = (tree.Options & RegexOptions.Compiled) != 0; // Compute prefixes to help optimize FindFirstChar. RegexBoyerMoore?boyerMoorePrefix = null; (string CharClass, bool CaseInsensitive)[]? leadingCharClasses = null;
/// <summary>
/// Wraps the current unit in a quantifier, appends the result to the current
/// concatenation and clears the current unit.
/// </summary>
internal void AddConcatenate(bool lazy, int min, int max)
{
    RegexNode quantified = _unit.MakeQuantifier(lazy, min, max);
    _concatenation.AddChild(quantified);
    _unit = null;
}
/// <summary>
/// Simplifies this concatenation node in place: nested concatenations with the
/// same RightToLeft setting are flattened into this node, adjacent type-9
/// (single character) / type-12 (string) children with identical
/// RightToLeft/IgnoreCase options are merged into one type-12 child, and
/// type-0x17 children are dropped.
/// </summary>
/// <returns>
/// A new type-0x17 node when there are no children; otherwise the result of
/// <c>StripEnation(0x17)</c>.
/// </returns>
internal RegexNode ReduceConcatenation()
{
    if (this._children == null) { return(new RegexNode(0x17, this._options)); }

    bool flag = false;                       // previous kept child was a char/string node
    RegexOptions none = RegexOptions.None;   // RightToLeft/IgnoreCase options of that previous child
    int num = 0;                             // read position
    int index = 0;                           // write position (always <= num)

    while (num < this._children.Count)
    {
        RegexNode node = (RegexNode)this._children[num];

        // Compact: move the child down over any slots freed earlier.
        if (index < num) { this._children[index] = node; }

        if ((node._type == 0x19) && ((node._options & RegexOptions.RightToLeft) == (this._options & RegexOptions.RightToLeft)))
        {
            // Nested concatenation: re-parent its children and splice them in right
            // after the current position so the loop visits them next; the nested
            // node's own slot is given up.
            for (int i = 0; i < node._children.Count; i++) { ((RegexNode)node._children[i])._next = this; }
            this._children.InsertRange(num + 1, node._children);
            index--;
        }
        else if ((node._type == 12) || (node._type == 9))
        {
            RegexOptions options2 = node._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
            if (!flag || (none != options2))
            {
                // First text node of a run, or the options differ: start a new run.
                flag = true;
                none = options2;
            }
            else
            {
                // Merge into the previous text node (node2), converting it to a string node first.
                RegexNode node2 = (RegexNode)this._children[--index];
                if (node2._type == 9)
                {
                    node2._type = 12;
                    node2._str = node2._ch.ToString();
                }

                if ((options2 & RegexOptions.RightToLeft) == RegexOptions.None)
                {
                    // Left-to-right: append.
                    if (node._type == 9) { node2._str = node2._str + node._ch.ToString(); }
                    else { node2._str = node2._str + node._str; }
                }
                // Right-to-left: prepend.
                else if (node._type == 9) { node2._str = node._ch.ToString() + node2._str; }
                else { node2._str = node._str + node2._str; }
            }
        }
        else if (node._type == 0x17)
        {
            // Dropped from the concatenation.
            index--;
        }
        else
        {
            flag = false;
        }

        num++;
        index++;
    }

    // Trim the slots left over after compaction.
    if (index < num) { this._children.RemoveRange(index, num - index); }

    return(this.StripEnation(0x17));
}
/// <summary>
/// Appends the current unit to the current concatenation and clears the current unit.
/// </summary>
internal void AddConcatenate()
{
    RegexNode pendingUnit = _unit;
    _concatenation.AddChild(pendingUnit);
    _unit = null;
}
/*
 * FC computation and shortcut cases for each node type.
 *
 * NodeType is either a leaf node type or an interior node type combined with
 * BeforeChild / AfterChild. Leaves push a new RegexFC; AfterChild steps merge
 * the child's RegexFC (popped) into the accumulated one (top of stack) and
 * record a failed merge in _failed; some BeforeChild steps skip children.
 * CurIndex is the index of the child being visited.
 *
 * Throws ArgumentException for a node type the switch does not handle.
 */
private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
{
    bool ci = false;
    bool rtl = false;

    // Options are only inspected for leaf node types (<= RegexNode.Ref).
    if (NodeType <= RegexNode.Ref)
    {
        if ((node._options & RegexOptions.IgnoreCase) != 0) { ci = true; }
        if ((node._options & RegexOptions.RightToLeft) != 0) { rtl = true; }
    }

    switch (NodeType)
    {
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Alternate | BeforeChild:
        case RegexNode.Testref | BeforeChild:
        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
            break;

        // The first child of a Testgroup is skipped.
        case RegexNode.Testgroup | BeforeChild:
            if (CurIndex == 0) { SkipChild(); }
            break;

        case RegexNode.Empty:
            PushFC(new RegexFC(true));
            break;

        // Merge with AddFC(..., true); once the accumulated FC is not nullable,
        // the remaining children are skipped.
        case RegexNode.Concatenate | AfterChild:
            if (CurIndex != 0)
            {
                RegexFC child = PopFC();
                RegexFC cumul = TopFC();
                _failed = !cumul.AddFC(child, true);
            }
            if (!TopFC()._nullable) { _skipAllChildren = true; }
            break;

        // Child 0 was skipped, so merging starts at the third child (CurIndex > 1).
        case RegexNode.Testgroup | AfterChild:
            if (CurIndex > 1)
            {
                RegexFC child = PopFC();
                RegexFC cumul = TopFC();
                _failed = !cumul.AddFC(child, false);
            }
            break;

        case RegexNode.Alternate | AfterChild:
        case RegexNode.Testref | AfterChild:
            if (CurIndex != 0)
            {
                RegexFC child = PopFC();
                RegexFC cumul = TopFC();
                _failed = !cumul.AddFC(child, false);
            }
            break;

        // A minimum count of zero makes the accumulated FC nullable.
        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            if (node._m == 0) { TopFC()._nullable = true; }
            break;

        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
        case RegexNode.Capture | BeforeChild:
        case RegexNode.Capture | AfterChild:
        case RegexNode.Greedy | BeforeChild:
        case RegexNode.Greedy | AfterChild:
            break;

        // The child of a Require / Prevent node is skipped and RegexFC(true) is pushed instead.
        case RegexNode.Require | BeforeChild:
        case RegexNode.Prevent | BeforeChild:
            SkipChild();
            PushFC(new RegexFC(true));
            break;

        case RegexNode.Require | AfterChild:
        case RegexNode.Prevent | AfterChild:
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci));
            break;

        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            PushFC(new 
RegexFC(node._ch, false, node._m == 0, ci));
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
            PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
            break;

        // First character of the string, or the last one when right-to-left.
        case RegexNode.Multi:
            if (node._str.Length == 0) { PushFC(new RegexFC(true)); }
            else if (!rtl) { PushFC(new RegexFC(node._str[0], false, false, ci)); }
            else { PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci)); }
            break;

        case RegexNode.Set:
            PushFC(new RegexFC(node._str, false, ci));
            break;

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            PushFC(new RegexFC(node._str, node._m == 0, ci));
            break;

        case RegexNode.Ref:
            PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
            break;

        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            PushFC(new RegexFC(true));
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
    }
}
/// <summary>
/// Begins a new group: <paramref name="openGroup"/> becomes the current group
/// and fresh alternation (type 0x18) and concatenation (type 0x19) nodes are
/// created with the current options.
/// </summary>
internal void StartGroup(RegexNode openGroup)
{
    _group = openGroup;

    RegexNode freshAlternation = new RegexNode(0x18, _options);
    _alternation = freshAlternation;

    RegexNode freshConcatenation = new RegexNode(0x19, _options);
    _concatenation = freshConcatenation;
}
// Recursively analyzes `node` and its descendants, recording facts in `results`:
// whether IgnoreCase / RightToLeft was seen, which nodes are inside loops, which
// are atomic because of an ancestor, which contain captures, and which may backtrack.
//   node               - the node to analyze.
//   results            - the accumulator that is updated in place.
//   isAtomicByAncestor - whether the parent determined this node should be treated as atomic.
//   isInLoop           - whether this node is nested inside a Loop or Lazyloop.
// Returns false if there was not enough execution stack to continue (for this node
// or any descendant); true once the whole subtree has been analyzed.
static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor, bool isInLoop)
{
    if (!StackHelper.TryEnsureSufficientExecutionStack()) { return(false); }

    // Track whether we've seen any nodes with various options set.
    results._hasIgnoreCase |= (node.Options & RegexOptions.IgnoreCase) != 0;
    results._hasRightToLeft |= (node.Options & RegexOptions.RightToLeft) != 0;

    // Track whether this node is inside of a loop.
    if (isInLoop) { (results._inLoops ??= new HashSet <RegexNode>()).Add(node); }

    if (isAtomicByAncestor)
    {
        // We've been told by our parent that we should be considered atomic, so add ourselves
        // to the atomic collection.
        results._isAtomicByAncestor.Add(node);
    }
    else
    {
        // Certain kinds of nodes incur backtracking logic themselves: add them to the backtracking collection.
        // We may later find that a node contains another that has backtracking; we'll add nodes based on that
        // after examining the children.
        switch (node.Kind)
        {
            case RegexNodeKind.Alternate:
            case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M != node.N:
            case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop or RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy when node.M != node.N:
                (results._mayBacktrack ??= new HashSet <RegexNode>()).Add(node);
                break;
        }
    }

    // Update state for certain node types.
    bool isAtomicBySelf = false;
    switch (node.Kind)
    {
        // Some node types add atomicity around what they wrap. Set isAtomicBySelf to true for such nodes
        // even if isAtomicByAncestor was false upon entering the method.
        case RegexNodeKind.Atomic:
        case RegexNodeKind.NegativeLookaround:
        case RegexNodeKind.PositiveLookaround:
            isAtomicBySelf = true;
            break;

        // Track any nodes that are themselves captures.
        case RegexNodeKind.Capture:
            results._containsCapture.Add(node);
            break;

        // Track whether we've recurred into a loop
        case RegexNodeKind.Loop:
        case RegexNodeKind.Lazyloop:
            isInLoop = true;
            break;
    }

    // Process each child.
    int childCount = node.ChildCount();
    for (int i = 0; i < childCount; i++)
    {
        RegexNode child = node.Child(i);

        // Determine whether the child should be treated as atomic (whether anything
        // can backtrack into it), which is influenced by whether this node (the child's
        // parent) is considered atomic by itself or by its parent.
        bool treatChildAsAtomic = (isAtomicByAncestor | isAtomicBySelf) && node.Kind switch {
            // If the parent is atomic, so is the child. That's the whole purpose
            // of the Atomic node, and lookarounds are also implicitly atomic.
            RegexNodeKind.Atomic or RegexNodeKind.NegativeLookaround or RegexNodeKind.PositiveLookaround => true,

            // Each branch is considered independently, so any atomicity applied to the alternation also applies
            // to each individual branch. This is true as well for conditionals.
            RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional => true,

            // Captures don't impact atomicity: if the parent of a capture is atomic, the capture is also atomic.
            RegexNodeKind.Capture => true,

            // If the parent is a concatenation and this is the last node, any atomicity
            // applying to the concatenation applies to this node, too.
            RegexNodeKind.Concatenate => i == childCount - 1,

            // For loops with a max iteration count of 1, they themselves can be considered
            // atomic as can whatever they wrap, as they won't ever iterate more than once
            // and thus we don't need to worry about one iteration consuming input destined
            // for a subsequent iteration.
            RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.N == 1 => true,

            // For any other parent type, give up on trying to prove atomicity.
            _ => false,
        };

        // Now analyze the child.
        if (!TryAnalyze(child, results, treatChildAsAtomic, isInLoop)) { return(false); }

        // If the child contains captures, so too does this parent.
        if (results._containsCapture.Contains(child)) { results._containsCapture.Add(node); }

        // If the child might require backtracking into it, so too might the parent,
        // unless the parent is itself considered atomic. Here we don't consider parental
        // atomicity, as we need to surface upwards to the parent whether any backtracking
        // will be visible from this node to it.
        if (!isAtomicBySelf && (results._mayBacktrack?.Contains(child) == true)) { (results._mayBacktrack ??= new HashSet <RegexNode>()).Add(node); }
    }

    // Successfully analyzed the node.
    return(true);
}
/// <summary>
/// Simplifies this alternation node in place: nested alternations (type 0x18)
/// are flattened into this node, adjacent type-9 (single character) / type-11
/// (set) branches with identical RightToLeft/IgnoreCase options are merged into
/// one type-11 branch via set union, and type-0x16 branches are dropped.
/// </summary>
/// <returns>
/// A new type-0x16 node when there are no children; otherwise the result of
/// <c>StripEnation(0x16)</c>.
/// </returns>
internal RegexNode ReduceAlternation()
{
    if (this._children == null) { return(new RegexNode(0x16, this._options)); }

    bool flag = false;                       // previous kept branch was a char/set node
    RegexOptions none = RegexOptions.None;   // RightToLeft/IgnoreCase options of that previous branch
    int num = 0;                             // read position
    int index = 0;                           // write position (always <= num)

    while (num < this._children.Count)
    {
        RegexNode node = (RegexNode)this._children[num];

        // Compact: move the branch down over any slots freed earlier.
        if (index < num) { this._children[index] = node; }

        if (node._type == 0x18)
        {
            // Nested alternation: re-parent its branches and splice them in right after
            // the current position so the loop visits them next.
            for (int i = 0; i < node._children.Count; i++) { ((RegexNode)node._children[i])._next = this; }
            this._children.InsertRange(num + 1, node._children);
            index--;
        }
        else if ((node._type == 11) || (node._type == 9))
        {
            RegexOptions options2 = node._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
            if (!flag || (none != options2))
            {
                // First char/set branch of a run, or the options differ: start a new run.
                flag = true;
                none = options2;
            }
            else
            {
                // Merge into the previous branch (node2), converting it to a set node first.
                RegexNode node2 = (RegexNode)this._children[--index];
                if (node2._type == 9)
                {
                    node2._type = 11;
                    node2._str = RegexCharClass.SetFromChar(node2._ch);
                }

                if (node._type == 9)
                {
                    node2._str = RegexCharClass.SetUnion(node2._str, RegexCharClass.SetFromChar(node._ch));
                }
                else
                {
                    node2._str = RegexCharClass.SetUnion(node2._str, node._str);
                    node2._str2 = RegexCharClass.CategoryUnion(node2._str2, node._str2);
                }
            }
        }
        else if (node._type == 0x16)
        {
            // Dropped from the alternation.
            index--;
        }
        else
        {
            flag = false;
        }

        num++;
        index++;
    }

    // Trim the slots left over after compaction.
    if (index < num) { this._children.RemoveRange(index, num - index); }

    return(this.StripEnation(0x16));
}
/// <summary>Makes <paramref name="node"/> the current unit.</summary>
internal void AddUnitNode(RegexNode node)
{
    _unit = node;
}
/// <summary>
/// Reduces <paramref name="newChild"/> and appends the reduced node to this
/// node's children (creating the list on first use), linking the reduced
/// node back to this node through <c>_next</c>.
/// </summary>
internal void AddChild(RegexNode newChild)
{
    if (_children == null)
    {
        _children = new List<RegexNode>(4);
    }

    RegexNode simplified = newChild.Reduce();
    _children.Add(simplified);
    simplified._next = this;
}
/// <summary>
/// Sets the current unit to a set node (type 11) built from <paramref name="cc"/>
/// and the current options.
/// </summary>
internal void AddUnitSet(string cc)
{
    RegexNode setNode = new RegexNode(11, _options, cc);
    _unit = setNode;
}
/*
 * FC computation and shortcut cases for each node type.
 *
 * NodeType is either a leaf node type or an interior node type combined with
 * BeforeChild / AfterChild. Leaves push a new RegexFC; AfterChild steps merge
 * the child's RegexFC (popped) into the accumulated one (top of stack) and
 * record a failed merge in _failed; some BeforeChild steps skip children.
 * CurIndex is the index of the child being visited.
 *
 * Throws ArgumentException for a node type the switch does not handle.
 */
private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
{
    bool ci = false;
    bool rtl = false;

    // Options are only inspected for leaf node types (<= RegexNode.Ref).
    if (NodeType <= RegexNode.Ref)
    {
        if ((node._options & RegexOptions.IgnoreCase) != 0) ci = true;
        if ((node._options & RegexOptions.RightToLeft) != 0) rtl = true;
    }

    switch (NodeType)
    {
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Alternate | BeforeChild:
        case RegexNode.Testref | BeforeChild:
        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
            break;

        // The first child of a Testgroup is skipped.
        case RegexNode.Testgroup | BeforeChild:
            if (CurIndex == 0) SkipChild();
            break;

        case RegexNode.Empty:
            PushFC(new RegexFC(true));
            break;

        // Merge with AddFC(..., true); once the accumulated FC is not nullable,
        // the remaining children are skipped.
        case RegexNode.Concatenate | AfterChild:
            if (CurIndex != 0)
            {
                RegexFC child = PopFC();
                RegexFC cumul = TopFC();
                _failed = !cumul.AddFC(child, true);
            }
            if (!TopFC()._nullable) _skipAllChildren = true;
            break;

        // Child 0 was skipped, so merging starts at the third child (CurIndex > 1).
        case RegexNode.Testgroup | AfterChild:
            if (CurIndex > 1)
            {
                RegexFC child = PopFC();
                RegexFC cumul = TopFC();
                _failed = !cumul.AddFC(child, false);
            }
            break;

        case RegexNode.Alternate | AfterChild:
        case RegexNode.Testref | AfterChild:
            if (CurIndex != 0)
            {
                RegexFC child = PopFC();
                RegexFC cumul = TopFC();
                _failed = !cumul.AddFC(child, false);
            }
            break;

        // A minimum count of zero makes the accumulated FC nullable.
        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            if (node._m == 0) TopFC()._nullable = true;
            break;

        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
        case RegexNode.Capture | BeforeChild:
        case RegexNode.Capture | AfterChild:
        case RegexNode.Greedy | BeforeChild:
        case RegexNode.Greedy | AfterChild:
            break;

        // The child of a Require / Prevent node is skipped and RegexFC(true) is pushed instead.
        case RegexNode.Require | BeforeChild:
        case RegexNode.Prevent | BeforeChild:
            SkipChild();
            PushFC(new RegexFC(true));
            break;

        case RegexNode.Require | AfterChild:
        case RegexNode.Prevent | AfterChild:
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            PushFC(new RegexFC(node._ch, NodeType == RegexNode.Notone, false, ci));
            break;

        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            PushFC(new RegexFC(node._ch, 
false, node._m == 0, ci));
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
            PushFC(new RegexFC(node._ch, true, node._m == 0, ci));
            break;

        // First character of the string, or the last one when right-to-left.
        case RegexNode.Multi:
            if (node._str.Length == 0) PushFC(new RegexFC(true));
            else if (!rtl) PushFC(new RegexFC(node._str[0], false, false, ci));
            else PushFC(new RegexFC(node._str[node._str.Length - 1], false, false, ci));
            break;

        case RegexNode.Set:
            PushFC(new RegexFC(node._str, false, ci));
            break;

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            PushFC(new RegexFC(node._str, node._m == 0, ci));
            break;

        case RegexNode.Ref:
            PushFC(new RegexFC(RegexCharClass.AnyClass, true, false));
            break;

        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            PushFC(new RegexFC(true));
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
    }
}
/// <summary>
/// Simple parser for replacement patterns: splits the input into runs of
/// literal text and '$' constructs, appending each to a fresh concatenation
/// node (type 0x19), which is returned.
/// </summary>
internal RegexNode ScanReplacement()
{
    _concatenation = new RegexNode(0x19, _options);

    for (int remaining = CharsRight(); remaining != 0; remaining = CharsRight())
    {
        // Consume literal text up to the next '$' (or the end of input).
        int literalStart = Textpos();
        while (remaining > 0 && RightChar() != '$')
        {
            MoveRight();
            remaining--;
        }

        AddConcatenate(literalStart, Textpos() - literalStart, true);

        if (remaining <= 0)
        {
            continue;
        }

        // Stopped at a '$': let ScanDollar produce the unit for it.
        if (MoveRightGetChar() == '$')
        {
            AddUnitNode(ScanDollar());
        }

        AddConcatenate();
    }

    return _concatenation;
}
// Processes the node, adding any prefix text to the builder.
// Returns whether processing should continue with subsequent nodes.
//   node - the node to examine (recursively, for nodes with children).
//   vsb  - receives the prefix characters; for alternations it may be truncated
//          back to the part shared by all branches.
// A false return means the prefix ends at (or inside) this node: the stack was too
// deep, the node is RightToLeft, the node type is not understood, or the node
// contributed only part of what it can match.
static bool Process(RegexNode node, ref ValueStringBuilder vsb)
{
    if (!StackHelper.TryEnsureSufficientExecutionStack())
    {
        // If we're too deep on the stack, just give up finding any more prefix.
        return(false);
    }

    // We don't bother to handle reversed input, so process at most one node
    // when handling RightToLeft.
    bool rtl = (node.Options & RegexOptions.RightToLeft) != 0;

    switch (node.Type)
    {
        // Concatenation
        case RegexNode.Concatenate:
        {
            int childCount = node.ChildCount();
            for (int i = 0; i < childCount; i++)
            {
                // Stop at the first child that says the prefix cannot continue past it.
                if (!Process(node.Child(i), ref vsb)) { return(false); }
            }
            return(!rtl);
        }

        // Alternation: find a string that's a shared prefix of all branches
        case RegexNode.Alternate:
        {
            int childCount = node.ChildCount();

            // Store the initial branch into the target builder
            int initialLength = vsb.Length;
            bool keepExploring = Process(node.Child(0), ref vsb);
            int addedLength = vsb.Length - initialLength;

            // Then explore the rest of the branches, finding the length
            // a prefix they all share in common with the initial branch.
            if (addedLength != 0)
            {
                var alternateSb = new ValueStringBuilder(64);

                // Process each branch. If we reach a point where we've proven there's
                // no overlap, we can bail early.
                for (int i = 1; i < childCount && addedLength != 0; i++)
                {
                    alternateSb.Length = 0;

                    // Process the branch. We want to keep exploring after this alternation,
                    // but we can't if either this branch doesn't allow for it or if the prefix
                    // supplied by this branch doesn't entirely match all the previous ones.
                    keepExploring &= Process(node.Child(i), ref alternateSb);
                    keepExploring &= alternateSb.Length == addedLength;

                    // Shrink the shared length to the first position where the branches differ.
                    addedLength = Math.Min(addedLength, alternateSb.Length);
                    for (int j = 0; j < addedLength; j++)
                    {
                        if (vsb[initialLength + j] != alternateSb[j])
                        {
                            addedLength = j;
                            keepExploring = false;
                            break;
                        }
                    }
                }

                alternateSb.Dispose();

                // Then cull back on what was added based on the other branches.
                vsb.Length = initialLength + addedLength;
            }

            return(!rtl && keepExploring);
        }

        // One character
        case RegexNode.One when(node.Options& RegexOptions.IgnoreCase) == 0:
            vsb.Append(node.Ch);
            return(!rtl);

        // Multiple characters
        case RegexNode.Multi when(node.Options& RegexOptions.IgnoreCase) == 0:
            vsb.Append(node.Str);
            return(!rtl);

        // Loop of one character
        case RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy when node.M > 0 && (node.Options & RegexOptions.IgnoreCase) == 0:
            const int SingleCharIterationLimit = 32; // arbitrary cut-off to avoid creating super long strings unnecessarily
            int count = Math.Min(node.M, SingleCharIterationLimit);
            vsb.Append(node.Ch, count);
            // Only continue if the loop matches exactly `count` characters (N == count).
            return(count == node.N && !rtl);

        // Loop of a node
        case RegexNode.Loop or RegexNode.Lazyloop when node.M > 0:
        {
            const int NodeIterationLimit = 4; // arbitrary cut-off to avoid creating super long strings unnecessarily
            int limit = Math.Min(node.M, NodeIterationLimit);
            for (int i = 0; i < limit; i++)
            {
                if (!Process(node.Child(0), ref vsb)) { return(false); }
            }
            // Only continue if the loop runs exactly `limit` iterations (N == limit).
            return(limit == node.N && !rtl);
        }

        // Grouping nodes for which we only care about their single child
        case RegexNode.Atomic:
        case RegexNode.Capture:
            return(Process(node.Child(0), ref vsb));

        // Zero-width anchors and assertions
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonBoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
        case RegexNode.Empty:
        case RegexNode.UpdateBumpalong:
        case RegexNode.Require:
        case 
RegexNode.Prevent:
            // Nothing is added to the builder, and processing may continue.
            return(true);

        // Give up for anything else
        default:
            return(false);
    }
}
/// <summary>
/// Produces a RegexCode for the given tree. The tree is walked depth-first
/// without recursion: EmitFragment is called before and after each child of
/// an interior node and once for every leaf. The emitted program is wrapped
/// in a leading Lazybranch (patched at the end) and a trailing Stop.
/// </summary>
/// <param name="tree">The parsed tree to generate code for.</param>
/// <param name="culture">Culture passed through to the resulting RegexCode.</param>
/// <returns>A RegexCode holding the emitted codes, the string table and the capture mapping.</returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
{
    // When the capture-number list has exactly CapTop entries (or is absent) no
    // remapping table is kept; otherwise each listed capture number is mapped
    // to its position in the list.
    int capSlotCount;
    if (tree.CapNumList is null || tree.CapNumList.Length == tree.CapTop)
    {
        _caps = null;
        capSlotCount = tree.CapTop;
    }
    else
    {
        capSlotCount = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }

    // Placeholder branch at position 0; its target is patched once everything
    // else has been emitted.
    Emit(RegexOpcode.Lazybranch, 0);

    // Iterative depth-first walk. _intStack remembers, for each ancestor, the
    // index of the child we descended into.
    RegexNode current = tree.Root;
    int childIndex = 0;
    for (;;)
    {
        int childCount = current.ChildCount();

        if (childCount != 0 && childIndex < childCount)
        {
            // Interior node with children left: announce the child, then descend.
            EmitFragment(current.Kind | BeforeChild, current, childIndex);
            current = current.Child(childIndex);
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (childCount == 0)
        {
            // Leaf node.
            EmitFragment(current.Kind, current, 0);
        }

        // Nothing left on the stack means we are back at the root and done.
        if (_intStack.Length == 0)
        {
            break;
        }

        // Climb to the parent, report the finished child, move to the next sibling.
        childIndex = _intStack.Pop();
        current = current.Parent!;
        EmitFragment(current.Kind | AfterChild, current, childIndex);
        childIndex++;
    }

    // Point the initial Lazybranch at the final Stop, emit the Stop, snapshot the codes.
    PatchJump(0, _emitted.Length);
    Emit(RegexOpcode.Stop);
    int[] emitted = _emitted.AsSpan().ToArray();

    // The string table maps string -> index; invert it into an index-ordered array.
    string[] orderedStrings = new string[_stringTable.Count];
    foreach (KeyValuePair<string, int> entry in _stringTable)
    {
        orderedStrings[entry.Value] = entry.Key;
    }

    return new RegexCode(tree, culture, emitted, orderedStrings, _trackCount, _caps, capSlotCount);
}
/// <summary>
/// Applies a {min,max} quantifier to this node and returns the node that
/// represents the quantified expression.
/// </summary>
/// <param name="lazy">True to select the lazy loop node types, false for the greedy ones.</param>
/// <param name="min">Minimum repeat count.</param>
/// <param name="max">Maximum repeat count.</param>
/// <returns>
/// A new Empty node for {0,0}; this node unchanged for {1,1}; this node after
/// MakeRep for One/Notone/Set nodes; otherwise a new Loop/Lazyloop node with
/// this node as its only child.
/// </returns>
internal RegexNode MakeQuantifier(bool lazy, int min, int max)
{
    // Repeating zero times is represented by an Empty node.
    if (min == 0 && max == 0)
    {
        return new RegexNode(RegexNode.Empty, _options);
    }

    // Repeating exactly once is the node itself.
    if (min == 1 && max == 1)
    {
        return this;
    }

    bool isSingleCharacterNode =
        _type == RegexNode.One ||
        _type == RegexNode.Notone ||
        _type == RegexNode.Set;

    if (isSingleCharacterNode)
    {
        // Single-character nodes are handed to MakeRep instead of being wrapped.
        MakeRep(lazy ? RegexNode.Onelazy : RegexNode.Oneloop, min, max);
        return this;
    }

    // Everything else is wrapped in a general loop node.
    RegexNode loop = new RegexNode(lazy ? RegexNode.Lazyloop : RegexNode.Loop, _options, min, max);
    loop.AddChild(this);
    return loop;
}
/// <summary>
/// Restores the concatenation, alternation and group nodes from the top of
/// the saved stack, following the _next links, and leaves the stack pointing
/// at whatever followed the group. If the restored group has type 0x22 and
/// no children yet, the pending unit is attached as its first child.
/// </summary>
/// <remarks>
/// Throws the exception built by MakeException("IllegalCondition") when such a
/// group is restored but there is no pending unit.
/// </remarks>
internal void PopGroup()
{
    // Saved chain: concatenation -> alternation -> group -> rest of the stack.
    _concatenation = _stack;
    _alternation = _concatenation._next;
    _group = _alternation._next;
    _stack = _group._next;

    // NOTE(review): 0x22 (34) is presumably the Testgroup node type - confirm
    // against the RegexNode constants.
    bool groupAwaitsCondition = _group.Type() == 0x22 && _group.ChildCount() == 0;
    if (!groupAwaitsCondition)
    {
        return;
    }

    if (_unit == null)
    {
        throw MakeException(SR.GetString("IllegalCondition"));
    }

    // The pending unit becomes the group's first child and is consumed.
    _group.AddChild(_unit);
    _unit = null;
}
/// <summary>
/// Simplifies the children of this alternation in place:
/// children of type 0x18 are flattened into this node's child list, adjacent
/// children of type 11 or 9 with the same RightToLeft/IgnoreCase options are
/// merged into a single type-11 node when mergeable, and children of type
/// 0x16 are dropped. The result is then passed through StripEnation(0x16).
/// </summary>
/// <remarks>
/// NOTE(review): the numeric types presumably correspond to One (9), Set (11),
/// Nothing (0x16) and Alternate (0x18) - confirm against the RegexNode constants.
/// </remarks>
/// <returns>
/// A new type-0x16 node when there are no children; otherwise the result of
/// StripEnation(0x16).
/// </returns>
internal RegexNode ReduceAlternation()
{
    if (_children == null)
    {
        return new RegexNode(0x16, _options);
    }

    // State describing the most recently kept type-11/type-9 child.
    bool previousWasSetOrOne = false;
    bool previousCannotMerge = false;
    RegexOptions previousOptions = RegexOptions.None;

    // 'read' scans every child; 'write' is where the next surviving child goes.
    int read = 0;
    int write = 0;
    for (; read < _children.Count; read++, write++)
    {
        RegexNode current = _children[read];
        if (write < read)
        {
            _children[write] = current;
        }

        if (current._type == 0x18)
        {
            // Nested alternation: re-parent its children and splice them in
            // right after the current position so later iterations visit them.
            for (int k = 0; k < current._children.Count; k++)
            {
                current._children[k]._next = this;
            }

            _children.InsertRange(read + 1, current._children);
            write--;
            continue;
        }

        if (current._type == 11 || current._type == 9)
        {
            RegexOptions currentOptions =
                current._options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
            bool isSet = current._type == 11;

            // Merging needs a compatible predecessor; a type-11 node must
            // additionally be mergeable itself.
            bool canMerge = previousWasSetOrOne
                            && previousOptions == currentOptions
                            && !previousCannotMerge;
            if (canMerge && isSet)
            {
                canMerge = RegexCharClass.IsMergeable(current._str);
            }

            if (!canMerge)
            {
                // Keep this node and remember it as the new merge candidate.
                previousWasSetOrOne = true;
                previousCannotMerge = isSet && !RegexCharClass.IsMergeable(current._str);
                previousOptions = currentOptions;
                continue;
            }

            // Fold the current node into the previously kept one.
            write--;
            RegexNode target = _children[write];

            RegexCharClass merged;
            if (target._type == 9)
            {
                merged = new RegexCharClass();
                merged.AddChar(target._ch);
            }
            else
            {
                merged = RegexCharClass.Parse(target._str);
            }

            if (current._type == 9)
            {
                merged.AddChar(current._ch);
            }
            else
            {
                RegexCharClass currentClass = RegexCharClass.Parse(current._str);
                merged.AddCharClass(currentClass);
            }

            target._type = 11;
            target._str = merged.ToStringClass();
            continue;
        }

        if (current._type == 0x16)
        {
            // Dropped: the slot will be overwritten by the next surviving child.
            write--;
        }
        else
        {
            // Any other node breaks a run of mergeable children.
            previousWasSetOrOne = false;
            previousCannotMerge = false;
        }
    }

    // Trim the tail left over after compaction.
    if (write < read)
    {
        _children.RemoveRange(write, read - write);
    }

    return StripEnation(0x16);
}
/// <summary>
/// Returns the parser to its starting state: position 0, auto-capture
/// counter 1, no pending "ignore next paren" flag, an empty node stack, and
/// the supplied top-level options.
/// </summary>
/// <param name="topopts">Options to install as the current options.</param>
internal void Reset(RegexOptions topopts)
{
    _currentPos = 0;
    _autocap = 1;
    _ignoreNextParen = false;

    // Removes Count - 1 entries starting at index 0, so a non-empty options
    // stack is left holding only its last entry.
    int savedOptionCount = _optionsStack.Count;
    if (savedOptionCount > 0)
    {
        _optionsStack.RemoveRange(0, savedOptionCount - 1);
    }

    _options = topopts;
    _stack = null;
}
/*
 * Builds a replacement from an already-parsed pattern. The replacement
 * parser is shared with Regex, so the input arrives as a Concatenate node
 * whose children are literal text (One / Multi) and backreferences (Ref).
 *
 * Adjacent literals are coalesced into a single string. The resulting rule
 * list holds, in order, either a non-negative index into the string list or
 * a negative value encoding a reference as (-Specials - 1 - slot).
 *
 * Throws ArgumentException when concat is not a Concatenate node or contains
 * a child of any other type.
 */
internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps)
{
    _rep = rep;

    if (concat.Type() != RegexNode.Concatenate)
    {
        throw new ArgumentException(SR.GetString(SR.ReplacementError));
    }

    StringBuilder pendingLiteral = new StringBuilder();
    ArrayList literals = new ArrayList();
    ArrayList ruleList = new ArrayList();

    for (int index = 0; index < concat.ChildCount(); index++)
    {
        RegexNode part = concat.Child(index);
        int partType = part.Type();

        if (partType == RegexNode.Multi)
        {
            pendingLiteral.Append(part._str);
        }
        else if (partType == RegexNode.One)
        {
            pendingLiteral.Append(part._ch);
        }
        else if (partType == RegexNode.Ref)
        {
            // Flush any literal text collected before this reference.
            if (pendingLiteral.Length > 0)
            {
                ruleList.Add(literals.Count);
                literals.Add(pendingLiteral.ToString());
                pendingLiteral.Length = 0;
            }

            // Non-negative group numbers go through the capture map when one is supplied.
            int slot = part._m;
            if (_caps != null && slot >= 0)
            {
                slot = (int)_caps[slot];
            }

            ruleList.Add(-Specials - 1 - slot);
        }
        else
        {
            throw new ArgumentException(SR.GetString(SR.ReplacementError));
        }
    }

    // Trailing literal text, if any.
    if (pendingLiteral.Length > 0)
    {
        ruleList.Add(literals.Count);
        literals.Add(pendingLiteral.ToString());
    }

    _strings = literals;
    _rules = ruleList;
}
/// <summary>
/// Appends a literal taken from the pattern to the current concatenation.
/// A single character becomes a type-9 node and a longer run becomes a
/// type-12 node. When UseOptionI() is true and this is not a replacement
/// pattern, the text is lower-cased with the parser's culture first.
/// </summary>
/// <remarks>
/// NOTE(review): node types 9 and 12 presumably correspond to One and Multi -
/// confirm against the RegexNode constants.
/// </remarks>
/// <param name="pos">Index in the pattern where the literal starts.</param>
/// <param name="cch">Number of characters in the literal; 0 means nothing is added.</param>
/// <param name="isReplacement">True when parsing a replacement pattern (no case folding).</param>
internal void AddConcatenate(int pos, int cch, bool isReplacement)
{
    if (cch == 0)
    {
        return;
    }

    RegexNode literal;
    if (cch <= 1)
    {
        // Single character.
        char single = _pattern[pos];
        if (UseOptionI() && !isReplacement)
        {
            single = char.ToLower(single, _culture);
        }

        literal = new RegexNode(9, _options, single);
    }
    else
    {
        // Run of characters.
        string text = _pattern.Substring(pos, cch);
        if (UseOptionI() && !isReplacement)
        {
            StringBuilder lowered = new StringBuilder(text.Length);
            foreach (char ch in text)
            {
                lowered.Append(char.ToLower(ch, _culture));
            }

            text = lowered.ToString();
        }

        literal = new RegexNode(12, _options, text);
    }

    _concatenation.AddChild(literal);
}
/// <summary>
/// Emits the code for one step of the depth-first tree walk. It is called
/// with the plain node type for a leaf, and with the node type combined with
/// BeforeChild / AfterChild before and after each child of an interior node.
/// </summary>
/// <param name="nodetype">Node type, optionally OR-ed with BeforeChild or AfterChild.</param>
/// <param name="node">The node being visited.</param>
/// <param name="curIndex">Index of the child being entered or left (0 for leaves).</param>
/// <exception cref="ArgumentException">The node type is not handled.</exception>
private void EmitFragment(int nodetype, RegexNode node, int curIndex)
{
    // Right-to-left and case-insensitive flags are only computed for node
    // types up to and including Ref.
    int bits = 0;
    if (nodetype <= RegexNode.Ref)
    {
        if (node.UseOptionR())
        {
            bits |= RegexCode.Rtl;
        }

        if ((node._options & RegexOptions.IgnoreCase) != 0)
        {
            bits |= RegexCode.Ci;
        }
    }

    switch (nodetype)
    {
        // Nodes that emit nothing.
        case RegexNode.Concatenate | BeforeChild:
        case RegexNode.Concatenate | AfterChild:
        case RegexNode.Empty:
        case RegexNode.Group | BeforeChild:
        case RegexNode.Group | AfterChild:
            break;

        case RegexNode.Alternate | BeforeChild:
            // Every branch except the last starts with a Lazybranch to be patched later.
            if (curIndex < node._children.Count - 1)
            {
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }
            break;

        case RegexNode.Alternate | AfterChild:
            if (curIndex < node._children.Count - 1)
            {
                // End of a non-final branch: emit a Goto (patched after the last
                // branch) and point this branch's Lazybranch at the next branch.
                int lazyBranchPos = PopInt();
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
                PatchJump(lazyBranchPos, CurPos());
            }
            else
            {
                // End of the final branch: patch one pending Goto per earlier branch.
                for (int pending = 0; pending < curIndex; pending++)
                {
                    PatchJump(PopInt(), CurPos());
                }
            }
            break;

        case RegexNode.Testref | BeforeChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Setjump);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
                Emit(RegexCode.Testref, MapCapnum(node._m));
                Emit(RegexCode.Forejump);
            }
            break;

        case RegexNode.Testref | AfterChild:
            if (curIndex == 0)
            {
                int branchPos = PopInt();
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
                PatchJump(branchPos, CurPos());
                Emit(RegexCode.Forejump);

                // Without a second child the Goto is patched right away.
                if (node._children.Count <= 1)
                {
                    PatchJump(PopInt(), CurPos());
                }
            }
            else if (curIndex == 1)
            {
                PatchJump(PopInt(), CurPos());
            }
            break;

        case RegexNode.Testgroup | BeforeChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Setjump);
                Emit(RegexCode.Setmark);
                PushInt(CurPos());
                Emit(RegexCode.Lazybranch, 0);
            }
            break;

        case RegexNode.Testgroup | AfterChild:
            if (curIndex == 0)
            {
                Emit(RegexCode.Getmark);
                Emit(RegexCode.Forejump);
            }
            else if (curIndex == 1)
            {
                int branchPos = PopInt();
                PushInt(CurPos());
                Emit(RegexCode.Goto, 0);
                PatchJump(branchPos, CurPos());
                Emit(RegexCode.Getmark);
                Emit(RegexCode.Forejump);

                // Without a third child the Goto is patched right away.
                if (node._children.Count <= 2)
                {
                    PatchJump(PopInt(), CurPos());
                }
            }
            else if (curIndex == 2)
            {
                PatchJump(PopInt(), CurPos());
            }
            break;

        case RegexNode.Loop | BeforeChild:
        case RegexNode.Lazyloop | BeforeChild:
            {
                // A counter is used when the loop is bounded above or needs more
                // than one mandatory iteration; otherwise a mark is used.
                bool counted = node._n < Int32.MaxValue || node._m > 1;
                bool canSkipBody = node._m == 0;

                if (counted)
                {
                    if (canSkipBody)
                    {
                        Emit(RegexCode.Nullcount, 0);
                    }
                    else
                    {
                        Emit(RegexCode.Setcount, 1 - node._m);
                    }
                }
                else
                {
                    if (canSkipBody)
                    {
                        Emit(RegexCode.Nullmark);
                    }
                    else
                    {
                        Emit(RegexCode.Setmark);
                    }
                }

                // A zero-minimum loop jumps over its body first; patched in AfterChild.
                if (canSkipBody)
                {
                    PushInt(CurPos());
                    Emit(RegexCode.Goto, 0);
                }

                // Remember where the loop body starts.
                PushInt(CurPos());
                break;
            }

        case RegexNode.Loop | AfterChild:
        case RegexNode.Lazyloop | AfterChild:
            {
                int branchOpPos = CurPos();

                // Offset added to the branch opcode: 0 for Loop, and the distance
                // between the Lazyloop and Loop type codes for Lazyloop.
                int lazyOffset = nodetype - (RegexNode.Loop | AfterChild);
                bool counted = node._n < Int32.MaxValue || node._m > 1;

                if (counted)
                {
                    int remaining = node._n == Int32.MaxValue ? Int32.MaxValue : node._n - node._m;
                    Emit(RegexCode.Branchcount + lazyOffset, PopInt(), remaining);
                }
                else
                {
                    Emit(RegexCode.Branchmark + lazyOffset, PopInt());
                }

                // Point the initial skip-the-body Goto at the branch instruction.
                if (node._m == 0)
                {
                    PatchJump(PopInt(), branchOpPos);
                }
                break;
            }

        case RegexNode.Capture | BeforeChild:
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Capture | AfterChild:
            Emit(RegexCode.Capturemark, MapCapnum(node._m), MapCapnum(node._n));
            break;

        case RegexNode.Require | BeforeChild:
            // NOTE: the Setjump below is what makes lookahead/lookbehind
            // NON-BACKTRACKING. It can be commented out together with (*).
            Emit(RegexCode.Setjump);
            Emit(RegexCode.Setmark);
            break;

        case RegexNode.Require | AfterChild:
            Emit(RegexCode.Getmark);
            // NOTE: the Forejump below is what makes lookahead/lookbehind
            // NON-BACKTRACKING. It can be commented out together with (*).
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Prevent | BeforeChild:
            Emit(RegexCode.Setjump);
            PushInt(CurPos());
            Emit(RegexCode.Lazybranch, 0);
            break;

        case RegexNode.Prevent | AfterChild:
            Emit(RegexCode.Backjump);
            PatchJump(PopInt(), CurPos());
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.Greedy | BeforeChild:
            Emit(RegexCode.Setjump);
            break;

        case RegexNode.Greedy | AfterChild:
            Emit(RegexCode.Forejump);
            break;

        case RegexNode.One:
        case RegexNode.Notone:
            Emit(node._type | bits, (int)node._ch);
            break;

        case RegexNode.Notoneloop:
        case RegexNode.Notonelazy:
        case RegexNode.Oneloop:
        case RegexNode.Onelazy:
            {
                // Mandatory iterations are emitted as a fixed repeat ...
                if (node._m > 0)
                {
                    bool matchesChar = node._type == RegexNode.Oneloop || node._type == RegexNode.Onelazy;
                    int repeatOp = matchesChar ? RegexCode.Onerep : RegexCode.Notonerep;
                    Emit(repeatOp | bits, (int)node._ch, node._m);
                }

                // ... and any optional iterations as the loop itself.
                if (node._n > node._m)
                {
                    int optionalCount = node._n == Int32.MaxValue ? Int32.MaxValue : node._n - node._m;
                    Emit(node._type | bits, (int)node._ch, optionalCount);
                }
                break;
            }

        case RegexNode.Setloop:
        case RegexNode.Setlazy:
            {
                if (node._m > 0)
                {
                    Emit(RegexCode.Setrep | bits, StringCode(node._str), node._m);
                }

                if (node._n > node._m)
                {
                    int optionalCount = (node._n == Int32.MaxValue) ? Int32.MaxValue : node._n - node._m;
                    Emit(node._type | bits, StringCode(node._str), optionalCount);
                }
                break;
            }

        // Both carry their payload as an entry in the string table.
        case RegexNode.Multi:
        case RegexNode.Set:
            Emit(node._type | bits, StringCode(node._str));
            break;

        case RegexNode.Ref:
            Emit(node._type | bits, MapCapnum(node._m));
            break;

        // Operand-less nodes: the node type is emitted as the opcode.
        case RegexNode.Nothing:
        case RegexNode.Bol:
        case RegexNode.Eol:
        case RegexNode.Boundary:
        case RegexNode.Nonboundary:
        case RegexNode.ECMABoundary:
        case RegexNode.NonECMABoundary:
        case RegexNode.Beginning:
        case RegexNode.Start:
        case RegexNode.EndZ:
        case RegexNode.End:
            Emit(node._type);
            break;

        default:
            throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString(CultureInfo.CurrentCulture)));
    }
}