/*
 * The top level RegexCode generator. It does a depth-first walk
 * through the tree and calls EmitFragment to emit code before
 * and after each child of an interior node, and at each leaf.
 *
 * It runs two passes: the first counts the size of the generated
 * code, the second generates the code.
 *
 * CONSIDER: we need to time it against the alternative, which is
 * to just generate the code and grow the array as we go.
 */
internal RegexCode RegexCodeFromRegexTree(RegexTree tree) {
    int capsize;

    // Construct the sparse capnum mapping only if some numbers are unused.
    if (tree._capnumlist != null && tree._captop != tree._capnumlist.Length) {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++) {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }
    else {
        capsize = tree._captop;
        _caps = null;
    }

    // Pass 1 runs with _counting set; pass 2 allocates _emitted and runs again.
    _counting = true;
    bool wasCounting;
    do {
        if (!_counting) {
            _emitted = new int[_count];
        }

        RegexNode node = tree._root;
        int childIndex = 0;

        Emit(RegexCode.Lazybranch, 0);

        while (true) {
            bool interior = node._children != null;

            if (interior && childIndex < node._children.Count) {
                // Descend into the next unvisited child.
                EmitFragment(node._type | BeforeChild, node, childIndex);
                node = (RegexNode)node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (!interior) {
                EmitFragment(node._type, node, 0);
            }

            if (EmptyStack()) {
                break;
            }

            // Climb back to the parent and resume after the child just finished.
            childIndex = PopInt();
            node = node._next;
            EmitFragment(node._type | AfterChild, node, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        wasCounting = _counting;
        _counting = false;
    } while (wasCounting);

    // If the set of possible first chars is very large, don't bother
    // scanning for it (common case: . == [^\n]).
    //
    // REVIEW (ChrisAn/DavidGut, 11/21/2000): the threshold below used to
    // read "> 0XFFF" (capital X), which the C# compiler evaluated as 0.
    // Beginning with CSC 9055 that spelling is parsed as a float and no
    // longer compiles, while the lowercase "0xFFF" makes everything fail,
    // so the comparison stays against 0. What this check is really
    // supposed to do is still an open question.
    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0) {
        fcPrefix = null;
    }

    // REVIEW: is this even used anywhere? Can we use it somehow?
    // (RegexFCD.ScanChars(tree) is not called here; the value is always null.)
    RegexPrefix scPrefix = null;

    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != 0) {
        culture = CultureInfo.InvariantCulture;
    }
    else {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore prefix is built only for a non-empty literal prefix.
    RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, scPrefix, anchors, rtl);
}
/// <summary>
/// The top level RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Construct the sparse capnum mapping only if some numbers are unused.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    RegexNode? curNode = tree.Root;
    int childIndex = 0;

    Emit(RegexCode.Lazybranch, 0);

    while (true)
    {
        if (curNode.Children != null && childIndex < curNode.Children.Count)
        {
            // Descend into the next unvisited child, remembering its index.
            EmitFragment(curNode.NType | BeforeChild, curNode, childIndex);
            curNode = curNode.Children[childIndex];
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (curNode.Children == null)
        {
            EmitFragment(curNode.NType, curNode, 0);
        }

        if (_intStack.Length == 0)
        {
            break;
        }

        // Climb back to the parent and resume after the child just finished.
        childIndex = _intStack.Pop();
        curNode = curNode.Next;
        EmitFragment(curNode!.NType | AfterChild, curNode, childIndex);
        childIndex++;
    }

    // Point the leading Lazybranch at the end of the code, then terminate it.
    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);

    RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree.Options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore prefix is built only for a non-empty literal prefix.
    RegexBoyerMoore? bmPrefix = prefix.Prefix.Length > 0
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);
    int[] emitted = _emitted.AsSpan().ToArray();

    return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/*
 * The top level RegexCode generator. It walks the tree depth-first,
 * calling EmitFragment to emit code before and after each child of
 * an interior node, and once at each leaf.
 *
 * The walk is performed twice: the first pass counts the size of
 * the generated code, the second pass generates the code.
 *
 * We should time it against the alternative, which is
 * to just generate the code and grow the array as we go.
 */
internal RegexCode RegexCodeFromRegexTree(RegexTree tree) {
    // Construct the sparse capnum mapping only if some numbers are unused.
    int capsize;
    if (tree._capnumlist != null && tree._captop != tree._capnumlist.Length) {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++) {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }
    else {
        capsize = tree._captop;
        _caps = null;
    }

    _counting = true;

    while (true) {
        // The second pass has the final count available, so allocate the output.
        if (!_counting) {
            _emitted = new int[_count];
        }

        RegexNode node = tree._root;
        int childIndex = 0;

        Emit(RegexCode.Lazybranch, 0);

        while (true) {
            bool interior = node._children != null;

            if (interior && childIndex < node._children.Count) {
                // Descend into the next unvisited child.
                EmitFragment(node._type | BeforeChild, node, childIndex);
                node = (RegexNode)node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (!interior) {
                EmitFragment(node._type, node, 0);
            }

            if (EmptyStack()) {
                break;
            }

            // Climb back to the parent and resume after the child just finished.
            childIndex = PopInt();
            node = node._next;
            EmitFragment(node._type | AfterChild, node, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        if (!_counting) {
            break;
        }

        _counting = false;
    }

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != 0) {
        culture = CultureInfo.InvariantCulture;
    }
    else {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore prefix is built only for a non-empty literal prefix.
    RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
// Generates the RegexCode for a parsed tree: walks the tree depth-first, calling
// EmitFragment at each leaf and before/after each child of an interior node.
// The walk runs twice, first with _counting set and then again after _emitted
// has been allocated with _count entries.
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build the sparse capture-number mapping only when some numbers are unused.
    int capSize;
    if ((tree._capnumlist != null) && (tree._captop != tree._capnumlist.Length))
    {
        capSize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++)
        {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }
    else
    {
        capSize = tree._captop;
        _caps = null;
    }

    _counting = true;
    while (true)
    {
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode current = tree._root;
        int childIndex = 0;

        // NOTE(review): 0x17 is presumably the Lazybranch opcode - confirm against RegexCode.
        Emit(0x17, 0);

        while (true)
        {
            bool interior = current._children != null;

            if (interior && (childIndex < current._children.Count))
            {
                // NOTE(review): 0x40 looks like the "before child" flag - confirm.
                EmitFragment(current._type | 0x40, current, childIndex);
                current = (RegexNode)current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (!interior)
            {
                EmitFragment(current._type, current, 0);
            }

            if (EmptyStack())
            {
                break;
            }

            // Climb back to the parent and resume after the child just finished.
            // NOTE(review): 0x80 looks like the "after child" flag - confirm.
            childIndex = PopInt();
            current = current._next;
            EmitFragment(current._type | 0x80, current, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());

        // NOTE(review): 40 is presumably the Stop opcode - confirm against RegexCode.
        Emit(40);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    // The first-chars prefix is discarded whenever its set size is greater than zero.
    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    if ((fcPrefix != null) && (RegexCharClass.SetSize(fcPrefix.Prefix) > 0))
    {
        fcPrefix = null;
    }

    RegexPrefix scPrefix = null;
    RegexPrefix literalPrefix = RegexFCD.Prefix(tree);
    bool rightToLeft = (tree._options & RegexOptions.RightToLeft) != RegexOptions.None;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != RegexOptions.None)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore prefix is built only for a non-empty literal prefix.
    RegexBoyerMoore boyerMoore = ((literalPrefix != null) && (literalPrefix.Prefix.Length > 0))
        ? new RegexBoyerMoore(literalPrefix.Prefix, literalPrefix.CaseInsensitive, rightToLeft, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capSize, boyerMoore, fcPrefix, scPrefix, anchors, rightToLeft);
}
/// <summary>
/// The top level RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
/// <param name="tree">The regex tree to generate code for.</param>
/// <returns>
/// A <see cref="RegexCode"/> built from a copy of the emitted codes, the writer's
/// string table, track count and capture mapping, plus the computed prefixes,
/// anchors and right-to-left flag.
/// </returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    Span<int> emittedSpan = stackalloc int[EmittedSize];
    Span<int> intStackSpan = stackalloc int[IntStackSize];

    RegexWriter writer = new RegexWriter(emittedSpan, intStackSpan);

    try
    {
        // Construct the sparse capnum mapping only if some numbers are unused.
        int capsize;
        if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
        {
            capsize = tree._captop;
            writer._caps = null;
        }
        else
        {
            capsize = tree._capnumlist.Length;
            writer._caps = tree._caps;
            for (int i = 0; i < tree._capnumlist.Length; i++)
            {
                writer._caps[tree._capnumlist[i]] = i;
            }
        }

        RegexNode curNode = tree._root;
        int curChild = 0;

        writer.Emit(RegexCode.Lazybranch, 0);

        while (true)
        {
            if (curNode._children == null)
            {
                writer.EmitFragment(curNode._type, curNode, 0);
            }
            else if (curChild < curNode._children.Count)
            {
                // Descend into the next unvisited child, remembering its index.
                writer.EmitFragment(curNode._type | BeforeChild, curNode, curChild);
                curNode = curNode._children[curChild];
                writer._intStack.Append(curChild);
                curChild = 0;
                continue;
            }

            if (writer._intStack.Length == 0)
            {
                break;
            }

            // Climb back to the parent and resume after the child just finished.
            curChild = writer._intStack.Pop();
            curNode = curNode._next;
            writer.EmitFragment(curNode._type | AfterChild, curNode, curChild);
            curChild++;
        }

        // Point the leading Lazybranch at the end of the code, then terminate it.
        writer.PatchJump(0, writer._emitted.Length);
        writer.Emit(RegexCode.Stop);

        RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
        RegexPrefix prefix = RegexFCD.Prefix(tree);
        bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

        CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0
            ? CultureInfo.InvariantCulture
            : CultureInfo.CurrentCulture;

        // A Boyer-Moore prefix is built only for a non-empty literal prefix.
        RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
            ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
            : null;

        int anchors = RegexFCD.Anchors(tree);

        // Copy the codes out before the builders are disposed in the finally block.
        int[] emitted = writer._emitted.AsReadOnlySpan().ToArray();

        return new RegexCode(emitted, writer._stringTable, writer._trackCount, writer._caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
    }
    finally
    {
        // Clean up and return the borrowed arrays, even when emission or
        // prefix analysis throws part-way through.
        writer._emitted.Dispose();
        writer._intStack.Dispose();
    }
}
/// <summary>
/// The top level RegexCode generator. It walks the tree depth-first,
/// calling EmitFragment to emit code before and after each child of
/// an interior node and once at each leaf. It also computes various
/// information about the tree, such as prefix data to help with
/// optimizations.
/// </summary>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Construct the sparse capnum mapping only if some numbers are unused.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    // Every written code begins with a lazy branch. It is back-patched to point
    // at the ending Stop once the whole expression has been written.
    Emit(RegexCode.Lazybranch, 0);

    // Emit every node.
    RegexNode node = tree.Root;
    int childIndex = 0;
    while (true)
    {
        int childCount = node.ChildCount();

        if (childCount != 0 && childIndex < childCount)
        {
            // Descend into the next unvisited child, remembering its index.
            EmitFragment(node.Type | BeforeChild, node, childIndex);
            node = node.Child(childIndex);
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (childCount == 0)
        {
            EmitFragment(node.Type, node, 0);
        }

        if (_intStack.Length == 0)
        {
            break;
        }

        // Climb back to the parent and resume after the child just finished.
        childIndex = _intStack.Pop();
        node = node.Next!;
        EmitFragment(node.Type | AfterChild, node, childIndex);
        childIndex++;
    }

    // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array.
    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);
    int[] emitted = _emitted.AsSpan().ToArray();

    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    // Compute prefixes to help optimize FindFirstChar.
    RegexBoyerMoore? bmPrefix = null;
    RegexPrefix? fcPrefix = null;
    RegexPrefix prefix = RegexFCD.Prefix(tree);

    // If the prefix length is <= 1 or > MaxLimit, perf is better using fcPrefix.
    int prefixLength = prefix.Prefix.Length;
    bool useBoyerMoore = prefixLength > 1 && prefixLength <= RegexBoyerMoore.MaxLimit;

    if (!useBoyerMoore)
    {
        // No suitable single string was found, so try to compute the set of
        // characters that might begin the string.
        fcPrefix = RegexFCD.FirstChars(tree);
    }
    else
    {
        // Compute a Boyer-Moore prefix for the single string of sufficient length
        // that always begins the expression.
        CultureInfo culture;
        if ((tree.Options & RegexOptions.CultureInvariant) != 0)
        {
            culture = CultureInfo.InvariantCulture;
        }
        else
        {
            culture = CultureInfo.CurrentCulture;
        }

        bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
    }

    // Compute any anchors starting the expression.
    int anchors = RegexFCD.Anchors(tree);

    // Convert the string table into an ordered string array.
    string[] stringsByIndex = new string[_stringTable.Count];
    foreach (KeyValuePair<string, int> entry in _stringTable)
    {
        stringsByIndex[entry.Value] = entry.Key;
    }

    // Return all of that in a RegexCode object.
    return new RegexCode(tree, emitted, stringsByIndex, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}