/// <summary>
/// Generates the <see cref="RegexInterpreterCode"/> for <c>_tree</c>.
/// The tree is traversed depth-first without recursion: EmitFragment is called
/// before and after each child of an interior node, and once for each leaf.
/// </summary>
/// <returns>
/// A <see cref="RegexInterpreterCode"/> built from the tree's FindOptimizations and
/// Options, the emitted opcode array, the string table flattened into an array
/// indexed by each string's table value, and the track count.
/// </returns>
private RegexInterpreterCode EmitCode()
{
    // Slot 0 is a lazy branch whose target is unknown at this point; it is
    // back-patched below, just before the final Stop is written.
    Emit(RegexOpcode.Lazybranch, 0);

    // Iterative depth-first walk. _intStack remembers, for every ancestor,
    // which child index we descended through.
    RegexNode node = _tree.Root;
    int childIndex = 0;
    for (; ; )
    {
        int childCount = node.ChildCount();

        if (childCount != 0 && childIndex < childCount)
        {
            // Interior node with children still to visit: emit the "before"
            // fragment and descend into the next child.
            EmitFragment(node.Kind | BeforeChild, node, childIndex);
            node = node.Child(childIndex);
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (childCount == 0)
        {
            // Leaf node: a single fragment with no before/after flag.
            EmitFragment(node.Kind, node, 0);
        }

        // Nothing left on the stack means we are back at the root and done.
        if (_intStack.Length == 0)
        {
            break;
        }

        // Climb back to the parent, emit its "after" fragment for the child we
        // just finished, and move on to the next sibling.
        childIndex = _intStack.Pop();
        node = node.Parent!;
        EmitFragment(node.Kind | AfterChild, node, childIndex);
        childIndex++;
    }

    // Point the leading Lazybranch at the position where Stop is about to be
    // written, then write Stop and snapshot the opcode buffer.
    PatchJump(0, _emitted.Length);
    Emit(RegexOpcode.Stop);
    int[] codes = _emitted.AsSpan().ToArray();

    // Flatten the string table: each entry's value is its slot in the array.
    var orderedStrings = new string[_stringTable.Count];
    foreach (KeyValuePair<string, int> pair in _stringTable)
    {
        orderedStrings[pair.Value] = pair.Key;
    }

    return new RegexInterpreterCode(_tree.FindOptimizations, _tree.Options, codes, orderedStrings, _trackCount);
}
/// <summary>
/// The top-level RegexCode generator. It performs a non-recursive depth-first
/// walk of the tree, calling EmitFragment to emit code before and after each
/// child of an interior node and at each leaf. Afterwards it gathers the
/// first-character set, literal prefix and anchor information from RegexFCD.
/// </summary>
/// <param name="tree">The parsed regular-expression tree to generate code for.</param>
/// <returns>
/// A <see cref="RegexCode"/> holding the emitted opcodes, the string table, the
/// track count, the capture mapping and size, and the prefix/anchor data.
/// </returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build a sparse capture-number mapping only when some numbers are unused,
    // i.e. when a capture-number list exists and its length differs from CapTop.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    RegexNode? node = tree.Root;
    int childIndex = 0;

    // Placeholder branch at position 0; patched after the walk completes.
    Emit(RegexCode.Lazybranch, 0);

    for (; ; )
    {
        if (node.Children != null && childIndex < node.Children.Count)
        {
            // Interior node with an unvisited child: emit the "before"
            // fragment and descend.
            EmitFragment(node.NType | BeforeChild, node, childIndex);
            node = node.Children[childIndex];
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (node.Children == null)
        {
            // Leaf node.
            EmitFragment(node.NType, node, 0);
        }

        // An empty stack means the walk has returned to the root.
        if (_intStack.Length == 0)
        {
            break;
        }

        // Return to the node reached through Next, emit its "after" fragment
        // for the finished child, then advance to the following child.
        childIndex = _intStack.Pop();
        node = node.Next;
        EmitFragment(node!.NType | AfterChild, node, childIndex);
        childIndex++;
    }

    // Aim the leading Lazybranch at the slot where Stop is about to be written.
    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);

    RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;

    // A Boyer-Moore table is only built when a non-empty literal prefix exists.
    RegexBoyerMoore? bmPrefix = prefix.Prefix.Length > 0
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);
    int[] codes = _emitted.AsSpan().ToArray();

    return new RegexCode(codes, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/// <summary>
/// The top-level RegexCode generator. It performs a non-recursive depth-first
/// walk of the tree, calling EmitFragment to emit code before and after each
/// child of an interior node and at each leaf.
/// </summary>
/// <param name="tree">The parsed regular-expression tree to generate code for.</param>
/// <param name="culture">Culture that is passed through unchanged to the <see cref="RegexCode"/> constructor.</param>
/// <returns>
/// A <see cref="RegexCode"/> built from the tree, the culture, the emitted opcode
/// array, the string table flattened into an array, the track count, and the
/// capture mapping and size.
/// </returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
{
    // Build a sparse capture-number mapping only when some numbers are unused,
    // i.e. when a capture-number list exists and its length differs from CapTop.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    // Slot 0 is a lazy branch whose target is unknown at this point; it is
    // back-patched below, just before the final Stop is written.
    Emit(RegexOpcode.Lazybranch, 0);

    // Iterative depth-first walk. _intStack remembers, for every ancestor,
    // which child index we descended through.
    RegexNode node = tree.Root;
    int childIndex = 0;
    for (; ; )
    {
        int childCount = node.ChildCount();

        if (childCount != 0 && childIndex < childCount)
        {
            // Interior node with children still to visit: emit the "before"
            // fragment and descend into the next child.
            EmitFragment(node.Kind | BeforeChild, node, childIndex);
            node = node.Child(childIndex);
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (childCount == 0)
        {
            // Leaf node: a single fragment with no before/after flag.
            EmitFragment(node.Kind, node, 0);
        }

        // Nothing left on the stack means we are back at the root and done.
        if (_intStack.Length == 0)
        {
            break;
        }

        // Climb back to the parent, emit its "after" fragment for the child we
        // just finished, and move on to the next sibling.
        childIndex = _intStack.Pop();
        node = node.Parent!;
        EmitFragment(node.Kind | AfterChild, node, childIndex);
        childIndex++;
    }

    // Point the leading Lazybranch at the position where Stop is about to be
    // written, then write Stop and snapshot the opcode buffer.
    PatchJump(0, _emitted.Length);
    Emit(RegexOpcode.Stop);
    int[] codes = _emitted.AsSpan().ToArray();

    // Flatten the string table: each entry's value is its slot in the array.
    var orderedStrings = new string[_stringTable.Count];
    foreach (KeyValuePair<string, int> pair in _stringTable)
    {
        orderedStrings[pair.Value] = pair.Key;
    }

    return new RegexCode(tree, culture, codes, orderedStrings, _trackCount, _caps, capsize);
}
/// <summary> /// The top level RegexCode generator. It does a depth-first walk /// through the tree and calls EmitFragment to emit code before /// and after each child of an interior node and at each leaf. /// It also computes various information about the tree, such as /// prefix data to help with optimizations. /// </summary> public RegexCode RegexCodeFromRegexTree(RegexTree tree) { // Construct sparse capnum mapping if some numbers are unused. int capsize; if (tree.CapNumList == null || tree.CapTop == tree.CapNumList.Length) { capsize = tree.CapTop; _caps = null; } else { capsize = tree.CapNumList.Length; _caps = tree.Caps; for (int i = 0; i < tree.CapNumList.Length; i++) { _caps[tree.CapNumList[i]] = i; } } // Every written code begins with a lazy branch. This will be back-patched // to point to the ending Stop after the whole expression has been written. Emit(RegexCode.Lazybranch, 0); // Emit every node. RegexNode curNode = tree.Root; int curChild = 0; while (true) { int curNodeChildCount = curNode.ChildCount(); if (curNodeChildCount == 0) { EmitFragment(curNode.Type, curNode, 0); } else if (curChild < curNodeChildCount) { EmitFragment(curNode.Type | BeforeChild, curNode, curChild); curNode = curNode.Child(curChild); _intStack.Append(curChild); curChild = 0; continue; } if (_intStack.Length == 0) { break; } curChild = _intStack.Pop(); curNode = curNode.Next !; EmitFragment(curNode.Type | AfterChild, curNode, curChild); curChild++; } // Patch the starting Lazybranch, emit the final Stop, and get the resulting code array. PatchJump(0, _emitted.Length); Emit(RegexCode.Stop); int[] emitted = _emitted.AsSpan().ToArray(); bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0; bool compiled = (tree.Options & RegexOptions.Compiled) != 0; // Compute prefixes to help optimize FindFirstChar. RegexBoyerMoore?boyerMoorePrefix = null; (string CharClass, bool CaseInsensitive)[]? leadingCharClasses = null;
/// <summary>
/// The top-level RegexCode generator. It performs a non-recursive depth-first
/// walk of the tree, calling EmitFragment to emit code before and after each
/// child of an interior node and at each leaf. It then computes prefix and
/// anchor information used to optimize FindFirstChar.
/// </summary>
/// <param name="tree">The parsed regular-expression tree to generate code for.</param>
/// <returns>
/// A <see cref="RegexCode"/> built from the tree, the emitted opcode array, the
/// string table flattened into an array, the track count, the capture mapping
/// and size, and the computed prefix/anchor data.
/// </returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build a sparse capture-number mapping only when some numbers are unused,
    // i.e. when a capture-number list exists and its length differs from CapTop.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    // Slot 0 is a lazy branch whose target is unknown at this point; it is
    // back-patched below, just before the final Stop is written.
    Emit(RegexCode.Lazybranch, 0);

    // Iterative depth-first walk. _intStack remembers, for every ancestor,
    // which child index we descended through.
    RegexNode node = tree.Root;
    int childIndex = 0;
    for (; ; )
    {
        int childCount = node.ChildCount();

        if (childCount != 0 && childIndex < childCount)
        {
            // Interior node with children still to visit: emit the "before"
            // fragment and descend into the next child.
            EmitFragment(node.Type | BeforeChild, node, childIndex);
            node = node.Child(childIndex);
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (childCount == 0)
        {
            // Leaf node: a single fragment with no before/after flag.
            EmitFragment(node.Type, node, 0);
        }

        // Nothing left on the stack means we are back at the root and done.
        if (_intStack.Length == 0)
        {
            break;
        }

        // Return to the node reached through Next, emit its "after" fragment
        // for the finished child, then advance to the following child.
        childIndex = _intStack.Pop();
        node = node.Next!;
        EmitFragment(node.Type | AfterChild, node, childIndex);
        childIndex++;
    }

    // Point the leading Lazybranch at the position where Stop is about to be
    // written, then write Stop and snapshot the opcode buffer.
    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);
    int[] codes = _emitted.AsSpan().ToArray();

    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    // Compute prefixes to help optimize FindFirstChar. Exactly one of the two
    // is populated; the other stays null.
    RegexBoyerMoore? bmPrefix = null;
    RegexPrefix? fcPrefix = null;

    RegexPrefix prefix = RegexFCD.Prefix(tree);
    int prefixLength = prefix.Prefix.Length;

    // A literal prefix of length <= 1 or > MaxLimit performs better with the
    // first-character set than with Boyer-Moore.
    bool useBoyerMoore = prefixLength > 1 && prefixLength <= RegexBoyerMoore.MaxLimit;
    if (useBoyerMoore)
    {
        // A single string of sufficient length always begins the expression.
        CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0
            ? CultureInfo.InvariantCulture
            : CultureInfo.CurrentCulture;
        bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
    }
    else
    {
        // Otherwise fall back to the set of characters that might begin a match.
        fcPrefix = RegexFCD.FirstChars(tree);
    }

    // Compute any anchors starting the expression.
    int anchors = RegexFCD.Anchors(tree);

    // Flatten the string table: each entry's value is its slot in the array.
    var orderedStrings = new string[_stringTable.Count];
    foreach (KeyValuePair<string, int> pair in _stringTable)
    {
        orderedStrings[pair.Value] = pair.Key;
    }

    return new RegexCode(tree, codes, orderedStrings, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/// <summary>
/// Pops <c>_intStack</c> and returns the popped value.
/// </summary>
/// <returns>The value returned by <c>_intStack.Pop()</c>.</returns>
private int PopInt() => _intStack.Pop();