/*
 * A related computation: walks the front of a RegexTree looking for a
 * literal leading substring. It is deliberately simple and gives up
 * (returning RegexPrefix.Empty) at the first node type it does not handle.
 */
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childIndex = 0;
    RegexNode current = tree._root;

    while (true)
    {
        switch (current._type)
        {
            case RegexNode.Concatenate:
                // Remember a non-empty concatenation so its children are visited in order below.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childIndex = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look through the wrapper at its first child and forget any concatenation.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (current._m <= 0)
                {
                    return RegexPrefix.Empty;
                }

                // The prefix is _m copies of the loop character.
                return new RegexPrefix(
                    new string(current._ch, current._m),
                    (current._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return new RegexPrefix(
                    current._ch.ToString(CultureInfo.InvariantCulture),
                    (current._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return new RegexPrefix(
                    current._str,
                    (current._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // These node types are skipped; move on to the next sibling.
                break;

            default:
                return RegexPrefix.Empty;
        }

        // Advance to the next child of the remembered concatenation, if there is one.
        if (enclosingConcat == null || childIndex >= enclosingConcat.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        current = enclosingConcat.Child(childIndex);
        childIndex++;
    }
}
// Walks the front of the tree and returns the anchor bits for the first anchor-type
// node it reaches, or 0 if it reaches something else first.
// NOTE(review): the numeric case labels are presumably RegexNode type constants
// (this looks like decompiled code) - confirm against RegexNode before renaming them.
internal static int Anchors(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childIndex = 0;
    int result = 0;
    RegexNode current = tree._root;

    for (;;)
    {
        switch (current._type)
        {
            case 14:
            case 15:
            case 16:
            case 18:
            case 19:
            case 20:
            case 21:
            case 41:
                // Anchor-type node: translate its type and stop.
                return result | AnchorFromType(current._type);

            case 17:
            case 22:
            case 24:
            case 26:
            case 27:
            case 29:
                return result;

            case 23:
            case 30:
            case 31:
                // Skipped; continue with the next sibling.
                break;

            case 25:
                // Remember a non-empty node of this type so its children are visited in order.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childIndex = 0;
                }
                break;

            case 28:
            case 32:
                // Look through to the first child and forget the remembered parent.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            default:
                return result;
        }

        if (enclosingConcat == null || childIndex >= enclosingConcat.ChildCount())
        {
            return result;
        }

        current = enclosingConcat.Child(childIndex);
        childIndex++;
    }
}
// The single entry point intended for outside callers.
// Builds a RegexCode from the given RegexTree using a fresh RegexWriter.
internal static RegexCode Write(RegexTree t)
{
    RegexWriter writer = new RegexWriter();
    RegexCode code = writer.RegexCodeFromRegexTree(t);

#if DBG
    // Dump the generated code when the tree asks for debug output.
    if (t.Debug)
    {
        code.Dump();
    }
#endif

    return code;
}
/*
 * One of the only two functions meant to be called from outside.
 * Takes a RegexTree and computes the set of chars that can start it.
 * Returns null when no RegexFC was produced or the result is nullable.
 */
internal static RegexPrefix FirstChars(RegexTree t)
{
    RegexFCD calculator = new RegexFCD();
    RegexFC firstChars = calculator.RegexFCFromRegexTree(t);

    if (firstChars == null || firstChars._nullable)
    {
        return null;
    }

    // Pick the culture according to the tree's CultureInvariant option.
    CultureInfo culture;
    if ((t._options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    return new RegexPrefix(firstChars.GetFirstChars(culture), firstChars.IsCaseInsensitive());
}
/// <summary>
/// This is the only function that should be called from outside.
/// It takes a RegexTree and creates a corresponding RegexCode.
/// </summary>
/// <param name="tree">The parsed expression to generate code for.</param>
/// <param name="culture">The culture forwarded to the code generator.</param>
/// <returns>The RegexCode produced by the writer.</returns>
public static RegexCode Write(RegexTree tree, CultureInfo culture)
{
    var writer = new RegexWriter(stackalloc int[EmittedSize], stackalloc int[IntStackSize]);

    RegexCode code;
    try
    {
        code = writer.RegexCodeFromRegexTree(tree, culture);
    }
    finally
    {
        // Always release the writer, even when code generation throws; previously
        // Dispose was skipped on the exception path.
        writer.Dispose();
    }

#if DEBUG
    if (tree.Debug)
    {
        tree.Dump();
        code.Dump();
    }
#endif

    return code;
}
/// <summary>
/// The single entry point intended for outside callers: turns a RegexTree
/// into the corresponding RegexCode.
/// </summary>
internal static RegexCode Write(RegexTree t)
{
    // Stack-allocated scratch buffers handed to the writer.
    Span<int> emittedBuffer = stackalloc int[EmittedSize];
    Span<int> intStackBuffer = stackalloc int[IntStackSize];

    RegexWriter writer = new RegexWriter(emittedBuffer, intStackBuffer);
    RegexCode code = writer.RegexCodeFromRegexTree(t);

#if DEBUG
    if (t.Debug)
    {
        t.Dump();
        code.Dump();
    }
#endif

    return code;
}
public readonly bool RightToLeft; // true if right to left

/// <summary>
/// Stores the supplied values in the corresponding members and allocates
/// <c>StringsAsciiLookup</c> with one (initially null) slot per entry of <paramref name="strings"/>.
/// </summary>
public RegexCode(
    RegexTree tree,
    int[] codes,
    string[] strings,
    int trackcount,
    Hashtable? caps,
    int capsize,
    RegexBoyerMoore? bmPrefix,
    RegexPrefix? fcPrefix,
    int anchors,
    bool rightToLeft)
{
    // Tree, code and string table.
    Tree = tree;
    Codes = codes;
    Strings = strings;
    StringsAsciiLookup = new int[strings.Length][];

    // Capture bookkeeping.
    Caps = caps;
    CapSize = capsize;
    TrackCount = trackcount;

    // Prefix and anchor information.
    BMPrefix = bmPrefix;
    FCPrefix = fcPrefix;
    Anchors = anchors;
    RightToLeft = rightToLeft;
}
// Iterative depth-first walk over the tree. CalculateFC is invoked at each leaf,
// before each visited child (node type | 0x40) and after each child (node type | 0x80).
// Returns null when CalculateFC flags a failure or no RegexFC is left to pop.
private RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (current._children == null)
        {
            CalculateFC(current._type, current, 0);
        }
        else if (childIndex < current._children.Count && !_skipAllChildren)
        {
            CalculateFC(current._type | 0x40, current, childIndex);

            if (_skipchild)
            {
                // CalculateFC asked to skip this child: move to the next sibling.
                childIndex++;
                _skipchild = false;
            }
            else
            {
                // Descend, remembering where we were in the parent.
                current = current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        _skipAllChildren = false;

        if (IntIsEmpty())
        {
            break;
        }

        // Return to the parent and report that this child is finished.
        childIndex = PopInt();
        current = current._next;
        CalculateFC(current._type | 0x80, current, childIndex);

        if (_failed)
        {
            return null;
        }

        childIndex++;
    }

    return FCIsEmpty() ? null : PopFC();
}
/// <summary>
/// Records the pattern, options and timeout, parses the pattern, and copies the
/// capture information from the parsed tree into this instance.
/// </summary>
/// <param name="culture">
/// Culture used for parsing; when null it is replaced with
/// <c>RegexParser.GetTargetCulture(options)</c>, so it is non-null on return.
/// </param>
/// <returns>The parsed <see cref="RegexTree"/>.</returns>
private RegexTree Init(string pattern, RegexOptions options, TimeSpan matchTimeout, [NotNull] ref CultureInfo? culture)
{
    this.pattern = pattern;
    internalMatchTimeout = matchTimeout;
    roptions = options;

    if (culture is null)
    {
        culture = RegexParser.GetTargetCulture(options);
    }

    // Parse the pattern.
    RegexTree parsedTree = RegexParser.Parse(pattern, options, culture);

    // Copy the capture information exposed by the parsed tree.
    capnames = parsedTree.CaptureNameToNumberMapping;
    capslist = parsedTree.CaptureNames;
    caps = parsedTree.CaptureNumberSparseMapping;
    capsize = parsedTree.CaptureCount;

    return parsedTree;
}
/// <summary>
/// This is the one of the only two functions that should be called from outside.
/// It takes a RegexTree and computes the set of chars that can start it.
/// </summary>
/// <returns>
/// The first-character prefix, or null when no RegexFC was produced or the result is nullable.
/// </returns>
public static RegexPrefix? FirstChars(RegexTree t)
{
    // Create/rent buffers
    Span<int> intSpan = stackalloc int[StackBufferSize];
    RegexFCD s = new RegexFCD(intSpan);

    RegexFC fc;
    try
    {
        fc = s.RegexFCFromRegexTree(t);
    }
    finally
    {
        // Release the helper's buffers even if the computation throws; previously
        // Dispose was skipped on the exception path.
        s.Dispose();
    }

    if (fc == null || fc._nullable)
    {
        return null;
    }

    CultureInfo culture = ((t.Options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
    return new RegexPrefix(fc.GetFirstChars(culture), fc.CaseInsensitive);
}
/// <summary>
/// This is the one of the only two functions that should be called from outside.
/// It takes a RegexTree and computes the set of chars that can start it.
/// </summary>
/// <returns>
/// The first-character prefix, or null when no RegexFC was produced or the result is nullable.
/// </returns>
public static RegexPrefix? FirstChars(RegexTree t)
{
    var s = new RegexFCD(stackalloc int[StackBufferSize]);

    RegexFC? fc;
    try
    {
        fc = s.RegexFCFromRegexTree(t);
    }
    finally
    {
        // Release the helper even if the computation throws; previously Dispose
        // was skipped on the exception path.
        s.Dispose();
    }

    if (fc == null || fc._nullable)
    {
        return null;
    }

    // For case-insensitive results, add lowercase forms using the culture selected by the tree's options.
    if (fc.CaseInsensitive)
    {
        fc.AddLowercase(((t.Options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);
    }

    return new RegexPrefix(fc.GetFirstChars(), fc.CaseInsensitive);
}
/// <summary>
/// This is the only function that should be called from outside.
/// It takes a RegexTree and creates a corresponding RegexCode.
/// </summary>
/// <param name="tree">The parsed expression to generate code for.</param>
/// <returns>The RegexCode produced by the writer.</returns>
public static RegexCode Write(RegexTree tree)
{
    Span<int> emittedSpan = stackalloc int[EmittedSize];
    Span<int> intStackSpan = stackalloc int[IntStackSize];
    var writer = new RegexWriter(emittedSpan, intStackSpan);

    RegexCode code;
    try
    {
        code = writer.RegexCodeFromRegexTree(tree);
    }
    finally
    {
        // Always release the writer, even when code generation throws; previously
        // Dispose was skipped on the exception path.
        writer.Dispose();
    }

#if DEBUG
    if (tree.Debug)
    {
        tree.Dump();
        code.Dump();
    }
#endif

    return code;
}
/// <summary>
/// Validates the arguments, stores them, parses the pattern, generates its RegexCode,
/// and fills in the capture information.
/// </summary>
private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
{
    // Argument validation, in the same order as the parameters.
    ValidatePattern(pattern);
    ValidateOptions(options);
    ValidateMatchTimeout(matchTimeout);

    this.pattern = pattern;
    roptions = options;
    internalMatchTimeout = matchTimeout;

    if (culture is null)
    {
        culture = GetTargetCulture(options);
    }

#if DEBUG
    if (IsDebug)
    {
        Debug.WriteLine($"Pattern: {pattern} Options: {options & ~RegexOptions.Debug} Timeout: {(matchTimeout == InfiniteMatchTimeout ? "infinite" : matchTimeout.ToString())}");
    }
#endif

    // Parse the input.
    RegexTree parsedTree = RegexParser.Parse(pattern, roptions, culture);

    // Generate the RegexCode from the node tree. This is required for interpreting,
    // and is used as input into RegexOptions.Compiled and RegexOptions.NonBacktracking.
    _code = RegexWriter.Write(parsedTree);

    bool nonBacktracking = (options & RegexOptions.NonBacktracking) != 0;
    if (!nonBacktracking)
    {
        capnames = parsedTree.CapNames;
        capslist = parsedTree.CapsList;
        caps = _code.Caps;
        capsize = _code.CapSize;
    }
    else
    {
        // NonBacktracking doesn't support captures (other than the implicit top-level capture).
        capnames = null;
        capslist = null;
        caps = null;
        capsize = 1;
    }
}
// Iterative depth-first walk over the tree. CalculateFC is invoked at each leaf,
// before each visited child (node type | 0x40) and after each child (node type | 0x80).
// If nothing is left to pop at the end, a fallback RegexFC is returned.
internal RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (current._children == null)
        {
            CalculateFC(current._type, current, 0);
        }
        else if (childIndex < current._children.Count && !_earlyexit)
        {
            CalculateFC(current._type | 0x40, current, childIndex);

            if (_skipchild)
            {
                // CalculateFC asked to skip this child: move to the next sibling.
                childIndex++;
                _skipchild = false;
            }
            else
            {
                // Descend, remembering where we were in the parent.
                current = (RegexNode)current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        _earlyexit = false;

        if (EmptyInt())
        {
            break;
        }

        // Return to the parent and report that this child is finished.
        childIndex = PopInt();
        current = current._next;
        CalculateFC(current._type | 0x80, current, childIndex);
        childIndex++;
    }

    return EmptyFC() ? new RegexFC("\0", true, false) : PopFC();
}
/// <summary>
/// Validates the arguments, stores them, parses the pattern, generates its RegexCode,
/// and copies the capture information from the tree and the generated code.
/// </summary>
private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
{
    // Argument validation, in the same order as the parameters.
    ValidatePattern(pattern);
    ValidateOptions(options);
    ValidateMatchTimeout(matchTimeout);

    this.pattern = pattern;
    roptions = options;
    internalMatchTimeout = matchTimeout;

    // Parse the input.
    RegexTree parsedTree = RegexParser.Parse(pattern, roptions, culture);

    // Generate the RegexCode from the node tree. This is required for interpreting,
    // and is used as input into RegexOptions.Compiled and RegexOptions.NonBacktracking.
    _code = RegexWriter.Write(parsedTree, culture);

    // Capture names come from the tree; the number mapping and size come from the code.
    capnames = parsedTree.CapNames;
    capslist = parsedTree.CapsList;
    caps = _code.Caps;
    capsize = _code.CapSize;
}
/// <summary>Initializes the instance.</summary>
/// <remarks>
/// Kept separate from the constructor so that the Regex ctor that doesn't take
/// a RegexOptions can avoid rooting the regex compiler, allowing it to be trimmed away.
/// </remarks>
private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
{
    // Argument validation, in the same order as the parameters.
    ValidatePattern(pattern);
    ValidateOptions(options);
    ValidateMatchTimeout(matchTimeout);

    this.pattern = pattern;
    internalMatchTimeout = matchTimeout;
    roptions = options;

    // Use the supplied culture, or derive one from the CultureInvariant option.
    CultureInfo parseCulture = culture ??
        ((options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);

    // Parse the input.
    RegexTree parsedTree = RegexParser.Parse(pattern, roptions, parseCulture);

    // Extract the relevant information.
    capnames = parsedTree.CapNames;
    capslist = parsedTree.CapsList;
    _code = RegexWriter.Write(parsedTree);
    caps = _code.Caps;
    capsize = _code.CapSize;

    InitializeReferences();
}
/// <summary>Initializes the instance.</summary>
/// <remarks>
/// Kept separate from the constructor so that an app using only 'new Regex(pattern)'
/// rather than 'new Regex(pattern, options)' avoids a static reference to the Regex
/// compiler, letting a tree shaker / linker trim it away when it is otherwise unused.
/// </remarks>
private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
{
    // Argument validation, in the same order as the parameters.
    ValidatePattern(pattern);
    ValidateOptions(options);
    ValidateMatchTimeout(matchTimeout);

    this.pattern = pattern;
    internalMatchTimeout = matchTimeout;
    roptions = options;

#if DEBUG
    if (IsDebug)
    {
        Debug.Write($"Pattern: {pattern}");

        // Options are shown without the Debug flag, and only when something else is set.
        RegexOptions displayOptions = options & ~RegexOptions.Debug;
        if (displayOptions != RegexOptions.None)
        {
            Debug.Write($"Options: {displayOptions}");
        }

        if (matchTimeout != InfiniteMatchTimeout)
        {
            Debug.Write($"Timeout: {matchTimeout}");
        }
    }
#endif

    // Use the supplied culture, or derive one from the CultureInvariant option.
    CultureInfo parseCulture = culture ??
        ((options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);

    // Parse the input.
    RegexTree parsedTree = RegexParser.Parse(pattern, roptions, parseCulture);

    // Extract the relevant information.
    capnames = parsedTree.CapNames;
    capslist = parsedTree.CapsList;
    _code = RegexWriter.Write(parsedTree);
    caps = _code.Caps;
    capsize = _code.CapSize;

    InitializeReferences();
}
/// <summary>
/// The top-level driver. Stores the inputs, defines three dynamic methods, and calls the
/// matching Emit* method after each definition.
/// </summary>
/// <param name="pattern">The pattern text; only used to build the optional method-name suffix.</param>
/// <param name="regexTree">The tree to compile.</param>
/// <param name="options">The options stored on this instance and passed to EmitScan.</param>
/// <param name="hasTimeout">Stored in <c>_hasTimeout</c>.</param>
/// <returns>
/// A <c>CompiledRegexRunnerFactory</c> wrapping the generated scan method, or null when the
/// tree's root reports that it does not support compilation.
/// </returns>
public RegexRunnerFactory?FactoryInstanceFromCode(string pattern, RegexTree regexTree, RegexOptions options, bool hasTimeout)
{
    // Bail out before touching any state if the tree cannot be compiled.
    if (!regexTree.Root.SupportsCompilation(out _))
    {
        return(null);
    }

    _regexTree = regexTree;
    _options = options;
    _hasTimeout = hasTimeout;

    // Pick a unique number for the methods we generate.
    uint regexNum = (uint)Interlocked.Increment(ref s_regexCount);

    // Get a description of the regex to use in the name. This is helpful when profiling, and is opt-in.
    string description = string.Empty;
    if (s_includePatternInName)
    {
        const int DescriptionLimit = 100; // arbitrary limit to avoid very long method names
        // "_" followed by the pattern, cut to DescriptionLimit characters when longer.
        description = string.Concat("_", pattern.Length > DescriptionLimit ? pattern.AsSpan(0, DescriptionLimit) : pattern);
    }

    // NOTE(review): each Emit* call presumably writes into the dynamic method defined just
    // before it (via shared generator state) - confirm before reordering these statements.
    DynamicMethod tryfindNextPossibleStartPositionMethod = DefineDynamicMethod($"Regex{regexNum}_TryFindNextPossibleStartingPosition{description}", typeof(bool), typeof(CompiledRegexRunner), s_paramTypes);
    EmitTryFindNextPossibleStartingPosition();

    DynamicMethod tryMatchAtCurrentPositionMethod = DefineDynamicMethod($"Regex{regexNum}_TryMatchAtCurrentPosition{description}", typeof(bool), typeof(CompiledRegexRunner), s_paramTypes);
    EmitTryMatchAtCurrentPosition();

    // The scan method is given both previously defined methods.
    DynamicMethod scanMethod = DefineDynamicMethod($"Regex{regexNum}_Scan{description}", null, typeof(CompiledRegexRunner), new[] { typeof(RegexRunner), typeof(ReadOnlySpan <char>) });
    EmitScan(options, tryfindNextPossibleStartPositionMethod, tryMatchAtCurrentPositionMethod);

    return(new CompiledRegexRunnerFactory(scanMethod));
}
/*
 * A related computation: walks the front of a RegexTree looking for a
 * literal leading substring. It is deliberately simple and gives up
 * (returning RegexPrefix.Empty) at the first node type it does not handle.
 */
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childIndex = 0;
    RegexNode current = tree._root;

    while (true)
    {
        switch (current._type)
        {
            case RegexNode.Concatenate:
                // Remember a non-empty concatenation so its children are visited in order below.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childIndex = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look through the wrapper at its first child and forget any concatenation.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (current._m <= 0)
                {
                    return RegexPrefix.Empty;
                }

                // The prefix is _m copies of the loop character.
                return new RegexPrefix(
                    new string(current._ch, current._m),
                    (current._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return new RegexPrefix(
                    current._ch.ToString(),
                    (current._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return new RegexPrefix(
                    current._str,
                    (current._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // These node types are skipped; move on to the next sibling.
                break;

            default:
                return RegexPrefix.Empty;
        }

        // Advance to the next child of the remembered concatenation, if there is one.
        if (enclosingConcat == null || childIndex >= enclosingConcat.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        current = enclosingConcat.Child(childIndex);
        childIndex++;
    }
}
/*
 * The top level RegexCode generator. It does a depth-first walk
 * through the tree and calls EmitFragment to emit code before
 * and after each child of an interior node, and at each leaf.
 *
 * It runs two passes: the first (with _counting set) to count the size
 * of the generated code, and the second to generate the code into an
 * array of exactly that size.
 *
 * CONSIDER: we need to time it against the alternative, which is
 * to just generate the code and grow the array as we go.
 */
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    RegexNode curNode;
    int curChild;
    int capsize;
    RegexPrefix fcPrefix;
    RegexPrefix scPrefix;
    RegexPrefix prefix;
    int anchors;
    RegexBoyerMoore bmPrefix;
    bool rtl;

    // construct sparse capnum mapping if some numbers are unused
    if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
    {
        capsize = tree._captop;
        _caps = null;
    }
    else
    {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        // Map each listed capture number to its index in the list.
        for (int i = 0; i < tree._capnumlist.Length; i++)
        {
            _caps[tree._capnumlist[i]] = i;
        }
    }

    _counting = true;

    // Outer loop: runs once while counting and once more to emit for real.
    for (;;)
    {
        if (!_counting)
        {
            // Second pass: allocate the output using the size gathered in the first pass.
            _emitted = new int[_count];
        }

        curNode = tree._root;
        curChild = 0;

        Emit(RegexCode.Lazybranch, 0);

        // Inner loop: iterative depth-first traversal using the int stack.
        for (;;)
        {
            if (curNode._children == null)
            {
                EmitFragment(curNode._type, curNode, 0);
            }
            else if (curChild < curNode._children.Count)
            {
                EmitFragment(curNode._type | BeforeChild, curNode, curChild);

                curNode = (RegexNode)curNode._children[curChild];
                PushInt(curChild);
                curChild = 0;
                continue;
            }

            if (EmptyStack())
            {
                break;
            }

            // Back up to the parent and emit the after-child fragment.
            curChild = PopInt();
            curNode = curNode._next;

            EmitFragment(curNode._type | AfterChild, curNode, curChild);
            curChild++;
        }

        // Patch the operand of the Lazybranch emitted at position 0 to the current position.
        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    // if the set of possible first chars is very large,
    // don't bother scanning for it (common case: . == [^\n])

    fcPrefix = RegexFCD.FirstChars(tree);

    // REVIEW : ChrisAn/DavidGut, 11/21/2000 - Huh... this code used to
    //        : read "> 0XFFF", note the CAPITAL X... everything is golden,
    //        : except that this really evaluates to 0 in the C# compiler.
    //        :
    //        : However! Beginning in CSC 9055, 0XFFF is evaluated as a float,
    //        : causing a compiler error. Switching the constant to "0xFFF"
    //        : (note the lowercase x) causes everything to fail.
    //        :
    //        : What is this code really supposed to do???!
    //
    // NOTE(review): as written, any prefix whose SetSize is positive is discarded here.
    if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0)
    {
        fcPrefix = null;
    }

    // REVIEW: is this even used anywhere? Can we use it somehow?
    scPrefix = null; //RegexFCD.ScanChars(tree);
    prefix = RegexFCD.Prefix(tree);
    rtl = ((tree._options & RegexOptions.RightToLeft) != 0);

    CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

    // Only build a Boyer-Moore searcher when a non-empty literal prefix was found.
    if (prefix != null && prefix.Prefix.Length > 0)
    {
        bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
    }
    else
    {
        bmPrefix = null;
    }

    anchors = RegexFCD.Anchors(tree);

    return(new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, scPrefix, anchors, rtl));
}
/// <summary>
/// The top level RegexCode generator. Performs an iterative depth-first walk of the
/// tree, calling EmitFragment before and after each child of an interior node and at
/// each leaf, then gathers prefix and anchor information for the resulting RegexCode.
/// </summary>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build the sparse capture-number mapping only when some numbers are unused.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int index = 0; index < tree.CapNumList.Length; index++)
        {
            _caps[tree.CapNumList[index]] = index;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    RegexNode? node = tree.Root;
    int childIndex = 0;

    Emit(RegexCode.Lazybranch, 0);

    for (;;)
    {
        if (node.Children == null)
        {
            EmitFragment(node.NType, node, 0);
        }
        else if (childIndex < node.Children.Count)
        {
            // Emit the before-child fragment, then descend into that child.
            EmitFragment(node.NType | BeforeChild, node, childIndex);

            node = node.Children[childIndex];
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (_intStack.Length == 0)
        {
            break;
        }

        // Back up to the parent and emit the after-child fragment.
        childIndex = _intStack.Pop();
        node = node.Next;

        EmitFragment(node!.NType | AfterChild, node, childIndex);
        childIndex++;
    }

    // Patch the operand of the Lazybranch emitted at position 0 to the end of the emitted code.
    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);

    RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree.Options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // Only build a Boyer-Moore searcher when a non-empty literal prefix was found.
    RegexBoyerMoore? bmPrefix = prefix.Prefix.Length > 0
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);
    int[] emitted = _emitted.AsSpan().ToArray();

    return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
// Defines a type named 'name' deriving from Regex and emits its parameterless
// constructor. The emitted constructor calls the base Regex constructor, stores the
// pattern, options, a new instance of 'factory', and the capture data taken from
// 'code' and 'tree' into the Regex fields looked up below, and finally calls
// InitializeReferences(). The type is then created and the builder state cleared.
// NOTE(review): Ldthis/Ldstr/Ldc/Stfld/Newobj/Call/Ret are helpers defined elsewhere;
// presumably each emits the like-named IL instruction - confirm. The emission order
// below is significant and must not be rearranged.
internal void GenerateRegexType(String pattern, RegexOptions opts, String name, bool ispublic, RegexCode code, RegexTree tree, Type factory)
{
    // Fields on Regex that the generated constructor assigns.
    FieldInfo patternF = RegexField("pattern");
    FieldInfo optionsF = RegexField("roptions");
    FieldInfo factoryF = RegexField("factory");
    FieldInfo capsF = RegexField("caps");
    FieldInfo capnamesF = RegexField("capnames");
    FieldInfo capslistF = RegexField("capslist");
    FieldInfo capsizeF = RegexField("capsize");
    Type[] noTypeArray = new Type[0];
    ConstructorBuilder cbuilder;

    DefineType(name, ispublic, typeof(Regex));
    {
        // define constructor
        _methbuilder = null;
        MethodAttributes ma = System.Reflection.MethodAttributes.Public;
        cbuilder = _typebuilder.DefineConstructor(ma, CallingConventions.Standard, noTypeArray);
        _ilg = cbuilder.GetILGenerator();
        {
            // call base constructor
            Ldthis();
            _ilg.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, new Type[0], new ParameterModifier[0]));

            // set pattern
            Ldthis();
            Ldstr(pattern);
            Stfld(patternF);

            // set options
            Ldthis();
            Ldc((int) opts);
            Stfld(optionsF);

            // set factory
            Ldthis();
            Newobj(factory.GetConstructor(noTypeArray));
            Stfld(factoryF);

            // set caps
            if (code._caps != null)
                GenerateCreateHashtable(capsF, code._caps);

            // set capnames
            if (tree._capnames != null)
                GenerateCreateHashtable(capnamesF, tree._capnames);

            // set capslist
            if (tree._capslist != null)
            {
                Ldthis();
                Ldc(tree._capslist.Length);
                _ilg.Emit(OpCodes.Newarr, typeof(String)); // create new string array
                Stfld(capslistF);

                // Store each capture name into the newly created array, element by element.
                for (int i=0; i< tree._capslist.Length; i++)
                {
                    Ldthisfld(capslistF);
                    Ldc(i);
                    Ldstr(tree._capslist[i]);
                    _ilg.Emit(OpCodes.Stelem_Ref);
                }
            }

            // set capsize
            Ldthis();
            Ldc(code._capsize);
            Stfld(capsizeF);

            // set runnerref and replref by calling InitializeReferences()
            Ldthis();
            Call(typeof(Regex).GetMethod("InitializeReferences", BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic));

            Ret();
        }
    }

    // bake the constructor and type, then clear the builder state
    // (saving the assembly does not happen in this method)
    cbuilder = null;
    _typebuilder.CreateType();
    _ilg = null;
    _typebuilder = null;
}
// The main FC computation. Performs a shortcutted, iterative depth-first walk of the
// tree, calling CalculateFC before and after each child of an interior node and at
// each leaf. If nothing is left to pop at the end, a fallback RegexFC built from
// RegexCharClass.Any is returned.
internal RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (current._children == null)
        {
            CalculateFC(current._type, current, 0);
        }
        else if (childIndex < current._children.Count && !_earlyexit)
        {
            CalculateFC(current._type | BeforeChild, current, childIndex);

            if (_skipchild)
            {
                // CalculateFC asked to skip this child: move to the next sibling.
                childIndex++;
                _skipchild = false;
            }
            else
            {
                // Descend, remembering where we were in the parent.
                current = (RegexNode)current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        _earlyexit = false;

        if (EmptyInt())
        {
            break;
        }

        // Return to the parent and report that this child is finished.
        childIndex = PopInt();
        current = current._next;
        CalculateFC(current._type | AfterChild, current, childIndex);
        childIndex++;
    }

    return EmptyFC() ? new RegexFC(RegexCharClass.Any, true, false) : PopFC();
}
// Walks the front of the tree looking for a literal leading string and returns it as a
// RegexPrefix, or RegexPrefix.Empty if it reaches a node type it does not handle first.
// NOTE(review): the numeric case labels are presumably RegexNode type constants
// (this looks like decompiled code) - confirm against RegexNode before renaming them.
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childIndex = 0;
    RegexNode current = tree._root;

    for (;;)
    {
        switch (current._type)
        {
            case 3:
            case 6:
                if (current._m > 0)
                {
                    // The prefix is _m copies of the node's character.
                    return new RegexPrefix(
                        new string(current._ch, current._m),
                        (current._options & RegexOptions.IgnoreCase) != RegexOptions.None);
                }
                return RegexPrefix.Empty;

            case 9:
                return new RegexPrefix(
                    current._ch.ToString(CultureInfo.InvariantCulture),
                    (current._options & RegexOptions.IgnoreCase) != RegexOptions.None);

            case 12:
                return new RegexPrefix(
                    current._str,
                    (current._options & RegexOptions.IgnoreCase) != RegexOptions.None);

            case 14:
            case 15:
            case 16:
            case 18:
            case 19:
            case 20:
            case 21:
            case 23:
            case 30:
            case 31:
            case 41:
                // Skipped; continue with the next sibling.
                break;

            case 25:
                // Remember a non-empty node of this type so its children are visited in order.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childIndex = 0;
                }
                break;

            case 28:
            case 32:
                // Look through to the first child and forget the remembered parent.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            default:
                return RegexPrefix.Empty;
        }

        if (enclosingConcat == null || childIndex >= enclosingConcat.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        current = enclosingConcat.Child(childIndex);
        childIndex++;
    }
}
/*
 * Yet another related computation: walks the front of a RegexTree and
 * returns the anchor bits of the first anchor node it reaches, or 0 if
 * it reaches a node type it does not handle first.
 */
internal static int Anchors(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childIndex = 0;
    int anchors = 0;
    RegexNode current = tree._root;

    while (true)
    {
        switch (current._type)
        {
            case RegexNode.Concatenate:
                // Remember a non-empty concatenation so its children are visited in order below.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childIndex = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look through the wrapper at its first child and forget any concatenation.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                // First anchor found: translate its node type and stop.
                return anchors | AnchorFromType(current._type);

            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped; continue with the next sibling.
                break;

            default:
                return anchors;
        }

        if (enclosingConcat == null || childIndex >= enclosingConcat.ChildCount())
        {
            return anchors;
        }

        current = enclosingConcat.Child(childIndex);
        childIndex++;
    }
}
// Generates a RegexCode for the tree. The tree is walked twice with an iterative
// depth-first traversal: once with _counting set, and once more after allocating
// _emitted with _count elements. Prefix and anchor information is then gathered.
// NOTE(review): the numeric opcodes (23, 40) and flags (0x40, 0x80) are presumably the
// named RegexCode / before-child / after-child constants - confirm before renaming.
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Build the sparse capture-number mapping only when some numbers are unused.
    int capsize;
    if (tree._capnumlist != null && tree._captop != tree._capnumlist.Length)
    {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int index = 0; index < tree._capnumlist.Length; index++)
        {
            _caps[tree._capnumlist[index]] = index;
        }
    }
    else
    {
        capsize = tree._captop;
        _caps = null;
    }

    _counting = true;

    // Outer loop: one counting pass, then one emitting pass.
    while (true)
    {
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode current = tree._root;
        int childIndex = 0;

        Emit(23, 0);

        // Inner loop: iterative depth-first traversal using the int stack.
        while (true)
        {
            if (current._children == null)
            {
                EmitFragment(current._type, current, 0);
            }
            else if (childIndex < current._children.Count)
            {
                EmitFragment(current._type | 0x40, current, childIndex);
                current = (RegexNode)current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (EmptyStack())
            {
                break;
            }

            // Back up to the parent and emit the after-child fragment.
            childIndex = PopInt();
            current = current._next;
            EmitFragment(current._type | 0x80, current, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(40);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    // As written, any prefix whose SetSize is positive is discarded.
    if (fcPrefix != null && RegexCharClass.SetSize(fcPrefix.Prefix) > 0)
    {
        fcPrefix = null;
    }

    RegexPrefix scPrefix = null;
    RegexPrefix literalPrefix = RegexFCD.Prefix(tree);
    bool rightToLeft = (tree._options & RegexOptions.RightToLeft) != RegexOptions.None;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != RegexOptions.None)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // Only build a Boyer-Moore searcher when a non-empty literal prefix was found.
    RegexBoyerMoore bmPrefix = null;
    if (literalPrefix != null && literalPrefix.Prefix.Length > 0)
    {
        bmPrefix = new RegexBoyerMoore(literalPrefix.Prefix, literalPrefix.CaseInsensitive, rightToLeft, culture);
    }

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, scPrefix, RegexFCD.Anchors(tree), rightToLeft);
}
/// <summary>
/// The top level RegexCode generator. It does a depth-first walk
/// through the tree and calls EmitFragment to emit code before
/// and after each child of an interior node, and at each leaf.
/// </summary>
/// <param name="tree">The parsed expression to generate code for.</param>
/// <returns>A RegexCode holding a copy of the emitted code plus prefix and anchor information.</returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    Span<int> emittedSpan = stackalloc int[EmittedSize];
    Span<int> intStackSpan = stackalloc int[IntStackSize];
    RegexWriter writer = new RegexWriter(emittedSpan, intStackSpan);

    try
    {
        // construct sparse capnum mapping if some numbers are unused
        int capsize;
        if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length)
        {
            capsize = tree._captop;
            writer._caps = null;
        }
        else
        {
            capsize = tree._capnumlist.Length;
            writer._caps = tree._caps;
            for (int i = 0; i < tree._capnumlist.Length; i++)
            {
                writer._caps[tree._capnumlist[i]] = i;
            }
        }

        RegexNode curNode = tree._root;
        int curChild = 0;

        writer.Emit(RegexCode.Lazybranch, 0);

        // Iterative depth-first traversal using the writer's int stack.
        for (;;)
        {
            if (curNode._children == null)
            {
                writer.EmitFragment(curNode._type, curNode, 0);
            }
            else if (curChild < curNode._children.Count)
            {
                writer.EmitFragment(curNode._type | BeforeChild, curNode, curChild);

                curNode = curNode._children[curChild];
                writer._intStack.Append(curChild);
                curChild = 0;
                continue;
            }

            if (writer._intStack.Length == 0)
            {
                break;
            }

            // Back up to the parent and emit the after-child fragment.
            curChild = writer._intStack.Pop();
            curNode = curNode._next;

            writer.EmitFragment(curNode._type | AfterChild, curNode, curChild);
            curChild++;
        }

        // Patch the operand of the Lazybranch emitted at position 0 to the end of the emitted code.
        writer.PatchJump(0, writer._emitted.Length);
        writer.Emit(RegexCode.Stop);

        RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
        RegexPrefix prefix = RegexFCD.Prefix(tree);
        bool rtl = ((tree._options & RegexOptions.RightToLeft) != 0);

        CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;

        // Only build a Boyer-Moore searcher when a non-empty literal prefix was found.
        RegexBoyerMoore bmPrefix;
        if (prefix != null && prefix.Prefix.Length > 0)
        {
            bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
        }
        else
        {
            bmPrefix = null;
        }

        int anchors = RegexFCD.Anchors(tree);

        // Copy the emitted code out before the builders are released in the finally block.
        int[] emitted = writer._emitted.AsReadOnlySpan().ToArray();

        return new RegexCode(emitted, writer._stringTable, writer._trackCount, writer._caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
    }
    finally
    {
        // Cleaning up and returning the borrowed arrays. Done in a finally block so the
        // builders are released even when emission throws; previously they were not.
        writer._emitted.Dispose();
        writer._intStack.Dispose();
    }
}
// Creates a fresh RegexWriter and uses it to turn the given tree into a RegexCode.
internal static RegexCode Write(RegexTree t)
{
    RegexWriter codeWriter = new RegexWriter();
    RegexCode code = codeWriter.RegexCodeFromRegexTree(t);
    return code;
}
// Iterative depth-first walk over the tree. CalculateFC is invoked at each leaf,
// before each visited child (node type | 0x40) and after each child (node type | 0x80).
// If nothing is left to pop at the end, a fallback RegexFC is returned.
internal RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (current._children == null)
        {
            CalculateFC(current._type, current, 0);
        }
        else if (childIndex < current._children.Count && !_earlyexit)
        {
            CalculateFC(current._type | 0x40, current, childIndex);

            if (_skipchild)
            {
                // CalculateFC asked to skip this child: move to the next sibling.
                childIndex++;
                _skipchild = false;
            }
            else
            {
                // Descend, remembering where we were in the parent.
                current = (RegexNode)current._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        _earlyexit = false;

        if (EmptyInt())
        {
            break;
        }

        // Return to the parent and report that this child is finished.
        childIndex = PopInt();
        current = current._next;
        CalculateFC(current._type | 0x80, current, childIndex);
        childIndex++;
    }

    return EmptyFC() ? new RegexFC("\0", true, false) : PopFC();
}
// Walks the front of the tree looking for a loop-type node whose _n equals int.MaxValue
// and returns the character set it covers as a RegexPrefix; returns null otherwise.
// NOTE(review): the numeric case labels are presumably RegexNode type constants
// (this looks like decompiled code) - confirm against RegexNode before renaming them.
internal static RegexPrefix ScanChars(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childIndex = 0;
    string foundSet = null;
    bool caseInsensitive = false;
    RegexNode current = tree._root;

    for (;;)
    {
        switch (current._type)
        {
            case 3:
            case 6:
                if (current._n != int.MaxValue)
                {
                    return null;
                }
                foundSet = RegexCharClass.SetFromChar(current._ch);
                caseInsensitive = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                break;

            case 4:
            case 7:
                if (current._n != int.MaxValue)
                {
                    return null;
                }
                foundSet = RegexCharClass.SetInverseFromChar(current._ch);
                caseInsensitive = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                break;

            case 5:
            case 8:
                // Also requires _str2 to be null or empty.
                if (current._n != int.MaxValue || (current._str2 != null && current._str2.Length != 0))
                {
                    return null;
                }
                foundSet = current._str;
                caseInsensitive = (current._options & RegexOptions.IgnoreCase) != RegexOptions.None;
                break;

            case 14:
            case 15:
            case 16:
            case 18:
            case 19:
            case 20:
            case 21:
            case 23:
            case 30:
            case 31:
            case 41:
                // Skipped; continue with the next sibling.
                break;

            case 25:
                // Remember a non-empty node of this type so its children are visited in order.
                if (current.ChildCount() > 0)
                {
                    enclosingConcat = current;
                    childIndex = 0;
                }
                break;

            case 28:
            case 32:
                // Look through to the first child and forget the remembered parent.
                current = current.Child(0);
                enclosingConcat = null;
                continue;

            default:
                return null;
        }

        if (foundSet != null)
        {
            return new RegexPrefix(foundSet, caseInsensitive);
        }

        if (enclosingConcat == null || childIndex >= enclosingConcat.ChildCount())
        {
            return null;
        }

        current = enclosingConcat.Child(childIndex);
        childIndex++;
    }
}
/// <summary>Analyzes a <see cref="RegexInterpreterCode"/> to learn about the structure of the tree.</summary> public static AnalysisResults Analyze(RegexTree regexTree) { var results = new AnalysisResults(regexTree); results._complete = TryAnalyze(regexTree.Root, results, isAtomicByAncestor: true, isInLoop: false); return(results);
// Related computation: walks the start of a RegexTree looking for a leading
// []* construct (a one/notone/set loop whose upper bound is "infinite") and
// returns its set. It is deliberately simple and gives up (returns null) easily.
internal static RegexPrefix ScanChars(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childCursor = 0;
    string leadingSet = null;
    bool ignoreCase = false;
    RegexNode node = tree._root;

    while (true)
    {
        switch (node._type)
        {
            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (node._n != infinite)
                {
                    return null;
                }
                leadingSet = RegexCharClass.SetFromChar(node._ch);
                ignoreCase = (node._options & RegexOptions.IgnoreCase) != 0;
                break;

            case RegexNode.Notoneloop:
            case RegexNode.Notonelazy:
                if (node._n != infinite)
                {
                    return null;
                }
                leadingSet = RegexCharClass.SetInverseFromChar(node._ch);
                ignoreCase = (node._options & RegexOptions.IgnoreCase) != 0;
                break;

            case RegexNode.Setloop:
            case RegexNode.Setlazy:
                if (node._n != infinite)
                {
                    return null;
                }
                // A non-empty _str2 disqualifies the set.
                if (node._str2 != null && node._str2.Length != 0)
                {
                    return null;
                }
                leadingSet = node._str;
                ignoreCase = (node._options & RegexOptions.IgnoreCase) != 0;
                break;

            case RegexNode.Concatenate:
                // Remember the concatenation so its children are visited in order.
                if (node.ChildCount() > 0)
                {
                    enclosingConcat = node;
                    childCursor = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look through the wrapper at its first child.
                node = node.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped: continue with the next sibling, if any.
                break;

            default:
                return null;
        }

        if (leadingSet != null)
        {
            return new RegexPrefix(leadingSet, ignoreCase);
        }

        if (enclosingConcat == null || childCursor >= enclosingConcat.ChildCount())
        {
            return null;
        }

        node = enclosingConcat.Child(childCursor++);
    }
}
/// <summary>
/// Computes the literal text the tree starts with, if one can be found cheaply.
/// </summary>
/// <remarks>
/// Node types are raw numeric values from decompilation; presumably 3/6 are
/// single-character loops, 12 a multi-character literal, 25 a concatenation and
/// 28/32 wrapper nodes - confirm against RegexNode.
/// </remarks>
/// <param name="tree">The tree to inspect, starting at <c>_root</c>.</param>
/// <returns>The leading prefix, or <c>RegexPrefix.Empty</c> when none is found.</returns>
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode concat = null;
    int nextIndex = 0;
    RegexNode current = tree._root;

    while (true)
    {
        switch (current._type)
        {
            case 3:
            case 6:
                // A loop contributes its character repeated _m times, if _m is positive.
                if (current._m <= 0)
                {
                    return RegexPrefix.Empty;
                }
                return new RegexPrefix(
                    new string(current._ch, current._m),
                    (current._options & RegexOptions.IgnoreCase) != RegexOptions.None);

            case 12:
                return new RegexPrefix(
                    current._str,
                    (current._options & RegexOptions.IgnoreCase) != RegexOptions.None);

            case 14:
            case 15:
            case 16:
            case 18:
            case 19:
            case 20:
            case 21:
            case 23:
            case 30:
            case 31:
            case 41:
                // Skipped: continue with the next sibling, if any.
                break;

            case 25:
                if (current.ChildCount() > 0)
                {
                    concat = current;
                    nextIndex = 0;
                }
                break;

            case 28:
            case 32:
                // Look through the wrapper at its first child.
                current = current.Child(0);
                concat = null;
                continue;

            default:
                return RegexPrefix.Empty;
        }

        if (concat == null || nextIndex >= concat.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        current = concat.Child(nextIndex++);
    }
}
/// <summary>Computes a character class for the first character in <paramref name="tree"/>.</summary>
/// <param name="tree">The tree to analyze.</param>
/// <returns>
/// A single-element array holding the computed class and its case-insensitivity flag,
/// or null when the analysis yields no result or a result marked <c>_nullable</c>.
/// </returns>
public static (string CharClass, bool CaseInsensitive)[]? ComputeFirstCharClass(RegexTree tree)
{
    var s = new RegexPrefixAnalyzer(stackalloc int[StackBufferSize]);
    RegexFC? fc;
    try
    {
        fc = s.RegexFCFromRegexTree(tree);
    }
    finally
    {
        // Dispose even when the analysis throws, so the analyzer is always released.
        s.Dispose();
    }

    if (fc == null || fc._nullable)
    {
        return null;
    }

    if (fc.CaseInsensitive)
    {
        // Lowercase using the invariant culture when CultureInvariant is set, else the current culture.
        fc.AddLowercase(((tree.Options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);
    }

    return new[] { (fc.GetFirstChars(), fc.CaseInsensitive) };
}
/*
 * Core FC computation: an abbreviated depth-first traversal of the tree that
 * hands every leaf, and every interior node before and after each visited
 * child, to CalculateFC. Returns null when _failed is set after an
 * after-child call or when the FC stack is empty at the end.
 */
private RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode node = tree._root;
    int childIndex = 0;

    while (true)
    {
        if (node._children == null)
        {
            // Leaf node.
            CalculateFC(node._type, node, 0);
        }
        else if (childIndex < node._children.Count && !_skipAllChildren)
        {
            // Interior node with children still to analyze.
            CalculateFC(node._type | BeforeChild, node, childIndex);

            if (_skipchild)
            {
                childIndex++;
                _skipchild = false;
            }
            else
            {
                node = (RegexNode)node._children[childIndex];

                // The int stack is what makes this a depth-first walk.
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        // A leaf was just handled, or every child of this interior node is done.
        _skipAllChildren = false;

        if (IntIsEmpty())
        {
            // Back at the root with nothing left to visit.
            return FCIsEmpty() ? null : PopFC();
        }

        childIndex = PopInt();
        node = node._next;
        CalculateFC(node._type | AfterChild, node, childIndex);

        if (_failed)
        {
            return null;
        }

        childIndex++;
    }
}
/// <summary>Computes the leading substring in <paramref name="tree"/>.</summary>
/// <remarks>
/// The scan is deliberately trivial and gives up easily; when it does, an empty
/// string (with a false case-insensitivity flag) is returned.
/// </remarks>
public static (string Prefix, bool CaseInsensitive) ComputeLeadingSubstring(RegexTree tree)
{
    // In release, cut off at a length for which we can still reasonably construct
    // a string and Boyer-Moore search. In debug, use a smaller cutoff so that
    // tests exercise the cutoff path.
    const int Cutoff =
#if DEBUG
        50;
#else
        RegexBoyerMoore.MaxLimit;
#endif

    RegexNode node = tree.Root;
    RegexNode? enclosingConcat = null;
    int childCursor = 0;

    while (true)
    {
        switch (node.Type)
        {
            case RegexNode.Concatenate:
                // Remember the concatenation so its children are visited in order.
                if (node.ChildCount() > 0)
                {
                    enclosingConcat = node;
                    childCursor = 0;
                }
                break;

            case RegexNode.Atomic:
            case RegexNode.Capture:
                // Look through the wrapper at its first child.
                node = node.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Oneloopatomic:
            case RegexNode.Onelazy:
                if (node.M <= 0 || node.M >= Cutoff)
                {
                    return (string.Empty, false);
                }
                return (new string(node.Ch, node.M), (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return (node.Ch.ToString(), (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return (node.Str!, (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped: continue with the next sibling, if any.
                break;

            default:
                return (string.Empty, false);
        }

        if (enclosingConcat == null || childCursor >= enclosingConcat.ChildCount())
        {
            return (string.Empty, false);
        }

        node = enclosingConcat.Child(childCursor++);
    }
}
// Related computation: takes a RegexTree and computes the leading substring
// if it sees one. It is deliberately simple and gives up easily, returning
// RegexPrefix.Empty when it does.
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode enclosingConcat = null;
    int childCursor = 0;
    RegexNode node = tree._root;

    while (true)
    {
        switch (node._type)
        {
            case RegexNode.Multi:
                return new RegexPrefix(node._str, (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // A loop contributes its character repeated _m times, if _m is positive.
                if (node._m <= 0)
                {
                    return RegexPrefix.Empty;
                }
                return new RegexPrefix(new string(node._ch, node._m), (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Concatenate:
                // Remember the concatenation so its children are visited in order.
                if (node.ChildCount() > 0)
                {
                    enclosingConcat = node;
                    childCursor = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look through the wrapper at its first child.
                node = node.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped: continue with the next sibling, if any.
                break;

            default:
                return RegexPrefix.Empty;
        }

        if (enclosingConcat == null || childCursor >= enclosingConcat.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        node = enclosingConcat.Child(childCursor++);
    }
}
/// <summary>
/// Builds a RegexCode from the tree. The tree is walked depth-first twice: once
/// with <c>_counting</c> set, and once more after <c>_emitted</c> has been
/// allocated with <c>_count</c> entries.
/// </summary>
/// <remarks>
/// Opcode and flag values are raw numbers from decompilation (0x17, 40, 0x40,
/// 0x80); presumably Lazybranch, Stop, BeforeChild and AfterChild - confirm.
/// </remarks>
/// <param name="tree">The tree to emit code for.</param>
/// <returns>The RegexCode assembled from the emitted data and the prefix/anchor analyses.</returns>
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    int capSize;

    if ((tree._capnumlist != null) && (tree._captop != tree._capnumlist.Length))
    {
        // Capture numbers are sparse: map each listed number to its index.
        capSize = tree._capnumlist.Length;
        this._caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++)
        {
            this._caps[tree._capnumlist[slot]] = slot;
        }
    }
    else
    {
        capSize = tree._captop;
        this._caps = null;
    }

    this._counting = true;

    while (true)
    {
        if (!this._counting)
        {
            this._emitted = new int[this._count];
        }

        RegexNode current = tree._root;
        int childIndex = 0;

        this.Emit(0x17, 0);

        while (true)
        {
            if (current._children == null)
            {
                this.EmitFragment(current._type, current, 0);
            }
            else if (childIndex < current._children.Count)
            {
                this.EmitFragment(current._type | 0x40, current, childIndex);
                current = (RegexNode) current._children[childIndex];
                this.PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (this.EmptyStack())
            {
                break;
            }

            childIndex = this.PopInt();
            current = current._next;
            this.EmitFragment(current._type | 0x80, current, childIndex);
            childIndex++;
        }

        this.PatchJump(0, this.CurPos());
        this.Emit(40);

        if (!this._counting)
        {
            break;
        }

        // First pass finished; run the walk again with counting switched off.
        this._counting = false;
    }

    RegexPrefix firstChars = RegexFCD.FirstChars(tree);
    if ((firstChars != null) && (RegexCharClass.SetSize(firstChars.Prefix) > 0))
    {
        firstChars = null;
    }

    RegexPrefix scanChars = null;
    RegexPrefix leading = RegexFCD.Prefix(tree);
    bool rightToLeft = (tree._options & RegexOptions.RightToLeft) != RegexOptions.None;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != RegexOptions.None)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore searcher is only built for a non-empty leading prefix.
    RegexBoyerMoore boyerMoore = null;
    if ((leading != null) && (leading.Prefix.Length > 0))
    {
        boyerMoore = new RegexBoyerMoore(leading.Prefix, leading.CaseInsensitive, rightToLeft, culture);
    }

    return new RegexCode(this._emitted, this._stringtable, this._trackcount, this._caps, capSize, boyerMoore, firstChars, scanChars, RegexFCD.Anchors(tree), rightToLeft);
}
/// <summary>
/// Related computation: takes a RegexTree and computes the leading substring
/// if it sees one. It is deliberately simple and gives up easily, returning
/// <c>RegexPrefix.Empty</c> when it does.
/// </summary>
public static RegexPrefix Prefix(RegexTree tree)
{
    // In release, cut off at a length for which we can still reasonably construct
    // a string. In debug, use a smaller cutoff so that tests exercise the cutoff path.
    const int Cutoff =
#if DEBUG
        50;
#else
        1_000_000;
#endif

    RegexNode node = tree.Root;
    RegexNode? enclosingConcat = null;
    int childCursor = 0;

    while (true)
    {
        switch (node.Type)
        {
            case RegexNode.Concatenate:
                // Remember the concatenation so its children are visited in order.
                if (node.ChildCount() > 0)
                {
                    enclosingConcat = node;
                    childCursor = 0;
                }
                break;

            case RegexNode.Atomic:
            case RegexNode.Capture:
                // Look through the wrapper at its first child.
                node = node.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Oneloopatomic:
            case RegexNode.Onelazy:
                if (node.M <= 0 || node.M >= Cutoff)
                {
                    return RegexPrefix.Empty;
                }
                return new RegexPrefix(new string(node.Ch, node.M), (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return new RegexPrefix(node.Ch.ToString(), (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return new RegexPrefix(node.Str!, (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped: continue with the next sibling, if any.
                break;

            default:
                return RegexPrefix.Empty;
        }

        if (enclosingConcat == null || childCursor >= enclosingConcat.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        node = enclosingConcat.Child(childCursor++);
    }
}
/// <summary>
/// Defines a type derived from <see cref="Regex"/> and emits the IL for its
/// parameterless public constructor, which stores the pattern, options, factory
/// and capture data into the fields looked up through <c>RegexField</c>.
/// </summary>
/// <param name="pattern">Pattern string emitted as a literal and stored in the "pattern" field.</param>
/// <param name="opts">Options, emitted as an int constant and stored in the "roptions" field.</param>
/// <param name="name">Name passed to <c>DefineType</c>.</param>
/// <param name="ispublic">Visibility flag passed to <c>DefineType</c>.</param>
/// <param name="code">Supplies <c>_caps</c> and <c>_capsize</c>.</param>
/// <param name="tree">Supplies <c>_capnames</c> and <c>_capslist</c>.</param>
/// <param name="factory">Type whose parameterless constructor is invoked to fill the "factory" field.</param>
internal void GenerateRegexType(string pattern, RegexOptions opts, string name, bool ispublic, RegexCode code, RegexTree tree, Type factory)
{
    // Look up every field the generated constructor will assign.
    FieldInfo ft = this.RegexField("pattern");
    FieldInfo info2 = this.RegexField("roptions");
    FieldInfo info3 = this.RegexField("factory");
    FieldInfo field = this.RegexField("caps");
    FieldInfo info5 = this.RegexField("capnames");
    FieldInfo info6 = this.RegexField("capslist");
    FieldInfo info7 = this.RegexField("capsize");
    Type[] parameterTypes = new Type[0];
    this.DefineType(name, ispublic, typeof(Regex));
    this._methbuilder = null;
    MethodAttributes @public = MethodAttributes.Public;
    // From here on, IL is emitted into the new type's public parameterless constructor.
    base._ilg = this._typebuilder.DefineConstructor(@public, CallingConventions.Standard, parameterTypes).GetILGenerator();
    // Call Regex's own parameterless constructor first.
    base.Ldthis();
    base._ilg.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance, null, new Type[0], new ParameterModifier[0]));
    // pattern field <- pattern literal
    base.Ldthis();
    base.Ldstr(pattern);
    base.Stfld(ft);
    // roptions field <- options as an int constant
    base.Ldthis();
    base.Ldc((int) opts);
    base.Stfld(info2);
    // factory field <- new instance of the factory type
    base.Ldthis();
    base.Newobj(factory.GetConstructor(parameterTypes));
    base.Stfld(info3);
    // The capture tables are only emitted when present.
    if (code._caps != null)
    {
        this.GenerateCreateHashtable(field, code._caps);
    }
    if (tree._capnames != null)
    {
        this.GenerateCreateHashtable(info5, tree._capnames);
    }
    if (tree._capslist != null)
    {
        // capslist field <- new string[length], then fill it element by element.
        base.Ldthis();
        base.Ldc(tree._capslist.Length);
        base._ilg.Emit(OpCodes.Newarr, typeof(string));
        base.Stfld(info6);
        for (int i = 0; i < tree._capslist.Length; i++)
        {
            base.Ldthisfld(info6);
            base.Ldc(i);
            base.Ldstr(tree._capslist[i]);
            base._ilg.Emit(OpCodes.Stelem_Ref);
        }
    }
    // capsize field <- code._capsize
    base.Ldthis();
    base.Ldc(code._capsize);
    base.Stfld(info7);
    // Finish the constructor with a call to InitializeReferences and a return.
    base.Ldthis();
    base.Call(typeof(Regex).GetMethod("InitializeReferences", BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Static | BindingFlags.Instance));
    base.Ret();
    // Create the type and clear the builder state.
    this._typebuilder.CreateType();
    base._ilg = null;
    this._typebuilder = null;
}
/*
 * Top-level RegexCode generator. Walks the tree depth-first and calls
 * EmitFragment at each leaf and before and after each child of an
 * interior node.
 *
 * Two passes are made: the first counts the size of the generated code,
 * the second generates it.
 *
 * We should time this against the alternative, which is to just generate
 * the code and grow the array as we go.
 */
internal RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // Construct the sparse capnum mapping if some numbers are unused.
    int capsize;
    if (tree._capnumlist != null && tree._captop != tree._capnumlist.Length)
    {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++)
        {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }
    else
    {
        capsize = tree._captop;
        _caps = null;
    }

    _counting = true;

    while (true)
    {
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode node = tree._root;
        int childIndex = 0;

        Emit(RegexCode.Lazybranch, 0);

        while (true)
        {
            if (node._children == null)
            {
                EmitFragment(node._type, node, 0);
            }
            else if (childIndex < node._children.Count)
            {
                EmitFragment(node._type | BeforeChild, node, childIndex);
                node = (RegexNode)node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (EmptyStack())
            {
                break;
            }

            childIndex = PopInt();
            node = node._next;
            EmitFragment(node._type | AfterChild, node, childIndex);
            childIndex++;
        }

        // Patch the initial Lazybranch to the current position, then terminate with Stop.
        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore searcher is only built for a non-empty leading prefix.
    RegexBoyerMoore bmPrefix = null;
    if (prefix != null && prefix.Prefix.Length > 0)
    {
        bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
    }

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/// <summary>
/// Top-level RegexCode generator. Walks the tree depth-first and calls
/// EmitFragment at each leaf and before and after each child of an
/// interior node.
///
/// Two passes are made: the first counts the size of the generated code,
/// the second generates it.
///
/// We should time this against the alternative, which is to just generate
/// the code and grow the array as we go.
/// </summary>
private RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    int capsize;

    // Construct the sparse capnum mapping if some numbers are unused.
    bool capsAreDense = tree._capnumlist == null || tree._captop == tree._capnumlist.Length;
    if (capsAreDense)
    {
        capsize = tree._captop;
        _caps = null;
    }
    else
    {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;

        for (int index = 0; index < tree._capnumlist.Length; index++)
        {
            _caps[tree._capnumlist[index]] = index;
        }
    }

    _counting = true;

    while (true)
    {
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode current = tree._root;
        int child = 0;

        Emit(RegexCode.Lazybranch, 0);

        while (true)
        {
            if (current._children == null)
            {
                EmitFragment(current._type, current, 0);
            }
            else if (child < current._children.Count)
            {
                EmitFragment(current._type | BeforeChild, current, child);
                current = (RegexNode)current._children[child];
                PushInt(child);
                child = 0;
                continue;
            }

            if (EmptyStack())
            {
                break;
            }

            child = PopInt();
            current = current._next;
            EmitFragment(current._type | AfterChild, current, child);
            child++;
        }

        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        if (_counting)
        {
            // Counting pass is done; go round once more to generate the code.
            _counting = false;
            continue;
        }

        break;
    }

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;
    CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;

    RegexBoyerMoore bmPrefix;
    if (prefix == null || prefix.Prefix.Length <= 0)
    {
        bmPrefix = null;
    }
    else
    {
        bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
    }

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/// <summary>
/// The single entry point intended for outside callers: turns a
/// <see cref="RegexTree"/> into the corresponding <see cref="RegexInterpreterCode"/>.
/// </summary>
/// <param name="tree">The tree to write code for.</param>
/// <returns>The code produced by the writer's <c>EmitCode</c>.</returns>
public static RegexInterpreterCode Write(RegexTree tree)
{
    // The writer is given stack-allocated scratch buffers and is disposed on exit.
    using var codeWriter = new RegexWriter(tree, stackalloc int[EmittedSize], stackalloc int[IntStackSize]);

    RegexInterpreterCode interpreterCode = codeWriter.EmitCode();
    return interpreterCode;
}
/// <summary>
/// Walks the regex tree depth-first. CalculateFC is invoked once for every leaf,
/// and once before and once after each visited child of an interior node.
/// </summary>
/// <param name="tree">The tree whose <c>_root</c> is the starting point of the walk.</param>
/// <returns>
/// The value popped from the FC stack; null if <c>_failed</c> is set after an
/// after-child call or if the FC stack is empty at the end of the walk.
/// </returns>
private RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode current = tree._root;
    int childIndex = 0;

    for (;;)
    {
        if (current._children == null)
        {
            // Leaf: a single call, no flag bits added to the type.
            this.CalculateFC(current._type, current, 0);
        }
        else if ((childIndex < current._children.Count) && !this._skipAllChildren)
        {
            // 0x40 is presumably the "before child" flag - confirm.
            this.CalculateFC(current._type | 0x40, current, childIndex);

            if (this._skipchild)
            {
                // CalculateFC asked us not to descend into this child.
                childIndex++;
                this._skipchild = false;
            }
            else
            {
                current = current._children[childIndex];
                this.PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        // Either a leaf was just handled or this interior node is finished.
        this._skipAllChildren = false;

        if (this.IntIsEmpty())
        {
            break;
        }

        // Return to the parent (0x80 is presumably the "after child" flag - confirm).
        childIndex = this.PopInt();
        current = current._next;
        this.CalculateFC(current._type | 0x80, current, childIndex);

        if (this._failed)
        {
            return null;
        }

        childIndex++;
    }

    if (this.FCIsEmpty())
    {
        return null;
    }

    return this.PopFC();
}
/*
 * The main FC computation. Performs a short-cut depth-first walk of the tree,
 * calling CalculateFC at each leaf and before and after each visited child of
 * an interior node. Returns null when _failed is set after an after-child
 * call or when the FC stack is empty at the end.
 */
private RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode node = tree._root;
    int childIndex = 0;

    for (; ;)
    {
        // Descend while we are on an interior node that still has children to analyze.
        while (node._children != null && childIndex < node._children.Count && !_skipAllChildren)
        {
            CalculateFC(node._type | BeforeChild, node, childIndex);

            if (_skipchild)
            {
                childIndex++;
                _skipchild = false;
            }
            else
            {
                node = (RegexNode)node._children[childIndex];

                // Pushing the child index is what gives a depth-first walk.
                PushInt(childIndex);
                childIndex = 0;
            }
        }

        if (node._children == null)
        {
            // This is a leaf node.
            CalculateFC(node._type, node, 0);
        }

        // Here we are at the end of a leaf, or at an interior node whose
        // children have all been analyzed (or skipped).
        _skipAllChildren = false;

        if (IntIsEmpty())
        {
            break;
        }

        childIndex = PopInt();
        node = node._next;
        CalculateFC(node._type | AfterChild, node, childIndex);

        if (_failed)
        {
            return null;
        }

        childIndex++;
    }

    if (FCIsEmpty())
    {
        return null;
    }

    return PopFC();
}
/// <summary>
/// The single entry point intended for outside callers: turns a
/// <see cref="RegexTree"/> into the corresponding <see cref="RegexCode"/>.
/// </summary>
/// <param name="tree">The tree to write code for.</param>
/// <param name="culture">Culture forwarded unchanged to <c>RegexCodeFromRegexTree</c>.</param>
/// <returns>The code produced by the writer.</returns>
public static RegexCode Write(RegexTree tree, CultureInfo culture)
{
    // The writer is given stack-allocated scratch buffers and is disposed on exit.
    using var codeWriter = new RegexWriter(stackalloc int[EmittedSize], stackalloc int[IntStackSize]);

    RegexCode regexCode = codeWriter.RegexCodeFromRegexTree(tree, culture);
    return regexCode;
}
/// <summary>
/// Validates the arguments, then initializes the instance either from a cached
/// entry or by parsing <paramref name="pattern"/> and writing its code.
/// </summary>
/// <param name="pattern">The pattern; must not be null.</param>
/// <param name="options">Options; must be within the supported range and, with ECMAScript, a permitted combination.</param>
/// <param name="matchTimeout">Timeout, checked by <c>ValidateMatchTimeout</c>.</param>
/// <param name="addToCache">Whether a newly built regex is requested to be added to the cache.</param>
/// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is out of range or an invalid ECMAScript combination.</exception>
private Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, bool addToCache)
{
    if (pattern == null)
    {
        throw new ArgumentNullException(nameof(pattern));
    }

    bool optionsOutOfRange = options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0;
    if (optionsOutOfRange)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    // The only options that may accompany ECMAScript.
    RegexOptions ecmaCompatibleOptions =
        RegexOptions.ECMAScript |
        RegexOptions.IgnoreCase |
        RegexOptions.Multiline |
        RegexOptions.Compiled |
        RegexOptions.CultureInvariant
#if DEBUG
        | RegexOptions.Debug
#endif
        ;

    if ((options & RegexOptions.ECMAScript) != 0 && (options & ~ecmaCompatibleOptions) != 0)
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    ValidateMatchTimeout(matchTimeout);

    // Assign only after all parameter validation has passed.
    this.pattern = pattern;
    roptions = options;
    internalMatchTimeout = matchTimeout;

    // Cache handling: try to look this regex up in the cache.
    CultureInfo culture;
    if ((options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    var key = new CachedCodeEntryKey(options, culture.ToString(), pattern);
    CachedCodeEntry cached = GetCachedCode(key, false);

    if (cached != null)
    {
        // Cache hit: copy everything from the cached entry.
        caps = cached.Caps;
        capnames = cached.Capnames;
        capslist = cached.Capslist;
        capsize = cached.Capsize;
        _code = cached.Code;
#if FEATURE_COMPILED
        factory = cached.Factory;
#endif

        // Cached runner and replacement.
        _runnerref = cached.Runnerref;
        _replref = cached.ReplRef;
        _refsInitialized = true;
    }
    else
    {
        // Cache miss: parse the input.
        RegexTree tree = RegexParser.Parse(pattern, roptions, culture);

        // Extract the relevant information.
        capnames = tree.CapNames;
        capslist = tree.CapsList;
        _code = RegexWriter.Write(tree);
        caps = _code.Caps;
        capsize = _code.CapSize;

        InitializeReferences();

        if (addToCache)
        {
            cached = GetCachedCode(key, true);
        }
    }

#if FEATURE_COMPILED
    // If the compile option is set, compile the code if that has not happened yet.
    if (UseOptionC() && factory == null)
    {
        factory = Compile(_code, roptions);

        if (addToCache && cached != null)
        {
            cached.AddCompiled(factory);
        }

        _code = null;
    }
#endif
}
/// <summary>
/// Top-level RegexCode generator. Walks the tree depth-first and calls
/// EmitFragment at each leaf and before and after each child of an interior
/// node. The resulting RegexCode is also handed the tree and culture so that
/// it can compute further information, such as prefix data for optimizations.
/// </summary>
/// <param name="tree">The tree to emit code for.</param>
/// <param name="culture">Culture passed through to the <see cref="RegexCode"/> constructor.</param>
/// <returns>A RegexCode built from the emitted opcodes, string table and capture data.</returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
{
    // Construct the sparse capnum mapping if some numbers are unused.
    int capsize;
    if (tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    // All written code starts with a lazy branch, which is back-patched to
    // point at the final Stop once the whole expression has been written.
    Emit(RegexOpcode.Lazybranch, 0);

    // Emit every node.
    RegexNode node = tree.Root;
    int childIndex = 0;

    while (true)
    {
        int childCount = node.ChildCount();

        if (childCount == 0)
        {
            EmitFragment(node.Kind, node, 0);
        }
        else if (childIndex < childCount)
        {
            EmitFragment(node.Kind | BeforeChild, node, childIndex);
            node = node.Child(childIndex);
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (_intStack.Length == 0)
        {
            break;
        }

        childIndex = _intStack.Pop();
        node = node.Parent!;
        EmitFragment(node.Kind | AfterChild, node, childIndex);
        childIndex++;
    }

    // Patch the starting Lazybranch, emit the final Stop, and snapshot the code array.
    PatchJump(0, _emitted.Length);
    Emit(RegexOpcode.Stop);
    int[] emittedCode = _emitted.AsSpan().ToArray();

    // Turn the string table into an array ordered by each entry's index.
    var orderedStrings = new string[_stringTable.Count];
    foreach (KeyValuePair<string, int> entry in _stringTable)
    {
        orderedStrings[entry.Value] = entry.Key;
    }

    // Bundle everything into a RegexCode object.
    return new RegexCode(tree, culture, emittedCode, orderedStrings, _trackCount, _caps, capsize);
}
/// <summary>
/// Creates a fresh <see cref="RegexWriter"/> and uses it to turn
/// <paramref name="t"/> into a <see cref="RegexCode"/>.
/// </summary>
/// <param name="t">The tree to write code for.</param>
/// <returns>The result of the writer's <c>RegexCodeFromRegexTree</c>.</returns>
internal static RegexCode Write(RegexTree t)
{
    return new RegexWriter().RegexCodeFromRegexTree(t);
}
/// <summary>
/// Runs the FC analysis over <paramref name="t"/> and wraps the resulting
/// first-character set in a <see cref="RegexPrefix"/>.
/// </summary>
/// <param name="t">The tree to analyze.</param>
/// <returns>
/// The first-character prefix, or null when the analysis produces no result
/// or a result marked <c>_nullable</c>.
/// </returns>
internal static RegexPrefix FirstChars(RegexTree t)
{
    RegexFC firstChars = new RegexFCD().RegexFCFromRegexTree(t);

    // The analysis may hand back null (it does so on failure in some variants);
    // treat that the same as a nullable result instead of dereferencing it.
    if (firstChars == null || firstChars._nullable)
    {
        return null;
    }

    CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != RegexOptions.None)
        ? CultureInfo.InvariantCulture
        : CultureInfo.CurrentCulture;

    return new RegexPrefix(firstChars.GetFirstChars(culture), firstChars.IsCaseInsensitive());
}