/// <summary>
/// This is the only function that should be called from outside.
/// It takes a RegexTree and creates a corresponding RegexCode.
/// </summary>
/// <param name="tree">The regex tree to convert.</param>
/// <returns>The RegexCode produced by the writer's RegexCodeFromRegexTree for <paramref name="tree"/>.</returns>
public static RegexCode Write(RegexTree tree)
{
    // The writer's two scratch buffers are stack-allocated for the duration of this call.
    Span<int> emittedSpan = stackalloc int[EmittedSize];
    Span<int> intStackSpan = stackalloc int[IntStackSize];

    var writer = new RegexWriter(emittedSpan, intStackSpan);
    RegexCode code;
    try
    {
        code = writer.RegexCodeFromRegexTree(tree);
    }
    finally
    {
        // Dispose on every exit path, including when code generation throws,
        // so whatever the writer holds is always released.
        // NOTE(review): presumably this hands grown buffers back to a pool - confirm against RegexWriter.Dispose.
        writer.Dispose();
    }

#if DEBUG
    // Debug builds dump both the input tree and the generated code when the tree requests it.
    if (tree.Debug)
    {
        tree.Dump();
        code.Dump();
    }
#endif

    return code;
}
/// <summary>
/// Top-level RegexCode generator. Walks the tree depth-first using an
/// explicit stack of child indices, calling EmitFragment at each leaf and
/// before and after each child of an interior node. The emitted code is
/// then combined with the first-chars, prefix and anchor results from
/// RegexFCD into a new RegexCode.
/// </summary>
/// <param name="tree">The regex tree to generate code for.</param>
/// <returns>A RegexCode built from the emitted ints and the analysis results.</returns>
public RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // A sparse capture-number mapping is only needed when some numbers are unused.
    int capsize;
    bool needsSparseMap = tree.CapNumList != null && tree.CapTop != tree.CapNumList.Length;
    if (needsSparseMap)
    {
        capsize = tree.CapNumList.Length;
        _caps = tree.Caps;
        for (int slot = 0; slot < tree.CapNumList.Length; slot++)
        {
            _caps[tree.CapNumList[slot]] = slot;
        }
    }
    else
    {
        capsize = tree.CapTop;
        _caps = null;
    }

    _capPositions = new int[capsize];

    RegexNode node = tree.Root;
    int childIndex = 0;

    // Emitted with a 0 operand; PatchJump(0, ...) is called after the walk.
    Emit(RegexCode.Lazybranch, 0);

    for (; ; )
    {
        bool isLeaf = node.Children == null;

        if (!isLeaf && childIndex < node.Children.Count)
        {
            // Interior node with a child still to visit: emit the "before" fragment and descend.
            EmitFragment(node.NType | BeforeChild, node, childIndex);
            node = node.Children[childIndex];
            _intStack.Append(childIndex);
            childIndex = 0;
            continue;
        }

        if (isLeaf)
        {
            EmitFragment(node.NType, node, 0);
        }

        // Nothing left on the stack means we are back at the root and finished.
        if (_intStack.Length == 0)
        {
            break;
        }

        // Climb to the parent, emit its "after" fragment and move to the next sibling.
        childIndex = _intStack.Pop();
        node = node.Next;
        EmitFragment(node.NType | AfterChild, node, childIndex);
        childIndex++;
    }

    PatchJump(0, _emitted.Length);
    Emit(RegexCode.Stop);

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree.Options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore prefix is only built when a non-empty literal prefix was found.
    RegexBoyerMoore bmPrefix = prefix.Prefix.Length > 0
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);
    int[] emitted = _emitted.AsSpan().ToArray();

    return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl, _resetMatchStartFound, _capPositions);
}
/*
 * A related computation: given a RegexTree, returns the leading literal
 * substring if one is found, paired with whether the node that supplied it
 * has RegexOptions.IgnoreCase set. It is deliberately simple and gives up
 * easily, returning RegexPrefix.Empty whenever it meets a node type it
 * does not handle or runs out of children in the current concatenation.
 */
internal static RegexPrefix Prefix(RegexTree tree)
{
    RegexNode node = tree._root;
    RegexNode enclosingConcat = null;
    int childIndex = 0;

    while (true)
    {
        switch (node._type)
        {
            case RegexNode.Concatenate:
                // A non-empty concatenation becomes the sequence we step through.
                if (node.ChildCount() > 0)
                {
                    enclosingConcat = node;
                    childIndex = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look inside the wrapper and forget any concatenation seen so far.
                node = node.Child(0);
                enclosingConcat = null;
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                if (node._m <= 0)
                {
                    return RegexPrefix.Empty;
                }

                // The prefix is the looped character repeated _m times.
                return new RegexPrefix(
                    new string(node._ch, node._m),
                    (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return new RegexPrefix(
                    node._ch.ToString(),
                    (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return new RegexPrefix(
                    node._str,
                    (node._options & RegexOptions.IgnoreCase) != 0);

            // These node types are stepped over; the scan moves on to the next sibling.
            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                break;

            default:
                return RegexPrefix.Empty;
        }

        // Advance to the next child of the current concatenation, if one remains.
        if (enclosingConcat != null && childIndex < enclosingConcat.ChildCount())
        {
            node = enclosingConcat.Child(childIndex);
            childIndex++;
        }
        else
        {
            return RegexPrefix.Empty;
        }
    }
}
/// <summary>
/// Top-level RegexCode generator. Walks the tree depth-first using an
/// explicit stack of child indices, calling EmitFragment at each leaf and
/// before and after each child of an interior node.
///
/// The walk runs twice: once with _counting set, to count the size of the
/// generated code, and once more after _emitted has been allocated with
/// _count entries, to generate the code.
///
/// This should be timed against the alternative of generating the code in
/// a single pass and growing the array as needed.
/// </summary>
/// <param name="tree">The regex tree to generate code for.</param>
/// <returns>A RegexCode built from the emitted ints and the RegexFCD analysis results.</returns>
private RegexCode RegexCodeFromRegexTree(RegexTree tree)
{
    // A sparse capture-number mapping is only needed when some numbers are unused.
    int capsize;
    if (tree._capnumlist != null && tree._captop != tree._capnumlist.Length)
    {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;
        for (int slot = 0; slot < tree._capnumlist.Length; slot++)
        {
            _caps[tree._capnumlist[slot]] = slot;
        }
    }
    else
    {
        capsize = tree._captop;
        _caps = null;
    }

    _capPositions = new int[capsize];

    _counting = true;

    while (true)
    {
        // On the second pass the size is known, so the output array can be allocated.
        if (!_counting)
        {
            _emitted = new int[_count];
        }

        RegexNode node = tree._root;
        int childIndex = 0;

        // Emitted with a 0 operand; PatchJump(0, ...) is called after the walk.
        Emit(RegexCode.Lazybranch, 0);

        while (true)
        {
            bool isLeaf = node._children == null;

            if (!isLeaf && childIndex < node._children.Count)
            {
                // Interior node with a child still to visit: emit the "before" fragment and descend.
                EmitFragment(node._type | BeforeChild, node, childIndex);
                node = node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (isLeaf)
            {
                EmitFragment(node._type, node, 0);
            }

            // An empty stack means we are back at the root and this pass is finished.
            if (EmptyStack())
            {
                break;
            }

            // Climb to the parent, emit its "after" fragment and move to the next sibling.
            childIndex = PopInt();
            node = node._next;
            EmitFragment(node._type | AfterChild, node, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        if (!_counting)
        {
            break;
        }

        _counting = false;
    }

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != 0)
    {
        culture = CultureInfo.InvariantCulture;
    }
    else
    {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore prefix is only built when a non-empty literal prefix was found.
    RegexBoyerMoore bmPrefix = null;
    if (prefix != null && prefix.Prefix.Length > 0)
    {
        bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture);
    }

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl, _resetMatchStartFound, _capPositions);
}
/*
 * The main FC computation. It performs a short-circuiting depth-first walk
 * of the tree, calling CalculateFC at each leaf and before and after each
 * child of an interior node. Returns null if the walk sets _failed or if
 * no FC is left on the stack when the walk ends; otherwise returns the
 * popped FC.
 */
private RegexFC RegexFCFromRegexTree(RegexTree tree)
{
    RegexNode node = tree._root;
    int childIndex = 0;

    while (true)
    {
        bool isLeaf = node._children == null;

        if (!isLeaf && childIndex < node._children.Count && !_skipAllChildren)
        {
            // Interior node with more children to analyze.
            CalculateFC(node._type | BeforeChild, node, childIndex);

            if (_skipchild)
            {
                // This child is skipped: step to the next sibling and clear the flag.
                childIndex++;
                _skipchild = false;
            }
            else
            {
                // Descend; the int stack of child indices is what makes this a depth-first walk.
                node = node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        if (isLeaf)
        {
            CalculateFC(node._type, node, 0);
        }

        // Reached after a leaf, or after an interior node whose children are
        // all analyzed or were skipped.
        _skipAllChildren = false;

        if (IntIsEmpty())
        {
            break;
        }

        // Climb to the parent and run its "after child" step.
        childIndex = PopInt();
        node = node._next;
        CalculateFC(node._type | AfterChild, node, childIndex);

        if (_failed)
        {
            return null;
        }

        childIndex++;
    }

    return FCIsEmpty() ? null : PopFC();
}