/*
 * A related computation: given a RegexTree, work out the literal
 * string the pattern must begin with, if one can be found. The
 * analysis is deliberately simple and gives up (returning
 * RegexPrefix.Empty) as soon as it meets anything it does not handle.
 */
internal static RegexPrefix Prefix(RegexTree tree) {
    RegexNode node = tree._root;

    // The concatenation currently being stepped through (if any) and the
    // index of the next child of it to examine.
    RegexNode enclosingConcat = null;
    int childIndex = 0;

    while (true) {
        switch (node._type) {
            case RegexNode.Concatenate:
                // A non-empty concatenation becomes the new sequence to step
                // through; an empty one leaves the current cursor untouched.
                if (node.ChildCount() > 0) {
                    enclosingConcat = node;
                    childIndex = 0;
                }
                break;

            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Look through the wrapper at its first child. Any enclosing
                // concatenation is forgotten, so siblings are no longer visited.
                enclosingConcat = null;
                node = node.Child(0);
                continue;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // Only a positive minimum count (_m) yields a prefix.
                if (node._m <= 0) {
                    return RegexPrefix.Empty;
                }
                return new RegexPrefix(
                    new string(node._ch, node._m),
                    (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return new RegexPrefix(
                    node._ch.ToString(CultureInfo.InvariantCulture),
                    (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return new RegexPrefix(
                    node._str,
                    (node._options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // These node types add no characters to the prefix (presumably
                // because they are zero-width); move on to the next sibling.
                break;

            default:
                return RegexPrefix.Empty;
        }

        // Advance to the next child of the current concatenation, if one remains.
        if (enclosingConcat != null && childIndex < enclosingConcat.ChildCount()) {
            node = enclosingConcat.Child(childIndex);
            childIndex++;
            continue;
        }

        return RegexPrefix.Empty;
    }
}
/*
 * The top-level RegexCode generator. It performs a depth-first walk
 * of the tree, calling EmitFragment to emit code before and after
 * each child of an interior node, and once at each leaf.
 *
 * The walk is run twice: the first pass only counts the size of the
 * generated code, the second pass actually generates it.
 *
 * <CONSIDER>This should be timed against the alternative, which is
 * to generate the code in a single pass and grow the array as we go.</CONSIDER>
 */
internal RegexCode RegexCodeFromRegexTree(RegexTree tree) {
    int capsize;

    // Build the sparse capture-number mapping only when some numbers are unused.
    bool sparseCaptures = tree._capnumlist != null
                          && tree._captop != tree._capnumlist.Length;

    if (sparseCaptures) {
        capsize = tree._capnumlist.Length;
        _caps = tree._caps;

        for (int slot = 0; slot < capsize; slot++) {
            _caps[tree._capnumlist[slot]] = slot;
        }
    } else {
        capsize = tree._captop;
        _caps = null;
    }

    // Pass one runs with _counting set; pass two runs with it cleared and
    // writes into a freshly allocated _emitted array of _count entries.
    _counting = true;
    bool anotherPassNeeded;

    do {
        if (!_counting) {
            _emitted = new int[_count];
        }

        RegexNode node = tree._root;
        int childIndex = 0;

        // Emitted first with a 0 operand; PatchJump(0, ...) below presumably
        // fills in the real target once the end position is known.
        Emit(RegexCode.Lazybranch, 0);

        while (true) {
            bool isLeaf = node._children == null;

            if (!isLeaf && childIndex < node._children.Count) {
                // Interior node with a child still to visit: emit the
                // "before child" fragment and descend, remembering which
                // child we took on the int stack.
                EmitFragment(node._type | BeforeChild, node, childIndex);

                node = (RegexNode)node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
                continue;
            }

            if (isLeaf) {
                EmitFragment(node._type, node, 0);
            }

            // Nothing left on the stack means the root has been completed.
            if (EmptyStack()) {
                break;
            }

            // Step back up via _next (appears to be the parent link), emit the
            // "after child" fragment, then move on to the following child.
            childIndex = PopInt();
            node = node._next;

            EmitFragment(node._type | AfterChild, node, childIndex);
            childIndex++;
        }

        PatchJump(0, CurPos());
        Emit(RegexCode.Stop);

        // Only the counting pass is followed by another pass.
        anotherPassNeeded = _counting;
        _counting = false;
    } while (anotherPassNeeded);

    RegexPrefix fcPrefix = RegexFCD.FirstChars(tree);
    RegexPrefix prefix = RegexFCD.Prefix(tree);
    bool rtl = (tree._options & RegexOptions.RightToLeft) != 0;

    CultureInfo culture;
    if ((tree._options & RegexOptions.CultureInvariant) != 0) {
        culture = CultureInfo.InvariantCulture;
    } else {
        culture = CultureInfo.CurrentCulture;
    }

    // A Boyer-Moore matcher is built only for a non-empty literal prefix.
    RegexBoyerMoore bmPrefix = (prefix != null && prefix.Prefix.Length > 0)
        ? new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture)
        : null;

    int anchors = RegexFCD.Anchors(tree);

    return new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl);
}
/*
 * The main FC computation. It performs a short-circuiting depth-first
 * walk of the tree, calling CalculateFC before and after each child of
 * an interior node, and once at each leaf.
 *
 * Returns null if the computation is flagged as failed or if no result
 * is left on the FC stack; otherwise returns the FC popped from that stack.
 */
private RegexFC RegexFCFromRegexTree(RegexTree tree) {
    RegexNode node = tree._root;
    int childIndex = 0;

    while (true) {
        bool hasChildToVisit = node._children != null
                               && childIndex < node._children.Count
                               && !_skipAllChildren;

        if (hasChildToVisit) {
            // Interior node with more children to analyze.
            CalculateFC(node._type | BeforeChild, node, childIndex);

            if (_skipchild) {
                // The "before child" call asked for this child to be skipped:
                // clear the request and go on to the next sibling.
                _skipchild = false;
                childIndex++;
            } else {
                // Descend. The int stack records which child was taken, which
                // is what makes this a depth-first walk of the tree.
                node = (RegexNode)node._children[childIndex];
                PushInt(childIndex);
                childIndex = 0;
            }

            continue;
        }

        if (node._children == null) {
            // Leaf node.
            CalculateFC(node._type, node, 0);
        }

        // Reaching here means either a leaf was just handled or an interior
        // node has no further children to analyze.
        _skipAllChildren = false;

        if (IntIsEmpty()) {
            break;
        }

        // Step back up via _next (appears to be the parent link) and run the
        // "after child" step for the child just completed.
        childIndex = PopInt();
        node = node._next;

        CalculateFC(node._type | AfterChild, node, childIndex);

        if (_failed) {
            return null;
        }

        childIndex++;
    }

    return FCIsEmpty() ? null : PopFC();
}