/* * This is the one of the only two functions that should be called from outside. * It takes a RegexTree and computes the set of chars that can start it. */ internal static RegexPrefix FirstChars(RegexTree t) { RegexFCD s = new RegexFCD(); RegexFC fc = s.RegexFCFromRegexTree(t); if (fc == null || fc._nullable) { return(null); } CultureInfo culture = ((t._options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; return(new RegexPrefix(fc.GetFirstChars(culture), fc.IsCaseInsensitive())); }
internal void Dump() { int i; Debug.WriteLine("Direction: " + (_rightToLeft ? "right-to-left" : "left-to-right")); Debug.WriteLine("Firstchars: " + (_fcPrefix == null ? "n/a" : RegexCharClass.SetDescription(_fcPrefix.Prefix))); Debug.WriteLine("Prefix: " + (_bmPrefix == null ? "n/a" : Regex.Escape(_bmPrefix.ToString()))); Debug.WriteLine("Anchors: " + RegexFCD.AnchorDescription(_anchors)); Debug.WriteLine(""); if (_bmPrefix != null) { Debug.WriteLine("BoyerMoore:"); Debug.WriteLine(_bmPrefix.Dump(" ")); } for (i = 0; i < _codes.Length;) { Debug.WriteLine(OpcodeDescription(i)); i += OpcodeSize(_codes[i]); } Debug.WriteLine(""); }
/* * The top level RegexCode generator. It does a depth-first walk * through the tree and calls EmitFragment to emits code before * and after each child of an interior node, and at each leaf. * * It runs two passes, first to count the size of the generated * code, and second to generate the code. * * <CONSIDER>we need to time it against the alternative, which is * to just generate the code and grow the array as we go.</CONSIDER> */ internal RegexCode RegexCodeFromRegexTree(RegexTree tree) { RegexNode curNode; int curChild; int capsize; RegexPrefix fcPrefix; RegexPrefix prefix; int anchors; RegexBoyerMoore bmPrefix; bool rtl; // construct sparse capnum mapping if some numbers are unused if (tree._capnumlist == null || tree._captop == tree._capnumlist.Length) { capsize = tree._captop; _caps = null; } else { capsize = tree._capnumlist.Length; _caps = tree._caps; for (int i = 0; i < tree._capnumlist.Length; i++) { _caps[tree._capnumlist[i]] = i; } } _counting = true; for (;;) { if (!_counting) { _emitted = new int[_count]; } curNode = tree._root; curChild = 0; Emit(RegexCode.Lazybranch, 0); for (;;) { if (curNode._children == null) { EmitFragment(curNode._type, curNode, 0); } else if (curChild < curNode._children.Count) { EmitFragment(curNode._type | BeforeChild, curNode, curChild); curNode = (RegexNode)curNode._children[curChild]; PushInt(curChild); curChild = 0; continue; } if (EmptyStack()) { break; } curChild = PopInt(); curNode = curNode._next; EmitFragment(curNode._type | AfterChild, curNode, curChild); curChild++; } PatchJump(0, CurPos()); Emit(RegexCode.Stop); if (!_counting) { break; } _counting = false; } fcPrefix = RegexFCD.FirstChars(tree); prefix = RegexFCD.Prefix(tree); rtl = ((tree._options & RegexOptions.RightToLeft) != 0); CultureInfo culture = (tree._options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; if (prefix != null && prefix.Prefix.Length > 0) { bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture); } else { bmPrefix = null; } anchors = RegexFCD.Anchors(tree); return(new RegexCode(_emitted, _stringtable, _trackcount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl)); }