/// <summary>
/// Walks the leading edge of a <see cref="RegexTree"/> and reports the first
/// anchor node it reaches, translated through <c>AnchorFromType</c>.
/// </summary>
/// <param name="tree">The tree whose <c>Root</c> is the starting point of the walk.</param>
/// <returns>
/// <c>AnchorFromType</c> of the first anchor node found, or 0 when the walk
/// ends (unsupported node type, or no more children to visit) before any
/// anchor is seen.
/// </returns>
public static int Anchors(RegexTree tree)
{
    RegexNode node = tree.Root;

    // The concatenation whose children are currently being stepped through,
    // and the index of the next child to visit. Null means "not inside one".
    RegexNode sequence = null;
    int childIndex = 0;

    while (true)
    {
        switch (node.NType)
        {
            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Transparent wrappers: descend into the only child. Any
                // concatenation being iterated is abandoned at this point.
                node = node.Child(0);
                sequence = null;
                continue;

            case RegexNode.Concatenate:
                // A non-empty concatenation becomes the new sequence to step
                // through; an empty one is simply skipped.
                if (node.ChildCount() > 0)
                {
                    sequence = node;
                    childIndex = 0;
                }
                break;

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
                // First anchor found: the walk stops here.
                return AnchorFromType(node.NType);

            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped; move on to the next sibling, if any.
                break;

            default:
                // Any other node type ends the search with no anchor.
                return 0;
        }

        // Advance to the next child of the current concatenation, or give up
        // when there is no concatenation or it has been exhausted.
        if (sequence == null || childIndex >= sequence.ChildCount())
        {
            return 0;
        }

        node = sequence.Child(childIndex);
        childIndex++;
    }
}
/// <summary>
/// Collapses a chain of directly nested repeaters into a single node by
/// multiplying their minimum and maximum counts together, as long as the
/// inner repeater is not "too lumpy" to be merged.
/// </summary>
/// <remarks>
/// The <c>M</c> and <c>N</c> values of each merged descendant are overwritten
/// in place with the accumulated products.
/// </remarks>
/// <returns>
/// The innermost node that was merged into (this node itself when nothing
/// could be merged), or a new <c>Nothing</c> node when the accumulated
/// minimum saturated at <see cref="int.MaxValue"/>.
/// </returns>
private RegexNode ReduceRep()
{
    RegexNode current = this;
    int outerType = Type();
    int combinedMin = M;
    int combinedMax = N;

    while (current.ChildCount() != 0)
    {
        RegexNode inner = current.Child(0);

        // Only repeaters of a compatible kind are multiplied: either the same
        // type as the outermost node, or a Oneloop..Setloop child under a Loop,
        // or a Onelazy..Setlazy child under a Lazyloop.
        if (inner.Type() != outerType)
        {
            int innerType = inner.Type();
            bool loopOverSimpleLoop = outerType == Loop && innerType >= Oneloop && innerType <= Setloop;
            bool lazyOverSimpleLazy = outerType == Lazyloop && innerType >= Onelazy && innerType <= Setlazy;

            if (!loopOverSimpleLoop && !lazyOverSimpleLazy)
            {
                break;
            }
        }

        // The inner repeater can be too lumpy to blur, e.g. (a{100,105}){3}
        // or (a{2,})? -- whereas something like (a{2,})+ is fine.
        if (current.M == 0 && inner.M > 1)
        {
            break;
        }

        // NOTE(review): inner.M * 2 could wrap around for very large M if
        // arithmetic is unchecked here -- confirm whether that matters.
        if (inner.N < inner.M * 2)
        {
            break;
        }

        current = inner;

        // Fold the bounds into the inner node, saturating at int.MaxValue
        // instead of overflowing. A zero bound is left untouched.
        if (current.M > 0)
        {
            bool minWouldOverflow = (int.MaxValue - 1) / current.M < combinedMin;
            combinedMin = minWouldOverflow ? int.MaxValue : current.M * combinedMin;
            current.M = combinedMin;
        }

        if (current.N > 0)
        {
            bool maxWouldOverflow = (int.MaxValue - 1) / current.N < combinedMax;
            combinedMax = maxWouldOverflow ? int.MaxValue : current.N * combinedMax;
            current.N = combinedMax;
        }
    }

    if (combinedMin == int.MaxValue)
    {
        return new RegexNode(Nothing, Options);
    }

    return current;
}
/// <summary>
/// Walks the leading edge of a <see cref="RegexTree"/> and extracts the
/// literal text it starts with, if that text is found immediately. The
/// search is deliberately shallow and gives up at the first node it does
/// not handle.
/// </summary>
/// <param name="tree">The tree whose <c>Root</c> is the starting point of the walk.</param>
/// <returns>
/// A <see cref="RegexPrefix"/> holding the leading literal and whether the
/// node carries <see cref="RegexOptions.IgnoreCase"/>, or
/// <c>RegexPrefix.Empty</c> when no leading literal is found.
/// </returns>
public static RegexPrefix Prefix(RegexTree tree)
{
    // In release, cut off at a length to which we can still reasonably construct a string.
    // In debug, use a smaller cutoff to exercise the cutoff path in tests.
    const int Cutoff =
#if DEBUG
        50;
#else
        1_000_000;
#endif

    RegexNode node = tree.Root;

    // The concatenation whose children are currently being stepped through,
    // and the index of the next child to visit. Null means "not inside one".
    RegexNode sequence = null;
    int childIndex = 0;

    while (true)
    {
        switch (node.NType)
        {
            case RegexNode.Greedy:
            case RegexNode.Capture:
                // Transparent wrappers: descend into the only child. Any
                // concatenation being iterated is abandoned at this point.
                node = node.Child(0);
                sequence = null;
                continue;

            case RegexNode.Concatenate:
                // A non-empty concatenation becomes the new sequence to step
                // through; an empty one is simply skipped.
                if (node.ChildCount() > 0)
                {
                    sequence = node;
                    childIndex = 0;
                }
                break;

            case RegexNode.Oneloop:
            case RegexNode.Onelazy:
                // A repeated single character contributes its minimum
                // repetition as the prefix, provided that count is positive
                // and below the cutoff.
                if (node.M <= 0 || node.M >= Cutoff)
                {
                    return RegexPrefix.Empty;
                }

                return new RegexPrefix(
                    new string(node.Ch, node.M),
                    (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.One:
                return new RegexPrefix(
                    node.Ch.ToString(),
                    (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Multi:
                return new RegexPrefix(
                    node.Str,
                    (node.Options & RegexOptions.IgnoreCase) != 0);

            case RegexNode.Bol:
            case RegexNode.Eol:
            case RegexNode.Boundary:
            case RegexNode.ECMABoundary:
            case RegexNode.Beginning:
            case RegexNode.Start:
            case RegexNode.EndZ:
            case RegexNode.End:
            case RegexNode.Empty:
            case RegexNode.Require:
            case RegexNode.Prevent:
                // Skipped; move on to the next sibling, if any.
                break;

            default:
                // Any other node type ends the search with no prefix.
                return RegexPrefix.Empty;
        }

        // Advance to the next child of the current concatenation, or give up
        // when there is no concatenation or it has been exhausted.
        if (sequence == null || childIndex >= sequence.ChildCount())
        {
            return RegexPrefix.Empty;
        }

        node = sequence.Child(childIndex);
        childIndex++;
    }
}