/// <summary>
/// Initializes an incremental symbolic regex builder and eagerly creates the nodes it shares:
/// epsilon, nothing, dot, dot-star, the four anchors, newline and the begin/end-of-line regexes.
/// </summary>
/// <param name="solver">Effective Boolean algebra over S.</param>
public SymbolicRegexBuilder(ICharAlgebra<S> solver)
{
    this.solver = solver;

    epsilon = SymbolicRegex<S>.MkEpsilon(this);

    // The nodes for the False and True predicates are registered in the singleton cache up front.
    nothing = SymbolicRegex<S>.MkFalse(this);
    singletonCache[solver.False] = nothing;
    dot = SymbolicRegex<S>.MkTrue(this);
    singletonCache[solver.True] = dot;
    dotStar = SymbolicRegex<S>.MkDotStar(this, dot);

    startAnchor = SymbolicRegex<S>.MkStartAnchor(this);
    endAnchor = SymbolicRegex<S>.MkEndAnchor(this);
    eolAnchor = SymbolicRegex<S>.MkEolAnchor(this);
    bolAnchor = SymbolicRegex<S>.MkBolAnchor(this);

    // The newline node is cached under its own predicate as well.
    newLine = SymbolicRegex<S>.MkNewline(this);
    singletonCache[newLine.set] = newLine;

    // bolRegex is (dotStar newLine){0,1}; eolRegex is (newLine dotStar){0,1}.
    var anythingThenNewline = SymbolicRegex<S>.MkConcat(this, dotStar, newLine);
    bolRegex = SymbolicRegex<S>.MkLoop(this, anythingThenNewline, 0, 1);
    var newlineThenAnything = SymbolicRegex<S>.MkConcat(this, newLine, dotStar);
    eolRegex = SymbolicRegex<S>.MkLoop(this, newlineThenAnything, 0, 1);
}
/// <summary>
/// Rebuilds <paramref name="re"/> so that every Loop node is replaced by the result of
/// UnrollLoop (which picks the repetition count), recursing through concatenations,
/// if-then-else nodes and disjunctions. All other node kinds are returned unchanged.
/// </summary>
public SymbolicRegex<S> UnrollRE(SymbolicRegex<S> re)
{
    switch (re.Kind)
    {
        case SymbolicRegexKind.Concat:
            return builder.MkConcat(UnrollRE(re.Left), UnrollRE(re.Right));

        case SymbolicRegexKind.IfThenElse:
            // The condition is passed through as is; only the two branches are unrolled.
            return builder.MkIfThenElse(re.IteCond, UnrollRE(re.Left), UnrollRE(re.Right));

        case SymbolicRegexKind.Or:
            {
                var unrolledAlternatives = Array.ConvertAll(re.alts.ToArray(), alternative => UnrollRE(alternative));
                return builder.MkOr(unrolledAlternatives);
            }

        case SymbolicRegexKind.Loop:
            // The unrolled body may itself contain loops, so it is processed again.
            return UnrollRE(UnrollLoop(re));

        default:
            // anchors, singletons and epsilon
            return re;
    }
}
public void TestSymbolicRegex_Reverse()
{
    // Reverses two regexes and checks that the reversed matchers accept reversed inputs.
    var solver = new CharSetSolver();

    // Case 1: a plain literal.
    var literal = new Regex(@"abc");
    var literalSr = solver.RegexConverter.ConvertToSymbolicRegex(literal, true);
    var literalReversed = literalSr.Reverse();
    var literalMatcher = new SymbolicRegex<BDD>(literalReversed, solver, literalReversed.ComputeMinterms());
    Assert.IsTrue(literalMatcher.IsMatch("cba"));

    // Case 2: an anchored, looping alternation.
    var looping = new Regex(@"^(foo|ab+d)+$");
    var loopingSr = solver.RegexConverter.ConvertToSymbolicRegex(looping, true);
    var loopingReversed = loopingSr.Reverse();
    var loopingMatcher = new SymbolicRegex<BDD>(loopingReversed, solver, loopingReversed.ComputeMinterms());

    // Reversing twice must give back the original symbolic regex.
    Assert.IsTrue(loopingSr.Equals(loopingReversed.Reverse()));
    Assert.IsTrue(loopingMatcher.IsMatch("oof"));
    Assert.IsTrue(loopingMatcher.IsMatch("oofdbbaoofoofdbbadba"));

    // Every sampled member of the reversed regex must be accepted by the reversed matcher.
    var memberSampler = new SymbolicRegexSampler<BDD>(solver.RegexConverter.srBuilder, loopingReversed, 10);
    var members = memberSampler.GetPositiveDataset(100);
    foreach (var member in members)
    {
        Assert.IsTrue(loopingMatcher.IsMatch(member));
    }
}
/// <summary>
/// Rebuilds <paramref name="sr"/> with <paramref name="builderT"/>, mapping the predicate of
/// every singleton through <paramref name="predicateTransformer"/>.
/// </summary>
/// <param name="sr">Regex over S to translate.</param>
/// <param name="builderT">Builder that creates the resulting nodes over T.</param>
/// <param name="predicateTransformer">Maps a predicate over S to a predicate over T.</param>
/// <returns>The corresponding regex over T.</returns>
internal SymbolicRegex<T> Transform<T>(SymbolicRegex<S> sr, SymbolicRegexBuilder<T> builderT, Func<S, T> predicateTransformer)
{
    switch (sr.kind)
    {
        case SymbolicRegexKind.StartAnchor:
            return builderT.startAnchor;

        case SymbolicRegexKind.EndAnchor:
            return builderT.endAnchor;

        case SymbolicRegexKind.Epsilon:
            return builderT.epsilon;

        case SymbolicRegexKind.Singleton:
            {
                T mappedPredicate = predicateTransformer(sr.set);
                return builderT.MkSingleton(mappedPredicate);
            }

        case SymbolicRegexKind.Loop:
            {
                var mappedBody = Transform(sr.left, builderT, predicateTransformer);
                return builderT.MkLoop(mappedBody, sr.lower, sr.upper);
            }

        case SymbolicRegexKind.Or:
            // The alternatives set performs its own element-wise transformation.
            return builderT.MkOr(sr.alts.Transform(builderT, predicateTransformer));

        case SymbolicRegexKind.Concat:
            {
                var mappedLeft = Transform(sr.left, builderT, predicateTransformer);
                var mappedRight = Transform(sr.right, builderT, predicateTransformer);
                return builderT.MkConcat(mappedLeft, mappedRight);
            }

        default:
            {
                // ITE: condition first, then the two branches.
                var mappedCond = Transform(sr.IteCond, builderT, predicateTransformer);
                var mappedThen = Transform(sr.left, builderT, predicateTransformer);
                var mappedElse = Transform(sr.right, builderT, predicateTransformer);
                return builderT.MkIfThenElse(mappedCond, mappedThen, mappedElse);
            }
    }
}
/// <summary>
/// Concatenates the given regexes from right to left. Returns nothing as soon as any
/// operand is nothing, returns epsilon for an empty argument list, and leaves epsilon
/// operands out of the resulting concatenation.
/// </summary>
public SymbolicRegex<S> MkConcat(params SymbolicRegex<S>[] regexes)
{
    int lastIndex = regexes.Length - 1;
    if (lastIndex < 0)
    {
        return this.epsilon;
    }

    var suffix = regexes[lastIndex];
    if (suffix.IsNothing)
    {
        return this.nothing;
    }

    for (int i = lastIndex - 1; i >= 0; i--)
    {
        var current = regexes[i];
        if (current.IsNothing)
        {
            return this.nothing;
        }

        if (suffix.IsEpsilon)
        {
            // An epsilon suffix is simply replaced by the current operand.
            suffix = current;
            continue;
        }

        if (!current.IsEpsilon)
        {
            suffix = SymbolicRegex<S>.MkConcat(this, current, suffix);
        }
    }

    return suffix;
}
public void TestSerialization_SymbolicRegexMatcher()
{
    // Round-trips a compiled matcher through "matcher.bin"; the test passes when neither step throws.
    var digitRegex = new Regex(@"[0-9]");
    var compiled = (SymbolicRegex<ulong>)digitRegex.Compile();

    compiled.Serialize("matcher.bin");
    var restored = SymbolicRegex<ulong>.Deserialize("matcher.bin");
}
/// <summary>
/// Creates a sampler for <paramref name="sr"/>, using the builder that <paramref name="sr"/>
/// belongs to and a freshly created random number generator.
/// </summary>
/// <param name="sr">Regex to sample from.</param>
/// <param name="maxUnroll">Stored as the sampler's maxUnroll setting.</param>
/// <param name="cornerCaseProb">Stored as the sampler's cornerCaseProb setting (default 5).</param>
/// <param name="maxSamplingIter">Stored as the sampler's maxSamplingIter setting (default 3).</param>
public SymbolicRegexSampler(SymbolicRegex<S> sr, int maxUnroll, int cornerCaseProb = 5, int maxSamplingIter = 3)
{
    // The regex and the builder it was created with.
    this.sr = sr;
    this.builder = sr.builder;

    // Sampling parameters.
    this.maxUnroll = maxUnroll;
    this.cornerCaseProb = cornerCaseProb;
    this.maxSamplingIter = maxSamplingIter;

    this.rand = new Random();
}
public void TestDerivative_IsMatch2()
{
    // Checks IsMatch for an anchored alternation of three-letter words with a large loop bound.
    var regex = @"^(abc|bbd|add|dde|ddd){1,2000}$";
    CharSetSolver css = new CharSetSolver();
    var sr = css.RegexConverter.ConvertToSymbolicRegex(regex, RegexOptions.None, true);
    var matcher = new SymbolicRegex<BDD>(sr, css, sr.ComputeMinterms());

    // "addddd" splits into "add" + "ddd".
    Assert.IsTrue(matcher.IsMatch("addddd"));

    // Seven characters cannot be split into three-letter words.
    Assert.IsFalse(matcher.IsMatch("adddddd"));
}
public void TestSymbolicRegexBDD_IsMatch()
{
    // The pattern requires "abc" followed by one or more characters in the range \0-\xFF.
    var solver = new CharSetSolver();
    var pattern = new Regex(@"^abc[\0-\xFF]+$");
    var symbolic = pattern.ConvertToSymbolicRegexBDD(solver);
    var matcher = new SymbolicRegex<BDD>(symbolic, solver, symbolic.ComputeMinterms());

    var input = "abc" + CreateRandomString(1000);
    Assert.IsTrue(matcher.IsMatch(input));

    // Appending characters outside \0-\xFF must make the match fail.
    Assert.IsFalse(matcher.IsMatch(input + "\uFFFD\uFFFD\uFFFD"));
}
/// <summary>
/// Returns the singleton regex for the predicate <paramref name="set"/>, creating it and
/// recording it in the singleton cache on first use.
/// </summary>
public SymbolicRegex<S> MkSingleton(S set)
{
    SymbolicRegex<S> cached;
    if (singletonCache.TryGetValue(set, out cached))
    {
        return cached;
    }

    var created = SymbolicRegex<S>.MkSingleton(this, set);
    singletonCache[set] = created;
    return created;
}
/// <summary>
/// Converts a "not one character" loop node into a loop over the singleton for the
/// complement of the node's character, with the node's _m and _n as loop bounds.
/// </summary>
private SymbolicRegex<S> ConvertNodeNotoneloopToSymbolicRegex(RegexNode node)
{
    bool caseInsensitive = (node._options & RegexOptions.IgnoreCase) != 0;
    S charPredicate = solver.MkCharConstraint(node._ch, caseInsensitive);
    S complement = solver.MkNot(charPredicate);

    // Record a readable description for the predicate the first time it is seen.
    bool alreadyDescribed = description.ContainsKey(complement);
    if (!alreadyDescribed)
    {
        description[complement] = string.Format("[^{0}]", Rex.RexEngine.Escape(node._ch));
    }

    SymbolicRegex<S> loopBody = this.srBuilder.MkSingleton(complement);
    return this.srBuilder.MkLoop(loopBody, node._m, node._n);
}
public void TestDerivative_IsMatch1()
{
    // Checks the boundaries of the bounded loop in \w\d\w{1,8}: inputs of 3 to 10 characters match.
    var regex = @"^\w\d\w{1,8}$";
    CharSetSolver css = new CharSetSolver();
    var sr = css.RegexConverter.ConvertToSymbolicRegex(regex, RegexOptions.None, true);
    var matcher = new SymbolicRegex<BDD>(sr, css, sr.ComputeMinterms());

    Assert.IsTrue(matcher.IsMatch("a0d"));
    Assert.IsFalse(matcher.IsMatch("a0"));
    Assert.IsTrue(matcher.IsMatch("a5def"));
    Assert.IsFalse(matcher.IsMatch("aa"));
    Assert.IsTrue(matcher.IsMatch("a3abcdefg"));

    // Eight trailing word characters is the upper bound; nine is one too many.
    Assert.IsTrue(matcher.IsMatch("a3abcdefgh"));
    Assert.IsFalse(matcher.IsMatch("a3abcdefghi"));
}
/// <summary>
/// Converts a set loop node into a loop over the singleton for the node's character set,
/// with the node's _m and _n as loop bounds.
/// </summary>
private SymbolicRegex<S> ConvertNodeSetloopToSymbolicRegex(RegexNode node)
{
    // Ranges and categories are encoded in the set string.
    string encodedSet = node._str;
    bool caseInsensitive = (node._options & RegexOptions.IgnoreCase) != 0;
    S setPredicate = CreateConditionFromSet(caseInsensitive, encodedSet);

    // Record a readable description for the predicate the first time it is seen.
    bool alreadyDescribed = description.ContainsKey(setPredicate);
    if (!alreadyDescribed)
    {
        description[setPredicate] = RegexCharClass.SetDescription(encodedSet);
    }

    SymbolicRegex<S> loopBody = this.srBuilder.MkSingleton(setPredicate);
    return this.srBuilder.MkLoop(loopBody, node._m, node._n);
}
public void TestDerivative_IsMatch4()
{
    // Compares the symbolic matcher with .NET behaviour on an alternation whose second branch is a fixed word.
    var loopFirst = new Regex(@"(ab|ba)+|ababbba", RegexOptions.Singleline);
    CharSetSolver solver = new CharSetSolver();
    var symbolic = solver.RegexConverter.ConvertToSymbolicRegex(loopFirst, true);
    var matcher = new SymbolicRegex<BDD>(symbolic, solver, symbolic.ComputeMinterms());
    Assert.IsTrue(matcher.IsMatch("ababba"));

    // .NET prefers the first alternative, so "ababbba" is never reported as a single match.
    var found = loopFirst.Matches("xaababbba");
    Assert.IsTrue(found.Count == 2);
    Assert.IsTrue(found[0].Value == "abab");
    Assert.IsTrue(found[1].Value == "ba");

    // Same alternatives in the opposite order.
    var wordFirst = new Regex(@"ababbba|(ab|ba)+", RegexOptions.Singleline);
    Assert.IsTrue(wordFirst.Matches("ababba").Count == 1);
}
public void TestDerivative_IsMatch5()
{
    // Exercises an anchored alternation, the same alternation behind a .* prefix, and .NET Matches on the unanchored form.
    var anchored = new Regex(@"^(ab*a|bbba*)$", RegexOptions.Singleline);
    CharSetSolver solver = new CharSetSolver();

    // The automata are built but not inspected further (ShowGraph calls are disabled).
    var anchoredAutomaton = solver.Convert(anchored.ToString(), anchored.Options).Determinize().Minimize().Normalize();
    //anchoredAutomaton.ShowGraph("A");
    var prefixed = new Regex(@"^.*(ab*a|bbba*)$", RegexOptions.Singleline);
    var prefixedAutomaton = solver.Convert(prefixed.ToString(), prefixed.Options).Determinize().Minimize().Normalize();
    //prefixedAutomaton.ShowGraph("A1");

    var anchoredSr = solver.RegexConverter.ConvertToSymbolicRegex(anchored, true);
    var prefixedSr = solver.RegexConverter.ConvertToSymbolicRegex(prefixed, true);
    var anchoredMatcher = new SymbolicRegex<BDD>(anchoredSr, solver, anchoredSr.ComputeMinterms());
    var prefixedMatcher = new SymbolicRegex<BDD>(prefixedSr, solver, prefixedSr.ComputeMinterms());

    // Anchored pattern.
    Assert.IsTrue(anchoredMatcher.IsMatch("aa"));
    Assert.IsTrue(anchoredMatcher.IsMatch("abbbbbbbbbba"));
    Assert.IsTrue(anchoredMatcher.IsMatch("bbb"));
    Assert.IsTrue(anchoredMatcher.IsMatch("bbbaaaaaaaaa"));
    Assert.IsFalse(anchoredMatcher.IsMatch("baba"));
    Assert.IsFalse(anchoredMatcher.IsMatch("abab"));

    // Pattern with an arbitrary prefix.
    Assert.IsTrue(prefixedMatcher.IsMatch("xxxxaa"));
    Assert.IsTrue(prefixedMatcher.IsMatch("xxabbbbbbbbbba"));
    Assert.IsTrue(prefixedMatcher.IsMatch("xxbbb"));
    Assert.IsTrue(prefixedMatcher.IsMatch("xxxbbbaaaaaaaaa"));
    Assert.IsFalse(prefixedMatcher.IsMatch("babab"));
    Assert.IsFalse(prefixedMatcher.IsMatch("ababx"));

    // .NET matching of the unanchored alternation.
    var unanchored = new Regex(@"bbba*|ab*a", RegexOptions.Singleline);

    var firstRun = unanchored.Matches("xxabbba");
    Assert.AreEqual<int>(1, firstRun.Count);
    Assert.AreEqual<int>(2, firstRun[0].Index);
    Assert.AreEqual<string>("abbba", firstRun[0].Value);

    var secondRun = unanchored.Matches("xxabbbbaa");
    Assert.AreEqual<int>(1, secondRun.Count);
    Assert.AreEqual<int>(2, secondRun[0].Index);
    Assert.AreEqual<string>("abbbba", secondRun[0].Value);

    var thirdRun = unanchored.Matches("xxabbbbbbbbbaa");
    Assert.AreEqual<int>(1, thirdRun.Count);

    var fourthRun = unanchored.Matches("xxxbbbbbbbbbaa");
    Assert.AreEqual<int>(3, fourthRun.Count);
}
/// <summary>
/// Builds the disjunction of the regexes in <paramref name="regexset"/>. Degenerate sets are
/// simplified: an empty-language set gives nothing, an everything set gives dot-star and a
/// one-element set gives that element.
/// </summary>
public SymbolicRegex<S> MkOr(SymbolicRegexSet<S> regexset)
{
    if (regexset.IsNothing)
    {
        return this.nothing;
    }

    if (regexset.IsEverything)
    {
        return this.dotStar;
    }

    if (regexset.IsSigleton)
    {
        return regexset.GetTheElement();
    }

    return SymbolicRegex<S>.MkOr(this, regexset);
}
/// <summary>
/// Builds the loop regex{lower,upper}. A {1,1} loop is the body itself, a {0,0} loop is
/// epsilon, and an unbounded loop over a singleton equivalent to True is dot-star.
/// </summary>
public SymbolicRegex<S> MkLoop(SymbolicRegex<S> regex, int lower = 0, int upper = int.MaxValue)
{
    if (lower == 1 && upper == 1)
    {
        return regex;
    }

    if (lower == 0 && upper == 0)
    {
        return this.epsilon;
    }

    // The solver is consulted only for an unbounded loop whose body is a singleton.
    bool isUnboundedStar = lower == 0 && upper == int.MaxValue;
    if (isUnboundedStar
        && regex.kind == SymbolicRegexKind.Singleton
        && this.solver.AreEquivalent(this.solver.True, regex.set))
    {
        return this.dotStar;
    }

    return SymbolicRegex<S>.MkLoop(this, regex, lower, upper);
}
/// <summary>
/// Attempts to compile a regex into a symbolic regex, reporting failure through the return
/// value instead of an <c>AutomataException</c>.
/// </summary>
/// <param name="regex">given regex</param>
/// <param name="result">the compiled regex when the return value is true, otherwise null</param>
/// <param name="whyfailed">the exception message when the return value is false, otherwise the empty string</param>
/// <param name="css">given solver, if null a new one is created</param>
/// <param name="simplify">passed on to Compile (default is true)</param>
/// <returns>true if compilation succeeded, false if it raised an <c>AutomataException</c></returns>
public static bool TryCompile(this Regex regex, out SymbolicRegex<BV> result, out string whyfailed, CharSetSolver css = null, bool simplify = true)
{
    CharSetSolver effectiveSolver = css ?? new CharSetSolver();

    try
    {
        result = Compile(regex, effectiveSolver, simplify);
    }
    catch (AutomataException failure)
    {
        result = null;
        whyfailed = failure.Message;
        return false;
    }

    whyfailed = "";
    return true;
}
public void TestDerivative_IsMatch3()
{
    // The symbolic matcher handles the .*-prefixed pattern; .NET is used for the bare loop.
    var prefixed = new Regex(@".*(ab|ba)+$", RegexOptions.Singleline);
    var bare = new Regex(@"(ab|ba)+", RegexOptions.Singleline);
    CharSetSolver solver = new CharSetSolver();
    var symbolic = solver.RegexConverter.ConvertToSymbolicRegex(prefixed, true);
    var matcher = new SymbolicRegex<BDD>(symbolic, solver, symbolic.ComputeMinterms());

    Assert.IsTrue(matcher.IsMatch("xxabbabbaba"));
    Assert.IsTrue(matcher.IsMatch("abba"));

    Assert.IsTrue(bare.IsMatch("baba"));
    Assert.IsFalse(bare.IsMatch("bb"));

    // .NET reports two non-overlapping matches in the sample input.
    var found = bare.Matches("xxabbabbaba");
    Assert.IsTrue(found.Count == 2);
    Assert.IsTrue(found[0].Index == 2);
    Assert.IsTrue(found[0].Value == "abba");
    Assert.IsTrue(found[1].Value == "baba");
    Assert.IsTrue(found[1].Index == 7);
}
/// <summary>
/// Builds the disjunction of two regexes: dot-star if either operand is everything, the
/// other operand if one of them is nothing, and an Or node otherwise.
/// </summary>
SymbolicRegex<S> MkOr2(SymbolicRegex<S> x, SymbolicRegex<S> y)
{
    if (x.IsEverything || y.IsEverything)
    {
        return this.dotStar;
    }

    if (x.IsNothing)
    {
        return y;
    }

    if (y.IsNothing)
    {
        return x;
    }

    return SymbolicRegex<S>.MkOr(this, x, y);
}
/// <summary>
/// For every match reported by only one of the two engines, asserts that both engines
/// accept the corresponding substring of <paramref name="str"/> (Item1 and Item2 of each
/// tuple are passed to Substring as start index and length).
/// </summary>
private static void ValidateMatches(Regex re, SymbolicRegex<BV> sr, string str, Tuple<int, int>[] sr_res, Tuple<int, int>[] re_res)
{
    // Matches found by the symbolic matcher only.
    var onlySymbolic = new HashSet<Tuple<int, int>>(sr_res);
    onlySymbolic.ExceptWith(re_res);

    // Matches found by .NET only.
    var onlyDotNet = new HashSet<Tuple<int, int>>(re_res);
    onlyDotNet.ExceptWith(sr_res);

    foreach (var disagreement in new[] { onlySymbolic, onlyDotNet })
    {
        foreach (var span in disagreement)
        {
            string matchedText = str.Substring(span.Item1, span.Item2);
            Assert.IsTrue(re.IsMatch(matchedText));
            Assert.IsTrue(sr.IsMatch(matchedText));
        }
    }
}
/// <summary>
/// Replaces a loop node by a concatenation of copies of its body. The number of copies is
/// chosen by SampleLoopIterations from the loop's lower and upper bounds.
/// </summary>
private SymbolicRegex<S> UnrollLoop(SymbolicRegex<S> node)
{
    int copies = SampleLoopIterations(node.LowerBound, node.UpperBound);
    if (copies == 0)
    {
        return builder.epsilon;
    }

    var body = node.Left;
    if (copies == 1)
    {
        return body;
    }

    // Start from one copy and append the remaining ones on the right.
    var unrolled = body;
    for (int remaining = copies - 1; remaining > 0; remaining--)
    {
        unrolled = builder.MkConcat(unrolled, body);
    }

    return unrolled;
}
public void TestSerialization_StartAnchorBugFix()
{
    // Round-trips a compiled matcher through "test1.bin"; the test passes when neither step throws.
    var alternation = new Regex(@"b|a{1,2}");
    var compiled = (SymbolicRegex<ulong>)alternation.Compile();
    compiled.Serialize("test1.bin");
    var restored = SymbolicRegex<ulong>.Deserialize("test1.bin");

    // The second scenario and the structural checks below are disabled.
    ////---------------------
    //var regex2 = new Regex(@"b(ba|a)?b");
    //var matcher2 = (SymbolicRegexMatcher<BV>)regex2.Compile();
    //matcher2.Serialize("test2.bin");
    //var matcher2_ = SymbolicRegexMatcher<BV>.Deserialize("test2.bin");
    //---------------------
    //Assert.IsTrue(matcher1_.Pattern.Right.Left.Left.Kind == SymbolicRegexKind.Or);
    //Assert.IsTrue(matcher1_.Pattern.Right.Left.Left.OrCount == 2);
    ////---
    //var hs = new HashSet<SymbolicRegexNode<BV>>(matcher1_.Pattern.Right.Left.Left.Alts);
    ////matcher1_.Pattern.Right.Left.Left.ShowGraph(0,"m1");
    ////matcher2_.Pattern.Right.Left.Left.ShowGraph(0,"m2");
    //matcher2_.Pattern.ShowGraph(0, "p2");
}
/// <summary>
/// Builds an input text from random members of <paramref name="sr"/>. The text starts with one
/// member; each further round optionally appends a member that is split in two with random
/// filler text in between, and then appends one more complete member.
/// </summary>
/// <param name="nrOfMatches">Number of complete members to append; must be at least 1.</param>
/// <param name="randomTextSizeLimit">
/// Exclusive upper bound for the length of the random filler; when it is 0 or negative no
/// split members are inserted.
/// </param>
/// <param name="sr">Regex whose random members are generated.</param>
/// <returns>The generated text.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="nrOfMatches"/> is less than 1.</exception>
public string TestRegex_GenerateInput(int nrOfMatches, int randomTextSizeLimit, SymbolicRegex<BV> sr)
{
    if (nrOfMatches < 1)
    {
        throw new ArgumentOutOfRangeException("nrOfMatches");
    }

    var text = new System.Text.StringBuilder(sr.GenerateRandomMember());
    for (int i = 1; i < nrOfMatches; i++)
    {
        if (randomTextSizeLimit > 0)
        {
            int k = rnd.Next(0, randomTextSizeLimit);
            string tmp = sr.GenerateRandomMember();

            // Random.Next(1, 0) throws, so an empty member is "split" at position 0 instead.
            int j = tmp.Length == 0 ? 0 : rnd.Next(1, tmp.Length);

            text.Append(tmp.Substring(0, j));
            text.Append(CreateRandomString(k));
            text.Append(tmp.Substring(j));
        }

        text.Append(sr.GenerateRandomMember());
    }

    return text.ToString();
}
/// <summary>
/// Produces a string from the unrolled form of <paramref name="root"/> by walking it depth
/// first: singletons contribute a character chosen by the solver, disjunctions pick one
/// alternative at random, epsilon and anchors contribute nothing.
/// </summary>
/// <exception cref="AutomataException">A singleton with an unsatisfiable predicate was reached.</exception>
/// <exception cref="NotImplementedException">A node kind other than the ones handled below was reached.</exception>
string GenerateRandomMember(SymbolicRegex<S> root)
{
    // TODO: ITE is currently not supported.
    string sample = "";
    Stack<SymbolicRegex<S>> pending = new Stack<SymbolicRegex<S>>();
    SymbolicRegex<S> current = null;
    pending.Push(UnrollRE(root));

    while (pending.Count > 0 || current != null)
    {
        if (current == null)
        {
            current = pending.Pop();
        }

        switch (current.Kind)
        {
            case SymbolicRegexKind.Singleton:
                if (!builder.solver.IsSatisfiable(current.Set))
                {
                    throw new AutomataException(AutomataExceptionKind.SetIsEmpty);
                }
                sample += builder.solver.ChooseUniformly(current.Set);
                current = null;
                break;

            case SymbolicRegexKind.Loop:
                // Descend into the loop body.
                current = current.Left;
                break;

            case SymbolicRegexKind.Concat:
                // Left operand first; the right operand waits on the stack.
                pending.Push(current.Right);
                current = current.Left;
                break;

            case SymbolicRegexKind.Or:
                {
                    // Walk the alternatives until the randomly chosen position is reached.
                    int stepsLeft = rand.Next(current.OrCount);
                    foreach (var alternative in current.Alts)
                    {
                        if (stepsLeft == 0)
                        {
                            current = alternative;
                            break;
                        }
                        stepsLeft -= 1;
                    }
                    break;
                }

            case SymbolicRegexKind.Epsilon:
            case SymbolicRegexKind.EndAnchor:
            case SymbolicRegexKind.StartAnchor:
                // Nothing to emit; continue with the next pending node.
                current = null;
                break;

            default:
                throw new NotImplementedException(current.Kind.ToString());
        }
    }

    return sample;
}
/// <summary>
/// Rewrites <paramref name="sr"/> without anchor nodes. Anchors at the start/end position are
/// replaced by epsilon, dot-star or the begin/end-of-line regex; where the start/end position
/// holds no anchor, dot-star is added instead. Anchors elsewhere become nothing.
/// </summary>
/// <param name="sr">Regex to rewrite.</param>
/// <param name="isBeg">True when <paramref name="sr"/> sits at the beginning of the whole regex.</param>
/// <param name="isEnd">True when <paramref name="sr"/> sits at the end of the whole regex.</param>
internal SymbolicRegex<S> RemoveAnchors(SymbolicRegex<S> sr, bool isBeg, bool isEnd)
{
    switch (sr.Kind)
    {
        case SymbolicRegexKind.Concat:
            {
                // Only the left operand can be at the beginning, only the right one at the end.
                var head = RemoveAnchors(sr.Left, isBeg, false);
                var tail = RemoveAnchors(sr.Right, false, isEnd);

                // The empty language concatenated with anything is the empty language.
                if (head.IsNothing)
                {
                    return head;
                }
                if (tail.IsNothing)
                {
                    return tail;
                }

                // .*.* simplifies to .*
                if (head.IsEverything && tail.IsEverything)
                {
                    return head;
                }

                // ()r simplifies to r and l() simplifies to l
                if (head.Kind == SymbolicRegexKind.Epsilon)
                {
                    return tail;
                }
                if (tail.Kind == SymbolicRegexKind.Epsilon)
                {
                    return head;
                }

                // Reuse the existing node when neither operand changed.
                if (head == sr.Left && tail == sr.Right)
                {
                    return sr;
                }

                return this.MkConcat(head, tail);
            }

        case SymbolicRegexKind.Epsilon:
            // At the start or the end without an anchor: .*; otherwise keep ().
            return (isBeg || isEnd) ? this.dotStar : sr;

        case SymbolicRegexKind.IfThenElse:
            {
                var thenBranch = RemoveAnchors(sr.Left, isBeg, isEnd);
                var elseBranch = RemoveAnchors(sr.Right, isBeg, isEnd);
                var condition = RemoveAnchors(sr.IteCond, isBeg, isEnd);

                bool unchanged = thenBranch == sr.Left && elseBranch == sr.Right && sr.IteCond == condition;
                if (unchanged)
                {
                    return sr;
                }

                return this.MkIfThenElse(condition, thenBranch, elseBranch);
            }

        case SymbolicRegexKind.Loop:
            {
                // The result of processing the loop body is discarded; the original author's note
                // says this call only verifies the absence of start and end anchors inside the body.
                RemoveAnchors(sr.Left, false, false);

                if (sr.IsEverything)
                {
                    return sr;
                }

                var wrapped = sr;
                if (isEnd)
                {
                    wrapped = MkConcat(wrapped, this.dotStar);
                }
                if (isBeg)
                {
                    wrapped = MkConcat(this.dotStar, wrapped);
                }
                return wrapped;
            }

        case SymbolicRegexKind.Or:
            {
                var rewrittenAlternatives = sr.alts.RemoveAnchors(isBeg, isEnd);
                return this.MkOr(rewrittenAlternatives);
            }

        case SymbolicRegexKind.StartAnchor:
            // ^ anywhere but at the beginning accepts nothing.
            if (!isBeg)
            {
                return this.nothing;
            }
            // ^ that is also at the end.
            if (isEnd)
            {
                return this.dotStar;
            }
            return sr.IsStartOfLineAnchor ? this.bolRegex : this.epsilon;

        case SymbolicRegexKind.EndAnchor:
            // $ anywhere but at the end accepts nothing.
            if (!isEnd)
            {
                return this.nothing;
            }
            // $ that is also at the beginning.
            if (isBeg)
            {
                return this.dotStar;
            }
            return sr.IsEndOfLineAnchor ? this.eolRegex : this.epsilon;

        default:
            {
                // SymbolicRegexKind.Singleton: add .* on whichever side is open.
                var padded = sr;
                if (isEnd)
                {
                    padded = this.MkConcat(padded, this.dotStar);
                }
                if (isBeg)
                {
                    padded = this.MkConcat(this.dotStar, padded);
                }
                return padded;
            }
    }
}
/// <summary>
/// Computes the derivative of <paramref name="sr"/> with respect to <paramref name="elem"/>.
/// </summary>
/// <param name="elem">Predicate the derivative is taken with respect to.</param>
/// <param name="isFirst">Forwarded to recursive calls and to the nullability test of a concatenation's left operand.</param>
/// <param name="isLast">Forwarded to recursive calls and to the nullability test of a concatenation's left operand.</param>
/// <param name="sr">Regex to differentiate.</param>
internal SymbolicRegex<S> MkDerivative(S elem, bool isFirst, bool isLast, SymbolicRegex<S> sr)
{
    if (sr.IsEverything)
    {
        return this.dotStar;
    }
    if (sr.IsNothing)
    {
        return this.nothing;
    }

    switch (sr.kind)
    {
        case SymbolicRegexKind.StartAnchor:
        case SymbolicRegexKind.EndAnchor:
        case SymbolicRegexKind.Epsilon:
            return this.nothing;

        case SymbolicRegexKind.Singleton:
            {
                // d(a,R) = epsilon if (a in R) else nothing
                bool overlaps = this.solver.IsSatisfiable(this.solver.MkAnd(elem, sr.set));
                return overlaps ? this.epsilon : this.nothing;
            }

        case SymbolicRegexKind.Loop:
            {
                // d(a, R*) = d(a,R)R*
                var bodyDerivative = MkDerivative(elem, isFirst, isLast, sr.left);
                if (bodyDerivative.IsNothing)
                {
                    return this.nothing;
                }
                if (sr.IsStar)
                {
                    return this.MkConcat(bodyDerivative, sr);
                }
                if (sr.IsPlus)
                {
                    // After one iteration of R+ the rest is R*.
                    var remainingStar = this.MkLoop(sr.left);
                    return this.MkConcat(bodyDerivative, remainingStar);
                }
                if (sr.IsMaybe)
                {
                    return bodyDerivative;
                }

                // General bounded loop: one iteration is consumed, so both bounds drop by one,
                // except that a lower bound of 0 and an unbounded upper bound stay as they are.
                // MkLoop never creates a {1,1} loop and {0,1} is the IsMaybe case above,
                // so upper > 1 holds here.
                int remainingUpper = (sr.upper == int.MaxValue) ? int.MaxValue : sr.upper - 1;
                int remainingLower = (sr.lower == 0) ? 0 : sr.lower - 1;
                var remainingLoop = this.MkLoop(sr.left, remainingLower, remainingUpper);
                return this.MkConcat(bodyDerivative, remainingLoop);
            }

        case SymbolicRegexKind.Concat:
            {
                // d(a, AB) = d(a,A)B | (if A nullable then d(a,B))
                var viaLeft = this.MkConcat(this.MkDerivative(elem, isFirst, isLast, sr.left), sr.right);
                if (!sr.left.IsNullable(isFirst, isLast))
                {
                    return viaLeft;
                }

                var viaRight = this.MkDerivative(elem, isFirst, isLast, sr.right);
                return this.MkOr2(viaLeft, viaRight);
            }

        case SymbolicRegexKind.Or:
            {
                // d(a,A|B) = d(a,A)|d(a,B)
                var alternativeDerivatives = sr.alts.MkDerivative(elem, isFirst, isLast);
                return this.MkOr(alternativeDerivatives);
            }

        default:
            {
                // ITE: d(a,Ite(A,B,C)) = Ite(d(a,A),d(a,B),d(a,C))
                var condDerivative = this.MkDerivative(elem, isFirst, isLast, sr.iteCond);

                // A condition derivative of nothing/everything selects a single branch.
                if (condDerivative.IsNothing)
                {
                    return this.MkDerivative(elem, isFirst, isLast, sr.right);
                }
                if (condDerivative.IsEverything)
                {
                    return this.MkDerivative(elem, isFirst, isLast, sr.left);
                }

                var thenDerivative = this.MkDerivative(elem, isFirst, isLast, sr.left);
                var elseDerivative = this.MkDerivative(elem, isFirst, isLast, sr.right);
                return this.MkIfThenElse(condDerivative, thenDerivative, elseDerivative);
            }
    }
}
/// <summary>
/// Generates an input text with TestRegex_GenerateInput and writes it, Unicode-encoded, to
/// the file "input.&lt;id&gt;.txt" under regexinputsPath.
/// </summary>
public void TestRegex_GenerateInputFile(int nrOfMatches, int randomTextSizeLimit, SymbolicRegex<BV> sr, int id)
{
    string contents = TestRegex_GenerateInput(nrOfMatches, randomTextSizeLimit, sr);
    string targetPath = regexinputsPath + "input." + id + ".txt";
    File.WriteAllText(targetPath, contents, System.Text.Encoding.Unicode);
}
unsafe public void TestRegex_CompileToSymbolicRegex_Matches_Comparison()
{
    // Compares Matches with Matches_ on the same input: match totals and the last result must agree.
    RegexOptions regexOptions = RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;
    CharSetSolver solver = new CharSetSolver();

    // 1 sec timeout for matching
    Regex[] regexes = Array.ConvertAll(
        File.ReadAllLines(regexesWithoutAnchorsFile),
        pattern => new Regex(pattern, regexOptions, new TimeSpan(0, 0, 1)));
    ClearLog();

    // Only the regexes at indices firstIndex..lastIndex are used; lastIndex must stay below regexes.Length.
    int firstIndex = 1;
    int lastIndex = 50;
    int count = lastIndex - firstIndex + 1;

    int compileMs = System.Environment.TickCount;
    SymbolicRegex<BV>[] plainMatchers = new SymbolicRegex<BV>[count];
    SymbolicRegex<BV>[] underscoreMatchers = new SymbolicRegex<BV>[count];
    SymbolicRegex<BV>[] byteMatchers = new SymbolicRegex<BV>[count];
    for (int i = 0; i < count; i++)
    {
        plainMatchers[i] = regexes[firstIndex + i].Compile(solver);
    }
    compileMs = System.Environment.TickCount - compileMs;

    // The two extra sets are compiled outside the timed section.
    for (int i = 0; i < count; i++)
    {
        underscoreMatchers[i] = regexes[firstIndex + i].Compile(solver);
        byteMatchers[i] = regexes[firstIndex + i].Compile(solver);
    }
    Log("Compile time(ms): " + compileMs);

    // Eliminate surrogates from the input.
    var rawInput = File.ReadAllText(inputFile);
    var filtered = new StringBuilder();
    foreach (char c in rawInput)
    {
        if (!char.IsHighSurrogate(c) && !char.IsLowSurrogate(c))
        {
            filtered.Append(c);
        }
    }
    var input = filtered.ToString();

    var bytes = System.Text.UnicodeEncoding.UTF8.GetBytes(input);
    Assert.IsFalse(Array.Exists(bytes, b => (b & 0xF0) == 0xF0));

    fixed (char* pinnedInput = input)
    {
        // --- Matches(string) ---
        int plainMs = System.Environment.TickCount;
        int plainTotal = 0;
        Tuple<int, int>[] plainLast = null;
        for (int i = 0; i < count; i++)
        {
            plainLast = plainMatchers[i].Matches(input);
            plainTotal += plainLast.Length;
        }
        plainMs = System.Environment.TickCount - plainMs;
        Log("Matches(string): " + plainMs);

        // --- Matches_(string) ---
        int underscoreMs = System.Environment.TickCount;
        int underscoreTotal = 0;
        Tuple<int, int>[] underscoreLast = null;
        for (int i = 0; i < count; i++)
        {
            underscoreLast = underscoreMatchers[i].Matches_(input);
            underscoreTotal += underscoreLast.Length;
        }
        underscoreMs = System.Environment.TickCount - underscoreMs;
        Log("Matches_(string): " + underscoreMs);

        // The byte[]-based comparison using byteMatchers and bytes is currently disabled.

        Assert.IsTrue(plainTotal == underscoreTotal);

        // Check also that the last match result is the same.
        Assert.AreEqual<Sequence<Tuple<int, int>>>(
            new Sequence<Tuple<int, int>>(plainLast),
            new Sequence<Tuple<int, int>>(underscoreLast));

        Console.WriteLine(string.Format("total: Matches(string):{0}ms, Matches_(char):{1}ms, matchcount={2}", plainMs, underscoreMs, plainTotal));
    }
}
public void TestRegex_CompileToSymbolicRegex_Matches()
{
    // Compares total match counts of the symbolic matcher and .NET on the same input file.
    RegexOptions regexOptions = RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;
    CharSetSolver solver = new CharSetSolver();

    // 1 sec timeout for matching
    Regex[] regexes = Array.ConvertAll(
        File.ReadAllLines(regexesWithoutAnchorsFile),
        pattern => new Regex(pattern, regexOptions, new TimeSpan(0, 0, 1)));
    ClearLog();

    // make sure k is at most regexes.Length, regexes.Length is around 1600
    int k = 20;

    int compileMs = System.Environment.TickCount;
    SymbolicRegex<BV>[] matchers = new SymbolicRegex<BV>[k];
    for (int i = 0; i < k; i++)
    {
        matchers[i] = regexes[i].Compile(solver);
    }
    compileMs = System.Environment.TickCount - compileMs;
    Log("Compile time(ms): " + compileMs);

    var input = File.ReadAllText(inputFile);

    // First filter out the regexes that time out in .NET; some regexes above 20 do.
    HashSet<int> timedOut = new HashSet<int>();
    if (k > 20)
    {
        for (int i = 0; i < k; i++)
        {
            try
            {
                var probe = regexes[i].Matches(input);
                // Reading Count forces the match collection to be evaluated.
                int forced = probe.Count;
                Log("ok: " + i);
            }
            catch (System.Text.RegularExpressions.RegexMatchTimeoutException)
            {
                timedOut.Add(i);
                Log("timeout: " + i);
            }
        }
    }

    // --- symbolic matcher ---
    // Regexes that timed out in .NET are skipped here too, otherwise the asserts below would fail.
    int symbolicMs = System.Environment.TickCount;
    int symbolicTotal = 0;
    for (int i = 0; i < k; i++)
    {
        if (timedOut.Contains(i))
        {
            continue;
        }
        var symbolicMatches = matchers[i].Matches(input);
        symbolicTotal += symbolicMatches.Length;
    }
    symbolicMs = System.Environment.TickCount - symbolicMs;
    Log("AUT: " + symbolicMs);

    // --- .NET ---
    int dotNetMs = System.Environment.TickCount;
    int dotNetTotal = 0;
    for (int i = 0; i < k; i++)
    {
        if (timedOut.Contains(i))
        {
            continue;
        }
        var dotNetMatches = regexes[i].Matches(input);
        dotNetTotal += dotNetMatches.Count;
    }
    dotNetMs = System.Environment.TickCount - dotNetMs;
    Log(".NET: " + dotNetMs);

    // Allow some variation (+- 5 in either direction).
    Assert.IsTrue(symbolicTotal <= dotNetTotal + 5);
    Assert.IsTrue(dotNetTotal <= symbolicTotal + 5);

    Console.WriteLine(string.Format("total: AUT:{0}ms, .NET:{1}ms, matchcount={2}", symbolicMs, dotNetMs, dotNetTotal));
}