Пример #1
0
        /// <summary>
        /// Creates a symbolic regex builder over the given character algebra and
        /// pre-builds the constant regexes the builder hands out (epsilon, nothing, dot,
        /// dot-star, the anchors, newline and the two line-anchor helper regexes).
        /// </summary>
        /// <param name="solver">Effective Boolean algebra over S.</param>
        public SymbolicRegexBuilder(ICharAlgebra <S> solver)
        {
            this.solver = solver;

            // The order below is kept as-is: later constants are built from earlier ones.
            epsilon = SymbolicRegex <S> .MkEpsilon(this);

            // The False and True predicates are registered in the singleton cache.
            nothing = SymbolicRegex <S> .MkFalse(this);
            singletonCache[solver.False] = nothing;

            dot = SymbolicRegex <S> .MkTrue(this);
            singletonCache[solver.True] = dot;

            dotStar = SymbolicRegex <S> .MkDotStar(this, dot);

            startAnchor = SymbolicRegex <S> .MkStartAnchor(this);
            endAnchor   = SymbolicRegex <S> .MkEndAnchor(this);
            eolAnchor   = SymbolicRegex <S> .MkEolAnchor(this);
            bolAnchor   = SymbolicRegex <S> .MkBolAnchor(this);

            // The newline singleton is cached under its own predicate.
            newLine = SymbolicRegex <S> .MkNewline(this);
            singletonCache[newLine.set] = newLine;

            // bolRegex: a 0..1 loop over (dotStar followed by newLine).
            var anythingThenNewLine = SymbolicRegex <S> .MkConcat(this, dotStar, newLine);
            bolRegex = SymbolicRegex <S> .MkLoop(this, anythingThenNewLine, 0, 1);

            // eolRegex: a 0..1 loop over (newLine followed by dotStar).
            var newLineThenAnything = SymbolicRegex <S> .MkConcat(this, newLine, dotStar);
            eolRegex = SymbolicRegex <S> .MkLoop(this, newLineThenAnything, 0, 1);
        }
Пример #2
0
        /// <summary>
        /// Rebuilds <paramref name="re"/> so that every Loop node is replaced by the
        /// result of UnrollLoop (itself unrolled recursively). Concat, IfThenElse and Or
        /// nodes are rebuilt from their unrolled children; every other kind is returned as is.
        /// </summary>
        /// <param name="re">Regex to unroll.</param>
        /// <returns>The unrolled regex.</returns>
        public SymbolicRegex <S> UnrollRE(SymbolicRegex <S> re)
        {
            switch (re.Kind)
            {
            case SymbolicRegexKind.Concat:
            {
                // Left is unrolled before right, as in a plain argument list.
                var unrolledLeft  = UnrollRE(re.Left);
                var unrolledRight = UnrollRE(re.Right);
                return builder.MkConcat(unrolledLeft, unrolledRight);
            }

            case SymbolicRegexKind.IfThenElse:
            {
                // The condition is passed through untouched; only the branches are unrolled.
                var condition     = re.IteCond;
                var unrolledLeft  = UnrollRE(re.Left);
                var unrolledRight = UnrollRE(re.Right);
                return builder.MkIfThenElse(condition, unrolledLeft, unrolledRight);
            }

            case SymbolicRegexKind.Or:
            {
                var branches = re.alts.ToArray();
                return builder.MkOr(Array.ConvertAll(branches, UnrollRE));
            }

            case SymbolicRegexKind.Loop:
                // The unrolled body may itself contain loops, so recurse on the result.
                return UnrollRE(UnrollLoop(re));

            default:
                // anchors, singletons and epsilon need no unrolling
                return re;
            }
        }
Пример #3
0
        /// <summary>
        /// Checks that a reversed symbolic regex matches reversed inputs, that reversing
        /// twice yields a regex equal to the original, and that strings sampled from the
        /// reversed regex are accepted by its matcher.
        /// </summary>
        public void TestSymbolicRegex_Reverse()
        {
            CharSetSolver solver = new CharSetSolver();

            // --- plain literal ---
            var literalRegex    = new Regex(@"abc");
            var literalSr       = solver.RegexConverter.ConvertToSymbolicRegex(literalRegex, true);
            var literalReversed = literalSr.Reverse();
            var literalMatcher  = new SymbolicRegex <BDD>(literalReversed, solver, literalReversed.ComputeMinterms());

            Assert.IsTrue(literalMatcher.IsMatch("cba"));

            // --- anchored alternation under a plus loop ---
            var loopRegex    = new Regex(@"^(foo|ab+d)+$");
            var loopSr       = solver.RegexConverter.ConvertToSymbolicRegex(loopRegex, true);
            var loopReversed = loopSr.Reverse();
            var loopMatcher  = new SymbolicRegex <BDD>(loopReversed, solver, loopReversed.ComputeMinterms());

            // Reversing the reversed regex must give back the original.
            Assert.IsTrue(loopSr.Equals(loopReversed.Reverse()));
            Assert.IsTrue(loopMatcher.IsMatch("oof"));
            Assert.IsTrue(loopMatcher.IsMatch("oofdbbaoofoofdbbadba"));

            // Every sampled member of the reversed regex must be accepted by its matcher.
            var generator = new SymbolicRegexSampler <BDD>(solver.RegexConverter.srBuilder, loopReversed, 10);
            var generated = generator.GetPositiveDataset(100);

            foreach (var member in generated)
            {
                Assert.IsTrue(loopMatcher.IsMatch(member));
            }
        }
Пример #4
0
        /// <summary>
        /// Rebuilds <paramref name="sr"/> with <paramref name="builderT"/>, mapping each
        /// singleton predicate through <paramref name="predicateTransformer"/>.
        /// </summary>
        /// <param name="sr">Regex over S to translate.</param>
        /// <param name="builderT">Builder used to create the resulting nodes.</param>
        /// <param name="predicateTransformer">Maps a predicate over S to a predicate over T.</param>
        /// <returns>The corresponding regex over T.</returns>
        internal SymbolicRegex <T> Transform <T>(SymbolicRegex <S> sr, SymbolicRegexBuilder <T> builderT, Func <S, T> predicateTransformer)
        {
            switch (sr.kind)
            {
            // Constant nodes map to the target builder's own constants.
            case SymbolicRegexKind.StartAnchor:
                return builderT.startAnchor;

            case SymbolicRegexKind.EndAnchor:
                return builderT.endAnchor;

            case SymbolicRegexKind.Epsilon:
                return builderT.epsilon;

            case SymbolicRegexKind.Singleton:
            {
                T mappedSet = predicateTransformer(sr.set);
                return builderT.MkSingleton(mappedSet);
            }

            case SymbolicRegexKind.Loop:
            {
                var mappedBody = Transform(sr.left, builderT, predicateTransformer);
                return builderT.MkLoop(mappedBody, sr.lower, sr.upper);
            }

            case SymbolicRegexKind.Or:
            {
                var mappedAlts = sr.alts.Transform(builderT, predicateTransformer);
                return builderT.MkOr(mappedAlts);
            }

            case SymbolicRegexKind.Concat:
            {
                var mappedLeft  = Transform(sr.left, builderT, predicateTransformer);
                var mappedRight = Transform(sr.right, builderT, predicateTransformer);
                return builderT.MkConcat(mappedLeft, mappedRight);
            }

            default:
            {
                // Every remaining kind is handled as if-then-else.
                var mappedCond = Transform(sr.IteCond, builderT, predicateTransformer);
                var mappedThen = Transform(sr.left, builderT, predicateTransformer);
                var mappedElse = Transform(sr.right, builderT, predicateTransformer);
                return builderT.MkIfThenElse(mappedCond, mappedThen, mappedElse);
            }
            }
        }
Пример #5
0
        /// <summary>
        /// Concatenates the given regexes from right to left. Returns epsilon for an empty
        /// argument list, returns nothing as soon as any operand is nothing, and leaves
        /// epsilon operands out of the resulting concatenation.
        /// </summary>
        /// <param name="regexes">Operands in left-to-right order.</param>
        public SymbolicRegex <S> MkConcat(params SymbolicRegex <S>[] regexes)
        {
            int lastIndex = regexes.Length - 1;

            if (lastIndex < 0)
            {
                return this.epsilon;
            }

            // Start from the rightmost operand and prepend the others one at a time.
            SymbolicRegex <S> tail = regexes[lastIndex];

            if (tail.IsNothing)
            {
                return this.nothing;
            }

            for (int k = lastIndex - 1; k >= 0; k--)
            {
                SymbolicRegex <S> head = regexes[k];

                if (head.IsNothing)
                {
                    return this.nothing;
                }

                if (tail.IsEpsilon)
                {
                    // an epsilon accumulated so far is simply replaced by the next operand
                    tail = head;
                }
                else if (!head.IsEpsilon)
                {
                    tail = SymbolicRegex <S> .MkConcat(this, head, tail);
                }
            }
            return tail;
        }
Пример #6
0
        /// <summary>
        /// Round-trips a compiled matcher through Serialize/Deserialize (via the file
        /// "matcher.bin") and checks that the deserialized matcher exists and agrees with
        /// the original matcher on a few sample inputs.
        /// </summary>
        public void TestSerialization_SymbolicRegexMatcher()
        {
            var regex   = new Regex(@"[0-9]");
            var matcher = (SymbolicRegex <ulong>)regex.Compile();

            matcher.Serialize("matcher.bin");
            var matcher_ = SymbolicRegex <ulong> .Deserialize("matcher.bin");

            // Without these checks the test only proved that the calls do not throw.
            Assert.IsNotNull(matcher_);
            foreach (var input in new[] { "7", "x", "ab3cd" })
            {
                // Compare against the original matcher rather than a hard-coded expectation.
                Assert.AreEqual <bool>(matcher.IsMatch(input), matcher_.IsMatch(input));
            }
        }
Пример #7
0
 /// <summary>
 /// Creates a sampler for the given symbolic regex, using the regex's own builder
 /// and a freshly created random number generator.
 /// </summary>
 /// <param name="sr">Regex to sample from; its builder field is reused by the sampler.</param>
 /// <param name="maxUnroll">Stored as the sampler's maxUnroll setting.</param>
 /// <param name="cornerCaseProb">Stored as the sampler's cornerCaseProb setting (default 5).</param>
 /// <param name="maxSamplingIter">Stored as the sampler's maxSamplingIter setting (default 3).</param>
 public SymbolicRegexSampler(SymbolicRegex <S> sr, int maxUnroll, int cornerCaseProb = 5, int maxSamplingIter = 3)
 {
     // the regex and the builder it was created with
     this.sr      = sr;
     this.builder = sr.builder;

     // sampling settings
     this.maxUnroll       = maxUnroll;
     this.cornerCaseProb  = cornerCaseProb;
     this.maxSamplingIter = maxSamplingIter;

     this.rand = new Random();
 }
Пример #8
0
        /// <summary>
        /// Checks matching of a bounded loop over five three-letter alternatives:
        /// "addddd" splits into "add" + "ddd", while "adddddd" (seven characters)
        /// cannot be split into three-letter words.
        /// </summary>
        public void TestDerivative_IsMatch2()
        {
            // The unused string-to-BDD[] converter lambda that used to be declared here was removed.
            var           regex   = @"^(abc|bbd|add|dde|ddd){1,2000}$";
            CharSetSolver css     = new CharSetSolver();
            var           sr      = css.RegexConverter.ConvertToSymbolicRegex(regex, RegexOptions.None, true);
            var           matcher = new SymbolicRegex <BDD>(sr, css, sr.ComputeMinterms());

            Assert.IsTrue(matcher.IsMatch("addddd"));
            Assert.IsFalse(matcher.IsMatch("adddddd"));
        }
Пример #9
0
        /// <summary>
        /// Matches "abc" followed by 1000 characters from CreateRandomString against
        /// ^abc[\0-\xFF]+$, then checks that appending U+FFFD characters (outside the
        /// \0-\xFF range) makes the match fail.
        /// </summary>
        public void TestSymbolicRegexBDD_IsMatch()
        {
            CharSetSolver solver = new CharSetSolver();
            Regex pattern = new Regex(@"^abc[\0-\xFF]+$");

            var symbolic   = pattern.ConvertToSymbolicRegexBDD(solver);
            var bddMatcher = new SymbolicRegex <BDD>(symbolic, solver, symbolic.ComputeMinterms());

            string accepted = "abc" + CreateRandomString(1000);
            string rejected = accepted + "\uFFFD\uFFFD\uFFFD";

            Assert.IsTrue(bddMatcher.IsMatch(accepted));
            Assert.IsFalse(bddMatcher.IsMatch(rejected));
        }
Пример #10
0
        /// <summary>
        /// Returns the singleton regex for the given predicate, creating and caching it
        /// in singletonCache on first use.
        /// </summary>
        /// <param name="set">Predicate the singleton regex stands for.</param>
        public SymbolicRegex <S> MkSingleton(S set)
        {
            SymbolicRegex <S> cached;

            if (singletonCache.TryGetValue(set, out cached))
            {
                return cached;
            }

            // cache miss: build the node once and remember it for later calls
            SymbolicRegex <S> created = SymbolicRegex <S> .MkSingleton(this, set);
            singletonCache[set] = created;
            return created;
        }
        /// <summary>
        /// Converts a "not-one loop" node into a loop, bounded by node._m and node._n,
        /// over a singleton that accepts every character except node._ch.
        /// </summary>
        private SymbolicRegex <S> ConvertNodeNotoneloopToSymbolicRegex(RegexNode node)
        {
            bool ignoreCase = (node._options & RegexOptions.IgnoreCase) != 0;

            // Complement of the constraint for the single excluded character.
            S excludedChar = solver.MkCharConstraint(node._ch, ignoreCase);
            S cond         = solver.MkNot(excludedChar);

            // Record a description for the predicate unless one already exists.
            if (!description.ContainsKey(cond))
            {
                description[cond] = string.Format("[^{0}]", Rex.RexEngine.Escape(node._ch));
            }

            var loopBody = this.srBuilder.MkSingleton(cond);
            return this.srBuilder.MkLoop(loopBody, node._m, node._n);
        }
Пример #12
0
        /// <summary>
        /// Checks matching of ^\w\d\w{1,8}$: one word character, one digit, then between
        /// one and eight word characters.
        /// </summary>
        public void TestDerivative_IsMatch1()
        {
            // The unused string-to-BDD[] converter lambda that used to be declared here was removed.
            var           regex   = @"^\w\d\w{1,8}$";
            CharSetSolver css     = new CharSetSolver();
            var           sr      = css.RegexConverter.ConvertToSymbolicRegex(regex, RegexOptions.None, true);
            var           matcher = new SymbolicRegex <BDD>(sr, css, sr.ComputeMinterms());

            Assert.IsTrue(matcher.IsMatch("a0d"));
            Assert.IsFalse(matcher.IsMatch("a0"));           // trailing \w{1,8} is missing
            Assert.IsTrue(matcher.IsMatch("a5def"));
            Assert.IsFalse(matcher.IsMatch("aa"));           // second character is not a digit
            Assert.IsTrue(matcher.IsMatch("a3abcdefg"));
            Assert.IsTrue(matcher.IsMatch("a3abcdefgh"));    // exactly eight trailing characters
            Assert.IsFalse(matcher.IsMatch("a3abcdefghi"));  // nine trailing characters
        }
        /// <summary>
        /// Converts a "set loop" node into a loop, bounded by node._m and node._n, over a
        /// singleton whose predicate is built from the node's character-set string.
        /// </summary>
        private SymbolicRegex <S> ConvertNodeSetloopToSymbolicRegex(RegexNode node)
        {
            // ranges and categories are encoded in the set string
            string encodedSet = node._str;
            bool   ignoreCase = (node._options & RegexOptions.IgnoreCase) != 0;

            S moveCond = CreateConditionFromSet(ignoreCase, encodedSet);

            // Record a description for the predicate unless one already exists.
            if (!description.ContainsKey(moveCond))
            {
                description[moveCond] = RegexCharClass.SetDescription(encodedSet);
            }

            var loopBody = this.srBuilder.MkSingleton(moveCond);
            return this.srBuilder.MkLoop(loopBody, node._m, node._n);
        }
Пример #14
0
        /// <summary>
        /// Checks the symbolic matcher on (ab|ba)+|ababbba and pins down the match
        /// results of System.Text.RegularExpressions for the same alternation written
        /// in both orders.
        /// </summary>
        public void TestDerivative_IsMatch4()
        {
            Regex         loopFirst = new Regex(@"(ab|ba)+|ababbba", RegexOptions.Singleline);
            CharSetSolver solver    = new CharSetSolver();
            var           symbolic  = solver.RegexConverter.ConvertToSymbolicRegex(loopFirst, true);
            var           symMatcher = new SymbolicRegex <BDD>(symbolic, solver, symbolic.ComputeMinterms());

            Assert.IsTrue(symMatcher.IsMatch("ababba"));

            // .NET regex: the first alternative wins, so "ababbba" is split into two matches.
            MatchCollection found = loopFirst.Matches("xaababbba");

            Assert.IsTrue(found.Count == 2);
            Assert.IsTrue(found[0].Value == "abab");
            Assert.IsTrue(found[1].Value == "ba");

            // Same alternatives in the opposite order.
            Regex literalFirst = new Regex(@"ababbba|(ab|ba)+", RegexOptions.Singleline);

            Assert.IsTrue(literalFirst.Matches("ababba").Count == 1);
        }
Пример #15
0
        /// <summary>
        /// Checks the symbolic matcher on ^(ab*a|bbba*)$ with and without a leading .*,
        /// and pins down the .NET regex match results for bbba*|ab*a on several inputs.
        /// </summary>
        public void TestDerivative_IsMatch5()
        {
            Regex         exact  = new Regex(@"^(ab*a|bbba*)$", RegexOptions.Singleline);
            CharSetSolver solver = new CharSetSolver();

            // The automata are built but not inspected further.
            var exactAutomaton = solver.Convert(exact.ToString(), exact.Options).Determinize().Minimize().Normalize();
            //exactAutomaton.ShowGraph("A");
            Regex prefixed          = new Regex(@"^.*(ab*a|bbba*)$", RegexOptions.Singleline);
            var   prefixedAutomaton = solver.Convert(prefixed.ToString(), prefixed.Options).Determinize().Minimize().Normalize();
            //prefixedAutomaton.ShowGraph("A1");

            var exactSr         = solver.RegexConverter.ConvertToSymbolicRegex(exact, true);
            var prefixedSr      = solver.RegexConverter.ConvertToSymbolicRegex(prefixed, true);
            var exactMatcher    = new SymbolicRegex <BDD>(exactSr, solver, exactSr.ComputeMinterms());
            var prefixedMatcher = new SymbolicRegex <BDD>(prefixedSr, solver, prefixedSr.ComputeMinterms());

            // --- fully anchored pattern ---
            Assert.IsTrue(exactMatcher.IsMatch("aa"));
            Assert.IsTrue(exactMatcher.IsMatch("abbbbbbbbbba"));
            Assert.IsTrue(exactMatcher.IsMatch("bbb"));
            Assert.IsTrue(exactMatcher.IsMatch("bbbaaaaaaaaa"));
            Assert.IsFalse(exactMatcher.IsMatch("baba"));
            Assert.IsFalse(exactMatcher.IsMatch("abab"));

            // --- pattern with an arbitrary prefix ---
            Assert.IsTrue(prefixedMatcher.IsMatch("xxxxaa"));
            Assert.IsTrue(prefixedMatcher.IsMatch("xxabbbbbbbbbba"));
            Assert.IsTrue(prefixedMatcher.IsMatch("xxbbb"));
            Assert.IsTrue(prefixedMatcher.IsMatch("xxxbbbaaaaaaaaa"));
            Assert.IsFalse(prefixedMatcher.IsMatch("babab"));
            Assert.IsFalse(prefixedMatcher.IsMatch("ababx"));

            // --- .NET regex behaviour of the unanchored alternation ---
            Regex unanchored = new Regex(@"bbba*|ab*a", RegexOptions.Singleline);

            MatchCollection shortInput = unanchored.Matches("xxabbba");
            Assert.AreEqual <int>(1, shortInput.Count);
            Assert.AreEqual <int>(2, shortInput[0].Index);
            Assert.AreEqual <string>("abbba", shortInput[0].Value);

            MatchCollection doubleA = unanchored.Matches("xxabbbbaa");
            Assert.AreEqual <int>(1, doubleA.Count);
            Assert.AreEqual <int>(2, doubleA[0].Index);
            Assert.AreEqual <string>("abbbba", doubleA[0].Value);

            MatchCollection longRun = unanchored.Matches("xxabbbbbbbbbaa");
            Assert.AreEqual <int>(1, longRun.Count);

            // Without a leading 'a', the run of nine b's is consumed three at a time.
            MatchCollection onlyBs = unanchored.Matches("xxxbbbbbbbbbaa");
            Assert.AreEqual <int>(3, onlyBs.Count);
        }
Пример #16
0
 /// <summary>
 /// Builds the disjunction of the regexes in <paramref name="regexset"/>, returning a
 /// simpler regex when the set is nothing, everything, or holds a single element.
 /// </summary>
 /// <param name="regexset">Alternatives of the disjunction.</param>
 public SymbolicRegex <S> MkOr(SymbolicRegexSet <S> regexset)
 {
     // Degenerate sets map to the builder's constants or to the only element.
     if (regexset.IsNothing)
     {
         return this.nothing;
     }

     if (regexset.IsEverything)
     {
         return this.dotStar;
     }

     if (regexset.IsSigleton)
     {
         return regexset.GetTheElement();
     }

     return SymbolicRegex <S> .MkOr(this, regexset);
 }
Пример #17
0
 /// <summary>
 /// Builds a loop over <paramref name="regex"/> with the given bounds, simplifying
 /// the trivial cases: {1,1} is the body itself, {0,0} is epsilon, and an unbounded
 /// star over the True singleton is the shared dot-star regex.
 /// </summary>
 /// <param name="regex">Loop body.</param>
 /// <param name="lower">Lower bound (default 0).</param>
 /// <param name="upper">Upper bound (default int.MaxValue).</param>
 public SymbolicRegex <S> MkLoop(SymbolicRegex <S> regex, int lower = 0, int upper = int.MaxValue)
 {
     if (lower == 1 && upper == 1)
     {
         return regex;
     }

     if (lower == 0 && upper == 0)
     {
         return this.epsilon;
     }

     bool isStar = lower == 0 && upper == int.MaxValue;

     // The solver is only consulted for a star over a singleton body.
     if (isStar &&
         regex.kind == SymbolicRegexKind.Singleton &&
         this.solver.AreEquivalent(this.solver.True, regex.set))
     {
         return this.dotStar;
     }

     return SymbolicRegex <S> .MkLoop(this, regex, lower, upper);
 }
Пример #18
0
 /// <summary>
 /// Attempts to compile a regex into a symbolic regex, reporting failure through
 /// the return value instead of an AutomataException.
 /// </summary>
 /// <param name="regex">given regex</param>
 /// <param name="result">the compiled symbolic regex when the return value is true, otherwise null</param>
 /// <param name="whyfailed">the exception message when the return value is false, otherwise the empty string</param>
 /// <param name="css">given solver, if null a new one is created</param>
 /// <param name="simplify">if true then lower loop bounds are unwound (default is true)</param>
 /// <returns>true if compilation succeeded, false if it threw an AutomataException</returns>
 public static bool TryCompile(this Regex regex, out SymbolicRegex <BV> result, out string whyfailed, CharSetSolver css = null, bool simplify = true)
 {
     // The solver is created outside the try block, so failures there still propagate.
     CharSetSolver effectiveSolver = css ?? new CharSetSolver();

     try
     {
         result    = Compile(regex, effectiveSolver, simplify);
         whyfailed = "";
         return true;
     }
     catch (AutomataException failure)
     {
         result    = null;
         whyfailed = failure.Message;
         return false;
     }
 }
Пример #19
0
        /// <summary>
        /// Checks the symbolic matcher on .*(ab|ba)+$ and pins down the .NET regex match
        /// positions of (ab|ba)+ in "xxabbabbaba".
        /// </summary>
        public void TestDerivative_IsMatch3()
        {
            Regex         suffixPattern = new Regex(@".*(ab|ba)+$", RegexOptions.Singleline);
            Regex         loopPattern   = new Regex(@"(ab|ba)+", RegexOptions.Singleline);
            CharSetSolver solver        = new CharSetSolver();
            var           symbolic      = solver.RegexConverter.ConvertToSymbolicRegex(suffixPattern, true);
            var           symMatcher    = new SymbolicRegex <BDD>(symbolic, solver, symbolic.ComputeMinterms());

            Assert.IsTrue(symMatcher.IsMatch("xxabbabbaba"));
            Assert.IsTrue(symMatcher.IsMatch("abba"));

            // .NET regex behaviour of the bare loop.
            Assert.IsTrue(loopPattern.IsMatch("baba"));
            Assert.IsFalse(loopPattern.IsMatch("bb"));

            // "abba" at index 2, then "baba" at index 7 (the 'b' at index 6 starts no match).
            MatchCollection found = loopPattern.Matches("xxabbabbaba");

            Assert.IsTrue(found.Count == 2);
            Assert.IsTrue(found[0].Index == 2);
            Assert.IsTrue(found[0].Value == "abba");
            Assert.IsTrue(found[1].Value == "baba");
            Assert.IsTrue(found[1].Index == 7);
        }
Пример #20
0
        /// <summary>
        /// Builds the disjunction of two regexes: dot-star if either accepts everything,
        /// the other operand if one of them is nothing, otherwise a new Or node.
        /// </summary>
        SymbolicRegex <S> MkOr2(SymbolicRegex <S> x, SymbolicRegex <S> y)
        {
            if (x.IsEverything || y.IsEverything)
            {
                return this.dotStar;
            }

            // "nothing" is the unit of disjunction, so it is dropped.
            if (x.IsNothing)
            {
                return y;
            }

            if (y.IsNothing)
            {
                return x;
            }

            return SymbolicRegex <S> .MkOr(this, x, y);
        }
        /// <summary>
        /// Cross-checks matches reported by only one of the two engines. For every
        /// (start, length) pair that is in <paramref name="sr_res"/> but not in
        /// <paramref name="re_res"/>, or the other way round, asserts that both engines
        /// accept the corresponding substring of <paramref name="str"/>. The number of
        /// matches is not compared.
        /// </summary>
        private static void ValidateMatches(Regex re, SymbolicRegex <BV> sr, string str, Tuple <int, int>[] sr_res, Tuple <int, int>[] re_res)
        {
            // matches found only by the symbolic regex
            var onlySymbolic = new HashSet <Tuple <int, int> >(sr_res);
            onlySymbolic.ExceptWith(re_res);

            // matches found only by the .NET regex
            var onlyDotNet = new HashSet <Tuple <int, int> >(re_res);
            onlyDotNet.ExceptWith(sr_res);

            foreach (var span in onlySymbolic)
            {
                Assert.IsTrue(re.IsMatch(str.Substring(span.Item1, span.Item2)));
                Assert.IsTrue(sr.IsMatch(str.Substring(span.Item1, span.Item2)));
            }

            foreach (var span in onlyDotNet)
            {
                Assert.IsTrue(re.IsMatch(str.Substring(span.Item1, span.Item2)));
                Assert.IsTrue(sr.IsMatch(str.Substring(span.Item1, span.Item2)));
            }
        }
Пример #22
0
        /// <summary>
        /// Replaces a loop node by a concatenation of copies of its body. The number of
        /// copies comes from SampleLoopIterations, called with the loop's bounds.
        /// </summary>
        /// <param name="node">Loop node to unroll.</param>
        /// <returns>Epsilon for zero iterations, the body itself for one, otherwise a concatenation of bodies.</returns>
        private SymbolicRegex <S> UnrollLoop(SymbolicRegex <S> node)
        {
            int iterations = SampleLoopIterations(node.LowerBound, node.UpperBound);

            if (iterations == 0)
            {
                return builder.epsilon;
            }

            if (iterations == 1)
            {
                return node.Left;
            }

            SymbolicRegex <S> body     = node.Left;
            SymbolicRegex <S> unrolled = node.Left;

            // One copy is already in place; append the remaining iterations - 1 copies.
            for (int copy = 1; copy < iterations; copy++)
            {
                unrolled = builder.MkConcat(unrolled, body);
            }
            return unrolled;
        }
Пример #23
0
        /// <summary>
        /// Serializes the compiled matcher for b|a{1,2} to "test1.bin" and deserializes
        /// it again. The structural checks on the deserialized pattern are disabled and
        /// kept below as commented-out code.
        /// </summary>
        public void TestSerialization_StartAnchorBugFix()
        {
            Regex pattern  = new Regex(@"b|a{1,2}");
            var   compiled = (SymbolicRegex <ulong>)pattern.Compile();

            compiled.Serialize("test1.bin");
            var restored = SymbolicRegex <ulong> .Deserialize("test1.bin");

            // Disabled checks, kept for reference:
            ////---------------------
            //var regex2 = new Regex(@"b(ba|a)?b");
            //var matcher2 = (SymbolicRegexMatcher<BV>)regex2.Compile();
            //matcher2.Serialize("test2.bin");
            //var matcher2_ = SymbolicRegexMatcher<BV>.Deserialize("test2.bin");
            //---------------------
            //Assert.IsTrue(matcher1_.Pattern.Right.Left.Left.Kind == SymbolicRegexKind.Or);
            //Assert.IsTrue(matcher1_.Pattern.Right.Left.Left.OrCount == 2);
            ////---
            //var hs = new HashSet<SymbolicRegexNode<BV>>(matcher1_.Pattern.Right.Left.Left.Alts);
            ////matcher1_.Pattern.Right.Left.Left.ShowGraph(0,"m1");
            ////matcher2_.Pattern.Right.Left.Left.ShowGraph(0,"m2");
            //matcher2_.Pattern.ShowGraph(0, "p2");
        }
        /// <summary>
        /// Builds an input string out of random members of <paramref name="sr"/>. It starts
        /// with one member; for each further requested match it optionally appends a
        /// member split in two with random text in between, and then appends one more member.
        /// </summary>
        /// <param name="nrOfMatches">Number of rounds; must be at least 1.</param>
        /// <param name="randomTextSizeLimit">
        /// Exclusive upper bound on the length of the random text inserted into a split
        /// member; when it is 0 or negative no split member is added.
        /// </param>
        /// <param name="sr">Symbolic regex that generates the members.</param>
        /// <returns>The concatenated input string.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="nrOfMatches"/> is less than 1.
        /// </exception>
        public string TestRegex_GenerateInput(int nrOfMatches, int randomTextSizeLimit, SymbolicRegex <BV> sr)
        {
            if (nrOfMatches < 1)
            {
                throw new ArgumentOutOfRangeException("nrOfMatches", nrOfMatches, "At least one match must be requested.");
            }

            string str = sr.GenerateRandomMember();

            for (int i = 1; i < nrOfMatches; i++)
            {
                if (randomTextSizeLimit > 0)
                {
                    int    k   = rnd.Next(0, randomTextSizeLimit);
                    string tmp = sr.GenerateRandomMember();
                    // Random.Next(1, 0) throws because minValue exceeds maxValue, so an
                    // empty member is split at position 0 without drawing a random number.
                    int    j   = tmp.Length == 0 ? 0 : rnd.Next(1, tmp.Length);
                    str += tmp.Substring(0, j) + CreateRandomString(k) + tmp.Substring(j);
                }
                str += sr.GenerateRandomMember();
            }
            return str;
        }
Пример #25
0
        /// <summary>
        /// Produces one random string from <paramref name="root"/>. The regex is first
        /// unrolled with UnrollRE and then walked depth-first with an explicit stack:
        /// each singleton contributes a value chosen by the solver, each Or node picks
        /// one alternative at random, and epsilon and anchors contribute nothing.
        /// </summary>
        /// <param name="root">Regex to sample from.</param>
        /// <returns>The generated string.</returns>
        string GenerateRandomMember(SymbolicRegex <S> root)
        {
            // TODO: ITE is currently not supported.
            string generated = "";
            var    pending   = new Stack <SymbolicRegex <S> >();
            SymbolicRegex <S> current = null;

            pending.Push(UnrollRE(root));
            while (current != null || pending.Count > 0)
            {
                if (current == null)
                {
                    current = pending.Pop();
                }

                switch (current.Kind)
                {
                case SymbolicRegexKind.Singleton:
                {
                    if (!builder.solver.IsSatisfiable(current.Set))
                    {
                        throw new AutomataException(AutomataExceptionKind.SetIsEmpty);
                    }

                    generated += builder.solver.ChooseUniformly(current.Set);
                    current    = null;
                    break;
                }

                case SymbolicRegexKind.Loop:
                    // a loop is treated as a single pass through its body
                    current = current.Left;
                    break;

                case SymbolicRegexKind.Concat:
                    // continue with the left part now; the right part waits on the stack
                    pending.Push(current.Right);
                    current = current.Left;
                    break;

                case SymbolicRegexKind.Or:
                {
                    // walk the alternatives until the randomly chosen position is reached
                    int remaining = rand.Next(current.OrCount);
                    foreach (var alternative in current.Alts)
                    {
                        if (remaining == 0)
                        {
                            current = alternative;
                            break;
                        }
                        remaining -= 1;
                    }
                    break;
                }

                case SymbolicRegexKind.Epsilon:
                case SymbolicRegexKind.EndAnchor:
                case SymbolicRegexKind.StartAnchor:
                    // nothing to emit; move on to the next pending node
                    current = null;
                    break;

                default:
                    throw new NotImplementedException(current.Kind.ToString());
                }
            }
            return generated;
        }
Пример #26
0
        /// <summary>
        /// Goes over the symbolic regex, removes anchors, adds .* if anchors were not present.
        /// Creates an equivalent regex with implicit start and end anchors.
        /// </summary>
        /// <param name="sr">Regex (or sub-regex, during recursion) to rewrite.</param>
        /// <param name="isBeg">
        /// True when <paramref name="sr"/> sits at the beginning of the overall regex; for a
        /// concatenation it is passed on to the left operand only.
        /// </param>
        /// <param name="isEnd">
        /// True when <paramref name="sr"/> sits at the end of the overall regex; for a
        /// concatenation it is passed on to the right operand only.
        /// </param>
        /// <returns>The rewritten regex; the input node itself when nothing had to change.</returns>
        internal SymbolicRegex <S> RemoveAnchors(SymbolicRegex <S> sr, bool isBeg, bool isEnd)
        {
            switch (sr.Kind)
            {
            case SymbolicRegexKind.Concat:
            {
                #region concat
                //only the left operand can be at the beginning and only the right one at the end
                var left  = RemoveAnchors(sr.Left, isBeg, false);
                var right = RemoveAnchors(sr.Right, false, isEnd);
                //empty language concatenated with anything else reduces to empty language
                if (left.IsNothing)
                {
                    return(left);
                }
                else if (right.IsNothing)
                {
                    return(right);
                }
                else if (left.IsEverything && right.IsEverything)
                {
                    //.*.* simplifies to .*
                    return(left);
                }
                else if (left.Kind == SymbolicRegexKind.Epsilon)
                {
                    //()r simplifies to r
                    return(right);
                }
                else if (right.Kind == SymbolicRegexKind.Epsilon)
                {
                    //l() simplifies to l
                    return(left);
                }
                else if (left == sr.Left && right == sr.Right)
                {
                    //there was no change
                    return(sr);
                }
                else
                {
                    return(this.MkConcat(left, right));
                }
                #endregion
            }

            case SymbolicRegexKind.Epsilon:
            {
                #region epsilon
                if (isBeg || isEnd)
                {
                    //this is the start or the end but there is no anchor so return .*
                    return(this.dotStar);
                }
                else
                {
                    //just return ()
                    return(sr);
                }
                #endregion
            }

            case SymbolicRegexKind.IfThenElse:
            {
                #region ite
                //all three parts are rewritten with the same isBeg/isEnd flags
                var left  = RemoveAnchors(sr.Left, isBeg, isEnd);
                var right = RemoveAnchors(sr.Right, isBeg, isEnd);
                var cond  = RemoveAnchors(sr.IteCond, isBeg, isEnd);
                if (left == sr.Left && right == sr.Right && sr.IteCond == cond)
                {
                    //there was no change
                    return(sr);
                }
                else
                {
                    return(this.MkIfThenElse(cond, left, right));
                }
                #endregion
            }

            case SymbolicRegexKind.Loop:
            {
                #region loop
                //the result of this call is discarded
                //NOTE(review): the original comment said the call only verifies the absence of
                //start and end anchors inside the loop body (Left) because any anchor causes an
                //exception, but the anchor cases visible in this method return this.nothing
                //instead of throwing -- confirm the intent
                RemoveAnchors(sr.Left, false, false);
                var loop = sr;
                if (loop.IsEverything)
                {
                    //.* needs no extra .* around it
                    return(loop);
                }
                if (isEnd)
                {
                    //add .* at the end
                    loop = MkConcat(loop, this.dotStar);
                }
                if (isBeg)
                {
                    //add .* at the beginning
                    loop = MkConcat(this.dotStar, loop);
                }
                return(loop);

                #endregion
            }

            case SymbolicRegexKind.Or:
            {
                #region or
                //the set of alternatives rewrites its elements with the same flags
                var choices = sr.alts.RemoveAnchors(isBeg, isEnd);
                return(this.MkOr(choices));

                #endregion
            }

            case SymbolicRegexKind.StartAnchor:
            {
                #region anchor ^
                if (isBeg)         //^ at the beginning
                {
                    if (isEnd)     //^ also at the end
                    {
                        return(this.dotStar);
                    }
                    else
                    {
                        if (sr.IsStartOfLineAnchor)
                        {
                            //start-of-line anchor is replaced by bolRegex
                            return(this.bolRegex);
                        }
                        else
                        {
                            //the start anchor becomes implicit
                            return(this.epsilon);
                        }
                    }
                }
                else
                {
                    //treat the anchor as a regex that accepts nothing
                    return(this.nothing);
                }
                #endregion
            }

            case SymbolicRegexKind.EndAnchor:
            {
                #region anchor $
                if (isEnd)         //$ at the end
                {
                    if (isBeg)     //$ also at the beginning
                    {
                        return(this.dotStar);
                    }
                    else
                    {
                        if (sr.IsEndOfLineAnchor)
                        {
                            //end-of-line anchor is replaced by eolRegex
                            return(this.eolRegex);
                        }
                        else
                        {
                            //the end anchor becomes implicit
                            return(this.epsilon);
                        }
                    }
                }
                else
                {
                    //treat the anchor as regex that accepts nothing
                    return(this.nothing);
                }
                #endregion
            }

            default:     // SymbolicRegexKind.Singleton:
            {
                #region singleton
                var res = sr;
                if (isEnd)
                {
                    //add .* at the end
                    res = this.MkConcat(res, this.dotStar);
                }
                if (isBeg)
                {
                    //add .* at the beginning
                    res = this.MkConcat(this.dotStar, res);
                }
                return(res);

                #endregion
            }
            }
        }
Пример #27
0
        /// <summary>
        /// Builds the derivative of <paramref name="sr"/> with respect to <paramref name="elem"/>,
        /// dispatching on the kind of <paramref name="sr"/>.
        /// </summary>
        /// <param name="elem">Predicate the derivative is taken for.</param>
        /// <param name="isFirst">Forwarded unchanged to recursive calls and to the nullability test of a concatenation's left operand.</param>
        /// <param name="isLast">Forwarded unchanged to recursive calls and to the nullability test of a concatenation's left operand.</param>
        /// <param name="sr">Regex whose derivative is computed.</param>
        /// <returns>
        /// One of this builder's shared instances (nothing, epsilon, dotStar) or a regex
        /// assembled through this builder's Mk* methods.
        /// </returns>
        internal SymbolicRegex<S> MkDerivative(S elem, bool isFirst, bool isLast, SymbolicRegex<S> sr)
        {
            // The two trivial regexes are handled before looking at the kind.
            if (sr.IsEverything)
            {
                return this.dotStar;
            }

            if (sr.IsNothing)
            {
                return this.nothing;
            }

            switch (sr.kind)
            {
                case SymbolicRegexKind.StartAnchor:
                case SymbolicRegexKind.EndAnchor:
                case SymbolicRegexKind.Epsilon:
                    // Anchors and epsilon all derive to nothing.
                    return this.nothing;

                case SymbolicRegexKind.Singleton:
                {
                    // d(a,R) = epsilon if (a in R) else nothing
                    S overlap = this.solver.MkAnd(elem, sr.set);
                    if (!this.solver.IsSatisfiable(overlap))
                    {
                        return this.nothing;
                    }

                    return this.epsilon;
                }

                case SymbolicRegexKind.Loop:
                {
                    // d(a, R*) = d(a,R)R*
                    var bodyDeriv = MkDerivative(elem, isFirst, isLast, sr.left);
                    if (bodyDeriv.IsNothing)
                    {
                        return this.nothing;
                    }

                    if (sr.IsStar)
                    {
                        // R* stays R* after one iteration has been started.
                        return this.MkConcat(bodyDeriv, sr);
                    }

                    if (sr.IsPlus)
                    {
                        // R+ continues as R*.
                        return this.MkConcat(bodyDeriv, this.MkLoop(sr.left));
                    }

                    if (sr.IsMaybe)
                    {
                        // R? has no iterations left once the single one is started.
                        return bodyDeriv;
                    }

                    // Bounded loop: one iteration is consumed, so both bounds drop by one,
                    // except that an unbounded upper limit (int.MaxValue) and a zero lower
                    // limit stay as they are. The cases upper == 1 were handled above or are
                    // never produced by MkLoop, so upper > 1 holds here.
                    int remainingUpper = sr.upper != int.MaxValue ? sr.upper - 1 : int.MaxValue;
                    int remainingLower = sr.lower != 0 ? sr.lower - 1 : 0;
                    return this.MkConcat(bodyDeriv, this.MkLoop(sr.left, remainingLower, remainingUpper));
                }

                case SymbolicRegexKind.Concat:
                {
                    // d(a, AB) = d(a,A)B | (if A nullable then d(a,B))
                    var leftDeriv = this.MkDerivative(elem, isFirst, isLast, sr.left);
                    var throughLeft = this.MkConcat(leftDeriv, sr.right);
                    if (!sr.left.IsNullable(isFirst, isLast))
                    {
                        return throughLeft;
                    }

                    var throughRight = this.MkDerivative(elem, isFirst, isLast, sr.right);
                    return this.MkOr2(throughLeft, throughRight);
                }

                case SymbolicRegexKind.Or:
                    // d(a,A|B) = d(a,A)|d(a,B)
                    return this.MkOr(sr.alts.MkDerivative(elem, isFirst, isLast));

                default:
                {
                    // If-then-else: d(a,Ite(A,B,C)) = Ite(d(a,A),d(a,B),d(a,C)),
                    // short-circuited when the condition's derivative is trivial.
                    var condDeriv = this.MkDerivative(elem, isFirst, isLast, sr.iteCond);
                    if (condDeriv.IsNothing)
                    {
                        return this.MkDerivative(elem, isFirst, isLast, sr.right);
                    }

                    if (condDeriv.IsEverything)
                    {
                        return this.MkDerivative(elem, isFirst, isLast, sr.left);
                    }

                    return this.MkIfThenElse(
                        condDeriv,
                        this.MkDerivative(elem, isFirst, isLast, sr.left),
                        this.MkDerivative(elem, isFirst, isLast, sr.right));
                }
            }
        }
        /// <summary>
        /// Generates an input text for <paramref name="sr"/> via TestRegex_GenerateInput and writes it,
        /// Unicode (UTF-16) encoded, to the file "input.&lt;id&gt;.txt" appended to regexinputsPath.
        /// </summary>
        /// <param name="nrOfMatches">Forwarded to TestRegex_GenerateInput.</param>
        /// <param name="randomTextSizeLimit">Forwarded to TestRegex_GenerateInput.</param>
        /// <param name="sr">Regex the input is generated for.</param>
        /// <param name="id">Number embedded in the output file name.</param>
        public void TestRegex_GenerateInputFile(int nrOfMatches, int randomTextSizeLimit, SymbolicRegex<BV> sr, int id)
        {
            string generatedText = TestRegex_GenerateInput(nrOfMatches, randomTextSizeLimit, sr);
            string targetFile = regexinputsPath + "input." + id + ".txt";

            File.WriteAllText(targetFile, generatedText, System.Text.Encoding.Unicode);
        }
        /// <summary>
        /// Compiles the regexes with indices k_from..k_to of regexesWithoutAnchorsFile into symbolic
        /// regexes and checks that Matches(string) and Matches_(string) agree on the contents of
        /// inputFile (with surrogate characters removed). Compile and match timings are logged.
        /// </summary>
        public void TestRegex_CompileToSymbolicRegex_Matches_Comparison()
        {
            RegexOptions options = RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;
            CharSetSolver css = new CharSetSolver();

            //1 sec timeout for matching
            Regex[] regexes = Array.ConvertAll(File.ReadAllLines(regexesWithoutAnchorsFile), x => new Regex(x, options, new TimeSpan(0, 0, 1)));

            ClearLog();

            //inclusive index range of the regexes that are compared
            int k_from = 1;
            int k_to = 50; // regexes.Length - 1;
            int k = k_to - k_from + 1;

            //fail with a readable message instead of an IndexOutOfRangeException when the file is too short
            Assert.IsTrue(k_to < regexes.Length,
                "Regex file contains " + regexes.Length + " entries but index " + k_to + " is required.");

            int sr_comp_ms = System.Environment.TickCount;

            SymbolicRegex<BV>[] srs = new SymbolicRegex<BV>[k];
            SymbolicRegex<BV>[] srs_U = new SymbolicRegex<BV>[k];
            for (int i = 0; i < k; i++)
            {
                srs[i] = regexes[k_from + i].Compile(css);
            }
            sr_comp_ms = System.Environment.TickCount - sr_comp_ms;

            //second, separately compiled set used for the Matches_ run (not part of the compile time)
            for (int i = 0; i < k; i++)
            {
                srs_U[i] = regexes[k_from + i].Compile(css);
            }

            Log("Compile time(ms): " + sr_comp_ms);

            var str = File.ReadAllText(inputFile);

            //eliminate surrogates
            var str1 = new StringBuilder(str.Length);
            for (int i = 0; i < str.Length; i++)
            {
                char c = str[i];
                if (!(char.IsHighSurrogate(c) || char.IsLowSurrogate(c)))
                {
                    str1.Append(c);
                }
            }
            str = str1.ToString();

            //sanity check: no byte with all four high bits set may remain in the UTF-8 encoding
            var bytes = System.Text.Encoding.UTF8.GetBytes(str);
            Assert.IsFalse(Array.Exists(bytes, b => (b & 0xF0) == 0xF0));

            //------ Matches(string)
            int sr_tot_ms = System.Environment.TickCount;
            int sr_tot_matches = 0;

            Tuple<int, int>[] sr_matches = null;
            for (int i = 0; i < k; i++)
            {
                sr_matches = srs[i].Matches(str);
                sr_tot_matches += sr_matches.Length;
            }
            sr_tot_ms = System.Environment.TickCount - sr_tot_ms;
            //--------------

            Log("Matches(string): " + sr_tot_ms);

            //------ Matches_(string)
            int sr_tot_ms_U = System.Environment.TickCount;
            int sr_tot_matches_U = 0;

            Tuple<int, int>[] sr_matches_U = null;
            for (int i = 0; i < k; i++)
            {
                sr_matches_U = srs_U[i].Matches_(str);
                sr_tot_matches_U += sr_matches_U.Length;
            }
            sr_tot_ms_U = System.Environment.TickCount - sr_tot_ms_U;
            //--------------

            Log("Matches_(string): " + sr_tot_ms_U);

            //NOTE: a byte[]-based Matches run is not part of this comparison

            //AreEqual reports both totals when they differ
            Assert.AreEqual(sr_tot_matches, sr_tot_matches_U);

            //check also that the matches of the last regex are the same
            Assert.AreEqual<Sequence<Tuple<int, int>>>(
                new Sequence<Tuple<int, int>>(sr_matches),
                new Sequence<Tuple<int, int>>(sr_matches_U));

            Console.WriteLine(string.Format("total: Matches(string):{0}ms, Matches_(char):{1}ms, matchcount={2}", sr_tot_ms, sr_tot_ms_U, sr_tot_matches));
        }
        /// <summary>
        /// Compiles the first regexes of regexesWithoutAnchorsFile into symbolic regexes, counts all
        /// matches in the contents of inputFile with both the symbolic matcher and the .NET regex
        /// engine, logs the timings, and asserts that the two totals differ by at most 5.
        /// </summary>
        public void TestRegex_CompileToSymbolicRegex_Matches()
        {
            RegexOptions regexOptions = RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture;
            CharSetSolver solver = new CharSetSolver();

            // Every .NET regex is created with a one second matching timeout.
            Regex[] regexes = Array.ConvertAll(
                File.ReadAllLines(regexesWithoutAnchorsFile),
                pattern => new Regex(pattern, regexOptions, new TimeSpan(0, 0, 1)));

            ClearLog();

            // Number of regexes under test; must not exceed regexes.Length (around 1600).
            int count = 20;

            // --- compile to symbolic regexes ---
            int compileStart = System.Environment.TickCount;
            SymbolicRegex<BV>[] compiled = new SymbolicRegex<BV>[count];
            for (int idx = 0; idx < count; idx++)
            {
                compiled[idx] = regexes[idx].Compile(solver);
            }
            int compileMs = System.Environment.TickCount - compileStart;

            Log("Compile time(ms): " + compileMs);

            var text = File.ReadAllText(inputFile);

            // Indices of regexes that time out in .NET; they are excluded from both runs below.
            HashSet<int> timedOut = new HashSet<int>();

            if (count > 20)
            {
                // Some regexes beyond the first 20 cause timeouts in .NET, so probe each one first.
                for (int idx = 0; idx < count; idx++)
                {
                    try
                    {
                        // Reading Count forces the match collection to be evaluated.
                        int forced = regexes[idx].Matches(text).Count;
                        Log("ok: " + idx);
                    }
                    catch (System.Text.RegularExpressions.RegexMatchTimeoutException)
                    {
                        timedOut.Add(idx);
                        Log("timeout: " + idx);
                    }
                }
            }

            // --- symbolic matcher ---
            int symbolicStart = System.Environment.TickCount;
            int symbolicTotal = 0;
            for (int idx = 0; idx < count; idx++)
            {
                // Regexes that timed out in .NET could be matched here as well,
                // but then the final comparison of the totals would fail.
                if (timedOut.Contains(idx))
                {
                    continue;
                }

                symbolicTotal += compiled[idx].Matches(text).Length;
            }
            int symbolicMs = System.Environment.TickCount - symbolicStart;

            Log("AUT: " + symbolicMs);

            // --- .NET regex engine ---
            int dotnetStart = System.Environment.TickCount;
            int dotnetTotal = 0;
            for (int idx = 0; idx < count; idx++)
            {
                if (timedOut.Contains(idx))
                {
                    continue;
                }

                dotnetTotal += regexes[idx].Matches(text).Count;
            }
            int dotnetMs = System.Environment.TickCount - dotnetStart;

            Log(".NET: " + dotnetMs);

            // The totals may deviate by up to 5 in either direction.
            Assert.IsTrue(symbolicTotal <= dotnetTotal + 5);
            Assert.IsTrue(dotnetTotal <= symbolicTotal + 5);

            string summary = string.Format("total: AUT:{0}ms, .NET:{1}ms, matchcount={2}", symbolicMs, dotnetMs, dotnetTotal);
            Console.WriteLine(summary);
        }