Beispiel #1
0
        /// <summary>
        /// Builds a BV64Algebra from a set of minterms.
        /// </summary>
        /// <param name="solver">character set solver used to build the decision tree</param>
        /// <param name="minterms">the minterms; at most 64 are accepted</param>
        /// <returns>an algebra built from the decision tree and one interval set per minterm</returns>
        /// <exception cref="AutomataException">thrown when more than 64 minterms are given</exception>
        public static BV64Algebra Create(CharSetSolver solver, BDD[] minterms)
        {
            if (minterms.Length > 64)
            {
                throw new AutomataException(AutomataExceptionKind.NrOfMintermsCanBeAtMost64);
            }

            var decisionTree = DecisionTree.Create(solver, minterms);

            // One interval set per minterm, built from the minterm's ranges.
            var intervalSets = new IntervalSet[minterms.Length];
            for (int i = 0; i < minterms.Length; i++)
            {
                intervalSets[i] = new IntervalSet(minterms[i].ToRanges());
            }

            return new BV64Algebra(decisionTree, intervalSets);
        }
Beispiel #2
0
        /// <summary>
        /// Produces a string that leads from state S to state T in <paramref name="aut"/>.
        /// The automaton with initial state S and single final state T is determinized and
        /// minimized, and then explored breadth-first; one witness character is taken from
        /// the label of every move used.
        /// </summary>
        /// <param name="S">source state</param>
        /// <param name="T">target state</param>
        /// <param name="aut">automaton whose moves are searched</param>
        /// <param name="limit">states whose breadth-first distance exceeds this value are not expanded</param>
        /// <param name="solver">solver used to enumerate the characters of a move label</param>
        /// <returns>the empty string when S equals T, otherwise the string built for the first final state discovered</returns>
        /// <exception cref="AutomataException">thrown when the search ends without discovering a final state</exception>
        static string ShortStringStoT(int S, int T, Automaton <BDD> aut, int limit, CharSetSolver solver)
        {
            if (S == T)
            {
                return "";
            }

            var aut1   = Automaton <BDD> .Create(aut.Algebra, S, new int[] { T }, aut.GetMoves());
            var contst = aut1.Determinize().Minimize();
            var finst  = contst.GetFinalStates();

            // strings[q] is the string built along the breadth-first path to q,
            // dist[q] is the number of moves on that path.
            var strings = new Dictionary <int, string>();
            var dist    = new Dictionary <int, int>();
            var visited = new HashSet <int>();
            // A queue gives O(1) removal at the front of the breadth-first frontier.
            var toVisit = new Queue <int>();

            strings[contst.InitialState] = "";
            dist[contst.InitialState]    = 0;
            visited.Add(contst.InitialState);
            toVisit.Enqueue(contst.InitialState);

            while (toVisit.Count > 0)
            {
                var curr = toVisit.Dequeue();
                if (dist[curr] > limit)
                {
                    // Too far from the initial state: do not expand this state.
                    continue;
                }

                foreach (var move in contst.GetMovesFrom(curr))
                {
                    if (visited.Contains(move.TargetState))
                    {
                        continue;
                    }

                    dist[move.TargetState] = dist[move.SourceState] + 1;
                    visited.Add(move.TargetState);
                    toVisit.Enqueue(move.TargetState);

                    // Take the first character of the label; 'a' is kept when the
                    // enumeration yields nothing.
                    char wit = 'a';
                    foreach (var w in solver.GenerateAllCharacters(move.Label, false))
                    {
                        wit = w;
                        break;
                    }

                    strings[move.TargetState] = strings[move.SourceState] + wit;
                    if (finst.Contains(move.TargetState))
                    {
                        return strings[move.TargetState];
                    }
                }
            }

            throw new AutomataException("this code shouldn't be reachable");
        }
Beispiel #3
0
        /// <summary>
        /// Compiles this regex, intersected with any additional regexes, into a single matcher.
        /// </summary>
        /// <param name="regex">this regex</param>
        /// <param name="keepAnchors">if false missing anchors are replaced by .* else just omitted</param>
        /// <param name="unwindLowerBounds">if true then lower bounds of loops are unwound</param>
        /// <param name="isMatchOnly">if false, a regex that mixes lazy and eager loops is rejected with an AutomataException</param>
        /// <param name="regexes">more regexes to intersect with</param>
        /// <returns>
        /// a FixedStringMatcher when no extra regexes are given and the regex is a plain string;
        /// otherwise a SymbolicRegexBV (more than 64 minterms) or a SymbolicRegexUInt64 matcher
        /// </returns>
        public static RegexMatcher Compile(this Regex regex, bool keepAnchors, bool unwindLowerBounds, bool isMatchOnly = false, params Regex[] regexes)
        {
            RegexTree tree = RegexParser.Parse(regex.ToString(), regex.Options);

            // Fast path: a toplevel multi-node means the regex is an explicit string.
            bool isExplicitString = regexes.Length == 0 &&
                                    tree._root._type == RegexNode.Capture &&
                                    tree._root.Child(0)._type == RegexNode.Multi;
            if (isExplicitString)
            {
                bool ignoreCase = (regex.Options & RegexOptions.IgnoreCase) == RegexOptions.IgnoreCase;
                return new FixedStringMatcher(tree._root.Child(0)._str, ignoreCase);
            }

            // The shared solver is created on first use.
            if (context == null)
            {
                context = new CharSetSolver();
            }
            var converter = context.RegexConverter;

            var first = converter.ConvertToSymbolicRegex(tree._root, keepAnchors, unwindLowerBounds);
            if (!isMatchOnly && first.CheckIfContainsLazyLoop() && !first.CheckIfAllLoopsAreLazy())
            {
                throw new AutomataException("Match generation with mixed lazy and eager loops currently not supported.");
            }

            // Slot 0 holds this regex, slots 1..n hold the additional regexes in order.
            var all = new SymbolicRegexNode <BDD> [1 + regexes.Length];
            all[0] = first;
            for (int i = 0; i < regexes.Length; i++)
            {
                all[i + 1] = converter.ConvertToSymbolicRegex(regexes[i], keepAnchors, unwindLowerBounds);
            }

            var conj      = converter.srBuilder.MkAnd(all);
            var partition = conj.ComputeMinterms();

            if (partition.Length > 64)
            {
                // more than 64 bits are needed to represent a set
                return new SymbolicRegexBV(conj, context, partition);
            }

            // 64 bits are enough
            return new SymbolicRegexUInt64(conj, context, partition);
        }
Beispiel #4
0
        /// <summary>
        /// Generates one random string accepted by the regex.
        /// </summary>
        /// <param name="regex">given regex</param>
        /// <param name="charClassRestriction">restrict all generated members to this character class (null means no restriction)</param>
        /// <param name="maxUnroll">maximum number of times a loop is unrolled</param>
        /// <param name="cornerCaseProb">inverse of the probability of taking a corner case (lower/upper bound) of the number of iterations a loop may be unrolled</param>
        /// <returns>the member produced by the sampler</returns>
        public static string GenerateRandomMember(this Regex regex, string charClassRestriction = null, int maxUnroll = 10, int cornerCaseProb = 5)
        {
            var charSolver    = new CharSetSolver();
            var symbolicRegex = charSolver.RegexConverter.ConvertToSymbolicRegex(regex);

            if (charClassRestriction != null)
            {
                var allowedChars = charSolver.MkCharSetFromRegexCharClass(charClassRestriction);
                symbolicRegex = symbolicRegex.Restrict(allowedChars);
            }

            var memberSampler = new SymbolicRegexSampler <BDD>(charSolver.RegexConverter.srBuilder, symbolicRegex, maxUnroll, cornerCaseProb);
            return memberSampler.GenerateRandomMember();
        }
Beispiel #5
0
        /// <summary>
        /// Generates a set of random strings accepted by the regex.
        /// </summary>
        /// <param name="regex">given regex</param>
        /// <param name="size">number of members requested from the sampler</param>
        /// <param name="charClassRestriction">restrict all generated members to this character class (null means no restriction)</param>
        /// <param name="maxUnroll">maximum number of times a loop is unrolled</param>
        /// <param name="cornerCaseProb">inverse of the probability of taking a corner case (lower/upper bound) of the number of iterations a loop may be unrolled</param>
        /// <param name="maxSamplingIter">
        /// intended maximum number of iterations for collecting the requested number of samples;
        /// NOTE(review): this method body never reads the value - confirm whether it should be forwarded to the sampler
        /// </param>
        /// <returns>the data set produced by the sampler</returns>
        public static HashSet <string> GenerateRandomDataSet(this Regex regex, int size = 10, string charClassRestriction = null, int maxUnroll = 10, int cornerCaseProb = 5, int maxSamplingIter = 3)
        {
            var charSolver    = new CharSetSolver();
            var symbolicRegex = charSolver.RegexConverter.ConvertToSymbolicRegex(regex);

            if (charClassRestriction != null)
            {
                var allowedChars = charSolver.MkCharSetFromRegexCharClass(charClassRestriction);
                symbolicRegex = symbolicRegex.Restrict(allowedChars);
            }

            var memberSampler = new SymbolicRegexSampler <BDD>(charSolver.RegexConverter.srBuilder, symbolicRegex, maxUnroll, cornerCaseProb);
            return memberSampler.GetPositiveDataset(size);
        }
Beispiel #6
0
        /// <summary>
        /// Checks whether some move from S to T has a label that is satisfiable together
        /// with the constraint for character c, i.e. whether delta(S,T,c) exists.
        /// </summary>
        static bool MoveFromStoTContainsC(char c, int S, int T, Automaton <BDD> aut, CharSetSolver solver)
        {
            var charConstraint = solver.MkCharConstraint(c);

            foreach (var candidate in aut.GetMovesFrom(S))
            {
                // Only moves that end in T are of interest.
                if (candidate.TargetState != T)
                {
                    continue;
                }

                var overlap = solver.MkAnd(candidate.Label, charConstraint);
                if (solver.IsSatisfiable(overlap))
                {
                    return true;
                }
            }

            return false;
        }
Beispiel #7
0
 /// <summary>
 /// Checks whether there is a move from S to T whose label yields at least one character,
 /// and outputs the first such character.
 /// </summary>
 /// <param name="witness">first character generated for a matching move; 'a' when the method returns false</param>
 static bool MoveFromStoT(int S, int T, Automaton <BDD> aut, CharSetSolver solver, out char witness)
 {
     foreach (var candidate in aut.GetMovesFrom(S))
     {
         if (candidate.TargetState != T)
         {
             continue;
         }

         // The loop body returns on the first character, so at most one is generated.
         foreach (var firstChar in solver.GenerateAllCharacters(candidate.Label, false))
         {
             witness = firstChar;
             return true;
         }
     }

     // No suitable move was found; the out parameter still has to be assigned.
     witness = 'a';
     return false;
 }
Beispiel #8
0
        /// <summary>
        /// Compiles a regex into a symbolic regex over bit vectors.
        /// </summary>
        /// <param name="regex">given regex</param>
        /// <param name="css">given solver, if null a new one is created</param>
        /// <param name="simplify">if true, Simplify() is applied to the transformed regex (default is true)</param>
        /// <returns>the bit-vector symbolic regex, with its matcher initialized</returns>
        public static SymbolicRegex <BV> Compile(this Regex regex, CharSetSolver css = null, bool simplify = true)
        {
            var charSolver = css ?? new CharSetSolver();

            // First convert over BDDs, then re-express the result over the algebra
            // built from its minterms.
            var bddRegex  = charSolver.RegexConverter.ConvertToSymbolicRegex(regex, true);
            var bvAlgebra = new BVAlgebra(charSolver, bddRegex.ComputeMinterms());
            var bvBuilder = new SymbolicRegexBuilder <BV>(bvAlgebra);
            var bvRegex   = bddRegex.builder.Transform <BV>(bddRegex, bvBuilder, bvBuilder.solver.ConvertFromCharSet);

            if (simplify)
            {
                bvRegex = bvRegex.Simplify();
            }

            bvRegex.InitializeMatcher();
            return bvRegex;
        }
Beispiel #9
0
 /// <summary>
 /// Reports whether the regex can be converted into a symbolic regex.
 /// </summary>
 /// <param name="regex">given regex</param>
 /// <param name="whynot">empty on success; otherwise the message of the AutomataException raised by the conversion</param>
 /// <returns>true if the conversion completed without an AutomataException</returns>
 public static bool IsCompileSupported(this Regex regex, out string whynot)
 {
     // The shared solver is created on first use.
     if (context == null)
     {
         context = new CharSetSolver();
     }

     try
     {
         // Only success or failure matters here; the converted regex is discarded.
         context.RegexConverter.ConvertToSymbolicRegex(regex, true);
     }
     catch (AutomataException failure)
     {
         whynot = failure.Message;
         return false;
     }

     whynot = "";
     return true;
 }
Beispiel #10
0
 /// <summary>
 /// Attempts to compile a regex into a symbolic regex, reporting failure instead of throwing.
 /// </summary>
 /// <param name="regex">given regex</param>
 /// <param name="result">the compiled symbolic regex when the return value is true, otherwise null</param>
 /// <param name="whyfailed">empty when the return value is true, otherwise the message of the AutomataException that was caught</param>
 /// <param name="css">given solver, if null a new one is created</param>
 /// <param name="simplify">forwarded unchanged to Compile (default is true)</param>
 /// <returns>true if compilation completed without an AutomataException</returns>
 public static bool TryCompile(this Regex regex, out SymbolicRegex <BV> result, out string whyfailed, CharSetSolver css = null, bool simplify = true)
 {
     var charSolver = css ?? new CharSetSolver();

     try
     {
         result = Compile(regex, charSolver, simplify);
     }
     catch (AutomataException failure)
     {
         result    = null;
         whyfailed = failure.Message;
         return false;
     }

     whyfailed = "";
     return true;
 }
        /// <summary>
        /// Creates a Boolean decision tree for membership in <paramref name="domain"/>.
        /// References to solver and domain are not saved in the resulting decision tree.
        /// </summary>
        /// <param name="solver">character algebra</param>
        /// <param name="domain">elements that map to true</param>
        /// <param name="precomputeLimit">characters with codes 0..precomputeLimit are precomputed in an array (default is 0xFF, i.e. extended ASCII); 0 disables the precomputed array</param>
        /// <returns>a tree made of the precomputed array and a search tree for the remaining character codes</returns>
        internal static BooleanDecisionTree Create(CharSetSolver solver, BDD domain, ushort precomputeLimit = 0xFF)
        {
            // Two-block partition: block 0 is the complement, block 1 is the domain itself.
            var blocks = new BDD[] { solver.MkNot(domain), domain };

            if (precomputeLimit == 0)
            {
                // Nothing is precomputed; the search tree covers 0..0xFFFF.
                var fullTree = MkBST(new DecisionTree.PartitionCut(solver, blocks), 0, 0xFFFF);
                return new BooleanDecisionTree(new bool[] { }, fullTree);
            }

            bool[] table = Precompute(solver, domain, precomputeLimit);

            // A search tree is only built when codes above the precomputed prefix remain.
            DecisionTree.BST remainder = null;
            if (precomputeLimit < ushort.MaxValue)
            {
                remainder = MkBST(new DecisionTree.PartitionCut(solver, blocks), precomputeLimit + 1, ushort.MaxValue);
            }

            return new BooleanDecisionTree(table, remainder);
        }
Beispiel #12
0
        /// <summary>
        /// Reports whether the regex can be converted into a symbolic regex, using a solver
        /// that is created for the call and disposed before returning.
        /// </summary>
        /// <param name="regex">given regex</param>
        /// <param name="whynot">empty on success; otherwise the message of the AutomataException raised by the conversion</param>
        /// <returns>true if the conversion completed without an AutomataException</returns>
        public static bool IsCompileSupported(this Regex regex, out string whynot)
        {
            var charSolver = new CharSetSolver();
            bool supported;

            try
            {
                // Only success or failure matters here; the converted regex is discarded.
                charSolver.RegexConverter.ConvertToSymbolicRegex(regex, true);
                whynot    = "";
                supported = true;
            }
            catch (AutomataException failure)
            {
                whynot    = failure.Message;
                supported = false;
            }
            finally
            {
                charSolver.Dispose();
            }

            return supported;
        }
Beispiel #13
0
        /// <summary>
        /// Converts the regex to an automaton and shows its graph under the given name.
        /// The three flags are applied independently and in this order: epsilon removal,
        /// determinization, minimization.
        /// </summary>
        /// <param name="regex">given regex</param>
        /// <param name="name">name passed to ShowGraph</param>
        /// <param name="minimize">if true: remove epsilons, determinize, minimize and normalize</param>
        /// <param name="determinize">if true: remove epsilons, determinize and normalize</param>
        /// <param name="removeepsilons">if true: remove epsilons and normalize</param>
        public static void Display(this Regex regex, string name = "Automaton", bool minimize = false, bool determinize = false, bool removeepsilons = false)
        {
            var charSolver = new CharSetSolver(BitWidth.BV16);
            var automaton  = charSolver.Convert(regex.ToString(), regex.Options);

            if (removeepsilons)
            {
                automaton = automaton.RemoveEpsilons().Normalize();
            }

            if (determinize)
            {
                automaton = automaton.RemoveEpsilons().Determinize().Normalize();
            }

            if (minimize)
            {
                automaton = automaton.RemoveEpsilons().Determinize().Minimize().Normalize();
            }

            automaton.ShowGraph(name);
        }
        /// <summary>
        /// Builds a lookup table with one entry per character code 0..precomputeLimit;
        /// an entry is true when that character's constraint is satisfiable together with domain.
        /// </summary>
        /// <param name="solver">character algebra</param>
        /// <param name="domain">set of characters to test membership in</param>
        /// <param name="precomputeLimit">largest character code covered by the table</param>
        /// <returns>array of length precomputeLimit + 1</returns>
        private static bool[] Precompute(CharSetSolver solver, BDD domain, int precomputeLimit)
        {
            var table = new bool[precomputeLimit + 1];

            for (int code = 0; code <= precomputeLimit; code++)
            {
                var singleChar = solver.MkCharConstraint((char)code);
                table[code] = solver.IsSatisfiable(solver.MkAnd(singleChar, domain));
            }

            return table;
        }
Beispiel #15
0
        /// <summary>
        /// Creates a decision tree that maps a character to a partition block id.
        /// </summary>
        /// <param name="solver">character algebra</param>
        /// <param name="partition">partition of the whole set of all characters into pairwise disjoint nonempty sets</param>
        /// <param name="precomputeLimit">upper limit of the character codes that are precomputed in an array (default is 0xFF, i.e. extended ASCII); 0 disables the precomputed array</param>
        /// <returns>a tree made of the precomputed array and a search tree for the remaining character codes</returns>
        internal static DecisionTree Create(CharSetSolver solver, BDD[] partition, ushort precomputeLimit = 0xFF)
        {
            if (partition.Length == 1)
            {
                // There is no actual partition, everything maps to id 0 (e.g. as in .*):
                // a zero-filled table plus a single leaf with id 0.
                var allZero = new int[(int)precomputeLimit];
                return new DecisionTree(allZero, new BST(0, null, null));
            }

            if (precomputeLimit == 0)
            {
                // Nothing is precomputed; the search tree covers 0..0xFFFF.
                var fullTree = MkBST(new PartitionCut(solver, partition), 0, 0xFFFF);
                return new DecisionTree(new int[] { }, fullTree);
            }

            int[] table = Precompute(solver, partition, precomputeLimit);

            // A search tree is only built when codes above the precomputed prefix remain.
            BST remainder = null;
            if (precomputeLimit < ushort.MaxValue)
            {
                remainder = MkBST(new PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue);
            }

            return new DecisionTree(table, remainder);
        }
Beispiel #16
0
        /// <summary>
        /// Converts a regex into a symbolic regex over BDDs using the given solver.
        /// </summary>
        /// <param name="regex">given regex</param>
        /// <param name="css">solver whose RegexConverter performs the conversion</param>
        /// <param name="simplify">if true, Simplify() is applied to the converted regex (default is true)</param>
        /// <returns>the converted, optionally simplified, symbolic regex</returns>
        internal static SymbolicRegexNode <BDD> ConvertToSymbolicRegexBDD(this Regex regex, CharSetSolver css, bool simplify = true)
        {
            var converted = css.RegexConverter.ConvertToSymbolicRegex(regex, true);

            if (!simplify)
            {
                return converted;
            }

            return converted.Simplify();
        }
Beispiel #17
0
 /// <summary>
 /// Creates a partition cut that keeps references to the solver and to the partition blocks.
 /// </summary>
 /// <param name="solver">character set solver</param>
 /// <param name="blocks">blocks of the partition; the array is stored as given, not copied</param>
 internal PartitionCut(CharSetSolver solver, BDD[] blocks)
 {
     this.solver = solver;
     this.blocks = blocks;
 }
Beispiel #18
0
 /// <summary>
 /// Creates a regex algebra over the given character set solver.
 /// </summary>
 /// <param name="solver">solver that is passed to the base constructor and also stored in this instance's solver field</param>
 public RegexAlgebra(CharSetSolver solver) : base(solver)
 {
     this.solver = solver;
 }
Beispiel #19
0
 /// <summary>
 /// Creates a converter over the given solver; the base constructor receives the solver
 /// together with a new UnicodeCategoryToCharSetProvider built on it.
 /// </summary>
 /// <param name="solver">character set solver, also stored as the BDD builder</param>
 public RegexToAutomatonConverterCharSet(CharSetSolver solver) : base(solver, new UnicodeCategoryToCharSetProvider(solver))
 {
     this.chooser    = new Chooser();
     this.bddBuilder = solver;
 }
Beispiel #20
0
 /// <summary>
 /// Sets the static context field to null so that the solver it referenced can be
 /// garbage collected once nothing else references it.
 /// </summary>
 /// <param name="regex">not read by the method body; it only makes the method callable as an extension method on a Regex</param>
 public static void ResetContext(this Regex regex)
 {
     context = null;
 }
        /// <summary>
        /// Renders a character predicate as regex character-class text.
        /// Well-known classes are recognized first (\d, \w, \s and their complements, then
        /// single Unicode categories); any other predicate is written as a single escaped
        /// character or as a bracket expression of its ranges.
        /// </summary>
        /// <param name="label">character predicate to render</param>
        /// <param name="categorizer">provider of the category, word-letter and white-space conditions</param>
        /// <param name="solver">solver used for complementing conditions and for extracting the ranges of the label</param>
        /// <returns>regex text denoting the characters of <paramref name="label"/></returns>
        internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory <BDD> categorizer, CharSetSolver solver)
        {
            // Category index 8 is the one rendered as \d (presumably
            // UnicodeCategory.DecimalDigitNumber - confirm against the categorizer).
            if (categorizer.CategoryCondition(8) == label)
            {
                return @"\d";
            }
            if (solver.MkNot(categorizer.CategoryCondition(8)) == label)
            {
                return @"\D";
            }
            if (categorizer.WordLetterCondition == label)
            {
                return @"\w";
            }
            if (solver.MkNot(categorizer.WordLetterCondition) == label)
            {
                return @"\W";
            }
            if (categorizer.WhiteSpaceCondition == label)
            {
                return @"\s";
            }
            if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
            {
                return @"\S";
            }

            for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
            {
                if (categorizer.CategoryCondition(i) == label)
                {
                    // The label IS the category, so the positive form \p{..} is required;
                    // \P{..} would denote the complement of the category.
                    return @"\p{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}";
                }
            }

            var ranges = solver.ToRanges(label);

            // A single character needs no brackets.
            if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2)
            {
                return StringUtility.Escape((char)ranges[0].Item1);
            }

            var res = new StringBuilder("[");

            for (int i = 0; i < ranges.Length; i++)
            {
                var range = ranges[i];
                if (range.Item1 == range.Item2)
                {
                    // single character
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                }
                else if (range.Item1 == range.Item2 - 1)
                {
                    // two adjacent characters are listed without a dash
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
                else
                {
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
                    res.Append("-");
                    res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
                }
            }
            res.Append("]");
            return res.ToString();
        }
 /// <summary>
 /// Creates a provider bound to the given solver and initializes its Unicode category definitions.
 /// </summary>
 /// <param name="solver">character set solver stored in this instance</param>
 public UnicodeCategoryToCharSetProvider(CharSetSolver solver)
 {
     // The solver is stored before the initialization call; presumably
     // InitializeUnicodeCategoryDefinitions reads it - TODO confirm.
     this.solver = solver;
     InitializeUnicodeCategoryDefinitions();
 }
Beispiel #23
0
 /// <summary>
 /// Based on the paper
 /// "Order-n correction for regular languages", http://dl.acm.org/citation.cfm?id=360995
 /// Convenience overload that uses the number of states of the automaton as the search bound.
 /// </summary>
 /// <param name="str">input string</param>
 /// <param name="automaton">dfa for which you want to compute the distance</param>
 /// <param name="solver">character solver</param>
 /// <param name="distance">outputs the distance</param>
 /// <param name="checkDeterminism">forwarded unchanged to the bounded overload</param>
 /// <returns>the closest string to str in automaton</returns>
 public static string GetClosestElement(string str, Automaton <BDD> automaton, CharSetSolver solver, out int distance, bool checkDeterminism = true)
 {
     int bound = automaton.StateCount;
     return GetClosestElement(str, automaton, solver, bound, out distance, checkDeterminism);
 }
Beispiel #24
0
        /// <summary>
        /// Based on the paper
        /// "Order-n correction for regular languages", http://dl.acm.org/citation.cfm?id=360995
        /// Works in four phases: (1) initialize the tables P and L from the moves of the automaton,
        /// (2) extend them for <paramref name="bound"/> rounds, (3) fill a dynamic-programming table F
        /// over input positions and states, (4) pick the cheapest final state and rebuild the
        /// output string by walking F backwards.
        /// </summary>
        /// <param name="str">input string</param>
        /// <param name="automaton">dfa for which you want to compute the distance</param>
        /// <param name="solver">character solver</param>
        /// <param name="bound">depth of search for max string insertion (number of rounds of the inductive step)</param>
        /// <param name="distance">outputs the distance (the smallest cost found among the final states)</param>
        /// <param name="checkDeterminism">if true, an AutomataException is thrown when the automaton is not deterministic</param>
        /// <returns>the closest string to str in automaton</returns>
        public static string GetClosestElement(string str, Automaton <BDD> automaton, CharSetSolver solver, int bound, out int distance, bool checkDeterminism = true)
        {
            //bound = Math.Min(bound, str.Length);

            var input = str.ToCharArray();
            // Distinct characters of the input; the L tables are only kept for these.
            var chars = new HashSet <char>(input);
            // Number of rows of the table F: one per input prefix length 0..input.Length.
            var maxl  = input.Length + 1;

            if (automaton.IsEmpty)
            {
                throw new AutomataException("automaton must be nonempty");
            }
            if (checkDeterminism && !automaton.IsDeterministic)
            {
                throw new AutomataException("automaton must be deterministic");
            }

            //Compute P(T,S) L(T,S,c)
            var lstates = automaton.States.ToList();

            lstates.Sort();
            var states  = lstates.ToArray();
            // Maps a state id to its index in the sorted states array; all tables below use these indices.
            var stToInd = new Dictionary <int, int>(states.Length + 1);

            for (int i = 0; i < states.Length; i++)
            {
                stToInd[states[i]] = i;
            }

            // Pold/Pnew[S,T]: 0 on the diagonal, 1 when a direct move S->T exists, int.MaxValue when
            // nothing is known yet (presumably a path length from S to T - confirm against the paper).
            // Lold/Lnew[c][S,T]: flag recording that character c occurs on a label used for the pair.
            var Pold = new int[states.Length, states.Length];
            var P1   = new bool[states.Length, states.Length]; //Records the transition relation
            var Pnew = new int[states.Length, states.Length];
            var Lold = new Dictionary <char, bool[, ]>();
            var Lnew = new Dictionary <char, bool[, ]>();

            #region Initialize P L
            foreach (var c in chars)
            {
                Lold[c] = new bool[states.Length, states.Length];
                Lnew[c] = new bool[states.Length, states.Length];
            }
            foreach (var stT in automaton.States)
            {
                var T = stToInd[stT];
                foreach (var stS in automaton.States)
                {
                    var S = stToInd[stS];
                    if (T == S)
                    {
                        // Diagonal entry: cost 0; P1 records whether a self-loop exists.
                        Pold[S, T] = 0;
                        // wit only satisfies the out parameter; its value is not used here.
                        char wit;
                        P1[S, T] = MoveFromStoT(stS, stT, automaton, solver, out wit);
                        foreach (var c in chars)
                        {
                            if (P1[S, T] && MoveFromStoTContainsC(c, stS, stT, automaton, solver))
                            {
                                Lold[c][S, T] = true;
                            }
                            else
                            {
                                Lold[c][S, T] = false;
                            }
                        }
                    }
                    else
                    {
                        // wit only satisfies the out parameter; its value is not used here.
                        char wit;
                        if (MoveFromStoT(stS, stT, automaton, solver, out wit))
                        {
                            // Direct move S->T: cost 1.
                            Pold[S, T] = 1;
                            P1[S, T]   = true;
                            foreach (var c in chars)
                            {
                                if (MoveFromStoTContainsC(c, stS, stT, automaton, solver))
                                {
                                    Lold[c][S, T] = true;
                                }
                                else
                                {
                                    Lold[c][S, T] = false;
                                }
                            }
                        }
                        else
                        {
                            // No direct move: int.MaxValue marks the pair as unknown.
                            Pold[S, T] = int.MaxValue;
                            P1[S, T]   = false;
                            foreach (var c in chars)
                            {
                                Lold[c][S, T] = false;
                            }
                        }
                    }
                }
            }
            #endregion
            //solver.ShowGraph(automaton,"as");

            //Inductive step
            // Each round tries to resolve pairs (S,T) that are still unknown by going through
            // a successor K of S for which (K,T) is already known.
            for (int k = 1; k <= bound; k++)
            {
                foreach (var stT in automaton.States)
                {
                    var T = stToInd[stT];
                    foreach (var stS in automaton.States)
                    {
                        var S = stToInd[stS];

                        if (Pold[S, T] == int.MaxValue)
                        {
                            bool found = false;
                            // There is no break in this loop: when several moves qualify,
                            // the values written for the last qualifying move remain.
                            foreach (var move in automaton.GetMovesFrom(stS))
                            {
                                var stk = move.TargetState;
                                var K   = stToInd[stk];
                                if (Pold[K, T] != int.MaxValue)
                                {
                                    if (P1[S, K])
                                    {
                                        found      = true;
                                        Pnew[S, T] = Pold[K, T] + 1;
                                        foreach (var c in chars)
                                        {
                                            Lnew[c][S, T] = Lold[c][K, T] || solver.IsSatisfiable(solver.MkAnd(move.Label, solver.MkCharConstraint(c)));
                                        }
                                    }
                                }
                            }
                            if (!found)
                            {
                                // Still unknown: carry the old values over.
                                Pnew[S, T] = Pold[S, T];
                                foreach (var c in chars)
                                {
                                    Lnew[c][S, T] = Lold[c][S, T];
                                }
                            }
                        }
                        else
                        {
                            // Already known: carry the old values over.
                            Pnew[S, T] = Pold[S, T];
                            foreach (var c in chars)
                            {
                                Lnew[c][S, T] = Lold[c][S, T];
                            }
                        }
                    }
                }
                // The new tables become the old ones and fresh tables are allocated for the next round.
                Pold = Pnew;
                Pnew = new int[states.Length, states.Length];
                foreach (var c in chars)
                {
                    Lold[c] = Lnew[c];
                }

                Lnew = new Dictionary <char, bool[, ]>();
                foreach (var c in chars)
                {
                    Lnew[c] = new bool[states.Length, states.Length];
                }
            }

            //Initialize table for value 0
            // F[j,T] = (cost, predecessor state index) for the first j input characters ending in T.
            // Row 0: cost 0 for the initial state, int.MaxValue for every other state; predecessor -1.
            Pair <int, int>[,] F = new Pair <int, int> [maxl, automaton.StateCount];
            foreach (var st in automaton.States)
            {
                var T = stToInd[st];
                if (st == automaton.InitialState)
                {
                    F[0, T] = new Pair <int, int>(0, -1);
                }
                else
                {
                    F[0, T] = new Pair <int, int>(int.MaxValue, -1);
                }
            }

            //solver.ShowGraph(automaton,"aa");
            //Dynamic programming loop
            // NOTE(review): stateList is never used in this method.
            List <int> stateList = new List <int>();
            for (int j = 1; j < maxl; j++)
            {
                var aj = input[j - 1];
                foreach (var stT in automaton.States)
                {
                    var T     = stToInd[stT];
                    int min   = int.MaxValue;
                    int minSt = -1;
                    foreach (var stS in automaton.States)
                    {
                        var S = stToInd[stS];

                        var pts = Pold[S, T];
                        if (pts != int.MaxValue)
                        {
                            // ltsc is 1 when the current input character aj is flagged for (S,T).
                            var ltsc  = Lold[aj][S, T] ? 1 : 0;
                            int vts   = pts == 0 ? 1 - ltsc : pts - ltsc;
                            var fjm1t = F[j - 1, S];
                            // NOTE(review): the sum is computed before the int.MaxValue check below;
                            // when fjm1t.First is int.MaxValue it may wrap, but the value is then replaced.
                            int expr  = fjm1t.First + vts;

                            if (fjm1t.First == int.MaxValue || vts == int.MaxValue)
                            {
                                expr = int.MaxValue;
                            }
                            else
                            if (expr <= min)
                            {
                                // "<=" means that among equal costs the state visited last wins.
                                min   = expr;
                                minSt = S;
                                if (min == 0)
                                {
                                    // Cost 0 cannot be improved; stop scanning source states.
                                    break;
                                }
                            }
                        }
                    }
                    F[j, T] = new Pair <int, int>(min, minSt);
                }
            }

            //Iteration over final states
            int minAcc   = int.MaxValue;
            int minState = -1;
            foreach (var st in automaton.GetFinalStates())
            {
                var S = stToInd[st];
                if (F[input.Length, S].First < minAcc)
                {
                    minAcc   = F[input.Length, S].First;
                    // NOTE(review): this first assignment is overwritten by the next line.
                    minState = F[input.Length, S].Second;
                    minState = S;
                }
            }
            // Walk F backwards from the chosen final state, prepending one piece per input position.
            // NOTE(review): if no final state has a cost below int.MaxValue, minState stays -1 and is
            // used as an array index below when the input is nonempty - confirm this cannot happen.
            var minString = "";
            int curr      = minState;
            int strindex  = input.Length;
            while (strindex > 0)
            {
                var f  = F[strindex, curr];
                var aj = input[strindex - 1];

                var    pts  = Pold[f.Second, curr];
                var    ltsc = Lold[aj][f.Second, curr] ? 1 : 0;
                // pts == 0: the piece is aj itself when flagged, otherwise empty.
                // pts != 0: the piece is a string from the predecessor state to curr, produced by
                // ShortStringStoTwithC (defined outside this section) when aj is flagged, else by ShortStringStoT.
                string vts  = pts == 0 ? ((ltsc == 1)? aj.ToString():"") : ((ltsc == 1) ? ShortStringStoTwithC(aj, states[f.Second], states[curr], automaton, bound, solver) : ShortStringStoT(states[f.Second], states[curr], automaton, bound, solver));

                minString = vts + minString;

                curr = f.Second;
                strindex--;
            }

            distance = minAcc;
            return(minString);
        }
Beispiel #25
0
 /// <summary>
 /// Type initializer: creates one character-set solver and a regex-to-automaton
 /// converter built on that same solver instance, and stores both in the
 /// static members <c>solver</c> and <c>converter</c>.
 /// </summary>
 static Regex()
 {
     // Build the solver first; the converter is constructed on top of it.
     var charSolver = new CharSetSolver();

     solver    = charSolver;
     converter = new RegexToAutomatonConverter <BDD>(charSolver);
 }
Beispiel #26
0
        /// <summary>
        /// Compiles <paramref name="regex"/> into a table-driven <see cref="RegexAutomaton"/>.
        /// </summary>
        /// <remarks>
        /// The regex is converted to an automaton over BDD predicates, which is then
        /// minimized, determinized, minimized again and normalized. Its transitions are
        /// flattened into a single array indexed by <c>state * K + minterm</c>, where
        /// <c>K</c> is the number of minterms computed for the initial automaton.
        /// Earlier revisions also built a second automaton over the BV algebra and
        /// sampled tick counts around each phase; none of those results were ever read,
        /// so that work has been removed.
        /// </remarks>
        /// <param name="solver">Character-set solver used to convert the regex and to intersect move labels with minterms.</param>
        /// <param name="regex">The regular expression to compile; its pattern text and options are both used.</param>
        /// <returns>The automaton wrapping the transition table, the final-state set, a decision tree over the minterms and the detected sink states (or -1 when absent).</returns>
        /// <exception cref="AutomataException">
        /// Thrown with <c>InternalError_RegexAutomaton</c> when more than one final (or more than one
        /// non-final) sink state is found, or when the number of (move, minterm) overlaps of a state
        /// differs from the number of minterms.
        /// </exception>
        internal static RegexAutomaton Create(CharSetSolver solver, Regex regex)
        {
            var nfa      = solver.Convert(regex.ToString(), regex.Options);
            var minterms = GetMinterms(nfa);
            //create specialized BV algebra for this particular minterm partition
            var bvsolver = new BVAlgebra(solver, minterms);

            var dfa = nfa.Minimize().Determinize().Minimize().Normalize();

            //number of states
            // NOTE(review): the loops below assume Normalize() numbers the states 0..N-1 - confirm.
            var N = dfa.StateCount;
            //number of symbols
            var K                 = minterms.Length;
            var isfinalstate      = new HashSet <int>();
            var nonfinalSinkstate = -1;
            var finalSinkstate    = -1;

            for (int q = 0; q < N; q++)
            {
                bool isFinal = dfa.IsFinalState(q);
                if (isFinal)
                {
                    isfinalstate.Add(q);
                }

                // A sink candidate is a loop state with exactly one outgoing move.
                if (dfa.IsLoopState(q) && dfa.GetMovesCountFrom(q) == 1)
                {
                    //there can only be at most one of each because dfa is minimal
                    if (isFinal)
                    {
                        if (finalSinkstate != -1)
                        {
                            throw new AutomataException(AutomataExceptionKind.InternalError_RegexAutomaton);
                        }
                        finalSinkstate = q;
                    }
                    else
                    {
                        if (nonfinalSinkstate != -1)
                        {
                            throw new AutomataException(AutomataExceptionKind.InternalError_RegexAutomaton);
                        }
                        nonfinalSinkstate = q;
                    }
                }
            }

            // Flattened transition table: delta[state * K + minterm] = target state.
            var delta = new int[K * N];

            for (int q = 0; q < N; q++)
            {
                int symbolsMapped = 0;
                foreach (var move in dfa.GetMovesFrom(q))
                {
                    for (int a = 0; a < K; a++)
                    {
                        // The move covers minterm a when its label intersects that minterm.
                        var phi = solver.MkAnd(move.Label, minterms[a]);
                        if (!phi.IsEmpty)
                        {
                            delta[(move.SourceState * K) + a] = move.TargetState;
                            symbolsMapped += 1;
                        }
                    }
                }

                // Sanity check: the overlaps found for this state must total exactly K.
                if (symbolsMapped != K)
                {
                    throw new AutomataException(AutomataExceptionKind.InternalError_RegexAutomaton);
                }
            }

            var dt = DecisionTree.Create(solver, minterms);

            return(new RegexAutomaton(bvsolver, regex, K, N, delta, isfinalstate, dt, nonfinalSinkstate, finalSinkstate));
        }