/// <summary>
/// Lazily enumerates regular expressions, by increasing <c>maxWidth</c>, and yields
/// every candidate whose automaton is equivalent to <paramref name="dfa"/>.
/// </summary>
/// <remarks>
/// Each candidate is first screened against the string test sets returned by
/// <c>DFAUtilities.MyHillTestGeneration</c> and <c>DFAUtilities.GetTestSets</c>; only
/// candidates that pass all four screens are converted to an automaton and checked
/// for equivalence. The enumeration never terminates on its own: it ends either
/// when the caller stops iterating or when the search time exceeds
/// <paramref name="timeout"/>. Every distinct normalized candidate is also written
/// to the file <c>..\..\..\regexpenum.txt</c>.
/// The method (re)initializes several static fields of the enclosing class
/// (<c>solver</c>, <c>numStates</c>, <c>alph</c>, <c>timer</c>, <c>sigmaStar</c>,
/// <c>sigmaPlus</c>, <c>maxWidth</c>, <c>maxSigmaStar</c>, <c>maxWidthC</c>,
/// <c>maxSigmaStarC</c>, <c>currUnionEls</c>, <c>memoDfa</c>).
/// </remarks>
/// <param name="alphabet">Characters from which candidate expressions are built.</param>
/// <param name="dfa">Target automaton the yielded expressions must be equivalent to.</param>
/// <param name="s">Character-set solver used for all automata operations.</param>
/// <param name="sb">Receives a report for every match, or <c>| Timeout</c> when the search times out.</param>
/// <param name="timeout">Search budget in milliseconds.</param>
/// <returns>A lazy sequence of regular expressions equivalent to <paramref name="dfa"/>.</returns>
public static IEnumerable<Regexp> SynthesizeRegexp(HashSet<char> alphabet, Automaton<BDD> dfa, CharSetSolver s, StringBuilder sb, long timeout)
{
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(@"..\..\..\regexpenum.txt"))
    {
        solver = s;
        numStates = dfa.StateCount;
        alph = alphabet;

        // Instrumentation: number of distinct candidates tried and per-phase timers.
        int attempts = 0;
        Stopwatch membershipTimer = new Stopwatch();
        Stopwatch equivTimer = new Stopwatch();
        timer = new Stopwatch();
        timer.Start();

        // Test sets used to screen candidates before the equivalence check.
        // Strings already present in the first pair are removed from the second
        // pair so that no string is tested twice.
        var mytests = DFAUtilities.MyHillTestGeneration(alphabet, dfa, solver);
        var posMN = mytests.First;
        var negMN = mytests.Second;
        var tests = DFAUtilities.GetTestSets(dfa, alphabet, solver);
        var positive = tests.First;
        var negative = tests.Second;
        foreach (var t in posMN)
        {
            positive.Remove(t);
        }
        foreach (var t in negMN)
        {
            negative.Remove(t);
        }

        // Build the union of all alphabet symbols, then derive Sigma+ and Sigma*.
        bool fst = true;
        foreach (var c in alph)
        {
            if (fst)
            {
                fst = false;
                sigmaStar = new RELabel(c);
            }
            else
            {
                sigmaStar = new REUnion(sigmaStar, new RELabel(c));
            }
        }
        sigmaPlus = new REPlus(sigmaStar);
        sigmaStar = new REStar(sigmaStar);

        // Reset the shared enumeration state.
        maxWidthC = 0;
        maxSigmaStarC = 0;
        currUnionEls = new Dictionary<string, Automaton<BDD>>();
        memoDfa = new Dictionary<string, Automaton<BDD>>();

        // Normalized textual forms of the candidates that were already tried.
        HashSet<string> visited = new HashSet<string>();

        for (maxWidth = 1; true; maxWidth++)
        {
            maxSigmaStar = 2;

            foreach (var regexp in EnumerateRegexp())
            {
                // Run for at most `timeout` milliseconds.
                if (timer.ElapsedMilliseconds > timeout)
                {
                    sb.AppendLine("| Timeout");
                    timer.Stop();
                    yield break;
                }

                // Candidates are deduplicated on their normalized form;
                // Add returns false when the key is already present.
                string key = regexp.Normalize().ToString();
                if (!visited.Add(key))
                {
                    continue;
                }

                file.WriteLine(key);
                attempts++;

                // Membership screen: reject anything that fails a test string.
                // The short-circuit order matches the cost-saving order of the
                // checks (negative sets first, then positive sets).
                membershipTimer.Start();
                bool passesTests = CorrectOnNegSet(regexp, negMN)
                    && CorrectOnNegSet(regexp, negative)
                    && CorrectOnPosSet(regexp, posMN)
                    && CorrectOnPosSet(regexp, positive);
                membershipTimer.Stop();
                if (!passesTests)
                {
                    continue;
                }

                // Full equivalence check; the automaton is memoized under the
                // candidate's (non-normalized) textual form.
                equivTimer.Start();
                var rDfa = getDfa(regexp);
                memoDfa[regexp.ToString()] = rDfa;
                bool equivalent = rDfa.IsEquivalentWith(dfa, solver);
                equivTimer.Stop();
                if (!equivalent)
                {
                    Console.WriteLine("used dfa");
                    continue;
                }

                timer.Stop();
                sb.Append("| ");
                regexp.ToString(sb);
                sb.AppendLine("|");
                sb.AppendLine(string.Format("| elapsed time: \t {0} ms", timer.ElapsedMilliseconds));
                sb.AppendLine(string.Format("| equivalence cost:\t {0} ms", equivTimer.ElapsedMilliseconds));
                sb.AppendLine(string.Format("| membership cost: \t {0} ms", membershipTimer.ElapsedMilliseconds));
                sb.AppendLine(string.Format("| attempts: \t {0}", attempts));
                yield return regexp;

                // The caller asked for another result: resume the search clock so
                // the timeout guard above keeps working. Time spent in the caller
                // while the iterator was suspended is not counted.
                timer.Start();
            }
        }
    }
}
/// <summary>
/// Finds a minimum edit-distance script between two DFAs, provided the search
/// takes less than <paramref name="timeout"/> ms.
/// </summary>
/// <remarks>
/// <paramref name="dfa2"/> is first normalized and, when it has fewer states than
/// <paramref name="dfa1"/>, padded with extra states that self-loop on the whole
/// alphabet; each padding state is recorded in the script as a <c>DFAAddState</c>.
/// The search then tries increasing depths (1, 2, 3, ...) until
/// <c>GetDFAEditScriptTimeout</c> either fills in a script or reports a timeout.
/// NOTE(review): determinism of the inputs is not verified here; the original
/// code only carried disabled assertions for it.
/// </remarks>
/// <param name="dfa1">First automaton; also the source of the test sets and density passed to the search.</param>
/// <param name="dfa2">Second automaton; the one that is normalized and padded with states.</param>
/// <param name="al">Alphabet of both automata.</param>
/// <param name="solver">Character-set solver used for BDD and automata operations.</param>
/// <param name="timeout">Search budget in milliseconds, forwarded to <c>GetDFAEditScriptTimeout</c>.</param>
/// <param name="sb">Not used by this method.</param>
/// <returns>The script found by the search, or <c>null</c> when the search timed out.</returns>
public static DFAEditScript GetDFAOptimalEdit(
    Automaton<BDD> dfa1, Automaton<BDD> dfa2,
    HashSet<char> al, CharSetSolver solver, long timeout,
    StringBuilder sb)
{
    DFAEditScript editScript = new DFAEditScript();

    BDD fullAlphabetCondition = BDDOf(al, solver);

    // Normalize dfa2 so that its states are named 0 .. |States|-1, keeping the
    // mapping back to the original names.
    var normDfaPair = DFAUtilities.normalizeDFA(dfa2);
    var dfa2augmented = normDfaPair.First;
    var stateNamesMapping = normDfaPair.Second;

    // Pad dfa2 with states until |dfa2.States| >= |dfa1.States|.
    var newMoves = new List<Move<BDD>>(dfa2augmented.GetMoves());
    int missingStates = dfa1.StateCount - dfa2augmented.StateCount;
    for (int i = 1; i <= missingStates; i++)
    {
        // Next free name in the normalized automaton, mapped to the next free
        // name in the original dfa2.
        int newStateName = dfa2augmented.MaxState + i;
        stateNamesMapping[newStateName] = dfa2.MaxState + i;

        // Record the addition in the script; the new state loops to itself on
        // every symbol.
        editScript.script.Insert(0, new DFAAddState(dfa2.MaxState + i));
        newMoves.Add(new Move<BDD>(newStateName, newStateName, fullAlphabetCondition));
    }

    // Rebuild the automaton with the padding states included.
    dfa2augmented = Automaton<BDD>.Create(
        dfa2augmented.InitialState,
        dfa2augmented.GetFinalStates().ToList(),
        newMoves);

    // bestScript.script stays null until the search finds a script; a non-null
    // value is the success signal checked below.
    DFAEditScript bestScript = new DFAEditScript();
    bestScript.script = null;

    Stopwatch sw = new Stopwatch();
    sw.Start();

    // Iteratively check whether there exists an edit of a given depth.
    // NOTE(review): the test sets and the density are recomputed at every depth
    // although their inputs do not change here; they could be hoisted if the
    // callee does not mutate them - confirm before changing.
    for (int depth = 1; true; depth++)
    {
        bool timedOut = GetDFAEditScriptTimeout(
            dfa1, dfa2augmented, al, solver,
            new List<long>(), editScript.script,
            depth, timeout, sw,
            DFAUtilities.MyHillTestGeneration(al, dfa1, solver),
            DFADensity.GetDFADensity(dfa1, al, solver),
            editScript.GetCost(),
            bestScript, stateNamesMapping);

        if (timedOut)
        {
            // Timeout hit: leave the loop and return null.
            break;
        }

        if (bestScript.script != null)
        {
            bestScript.script.Reverse();
            sw.Stop();
            return bestScript;
        }
    }

    sw.Stop();
    return null;
}