Capable of generating random regular expressions and automata; also provides a number of very basic, unoptimized implementations (*slow) for testing.
// LUCENENET specific - De-nested RandomAcceptedStrings

/// <summary>
/// Produces a random NFA/DFA for testing. Two automata are built from random
/// regular expressions, each one is complemented on a coin flip, and the pair
/// is then combined with one of four randomly selected operations
/// (concatenation, union, intersection or difference).
/// </summary>
/// <param name="random">Source of randomness for every choice made here.</param>
/// <returns>The automaton resulting from the randomly chosen combination.</returns>
public static Automaton RandomAutomaton(Random random)
{
    // Left operand: random regexp, optionally complemented.
    string leftPattern = AutomatonTestUtil.RandomRegexp(random);
    Automaton left = new RegExp(leftPattern, RegExpSyntax.NONE).ToAutomaton();
    if (random.NextBoolean())
    {
        left = BasicOperations.Complement(left);
    }

    // Right operand: built the same way, after the left one so the order of
    // draws from the random source stays fixed.
    string rightPattern = AutomatonTestUtil.RandomRegexp(random);
    Automaton right = new RegExp(rightPattern, RegExpSyntax.NONE).ToAutomaton();
    if (random.NextBoolean())
    {
        right = BasicOperations.Complement(right);
    }

    // Pick how the two operands are combined.
    int operation = random.Next(4);
    if (operation == 0)
    {
        return BasicOperations.Concatenate(left, right);
    }
    if (operation == 1)
    {
        return BasicOperations.Union(left, right);
    }
    if (operation == 2)
    {
        return BasicOperations.Intersection(left, right);
    }
    return BasicOperations.Minus(left, right);
}
/// <summary>
/// Feeds automata built from random regular expressions (at least 250 of
/// them, as scaled by <c>AtLeast</c>) through <c>AssertAutomaton</c>.
/// </summary>
public void TestRandomRegexes()
{
    int iterations = AtLeast(250);
    int done = 0;
    while (done < iterations)
    {
        string pattern = AutomatonTestUtil.RandomRegexp(Random());
        RegExp regExp = new RegExp(pattern, RegExpSyntax.NONE);
        AssertAutomaton(regExp.ToAutomaton());
        done++;
    }
}
/// <summary>
/// Builds an automaton from each of a batch of random regular expressions
/// (<c>AtLeast(500)</c> iterations) and checks it with <c>AssertAutomaton</c>.
/// </summary>
public virtual void TestRegexps()
{
    int total = AtLeast(500);
    for (int iteration = 0; iteration < total; iteration++)
    {
        string expression = AutomatonTestUtil.RandomRegexp(Random());
        Automaton automaton = new RegExp(expression, RegExp.NONE).ToAutomaton();
        AssertAutomaton(automaton);
    }
}
/// <summary>
/// Cross-checks <c>SpecialOperations.IsFinite</c> against the slow reference
/// implementation <c>AutomatonTestUtil.IsFiniteSlow</c> on random automata.
/// </summary>
public virtual void TestIsFinite()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton original = AutomatonTestUtil.RandomAutomaton(Random);
        Automaton copy = (Automaton)original.Clone();

        // The reference answer is computed on the original, the answer under
        // test on its clone.
        bool expected = AutomatonTestUtil.IsFiniteSlow(original);
        bool actual = SpecialOperations.IsFinite(copy);
        Assert.AreEqual(expected, actual);
    }
}
/// <summary>
/// Minimizes a clone of each random automaton and verifies the minimized
/// clone still accepts the same language as the original.
/// </summary>
public virtual void Test()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton original = AutomatonTestUtil.RandomAutomaton(Random);
        Automaton minimized = (Automaton)original.Clone();
        MinimizationOperations.Minimize(minimized);

        bool sameLanguage = BasicOperations.SameLanguage(original, minimized);
        Assert.IsTrue(sameLanguage);
    }
}
/// <summary>
/// Builds the Levenshtein automata for <paramref name="s"/> at every distance
/// from 0 through <paramref name="maxDistance"/>, using one builder created
/// with the flag <c>false</c> and one ("T" variant) created with <c>true</c>,
/// and verifies each resulting automaton.
/// </summary>
/// <param name="s">The string the automata are built around.</param>
/// <param name="maxDistance">Largest distance to build and check.</param>
private void AssertLev(string s, int maxDistance)
{
    LevenshteinAutomata plainBuilder = new LevenshteinAutomata(s, false);
    LevenshteinAutomata tBuilder = new LevenshteinAutomata(s, true);
    Automaton[] lev = new Automaton[maxDistance + 1];
    Automaton[] levT = new Automaton[maxDistance + 1];

    for (int distance = 0; distance < lev.Length; distance++)
    {
        Automaton current = plainBuilder.ToAutomaton(distance);
        lev[distance] = current;
        Automaton currentT = tBuilder.ToAutomaton(distance);
        levT[distance] = currentT;

        // Basic sanity: built, deterministic, finite, no detached states.
        Assert.IsNotNull(current);
        Assert.IsNotNull(currentT);
        Assert.IsTrue(current.Deterministic);
        Assert.IsTrue(currentT.Deterministic);
        Assert.IsTrue(SpecialOperations.IsFinite(current));
        Assert.IsTrue(SpecialOperations.IsFinite(currentT));
        AutomatonTestUtil.AssertNoDetachedStates(current);
        AutomatonTestUtil.AssertNoDetachedStates(currentT);

        // Each automaton for distance - 1 must accept a subset of what the
        // automata for this distance accept.
        if (distance > 0)
        {
            Automaton previous = lev[distance - 1];
            Automaton previousT = levT[distance - 1];
            Assert.IsTrue(previous.SubsetOf(current));
            Assert.IsTrue(previous.SubsetOf(currentT));
            Assert.IsTrue(previousT.SubsetOf(current));
            Assert.IsTrue(previousT.SubsetOf(currentT));
            Assert.AreNotSame(previous, current);
        }

        // Lev(N) must be a subset of LevT(N).
        Assert.IsTrue(current.SubsetOf(currentT));

        // Distance-specific checks.
        if (distance == 0)
        {
            // Distance 0 matches exactly the string itself.
            Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), current));
            Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), currentT));
        }
        else if (distance == 1)
        {
            // Compare against naively generated distance-1 automata.
            Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1(s), current));
            Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1T(s), currentT));
        }
        else
        {
            // Every other distance goes through the brute-force checks.
            AssertBruteForce(s, current, distance);
            AssertBruteForceT(s, currentT, distance);
        }
    }
}
/// <summary>
/// Determinizes a random automaton with <c>AutomatonTestUtil.DeterminizeSimple</c>
/// and its clone with <c>Automaton.Determinize</c>, then verifies that both
/// accept the same language.
/// </summary>
public virtual void TestAgainstSimple()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton viaSimple = AutomatonTestUtil.RandomAutomaton(Random());
        Automaton viaDeterminize = (Automaton)viaSimple.Clone();

        AutomatonTestUtil.DeterminizeSimple(viaSimple);

        // Clear the flag first so that Determinize() is forced to do the work.
        viaDeterminize.Deterministic = false;
        viaDeterminize.Determinize();

        // TODO: more verifications possible?
        bool sameLanguage = BasicOperations.SameLanguage(viaSimple, viaDeterminize);
        Assert.IsTrue(sameLanguage);
    }
}
/// <summary>
/// Minimizes a random automaton with <c>AutomatonTestUtil.MinimizeSimple</c>,
/// minimizes a clone of that result with <c>MinimizationOperations.Minimize</c>,
/// and verifies that both have the same language, state count and
/// transition count.
/// </summary>
public virtual void TestAgainstBrzozowski()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton reference = AutomatonTestUtil.RandomAutomaton(Random);
        AutomatonTestUtil.MinimizeSimple(reference);

        // The clone is taken after the reference minimization has run.
        Automaton candidate = (Automaton)reference.Clone();
        MinimizationOperations.Minimize(candidate);

        Assert.IsTrue(BasicOperations.SameLanguage(reference, candidate));
        Assert.AreEqual(reference.GetNumberOfStates(), candidate.GetNumberOfStates());
        Assert.AreEqual(reference.GetNumberOfTransitions(), candidate.GetNumberOfTransitions());
    }
}
/// <summary>
/// Runs <c>AssertSame</c> over a batch of random regular expressions. The
/// batch is <c>100 * RandomMultiplier</c> when the default codec is named
/// "Lucene3x", otherwise <c>AtLeast(1000)</c>.
/// </summary>
public virtual void TestRegexps()
{
    // The generated regexps are awful, which is good for testing; with the
    // preflex codec the test can be very slow, so fewer iterations are used.
    int iterations;
    if (Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal))
    {
        iterations = 100 * RandomMultiplier;
    }
    else
    {
        iterations = AtLeast(1000);
    }

    int completed = 0;
    while (completed < iterations)
    {
        string pattern = AutomatonTestUtil.RandomRegexp(Random);
        if (Verbose)
        {
            Console.WriteLine("TEST: regexp=" + pattern);
        }
        AssertSame(pattern);
        completed++;
    }
}
/// <summary>
/// For a batch of random regular expressions, draws random strings from
/// <c>RandomAcceptedStrings</c> and verifies that the automaton built from
/// the expression accepts each one. On any failure the expression and the
/// offending code points (in hex) are printed before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int outerRounds = AtLeast(100);
    int innerRounds = AtLeast(100);
    for (int outer = 0; outer < outerRounds; outer++)
    {
        string pattern = AutomatonTestUtil.RandomRegexp(Random);
        RegExp re = new RegExp(pattern, RegExpSyntax.NONE);
        Automaton automaton = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(automaton));

        RandomAcceptedStrings generator = new RandomAcceptedStrings(automaton);
        for (int inner = 0; inner < innerRounds; inner++)
        {
            // Declared outside the try so the catch block can dump it.
            int[] codePoints = null;
            try
            {
                codePoints = generator.GetRandomAcceptedString(Random);
                string candidate = UnicodeUtil.NewString(codePoints, 0, codePoints.Length);
                Assert.IsTrue(BasicOperations.Run(automaton, candidate));
            }
            catch (Exception)
            {
                Console.WriteLine("regexp: " + re);
                if (codePoints != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + codePoints.Length);
                    foreach (int codePoint in codePoints)
                    {
                        Console.WriteLine(" " + codePoint.ToString("x"));
                    }
                }
                throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
            }
        }
    }
}
/// <summary>
/// For a batch of random regular expressions, draws random strings from
/// <c>AutomatonTestUtil.RandomAcceptedStrings</c> and verifies that the
/// automaton built from the expression accepts each one. On any failure the
/// expression and the offending code points (in hex) are written to the
/// console and the original exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int ITER1 = AtLeast(100);
    int ITER2 = AtLeast(100);
    for (int i = 0; i < ITER1; i++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
        for (int j = 0; j < ITER2; j++)
        {
            // Declared outside the try so the catch block can report it.
            int[] acc = null;
            try
            {
                acc = rx.GetRandomAcceptedString(Random());
                string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                Assert.IsTrue(BasicOperations.Run(a, s));
            }
            catch (Exception)
            {
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    for (int k = 0; k < acc.Length; k++)
                    {
                        Console.WriteLine(" " + acc[k].ToString("x"));
                    }
                }

                // Bare rethrow: "throw t;" would reset the stack trace and hide
                // where the failure actually originated (CA2200).
                throw;
            }
        }
    }
}
/// <summary>
/// Produces a random string, as an array of code points, from the automaton
/// held in the field <c>a</c>. For a singleton automaton the code points of
/// its single string are returned; otherwise the automaton is walked from its
/// initial state along randomly chosen transitions until the walk stops at an
/// accept state.
/// </summary>
/// <param name="r">Source of randomness for every choice made during the walk.</param>
/// <returns>The code points collected along the way, in order.</returns>
/// <exception cref="Exception">
/// Thrown when the walk reaches a state that is not an accept state and has no
/// outgoing transitions.
/// </exception>
public int[] GetRandomAcceptedString(Random r)
{
    IList <int?> soFar = new List <int?>();
    if (a.IsSingleton)
    {
        // accepts only one
        var s = a.Singleton;
        int charUpto = 0;
        // Step through the string one code point at a time; a code point may
        // span more than one char, hence the CharCount advance.
        while (charUpto < s.Length)
        {
            int cp = s.CodePointAt(charUpto);
            charUpto += Character.CharCount(cp);
            soFar.Add(cp);
        }
    }
    else
    {
        var s = a.initial;
        while (true)
        {
            if (s.accept)
            {
                if (s.numTransitions == 0)
                {
                    // stop now
                    break;
                }
                else
                {
                    // Accept state with outgoing transitions: coin flip
                    // between stopping here and continuing the walk.
                    if (r.NextBoolean())
                    {
                        break;
                    }
                }
            }

            // Only reachable with zero transitions when s is not an accept
            // state, so there is no way to finish the string from here.
            if (s.numTransitions == 0)
            {
                throw new Exception("this automaton has dead states");
            }

            bool cheat = r.NextBoolean();
            Transition t;
            if (cheat)
            {
                // pick a transition that we know is the fastest
                // path to an accept state
                // NOTE(review): leadsToAccept is populated outside this method;
                // presumably it maps such transitions - confirm at its definition.
                IList <Transition> toAccept = new List <Transition>();
                for (int i = 0; i < s.numTransitions; i++)
                {
                    Transition t0 = s.TransitionsArray[i];
                    if (leadsToAccept.ContainsKey(t0))
                    {
                        toAccept.Add(t0);
                    }
                }
                if (toAccept.Count == 0)
                {
                    // this is OK -- it means we jumped into a cycle
                    t = s.TransitionsArray[r.Next(s.numTransitions)];
                }
                else
                {
                    t = toAccept[r.Next(toAccept.Count)];
                }
            }
            else
            {
                // Not cheating: any outgoing transition, chosen uniformly by index.
                t = s.TransitionsArray[r.Next(s.numTransitions)];
            }

            // Record a random code point for the chosen transition and move to
            // its destination state.
            soFar.Add(AutomatonTestUtil.GetRandomCodePoint(r, t));
            s = t.to;
        }
    }

    return(ArrayUtil.ToInt32Array(soFar));
}
/// <summary>
/// Verifies that the distance-1 automaton built by <c>LevenshteinAutomata</c>
/// for "abc" (constructed with the flag <c>false</c>) has no detached states.
/// </summary>
public virtual void TestNoWastedStates()
{
    LevenshteinAutomata builder = new LevenshteinAutomata("abc", false);
    Automaton distanceOne = builder.ToAutomaton(1);
    AutomatonTestUtil.AssertNoDetachedStates(distanceOne);
}