/// <summary>
/// Builds one string automaton per entry of <paramref name="strings"/> (from the
/// entry's UTF-8 decoded text) and returns the union of all of them.
/// </summary>
private static Automaton NaiveUnion(IList<BytesRef> strings)
{
    Automaton[] perString = new Automaton[strings.Count];
    int slot = 0;
    foreach (BytesRef term in strings)
    {
        string text = term.Utf8ToString();
        perString[slot] = BasicAutomata.MakeString(text);
        slot++;
    }

    return BasicOperations.Union(perString);
}
/// <summary>
/// Concatenating a singleton with an NFA must yield a non-deterministic automaton
/// that accepts the same language as concatenating the expanded singleton.
/// </summary>
public virtual void TestSingletonNFAConcatenate()
{
    Automaton prefix = BasicAutomata.MakeString("prefix");
    Automaton expandedPrefix = prefix.CloneExpanded();

    // an NFA (two transitions for 't' from initial state)
    Automaton thisOrThree = BasicOperations.Union(
        BasicAutomata.MakeString("this"),
        BasicAutomata.MakeString("three"));

    Automaton viaSingleton = BasicOperations.Concatenate(prefix, thisOrThree);
    Assert.IsFalse(viaSingleton.IsDeterministic);

    Automaton viaExpanded = BasicOperations.Concatenate(expandedPrefix, thisOrThree);
    Assert.IsTrue(BasicOperations.SameLanguage(viaExpanded, viaSingleton));
}
/// <summary>
/// A string automaton and its expanded clone must accept the same language,
/// both for a plain ASCII string and for a surrogate-pair string.
/// </summary>
public virtual void TestSingleton()
{
    string[] samples = { "foobar", "\ud801\udc1c" };
    foreach (string sample in samples)
    {
        Automaton compact = BasicAutomata.MakeString(sample);
        Automaton expanded = compact.CloneExpanded();
        Assert.IsTrue(BasicOperations.SameLanguage(compact, expanded));
    }
}
/// <summary>
/// Minimizing a clone of a random automaton must not change the accepted language.
/// </summary>
public virtual void Test()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton original = AutomatonTestUtil.RandomAutomaton(Random);
        Automaton minimized = (Automaton)original.Clone();
        MinimizationOperations.Minimize(minimized);

        bool same = BasicOperations.SameLanguage(original, minimized);
        Assert.IsTrue(same);
    }
}
/// <summary>
/// For the string <paramref name="s"/>, builds the automata produced by two
/// <c>LevenshteinAutomata</c> builders (constructed with <c>false</c> and <c>true</c>)
/// for every distance from 0 through <paramref name="maxDistance"/>, and verifies each one.
/// </summary>
private void AssertLev(string s, int maxDistance)
{
    LevenshteinAutomata builder = new LevenshteinAutomata(s, false);
    LevenshteinAutomata tbuilder = new LevenshteinAutomata(s, true);

    int levels = maxDistance + 1;
    Automaton[] automata = new Automaton[levels];
    Automaton[] tautomata = new Automaton[levels];

    for (int n = 0; n < automata.Length; n++)
    {
        Automaton lev = builder.ToAutomaton(n);
        automata[n] = lev;
        Automaton levT = tbuilder.ToAutomaton(n);
        tautomata[n] = levT;

        // Basic structural checks on both automata.
        Assert.IsNotNull(lev);
        Assert.IsNotNull(levT);
        Assert.IsTrue(lev.Deterministic);
        Assert.IsTrue(levT.Deterministic);
        Assert.IsTrue(SpecialOperations.IsFinite(lev));
        Assert.IsTrue(SpecialOperations.IsFinite(levT));
        AutomatonTestUtil.AssertNoDetachedStates(lev);
        AutomatonTestUtil.AssertNoDetachedStates(levT);

        // The automata for n-1 must accept a subset of the automata for n.
        if (n > 0)
        {
            Automaton prevLev = automata[n - 1];
            Automaton prevLevT = tautomata[n - 1];
            Assert.IsTrue(prevLev.SubsetOf(lev));
            Assert.IsTrue(prevLev.SubsetOf(levT));
            Assert.IsTrue(prevLevT.SubsetOf(lev));
            Assert.IsTrue(prevLevT.SubsetOf(levT));
            Assert.AreNotSame(prevLev, lev);
        }

        // Lev(N) must be a subset of LevT(N).
        Assert.IsTrue(lev.SubsetOf(levT));

        // Distance-specific checks.
        if (n == 0)
        {
            // easy, matches the string itself
            Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), automata[0]));
            Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), tautomata[0]));
        }
        else if (n == 1)
        {
            // generate a lev1 naively, and check the accepted lang is the same.
            Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1(s), automata[1]));
            Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1T(s), tautomata[1]));
        }
        else
        {
            AssertBruteForce(s, lev, n);
            AssertBruteForceT(s, levT, n);
        }
    }
}
/// <summary>
/// Builds an automaton for <paramref name="s"/> itself unioned, in turn, with the
/// results of <c>InsertionsOf</c>, <c>DeletionsOf</c> and <c>SubstitutionsOf</c>,
/// minimizing after each union.
/// </summary>
private Automaton NaiveLev1(string s)
{
    Automaton result = BasicOperations.Union(BasicAutomata.MakeString(s), InsertionsOf(s));
    MinimizationOperations.Minimize(result);

    result = BasicOperations.Union(result, DeletionsOf(s));
    MinimizationOperations.Minimize(result);

    result = BasicOperations.Union(result, SubstitutionsOf(s));
    MinimizationOperations.Minimize(result);

    return result;
}
/// <summary>
/// Concatenating the empty-string automaton (singleton or expanded) with a character
/// range must leave the range's language unchanged, and the singleton variant must
/// stay deterministic.
/// </summary>
public virtual void TestEmptySingletonConcatenate()
{
    Automaton emptyString = BasicAutomata.MakeString("");
    Automaton emptyStringExpanded = emptyString.CloneExpanded();
    Automaton range = BasicAutomata.MakeCharRange('5', '7');

    Automaton fromExpanded = BasicOperations.Concatenate(emptyStringExpanded, range);
    Automaton fromSingleton = BasicOperations.Concatenate(emptyString, range);

    Assert.IsTrue(fromSingleton.IsDeterministic);
    Assert.IsTrue(BasicOperations.SameLanguage(fromExpanded, fromSingleton));
    Assert.IsTrue(BasicOperations.SameLanguage(range, fromExpanded));
    Assert.IsTrue(BasicOperations.SameLanguage(range, fromSingleton));
}
/// <summary>
/// Compares <c>MinimizationOperations.Minimize</c> against
/// <c>AutomatonTestUtil.MinimizeSimple</c> on random automata: same language,
/// same number of states and same number of transitions.
/// </summary>
public virtual void TestAgainstBrzozowski()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton reference = AutomatonTestUtil.RandomAutomaton(Random);
        AutomatonTestUtil.MinimizeSimple(reference);

        Automaton candidate = (Automaton)reference.Clone();
        MinimizationOperations.Minimize(candidate);

        Assert.IsTrue(BasicOperations.SameLanguage(reference, candidate));
        Assert.AreEqual(reference.GetNumberOfStates(), candidate.GetNumberOfStates());
        Assert.AreEqual(reference.GetNumberOfTransitions(), candidate.GetNumberOfTransitions());
    }
}
/// <summary>
/// Compares <c>Automaton.Determinize</c> against
/// <c>AutomatonTestUtil.DeterminizeSimple</c> on random automata; both results
/// must accept the same language.
/// </summary>
public virtual void TestAgainstSimple()
{
    int rounds = AtLeast(200);
    for (int round = 0; round < rounds; round++)
    {
        Automaton reference = AutomatonTestUtil.RandomAutomaton(Random());
        Automaton candidate = (Automaton)reference.Clone();

        AutomatonTestUtil.DeterminizeSimple(reference);

        // Clear the flag first so that Determinize() is forced to do the work.
        candidate.Deterministic = false;
        candidate.Determinize();

        // TODO: more verifications possible?
        Assert.IsTrue(BasicOperations.SameLanguage(reference, candidate));
    }
}
/// <summary>
/// Builds a sorted list of 0..1000 random unicode strings and checks that
/// <c>BasicAutomata.MakeStringUnion</c> is deterministic and accepts the same
/// language as the naive per-string union.
/// </summary>
public virtual void TestStringUnion()
{
    List<BytesRef> strings = new List<BytesRef>();
    int howMany = RandomInts.RandomInt32Between(Random, 0, 1000);
    for (int added = 0; added < howMany; added++)
    {
        string text = TestUtil.RandomUnicodeString(Random);
        strings.Add(new BytesRef(text));
    }

    strings.Sort();

    Automaton union = BasicAutomata.MakeStringUnion(strings);
    Assert.IsTrue(union.IsDeterministic);

    Automaton naive = NaiveUnion(strings);
    Assert.IsTrue(BasicOperations.SameLanguage(union, naive));
}
/// <summary>
/// Unions the shuffled automata in <paramref name="a"/>, determinizes the result,
/// and asserts that it is finite and accepts every entry of <paramref name="terms"/>,
/// both directly and through a <c>ByteRunAutomaton</c> fed the UTF-8 bytes.
/// </summary>
public void AssertLexicon(List<Automaton> a, List<string> terms)
{
    var shuffled = CollectionsHelper.Shuffle(a);
    var lexicon = BasicOperations.Union(shuffled);
    lexicon.Determinize();
    Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

    // First pass: run each term against the character-level automaton.
    foreach (string term in terms)
    {
        bool accepted = BasicOperations.Run(lexicon, term);
        Assert.IsTrue(accepted);
    }

    // Second pass: run the UTF-8 bytes of each term against the byte-level automaton.
    var byteLexicon = new ByteRunAutomaton(lexicon);
    foreach (string term in terms)
    {
        sbyte[] utf8 = term.GetBytes(Encoding.UTF8);
        bool accepted = byteLexicon.Run(utf8, 0, utf8.Length);
        Assert.IsTrue(accepted);
    }
}
/// <summary>
/// The minimized union of "dog" and "duck" must yield exactly those two finite strings.
/// </summary>
public virtual void TestFiniteStrings()
{
    Automaton dogOrDuck = BasicOperations.Union(
        BasicAutomata.MakeString("dog"),
        BasicAutomata.MakeString("duck"));
    MinimizationOperations.Minimize(dogOrDuck);

    ISet<Int32sRef> strings = SpecialOperations.GetFiniteStrings(dogOrDuck, -1);
    Assert.AreEqual(2, strings.Count);

    string[] expectedWords = { "dog", "duck" };
    foreach (string word in expectedWords)
    {
        Int32sRef expected = new Int32sRef();
        Util.ToInt32sRef(new BytesRef(word), expected);
        Assert.IsTrue(strings.Contains(expected));
    }
}
/// <summary>
/// Builds, for every position of <paramref name="s"/>, the automaton
/// "text before the position" + any char + "text after the position",
/// and returns the minimized union of those automata.
/// </summary>
private Automaton SubstitutionsOf(string s)
{
    IList<Automaton> variants = new List<Automaton>();
    for (int pos = 0; pos < s.Length; pos++)
    {
        string head = s.Substring(0, pos);
        string tail = s.Substring(pos + 1);

        Automaton headThenAny = BasicOperations.Concatenate(
            BasicAutomata.MakeString(head),
            BasicAutomata.MakeAnyChar());
        variants.Add(BasicOperations.Concatenate(headThenAny, BasicAutomata.MakeString(tail)));
    }

    Automaton union = BasicOperations.Union(variants);
    MinimizationOperations.Minimize(union);
    return union;
}
/// <summary>
/// Shuffles <c>automata</c> in place, unions and determinizes them, and asserts the
/// result is finite and accepts every entry of <c>terms</c>, both directly and through
/// a <c>ByteRunAutomaton</c> fed the UTF-8 bytes.
/// </summary>
public void AssertLexicon()
{
    Collections.Shuffle(automata, Random());
    var lexicon = BasicOperations.Union(automata);
    lexicon.Determinize();
    Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

    // First pass: run each term against the character-level automaton.
    foreach (string term in terms)
    {
        assertTrue(BasicOperations.Run(lexicon, term));
    }

    // Second pass: run the UTF-8 bytes of each term against the byte-level automaton.
    var byteLexicon = new ByteRunAutomaton(lexicon);
    foreach (string term in terms)
    {
        var utf8 = term.GetBytes(Encoding.UTF8);
        assertTrue(byteLexicon.Run(utf8, 0, utf8.Length));
    }
}
/// <summary>
/// Returns an automaton that accepts the union of the languages of the automata in
/// <paramref name="l"/>. A fresh initial state gets an epsilon edge to the initial
/// state of a clone of every non-empty input.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Union(ICollection<Automaton> l)
{
    // If two inputs share a hash code the distinct-count drops below l.Count;
    // in that case every input is cloned unconditionally.
    JCG.HashSet<int> seenHashes = new JCG.HashSet<int>();
    foreach (Automaton candidate in l)
    {
        seenHashes.Add(candidate.GetHashCode());
    }
    bool hasAliases = seenHashes.Count != l.Count;

    State start = new State();
    foreach (Automaton member in l)
    {
        if (!BasicOperations.IsEmpty(member))
        {
            Automaton expanded = hasAliases
                ? member.CloneExpanded()
                : member.CloneExpandedIfRequired();
            start.AddEpsilon(expanded.initial);
        }
    }

    Automaton union = new Automaton
    {
        initial = start,
        deterministic = false
    };
    union.ClearNumberedStates();
    union.CheckMinimizeAlways();
    return union;
}
/// <summary>
/// Returns a (deterministic) automaton accepting every string that is in the language
/// of <paramref name="a1"/> but not in the language of <paramref name="a2"/>, i.e. the
/// intersection of <paramref name="a1"/> with the complement of <paramref name="a2"/>.
/// As a side-effect, the automata may be determinized, if not already deterministic.
/// <para/>
/// Complexity: quadratic in number of states (if already deterministic).
/// </summary>
public static Automaton Minus(Automaton a1, Automaton a2)
{
    // Nothing to subtract from, or subtracting an automaton from itself.
    if (BasicOperations.IsEmpty(a1) || a1 == a2)
    {
        return BasicAutomata.MakeEmpty();
    }

    // Subtracting the empty language leaves a1 unchanged.
    if (BasicOperations.IsEmpty(a2))
    {
        return a1.CloneIfRequired();
    }

    // A singleton either survives whole or disappears, depending on whether a2 accepts it.
    if (a1.IsSingleton)
    {
        return BasicOperations.Run(a2, a1.singleton)
            ? BasicAutomata.MakeEmpty()
            : a1.CloneIfRequired();
    }

    Automaton notA2 = a2.Complement();
    return Intersection(a1, notA2);
}
/// <summary>
/// For a number of random regular expressions, draws random accepted strings from the
/// compiled automaton and asserts that the automaton accepts each of them. On failure
/// the regexp and the offending code points are written to the console before the
/// original exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int ITER1 = AtLeast(100);
    int ITER2 = AtLeast(100);
    for (int i = 0; i < ITER1; i++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
        for (int j = 0; j < ITER2; j++)
        {
            // Declared outside the try so the catch block can report it.
            int[] acc = null;
            try
            {
                acc = rx.GetRandomAcceptedString(Random());
                string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                Assert.IsTrue(BasicOperations.Run(a, s));
            }
            catch (Exception)
            {
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    for (int k = 0; k < acc.Length; k++)
                    {
                        Console.WriteLine(" " + acc[k].ToString("x"));
                    }
                }

                // Bare rethrow keeps the original stack trace; "throw t;" would reset it (CA2200).
                throw;
            }
        }
    }
}
/// <summary>
/// For a number of random regular expressions, draws random accepted strings from the
/// compiled automaton and asserts that the automaton accepts each of them. On failure
/// the regexp and the offending code points are written to the console before the
/// original exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int regexpRounds = AtLeast(100);
    int stringRounds = AtLeast(100);

    for (int outer = 0; outer < regexpRounds; outer++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
        Automaton compiled = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(compiled));

        RandomAcceptedStrings sampler = new RandomAcceptedStrings(compiled);
        for (int inner = 0; inner < stringRounds; inner++)
        {
            // Declared outside the try so the catch block can report it.
            int[] codePoints = null;
            try
            {
                codePoints = sampler.GetRandomAcceptedString(Random);
                string text = UnicodeUtil.NewString(codePoints, 0, codePoints.Length);
                Assert.IsTrue(BasicOperations.Run(compiled, text));
            }
            catch (Exception)
            {
                Console.WriteLine("regexp: " + re);
                if (codePoints != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + codePoints.Length);
                    foreach (int codePoint in codePoints)
                    {
                        Console.WriteLine(" " + codePoint.ToString("x"));
                    }
                }

                // LUCENENET: CA2200: Rethrow to preserve stack details
                throw;
            }
        }
    }
}
/// <summary>
/// Instance shortcut that passes this automaton, <paramref name="min"/> and
/// <paramref name="max"/> to <see cref="BasicOperations.Repeat(Automaton, int, int)"/>.
/// </summary>
public virtual Automaton Repeat(int min, int max) => BasicOperations.Repeat(this, min, max);
/// <summary>
/// Builds the automaton for this expression node according to its <c>kind</c>.
/// Composite kinds (union, concatenation, intersection, optional, the repeat forms and
/// complement) build their operands recursively and minimize the result; leaf kinds
/// call the matching <c>BasicAutomata</c> factory.
/// </summary>
/// <param name="automata">Optional map from identifier to named automaton; may be <c>null</c>.</param>
/// <param name="automaton_provider">Optional provider consulted when the identifier is
/// not resolved through <paramref name="automata"/>; may be <c>null</c>.</param>
/// <returns>The automaton for this node, or <c>null</c> if <c>kind</c> matches no case.</returns>
/// <exception cref="ArgumentException">A named automaton cannot be resolved, or the
/// provider fails with an I/O error.</exception>
private Automaton ToAutomaton(IDictionary<string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    IList<Automaton> list;
    Automaton a = null;
    switch (kind)
    {
        case Kind.REGEXP_UNION:
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
            a = BasicOperations.Union(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CONCATENATION:
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            a = BasicOperations.Concatenate(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_INTERSECTION:
            a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_OPTIONAL:
            a = exp1.ToAutomaton(automata, automaton_provider).Optional();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MIN:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MINMAX:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_COMPLEMENT:
            a = exp1.ToAutomaton(automata, automaton_provider).Complement();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CHAR:
            a = BasicAutomata.MakeChar(c);
            break;

        case Kind.REGEXP_CHAR_RANGE:
            a = BasicAutomata.MakeCharRange(from, to);
            break;

        case Kind.REGEXP_ANYCHAR:
            a = BasicAutomata.MakeAnyChar();
            break;

        case Kind.REGEXP_EMPTY:
            a = BasicAutomata.MakeEmpty();
            break;

        case Kind.REGEXP_STRING:
            a = BasicAutomata.MakeString(s);
            break;

        case Kind.REGEXP_ANYSTRING:
            a = BasicAutomata.MakeAnyString();
            break;

        case Kind.REGEXP_AUTOMATON:
            Automaton aa = null;
            if (automata != null)
            {
                // The IDictionary indexer throws KeyNotFoundException for a missing key,
                // which would bypass the provider fallback and the "not found" error below.
                // TryGetValue leaves aa null when the key is absent.
                automata.TryGetValue(s, out aa);
            }
            if (aa == null && automaton_provider != null)
            {
                try
                {
                    aa = automaton_provider.GetAutomaton(s);
                }
                catch (Exception e) when (e.IsIOException())
                {
                    throw new ArgumentException(e.ToString(), e);
                }
            }
            if (aa == null)
            {
                throw new ArgumentException("'" + s + "' not found");
            }
            a = (Automaton)aa.Clone(); // always clone here (ignore allow_mutate)
            break;

        case Kind.REGEXP_INTERVAL:
            a = BasicAutomata.MakeInterval(min, max, digits);
            break;
    }
    return a;
}
/// <summary>
/// Compiles <paramref name="automaton"/>. When <paramref name="simplify"/> is set, the
/// automaton is first tested for four simple forms (empty, total, single fixed string,
/// constant prefix); if one matches, only <c>Type</c> and <c>Term</c> are populated and
/// no run automaton is built. Otherwise the automaton is converted to UTF-8 and a
/// <c>ByteRunAutomaton</c> plus sorted transitions are created.
/// </summary>
/// <param name="automaton">The automaton to compile.</param>
/// <param name="finite">Whether the automaton is finite; if <c>null</c>, this is computed
/// with <c>SpecialOperations.IsFinite</c> (general case only).</param>
/// <param name="simplify">Whether to test for the simple forms first.</param>
public CompiledAutomaton(Automaton automaton, bool? finite, bool simplify)
{
    if (simplify)
    {
        // Test whether the automaton is a "simple" form and
        // if so, don't create a runAutomaton.  Note that on a
        // large automaton these tests could be costly:
        if (BasicOperations.IsEmpty(automaton))
        {
            // matches nothing
            Type = AUTOMATON_TYPE.NONE;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        if (BasicOperations.IsTotal(automaton))
        {
            // matches all possible strings
            Type = AUTOMATON_TYPE.ALL;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        string prefix;
        string fixedString;
        if (automaton.Singleton != null)
        {
            prefix = null;
            fixedString = automaton.Singleton;
        }
        else
        {
            prefix = SpecialOperations.GetCommonPrefix(automaton);
            bool isExactlyPrefix = prefix.Length > 0
                && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(prefix));
            fixedString = isExactlyPrefix ? prefix : null;
        }

        if (fixedString != null)
        {
            // matches a fixed string in singleton or expanded
            // representation
            Type = AUTOMATON_TYPE.SINGLE;
            Term = new BytesRef(fixedString);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        Automaton prefixThenAnything = BasicOperations.Concatenate(
            BasicAutomata.MakeString(prefix),
            BasicAutomata.MakeAnyString());
        if (BasicOperations.SameLanguage(automaton, prefixThenAnything))
        {
            // matches a constant prefix
            Type = AUTOMATON_TYPE.PREFIX;
            Term = new BytesRef(prefix);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }
    }

    // General case: no simple form applied (or simplification was not requested).
    Type = AUTOMATON_TYPE.NORMAL;
    Term = null;
    this.Finite = finite ?? SpecialOperations.IsFinite(automaton);

    Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

    // The common suffix is only computed when the automaton is not known to be finite.
    CommonSuffixRef = this.Finite == true
        ? null
        : SpecialOperations.GetCommonSuffixBytesRef(utf8);

    RunAutomaton = new ByteRunAutomaton(utf8, true);
    sortedTransitions = utf8.GetSortedTransitions();
}
/// <summary>
/// Static shortcut that forwards <paramref name="l"/> to
/// <see cref="BasicOperations.Union(ICollection{Automaton})"/>.
/// </summary>
public static Automaton Union(ICollection<Automaton> l) => BasicOperations.Union(l);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Determinize(Automaton)"/>.
/// </summary>
public virtual void Determinize() => BasicOperations.Determinize(this);
/// <summary>
/// Instance shortcut that passes this automaton and <paramref name="a"/> to
/// <see cref="BasicOperations.Union(Automaton, Automaton)"/>.
/// </summary>
public virtual Automaton Union(Automaton a) => BasicOperations.Union(this, a);
/// <summary>
/// Instance shortcut that passes this automaton and <paramref name="a"/> to
/// <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/>.
/// </summary>
public virtual bool SubsetOf(Automaton a) => BasicOperations.SubsetOf(this, a);
/// <summary>
/// Instance shortcut that passes this automaton and <paramref name="a"/> to
/// <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>.
/// </summary>
public virtual Automaton Intersection(Automaton a) => BasicOperations.Intersection(this, a);
/// <summary>
/// Instance shortcut that passes this automaton and <paramref name="a"/> to
/// <see cref="BasicOperations.Minus(Automaton, Automaton)"/>.
/// </summary>
public virtual Automaton Minus(Automaton a) => BasicOperations.Minus(this, a);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Repeat(Automaton)"/>.
/// </summary>
public virtual Automaton Repeat() => BasicOperations.Repeat(this);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Optional(Automaton)"/>.
/// </summary>
public virtual Automaton Optional() => BasicOperations.Optional(this);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Complement(Automaton)"/>.
/// </summary>
public virtual Automaton Complement() => BasicOperations.Complement(this);