/// <summary>
/// Asserts a set of algebraic identities on <paramref name="a"/>: double
/// complement, self-union, self-intersection, self-difference and, when the
/// empty string is not accepted, removal of the empty string from
/// <c>Optional(a)</c>.
/// </summary>
/// <param name="a">The automaton under test.</param>
private static void AssertAutomaton(Automaton a)
{
    Automaton copy = (Automaton)a.Clone();

    // complement(complement(a)) == a
    Automaton doubleComplement = BasicOperations.Complement(BasicOperations.Complement(a));
    Assert.IsTrue(BasicOperations.SameLanguage(a, doubleComplement));

    // a union a == a
    Automaton selfUnion = BasicOperations.Union(a, copy);
    Assert.IsTrue(BasicOperations.SameLanguage(a, selfUnion));

    // a intersect a == a
    Automaton selfIntersection = BasicOperations.Intersection(a, copy);
    Assert.IsTrue(BasicOperations.SameLanguage(a, selfIntersection));

    // a minus a == empty language
    Automaton selfDifference = BasicOperations.Minus(a, copy);
    Assert.IsTrue(BasicOperations.IsEmpty(selfDifference));

    // The last identity only holds when a rejects the empty string.
    if (BasicOperations.Run(a, ""))
    {
        return;
    }

    // optional(a) minus { "" } == a
    Automaton optional = BasicOperations.Optional(a);
    Automaton withoutEmptyString = BasicOperations.Minus(optional, BasicAutomata.MakeEmptyString());
    Assert.IsTrue(BasicOperations.SameLanguage(a, withoutEmptyString));
}
/// <summary>
/// Concatenating the single-string automaton for "a" with the empty-language
/// automaton must produce an automaton that <c>BasicOperations.IsEmpty</c>
/// reports as empty.
/// </summary>
public virtual void TestEmptyLanguageConcatenate()
{
    Automaton result = BasicOperations.Concatenate(
        BasicAutomata.MakeString("a"),
        BasicAutomata.MakeEmpty());

    Assert.IsTrue(BasicOperations.IsEmpty(result));
}
/// <summary>
/// Returns an automaton that accepts the union of the languages of the given
/// automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="l">The automata to unite. Automata for which
/// <c>BasicOperations.IsEmpty</c> is true contribute nothing.</param>
/// <returns>A new non-deterministic automaton whose fresh initial state has an
/// epsilon edge to the initial state of each non-empty input (or a copy of it).</returns>
public static Automaton Union(ICollection<Automaton> l)
{
    // Detect whether the same Automaton instance appears more than once in l.
    // The identity hash is used rather than the virtual GetHashCode() so the
    // check does not depend on whether Automaton overrides GetHashCode().
    // A hash collision can only make has_aliases true, which merely selects
    // the unconditional CloneExpanded() path below.
    JCG.HashSet<int> ids = new JCG.HashSet<int>();
    foreach (Automaton a in l)
    {
        ids.Add(global::System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(a));
    }
    bool has_aliases = ids.Count != l.Count;

    State s = new State();
    foreach (Automaton b in l)
    {
        if (BasicOperations.IsEmpty(b))
        {
            continue;
        }

        // With aliases present every operand is copied unconditionally;
        // otherwise the automaton decides whether a copy is required.
        Automaton bb = has_aliases ? b.CloneExpanded() : b.CloneExpandedIfRequired();
        s.AddEpsilon(bb.initial);
    }

    Automaton a_ = new Automaton
    {
        initial = s,
        deterministic = false
    };
    a_.ClearNumberedStates();
    a_.CheckMinimizeAlways();
    return a_;
}
/// <summary>
/// Returns a (deterministic) automaton accepting every string that
/// <paramref name="a1"/> accepts and <paramref name="a2"/> does not, i.e. the
/// intersection of <paramref name="a1"/> with the complement of
/// <paramref name="a2"/>. As a side-effect, the automata may be determinized,
/// if not already deterministic.
/// <para/>
/// Complexity: quadratic in number of states (if already deterministic).
/// </summary>
/// <param name="a1">The automaton to subtract from.</param>
/// <param name="a2">The automaton whose language is removed.</param>
/// <returns>The difference automaton.</returns>
public static Automaton Minus(Automaton a1, Automaton a2)
{
    // Nothing to subtract from, or subtracting an automaton from itself.
    bool resultIsEmpty = BasicOperations.IsEmpty(a1) || a1 == a2;
    if (resultIsEmpty)
    {
        return BasicAutomata.MakeEmpty();
    }

    // Subtracting the empty language leaves a1 unchanged.
    if (BasicOperations.IsEmpty(a2))
    {
        return a1.CloneIfRequired();
    }

    // General case: intersect with the complement.
    if (!a1.IsSingleton)
    {
        return Intersection(a1, a2.Complement());
    }

    // a1 holds exactly one string: the result is empty when a2 accepts it,
    // and a1 itself otherwise.
    return BasicOperations.Run(a2, a1.singleton)
        ? BasicAutomata.MakeEmpty()
        : a1.CloneIfRequired();
}
/// <summary>
/// Builds random regular expressions, draws random accepted strings from each
/// resulting automaton, and asserts that the automaton really accepts every
/// drawn string. On failure the regexp and the offending code points are
/// written to the console before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int regexpCount = AtLeast(100);
    int samplesPerRegexp = AtLeast(100);

    for (int round = 0; round < regexpCount; round++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
        Automaton automaton = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(automaton));

        RandomAcceptedStrings generator = new RandomAcceptedStrings(automaton);
        for (int sample = 0; sample < samplesPerRegexp; sample++)
        {
            int[] codePoints = null;
            try
            {
                codePoints = generator.GetRandomAcceptedString(Random);
                string text = UnicodeUtil.NewString(codePoints, 0, codePoints.Length);
                Assert.IsTrue(BasicOperations.Run(automaton, text));
            }
            catch (Exception)
            {
                // Dump enough context to reproduce the failure.
                Console.WriteLine("regexp: " + re);
                if (codePoints != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + codePoints.Length);
                    foreach (int codePoint in codePoints)
                    {
                        Console.WriteLine(" " + codePoint.ToString("x"));
                    }
                }

                // Bare rethrow keeps the original stack trace (CA2200).
                throw;
            }
        }
    }
}
/// <summary>
/// Builds random regular expressions, draws random accepted strings from each
/// resulting automaton, and asserts that the automaton really accepts every
/// drawn string. On failure the regexp and the offending code points are
/// written to the console before the exception is rethrown.
/// </summary>
public virtual void TestGetRandomAcceptedString()
{
    int ITER1 = AtLeast(100);
    int ITER2 = AtLeast(100);
    for (int i = 0; i < ITER1; i++)
    {
        RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
        Automaton a = re.ToAutomaton();
        Assert.IsFalse(BasicOperations.IsEmpty(a));

        AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
        for (int j = 0; j < ITER2; j++)
        {
            int[] acc = null;
            try
            {
                acc = rx.GetRandomAcceptedString(Random());
                string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                Assert.IsTrue(BasicOperations.Run(a, s));
            }
            catch (Exception)
            {
                // Dump enough context to reproduce the failure.
                Console.WriteLine("regexp: " + re);
                if (acc != null)
                {
                    Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                    for (int k = 0; k < acc.Length; k++)
                    {
                        Console.WriteLine(" " + acc[k].ToString("x"));
                    }
                }

                // Bare rethrow: "throw t;" would reset the stack trace to this
                // line and hide where the failure actually originated (CA2200).
                throw;
            }
        }
    }
}
/// <summary>
/// Compiles <paramref name="automaton"/>. When <paramref name="simplify"/> is
/// set, the automaton is first checked against four simple shapes (matches
/// nothing, matches everything, matches one fixed string, matches a constant
/// prefix); if one applies, only the type and term are recorded and no run
/// automaton is built. Otherwise the automaton is converted to UTF-8 and a
/// <c>ByteRunAutomaton</c> plus sorted transitions are created.
/// </summary>
/// <param name="automaton">The automaton to compile.</param>
/// <param name="finite">Whether the automaton accepts a finite language, or
/// <c>null</c> to have it computed via <c>SpecialOperations.IsFinite</c>.</param>
/// <param name="simplify">Whether to probe for the simple shapes first.</param>
public CompiledAutomaton(Automaton automaton, bool? finite, bool simplify)
{
    if (simplify)
    {
        // Each probe below may be costly on a large automaton.
        if (BasicOperations.IsEmpty(automaton))
        {
            // Accepts no string at all.
            Type = AUTOMATON_TYPE.NONE;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        if (BasicOperations.IsTotal(automaton))
        {
            // Accepts every possible string.
            Type = AUTOMATON_TYPE.ALL;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        // Start from the singleton representation; when there is none, fall
        // back to the common prefix and check whether it is the whole language.
        string fixedString = automaton.Singleton;
        string prefix = null;
        if (fixedString == null)
        {
            prefix = SpecialOperations.GetCommonPrefix(automaton);
            bool prefixIsWholeLanguage = prefix.Length > 0
                && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(prefix));
            if (prefixIsWholeLanguage)
            {
                fixedString = prefix;
            }
        }

        if (fixedString != null)
        {
            // Accepts exactly one string (singleton or expanded representation).
            Type = AUTOMATON_TYPE.SINGLE;
            Term = new BytesRef(fixedString);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        Automaton prefixThenAnything = BasicOperations.Concatenate(
            BasicAutomata.MakeString(prefix),
            BasicAutomata.MakeAnyString());
        if (BasicOperations.SameLanguage(automaton, prefixThenAnything))
        {
            // Accepts a constant prefix followed by anything.
            Type = AUTOMATON_TYPE.PREFIX;
            Term = new BytesRef(prefix);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }
    }

    // General case: build the byte-level run automaton.
    Type = AUTOMATON_TYPE.NORMAL;
    Term = null;
    this.Finite = finite ?? SpecialOperations.IsFinite(automaton);

    Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

    // A common suffix is only computed when the language is not known to be finite.
    CommonSuffixRef = this.Finite == true
        ? null
        : SpecialOperations.GetCommonSuffixBytesRef(utf8);

    RunAutomaton = new ByteRunAutomaton(utf8, true);
    sortedTransitions = utf8.GetSortedTransitions();
}
/// <summary>
/// Returns an automaton that accepts the concatenation of the languages of the
/// given automata.
/// <para/>
/// Complexity: linear in total number of states.
/// </summary>
/// <param name="l">The automata to concatenate, in order.</param>
/// <returns>
/// The empty-string automaton when <paramref name="l"/> has no elements; a
/// single-string automaton when every element is a singleton; the empty
/// automaton when any element is empty; otherwise a non-deterministic
/// automaton built by chaining the operands with epsilon transitions.
/// </returns>
public static Automaton Concatenate(IList<Automaton> l)
{
    if (l.Count == 0)
    {
        return BasicAutomata.MakeEmptyString();
    }

    bool all_singleton = true;
    foreach (Automaton a in l)
    {
        if (!a.IsSingleton)
        {
            all_singleton = false;
            break;
        }
    }

    if (all_singleton)
    {
        // Every operand is a single string: concatenate the strings directly.
        StringBuilder joined = new StringBuilder();
        foreach (Automaton a in l)
        {
            joined.Append(a.singleton);
        }
        return BasicAutomata.MakeString(joined.ToString());
    }

    // One empty operand makes the whole concatenation empty.
    foreach (Automaton a in l)
    {
        if (BasicOperations.IsEmpty(a))
        {
            return BasicAutomata.MakeEmpty();
        }
    }

    // Detect whether the same Automaton instance appears more than once in l.
    // The identity hash is used rather than the virtual GetHashCode() so the
    // check does not depend on whether Automaton overrides GetHashCode().
    // A hash collision can only make has_aliases true, which merely selects
    // the unconditional CloneExpanded() path below.
    HashSet<int> ids = new HashSet<int>();
    foreach (Automaton a in l)
    {
        ids.Add(global::System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(a));
    }
    bool has_aliases = ids.Count != l.Count;

    Automaton result = has_aliases ? l[0].CloneExpanded() : l[0].CloneExpandedIfRequired();
    ISet<State> ac = result.GetAcceptStates();

    // Chain each remaining operand onto the current set of accept states.
    for (int i = 1; i < l.Count; i++)
    {
        Automaton next = l[i];
        if (next.IsEmptyString)
        {
            // Concatenating the empty string changes nothing.
            continue;
        }

        Automaton aa = has_aliases ? next.CloneExpanded() : next.CloneExpandedIfRequired();
        ISet<State> ns = aa.GetAcceptStates();
        foreach (State s in ac)
        {
            s.accept = false;
            s.AddEpsilon(aa.initial);

            // accept is re-read after AddEpsilon: the call may turn it back on
            // (presumably when aa.initial is itself accepting - confirm in
            // State.AddEpsilon), in which case s remains an accept state.
            if (s.accept)
            {
                ns.Add(s);
            }
        }
        ac = ns;
    }

    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}