/// <summary>
/// Builds an automaton whose language is the union of the languages of all
/// automata in <paramref name="l"/>.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="l">The automata to combine. Members for which
/// <see cref="BasicOperations.IsEmpty(Automaton)"/> is true are skipped.</param>
/// <returns>A new automaton, marked as non-deterministic, whose fresh initial
/// state has an epsilon edge to the initial state of every retained operand.</returns>
public static Automaton Union(ICollection<Automaton> l)
{
    // Fewer distinct hash codes than operands is treated as "some operand
    // appears more than once", in which case every operand is cloned.
    JCG.HashSet<int> seenHashes = new JCG.HashSet<int>();
    foreach (Automaton candidate in l)
    {
        seenHashes.Add(candidate.GetHashCode());
    }
    bool mustClone = seenHashes.Count != l.Count;

    State start = new State();
    foreach (Automaton operand in l)
    {
        if (!BasicOperations.IsEmpty(operand))
        {
            Automaton expanded = mustClone
                ? operand.CloneExpanded()
                : operand.CloneExpandedIfRequired();
            start.AddEpsilon(expanded.initial);
        }
    }

    Automaton result = new Automaton();
    result.initial = start;
    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Returns a (deterministic) automaton accepting every string that is in the
/// language of <paramref name="a1"/> but not in the language of
/// <paramref name="a2"/>. As a side-effect, the automata may be determinized, if not
/// already deterministic.
/// <para/>
/// Complexity: quadratic in number of states (if already deterministic).
/// </summary>
/// <param name="a1">The automaton to subtract from.</param>
/// <param name="a2">The automaton whose language is removed.</param>
/// <returns>The difference automaton.</returns>
public static Automaton Minus(Automaton a1, Automaton a2)
{
    // Nothing can remain when a1 is empty or both arguments are the same automaton.
    bool nothingRemains = BasicOperations.IsEmpty(a1) || a1 == a2;
    if (nothingRemains)
    {
        return BasicAutomata.MakeEmpty();
    }

    // Subtracting the empty language leaves a1 untouched.
    if (BasicOperations.IsEmpty(a2))
    {
        return a1.CloneIfRequired();
    }

    if (!a1.IsSingleton)
    {
        return Intersection(a1, a2.Complement());
    }

    // a1 holds exactly one string: it either survives or it does not.
    return BasicOperations.Run(a2, a1.singleton)
        ? BasicAutomata.MakeEmpty()
        : a1.CloneIfRequired();
}
/// <summary>
/// Converts this regular-expression node into an <see cref="Automaton"/>,
/// recursing into <c>exp1</c>/<c>exp2</c> for composite kinds. Results of the
/// composite kinds (union, concatenation, intersection, optional, repeat,
/// complement) are passed through <c>MinimizationOperations.Minimize</c>.
/// </summary>
/// <param name="automata">Optional map from name to automaton, consulted first for
/// named-automaton nodes. May be <c>null</c>.</param>
/// <param name="automaton_provider">Optional provider consulted when the name is not
/// resolved through <paramref name="automata"/>. May be <c>null</c>.</param>
/// <returns>The automaton for this node, or <c>null</c> if <c>kind</c> matches none
/// of the handled cases.</returns>
/// <exception cref="System.ArgumentException">A named automaton could not be resolved,
/// or the provider threw a <see cref="System.IO.IOException"/>.</exception>
private Automaton ToAutomaton(IDictionary<string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    IList<Automaton> list;
    Automaton a = null;
    switch (kind)
    {
        case Kind.REGEXP_UNION:
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
            a = BasicOperations.Union(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CONCATENATION:
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            a = BasicOperations.Concatenate(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_INTERSECTION:
            a = exp1.ToAutomaton(automata, automaton_provider)
                    .Intersection(exp2.ToAutomaton(automata, automaton_provider));
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_OPTIONAL:
            a = exp1.ToAutomaton(automata, automaton_provider).Optional();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MIN:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MINMAX:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_COMPLEMENT:
            a = exp1.ToAutomaton(automata, automaton_provider).Complement();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CHAR:
            a = BasicAutomata.MakeChar(c);
            break;

        case Kind.REGEXP_CHAR_RANGE:
            a = BasicAutomata.MakeCharRange(from, to);
            break;

        case Kind.REGEXP_ANYCHAR:
            a = BasicAutomata.MakeAnyChar();
            break;

        case Kind.REGEXP_EMPTY:
            a = BasicAutomata.MakeEmpty();
            break;

        case Kind.REGEXP_STRING:
            a = BasicAutomata.MakeString(s);
            break;

        case Kind.REGEXP_ANYSTRING:
            a = BasicAutomata.MakeAnyString();
            break;

        case Kind.REGEXP_AUTOMATON:
            Automaton aa = null;
            if (automata != null)
            {
                // The IDictionary indexer throws KeyNotFoundException for a missing
                // key. TryGetValue leaves aa null on a miss, so the provider
                // fallback and the "not found" error below remain reachable.
                automata.TryGetValue(s, out aa);
            }
            if (aa == null && automaton_provider != null)
            {
                try
                {
                    aa = automaton_provider.GetAutomaton(s);
                }
                catch (System.IO.IOException e)
                {
                    throw new System.ArgumentException(e.ToString(), e);
                }
            }
            if (aa == null)
            {
                throw new System.ArgumentException("'" + s + "' not found");
            }
            a = (Automaton)aa.Clone(); // always clone here (ignore allow_mutate)
            break;

        case Kind.REGEXP_INTERVAL:
            a = BasicAutomata.MakeInterval(min, max, digits);
            break;
    }
    return a;
}
/// <summary>
/// Compiles <paramref name="automaton"/>. When <paramref name="simplify"/> is
/// true, the automaton is first tested against four simple forms (matches
/// nothing, matches everything, matches one fixed string, matches a constant
/// prefix); if one applies, only <c>Type</c> and possibly <c>Term</c> are
/// populated and no run automaton is built. Otherwise the automaton is
/// converted to UTF-8 and a <see cref="ByteRunAutomaton"/> is created.
/// </summary>
/// <param name="automaton">The automaton to compile.</param>
/// <param name="finite">Whether the automaton is finite, or <c>null</c> to have it
/// computed via <c>SpecialOperations.IsFinite</c>. Only used for the normal form.</param>
/// <param name="simplify">Whether to test for the simple forms first. On a large
/// automaton these tests could be costly.</param>
public CompiledAutomaton(Automaton automaton, bool? finite, bool simplify)
{
    if (simplify)
    {
        if (BasicOperations.IsEmpty(automaton))
        {
            // Accepts no string at all.
            Type = AUTOMATON_TYPE.NONE;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        if (BasicOperations.IsTotal(automaton))
        {
            // Accepts every possible string.
            Type = AUTOMATON_TYPE.ALL;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        string commonPrefix;
        string singleton;
        if (automaton.Singleton != null)
        {
            // Already in singleton representation.
            commonPrefix = null;
            singleton = automaton.Singleton;
        }
        else
        {
            // Expanded representation: it is still a single string when its
            // language equals that of its own (non-empty) common prefix.
            commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
            bool prefixIsWholeLanguage = commonPrefix.Length > 0
                && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix));
            singleton = prefixIsWholeLanguage ? commonPrefix : null;
        }

        if (singleton != null)
        {
            // Accepts one fixed string (singleton or expanded representation).
            Type = AUTOMATON_TYPE.SINGLE;
            Term = new BytesRef(singleton);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        Automaton prefixThenAnything = BasicOperations.Concatenate(
            BasicAutomata.MakeString(commonPrefix),
            BasicAutomata.MakeAnyString());
        if (BasicOperations.SameLanguage(automaton, prefixThenAnything))
        {
            // Accepts a constant prefix followed by anything.
            Type = AUTOMATON_TYPE.PREFIX;
            Term = new BytesRef(commonPrefix);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }
    }

    // General case: build the byte-level run automaton.
    Type = AUTOMATON_TYPE.NORMAL;
    Term = null;
    this.Finite = finite ?? SpecialOperations.IsFinite(automaton);

    Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);
    CommonSuffixRef = this.Finite == true
        ? null
        : SpecialOperations.GetCommonSuffixBytesRef(utf8);
    RunAutomaton = new ByteRunAutomaton(utf8, true);
    sortedTransitions = utf8.GetSortedTransitions();
}
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Determinize(Automaton)"/>.
/// </summary>
public virtual void Determinize() => BasicOperations.Determinize(this);
/// <summary>
/// Static shortcut for <see cref="BasicOperations.Union(ICollection{Automaton})"/>.
/// </summary>
/// <param name="l">The automata forwarded to the underlying operation.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public static Automaton Union(ICollection<Automaton> l) => BasicOperations.Union(l);
/// <summary>
/// Instance shortcut for <see cref="BasicOperations.Concatenate(Automaton, Automaton)"/>
/// with this automaton as the first argument.
/// </summary>
/// <param name="a">The automaton passed as the second argument.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Concatenate(Automaton a) => BasicOperations.Concatenate(this, a);
/// <summary>
/// Instance shortcut for <see cref="BasicOperations.Minus(Automaton, Automaton)"/>
/// with this automaton as the first argument.
/// </summary>
/// <param name="a">The automaton passed as the second argument.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Minus(Automaton a) => BasicOperations.Minus(this, a);
/// <summary>
/// Instance shortcut for <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>
/// with this automaton as the first argument.
/// </summary>
/// <param name="a">The automaton passed as the second argument.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Intersection(Automaton a) => BasicOperations.Intersection(this, a);
/// <summary>
/// Instance shortcut for <see cref="BasicOperations.Repeat(Automaton, int, int)"/>
/// with this automaton as the first argument.
/// </summary>
/// <param name="min">Forwarded unchanged as the second argument.</param>
/// <param name="max">Forwarded unchanged as the third argument.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Repeat(int min, int max) => BasicOperations.Repeat(this, min, max);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Complement(Automaton)"/>.
/// </summary>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Complement() => BasicOperations.Complement(this);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Repeat(Automaton)"/>.
/// </summary>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Repeat() => BasicOperations.Repeat(this);
/// <summary>
/// Instance shortcut that passes this automaton to
/// <see cref="BasicOperations.Optional(Automaton)"/>.
/// </summary>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Optional() => BasicOperations.Optional(this);
/// <summary>
/// Static shortcut for <see cref="BasicOperations.Concatenate(IList{Automaton})"/>.
/// </summary>
/// <param name="l">The automata forwarded to the underlying operation.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public static Automaton Concatenate(IList<Automaton> l) => BasicOperations.Concatenate(l);
/// <summary>
/// Builds an automaton accepting exactly the strings accepted by both
/// <paramref name="a1"/> and <paramref name="a2"/>. Never modifies the input
/// automata languages.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
/// <param name="a1">First operand.</param>
/// <param name="a2">Second operand.</param>
/// <returns>The intersection automaton. It is marked deterministic only when both
/// operands are marked deterministic.</returns>
public static Automaton Intersection(Automaton a1, Automaton a2)
{
    // A singleton operand reduces the problem to one membership test.
    if (a1.IsSingleton)
    {
        return BasicOperations.Run(a2, a1.singleton)
            ? a1.CloneIfRequired()
            : BasicAutomata.MakeEmpty();
    }
    if (a2.IsSingleton)
    {
        return BasicOperations.Run(a1, a2.singleton)
            ? a2.CloneIfRequired()
            : BasicAutomata.MakeEmpty();
    }
    if (a1 == a2)
    {
        return a1.CloneIfRequired();
    }

    Transition[][] sorted1 = a1.GetSortedTransitions();
    Transition[][] sorted2 = a2.GetSortedTransitions();
    Automaton product = new Automaton();
    Queue<StatePair> pending = new Queue<StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList
    Dictionary<StatePair, StatePair> known = new Dictionary<StatePair, StatePair>();

    StatePair current = new StatePair(product.initial, a1.initial, a2.initial);
    pending.Enqueue(current);
    known[current] = current;

    while (pending.Count > 0)
    {
        current = pending.Dequeue();
        // A product state accepts only when both component states accept.
        current.s.accept = current.s1.accept && current.s2.accept;

        Transition[] left = sorted1[current.s1.number];
        Transition[] right = sorted2[current.s2.number];
        int firstCandidate = 0;
        for (int i = 0; i < left.Length; i++)
        {
            Transition lt = left[i];

            // Skip right-hand transitions that end before this left-hand one begins.
            while (firstCandidate < right.Length && right[firstCandidate].max < lt.min)
            {
                firstCandidate++;
            }

            for (int j = firstCandidate; j < right.Length && lt.max >= right[j].min; j++)
            {
                Transition rt = right[j];
                if (rt.max < lt.min)
                {
                    continue; // intervals do not overlap
                }

                StatePair target = new StatePair(lt.to, rt.to);
                if (!known.TryGetValue(target, out StatePair existing) || existing is null)
                {
                    // First visit to this pair: create its product state and schedule it.
                    target.s = new State();
                    pending.Enqueue(target);
                    known[target] = target;
                    existing = target;
                }

                // The new transition covers the overlap of the two intervals.
                int min = lt.min > rt.min ? lt.min : rt.min;
                int max = lt.max < rt.max ? lt.max : rt.max;
                current.s.AddTransition(new Transition(min, max, existing.s));
            }
        }
    }

    product.deterministic = a1.deterministic && a2.deterministic;
    product.RemoveDeadTransitions();
    product.CheckMinimizeAlways();
    return product;
}
/// <summary>
/// Instance shortcut for <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/>
/// with this automaton as the first argument.
/// </summary>
/// <param name="a">The automaton passed as the second argument.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual bool SubsetOf(Automaton a) => BasicOperations.SubsetOf(this, a);
/// <summary>
/// Tests whether every string accepted by <paramref name="a1"/> is also accepted
/// by <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
/// not already marked as deterministic.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
/// <param name="a1">The candidate subset.</param>
/// <param name="a2">The candidate superset.</param>
/// <returns><c>true</c> if the language of <paramref name="a1"/> is contained in the
/// language of <paramref name="a2"/>; otherwise <c>false</c>.</returns>
public static bool SubsetOf(Automaton a1, Automaton a2)
{
    if (a1 == a2)
    {
        return true;
    }

    if (a1.IsSingleton)
    {
        return a2.IsSingleton
            ? a1.singleton.Equals(a2.singleton, StringComparison.Ordinal)
            : BasicOperations.Run(a2, a1.singleton);
    }

    a2.Determinize();
    Transition[][] sorted1 = a1.GetSortedTransitions();
    Transition[][] sorted2 = a2.GetSortedTransitions();
    Queue<StatePair> pending = new Queue<StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList
    JCG.HashSet<StatePair> seen = new JCG.HashSet<StatePair>();

    StatePair current = new StatePair(a1.initial, a2.initial);
    pending.Enqueue(current);
    seen.Add(current);

    while (pending.Count > 0)
    {
        current = pending.Dequeue();

        // a1 accepts here but a2 does not: a counter-example exists.
        if (current.s1.accept && !current.s2.accept)
        {
            return false;
        }

        Transition[] left = sorted1[current.s1.number];
        Transition[] right = sorted2[current.s2.number];
        int firstCandidate = 0;
        for (int i = 0; i < left.Length; i++)
        {
            Transition lt = left[i];
            while (firstCandidate < right.Length && right[firstCandidate].max < lt.min)
            {
                firstCandidate++;
            }

            // [uncoveredFrom, uncoveredTo] is the part of lt's interval that
            // a2's transitions have not covered yet.
            int uncoveredFrom = lt.min;
            int uncoveredTo = lt.max;
            for (int j = firstCandidate; j < right.Length && lt.max >= right[j].min; j++)
            {
                Transition rt = right[j];
                if (rt.min > uncoveredFrom)
                {
                    return false; // gap before the next a2 transition
                }

                if (rt.max < Character.MaxCodePoint)
                {
                    uncoveredFrom = rt.max + 1;
                }
                else
                {
                    // Covered up to the last code point: make the remainder empty.
                    uncoveredFrom = Character.MaxCodePoint;
                    uncoveredTo = Character.MinCodePoint;
                }

                StatePair next = new StatePair(lt.to, rt.to);
                if (!seen.Contains(next))
                {
                    seen.Add(next);
                    pending.Enqueue(next);
                }
            }

            if (uncoveredFrom <= uncoveredTo)
            {
                return false; // part of lt's interval is not matched by a2
            }
        }
    }

    return true;
}
/// <summary>
/// Instance shortcut for <see cref="BasicOperations.Union(Automaton, Automaton)"/>
/// with this automaton as the first argument.
/// </summary>
/// <param name="a">The automaton passed as the second argument.</param>
/// <returns>Whatever the underlying operation returns.</returns>
public virtual Automaton Union(Automaton a) => BasicOperations.Union(this, a);
/// <summary>
/// Builds an automaton accepting the concatenation, in list order, of the
/// languages of the automata in <paramref name="l"/>.
/// <para/>
/// Complexity: linear in total number of states.
/// </summary>
/// <param name="l">The automata to concatenate. An empty list yields the
/// empty-string automaton.</param>
/// <returns>The concatenation automaton. If any operand is empty (and not all
/// operands are singletons), the empty automaton is returned.</returns>
public static Automaton Concatenate(IList<Automaton> l)
{
    if (l.Count == 0)
    {
        return BasicAutomata.MakeEmptyString();
    }

    bool onlySingletons = true;
    foreach (Automaton item in l)
    {
        if (!item.IsSingleton)
        {
            onlySingletons = false;
            break;
        }
    }

    if (onlySingletons)
    {
        // Every operand is a single string: just join the strings.
        StringBuilder joined = new StringBuilder();
        foreach (Automaton item in l)
        {
            joined.Append(item.singleton);
        }
        return BasicAutomata.MakeString(joined.ToString());
    }

    // Concatenating with an empty language yields an empty language.
    foreach (Automaton item in l)
    {
        if (BasicOperations.IsEmpty(item))
        {
            return BasicAutomata.MakeEmpty();
        }
    }

    // Fewer distinct hash codes than operands is treated as "some operand
    // appears more than once", in which case every operand is cloned.
    JCG.HashSet<int> seenHashes = new JCG.HashSet<int>();
    foreach (Automaton item in l)
    {
        seenHashes.Add(item.GetHashCode());
    }
    bool mustClone = seenHashes.Count != l.Count;

    Automaton head = l[0];
    Automaton result = mustClone ? head.CloneExpanded() : head.CloneExpandedIfRequired();
    ISet<State> openEnds = result.GetAcceptStates();

    for (int i = 1; i < l.Count; i++)
    {
        Automaton next = l[i];
        if (next.IsEmptyString)
        {
            continue;
        }

        Automaton expanded = mustClone ? next.CloneExpanded() : next.CloneExpandedIfRequired();
        ISet<State> nextEnds = expanded.GetAcceptStates();
        foreach (State end in openEnds)
        {
            end.accept = false;
            end.AddEpsilon(expanded.initial);
            // If the state is accepting again after the epsilon edge was added,
            // it remains an accept state for the following operand.
            if (end.accept)
            {
                nextEnds.Add(end);
            }
        }
        openEnds = nextEnds;
    }

    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Returns true if the language of <paramref name="a1"/> is a subset of the language
/// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
/// not already marked as deterministic.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
/// <param name="a1">The candidate subset.</param>
/// <param name="a2">The candidate superset.</param>
/// <returns><c>true</c> if every string accepted by <paramref name="a1"/> is also
/// accepted by <paramref name="a2"/>; otherwise <c>false</c>.</returns>
public static bool SubsetOf(Automaton a1, Automaton a2)
{
    if (a1 == a2)
    {
        return true;
    }

    if (a1.IsSingleton)
    {
        if (a2.IsSingleton)
        {
            return a1.singleton.Equals(a2.singleton, StringComparison.Ordinal);
        }
        return BasicOperations.Run(a2, a1.singleton);
    }

    a2.Determinize();
    Transition[][] transitions1 = a1.GetSortedTransitions();
    Transition[][] transitions2 = a2.GetSortedTransitions();

    // The work list is strictly first-in/first-out, so a Queue is used and the
    // head is taken with Dequeue rather than a value-based LinkedList.Remove.
    Queue<StatePair> worklist = new Queue<StatePair>();
    HashSet<StatePair> visited = new HashSet<StatePair>();
    StatePair p = new StatePair(a1.initial, a2.initial);
    worklist.Enqueue(p);
    visited.Add(p);

    while (worklist.Count > 0)
    {
        p = worklist.Dequeue();

        // a1 accepts here but a2 does not: a counter-example exists.
        if (p.S1.accept && !p.S2.accept)
        {
            return false;
        }

        Transition[] t1 = transitions1[p.S1.number];
        Transition[] t2 = transitions2[p.S2.number];
        for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
        {
            // Skip a2 transitions that end before this a1 transition begins.
            while (b2 < t2.Length && t2[b2].max < t1[n1].min)
            {
                b2++;
            }

            // [min1, max1] is the part of t1[n1]'s interval not yet covered by a2.
            int min1 = t1[n1].min, max1 = t1[n1].max;
            for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
            {
                if (t2[n2].min > min1)
                {
                    return false; // gap before the next a2 transition
                }

                if (t2[n2].max < Character.MAX_CODE_POINT)
                {
                    min1 = t2[n2].max + 1;
                }
                else
                {
                    // Covered up to the last code point: make the remainder empty.
                    min1 = Character.MAX_CODE_POINT;
                    max1 = Character.MIN_CODE_POINT;
                }

                StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                if (!visited.Contains(q))
                {
                    worklist.Enqueue(q);
                    visited.Add(q);
                }
            }

            if (min1 <= max1)
            {
                return false; // part of t1[n1]'s interval is not matched by a2
            }
        }
    }

    return true;
}