/// <summary>
/// Builds a deterministic automaton whose language is exactly the one-character string
/// consisting of <paramref name="c"/>.
/// </summary>
/// <param name="c">The single character to accept.</param>
/// <returns>A new automaton holding the character as its singleton string and flagged as
/// deterministic.</returns>
public static Automaton MakeChar(char c)
{
    return new Automaton
    {
        Singleton = c.ToString(),
        IsDeterministic = true
    };
}
/// <summary>
/// Builds a deterministic automaton that accepts every string.
/// </summary>
/// <returns>
/// A new automaton with a single accepting state that loops to itself on the full
/// character range.
/// </returns>
public static Automaton MakeAnyString()
{
    // One accepting state with a self-loop over char.MinValue..char.MaxValue.
    var loop = new State { Accept = true };
    loop.Transitions.Add(new Transition(char.MinValue, char.MaxValue, loop));

    return new Automaton
    {
        Initial = loop,
        IsDeterministic = true
    };
}
/// <summary>
/// Initializes a new instance of the <see cref="Xeger"/> class by compiling the given
/// regular expression into an automaton and remembering the random source.
/// </summary>
/// <param name="regex">The regular expression to compile. Must be neither null nor empty.</param>
/// <param name="random">The random number source to store. Must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="regex"/> is null or empty, or <paramref name="random"/> is null.
/// </exception>
public Xeger(string regex, Random random)
{
    // Same check as string.IsNullOrEmpty, spelled out.
    if (regex == null || regex.Length == 0)
    {
        throw new ArgumentNullException("regex");
    }

    if (random == null)
    {
        throw new ArgumentNullException("random");
    }

    var compiled = new RegExp(regex).ToAutomaton();
    this.automaton = compiled;
    this.random = random;
}
/// <summary>
/// Reverses the language of the given (non-singleton) automaton in place: every transition is
/// flipped, the old initial state becomes the only accepting state, and a fresh initial state
/// is connected by epsilon moves to each formerly accepting state.
/// </summary>
/// <param name="a">The automaton to reverse; it is modified.</param>
/// <returns>The states that were accepting before the reversal, i.e. the new entry states.</returns>
public static HashSet<State> Reverse(Automaton a)
{
    HashSet<State> allStates = a.GetStates();

    // Must be captured before the accept flags are cleared below.
    HashSet<State> formerlyAccepting = a.GetAcceptStates();

    // Prepare an empty bucket of incoming edges per state and clear acceptance.
    var incoming = new Dictionary<State, HashSet<Transition>>();
    foreach (State state in allStates)
    {
        incoming.Add(state, new HashSet<Transition>());
        state.Accept = false;
    }

    // Record every edge source -> target as an edge stored on the target, pointing at the source.
    foreach (State source in allStates)
    {
        foreach (Transition edge in source.Transitions)
        {
            incoming[edge.To].Add(new Transition(edge.Min, edge.Max, source));
        }
    }

    foreach (State state in allStates)
    {
        state.Transitions = new List<Transition>(incoming[state]);
    }

    // The old initial state is where reversed strings end.
    a.Initial.Accept = true;
    a.Initial = new State();
    foreach (State entry in formerlyAccepting)
    {
        // Ensures that all initial states are reachable.
        a.Initial.AddEpsilon(entry);
    }

    a.IsDeterministic = false;
    return formerlyAccepting;
}
/// <summary>
/// Builds an automaton accepting the intersection of the languages of the two given automata
/// using a product construction over their sorted transitions.
/// </summary>
/// <param name="a1">The first automaton.</param>
/// <param name="a2">The second automaton.</param>
/// <returns>
/// A clone (if required) of one operand when an operand is a singleton accepted by the other or
/// when both arguments are the same instance; the empty automaton when a singleton operand is
/// rejected by the other; otherwise the product automaton.
/// </returns>
public static Automaton Intersection(Automaton a1, Automaton a2)
{
    // A singleton operand reduces the problem to one membership test.
    if (a1.IsSingleton)
    {
        if (a2.Run(a1.Singleton))
        {
            return a1.CloneIfRequired();
        }

        return BasicAutomata.MakeEmpty();
    }

    if (a2.IsSingleton)
    {
        if (a1.Run(a2.Singleton))
        {
            return a2.CloneIfRequired();
        }

        return BasicAutomata.MakeEmpty();
    }

    if (a1 == a2)
    {
        return a1.CloneIfRequired();
    }

    Transition[][] leftTable = Automaton.GetSortedTransitions(a1.GetStates());
    Transition[][] rightTable = Automaton.GetSortedTransitions(a2.GetStates());

    var product = new Automaton();
    var pending = new LinkedList<StatePair>();
    var known = new Dictionary<StatePair, StatePair>();

    var current = new StatePair(product.Initial, a1.Initial, a2.Initial);
    pending.AddLast(current);
    known.Add(current, current);

    while (pending.Count > 0)
    {
        current = pending.RemoveAndReturnFirst();
        current.S.Accept = current.FirstState.Accept && current.SecondState.Accept;

        Transition[] leftEdges = leftTable[current.FirstState.Number];
        Transition[] rightEdges = rightTable[current.SecondState.Number];

        // Index of the first right edge that can still overlap the current left edge;
        // it only moves forward as the left edges are visited in order.
        int firstCandidate = 0;
        for (int li = 0; li < leftEdges.Length; li++)
        {
            Transition left = leftEdges[li];
            while (firstCandidate < rightEdges.Length && rightEdges[firstCandidate].Max < left.Min)
            {
                firstCandidate++;
            }

            for (int ri = firstCandidate; ri < rightEdges.Length && left.Max >= rightEdges[ri].Min; ri++)
            {
                Transition right = rightEdges[ri];
                if (right.Max < left.Min)
                {
                    continue;
                }

                var key = new StatePair(left.To, right.To);
                StatePair target;
                if (!known.TryGetValue(key, out target))
                {
                    // First time this pair of states is reached: give it a product state.
                    key.S = new State();
                    pending.AddLast(key);
                    known.Add(key, key);
                    target = key;
                }

                // The product edge covers the overlap of the two intervals.
                char low = left.Min;
                if (right.Min >= left.Min)
                {
                    low = right.Min;
                }

                char high = right.Max;
                if (left.Max < right.Max)
                {
                    high = left.Max;
                }

                current.S.Transitions.Add(new Transition(low, high, target.S));
            }
        }
    }

    product.IsDeterministic = a1.IsDeterministic && a2.IsDeterministic;
    product.RemoveDeadTransitions();
    product.CheckMinimizeAlways();
    return product;
}
/// <summary>
/// Returns true if the given string is accepted by the automaton.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="s">The string to test.</param>
/// <returns><c>true</c> if <paramref name="s"/> is accepted; otherwise <c>false</c>.</returns>
/// <remarks>
/// A singleton automaton is checked with an ordinal string comparison. A deterministic
/// automaton is walked one state per character. Otherwise the set of reachable states is
/// tracked character by character.
/// </remarks>
public static bool Run(Automaton a, string s)
{
    if (a.IsSingleton)
    {
        // Ordinal on purpose: transitions are defined over raw UTF-16 code units, so the
        // result must not depend on the current culture's collation rules.
        return s.Equals(a.Singleton, System.StringComparison.Ordinal);
    }

    if (a.IsDeterministic)
    {
        State current = a.Initial;
        foreach (char ch in s)
        {
            State next = current.Step(ch);
            if (next == null)
            {
                return false;
            }

            current = next;
        }

        return current.Accept;
    }

    // Nondeterministic case: simulate all reachable states at once.
    HashSet<State> states = a.GetStates();
    Automaton.SetStateNumbers(states);

    var frontier = new LinkedList<State>();
    var nextFrontier = new LinkedList<State>();
    var seen = new BitArray(states.Count); // indexed by State.Number, reset for every character
    var dest = new List<State>();

    frontier.AddLast(a.Initial);
    bool accept = a.Initial.Accept;
    foreach (char c in s)
    {
        accept = false;
        nextFrontier.Clear();
        seen.SetAll(false);
        foreach (State p in frontier)
        {
            dest.Clear();
            p.Step(c, dest);
            foreach (State q in dest)
            {
                if (q.Accept)
                {
                    accept = true;
                }

                if (!seen.Get(q.Number))
                {
                    seen.Set(q.Number, true);
                    nextFrontier.AddLast(q);
                }
            }
        }

        // Swap the two lists so the old frontier is reused as scratch space.
        LinkedList<State> swap = frontier;
        frontier = nextFrontier;
        nextFrontier = swap;
    }

    return accept;
}
/// <summary>
/// Intended to return an automaton with projected alphabet: the new automaton would accept all
/// strings that are projections of strings accepted by the given automaton onto the given
/// characters. This operation is not implemented.
/// </summary>
/// <remarks>
/// The existing description states that a <c>null</c> entry in the set abbreviates the intervals
/// u0000-uDFFF and uF900-uFFFF (i.e., the non-private code points) and that all other characters
/// from <c>chars</c> are assumed to be in the interval uE000-uF8FF.
/// NOTE(review): a <c>HashSet&lt;char&gt;</c> cannot hold <c>null</c>; confirm the intended
/// convention before implementing.
/// </remarks>
/// <param name="a">The automaton.</param>
/// <param name="chars">The chars.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static Automaton ProjectChars(Automaton a, HashSet<char> chars)
{
    throw new NotImplementedException();
}
/// <summary>
/// Builds an automaton accepting the concatenation, in list order, of the languages of the
/// given automata.
/// </summary>
/// <param name="l">The automata to concatenate.</param>
/// <returns>
/// The empty-string automaton for an empty list; a single string automaton when every element
/// is a singleton; the empty automaton when any element is empty; otherwise a nondeterministic
/// automaton chained together with epsilon moves.
/// </returns>
public static Automaton Concatenate(IList<Automaton> l)
{
    if (l.Count == 0)
    {
        return BasicAutomata.MakeEmptyString();
    }

    if (l.All(x => x.IsSingleton))
    {
        // Pure strings: concatenate the text directly.
        var joined = new StringBuilder();
        for (int i = 0; i < l.Count; i++)
        {
            joined.Append(l[i].Singleton);
        }

        return BasicAutomata.MakeString(joined.ToString());
    }

    if (l.Any(x => x.IsEmpty))
    {
        return BasicAutomata.MakeEmpty();
    }

    // If the same instance occurs more than once, every part must be cloned so that
    // the parts do not share states.
    var identities = new HashSet<int>();
    foreach (Automaton item in l)
    {
        identities.Add(RuntimeHelpers.GetHashCode(item));
    }

    bool hasAliases = identities.Count != l.Count;

    Automaton result = hasAliases ? l[0].CloneExpanded() : l[0].CloneExpandedIfRequired();
    HashSet<State> openEnds = result.GetAcceptStates();

    for (int i = 1; i < l.Count; i++)
    {
        Automaton part = l[i];
        if (part.IsEmptyString())
        {
            continue;
        }

        part = hasAliases ? part.CloneExpanded() : part.CloneExpandedIfRequired();
        HashSet<State> nextEnds = part.GetAcceptStates();
        foreach (State end in openEnds)
        {
            end.Accept = false;
            end.AddEpsilon(part.Initial);

            // AddEpsilon may have turned the flag back on; such a state stays an open end.
            if (end.Accept)
            {
                nextEnds.Add(end);
            }
        }

        openEnds = nextEnds;
    }

    result.IsDeterministic = false;
    result.ClearHashCode();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Tells whether the given automaton accepts the empty string and nothing else.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>
/// For a singleton automaton, <c>true</c> when its string has length zero; otherwise
/// <c>true</c> when the initial state is accepting and has no outgoing transitions.
/// </returns>
public static bool IsEmptyString(Automaton a)
{
    if (!a.IsSingleton)
    {
        return a.Initial.Accept && a.Initial.Transitions.Count == 0;
    }

    return a.Singleton.Length == 0;
}
/// <summary>
/// Builds an automaton accepting the Kleene star (zero or more concatenated repetitions) of
/// the language of the given automaton. The work is done on an expanded clone.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>A nondeterministic automaton for the repeated language.</returns>
public static Automaton Repeat(Automaton a)
{
    Automaton result = a.CloneExpanded();

    // New accepting entry state: accepts the empty string and leads into the body.
    var entry = new State();
    entry.Accept = true;
    entry.AddEpsilon(result.Initial);

    // Every accepting state of the body loops back to the entry.
    foreach (State accepting in result.GetAcceptStates())
    {
        accepting.AddEpsilon(entry);
    }

    result.Initial = entry;
    result.IsDeterministic = false;
    result.ClearHashCode();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Intended to construct an automaton that accepts the same strings as the given automaton but
/// ignores upper/lower case of A-F. This operation is not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static Automaton HexCases(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Builds an automaton accepting the concatenation of the languages of the two given automata.
/// </summary>
/// <param name="a1">The automaton supplying the first part.</param>
/// <param name="a2">The automaton supplying the second part.</param>
/// <returns>
/// A string automaton when both operands are singletons; the empty automaton when either
/// operand is empty; otherwise the (possibly cloned) first operand with its accepting states
/// linked by epsilon moves to the initial state of the second.
/// </returns>
public static Automaton Concatenate(Automaton a1, Automaton a2)
{
    if (a1.IsSingleton && a2.IsSingleton)
    {
        return BasicAutomata.MakeString(a1.Singleton + a2.Singleton);
    }

    if (BasicOperations.IsEmpty(a1) || BasicOperations.IsEmpty(a2))
    {
        return BasicAutomata.MakeEmpty();
    }

    // Evaluated before the operands are expanded below.
    bool resultIsDeterministic = a1.IsSingleton && a2.IsDeterministic;

    // The same instance on both sides must be cloned twice so the halves do not share states.
    bool sameInstance = a1 == a2;
    Automaton head = sameInstance ? a1.CloneExpanded() : a1.CloneExpandedIfRequired();
    Automaton tail = sameInstance ? a2.CloneExpanded() : a2.CloneExpandedIfRequired();

    foreach (State end in head.GetAcceptStates())
    {
        end.Accept = false;
        end.AddEpsilon(tail.Initial);
    }

    head.IsDeterministic = resultIsDeterministic;
    head.ClearHashCode();
    head.CheckMinimizeAlways();
    return head;
}
/// <summary>
/// Intended to return the longest string that is a prefix of all accepted strings and visits
/// each state at most once. This operation is not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static string GetCommonPrefix(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Intended to prefix close the given automaton. This operation is not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static void PrefixClose(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Intended to return the set of accepted strings, assuming that at most <c>limit</c> strings
/// are accepted; if more than <c>limit</c> strings are accepted, null would be returned, and a
/// <c>limit</c> &lt; 0 would mean "no limit". This operation is not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="limit">The limit.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static HashSet<string> GetFiniteStrings(Automaton a, int limit)
{
    throw new NotImplementedException();
}
/// <summary>
/// Intended to return the set of accepted strings of the given length. This operation is not
/// implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="length">The length.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static HashSet<string> GetStrings(Automaton a, int length)
{
    throw new NotImplementedException();
}
/// <summary>
/// Intended to return true if the language of the given automaton is finite. This operation is
/// not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static bool IsFinite(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Concatenates this automaton with the given one by delegating to
/// <c>BasicOperations.Concatenate(this, a)</c>.
/// </summary>
/// <param name="a">The automaton supplying the second part of the concatenation.</param>
/// <returns>The automaton returned by <c>BasicOperations.Concatenate</c>.</returns>
public Automaton Concatenate(Automaton a)
{
    return(BasicOperations.Concatenate(this, a));
}
/// <summary>
/// Intended to construct an automaton that accepts 0x20, 0x9, 0xa, and 0xd in place of each
/// 0x20 transition in the given automaton. This operation is not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static Automaton ReplaceWhitespace(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Builds an automaton accepting the union of the languages of the given automata.
/// </summary>
/// <param name="automatons">The automata to unite. Empty automata are skipped.</param>
/// <returns>
/// A new nondeterministic automaton whose fresh initial state has an epsilon move to the
/// initial state of every non-empty operand.
/// </returns>
public static Automaton Union(IList<Automaton> automatons)
{
    // If the same instance occurs more than once, each operand is cloned so that the
    // branches do not share states.
    var identities = new HashSet<int>();
    foreach (Automaton candidate in automatons)
    {
        identities.Add(RuntimeHelpers.GetHashCode(candidate));
    }

    bool hasAliases = identities.Count != automatons.Count;

    var start = new State();
    foreach (Automaton operand in automatons)
    {
        if (!operand.IsEmpty)
        {
            Automaton branch = hasAliases ? operand.CloneExpanded() : operand.CloneExpandedIfRequired();
            start.AddEpsilon(branch.Initial);
        }
    }

    var union = new Automaton
    {
        Initial = start,
        IsDeterministic = false
    };
    union.ClearHashCode();
    union.CheckMinimizeAlways();
    return union;
}
/// <summary>
/// Minimizes the given automaton in place by calling its <c>Minimize</c> method.
/// </summary>
/// <param name="a">The automaton to minimize; it is modified.</param>
/// <returns>The same instance that was passed in.</returns>
public static Automaton Minimize(Automaton a)
{
    a.Minimize();
    return a;
}
/// <summary>
/// Determinizes the given automaton in place with the subset construction, starting from the
/// given set of initial states.
/// </summary>
/// <param name="a">The automaton; its initial state is replaced and it is flagged as
/// deterministic.</param>
/// <param name="initialset">The states forming the initial subset.</param>
public static void Determinize(Automaton a, List<State> initialset)
{
    char[] points = a.GetStartPoints();
    var comparer = new ListEqualityComparer<State>();

    // Subset construction: each discovered subset of old states maps to one new state.
    var knownSubsets = new Dictionary<List<State>, List<State>>(comparer);
    var pending = new LinkedList<List<State>>();
    var subsetState = new Dictionary<List<State>, State>(comparer);

    knownSubsets.Add(initialset, initialset);
    pending.AddLast(initialset);
    a.Initial = new State();
    subsetState.Add(initialset, a.Initial);

    while (pending.Count > 0)
    {
        List<State> members = pending.RemoveAndReturnFirst();
        State source;
        subsetState.TryGetValue(members, out source);

        // A subset is accepting as soon as one of its members is.
        if (members.Any(member => member.Accept))
        {
            source.Accept = true;
        }

        for (int n = 0; n < points.Length; n++)
        {
            char point = points[n];

            var reached = new HashSet<State>();
            foreach (State member in members)
            {
                foreach (Transition edge in member.Transitions)
                {
                    if (edge.Min <= point && point <= edge.Max)
                    {
                        reached.Add(edge.To);
                    }
                }
            }

            List<State> key = reached.ToList();
            State target;
            if (!subsetState.TryGetValue(key, out target))
            {
                knownSubsets.Add(key, key);
                pending.AddLast(key);
                target = new State();
                subsetState.Add(key, target);
            }

            // The interval runs up to just before the next start point, or to the end of the range.
            char high = n + 1 < points.Length ? (char)(points[n + 1] - 1) : char.MaxValue;
            source.Transitions.Add(new Transition(point, high, target));
        }
    }

    a.IsDeterministic = true;
    a.RemoveDeadTransitions();
}
/// <summary>
/// Minimizes the given automaton in place by partition refinement: the automaton is first
/// determinized and totalized, its states are split into blocks until no block needs further
/// splitting, and one new state is then created per block.
/// </summary>
/// <param name="a">The automaton to minimize; its initial state and transitions are replaced.</param>
public static void MinimizeHopcroft(Automaton a)
{
    a.Determinize();
    IList<Transition> tr = a.Initial.Transitions;
    if (tr.Count == 1)
    {
        Transition t = tr[0];

        // The initial state only loops to itself over the whole character range: nothing to refine.
        if (t.To == a.Initial && t.Min == char.MinValue && t.Max == char.MaxValue)
        {
            return;
        }
    }

    a.Totalize();

    // Make arrays for numbered states and effective alphabet.
    HashSet<State> ss = a.GetStates();
    var states = new State[ss.Count];
    int number = 0;
    foreach (State q in ss)
    {
        states[number] = q;
        q.Number = number++;
    }

    char[] sigma = a.GetStartPoints();

    // Initialize data structures.
    // reverse[q][x]: states that step to state q on sigma[x].
    var reverse = new List<List<LinkedList<State>>>();
    foreach (State s in states)
    {
        var v = new List<LinkedList<State>>();
        Initialize(ref v, sigma.Length);
        reverse.Add(v);
    }

    var reverseNonempty = new bool[states.Length, sigma.Length];

    // partition[b]: states currently in block b; block[q]: block index of state number q.
    var partition = new List<LinkedList<State>>();
    Initialize(ref partition, states.Length);
    var block = new int[states.Length];

    // active[b, x]: states of block b that have incoming sigma[x] edges;
    // active2[q, x]: the list node holding state q in that list (or null).
    var active = new StateList[states.Length, sigma.Length];
    var active2 = new StateListNode[states.Length, sigma.Length];

    // pending: (block, letter) pairs still to process; pending2[x, b] marks membership.
    var pending = new LinkedList<IntPair>();
    var pending2 = new bool[sigma.Length, states.Length];

    // split/refine with their boolean companions are per-iteration scratch sets.
    var split = new List<State>();
    var split2 = new bool[states.Length];
    var refine = new List<int>();
    var refine2 = new bool[states.Length];
    var splitblock = new List<List<State>>();
    Initialize(ref splitblock, states.Length);
    for (int q = 0; q < states.Length; q++)
    {
        splitblock[q] = new List<State>();
        partition[q] = new LinkedList<State>();
        for (int x = 0; x < sigma.Length; x++)
        {
            reverse[q][x] = new LinkedList<State>();
            active[q, x] = new StateList();
        }
    }

    // Find initial partition and reverse edges.
    // Block 0 holds the accepting states, block 1 the others.
    foreach (State qq in states)
    {
        int j = qq.Accept ? 0 : 1;
        partition[j].AddLast(qq);
        block[qq.Number] = j;
        for (int x = 0; x < sigma.Length; x++)
        {
            char y = sigma[x];
            State p = qq.Step(y);
            reverse[p.Number][x].AddLast(qq);
            reverseNonempty[p.Number, x] = true;
        }
    }

    // Initialize active sets.
    for (int j = 0; j <= 1; j++)
    {
        for (int x = 0; x < sigma.Length; x++)
        {
            foreach (State qq in partition[j])
            {
                if (reverseNonempty[qq.Number, x])
                {
                    active2[qq.Number, x] = active[j, x].Add(qq);
                }
            }
        }
    }

    // Initialize pending.
    // For each letter, start with whichever of the two initial blocks has the smaller active list.
    for (int x = 0; x < sigma.Length; x++)
    {
        int a0 = active[0, x].Size;
        int a1 = active[1, x].Size;
        int j = a0 <= a1 ? 0 : 1;
        pending.AddLast(new IntPair(j, x));
        pending2[x, j] = true;
    }

    // Process pending until fixed point.
    // k is the index of the next unused block.
    int k = 2;
    while (pending.Count > 0)
    {
        IntPair ip = pending.RemoveAndReturnFirst();
        int p = ip.N1;
        int x = ip.N2;
        pending2[x, p] = false;

        // Find states that need to be split off their blocks.
        for (StateListNode m = active[p, x].First; m != null; m = m.Next)
        {
            foreach (State s in reverse[m.State.Number][x])
            {
                if (!split2[s.Number])
                {
                    split2[s.Number] = true;
                    split.Add(s);
                    int j = block[s.Number];
                    splitblock[j].Add(s);
                    if (!refine2[j])
                    {
                        refine2[j] = true;
                        refine.Add(j);
                    }
                }
            }
        }

        // Refine blocks.
        foreach (int j in refine)
        {
            // Only split when some, but not all, states of block j were marked.
            if (splitblock[j].Count < partition[j].Count)
            {
                LinkedList<State> b1 = partition[j];
                LinkedList<State> b2 = partition[k];
                foreach (State s in splitblock[j])
                {
                    b1.Remove(s);
                    b2.AddLast(s);
                    block[s.Number] = k;
                    for (int c = 0; c < sigma.Length; c++)
                    {
                        StateListNode sn = active2[s.Number, c];
                        if (sn != null && sn.StateList == active[j, c])
                        {
                            sn.Remove();
                            active2[s.Number, c] = active[k, c].Add(s);
                        }
                    }
                }

                // Update pending.
                for (int c = 0; c < sigma.Length; c++)
                {
                    int aj = active[j, c].Size;
                    int ak = active[k, c].Size;
                    if (!pending2[c, j] && 0 < aj && aj <= ak)
                    {
                        pending2[c, j] = true;
                        pending.AddLast(new IntPair(j, c));
                    }
                    else
                    {
                        pending2[c, k] = true;
                        pending.AddLast(new IntPair(k, c));
                    }
                }

                k++;
            }

            // Reset the scratch markers for block j.
            foreach (State s in splitblock[j])
            {
                split2[s.Number] = false;
            }

            refine2[j] = false;
            splitblock[j].Clear();
        }

        split.Clear();
        refine.Clear();
    }

    // Make a new state for each equivalence class, set initial state.
    var newstates = new State[k];
    for (int n = 0; n < newstates.Length; n++)
    {
        var s = new State();
        newstates[n] = s;
        foreach (State q in partition[n])
        {
            if (q == a.Initial)
            {
                a.Initial = s;
            }

            s.Accept = q.Accept;
            s.Number = q.Number; // Select representative.
            q.Number = n; // From here on the old state's number is its block index.
        }
    }

    // Build transitions and set acceptance.
    foreach (State s in newstates)
    {
        s.Accept = states[s.Number].Accept;
        foreach (Transition t in states[s.Number].Transitions)
        {
            s.Transitions.Add(new Transition(t.Min, t.Max, newstates[t.To.Number]));
        }
    }

    a.RemoveDeadTransitions();
}
/// <summary>
/// Adds epsilon transitions to the given automaton. The given pairs are first closed
/// transitively (new pairs are added to <paramref name="pairs"/>), then an epsilon move is
/// added from the first to the second state of every pair.
/// </summary>
/// <param name="a">The automaton; it is expanded from singleton form and flagged as
/// nondeterministic.</param>
/// <param name="pairs">A collection of <see cref="StatePair"/> objects representing pairs of
/// source/destination states where epsilon transitions should be added. The collection is
/// extended with the pairs implied by transitivity.</param>
public static void AddEpsilons(Automaton a, ICollection<StatePair> pairs)
{
    a.ExpandSingleton();

    // forward[s]: states reachable from s by one epsilon; back[s]: states with an epsilon to s.
    var forward = new Dictionary<State, HashSet<State>>();
    var back = new Dictionary<State, HashSet<State>>();
    foreach (StatePair p in pairs)
    {
        // TryGetValue, not the indexer: the indexer throws KeyNotFoundException for a
        // missing key instead of yielding null.
        HashSet<State> to;
        if (!forward.TryGetValue(p.FirstState, out to))
        {
            to = new HashSet<State>();
            forward.Add(p.FirstState, to);
        }

        to.Add(p.SecondState);

        HashSet<State> from;
        if (!back.TryGetValue(p.SecondState, out from))
        {
            from = new HashSet<State>();
            back.Add(p.SecondState, from);
        }

        from.Add(p.FirstState);
    }

    // Calculate the transitive closure of the epsilon relation.
    var worklist = new LinkedList<StatePair>(pairs);
    var workset = new HashSet<StatePair>(pairs);
    while (worklist.Count != 0)
    {
        StatePair p = worklist.RemoveAndReturnFirst();
        workset.Remove(p);

        // Either lookup may legitimately miss; the out value is then null.
        HashSet<State> to;
        forward.TryGetValue(p.SecondState, out to);
        HashSet<State> from;
        back.TryGetValue(p.FirstState, out from);

        if (to == null)
        {
            continue;
        }

        foreach (State s in to)
        {
            var pp = new StatePair(p.FirstState, s);
            if (pairs.Contains(pp))
            {
                continue;
            }

            pairs.Add(pp);

            // Both keys exist: p.FirstState is the source of a recorded pair and s is the
            // destination of one.
            forward[p.FirstState].Add(s);
            back[s].Add(p.FirstState);
            worklist.AddLast(pp);
            workset.Add(pp);

            if (from != null)
            {
                // Predecessors of p.FirstState must be revisited to pick up the new target.
                foreach (State q in from)
                {
                    var qq = new StatePair(q, p.FirstState);
                    if (!workset.Contains(qq))
                    {
                        worklist.AddLast(qq);
                        workset.Add(qq);
                    }
                }
            }
        }
    }

    // Add transitions.
    foreach (StatePair p in pairs)
    {
        p.FirstState.AddEpsilon(p.SecondState);
    }

    a.IsDeterministic = false;
    a.ClearHashCode();
    a.CheckMinimizeAlways();
}
/// <summary>
/// Constructs an automaton that accepts strings representing the given decimal number.
/// Surrounding whitespace is permitted, as are extra leading zeros and extra trailing zeros
/// after the fraction.
/// </summary>
/// <param name="value">The string representation of the decimal number.</param>
/// <returns>The minimized automaton for the number.</returns>
public static Automaton MakeDecimalValue(String value)
{
    // Skip to the first digit 1-9 or '.', remembering whether a '-' was passed on the way.
    bool negative = false;
    int start = 0;
    for (; start < value.Length; start++)
    {
        char ch = value[start];
        if (ch == '-')
        {
            negative = true;
        }

        if ((ch >= '1' && ch <= '9') || ch == '.')
        {
            break;
        }
    }

    string integerPart;
    string fractionPart = string.Empty;
    int dot = value.IndexOf('.', start);
    if (dot == -1)
    {
        integerPart = value.Substring(start);
    }
    else
    {
        integerPart = value.Substring(start, dot - start);

        // Walk back from the end to the last digit 1-9 so trailing zeros are dropped.
        int last = value.Length - 1;
        while (last > dot && !(value[last] >= '1' && value[last] <= '9'))
        {
            last--;
        }

        fractionPart = value.Substring(dot + 1, last - dot);
    }

    if (integerPart.Length == 0)
    {
        integerPart = "0";
    }

    // A negative number requires '-'; otherwise a '+' is optional.
    Automaton sign;
    if (negative)
    {
        sign = Automaton.MakeChar('-');
    }
    else
    {
        sign = Automaton.MakeChar('+').Optional();
    }

    Automaton fraction;
    if (fractionPart.Length == 0)
    {
        // No significant fraction: an optional '.' followed by at least one '0'.
        fraction = Automaton.MakeChar('.').Concatenate(Automaton.MakeChar('0').Repeat(1)).Optional();
    }
    else
    {
        fraction = Automaton.MakeChar('.')
            .Concatenate(Automaton.MakeString(fractionPart))
            .Concatenate(Automaton.MakeChar('0').Repeat());
    }

    Automaton ws = Datatypes.WhitespaceAutomaton;
    Automaton leadingZeros = Automaton.MakeChar('0').Repeat();
    Automaton number = sign.Concatenate(leadingZeros)
        .Concatenate(Automaton.MakeString(integerPart))
        .Concatenate(fraction);

    return Automaton.Minimize(ws.Concatenate(number).Concatenate(ws));
}
/// <summary>
/// Builds an automaton accepting between <c>min</c> and <c>max</c> (including both)
/// concatenated repetitions of the language of the given automaton.
/// </summary>
/// <param name="a">The automaton; it is expanded from singleton form.</param>
/// <param name="min">The minimum number of repetitions.</param>
/// <param name="max">The maximum number of repetitions.</param>
/// <returns>
/// The empty automaton when <c>min</c> is greater than <c>max</c>; otherwise an automaton made
/// of <c>min</c> mandatory copies followed by a chain of <c>max - min</c> optional copies.
/// </returns>
public static Automaton Repeat(Automaton a, int min, int max)
{
    if (min > max)
    {
        return BasicAutomata.MakeEmpty();
    }

    int optionalCount = max - min;
    a.ExpandSingleton();

    // Mandatory part: exactly min copies.
    Automaton result;
    switch (min)
    {
        case 0:
            result = BasicAutomata.MakeEmptyString();
            break;
        case 1:
            result = a.Clone();
            break;
        default:
            var copies = new List<Automaton>();
            for (int i = 0; i < min; i++)
            {
                copies.Add(a);
            }

            result = BasicOperations.Concatenate(copies);
            break;
    }

    if (optionalCount > 0)
    {
        // Optional part: build the chain back to front, each new copy leading into the previous one.
        Automaton chain = a.Clone();
        for (int remaining = optionalCount - 1; remaining > 0; remaining--)
        {
            Automaton link = a.Clone();
            foreach (State accepting in link.GetAcceptStates())
            {
                accepting.AddEpsilon(chain.Initial);
            }

            chain = link;
        }

        foreach (State accepting in result.GetAcceptStates())
        {
            accepting.AddEpsilon(chain.Initial);
        }

        result.IsDeterministic = false;
        result.ClearHashCode();
        result.CheckMinimizeAlways();
    }

    return result;
}
/// <summary>
/// Minimizes the given automaton in place by calling its <c>Minimize</c> method.
/// </summary>
/// <param name="a">The automaton to minimize; it is modified.</param>
/// <returns>The same instance that was passed in.</returns>
internal static Automaton Minimize(Automaton a)
{
    a.Minimize();
    return(a);
}
/// <summary>
/// Intersects this automaton with the given one by delegating to
/// <c>BasicOperations.Intersection(this, a)</c>.
/// </summary>
/// <param name="a">The automaton to intersect with.</param>
/// <returns>The automaton returned by <c>BasicOperations.Intersection</c>.</returns>
public Automaton Intersection(Automaton a)
{
    return BasicOperations.Intersection(this, a);
}
/// <summary>
/// Intended to return an automaton that accepts the overlap of strings that in more than one
/// way can be split into a left part being accepted by <c>a1</c> and a right part being
/// accepted by <c>a2</c>. This operation is not implemented.
/// </summary>
/// <param name="a1">The automaton for the left part.</param>
/// <param name="a2">The automaton for the right part.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static Automaton Overlap(Automaton a1, Automaton a2)
{
    throw new NotImplementedException();
}
/// <summary>
/// Translates this regular expression node into an automaton according to its kind.
/// </summary>
/// <param name="automata">Optional map from identifier to automaton, consulted for named
/// automaton references.</param>
/// <param name="automatonProvider">Optional provider asked for a named automaton when the map
/// has no entry.</param>
/// <param name="minimize">Forwarded unchanged to nested translations.
/// NOTE(review): this method itself minimizes every composite result regardless of this flag;
/// confirm whether that is intended.</param>
/// <returns>The automaton for this node, or null when the kind matches none of the handled
/// cases.</returns>
/// <exception cref="ArgumentException">A named automaton cannot be found, or the provider fails
/// with an <see cref="IOException"/>.</exception>
private Automaton ToAutomaton(
    IDictionary<string, Automaton> automata,
    IAutomatonProvider automatonProvider,
    bool minimize)
{
    Automaton result = null;
    IList<Automaton> operands;

    // Set by the composite kinds; their result is minimized once after the switch.
    bool isComposite = false;

    switch (this.Kind)
    {
        case Kind.RegexpUnion:
            operands = new List<Automaton>();
            this.FindLeaves(Expr1, Kind.RegexpUnion, operands, automata, automatonProvider, minimize);
            this.FindLeaves(Expr2, Kind.RegexpUnion, operands, automata, automatonProvider, minimize);
            result = BasicOperations.Union(operands);
            isComposite = true;
            break;

        case Kind.RegexpConcatenation:
            operands = new List<Automaton>();
            this.FindLeaves(Expr1, Kind.RegexpConcatenation, operands, automata, automatonProvider, minimize);
            this.FindLeaves(Expr2, Kind.RegexpConcatenation, operands, automata, automatonProvider, minimize);
            result = BasicOperations.Concatenate(operands);
            isComposite = true;
            break;

        case Kind.RegexpIntersection:
            {
                Automaton left = Expr1.ToAutomaton(automata, automatonProvider, minimize);
                Automaton right = Expr2.ToAutomaton(automata, automatonProvider, minimize);
                result = left.Intersection(right);
                isComposite = true;
                break;
            }

        case Kind.RegexpOptional:
            result = Expr1.ToAutomaton(automata, automatonProvider, minimize).Optional();
            isComposite = true;
            break;

        case Kind.RegexpRepeat:
            result = Expr1.ToAutomaton(automata, automatonProvider, minimize).Repeat();
            isComposite = true;
            break;

        case Kind.RegexpRepeatMin:
            result = Expr1.ToAutomaton(automata, automatonProvider, minimize).Repeat(Min);
            isComposite = true;
            break;

        case Kind.RegexpRepeatMinMax:
            result = Expr1.ToAutomaton(automata, automatonProvider, minimize).Repeat(Min, Max);
            isComposite = true;
            break;

        case Kind.RegexpComplement:
            result = Expr1.ToAutomaton(automata, automatonProvider, minimize).Complement();
            isComposite = true;
            break;

        case Kind.RegexpChar:
            result = BasicAutomata.MakeChar(this.Char);
            break;

        case Kind.RegexpCharRange:
            result = BasicAutomata.MakeCharRange(FromChar, ToChar);
            break;

        case Kind.RegexpAnyChar:
            result = BasicAutomata.MakeAnyChar();
            break;

        case Kind.RegexpEmpty:
            result = BasicAutomata.MakeEmpty();
            break;

        case Kind.RegexpString:
            result = BasicAutomata.MakeString(SourceRegExpr);
            break;

        case Kind.RegexpAnyString:
            result = BasicAutomata.MakeAnyString();
            break;

        case Kind.RegexpAutomaton:
            Automaton named = null;
            if (automata != null)
            {
                automata.TryGetValue(SourceRegExpr, out named);
            }

            // Fall back to the provider only when the map had nothing.
            if (named == null && automatonProvider != null)
            {
                try
                {
                    named = automatonProvider.GetAutomaton(SourceRegExpr);
                }
                catch (IOException e)
                {
                    throw new ArgumentException(string.Empty, e);
                }
            }

            if (named == null)
            {
                throw new ArgumentException("'" + SourceRegExpr + "' not found");
            }

            // Always clone here (ignore allowMutate).
            result = named.Clone();
            break;

        case Kind.RegexpInterval:
            result = BasicAutomata.MakeInterval(Min, Max, Digits);
            break;
    }

    if (isComposite)
    {
        result.Minimize();
    }

    return result;
}
/// <summary>
/// Placeholder helper; not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
private static void AcceptToAccept(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Constructs a deterministic automaton that matches strings that contain the given substring.
/// </summary>
/// <param name="s">The substring to look for.</param>
/// <returns>
/// An automaton with one state per matched prefix length of <paramref name="s"/>; the state
/// for the full length is accepting and loops to itself on every character.
/// </returns>
public static Automaton MakeStringMatcher(String s)
{
    var a = new Automaton();

    // states[i] means "the first i characters of s have just been matched".
    var states = new State[s.Length + 1];
    states[0] = a.Initial;
    for (int i = 0; i < s.Length; i++)
    {
        states[i + 1] = new State();
    }

    State f = states[s.Length];
    f.Accept = true;
    f.Transitions.Add(new Transition(Char.MinValue, Char.MaxValue, f));

    for (int i = 0; i < s.Length; i++)
    {
        // Characters that already have an outgoing transition from states[i].
        var done = new HashSet<char>();
        char c = s[i];
        states[i].Transitions.Add(new Transition(c, states[i + 1]));
        done.Add(c);

        // Fallback edges: on character d, go to the longest shorter prefix that is still matched.
        for (int j = i; j >= 1; j--)
        {
            char d = s[j - 1];

            // Ordinal comparison of s[0..j-1) with s[i-j+1..i): the automaton works on raw
            // code units, so culture rules must not influence which prefixes count as equal.
            if (!done.Contains(d) && string.CompareOrdinal(s, 0, s, i - j + 1, j - 1) == 0)
            {
                states[i].Transitions.Add(new Transition(d, states[j]));
                done.Add(d);
            }
        }

        var da = new char[done.Count];
        done.CopyTo(da);
        Array.Sort(da);

        // Every character not handled above sends the matcher back to the start state.
        // Walk the sorted handled characters and emit one interval per gap between them.
        int from = Char.MinValue;
        int k = 0;
        while (from <= Char.MaxValue)
        {
            while (k < da.Length && da[k] == from)
            {
                k++;
                from++;
            }

            if (from <= Char.MaxValue)
            {
                int to = Char.MaxValue;
                if (k < da.Length)
                {
                    to = da[k] - 1;
                    k++;
                }

                states[i].Transitions.Add(new Transition((char)from, (char)to, states[0]));

                // to + 1 is a handled character, so the next gap starts at to + 2.
                from = to + 2;
            }
        }
    }

    a.IsDeterministic = true;
    return a;
}
/// <summary>
/// Intended to return an automaton that accepts the single chars that occur in strings that
/// are accepted by the given automaton. This operation is not implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static Automaton SingleChars(Automaton a)
{
    throw new NotImplementedException();
}
/// <summary>
/// Determinizes the given automaton in place with the subset construction, starting from the
/// given set of initial states.
/// </summary>
/// <param name="a">The automaton; its initial state is replaced and it is flagged as
/// deterministic.</param>
/// <param name="initialset">The states forming the initial subset.</param>
public static void Determinize(Automaton a, List<State> initialset)
{
    char[] points = a.GetStartPoints();
    var comparer = new ListEqualityComparer<State>();

    // Subset construction: each discovered subset of old states maps to one new state.
    var knownSubsets = new Dictionary<List<State>, List<State>>(comparer);
    var pending = new LinkedList<List<State>>();
    var subsetState = new Dictionary<List<State>, State>(comparer);

    knownSubsets.Add(initialset, initialset);
    pending.AddLast(initialset);
    a.Initial = new State();
    subsetState.Add(initialset, a.Initial);

    while (pending.Count > 0)
    {
        List<State> members = pending.RemoveAndReturnFirst();
        State source;
        subsetState.TryGetValue(members, out source);

        // A subset is accepting as soon as one of its members is.
        if (members.Any(member => member.Accept))
        {
            source.Accept = true;
        }

        for (int n = 0; n < points.Length; n++)
        {
            char point = points[n];

            var reached = new HashSet<State>();
            foreach (State member in members)
            {
                foreach (Transition edge in member.Transitions)
                {
                    if (edge.Min <= point && point <= edge.Max)
                    {
                        reached.Add(edge.To);
                    }
                }
            }

            List<State> key = reached.ToList();
            State target;
            if (!subsetState.TryGetValue(key, out target))
            {
                knownSubsets.Add(key, key);
                pending.AddLast(key);
                target = new State();
                subsetState.Add(key, target);
            }

            // The interval runs up to just before the next start point, or to the end of the range.
            char high = n + 1 < points.Length ? (char)(points[n + 1] - 1) : char.MaxValue;
            source.Transitions.Add(new Transition(point, high, target));
        }
    }

    a.IsDeterministic = true;
    a.RemoveDeadTransitions();
}
/// <summary>
/// Intended to return an automaton that accepts the compressed language of the given
/// automaton: whenever a <c>c</c> character is allowed in the original automaton, one or more
/// <c>set</c> characters would be allowed in the new automaton. This operation is not
/// implemented.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="set">The set of characters to be compressed.</param>
/// <param name="c">The canonical compress character (assumed to be in <c>set</c>).</param>
/// <returns>Never returns at present.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static Automaton Compress(Automaton a, string set, char c)
{
    throw new NotImplementedException();
}
/// <summary>
/// Adds epsilon transitions to the given automaton. The given pairs are first closed
/// transitively (new pairs are added to <paramref name="pairs"/>), then an epsilon move is
/// added from the first to the second state of every pair.
/// </summary>
/// <param name="a">The automaton; it is expanded from singleton form and flagged as
/// nondeterministic.</param>
/// <param name="pairs">A collection of <see cref="StatePair"/> objects representing pairs of
/// source/destination states where epsilon transitions should be added. The collection is
/// extended with the pairs implied by transitivity.</param>
public static void AddEpsilons(Automaton a, ICollection<StatePair> pairs)
{
    a.ExpandSingleton();

    // forward[s]: states reachable from s by one epsilon; back[s]: states with an epsilon to s.
    var forward = new Dictionary<State, HashSet<State>>();
    var back = new Dictionary<State, HashSet<State>>();
    foreach (StatePair p in pairs)
    {
        // TryGetValue, not the indexer: the indexer throws KeyNotFoundException for a
        // missing key instead of yielding null.
        HashSet<State> to;
        if (!forward.TryGetValue(p.FirstState, out to))
        {
            to = new HashSet<State>();
            forward.Add(p.FirstState, to);
        }

        to.Add(p.SecondState);

        HashSet<State> from;
        if (!back.TryGetValue(p.SecondState, out from))
        {
            from = new HashSet<State>();
            back.Add(p.SecondState, from);
        }

        from.Add(p.FirstState);
    }

    // Calculate the transitive closure of the epsilon relation.
    var worklist = new LinkedList<StatePair>(pairs);
    var workset = new HashSet<StatePair>(pairs);
    while (worklist.Count != 0)
    {
        StatePair p = worklist.RemoveAndReturnFirst();
        workset.Remove(p);

        // Either lookup may legitimately miss; the out value is then null.
        HashSet<State> to;
        forward.TryGetValue(p.SecondState, out to);
        HashSet<State> from;
        back.TryGetValue(p.FirstState, out from);

        if (to == null)
        {
            continue;
        }

        foreach (State s in to)
        {
            var pp = new StatePair(p.FirstState, s);
            if (pairs.Contains(pp))
            {
                continue;
            }

            pairs.Add(pp);

            // Both keys exist: p.FirstState is the source of a recorded pair and s is the
            // destination of one.
            forward[p.FirstState].Add(s);
            back[s].Add(p.FirstState);
            worklist.AddLast(pp);
            workset.Add(pp);

            if (from != null)
            {
                // Predecessors of p.FirstState must be revisited to pick up the new target.
                foreach (State q in from)
                {
                    var qq = new StatePair(q, p.FirstState);
                    if (!workset.Contains(qq))
                    {
                        worklist.AddLast(qq);
                        workset.Add(qq);
                    }
                }
            }
        }
    }

    // Add transitions.
    foreach (StatePair p in pairs)
    {
        p.FirstState.AddEpsilon(p.SecondState);
    }

    a.IsDeterministic = false;
    a.ClearHashCode();
    a.CheckMinimizeAlways();
}
/// <summary>
/// Intended contract (not yet implemented): builds an automaton in which all transition
/// labels have been substituted.
/// <para>
/// Each transition labeled <c>c</c> is changed to a set of transitions, one for each
/// character that <paramref name="dictionary"/> maps <c>c</c> to. A transition whose label
/// has no mapping is left unchanged.
/// </para>
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="dictionary">The dictionary from characters to sets of characters.</param>
/// <returns>Never returns normally at present.</returns>
/// <exception cref="NotImplementedException">Always thrown; the operation is a stub.</exception>
public static Automaton Subst(Automaton a, IDictionary<char, HashSet<char>> dictionary)
{
    // Placeholder: the operation has not been implemented yet.
    throw new NotImplementedException();
}
/// <summary>
/// Calls <c>Minimize()</c> on the given automaton and hands the same instance back, which
/// allows the call to be used inside an expression.
/// </summary>
/// <param name="a">The automaton to minimize.</param>
/// <returns>The instance passed in as <paramref name="a"/>.</returns>
public static Automaton Minimize(Automaton a)
{
    a.Minimize();
    return a;
}
/// <summary>
/// Intended contract (not yet implemented): builds an automaton in which every transition
/// on the given character is replaced by the given string.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="c">The character whose transitions are to be replaced.</param>
/// <param name="s">The replacement string.</param>
/// <returns>Never returns normally at present.</returns>
/// <exception cref="NotImplementedException">Always thrown; the operation is a stub.</exception>
public static Automaton Subst(Automaton a, char c, string s)
{
    // Placeholder: the operation has not been implemented yet.
    throw new NotImplementedException();
}
/// <summary>
/// Intersects this automaton with another one by delegating to
/// <c>BasicOperations.Intersection</c>, with this instance as the first operand.
/// </summary>
/// <param name="a">The automaton to intersect with.</param>
/// <returns>The automaton produced by <c>BasicOperations.Intersection(this, a)</c>.</returns>
public Automaton Intersection(Automaton a)
{
    Automaton intersection = BasicOperations.Intersection(this, a);
    return intersection;
}
/// <summary>
/// Intended contract (not yet implemented): builds an automaton accepting the homomorphic
/// image of the given automaton under the given function.
/// <para>
/// Each transition label is mapped to a new value. <paramref name="source"/> and
/// <paramref name="dest"/> are assumed to be arrays of the same length, and
/// <paramref name="source"/> must be sorted in increasing order without duplicates.
/// <paramref name="source"/> defines the starting points of char intervals, and the
/// corresponding entries in <paramref name="dest"/> define the starting points of the
/// corresponding new intervals.
/// </para>
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="source">The starting points of the source char intervals.</param>
/// <param name="dest">The starting points of the destination char intervals.</param>
/// <returns>Never returns normally at present.</returns>
/// <exception cref="NotImplementedException">Always thrown; the operation is a stub.</exception>
public static Automaton Homomorph(Automaton a, char[] source, char[] dest)
{
    // Placeholder: the operation has not been implemented yet.
    throw new NotImplementedException();
}
/// <summary>
/// Builds a (deterministic) automaton that accepts the complement of the language of the
/// given automaton.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>A (deterministic) automaton that accepts the complement of the language of the
/// given automaton.</returns>
/// <remarks>
/// Complexity: linear in number of states (if already deterministic).
/// </remarks>
public static Automaton Complement(Automaton a)
{
    Automaton complement = a.CloneExpandedIfRequired();
    complement.Determinize();
    complement.Totalize();

    // Flip the acceptance flag of every state.
    foreach (State state in complement.GetStates())
    {
        state.Accept = !state.Accept;
    }

    complement.RemoveDeadTransitions();
    return complement;
}
/// <summary>
/// Intended contract (not yet implemented): builds an automaton with a projected alphabet.
/// The new automaton accepts all strings that are projections of strings accepted by the
/// given automaton onto the given characters. In the original description, a <c>null</c>
/// entry in the set abbreviates the intervals u0000-uDFFF and uF900-uFFFF (i.e., the
/// non-private code points), and all other characters from <paramref name="chars"/> are
/// assumed to be in the interval uE000-uF8FF.
/// </summary>
/// <remarks>
/// NOTE(review): a <c>HashSet&lt;char&gt;</c> cannot contain <c>null</c>, so the
/// <c>null</c> abbreviation described above cannot be expressed with this signature.
/// </remarks>
/// <param name="a">The automaton.</param>
/// <param name="chars">The characters to project onto.</param>
/// <returns>Never returns normally at present.</returns>
/// <exception cref="NotImplementedException">Always thrown; the operation is a stub.</exception>
public static Automaton ProjectChars(Automaton a, HashSet<char> chars)
{
    // Placeholder: the operation has not been implemented yet.
    throw new NotImplementedException();
}
/// <summary>
/// Determinizes the given automaton, starting from its initial state.
/// </summary>
/// <remarks>
/// Complexity: exponential in number of states.
/// </remarks>
/// <param name="a">The automaton. Left untouched when it is already flagged as
/// deterministic or is a singleton.</param>
public static void Determinize(Automaton a)
{
    if (!a.IsDeterministic && !a.IsSingleton)
    {
        // The subset construction is seeded with the initial state only.
        var seed = new List<State> { a.Initial };
        BasicOperations.Determinize(a, seed);
    }
}
/// <summary>
/// Tells whether the given automaton accepts no strings.
/// </summary>
/// <param name="a">The automaton.</param>
/// <returns>
/// <c>false</c> for a singleton automaton; otherwise <c>true</c> exactly when the initial
/// state is non-accepting and has no outgoing transitions.
/// </returns>
public static bool IsEmpty(Automaton a)
{
    // The singleton check comes first so the initial state is not touched for singletons.
    return !a.IsSingleton
        && !a.Initial.Accept
        && a.Initial.Transitions.Count == 0;
}
/// <summary>
/// Intended contract (not yet implemented): collects the set of accepted strings of the
/// given length.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="length">The length of the strings to collect.</param>
/// <returns>Never returns normally at present.</returns>
/// <exception cref="NotImplementedException">Always thrown; the operation is a stub.</exception>
public static HashSet<string> GetStrings(Automaton a, int length)
{
    // Placeholder: the operation has not been implemented yet.
    throw new NotImplementedException();
}
/// <summary>
/// Builds an automaton that accepts the intersection of the languages of the given automata.
/// Never modifies the input automata languages.
/// </summary>
/// <param name="a1">The first automaton.</param>
/// <param name="a2">The second automaton.</param>
/// <returns>An automaton for the intersection of both languages.</returns>
public static Automaton Intersection(Automaton a1, Automaton a2)
{
    // A singleton operand reduces the problem to a membership test on the other operand.
    if (a1.IsSingleton)
    {
        return a2.Run(a1.Singleton) ? a1.CloneIfRequired() : BasicAutomata.MakeEmpty();
    }

    if (a2.IsSingleton)
    {
        return a1.Run(a2.Singleton) ? a2.CloneIfRequired() : BasicAutomata.MakeEmpty();
    }

    if (a1 == a2)
    {
        return a1.CloneIfRequired();
    }

    Transition[][] sortedFirst = Automaton.GetSortedTransitions(a1.GetStates());
    Transition[][] sortedSecond = Automaton.GetSortedTransitions(a2.GetStates());

    var product = new Automaton();
    var pending = new LinkedList<StatePair>();
    var known = new Dictionary<StatePair, StatePair>();

    var current = new StatePair(product.Initial, a1.Initial, a2.Initial);
    pending.AddLast(current);
    known.Add(current, current);

    while (pending.Count > 0)
    {
        current = pending.RemoveAndReturnFirst();
        current.S.Accept = current.FirstState.Accept && current.SecondState.Accept;

        Transition[] left = sortedFirst[current.FirstState.Number];
        Transition[] right = sortedSecond[current.SecondState.Number];

        // Index of the first right-hand transition that can still overlap; it only moves
        // forward while the left-hand transitions are walked in order.
        int firstCandidate = 0;
        foreach (Transition lt in left)
        {
            while (firstCandidate < right.Length && right[firstCandidate].Max < lt.Min)
            {
                firstCandidate++;
            }

            for (int i = firstCandidate; i < right.Length && lt.Max >= right[i].Min; i++)
            {
                Transition rt = right[i];
                if (rt.Max < lt.Min)
                {
                    continue;
                }

                // Look up (or create) the product state for the pair of targets.
                var target = new StatePair(lt.To, rt.To);
                StatePair existing;
                if (!known.TryGetValue(target, out existing))
                {
                    target.S = new State();
                    pending.AddLast(target);
                    known.Add(target, target);
                    existing = target;
                }

                // The new transition covers the overlap of both intervals.
                char lo = lt.Min > rt.Min ? lt.Min : rt.Min;
                char hi = lt.Max < rt.Max ? lt.Max : rt.Max;
                current.S.Transitions.Add(new Transition(lo, hi, existing.S));
            }
        }
    }

    product.IsDeterministic = a1.IsDeterministic && a2.IsDeterministic;
    product.RemoveDeadTransitions();
    product.CheckMinimizeAlways();
    return product;
}
/// <summary>
/// Intended contract (not yet implemented): collects the set of accepted strings, assuming
/// that at most <paramref name="limit"/> strings are accepted. If more than
/// <paramref name="limit"/> strings are accepted, <c>null</c> is returned. If
/// <paramref name="limit"/> &lt; 0, the method is meant to behave like the variant of this
/// operation that takes no limit.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="limit">The maximum number of strings to collect.</param>
/// <returns>Never returns normally at present.</returns>
/// <exception cref="NotImplementedException">Always thrown; the operation is a stub.</exception>
public static HashSet<string> GetFiniteStrings(Automaton a, int limit)
{
    // Placeholder: the operation has not been implemented yet.
    throw new NotImplementedException();
}
/// <summary>
/// Builds an automaton that accepts the union of the empty string and the language of the
/// given automaton.
/// </summary>
/// <param name="a">The automaton.</param>
/// <remarks>
/// Complexity: linear in number of states.
/// </remarks>
/// <returns>An automaton that accepts the union of the empty string and the language of the
/// given automaton.</returns>
public static Automaton Optional(Automaton a)
{
    Automaton result = a.CloneExpandedIfRequired();

    // A fresh accepting entry state with an epsilon edge to the old initial state.
    var entry = new State();
    entry.AddEpsilon(result.Initial);
    entry.Accept = true;

    result.Initial = entry;
    result.IsDeterministic = false;
    result.ClearHashCode();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Accepts <paramref name="min"/> or more concatenated repetitions of the language of the
/// given automaton.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="min">The minimum number of concatenated repetitions of the language of the
/// given automaton.</param>
/// <returns>An automaton that accepts <paramref name="min"/> or more concatenated
/// repetitions of the language of the given automaton.</returns>
/// <remarks>
/// Complexity: linear in number of states and in <paramref name="min"/>.
/// </remarks>
public static Automaton Repeat(Automaton a, int min)
{
    if (min == 0)
    {
        return BasicOperations.Repeat(a);
    }

    // The mandatory copies come first, followed by the unbounded repetition.
    var parts = new List<Automaton>();
    for (int i = 0; i < min; i++)
    {
        parts.Add(a);
    }

    parts.Add(BasicOperations.Repeat(a));
    return BasicOperations.Concatenate(parts);
}
/// <summary>
/// Returns true if the given string is accepted by the automaton.
/// </summary>
/// <param name="a">The automaton.</param>
/// <param name="s">The string.</param>
/// <returns><c>true</c> if <paramref name="s"/> is accepted by <paramref name="a"/>;
/// otherwise, <c>false</c>.</returns>
/// <remarks>
/// Complexity: linear in the length of the string.
/// For full performance, use the RunAutomaton class.
/// </remarks>
public static bool Run(Automaton a, string s)
{
    if (a.IsSingleton)
    {
        // Compare with the singleton string itself. Comparing with the IsSingleton flag
        // tests a string against a boxed bool, which is never equal, so every input
        // (including the singleton string) used to be rejected.
        return s.Equals(a.Singleton);
    }

    if (a.IsDeterministic)
    {
        // Deterministic case: follow the single possible path.
        State current = a.Initial;
        foreach (char ch in s)
        {
            State next = current.Step(ch);
            if (next == null)
            {
                return false;
            }

            current = next;
        }

        return current.Accept;
    }

    // Non-deterministic case: advance the whole set of reachable states one character at
    // a time.
    HashSet<State> states = a.GetStates();
    Automaton.SetStateNumbers(states);
    var frontier = new LinkedList<State>();
    var nextFrontier = new LinkedList<State>();

    // Indexed by state number; marks states already queued in nextFrontier.
    var queued = new BitArray(states.Count);
    frontier.AddLast(a.Initial);
    var dest = new List<State>();
    bool accept = a.Initial.Accept;
    foreach (char c in s)
    {
        accept = false;
        nextFrontier.Clear();
        queued.SetAll(false);
        foreach (State p in frontier)
        {
            dest.Clear();
            p.Step(c, dest);
            foreach (State q in dest)
            {
                if (q.Accept)
                {
                    accept = true;
                }

                if (!queued.Get(q.Number))
                {
                    queued.Set(q.Number, true);
                    nextFrontier.AddLast(q);
                }
            }
        }

        // Swap the two lists so the old frontier is reused as scratch space.
        LinkedList<State> swap = frontier;
        frontier = nextFrontier;
        nextFrontier = swap;
    }

    return accept;
}
/// <summary>
/// Concatenates this automaton with another one by delegating to
/// <c>BasicOperations.Concatenate</c>, with this instance as the first operand.
/// </summary>
/// <param name="a">The automaton to append.</param>
/// <returns>The automaton produced by <c>BasicOperations.Concatenate(this, a)</c>.</returns>
public Automaton Concatenate(Automaton a)
{
    Automaton concatenation = BasicOperations.Concatenate(this, a);
    return concatenation;
}
/// <summary>
/// Minimizes the given automaton using Huffman's algorithm.
/// </summary>
/// <remarks>
/// The automaton is determinized and totalized first. Pairs of states are then marked as
/// distinguishable, unmarked states are merged into equivalence classes, and the automaton
/// is rebuilt with one state per class.
/// </remarks>
/// <param name="a">The automaton.</param>
public static void MinimizeHuffman(Automaton a)
{
    a.Determinize();
    a.Totalize();
    HashSet<State> ss = a.GetStates();
    var transitions = new Transition[ss.Count][];
    State[] states = ss.ToArray();

    // mark[n1][n2] becomes true once states n1 and n2 are known to be distinguishable.
    // The table must be allocated up front (n x n, all false): indexing into an empty
    // List throws ArgumentOutOfRangeException.
    var mark = new List<List<bool>>(states.Length);
    var triggers = new List<List<HashSet<IntPair>>>(states.Length);
    for (int n = 0; n < states.Length; n++)
    {
        mark.Add(new List<bool>(new bool[states.Length]));

        var v = new List<HashSet<IntPair>>();
        Initialize(ref v, states.Length);
        triggers.Add(v);
    }

    // Initialize marks based on acceptance status and find transition arrays.
    for (int n1 = 0; n1 < states.Length; n1++)
    {
        states[n1].Number = n1;
        transitions[n1] = states[n1].GetSortedTransitions(false).ToArray();
        for (int n2 = n1 + 1; n2 < states.Length; n2++)
        {
            if (states[n1].Accept != states[n2].Accept)
            {
                mark[n1][n2] = true;
            }
        }
    }

    // For all pairs, see if states agree.
    for (int n1 = 0; n1 < states.Length; n1++)
    {
        for (int n2 = n1 + 1; n2 < states.Length; n2++)
        {
            if (!mark[n1][n2])
            {
                if (MinimizationOperations.StatesAgree(transitions, mark, n1, n2))
                {
                    MinimizationOperations.AddTriggers(transitions, triggers, n1, n2);
                }
                else
                {
                    MinimizationOperations.MarkPair(mark, triggers, n1, n2);
                }
            }
        }
    }

    // Assign equivalence class numbers to states.
    int numclasses = 0;
    foreach (State t in states)
    {
        t.Number = -1;
    }

    for (int n1 = 0; n1 < states.Length; n1++)
    {
        if (states[n1].Number == -1)
        {
            states[n1].Number = numclasses;
            for (int n2 = n1 + 1; n2 < states.Length; n2++)
            {
                if (!mark[n1][n2])
                {
                    states[n2].Number = numclasses;
                }
            }

            numclasses++;
        }
    }

    // Make a new state for each equivalence class.
    var newstates = new State[numclasses];
    for (int n = 0; n < numclasses; n++)
    {
        newstates[n] = new State();
    }

    // Select a class representative for each class and find the new initial state.
    // A new state's Number temporarily stores the index of its representative old state.
    for (int n = 0; n < states.Length; n++)
    {
        newstates[states[n].Number].Number = n;
        if (states[n] == a.Initial)
        {
            a.Initial = newstates[states[n].Number];
        }
    }

    // Build transitions and set acceptance.
    for (int n = 0; n < numclasses; n++)
    {
        State s = newstates[n];
        s.Accept = states[s.Number].Accept;
        foreach (Transition t in states[s.Number].Transitions)
        {
            s.Transitions.Add(new Transition(t.Min, t.Max, newstates[t.To.Number]));
        }
    }

    a.RemoveDeadTransitions();
}