Пример #1
0
        /// <summary>
        /// Returns an automaton that accepts the union of the languages of the given
        /// automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Union(Automaton a1, Automaton a2)
        {
            if ((a1.IsSingleton && a2.IsSingleton && a1.singleton.Equals(a2.singleton, StringComparison.Ordinal)) || a1 == a2)
            {
                return(a1.CloneIfRequired());
            }
            if (a1 == a2)
            {
                a1 = a1.CloneExpanded();
                a2 = a2.CloneExpanded();
            }
            else
            {
                a1 = a1.CloneExpandedIfRequired();
                a2 = a2.CloneExpandedIfRequired();
            }
            State s = new State();

            s.AddEpsilon(a1.initial);
            s.AddEpsilon(a2.initial);
            a1.initial       = s;
            a1.deterministic = false;
            //a1.clearHashCode();
            a1.ClearNumberedStates();
            a1.CheckMinimizeAlways();
            return(a1);
        }
Пример #2
0
        /// <summary>
        /// Returns a clone of this automaton.
        /// </summary>
        public virtual object Clone()
        {
            Automaton a = (Automaton)base.MemberwiseClone();

            if (!IsSingleton)
            {
                Dictionary <State, State> m = new Dictionary <State, State>();
                State[] states = GetNumberedStates();
                foreach (State s in states)
                {
                    m[s] = new State();
                }
                foreach (State s in states)
                {
                    State p = m[s];
                    p.accept = s.accept;
                    if (s == initial)
                    {
                        a.initial = p;
                    }
                    foreach (Transition t in s.GetTransitions())
                    {
                        p.AddTransition(new Transition(t.min, t.max, m[t.to]));
                    }
                }
            }
            a.ClearNumberedStates();
            return(a);
        }
Пример #3
0
        /// <summary>
        /// Returns an automaton that accepts the union of the empty string and the
        /// language of the given automaton.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Optional(Automaton a)
        {
            a = a.CloneExpandedIfRequired();
            State s = new State();

            s.AddEpsilon(a.initial);
            s.accept        = true;
            a.initial       = s;
            a.deterministic = false;
            //a.clearHashCode();
            a.ClearNumberedStates();
            a.CheckMinimizeAlways();
            return(a);
        }
Пример #4
0
        /// <summary>
        /// Returns an automaton that accepts the Kleene star (zero or more
        /// concatenated repetitions) of the language of the given automaton. Never
        /// modifies the input automaton language.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Repeat(Automaton a)
        {
            a = a.CloneExpanded();
            State s = new State();

            s.accept = true;
            s.AddEpsilon(a.initial);
            foreach (State p in a.GetAcceptStates())
            {
                p.AddEpsilon(s);
            }
            a.initial       = s;
            a.deterministic = false;
            //a.clearHashCode();
            a.ClearNumberedStates();
            a.CheckMinimizeAlways();
            return(a);
        }
Пример #5
0
        /// <summary>
        /// Reverses the language of the given (non-singleton) automaton while returning
        /// the set of new initial states.
        /// </summary>
        public static ISet <State> Reverse(Automaton a)
        {
            a.ExpandSingleton();
            // reverse all edges
            Dictionary <State, ISet <Transition> > m = new Dictionary <State, ISet <Transition> >();

            State[]      states = a.GetNumberedStates();
            ISet <State> accept = new JCG.HashSet <State>();

            foreach (State s in states)
            {
                if (s.Accept)
                {
                    accept.Add(s);
                }
            }
            foreach (State r in states)
            {
                m[r]     = new JCG.HashSet <Transition>();
                r.accept = false;
            }
            foreach (State r in states)
            {
                foreach (Transition t in r.GetTransitions())
                {
                    m[t.to].Add(new Transition(t.min, t.max, r));
                }
            }
            foreach (State r in states)
            {
                ISet <Transition> tr = m[r];
                r.SetTransitions(tr.ToArray(/*new Transition[tr.Count]*/));
            }
            // make new initial+final states
            a.initial.accept = true;
            a.initial        = new State();
            foreach (State r in accept)
            {
                a.initial.AddEpsilon(r); // ensures that all initial states are reachable
            }
            a.deterministic = false;
            a.ClearNumberedStates();
            return(accept);
        }
Пример #6
0
        /// <summary>
        /// Returns an automaton that accepts the union of the languages of the given
        /// automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Union(ICollection <Automaton> l)
        {
            JCG.HashSet <int> ids = new JCG.HashSet <int>();
            foreach (Automaton a in l)
            {
                ids.Add(a.GetHashCode());
            }
            bool  has_aliases = ids.Count != l.Count;
            State s           = new State();

            foreach (Automaton b in l)
            {
                if (BasicOperations.IsEmpty(b))
                {
                    continue;
                }
                Automaton bb = b;
                if (has_aliases)
                {
                    bb = bb.CloneExpanded();
                }
                else
                {
                    bb = bb.CloneExpandedIfRequired();
                }
                s.AddEpsilon(bb.initial);
            }
            Automaton a_ = new Automaton
            {
                initial       = s,
                deterministic = false
            };

            //a.clearHashCode();
            a_.ClearNumberedStates();
            a_.CheckMinimizeAlways();
            return(a_);
        }
Пример #7
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Concatenate(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton && a2.IsSingleton)
            {
                return(BasicAutomata.MakeString(a1.singleton + a2.singleton));
            }
            if (IsEmpty(a1) || IsEmpty(a2))
            {
                return(BasicAutomata.MakeEmpty());
            }
            // adding epsilon transitions with the NFA concatenation algorithm
            // in this case always produces a resulting DFA, preventing expensive
            // redundant determinize() calls for this common case.
            bool deterministic = a1.IsSingleton && a2.IsDeterministic;

            if (a1 == a2)
            {
                a1 = a1.CloneExpanded();
                a2 = a2.CloneExpanded();
            }
            else
            {
                a1 = a1.CloneExpandedIfRequired();
                a2 = a2.CloneExpandedIfRequired();
            }
            foreach (State s in a1.GetAcceptStates())
            {
                s.accept = false;
                s.AddEpsilon(a2.initial);
            }
            a1.deterministic = deterministic;
            //a1.clearHashCode();
            a1.ClearNumberedStates();
            a1.CheckMinimizeAlways();
            return(a1);
        }
Пример #8
0
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// </summary>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            if (a.initial.numTransitions == 1)
            {
                Transition t = a.initial.TransitionsArray[0];
                if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
                {
                    return;
                }
            }
            a.Totalize();

            // initialize data structures
            int[]   sigma = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            int     sigmaLen = sigma.Length, statesLen = states.Length;

            JCG.List <State>[,] reverse = new JCG.List <State> [statesLen, sigmaLen];
            ISet <State>[]     partition  = new JCG.HashSet <State> [statesLen];
            JCG.List <State>[] splitblock = new JCG.List <State> [statesLen];
            int[] block = new int[statesLen];
            StateList[,] active      = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            Queue <Int32Pair> pending = new Queue <Int32Pair>(); // LUCENENET specific - Queue is much more performant than LinkedList
            OpenBitSet        pending2 = new OpenBitSet(sigmaLen * statesLen);
            OpenBitSet        split = new OpenBitSet(statesLen),
                              refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);

            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new JCG.List <State>();
                partition[q]  = new JCG.HashSet <State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                int   j  = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //JCG.List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] == null)
                    {
                        reverse[r, x] = new JCG.List <State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
                pending.Enqueue(new Int32Pair(j, x));
                pending2.Set(x * statesLen + j);
            }
            // process pending until fixed point
            int k = 2;

            while (pending.Count > 0)
            {
                Int32Pair ip = pending.Dequeue();
                int       p  = ip.n1;
                int       x  = ip.n2;
                pending2.Clear(x * statesLen + p);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    JCG.List <State> r = reverse[m.Q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            if (!split.Get(i))
                            {
                                split.Set(i);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j);
                                    refine.Set(j);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
                {
                    JCG.List <State> sb = splitblock[j];
                    if (sb.Count < partition[j].Count)
                    {
                        ISet <State> b1 = partition[j];
                        ISet <State> b2 = partition[k];
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j);
                                pending.Enqueue(new Int32Pair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k);
                                pending.Enqueue(new Int32Pair(k, c));
                            }
                        }
                        k++;
                    }
                    refine2.Clear(j);
                    foreach (State s in sb)
                    {
                        split.Clear(s.number);
                    }
                    sb.Clear();
                }
                refine.Clear(0, refine.Length);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.initial)
                    {
                        a.initial = s;
                    }
                    s.accept = q.accept;
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].GetTransitions())
                {
                    s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Пример #9
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in total number of states.
        /// </summary>
        public static Automaton Concatenate(IList <Automaton> l)
        {
            if (l.Count == 0)
            {
                return(BasicAutomata.MakeEmptyString());
            }
            bool all_singleton = true;

            foreach (Automaton a in l)
            {
                if (!a.IsSingleton)
                {
                    all_singleton = false;
                    break;
                }
            }
            if (all_singleton)
            {
                StringBuilder b = new StringBuilder();
                foreach (Automaton a in l)
                {
                    b.Append(a.singleton);
                }
                return(BasicAutomata.MakeString(b.ToString()));
            }
            else
            {
                foreach (Automaton a in l)
                {
                    if (BasicOperations.IsEmpty(a))
                    {
                        return(BasicAutomata.MakeEmpty());
                    }
                }
                JCG.HashSet <int> ids = new JCG.HashSet <int>();
                foreach (Automaton a in l)
                {
                    ids.Add(a.GetHashCode());
                }
                bool      has_aliases = ids.Count != l.Count;
                Automaton b           = l[0];
                if (has_aliases)
                {
                    b = b.CloneExpanded();
                }
                else
                {
                    b = b.CloneExpandedIfRequired();
                }
                ISet <State> ac    = b.GetAcceptStates();
                bool         first = true;
                foreach (Automaton a in l)
                {
                    if (first)
                    {
                        first = false;
                    }
                    else
                    {
                        if (a.IsEmptyString)
                        {
                            continue;
                        }
                        Automaton aa = a;
                        if (has_aliases)
                        {
                            aa = aa.CloneExpanded();
                        }
                        else
                        {
                            aa = aa.CloneExpandedIfRequired();
                        }
                        ISet <State> ns = aa.GetAcceptStates();
                        foreach (State s in ac)
                        {
                            s.accept = false;
                            s.AddEpsilon(aa.initial);
                            if (s.accept)
                            {
                                ns.Add(s);
                            }
                        }
                        ac = ns;
                    }
                }
                b.deterministic = false;
                //b.clearHashCode();
                b.ClearNumberedStates();
                b.CheckMinimizeAlways();
                return(b);
            }
        }
Пример #10
0
        /// <summary>
        /// Adds epsilon transitions to the given automaton. This method adds extra
        /// character interval transitions that are equivalent to the given set of
        /// epsilon transitions.
        /// </summary>
        /// <param name="a"> Automaton. </param>
        /// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
        ///          source/destination states where epsilon transitions should be
        ///          added. </param>
        public static void AddEpsilons(Automaton a, ICollection <StatePair> pairs)
        {
            a.ExpandSingleton();
            Dictionary <State, JCG.HashSet <State> > forward = new Dictionary <State, JCG.HashSet <State> >();
            Dictionary <State, JCG.HashSet <State> > back    = new Dictionary <State, JCG.HashSet <State> >();

            foreach (StatePair p in pairs)
            {
                if (!forward.TryGetValue(p.s1, out JCG.HashSet <State> to))
                {
                    to            = new JCG.HashSet <State>();
                    forward[p.s1] = to;
                }
                to.Add(p.s2);
                if (!back.TryGetValue(p.s2, out JCG.HashSet <State> from))
                {
                    from       = new JCG.HashSet <State>();
                    back[p.s2] = from;
                }
                from.Add(p.s1);
            }
            // calculate epsilon closure
            LinkedList <StatePair> worklist = new LinkedList <StatePair>(pairs);

            JCG.HashSet <StatePair> workset = new JCG.HashSet <StatePair>(pairs);
            while (worklist.Count > 0)
            {
                StatePair p = worklist.First.Value;
                worklist.Remove(p);
                workset.Remove(p);
#pragma warning disable IDE0018 // Inline variable declaration
                JCG.HashSet <State> from;
#pragma warning restore IDE0018 // Inline variable declaration
                if (forward.TryGetValue(p.s2, out JCG.HashSet <State> to))
                {
                    foreach (State s in to)
                    {
                        StatePair pp = new StatePair(p.s1, s);
                        if (!pairs.Contains(pp))
                        {
                            pairs.Add(pp);
                            forward[p.s1].Add(s);
                            back[s].Add(p.s1);
                            worklist.AddLast(pp);
                            workset.Add(pp);
                            if (back.TryGetValue(p.s1, out from))
                            {
                                foreach (State q in from)
                                {
                                    StatePair qq = new StatePair(q, p.s1);
                                    if (!workset.Contains(qq))
                                    {
                                        worklist.AddLast(qq);
                                        workset.Add(qq);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // add transitions
            foreach (StatePair p in pairs)
            {
                p.s1.AddEpsilon(p.s2);
            }
            a.deterministic = false;
            //a.clearHashCode();
            a.ClearNumberedStates();
            a.CheckMinimizeAlways();
        }