Пример #1
0
        public RandomAcceptedStrings(Automaton a)
        {
            this.a = a;
            if (a.IsSingleton)
            {
                leadsToAccept = null;
                return;
            }

            // must use IdentityHashmap because two Transitions w/
            // different start nodes can be considered the same
            leadsToAccept = new JCG.Dictionary <Transition, bool?>(IdentityEqualityComparer <Transition> .Default);
            IDictionary <State, IList <ArrivingTransition> > allArriving = new Dictionary <State, IList <ArrivingTransition> >();

            Queue <State> q    = new Queue <State>();
            ISet <State>  seen = new JCG.HashSet <State>();

            // reverse map the transitions, so we can quickly look
            // up all arriving transitions to a given state
            foreach (State s in a.GetNumberedStates())
            {
                for (int i = 0; i < s.numTransitions; i++)
                {
                    Transition t = s.TransitionsArray[i];
                    if (!allArriving.TryGetValue(t.to, out IList <ArrivingTransition> tl) || tl == null)
                    {
                        tl = new List <ArrivingTransition>();
                        allArriving[t.to] = tl;
                    }
                    tl.Add(new ArrivingTransition(s, t));
                }
                if (s.Accept)
                {
                    q.Enqueue(s);
                    seen.Add(s);
                }
            }

            // Breadth-first search, from accept states,
            // backwards:
            while (q.Count > 0)
            {
                State s = q.Dequeue();
                if (allArriving.TryGetValue(s, out IList <ArrivingTransition> arriving) && arriving != null)
                {
                    foreach (ArrivingTransition at in arriving)
                    {
                        State from = at.from;
                        if (!seen.Contains(from))
                        {
                            q.Enqueue(from);
                            seen.Add(from);
                            leadsToAccept[at.t] = true;
                        }
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Converts an incoming utf32 <see cref="Automaton"/> to an equivalent
        /// utf8 one.  The incoming automaton need not be
        /// deterministic.  Note that the returned automaton will
        /// not in general be deterministic, so you must
        /// determinize it if that's needed.
        /// </summary>
        public Automaton Convert(Automaton utf32)
        {
            if (utf32.IsSingleton)
            {
                utf32 = utf32.CloneExpanded();
            }

            State[]          map        = new State[utf32.GetNumberedStates().Length];
            JCG.List <State> pending    = new JCG.List <State>();
            State            utf32State = utf32.GetInitialState();

            pending.Add(utf32State);
            Automaton utf8 = new Automaton();

            utf8.IsDeterministic = false;

            State utf8State = utf8.GetInitialState();

            utf8States                 = new State[5];
            utf8StateCount             = 0;
            utf8State.number           = utf8StateCount;
            utf8States[utf8StateCount] = utf8State;
            utf8StateCount++;

            utf8State.Accept = utf32State.Accept;

            map[utf32State.number] = utf8State;

            while (pending.Count != 0)
            {
                utf32State = pending[pending.Count - 1];
                pending.RemoveAt(pending.Count - 1);
                utf8State = map[utf32State.number];
                for (int i = 0; i < utf32State.numTransitions; i++)
                {
                    Transition t         = utf32State.TransitionsArray[i];
                    State      destUTF32 = t.to;
                    State      destUTF8  = map[destUTF32.number];
                    if (destUTF8 == null)
                    {
                        destUTF8              = NewUTF8State();
                        destUTF8.accept       = destUTF32.accept;
                        map[destUTF32.number] = destUTF8;
                        pending.Add(destUTF32);
                    }
                    ConvertOneEdge(utf8State, destUTF8, t.min, t.max);
                }
            }

            utf8.SetNumberedStates(utf8States, utf8StateCount);

            return(utf8);
        }
Пример #3
0
 /// <summary>
 /// Returns a (deterministic) automaton that accepts the complement of the
 /// language of the given automaton.
 /// <para/>
 /// Complexity: linear in number of states (if already deterministic).
 /// </summary>
 public static Automaton Complement(Automaton a)
 {
     a = a.CloneExpandedIfRequired();
     a.Determinize();
     a.Totalize();
     foreach (State p in a.GetNumberedStates())
     {
         p.accept = !p.accept;
     }
     a.RemoveDeadTransitions();
     return(a);
 }
Пример #4
0
        /// <summary>
        /// Constructs a new <code>RunAutomaton</code> from a deterministic
        /// <code>Automaton</code>.
        /// </summary>
        /// <param name="a"> an automaton </param>
        /// <param name="maxInterval"></param>
        /// <param name="tableize"></param>
        public RunAutomaton(Automaton a, int maxInterval, bool tableize)
        {
            this._maxInterval = maxInterval;
            a.Determinize();
            _points = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            m_initial     = a.initial.Number;
            _size         = states.Length;
            m_accept      = new bool[_size];
            m_transitions = new int[_size * _points.Length];
            for (int n = 0; n < _size * _points.Length; n++)
            {
                m_transitions[n] = -1;
            }
            foreach (State s in states)
            {
                int n = s.number;
                m_accept[n] = s.accept;
                for (int c = 0; c < _points.Length; c++)
                {
                    State q = s.Step(_points[c]);
                    if (q != null)
                    {
                        m_transitions[n * _points.Length + c] = q.number;
                    }
                }
            }

            /*
             * Set alphabet table for optimal run performance.
             */
            if (tableize)
            {
                _classmap = new int[maxInterval + 1];
                int i = 0;
                for (int j = 0; j <= maxInterval; j++)
                {
                    if (i + 1 < _points.Length && j == _points[i + 1])
                    {
                        i++;
                    }
                    _classmap[j] = i;
                }
            }
            else
            {
                _classmap = null;
            }
        }
Пример #5
0
        /// <summary>
        /// Reverses the language of the given (non-singleton) automaton while returning
        /// the set of new initial states.
        /// </summary>
        public static ISet <State> Reverse(Automaton a)
        {
            a.ExpandSingleton();
            // reverse all edges
            Dictionary <State, ISet <Transition> > m = new Dictionary <State, ISet <Transition> >();

            State[]      states = a.GetNumberedStates();
            ISet <State> accept = new JCG.HashSet <State>();

            foreach (State s in states)
            {
                if (s.Accept)
                {
                    accept.Add(s);
                }
            }
            foreach (State r in states)
            {
                m[r]     = new JCG.HashSet <Transition>();
                r.accept = false;
            }
            foreach (State r in states)
            {
                foreach (Transition t in r.GetTransitions())
                {
                    m[t.to].Add(new Transition(t.min, t.max, r));
                }
            }
            foreach (State r in states)
            {
                ISet <Transition> tr = m[r];
                r.SetTransitions(tr.ToArray(/*new Transition[tr.Count]*/));
            }
            // make new initial+final states
            a.initial.accept = true;
            a.initial        = new State();
            foreach (State r in accept)
            {
                a.initial.AddEpsilon(r); // ensures that all initial states are reachable
            }
            a.deterministic = false;
            a.ClearNumberedStates();
            return(accept);
        }
Пример #6
0
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// </summary>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            if (a.initial.numTransitions == 1)
            {
                Transition t = a.initial.TransitionsArray[0];
                if (t.to == a.initial && t.min == Character.MIN_CODE_POINT && t.max == Character.MAX_CODE_POINT)
                {
                    return;
                }
            }
            a.Totalize();

            // initialize data structures
            int[]   sigma = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            int     sigmaLen = sigma.Length, statesLen = states.Length;

            List <State>[,] reverse = new List <State> [statesLen, sigmaLen];
            ISet <State>[] partition  = new EquatableSet <State> [statesLen];
            List <State>[] splitblock = new List <State> [statesLen];
            int[]          block      = new int[statesLen];
            StateList[,] active      = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            LinkedList <Int32Pair> pending = new LinkedList <Int32Pair>();
            OpenBitSet             pending2 = new OpenBitSet(sigmaLen * statesLen);
            OpenBitSet             split = new OpenBitSet(statesLen),
                                   refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);

            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new List <State>();
                partition[q]  = new EquatableSet <State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                int   j  = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] == null)
                    {
                        reverse[r, x] = new List <State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
                pending.AddLast(new Int32Pair(j, x));
                pending2.Set(x * statesLen + j);
            }
            // process pending until fixed point
            int k = 2;

            while (pending.Count > 0)
            {
                Int32Pair ip = pending.First.Value;
                pending.Remove(ip);
                int p = ip.N1;
                int x = ip.N2;
                pending2.Clear(x * statesLen + p);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    List <State> r = reverse[m.Q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            if (!split.Get(i))
                            {
                                split.Set(i);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j);
                                    refine.Set(j);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
                {
                    List <State> sb = splitblock[j];
                    if (sb.Count < partition[j].Count)
                    {
                        ISet <State> b1 = partition[j];
                        ISet <State> b2 = partition[k];
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j);
                                pending.AddLast(new Int32Pair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k);
                                pending.AddLast(new Int32Pair(k, c));
                            }
                        }
                        k++;
                    }
                    refine2.Clear(j);
                    foreach (State s in sb)
                    {
                        split.Clear(s.number);
                    }
                    sb.Clear();
                }
                refine.Clear(0, refine.Length - 1);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.initial)
                    {
                        a.initial = s;
                    }
                    s.accept = q.accept;
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].GetTransitions())
                {
                    s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Пример #7
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            LinkedList <SortedInt32Set.FrozenInt32Set>         worklist = new LinkedList <SortedInt32Set.FrozenInt32Set>();
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.AddLast(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            // LUCENENET NOTE: The problem here is almost certainly
            // due to the conversion to FrozenIntSet along with its
            // differing equality checking.
            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
                worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        Debug.Assert(lastPoint != -1);

                        statesSet.ComputeHash();

                        State q;
                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.AddLast(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                                Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
                                newStatesArray = newArray;
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }
Пример #8
0
 /// <summary>
 /// Returns <c>true</c> if the given string is accepted by the automaton.
 /// <para/>
 /// Complexity: linear in the length of the string.
 /// <para/>
 /// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
 /// </summary>
 public static bool Run(Automaton a, string s)
 {
     if (a.IsSingleton)
     {
         return(s.Equals(a.singleton, StringComparison.Ordinal));
     }
     if (a.deterministic)
     {
         State p = a.initial;
         for (int i = 0, cp = 0; i < s.Length; i += Character.CharCount(cp))
         {
             State q = p.Step(cp = Character.CodePointAt(s, i));
             if (q == null)
             {
                 return(false);
             }
             p = q;
         }
         return(p.accept);
     }
     else
     {
         State[]            states   = a.GetNumberedStates();
         LinkedList <State> pp       = new LinkedList <State>();
         LinkedList <State> pp_other = new LinkedList <State>();
         OpenBitSet         bb       = new OpenBitSet(states.Length);
         OpenBitSet         bb_other = new OpenBitSet(states.Length);
         pp.AddLast(a.initial);
         List <State> dest   = new List <State>();
         bool         accept = a.initial.accept;
         for (int i = 0, c = 0; i < s.Length; i += Character.CharCount(c))
         {
             c      = Character.CodePointAt(s, i);
             accept = false;
             pp_other.Clear();
             bb_other.Clear(0, bb_other.Length - 1);
             foreach (State p in pp)
             {
                 dest.Clear();
                 p.Step(c, dest);
                 foreach (State q in dest)
                 {
                     if (q.accept)
                     {
                         accept = true;
                     }
                     if (!bb_other.Get(q.number))
                     {
                         bb_other.Set(q.number);
                         pp_other.AddLast(q);
                     }
                 }
             }
             LinkedList <State> tp = pp;
             pp       = pp_other;
             pp_other = tp;
             OpenBitSet tb = bb;
             bb       = bb_other;
             bb_other = tb;
         }
         return(accept);
     }
 }
Пример #9
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            Queue <SortedInt32Set.FrozenInt32Set> worklist = new Queue <SortedInt32Set.FrozenInt32Set>(); // LUCENENET specific - Queue is much more performant than LinkedList
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.Enqueue(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.Dequeue();
                //worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(lastPoint != -1);
                        }

                        statesSet.ComputeHash();

                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out State q) || q == null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.Enqueue(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                // LUCENENET: Resize rather than copy
                                Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert((accCount > 0) == q.accept, "accCount={0} vs existing accept={1} states={2}", accCount, q.accept, statesSet);
                            }
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(statesSet.upto == 0, "upto={0}", statesSet.upto);
                }
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }