Пример #1
0
 /// <summary>
 /// See <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/>.
 /// </summary>
 public virtual bool SubsetOf(Automaton a)
 {
     return(BasicOperations.SubsetOf(this, a));
 }
Пример #2
0
 /// <summary>
 /// See <see cref="BasicOperations.Minus(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Minus(Automaton a)
 {
     return(BasicOperations.Minus(this, a));
 }
Пример #3
0
 /// <summary>
 /// See <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Intersection(Automaton a)
 {
     return(BasicOperations.Intersection(this, a));
 }
Пример #4
0
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// </summary>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            if (a.initial.numTransitions == 1)
            {
                Transition t = a.initial.TransitionsArray[0];
                if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
                {
                    return;
                }
            }
            a.Totalize();

            // initialize data structures
            int[]   sigma = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            int     sigmaLen = sigma.Length, statesLen = states.Length;

            List <State>[,] reverse = new List <State> [statesLen, sigmaLen];
            ISet <State>[] partition  = new JCG.HashSet <State> [statesLen];
            List <State>[] splitblock = new List <State> [statesLen];
            int[]          block      = new int[statesLen];
            StateList[,] active      = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            LinkedList <Int32Pair> pending = new LinkedList <Int32Pair>();
            OpenBitSet             pending2 = new OpenBitSet(sigmaLen * statesLen);
            OpenBitSet             split = new OpenBitSet(statesLen),
                                   refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);

            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new List <State>();
                partition[q]  = new JCG.HashSet <State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                int   j  = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] == null)
                    {
                        reverse[r, x] = new List <State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
                pending.AddLast(new Int32Pair(j, x));
                pending2.Set(x * statesLen + j);
            }
            // process pending until fixed point
            int k = 2;

            while (pending.Count > 0)
            {
                Int32Pair ip = pending.First.Value;
                pending.Remove(ip);
                int p = ip.N1;
                int x = ip.N2;
                pending2.Clear(x * statesLen + p);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    List <State> r = reverse[m.Q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            if (!split.Get(i))
                            {
                                split.Set(i);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j);
                                    refine.Set(j);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
                {
                    List <State> sb = splitblock[j];
                    if (sb.Count < partition[j].Count)
                    {
                        ISet <State> b1 = partition[j];
                        ISet <State> b2 = partition[k];
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j);
                                pending.AddLast(new Int32Pair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k);
                                pending.AddLast(new Int32Pair(k, c));
                            }
                        }
                        k++;
                    }
                    refine2.Clear(j);
                    foreach (State s in sb)
                    {
                        split.Clear(s.number);
                    }
                    sb.Clear();
                }
                refine.Clear(0, refine.Length - 1);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.initial)
                    {
                        a.initial = s;
                    }
                    s.accept = q.accept;
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].GetTransitions())
                {
                    s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Пример #5
0
 /// <summary>
 /// See <see cref="BasicOperations.Concatenate(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Concatenate(Automaton a)
 {
     return(BasicOperations.Concatenate(this, a));
 }
Пример #6
0
 public CompiledAutomaton(Automaton automaton)
     : this(automaton, null, true)
 {
 }
Пример #7
0
 public CharacterRunAutomaton(Automaton a)
     : base(a, Character.MaxCodePoint, false)
 {
 }
Пример #8
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            Queue <SortedInt32Set.FrozenInt32Set> worklist = new Queue <SortedInt32Set.FrozenInt32Set>(); // LUCENENET specific - Queue is much more performant than LinkedList
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.Enqueue(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.Dequeue();
                //worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(lastPoint != -1);
                        }

                        statesSet.ComputeHash();

                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out State q) || q is null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.Enqueue(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                // LUCENENET: Resize rather than copy
                                Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert((accCount > 0) == q.accept, "accCount={0} vs existing accept={1} states={2}", accCount, q.accept, statesSet);
                            }
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(statesSet.upto == 0, "upto={0}", statesSet.upto);
                }
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }
Пример #9
0
        /// <summary>
        /// Adds epsilon transitions to the given automaton. This method adds extra
        /// character interval transitions that are equivalent to the given set of
        /// epsilon transitions.
        /// </summary>
        /// <param name="a"> Automaton. </param>
        /// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
        ///          source/destination states where epsilon transitions should be
        ///          added. </param>
        public static void AddEpsilons(Automaton a, ICollection <StatePair> pairs)
        {
            a.ExpandSingleton();
            Dictionary <State, JCG.HashSet <State> > forward = new Dictionary <State, JCG.HashSet <State> >();
            Dictionary <State, JCG.HashSet <State> > back    = new Dictionary <State, JCG.HashSet <State> >();

            foreach (StatePair p in pairs)
            {
                if (!forward.TryGetValue(p.s1, out JCG.HashSet <State> to))
                {
                    to            = new JCG.HashSet <State>();
                    forward[p.s1] = to;
                }
                to.Add(p.s2);
                if (!back.TryGetValue(p.s2, out JCG.HashSet <State> from))
                {
                    from       = new JCG.HashSet <State>();
                    back[p.s2] = from;
                }
                from.Add(p.s1);
            }
            // calculate epsilon closure
            LinkedList <StatePair> worklist = new LinkedList <StatePair>(pairs);

            JCG.HashSet <StatePair> workset = new JCG.HashSet <StatePair>(pairs);
            while (worklist.Count > 0)
            {
                StatePair p = worklist.First.Value;
                worklist.Remove(p);
                workset.Remove(p);
#pragma warning disable IDE0018 // Inline variable declaration
                JCG.HashSet <State> from;
#pragma warning restore IDE0018 // Inline variable declaration
                if (forward.TryGetValue(p.s2, out JCG.HashSet <State> to))
                {
                    foreach (State s in to)
                    {
                        StatePair pp = new StatePair(p.s1, s);
                        if (!pairs.Contains(pp))
                        {
                            pairs.Add(pp);
                            forward[p.s1].Add(s);
                            back[s].Add(p.s1);
                            worklist.AddLast(pp);
                            workset.Add(pp);
                            if (back.TryGetValue(p.s1, out from))
                            {
                                foreach (State q in from)
                                {
                                    StatePair qq = new StatePair(q, p.s1);
                                    if (!workset.Contains(qq))
                                    {
                                        worklist.AddLast(qq);
                                        workset.Add(qq);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // add transitions
            foreach (StatePair p in pairs)
            {
                p.s1.AddEpsilon(p.s2);
            }
            a.deterministic = false;
            //a.clearHashCode();
            a.ClearNumberedStates();
            a.CheckMinimizeAlways();
        }
Пример #10
0
        /// <summary>
        /// Returns an automaton that accepts the intersection of the languages of the
        /// given automata. Never modifies the input automata languages.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static Automaton Intersection(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton)
            {
                if (BasicOperations.Run(a2, a1.singleton))
                {
                    return(a1.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a2.IsSingleton)
            {
                if (BasicOperations.Run(a1, a2.singleton))
                {
                    return(a2.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a1 == a2)
            {
                return(a1.CloneIfRequired());
            }
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Automaton         c            = new Automaton();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList
            Dictionary <StatePair, StatePair> newstates = new Dictionary <StatePair, StatePair>();
            StatePair p = new StatePair(c.initial, a1.initial, a2.initial);

            worklist.Enqueue(p);
            newstates[p] = p;
            while (worklist.Count > 0)
            {
                p          = worklist.Dequeue();
                p.s.accept = p.s1.accept && p.s2.accept;
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].max >= t1[n1].min)
                        {
                            StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                            if (!newstates.TryGetValue(q, out StatePair r) || r is null)
                            {
                                q.s = new State();
                                worklist.Enqueue(q);
                                newstates[q] = q;
                                r            = q;
                            }
                            int min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min;
                            int max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max;
                            p.s.AddTransition(new Transition(min, max, r.s));
                        }
                    }
                }
            }
            c.deterministic = a1.deterministic && a2.deterministic;
            c.RemoveDeadTransitions();
            c.CheckMinimizeAlways();
            return(c);
        }
Пример #11
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList

            JCG.HashSet <StatePair> visited = new JCG.HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.Enqueue(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.Dequeue();
                if (p.s1.accept && !p.s2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MaxCodePoint)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MaxCodePoint;
                            max1 = Character.MinCodePoint;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.Enqueue(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }
Пример #12
0
 /// <summary>
 /// Returns <c>true</c> if the given string is accepted by the automaton.
 /// <para/>
 /// Complexity: linear in the length of the string.
 /// <para/>
 /// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
 /// </summary>
 public static bool Run(Automaton a, string s)
 {
     if (a.IsSingleton)
     {
         return(s.Equals(a.singleton, StringComparison.Ordinal));
     }
     if (a.deterministic)
     {
         State p = a.initial;
         int   cp; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(cp))
         {
             State q = p.Step(cp = Character.CodePointAt(s, i));
             if (q is null)
             {
                 return(false);
             }
             p = q;
         }
         return(p.accept);
     }
     else
     {
         State[]            states   = a.GetNumberedStates();
         LinkedList <State> pp       = new LinkedList <State>();
         LinkedList <State> pp_other = new LinkedList <State>();
         OpenBitSet         bb       = new OpenBitSet(states.Length);
         OpenBitSet         bb_other = new OpenBitSet(states.Length);
         pp.AddLast(a.initial);
         JCG.List <State> dest   = new JCG.List <State>();
         bool             accept = a.initial.accept;
         int c; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(c))
         {
             c      = Character.CodePointAt(s, i);
             accept = false;
             pp_other.Clear();
             bb_other.Clear(0, bb_other.Length - 1);
             foreach (State p in pp)
             {
                 dest.Clear();
                 p.Step(c, dest);
                 foreach (State q in dest)
                 {
                     if (q.accept)
                     {
                         accept = true;
                     }
                     if (!bb_other.Get(q.number))
                     {
                         bb_other.Set(q.number);
                         pp_other.AddLast(q);
                     }
                 }
             }
             LinkedList <State> tp = pp;
             pp       = pp_other;
             pp_other = tp;
             OpenBitSet tb = bb;
             bb       = bb_other;
             bb_other = tb;
         }
         return(accept);
     }
 }
Пример #13
0
        /// <summary>
        /// Compute a DFA that accepts all strings within an edit distance of <paramref name="n"/>.
        /// <para>
        /// All automata have the following properties:
        /// <list type="bullet">
        ///     <item><description>They are deterministic (DFA).</description></item>
        ///     <item><description>There are no transitions to dead states.</description></item>
        ///     <item><description>They are not minimal (some transitions could be combined).</description></item>
        /// </list>
        /// </para>
        /// </summary>
        public virtual Automaton ToAutomaton(int n)
        {
            if (n == 0)
            {
                return(BasicAutomata.MakeString(word, 0, word.Length));
            }

            if (n >= descriptions.Length)
            {
                return(null);
            }

            int range = 2 * n + 1;
            ParametricDescription description = descriptions[n];

            // the number of states is based on the length of the word and n
            State[] states = new State[description.Count];
            // create all states, and mark as accept states if appropriate
            for (int i = 0; i < states.Length; i++)
            {
                states[i] = new State
                {
                    number = i,
                    accept = description.IsAccept(i)
                };
            }
            // create transitions from state to state
            for (int k = 0; k < states.Length; k++)
            {
                int xpos = description.GetPosition(k);
                if (xpos < 0)
                {
                    continue;
                }
                int end = xpos + Math.Min(word.Length - xpos, range);

                for (int x = 0; x < alphabet.Length; x++)
                {
                    int ch = alphabet[x];
                    // get the characteristic vector at this position wrt ch
                    int cvec = GetVector(ch, xpos, end);
                    int dest = description.Transition(k, xpos, cvec);
                    if (dest >= 0)
                    {
                        states[k].AddTransition(new Transition(ch, states[dest]));
                    }
                }
                // add transitions for all other chars in unicode
                // by definition, their characteristic vectors are always 0,
                // because they do not exist in the input string.
                int dest_ = description.Transition(k, xpos, 0); // by definition
                if (dest_ >= 0)
                {
                    for (int r = 0; r < numRanges; r++)
                    {
                        states[k].AddTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest_]));
                    }
                }
            }

            Automaton a = new Automaton(states[0])
            {
                deterministic = true
            };

            // we create some useless unconnected states, and its a net-win overall to remove these,
            // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
            // so, while we could set our numberedStates here, its actually best not to, and instead to
            // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
            //a.setNumberedStates(states);
            a.Reduce();
            // we need not trim transitions to dead states, as they are not created.
            //a.restoreInvariant();
            return(a);
        }
Пример #14
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            LinkedList <SortedInt32Set.FrozenInt32Set>         worklist = new LinkedList <SortedInt32Set.FrozenInt32Set>();
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.AddLast(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            // LUCENENET NOTE: The problem here is almost certainly
            // due to the conversion to FrozenIntSet along with its
            // differing equality checking.
            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
                worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        Debug.Assert(lastPoint != -1);

                        statesSet.ComputeHash();

                        State q;
                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.AddLast(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                                Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
                                newStatesArray = newArray;
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }
Пример #15
0
 /// <summary>
 /// See <see cref="BasicOperations.Union(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Union(Automaton a)
 {
     return(BasicOperations.Union(this, a));
 }
Пример #16
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in total number of states.
        /// </summary>
        public static Automaton Concatenate(IList <Automaton> l)
        {
            if (l.Count == 0)
            {
                return(BasicAutomata.MakeEmptyString());
            }
            bool all_singleton = true;

            foreach (Automaton a in l)
            {
                if (!a.IsSingleton)
                {
                    all_singleton = false;
                    break;
                }
            }
            if (all_singleton)
            {
                StringBuilder b = new StringBuilder();
                foreach (Automaton a in l)
                {
                    b.Append(a.singleton);
                }
                return(BasicAutomata.MakeString(b.ToString()));
            }
            else
            {
                foreach (Automaton a in l)
                {
                    if (BasicOperations.IsEmpty(a))
                    {
                        return(BasicAutomata.MakeEmpty());
                    }
                }
                JCG.HashSet <int> ids = new JCG.HashSet <int>();
                foreach (Automaton a in l)
                {
                    ids.Add(a.GetHashCode());
                }
                bool      has_aliases = ids.Count != l.Count;
                Automaton b           = l[0];
                if (has_aliases)
                {
                    b = b.CloneExpanded();
                }
                else
                {
                    b = b.CloneExpandedIfRequired();
                }
                ISet <State> ac    = b.GetAcceptStates();
                bool         first = true;
                foreach (Automaton a in l)
                {
                    if (first)
                    {
                        first = false;
                    }
                    else
                    {
                        if (a.IsEmptyString)
                        {
                            continue;
                        }
                        Automaton aa = a;
                        if (has_aliases)
                        {
                            aa = aa.CloneExpanded();
                        }
                        else
                        {
                            aa = aa.CloneExpandedIfRequired();
                        }
                        ISet <State> ns = aa.GetAcceptStates();
                        foreach (State s in ac)
                        {
                            s.accept = false;
                            s.AddEpsilon(aa.initial);
                            if (s.accept)
                            {
                                ns.Add(s);
                            }
                        }
                        ac = ns;
                    }
                }
                b.deterministic = false;
                //b.clearHashCode();
                b.ClearNumberedStates();
                b.CheckMinimizeAlways();
                return(b);
            }
        }
Пример #17
0
 /// <summary>
 /// See <see cref="MinimizationOperations.Minimize(Automaton)"/>. Returns the
 /// automaton being given as argument.
 /// </summary>
 public static Automaton Minimize(Automaton a)
 {
     MinimizationOperations.Minimize(a);
     return(a);
 }
Пример #18
0
        private Automaton ToAutomaton(IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
        {
            IList <Automaton> list;
            Automaton         a = null;

            switch (kind)
            {
            case Kind.REGEXP_UNION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
                a = BasicOperations.Union(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CONCATENATION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                a = BasicOperations.Concatenate(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_INTERSECTION:
                a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_OPTIONAL:
                a = exp1.ToAutomaton(automata, automaton_provider).Optional();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MIN:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MINMAX:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_COMPLEMENT:
                a = exp1.ToAutomaton(automata, automaton_provider).Complement();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CHAR:
                a = BasicAutomata.MakeChar(c);
                break;

            case Kind.REGEXP_CHAR_RANGE:
                a = BasicAutomata.MakeCharRange(from, to);
                break;

            case Kind.REGEXP_ANYCHAR:
                a = BasicAutomata.MakeAnyChar();
                break;

            case Kind.REGEXP_EMPTY:
                a = BasicAutomata.MakeEmpty();
                break;

            case Kind.REGEXP_STRING:
                a = BasicAutomata.MakeString(s);
                break;

            case Kind.REGEXP_ANYSTRING:
                a = BasicAutomata.MakeAnyString();
                break;

            case Kind.REGEXP_AUTOMATON:
                Automaton aa = null;
                if (automata != null)
                {
                    aa = automata[s];
                }
                if (aa == null && automaton_provider != null)
                {
                    try
                    {
                        aa = automaton_provider.GetAutomaton(s);
                    }
                    catch (IOException e)
                    {
                        throw new ArgumentException(e.ToString(), e);
                    }
                }
                if (aa == null)
                {
                    throw new ArgumentException("'" + s + "' not found");
                }
                a = (Automaton)aa.Clone();     // always clone here (ignore allow_mutate)
                break;

            case Kind.REGEXP_INTERVAL:
                a = BasicAutomata.MakeInterval(min, max, digits);
                break;
            }
            return(a);
        }
Пример #19
0
        public CompiledAutomaton(Automaton automaton, bool?finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:
                if (BasicOperations.IsEmpty(automaton))
                {
                    // matches nothing
                    Type              = AUTOMATON_TYPE.NONE;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else if (BasicOperations.IsTotal(automaton))
                {
                    // matches all possible strings
                    Type              = AUTOMATON_TYPE.ALL;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else
                {
                    string commonPrefix;
                    string singleton;
                    if (automaton.Singleton is null)
                    {
                        commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                        if (commonPrefix.Length > 0 && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix)))
                        {
                            singleton = commonPrefix;
                        }
                        else
                        {
                            singleton = null;
                        }
                    }
                    else
                    {
                        commonPrefix = null;
                        singleton    = automaton.Singleton;
                    }

                    if (singleton != null)
                    {
                        // matches a fixed string in singleton or expanded
                        // representation
                        Type              = AUTOMATON_TYPE.SINGLE;
                        Term              = new BytesRef(singleton);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                    else if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
                    {
                        // matches a constant prefix
                        Type              = AUTOMATON_TYPE.PREFIX;
                        Term              = new BytesRef(commonPrefix);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                }
            }

            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            if (finite is null)
            {
                this.Finite = SpecialOperations.IsFinite(automaton);
            }
            else
            {
                this.Finite = finite;
            }
            Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

            if (this.Finite == true)
            {
                CommonSuffixRef = null;
            }
            else
            {
                CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
            }
            RunAutomaton      = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }
Пример #20
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]         transitions1 = a1.GetSortedTransitions();
            Transition[][]         transitions2 = a2.GetSortedTransitions();
            LinkedList <StatePair> worklist     = new LinkedList <StatePair>();
            HashSet <StatePair>    visited      = new HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.AddLast(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.First.Value;
                worklist.Remove(p);
                if (p.S1.accept && !p.S2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.S1.number];
                Transition[] t2 = transitions2[p.S2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MAX_CODE_POINT)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MAX_CODE_POINT;
                            max1 = Character.MIN_CODE_POINT;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.AddLast(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }