Finite-state automaton with regular expression operations.

Class invariants:

  • An automaton is either represented explicitly (with State and Transition objects) or with a singleton string (see #getSingleton() and #expandSingleton()) in case the automaton is known to accept exactly one string. (Implicitly, all states and transitions of an automaton are reachable from its initial state.)
  • Automata are always reduced (see #reduce()) and have no transitions to dead states (see #removeDeadTransitions()).
  • If an automaton is nondeterministic, then #isDeterministic() returns false (but the converse is not required).
  • Automata provided as input to operations are generally assumed to be disjoint.

If the states or transitions are manipulated manually, the #restoreInvariant() and #setDeterministic(boolean) methods should be used afterwards to restore representation invariants that are assumed by the built-in automata operations.

Note: this class has internal mutable state and is not thread safe. It is the caller's responsibility to ensure any necessary synchronization if you wish to use the same Automaton from multiple threads. In general it is instead recommended to use a RunAutomaton for multithreaded matching: it is immutable, thread safe, and much faster.

@lucene.experimental
Inheritance: ICloneable
Exemplo n.º 1
0
        private static void AssertAutomaton(Automaton a)
        {
            Automaton clone = (Automaton)a.Clone();
            // complement(complement(a)) = a
            Automaton equivalent = BasicOperations.Complement(BasicOperations.Complement(a));
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a union a = a
            equivalent = BasicOperations.Union(a, clone);
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a intersect a = a
            equivalent = BasicOperations.Intersection(a, clone);
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a minus a = empty
            Automaton empty = BasicOperations.Minus(a, clone);
            Assert.IsTrue(BasicOperations.IsEmpty(empty));

            // as long as don't accept the empty string
            // then optional(a) - empty = a
            if (!BasicOperations.Run(a, ""))
            {
                //System.out.println("test " + a);
                Automaton optional = BasicOperations.Optional(a);
                //System.out.println("optional " + optional);
                equivalent = BasicOperations.Minus(optional, BasicAutomata.MakeEmptyString());
                //System.out.println("equiv " + equivalent);
                Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Returns a new (deterministic) automaton that accepts a single codepoint of
 /// the given value.
 /// </summary>
 public static Automaton MakeChar(int c)
 {
     Automaton a = new Automaton();
     a.singleton = new string(Character.ToChars(c));
     a.deterministic = true;
     return a;
 }
Exemplo n.º 3
0
        public override void SetUp()
        {
            base.SetUp();
            // we generate aweful regexps: good for testing.
            // but for preflex codec, the test can be very slow, so use less iterations.
            NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.YES);
            doc.Add(field);
            Terms = new SortedSet<BytesRef>();

            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                Terms.Add(new BytesRef(s));
                writer.AddDocument(doc);
            }

            TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Exemplo n.º 4
0
 /// <summary>
 /// Returns true if the language of this automaton is finite.
 /// </summary>
 public static bool IsFinite(Automaton a)
 {
     if (a.IsSingleton)
     {
         return true;
     }
     return IsFinite(a.Initial, new BitArray(a.NumberOfStates), new BitArray(a.NumberOfStates));
 }
Exemplo n.º 5
0
 /// <summary>
 /// Returns a new (deterministic) automaton that accepts all strings.
 /// </summary>
 public static Automaton MakeAnyString()
 {
     Automaton a = new Automaton();
     State s = new State();
     a.Initial = s;
     s.accept = true;
     s.AddTransition(new Transition(Character.MIN_CODE_POINT, Character.MAX_CODE_POINT, s));
     a.deterministic = true;
     return a;
 }
Exemplo n.º 6
0
 /// <summary>
 /// Simple, original brics implementation of determinize()
 /// </summary>
 public static void DeterminizeSimple(Automaton a)
 {
     if (a.Deterministic || a.Singleton != null)
     {
         return;
     }
     HashSet<State> initialset = new HashSet<State>();
     initialset.Add(a.InitialState);
     DeterminizeSimple(a, initialset);
 }
Exemplo n.º 7
0
 /// <summary>
 /// Minimizes (and determinizes if not already deterministic) the given
 /// automaton.
 /// </summary>
 /// <seealso cref= Automaton#setMinimization(int) </seealso>
 public static void Minimize(Automaton a)
 {
     if (!a.IsSingleton)
     {
         MinimizeHopcroft(a);
     }
     // recompute hash code
     //a.hash_code = 1a.getNumberOfStates() * 3 + a.getNumberOfTransitions() * 2;
     //if (a.hash_code == 0) a.hash_code = 1;
 }
 public override void SetUp()
 {
     base.SetUp();
     // build an automaton matching this jvm's letter definition
     State initial = new State();
     State accept = new State();
     accept.Accept = true;
     for (int i = 0; i <= 0x10FFFF; i++)
     {
         if (Character.IsLetter(i))
         {
             initial.AddTransition(new Transition(i, i, accept));
         }
     }
     Automaton single = new Automaton(initial);
     single.Reduce();
     Automaton repeat = BasicOperations.Repeat(single);
     jvmLetter = new CharacterRunAutomaton(repeat);
 }
 private void AssertBruteForceT(string input, Automaton dfa, int distance)
 {
     CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa);
     int maxLen = input.Length + distance + 1;
     int maxNum = (int)Math.Pow(2, maxLen);
     for (int i = 0; i < maxNum; i++)
     {
         string encoded = Convert.ToString(i, 2);
         bool accepts = ra.Run(encoded);
         if (accepts)
         {
             Assert.IsTrue(GetTDistance(input, encoded) <= distance);
         }
         else
         {
             Assert.IsTrue(GetTDistance(input, encoded) > distance);
         }
     }
 }
Exemplo n.º 10
0
        public void TestRandomRanges()
        {
            Random r = Random();
            int ITERS = AtLeast(10);
            int ITERS_PER_DFA = AtLeast(100);
            for (int iter = 0; iter < ITERS; iter++)
            {
                int x1 = GetCodeStart(r);
                int x2 = GetCodeStart(r);
                int startCode, endCode;

                if (x1 < x2)
                {
                    startCode = x1;
                    endCode = x2;
                }
                else
                {
                    startCode = x2;
                    endCode = x1;
                }

                if (IsSurrogate(startCode) && IsSurrogate(endCode))
                {
                    iter--;
                    continue;
                }

                var a = new Automaton();
                var end = new State {Accept = true};
                a.InitialState.AddTransition(new Transition(startCode, endCode, end));
                a.Deterministic = true;

                TestOne(r, new ByteRunAutomaton(a), startCode, endCode, ITERS_PER_DFA);
            }
        }
Exemplo n.º 11
0
            public RandomAcceptedStrings(Automaton a)
            {
                this.a = a;
                if (!String.IsNullOrEmpty(a.Singleton))
                {
                    LeadsToAccept = null;
                    return;
                }

                // must use IdentityHashmap because two Transitions w/
                // different start nodes can be considered the same
                LeadsToAccept = new IdentityHashMap <Transition, bool?>();
                IDictionary <State, IList <ArrivingTransition> > allArriving = new Dictionary <State, IList <ArrivingTransition> >();

                LinkedList <State> q    = new LinkedList <State>();
                HashSet <State>    seen = new HashSet <State>();

                // reverse map the transitions, so we can quickly look
                // up all arriving transitions to a given state
                foreach (State s in a.NumberedStates)
                {
                    for (int i = 0; i < s.numTransitions; i++)
                    {
                        Transition t = s.TransitionsArray[i];
                        IList <ArrivingTransition> tl;
                        allArriving.TryGetValue(t.Dest, out tl);
                        if (tl == null)
                        {
                            tl = new List <ArrivingTransition>();
                            allArriving[t.Dest] = tl;
                        }
                        tl.Add(new ArrivingTransition(s, t));
                    }
                    if (s.Accept)
                    {
                        q.AddLast(s);
                        seen.Add(s);
                    }
                }

                // Breadth-first search, from accept states,
                // backwards:
                while (q.Count > 0)
                {
                    State s = q.First.Value;
                    q.RemoveFirst();
                    IList <ArrivingTransition> arriving;
                    allArriving.TryGetValue(s, out arriving);
                    if (arriving != null)
                    {
                        foreach (ArrivingTransition at in arriving)
                        {
                            State from = at.From;
                            if (!seen.Contains(from))
                            {
                                q.AddLast(from);
                                seen.Add(from);
                                LeadsToAccept[at.t] = true;
                            }
                        }
                    }
                }
            }
Exemplo n.º 12
0
        /// <summary>
        /// Builds a DFA for some string, and checks all Lev automata
        /// up to some maximum distance.
        /// </summary>
        private void AssertLev(string s, int maxDistance)
        {
            LevenshteinAutomata builder = new LevenshteinAutomata(s, false);
            LevenshteinAutomata tbuilder = new LevenshteinAutomata(s, true);
            Automaton[] automata = new Automaton[maxDistance + 1];
            Automaton[] tautomata = new Automaton[maxDistance + 1];
            for (int n = 0; n < automata.Length; n++)
            {
                automata[n] = builder.ToAutomaton(n);
                tautomata[n] = tbuilder.ToAutomaton(n);
                Assert.IsNotNull(automata[n]);
                Assert.IsNotNull(tautomata[n]);
                Assert.IsTrue(automata[n].Deterministic);
                Assert.IsTrue(tautomata[n].Deterministic);
                Assert.IsTrue(SpecialOperations.IsFinite(automata[n]));
                Assert.IsTrue(SpecialOperations.IsFinite(tautomata[n]));
                AutomatonTestUtil.AssertNoDetachedStates(automata[n]);
                AutomatonTestUtil.AssertNoDetachedStates(tautomata[n]);
                // check that the dfa for n-1 accepts a subset of the dfa for n
                if (n > 0)
                {
                    Assert.IsTrue(automata[n - 1].SubsetOf(automata[n]));
                    Assert.IsTrue(automata[n - 1].SubsetOf(tautomata[n]));
                    Assert.IsTrue(tautomata[n - 1].SubsetOf(automata[n]));
                    Assert.IsTrue(tautomata[n - 1].SubsetOf(tautomata[n]));
                    Assert.AreNotSame(automata[n - 1], automata[n]);
                }
                // check that Lev(N) is a subset of LevT(N)
                Assert.IsTrue(automata[n].SubsetOf(tautomata[n]));
                // special checks for specific n
                switch (n)
                {
                    case 0:
                        // easy, matches the string itself
                        Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), automata[0]));
                        Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), tautomata[0]));
                        break;

                    case 1:
                        // generate a lev1 naively, and check the accepted lang is the same.
                        Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1(s), automata[1]));
                        Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1T(s), tautomata[1]));
                        break;

                    default:
                        AssertBruteForce(s, automata[n], n);
                        AssertBruteForceT(s, tautomata[n], n);
                        break;
                }
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList

            JCG.HashSet <StatePair> visited = new JCG.HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.Enqueue(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.Dequeue();
                if (p.s1.accept && !p.s2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MaxCodePoint)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MaxCodePoint;
                            max1 = Character.MinCodePoint;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.Enqueue(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Adds epsilon transitions to the given automaton. This method adds extra
        /// character interval transitions that are equivalent to the given set of
        /// epsilon transitions.
        /// </summary>
        /// <param name="a"> Automaton. </param>
        /// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
        ///          source/destination states where epsilon transitions should be
        ///          added. </param>
        public static void AddEpsilons(Automaton a, ICollection <StatePair> pairs)
        {
            a.ExpandSingleton();
            Dictionary <State, JCG.HashSet <State> > forward = new Dictionary <State, JCG.HashSet <State> >();
            Dictionary <State, JCG.HashSet <State> > back    = new Dictionary <State, JCG.HashSet <State> >();

            foreach (StatePair p in pairs)
            {
                if (!forward.TryGetValue(p.s1, out JCG.HashSet <State> to))
                {
                    to            = new JCG.HashSet <State>();
                    forward[p.s1] = to;
                }
                to.Add(p.s2);
                if (!back.TryGetValue(p.s2, out JCG.HashSet <State> from))
                {
                    from       = new JCG.HashSet <State>();
                    back[p.s2] = from;
                }
                from.Add(p.s1);
            }
            // calculate epsilon closure
            LinkedList <StatePair> worklist = new LinkedList <StatePair>(pairs);

            JCG.HashSet <StatePair> workset = new JCG.HashSet <StatePair>(pairs);
            while (worklist.Count > 0)
            {
                StatePair p = worklist.First.Value;
                worklist.Remove(p);
                workset.Remove(p);
#pragma warning disable IDE0018 // Inline variable declaration
                JCG.HashSet <State> from;
#pragma warning restore IDE0018 // Inline variable declaration
                if (forward.TryGetValue(p.s2, out JCG.HashSet <State> to))
                {
                    foreach (State s in to)
                    {
                        StatePair pp = new StatePair(p.s1, s);
                        if (!pairs.Contains(pp))
                        {
                            pairs.Add(pp);
                            forward[p.s1].Add(s);
                            back[s].Add(p.s1);
                            worklist.AddLast(pp);
                            workset.Add(pp);
                            if (back.TryGetValue(p.s1, out from))
                            {
                                foreach (State q in from)
                                {
                                    StatePair qq = new StatePair(q, p.s1);
                                    if (!workset.Contains(qq))
                                    {
                                        worklist.AddLast(qq);
                                        workset.Add(qq);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // add transitions
            foreach (StatePair p in pairs)
            {
                p.s1.AddEpsilon(p.s2);
            }
            a.deterministic = false;
            //a.clearHashCode();
            a.ClearNumberedStates();
            a.CheckMinimizeAlways();
        }
Exemplo n.º 15
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            LinkedList <SortedInt32Set.FrozenInt32Set>         worklist = new LinkedList <SortedInt32Set.FrozenInt32Set>();
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.AddLast(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            // LUCENENET TODO: THIS IS INFINITE LOOPING

            // LUCENENET NOTE: The problem here is almost certainly
            // due to the conversion to FrozenIntSet along with its
            // differing equality checking.
            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
                worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        Debug.Assert(lastPoint != -1);

                        statesSet.ComputeHash();

                        State q;
                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.AddLast(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                                Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
                                newStatesArray = newArray;
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }
Exemplo n.º 16
0
 /// <summary>
 /// Returns <c>true</c> if the given string is accepted by the automaton.
 /// <para/>
 /// Complexity: linear in the length of the string.
 /// <para/>
 /// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
 /// </summary>
 public static bool Run(Automaton a, string s)
 {
     if (a.IsSingleton)
     {
         return(s.Equals(a.singleton, StringComparison.Ordinal));
     }
     if (a.deterministic)
     {
         State p = a.initial;
         int   cp; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(cp))
         {
             State q = p.Step(cp = Character.CodePointAt(s, i));
             if (q is null)
             {
                 return(false);
             }
             p = q;
         }
         return(p.accept);
     }
     else
     {
         State[]            states   = a.GetNumberedStates();
         LinkedList <State> pp       = new LinkedList <State>();
         LinkedList <State> pp_other = new LinkedList <State>();
         OpenBitSet         bb       = new OpenBitSet(states.Length);
         OpenBitSet         bb_other = new OpenBitSet(states.Length);
         pp.AddLast(a.initial);
         JCG.List <State> dest   = new JCG.List <State>();
         bool             accept = a.initial.accept;
         int c; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(c))
         {
             c      = Character.CodePointAt(s, i);
             accept = false;
             pp_other.Clear();
             bb_other.Clear(0, bb_other.Length - 1);
             foreach (State p in pp)
             {
                 dest.Clear();
                 p.Step(c, dest);
                 foreach (State q in dest)
                 {
                     if (q.accept)
                     {
                         accept = true;
                     }
                     if (!bb_other.Get(q.number))
                     {
                         bb_other.Set(q.number);
                         pp_other.AddLast(q);
                     }
                 }
             }
             LinkedList <State> tp = pp;
             pp       = pp_other;
             pp_other = tp;
             OpenBitSet tb = bb;
             bb       = bb_other;
             bb_other = tb;
         }
         return(accept);
     }
 }
Exemplo n.º 17
0
 protected internal override Automaton ConvertAutomaton(Automaton a)
 {
     if (unicodeAware)
     {
         Automaton utf8automaton = (new UTF32ToUTF8()).Convert(a);
         BasicOperations.Determinize(utf8automaton);
         return utf8automaton;
     }
     else
     {
         return a;
     }
 }
Exemplo n.º 18
0
        internal Automaton ToLevenshteinAutomata(Automaton automaton)
        {
            var @ref = SpecialOperations.GetFiniteStrings(automaton, -1);
            Automaton[] subs = new Automaton[@ref.Count];
            int upto = 0;
            foreach (IntsRef path in @ref)
            {
                if (path.Length <= nonFuzzyPrefix || path.Length < minFuzzyLength)
                {
                    subs[upto] = BasicAutomata.MakeString(path.Ints, path.Offset, path.Length);
                    upto++;
                }
                else
                {
                    Automaton prefix = BasicAutomata.MakeString(path.Ints, path.Offset, nonFuzzyPrefix);
                    int[] ints = new int[path.Length - nonFuzzyPrefix];
                    Array.Copy(path.Ints, path.Offset + nonFuzzyPrefix, ints, 0, ints.Length);
                    // TODO: maybe add alphaMin to LevenshteinAutomata,
                    // and pass 1 instead of 0?  We probably don't want
                    // to allow the trailing dedup bytes to be
                    // edited... but then 0 byte is "in general" allowed
                    // on input (but not in UTF8).
                    LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? char.MAX_CODE_POINT : 255, transpositions);
                    Automaton levAutomaton = lev.ToAutomaton(maxEdits);
                    Automaton combined = BasicOperations.Concatenate(Arrays.AsList(prefix, levAutomaton));
                    combined.Deterministic = true; // its like the special case in concatenate itself, except we cloneExpanded already
                    subs[upto] = combined;
                    upto++;
                }
            }

            if (subs.Length == 0)
            {
                // automaton is empty, there is no accepted paths through it
                return BasicAutomata.MakeEmpty(); // matches nothing
            }
            else if (subs.Length == 1)
            {
                // no synonyms or anything: just a single path through the tokenstream
                return subs[0];
            }
            else
            {
                // multiple paths: this is really scary! is it slow?
                // maybe we should not do this and throw UOE?
                Automaton a = BasicOperations.Union(Arrays.AsList(subs));
                // TODO: we could call toLevenshteinAutomata() before det?
                // this only happens if you have multiple paths anyway (e.g. synonyms)
                BasicOperations.Determinize(a);

                return a;
            }
        }
Exemplo n.º 19
0
        public CompiledAutomaton(Automaton automaton, bool?finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:
                if (BasicOperations.IsEmpty(automaton))
                {
                    // matches nothing
                    Type              = AUTOMATON_TYPE.NONE;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else if (BasicOperations.IsTotal(automaton))
                {
                    // matches all possible strings
                    Type              = AUTOMATON_TYPE.ALL;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else
                {
                    string commonPrefix;
                    string singleton;
                    if (automaton.Singleton == null)
                    {
                        commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                        if (commonPrefix.Length > 0 && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix)))
                        {
                            singleton = commonPrefix;
                        }
                        else
                        {
                            singleton = null;
                        }
                    }
                    else
                    {
                        commonPrefix = null;
                        singleton    = automaton.Singleton;
                    }

                    if (singleton != null)
                    {
                        // matches a fixed string in singleton or expanded
                        // representation
                        Type              = AUTOMATON_TYPE.SINGLE;
                        Term              = new BytesRef(singleton);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                    else if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
                    {
                        // matches a constant prefix
                        Type              = AUTOMATON_TYPE.PREFIX;
                        Term              = new BytesRef(commonPrefix);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                }
            }

            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            if (finite == null)
            {
                this.Finite = SpecialOperations.IsFinite(automaton);
            }
            else
            {
                this.Finite = finite;
            }
            Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

            if (this.Finite == true)
            {
                CommonSuffixRef = null;
            }
            else
            {
                CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
            }
            RunAutomaton      = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }
Exemplo n.º 20
0
        private static void AssertAutomaton(Automaton automaton)
        {
            var cra = new CharacterRunAutomaton(automaton);
            var bra = new ByteRunAutomaton(automaton);
            var ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);

            int num = AtLeast(1000);
            for (int i = 0; i < num; i++)
            {
                string s;
                if (Random().NextBoolean())
                {
                    // likely not accepted
                    s = TestUtil.RandomUnicodeString(Random());
                }
                else
                {
                    // will be accepted
                    int[] codepoints = ras.GetRandomAcceptedString(Random());
                    try
                    {
                        s = UnicodeUtil.NewString(codepoints, 0, codepoints.Length);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(codepoints.Length + " codepoints:");
                        for (int j = 0; j < codepoints.Length; j++)
                        {
                            Console.WriteLine("  " + codepoints[j].ToString("x"));
                        }
                        throw e;
                    }
                }
                var bytes = s.GetBytes(Encoding.UTF8);
                Assert.AreEqual(cra.Run(s), bra.Run(bytes, 0, bytes.Length));
            }
        }
Exemplo n.º 21
0
 public CompiledAutomaton(Automaton automaton)
     : this(automaton, null, true)
 {
 }
Exemplo n.º 22
0
        /// <summary>
        /// Simple, original brics implementation of Determinize()
        /// Determinizes the given automaton using the given set of initial states.
        /// </summary>
        public static void DeterminizeSimple(Automaton a, ISet <State> initialset)
        {
            int[] points = a.GetStartPoints();
            // subset construction
            IDictionary <ISet <State>, ISet <State> > sets = new Dictionary <ISet <State>, ISet <State> >();
            Queue <ISet <State> >             worklist     = new Queue <ISet <State> >();// LUCENENET specific - Queue is much more performant than LinkedList
            IDictionary <ISet <State>, State> newstate     = new Dictionary <ISet <State>, State>();

            sets[initialset] = initialset;
            worklist.Enqueue(initialset);
            a.initial            = new State();
            newstate[initialset] = a.initial;
            while (worklist.Count > 0)
            {
                ISet <State> s = worklist.Dequeue();
                State        r = newstate[s];
                foreach (State q in s)
                {
                    if (q.accept)
                    {
                        r.accept = true;
                        break;
                    }
                }
                for (int n = 0; n < points.Length; n++)
                {
                    ISet <State> p = new JCG.HashSet <State>();
                    foreach (State q in s)
                    {
                        foreach (Transition t in q.GetTransitions())
                        {
                            if (t.min <= points[n] && points[n] <= t.max)
                            {
                                p.Add(t.to);
                            }
                        }
                    }
                    if (!sets.ContainsKey(p))
                    {
                        sets[p] = p;
                        worklist.Enqueue(p);
                        newstate[p] = new State();
                    }
                    State q_  = newstate[p];
                    int   min = points[n];
                    int   max;
                    if (n + 1 < points.Length)
                    {
                        max = points[n + 1] - 1;
                    }
                    else
                    {
                        max = Character.MaxCodePoint;
                    }
                    r.AddTransition(new Transition(min, max, q_));
                }
            }
            a.deterministic = true;
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Exemplo n.º 23
0
 public CharacterRunAutomaton(Automaton a)
     : base(a, Character.MAX_CODE_POINT, false)
 {
 }
Exemplo n.º 24
0
 /// <summary>
 /// Returns true if the language of this automaton is finite.
 /// <p>
 /// WARNING: this method is slow, it will blow up if the automaton is large.
 /// this is only used to test the correctness of our faster implementation.
 /// </summary>
 public static bool IsFiniteSlow(Automaton a)
 {
     if (!String.IsNullOrEmpty(a.Singleton))
     {
         return true;
     }
     return IsFiniteSlow(a.InitialState, new HashSet<State>());
 }
Exemplo n.º 25
0
        public static BytesRef GetCommonSuffixBytesRef(Automaton a)
        {
            if (a.IsSingleton) // if singleton, the suffix is the string itself.
            {
                return new BytesRef(a.singleton);
            }

            // reverse the language of the automaton, then reverse its common prefix.
            Automaton r = (Automaton)a.Clone();
            Reverse(r);
            r.Determinize();
            BytesRef @ref = SpecialOperations.GetCommonPrefixBytesRef(r);
            ReverseBytes(@ref);
            return @ref;
        }
Exemplo n.º 26
0
 /// <summary>
 /// Checks that an automaton has no detached states that are unreachable
 /// from the initial state.
 /// </summary>
 public static void AssertNoDetachedStates(Automaton a)
 {
     int numStates = a.NumberOfStates;
     a.ClearNumberedStates(); // force recomputation of cached numbered states
     Assert.True(numStates == a.NumberOfStates, "automaton has " + (numStates - a.NumberOfStates) + " detached states");
 }
Exemplo n.º 27
0
        protected internal override IList<FSTUtil.Path<Pair<long?, BytesRef>>> GetFullPrefixPaths(IList<FSTUtil.Path<Pair<long?, BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<Pair<long?, BytesRef>> fst)
        {
            // TODO: right now there's no penalty for fuzzy/edits,
            // ie a completion whose prefix matched exactly what the
            // user typed gets no boost over completions that
            // required an edit, which get no boost over completions
            // requiring two edits.  I suspect a multiplicative
            // factor is appropriate (eg, say a fuzzy match must be at
            // least 2X better weight than the non-fuzzy match to
            // "compete") ... in which case I think the wFST needs
            // to be log weights or something ...

            Automaton levA = convertAutomaton(ToLevenshteinAutomata(lookupAutomaton));
            /*
              Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
              w.write(levA.toDot());
              w.close();
              System.out.println("Wrote LevA to out.dot");
            */
            return FSTUtil.IntersectPrefixPaths(levA, fst);
        }
Exemplo n.º 28
0
        // TODO: this currently requites a determinized machine,
        // but it need not -- we can speed it up by walking the
        // NFA instead.  it'd still be fail fast.
        public static BytesRef GetCommonPrefixBytesRef(Automaton a)
        {
            if (a.IsSingleton)
            {
                return new BytesRef(a.singleton);
            }
            BytesRef @ref = new BytesRef(10);
            HashSet<State> visited = new HashSet<State>();
            State s = a.Initial;
            bool done;
            do
            {
                done = true;
                visited.Add(s);
                if (!s.accept && s.NumTransitions() == 1)
                {
                    var iter = s.Transitions.GetEnumerator();
                    iter.MoveNext();
                    Transition t = iter.Current;

                    if (t.Min_Renamed == t.Max_Renamed && !visited.Contains(t.To))
                    {
                        @ref.Grow([email protected]);
                        @ref.Bytes[@ref.Length - 1] = (byte)t.Min_Renamed;
                        s = t.To;
                        done = false;
                    }
                }
            } while (!done);
            return @ref;
        }
Exemplo n.º 29
0
 private static Automaton NaiveUnion(IList<BytesRef> strings)
 {
     Automaton[] eachIndividual = new Automaton[strings.Count];
     int i = 0;
     foreach (BytesRef bref in strings)
     {
         eachIndividual[i++] = BasicAutomata.MakeString(bref.Utf8ToString());
     }
     return BasicOperations.Union(eachIndividual);
 }
Exemplo n.º 30
0
        // TODO: this is a dangerous method ... Automaton could be
        // huge ... and it's better in general for caller to
        // enumerate & process in a single walk:

        /// <summary>
        /// Returns the set of accepted strings, assuming that at most
        /// <code>limit</code> strings are accepted. If more than <code>limit</code>
        /// strings are accepted, the first limit strings found are returned. If <code>limit</code>&lt;0, then
        /// the limit is infinite.
        /// </summary>
        public static ISet<IntsRef> GetFiniteStrings(Automaton a, int limit)
        {
            HashSet<IntsRef> strings = new HashSet<IntsRef>();
            if (a.IsSingleton)
            {
                if (limit > 0)
                {
                    strings.Add(Util.ToUTF32(a.Singleton, new IntsRef()));
                }
            }
            else if (!GetFiniteStrings(a.Initial, new HashSet<State>(), strings, new IntsRef(), limit))
            {
                return strings;
            }
            return strings;
        }
Exemplo n.º 31
0
        /// <summary>
        /// Compute a DFA that accepts all strings within an edit distance of <paramref name="n"/>.
        /// <para>
        /// All automata have the following properties:
        /// <list type="bullet">
        ///     <item><description>They are deterministic (DFA).</description></item>
        ///     <item><description>There are no transitions to dead states.</description></item>
        ///     <item><description>They are not minimal (some transitions could be combined).</description></item>
        /// </list>
        /// </para>
        /// </summary>
        public virtual Automaton ToAutomaton(int n)
        {
            if (n == 0)
            {
                return(BasicAutomata.MakeString(word, 0, word.Length));
            }

            if (n >= descriptions.Length)
            {
                return(null);
            }

            int range = 2 * n + 1;
            ParametricDescription description = descriptions[n];

            // the number of states is based on the length of the word and n
            State[] states = new State[description.Count];
            // create all states, and mark as accept states if appropriate
            for (int i = 0; i < states.Length; i++)
            {
                states[i]        = new State();
                states[i].number = i;
                states[i].Accept = description.IsAccept(i);
            }
            // create transitions from state to state
            for (int k = 0; k < states.Length; k++)
            {
                int xpos = description.GetPosition(k);
                if (xpos < 0)
                {
                    continue;
                }
                int end = xpos + Math.Min(word.Length - xpos, range);

                for (int x = 0; x < alphabet.Length; x++)
                {
                    int ch = alphabet[x];
                    // get the characteristic vector at this position wrt ch
                    int cvec = GetVector(ch, xpos, end);
                    int dest = description.Transition(k, xpos, cvec);
                    if (dest >= 0)
                    {
                        states[k].AddTransition(new Transition(ch, states[dest]));
                    }
                }
                // add transitions for all other chars in unicode
                // by definition, their characteristic vectors are always 0,
                // because they do not exist in the input string.
                int dest_ = description.Transition(k, xpos, 0); // by definition
                if (dest_ >= 0)
                {
                    for (int r = 0; r < numRanges; r++)
                    {
                        states[k].AddTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest_]));
                    }
                }
            }

            Automaton a = new Automaton(states[0]);

            a.IsDeterministic = true;
            // we create some useless unconnected states, and its a net-win overall to remove these,
            // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
            // so, while we could set our numberedStates here, its actually best not to, and instead to
            // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
            //a.setNumberedStates(states);
            a.Reduce();
            // we need not trim transitions to dead states, as they are not created.
            //a.restoreInvariant();
            return(a);
        }
Exemplo n.º 32
0
 /// <summary>
 /// Returns true if the given string is accepted by the automaton.
 /// <p>
 /// Complexity: linear in the length of the string.
 /// <p>
 /// <b>Note:</b> for full performance, use the <seealso cref="RunAutomaton"/> class.
 /// </summary>
 public static bool Run(Automaton a, string s)
 {
     if (a.IsSingleton)
     {
         return(s.Equals(a.singleton));
     }
     if (a.deterministic)
     {
         State p = a.Initial;
         for (int i = 0, cp = 0; i < s.Length; i += Character.CharCount(cp))
         {
             State q = p.Step(cp = Character.CodePointAt(s, i));
             if (q == null)
             {
                 return(false);
             }
             p = q;
         }
         return(p.accept);
     }
     else
     {
         State[]            states   = a.NumberedStates;
         LinkedList <State> pp       = new LinkedList <State>();
         LinkedList <State> pp_other = new LinkedList <State>();
         BitArray           bb       = new BitArray(states.Length);
         BitArray           bb_other = new BitArray(states.Length);
         pp.AddLast(a.Initial);
         List <State> dest   = new List <State>();
         bool         accept = a.Initial.accept;
         for (int i = 0, c = 0; i < s.Length; i += Character.CharCount(c))
         {
             c      = Character.CodePointAt(s, i);
             accept = false;
             pp_other.Clear();
             bb_other.SetAll(false);
             foreach (State p in pp)
             {
                 dest.Clear();
                 p.Step(c, dest);
                 foreach (State q in dest)
                 {
                     if (q.accept)
                     {
                         accept = true;
                     }
                     if (!bb_other.SafeGet(q.number))
                     {
                         bb_other.SafeSet(q.number, true);
                         pp_other.AddLast(q);
                     }
                 }
             }
             LinkedList <State> tp = pp;
             pp       = pp_other;
             pp_other = tp;
             BitArray tb = bb;
             bb       = bb_other;
             bb_other = tb;
         }
         return(accept);
     }
 }
Exemplo n.º 33
0
 /// <summary>
 /// See <seealso cref="BasicOperations#concatenate(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Concatenate(Automaton a)
 {
     return(BasicOperations.Concatenate(this, a));
 }
Exemplo n.º 34
0
        /// <summary>
        /// Returns an automaton that accepts the intersection of the languages of the
        /// given automata. Never modifies the input automata languages.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static Automaton Intersection(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton)
            {
                if (BasicOperations.Run(a2, a1.singleton))
                {
                    return(a1.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a2.IsSingleton)
            {
                if (BasicOperations.Run(a1, a2.singleton))
                {
                    return(a2.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a1 == a2)
            {
                return(a1.CloneIfRequired());
            }
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Automaton         c            = new Automaton();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList
            Dictionary <StatePair, StatePair> newstates = new Dictionary <StatePair, StatePair>();
            StatePair p = new StatePair(c.initial, a1.initial, a2.initial);

            worklist.Enqueue(p);
            newstates[p] = p;
            while (worklist.Count > 0)
            {
                p          = worklist.Dequeue();
                p.s.accept = p.s1.accept && p.s2.accept;
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].max >= t1[n1].min)
                        {
                            StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                            if (!newstates.TryGetValue(q, out StatePair r) || r is null)
                            {
                                q.s = new State();
                                worklist.Enqueue(q);
                                newstates[q] = q;
                                r            = q;
                            }
                            int min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min;
                            int max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max;
                            p.s.AddTransition(new Transition(min, max, r.s));
                        }
                    }
                }
            }
            c.deterministic = a1.deterministic && a2.deterministic;
            c.RemoveDeadTransitions();
            c.CheckMinimizeAlways();
            return(c);
        }
Exemplo n.º 35
0
 /// <summary>
 /// See <seealso cref="BasicOperations#minus(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Minus(Automaton a)
 {
     return(BasicOperations.Minus(this, a));
 }
Exemplo n.º 36
0
        /// <summary>
        /// Determinizes the given automaton.
        /// <para/>
        /// Worst case complexity: exponential in number of states.
        /// </summary>
        public static void Determinize(Automaton a)
        {
            if (a.IsDeterministic || a.IsSingleton)
            {
                return;
            }

            State[] allStates = a.GetNumberedStates();

            // subset construction
            bool initAccept = a.initial.accept;
            int  initNumber = a.initial.number;

            a.initial = new State();
            SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

            Queue <SortedInt32Set.FrozenInt32Set> worklist = new Queue <SortedInt32Set.FrozenInt32Set>(); // LUCENENET specific - Queue is much more performant than LinkedList
            IDictionary <SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary <SortedInt32Set.FrozenInt32Set, State>();

            worklist.Enqueue(initialset);

            a.initial.accept     = initAccept;
            newstate[initialset] = a.initial;

            int newStateUpto = 0;

            State[] newStatesArray = new State[5];
            newStatesArray[newStateUpto] = a.initial;
            a.initial.number             = newStateUpto;
            newStateUpto++;

            // like Set<Integer,PointTransitions>
            PointTransitionSet points = new PointTransitionSet();

            // like SortedMap<Integer,Integer>
            SortedInt32Set statesSet = new SortedInt32Set(5);

            while (worklist.Count > 0)
            {
                SortedInt32Set.FrozenInt32Set s = worklist.Dequeue();
                //worklist.Remove(s);

                // Collate all outgoing transitions by min/1+max:
                for (int i = 0; i < s.values.Length; i++)
                {
                    State s0 = allStates[s.values[i]];
                    for (int j = 0; j < s0.numTransitions; j++)
                    {
                        points.Add(s0.TransitionsArray[j]);
                    }
                }

                if (points.count == 0)
                {
                    // No outgoing transitions -- skip it
                    continue;
                }

                points.Sort();

                int lastPoint = -1;
                int accCount  = 0;

                State r = s.state;
                for (int i = 0; i < points.count; i++)
                {
                    int point = points.points[i].point;

                    if (statesSet.upto > 0)
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(lastPoint != -1);
                        }

                        statesSet.ComputeHash();

                        if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out State q) || q is null)
                        {
                            q = new State();

                            SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                            worklist.Enqueue(p);
                            if (newStateUpto == newStatesArray.Length)
                            {
                                // LUCENENET: Resize rather than copy
                                Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                            }
                            newStatesArray[newStateUpto] = q;
                            q.number = newStateUpto;
                            newStateUpto++;
                            q.accept    = accCount > 0;
                            newstate[p] = q;
                        }
                        else
                        {
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert((accCount > 0) == q.accept, "accCount={0} vs existing accept={1} states={2}", accCount, q.accept, statesSet);
                            }
                        }

                        r.AddTransition(new Transition(lastPoint, point - 1, q));
                    }

                    // process transitions that end on this point
                    // (closes an overlapping interval)
                    Transition[] transitions = points.points[i].ends.transitions;
                    int          limit       = points.points[i].ends.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Decr(num);
                        accCount -= t.to.accept ? 1 : 0;
                    }
                    points.points[i].ends.count = 0;

                    // process transitions that start on this point
                    // (opens a new interval)
                    transitions = points.points[i].starts.transitions;
                    limit       = points.points[i].starts.count;
                    for (int j = 0; j < limit; j++)
                    {
                        Transition t   = transitions[j];
                        int        num = t.to.number;
                        statesSet.Incr(num);
                        accCount += t.to.accept ? 1 : 0;
                    }
                    lastPoint = point;
                    points.points[i].starts.count = 0;
                }
                points.Reset();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(statesSet.upto == 0, "upto={0}", statesSet.upto);
                }
            }
            a.deterministic = true;
            a.SetNumberedStates(newStatesArray, newStateUpto);
        }
Exemplo n.º 37
0
 /// <summary>
 /// See <seealso cref="BasicOperations#intersection(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Intersection(Automaton a)
 {
     return(BasicOperations.Intersection(this, a));
 }
Exemplo n.º 38
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in total number of states.
        /// </summary>
        public static Automaton Concatenate(IList <Automaton> l)
        {
            if (l.Count == 0)
            {
                return(BasicAutomata.MakeEmptyString());
            }
            bool all_singleton = true;

            foreach (Automaton a in l)
            {
                if (!a.IsSingleton)
                {
                    all_singleton = false;
                    break;
                }
            }
            if (all_singleton)
            {
                StringBuilder b = new StringBuilder();
                foreach (Automaton a in l)
                {
                    b.Append(a.singleton);
                }
                return(BasicAutomata.MakeString(b.ToString()));
            }
            else
            {
                foreach (Automaton a in l)
                {
                    if (BasicOperations.IsEmpty(a))
                    {
                        return(BasicAutomata.MakeEmpty());
                    }
                }
                JCG.HashSet <int> ids = new JCG.HashSet <int>();
                foreach (Automaton a in l)
                {
                    ids.Add(a.GetHashCode());
                }
                bool      has_aliases = ids.Count != l.Count;
                Automaton b           = l[0];
                if (has_aliases)
                {
                    b = b.CloneExpanded();
                }
                else
                {
                    b = b.CloneExpandedIfRequired();
                }
                ISet <State> ac    = b.GetAcceptStates();
                bool         first = true;
                foreach (Automaton a in l)
                {
                    if (first)
                    {
                        first = false;
                    }
                    else
                    {
                        if (a.IsEmptyString)
                        {
                            continue;
                        }
                        Automaton aa = a;
                        if (has_aliases)
                        {
                            aa = aa.CloneExpanded();
                        }
                        else
                        {
                            aa = aa.CloneExpandedIfRequired();
                        }
                        ISet <State> ns = aa.GetAcceptStates();
                        foreach (State s in ac)
                        {
                            s.accept = false;
                            s.AddEpsilon(aa.initial);
                            if (s.accept)
                            {
                                ns.Add(s);
                            }
                        }
                        ac = ns;
                    }
                }
                b.deterministic = false;
                //b.clearHashCode();
                b.ClearNumberedStates();
                b.CheckMinimizeAlways();
                return(b);
            }
        }
Exemplo n.º 39
0
 /// <summary>
 /// See <seealso cref="BasicOperations#subsetOf(Automaton, Automaton)"/>.
 /// </summary>
 public virtual bool SubsetOf(Automaton a)
 {
     return(BasicOperations.SubsetOf(this, a));
 }
Exemplo n.º 40
0
        /// <summary>
        /// Simple, original brics implementation of determinize()
        /// Determinizes the given automaton using the given set of initial states.
        /// </summary>
        public static void DeterminizeSimple(Automaton a, ISet <State> initialset)
        {
            int[] points = a.StartPoints;
            // subset construction
            IDictionary <ISet <State>, ISet <State> > sets = new Dictionary <ISet <State>, ISet <State> >();
            LinkedList <ISet <State> >        worklist     = new LinkedList <ISet <State> >();
            IDictionary <ISet <State>, State> newstate     = new Dictionary <ISet <State>, State>();

            sets[initialset] = initialset;
            worklist.AddLast(initialset);
            a.Initial            = new State();
            newstate[initialset] = a.Initial;
            while (worklist.Count > 0)
            {
                ISet <State> s = worklist.First.Value;
                worklist.RemoveFirst();
                State r = newstate[s];
                foreach (State q in s)
                {
                    if (q.Accept)
                    {
                        r.Accept = true;
                        break;
                    }
                }
                for (int n = 0; n < points.Length; n++)
                {
                    ISet <State> p = new ValueHashSet <State>();
                    foreach (State q in s)
                    {
                        foreach (Transition t in q.Transitions)
                        {
                            if (t.Min <= points[n] && points[n] <= t.Max)
                            {
                                p.Add(t.To);
                            }
                        }
                    }
                    if (!sets.ContainsKey(p))
                    {
                        sets[p] = p;
                        worklist.AddLast(p);
                        newstate[p] = new State();
                    }
                    State q_  = newstate[p];
                    int   min = points[n];
                    int   max;
                    if (n + 1 < points.Length)
                    {
                        max = points[n + 1] - 1;
                    }
                    else
                    {
                        max = Character.MAX_CODE_POINT;
                    }
                    r.AddTransition(new Transition(min, max, q_));
                }
            }
            a.Deterministic = true;
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Exemplo n.º 41
0
 /// <summary>
 /// See <seealso cref="BasicOperations#union(Automaton, Automaton)"/>.
 /// </summary>
 public virtual Automaton Union(Automaton a)
 {
     return(BasicOperations.Union(this, a));
 }
Exemplo n.º 42
0
 /// <summary>
 /// See <seealso cref="MinimizationOperations#minimize(Automaton)"/>. Returns the
 /// automaton being given as argument.
 /// </summary>
 public static Automaton Minimize(Automaton a)
 {
     MinimizationOperations.Minimize(a);
     return(a);
 }
 public CharacterRunAutomaton(Automaton a)
     : base(a, Character.MaxCodePoint, false)
 {
 }
Exemplo n.º 44
0
 /// <summary>
 /// below are original, unoptimized implementations of DFA operations for testing.
 /// These are from brics automaton, full license (BSD) below:
 /// </summary>
 /*
  * dk.brics.automaton
  *
  * Copyright (c) 2001-2009 Anders Moeller
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /// <summary>
 /// Simple, original brics implementation of Brzozowski minimize()
 /// </summary>
 public static void MinimizeSimple(Automaton a)
 {
     if (!String.IsNullOrEmpty(a.Singleton))
     {
         return;
     }
     DeterminizeSimple(a, SpecialOperations.Reverse(a));
     DeterminizeSimple(a, SpecialOperations.Reverse(a));
 }
Exemplo n.º 45
0
 internal DumbRegexpQuery(Term term, RegExpSyntax flags)
     : base(term.Field)
 {
     RegExp re = new RegExp(term.Text, flags);
     automaton = re.ToAutomaton();
 }
Exemplo n.º 46
0
            public RandomAcceptedStrings(Automaton a)
            {
                this.a = a;
                if (!String.IsNullOrEmpty(a.Singleton))
                {
                    LeadsToAccept = null;
                    return;
                }

                // must use IdentityHashmap because two Transitions w/
                // different start nodes can be considered the same
                LeadsToAccept = new IdentityHashMap<Transition, bool?>();
                IDictionary<State, IList<ArrivingTransition>> allArriving = new Dictionary<State, IList<ArrivingTransition>>();

                LinkedList<State> q = new LinkedList<State>();
                HashSet<State> seen = new HashSet<State>();

                // reverse map the transitions, so we can quickly look
                // up all arriving transitions to a given state
                foreach (State s in a.NumberedStates)
                {
                    for (int i = 0; i < s.numTransitions; i++)
                    {
                        Transition t = s.TransitionsArray[i];
                        IList<ArrivingTransition> tl;
                        allArriving.TryGetValue(t.Dest, out tl);
                        if (tl == null)
                        {
                            tl = new List<ArrivingTransition>();
                            allArriving[t.Dest] = tl;
                        }
                        tl.Add(new ArrivingTransition(s, t));
                    }
                    if (s.Accept)
                    {
                        q.AddLast(s);
                        seen.Add(s);
                    }
                }

                // Breadth-first search, from accept states,
                // backwards:
                while (q.Count > 0)
                {
                    State s = q.First.Value;
                    q.RemoveFirst();
                    IList<ArrivingTransition> arriving;
                    allArriving.TryGetValue(s, out arriving);
                    if (arriving != null)
                    {
                        foreach (ArrivingTransition at in arriving)
                        {
                            State from = at.From;
                            if (!seen.Contains(from))
                            {
                                q.AddLast(from);
                                seen.Add(from);
                                LeadsToAccept[at.t] = true;
                            }
                        }
                    }
                }
            }
Exemplo n.º 47
0
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// </summary>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            if (a.initial.numTransitions == 1)
            {
                Transition t = a.initial.TransitionsArray[0];
                if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
                {
                    return;
                }
            }
            a.Totalize();

            // initialize data structures
            int[]   sigma = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            int     sigmaLen = sigma.Length, statesLen = states.Length;

            List <State>[,] reverse = new List <State> [statesLen, sigmaLen];
            ISet <State>[] partition  = new JCG.HashSet <State> [statesLen];
            List <State>[] splitblock = new List <State> [statesLen];
            int[]          block      = new int[statesLen];
            StateList[,] active      = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            LinkedList <Int32Pair> pending = new LinkedList <Int32Pair>();
            OpenBitSet             pending2 = new OpenBitSet(sigmaLen * statesLen);
            OpenBitSet             split = new OpenBitSet(statesLen),
                                   refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);

            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new List <State>();
                partition[q]  = new JCG.HashSet <State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                int   j  = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] == null)
                    {
                        reverse[r, x] = new List <State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
                pending.AddLast(new Int32Pair(j, x));
                pending2.Set(x * statesLen + j);
            }
            // process pending until fixed point
            int k = 2;

            while (pending.Count > 0)
            {
                Int32Pair ip = pending.First.Value;
                pending.Remove(ip);
                int p = ip.N1;
                int x = ip.N2;
                pending2.Clear(x * statesLen + p);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    List <State> r = reverse[m.Q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            if (!split.Get(i))
                            {
                                split.Set(i);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j);
                                    refine.Set(j);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
                {
                    List <State> sb = splitblock[j];
                    if (sb.Count < partition[j].Count)
                    {
                        ISet <State> b1 = partition[j];
                        ISet <State> b2 = partition[k];
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j);
                                pending.AddLast(new Int32Pair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k);
                                pending.AddLast(new Int32Pair(k, c));
                            }
                        }
                        k++;
                    }
                    refine2.Clear(j);
                    foreach (State s in sb)
                    {
                        split.Clear(s.number);
                    }
                    sb.Clear();
                }
                refine.Clear(0, refine.Length - 1);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.initial)
                    {
                        a.initial = s;
                    }
                    s.accept = q.accept;
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].GetTransitions())
                {
                    s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Exemplo n.º 48
0
 /// <summary>
 /// Simple, original brics implementation of determinize()
 /// Determinizes the given automaton using the given set of initial states.
 /// </summary>
 public static void DeterminizeSimple(Automaton a, ISet<State> initialset)
 {
     int[] points = a.StartPoints;
     // subset construction
     IDictionary<ISet<State>, ISet<State>> sets = new Dictionary<ISet<State>, ISet<State>>();
     LinkedList<ISet<State>> worklist = new LinkedList<ISet<State>>();
     IDictionary<ISet<State>, State> newstate = new Dictionary<ISet<State>, State>();
     sets[initialset] = initialset;
     worklist.AddLast(initialset);
     a.InitialState = new State();
     newstate[initialset] = a.InitialState;
     while (worklist.Count > 0)
     {
         ISet<State> s = worklist.First.Value;
         worklist.RemoveFirst();
         State r = newstate[s];
         foreach (State q in s)
         {
             if (q.Accept)
             {
                 r.Accept = true;
                 break;
             }
         }
         for (int n = 0; n < points.Length; n++)
         {
             ISet<State> p = new HashSet<State>();
             foreach (State q in s)
             {
                 foreach (Transition t in q.Transitions)
                 {
                     if (t.Min <= points[n] && points[n] <= t.Max)
                     {
                         p.Add(t.Dest);
                     }
                 }
             }
             if (!sets.ContainsKey(p))
             {
                 sets[p] = p;
                 worklist.AddLast(p);
                 newstate[p] = new State();
             }
             State q_ = newstate[p];
             int min = points[n];
             int max;
             if (n + 1 < points.Length)
             {
                 max = points[n + 1] - 1;
             }
             else
             {
                 max = Character.MAX_CODE_POINT;
             }
             r.AddTransition(new Transition(min, max, q_));
         }
     }
     a.Deterministic = true;
     a.ClearNumberedStates();
     a.RemoveDeadTransitions();
 }
Exemplo n.º 49
0
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// </summary>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            if (a.Initial.numTransitions == 1)
            {
                Transition t = a.Initial.TransitionsArray[0];
                if (t.To == a.Initial && t.Min_Renamed == Character.MIN_CODE_POINT && t.Max_Renamed == Character.MAX_CODE_POINT)
                {
                    return;
                }
            }
            a.Totalize();

            // initialize data structures
            int[] sigma = a.StartPoints;
            State[] states = a.NumberedStates;
            int sigmaLen = sigma.Length, statesLen = states.Length;
            List<State>[,] reverse = new List<State>[statesLen, sigmaLen];
            HashSet<State>[] partition = new HashSet<State>[statesLen];
            List<State>[] splitblock = new List<State>[statesLen];
            int[] block = new int[statesLen];
            StateList[,] active = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            LinkedList<IntPair> pending = new LinkedList<IntPair>();
            BitArray pending2 = new BitArray(sigmaLen * statesLen);
            BitArray split = new BitArray(statesLen), refine = new BitArray(statesLen), refine2 = new BitArray(statesLen);
            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new List<State>();
                partition[q] = new HashSet<State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                int j = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] == null)
                    {
                        reverse[r, x] = new List<State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                int j = (active[0, x].Size <= active[1, x].Size) ? 0 : 1;
                pending.AddLast(new IntPair(j, x));
                pending2.Set(x * statesLen + j, true);
            }
            // process pending until fixed point
            int k = 2;
            while (pending.Count > 0)
            {
                IntPair ip = pending.First.Value;
                pending.RemoveFirst();
                int p = ip.N1;
                int x = ip.N2;
                pending2.Set(x * statesLen + p, false);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    List<State> r = reverse[m.q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            if (!split.Get(i))
                            {
                                split.Set(i, true);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j, true);
                                    refine.Set(j, true);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = Number.NextSetBit(refine, 0); j >= 0; j = Number.NextSetBit(refine, j + 1))
                {
                    List<State> sb = splitblock[j];
                    if (sb.Count < partition[j].Count)
                    {
                        HashSet<State> b1 = partition[j];
                        HashSet<State> b2 = partition[k];
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Size, ak = active[k, c].Size, ofs = c * statesLen;
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j, true);
                                pending.AddLast(new IntPair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k, true);
                                pending.AddLast(new IntPair(k, c));
                            }
                        }
                        k++;
                    }
                    refine2.Set(j, false);
                    foreach (State s in sb)
                    {
                        split.Set(s.number, false);
                    }
                    sb.Clear();
                }
                refine.SetAll(false);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.Initial)
                    {
                        a.Initial = s;
                    }
                    s.accept = q.accept;
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].Transitions)
                {
                    s.AddTransition(new Transition(t.Min_Renamed, t.Max_Renamed, newstates[t.To.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
Exemplo n.º 50
0
 /// <summary>
 /// Returns the longest string that is a prefix of all accepted strings and
 /// visits each state at most once.
 /// </summary>
 /// <returns> common prefix </returns>
 public static string GetCommonPrefix(Automaton a)
 {
     if (a.IsSingleton)
     {
         return a.singleton;
     }
     StringBuilder b = new StringBuilder();
     HashSet<State> visited = new HashSet<State>();
     State s = a.Initial;
     bool done;
     do
     {
         done = true;
         visited.Add(s);
         if (!s.accept && s.NumTransitions() == 1)
         {
             var iter = s.Transitions.GetEnumerator();
             iter.MoveNext();
             Transition t = iter.Current;
             if (t.Min_Renamed == t.Max_Renamed && !visited.Contains(t.To))
             {
                 //b.appendCodePoint(t.Min_Renamed);
                 b.Append(t.Min_Renamed);
                 s = t.To;
                 done = false;
             }
         }
     } while (!done);
     return b.ToString();
 }
Exemplo n.º 51
0
        /// <summary>
        /// Returns an automaton that accepts the intersection of the languages of the
        /// given automata. Never modifies the input automata languages.
        /// <p>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static Automaton Intersection(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton)
            {
                if (BasicOperations.Run(a2, a1.singleton))
                {
                    return(a1.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a2.IsSingleton)
            {
                if (BasicOperations.Run(a1, a2.singleton))
                {
                    return(a2.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a1 == a2)
            {
                return(a1.CloneIfRequired());
            }
            Transition[][]                    transitions1 = a1.SortedTransitions;
            Transition[][]                    transitions2 = a2.SortedTransitions;
            Automaton                         c            = new Automaton();
            LinkedList <StatePair>            worklist     = new LinkedList <StatePair>();
            Dictionary <StatePair, StatePair> newstates    = new Dictionary <StatePair, StatePair>();
            StatePair                         p            = new StatePair(c.Initial, a1.Initial, a2.Initial);

            worklist.AddLast(p);
            newstates[p] = p;
            while (worklist.Count > 0)
            {
                p = worklist.First.Value;
                worklist.RemoveFirst();
                p.s.accept = p.S1.accept && p.S2.accept;
                Transition[] t1 = transitions1[p.S1.number];
                Transition[] t2 = transitions2[p.S2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].Max_Renamed < t1[n1].Min_Renamed)
                    {
                        b2++;
                    }
                    for (int n2 = b2; n2 < t2.Length && t1[n1].Max_Renamed >= t2[n2].Min_Renamed; n2++)
                    {
                        if (t2[n2].Max_Renamed >= t1[n1].Min_Renamed)
                        {
                            StatePair q = new StatePair(t1[n1].To, t2[n2].To);
                            StatePair r;
                            newstates.TryGetValue(q, out r);
                            if (r == null)
                            {
                                q.s = new State();
                                worklist.AddLast(q);
                                newstates[q] = q;
                                r            = q;
                            }
                            int min = t1[n1].Min_Renamed > t2[n2].Min_Renamed ? t1[n1].Min_Renamed : t2[n2].Min_Renamed;
                            int max = t1[n1].Max_Renamed < t2[n2].Max_Renamed ? t1[n1].Max_Renamed : t2[n2].Max_Renamed;
                            p.s.AddTransition(new Transition(min, max, r.s));
                        }
                    }
                }
            }
            c.deterministic = a1.deterministic && a2.deterministic;
            c.RemoveDeadTransitions();
            c.CheckMinimizeAlways();
            return(c);
        }
Exemplo n.º 52
0
        /// <summary>
        /// Returns the longest string that is a suffix of all accepted strings and
        /// visits each state at most once.
        /// </summary>
        /// <returns> common suffix </returns>
        public static string GetCommonSuffix(Automaton a)
        {
            if (a.IsSingleton) // if singleton, the suffix is the string itself.
            {
                return a.singleton;
            }

            // reverse the language of the automaton, then reverse its common prefix.
            Automaton r = (Automaton)a.Clone();
            Reverse(r);
            r.Determinize();
            return (new StringBuilder(SpecialOperations.GetCommonPrefix(r))).Reverse().ToString();
        }
Exemplo n.º 53
0
        private Automaton ToAutomaton(IDictionary <string, Automaton> automata, AutomatonProvider automaton_provider)
        {
            IList <Automaton> list;
            Automaton         a = null;

            switch (kind)
            {
            case Kind.REGEXP_UNION:
                list = new List <Automaton>();
                FindLeaves(Exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
                FindLeaves(Exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
                a = BasicOperations.Union(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CONCATENATION:
                list = new List <Automaton>();
                FindLeaves(Exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                FindLeaves(Exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                a = BasicOperations.Concatenate(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_INTERSECTION:
                a = Exp1.ToAutomaton(automata, automaton_provider).Intersection(Exp2.ToAutomaton(automata, automaton_provider));
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_OPTIONAL:
                a = Exp1.ToAutomaton(automata, automaton_provider).Optional();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT:
                a = Exp1.ToAutomaton(automata, automaton_provider).Repeat();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MIN:
                a = Exp1.ToAutomaton(automata, automaton_provider).Repeat(Min);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MINMAX:
                a = Exp1.ToAutomaton(automata, automaton_provider).Repeat(Min, Max);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_COMPLEMENT:
                a = Exp1.ToAutomaton(automata, automaton_provider).Complement();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CHAR:
                a = BasicAutomata.MakeChar(c);
                break;

            case Kind.REGEXP_CHAR_RANGE:
                a = BasicAutomata.MakeCharRange(From, To);
                break;

            case Kind.REGEXP_ANYCHAR:
                a = BasicAutomata.MakeAnyChar();
                break;

            case Kind.REGEXP_EMPTY:
                a = BasicAutomata.MakeEmpty();
                break;

            case Kind.REGEXP_STRING:
                a = BasicAutomata.MakeString(s);
                break;

            case Kind.REGEXP_ANYSTRING:
                a = BasicAutomata.MakeAnyString();
                break;

            case Kind.REGEXP_AUTOMATON:
                Automaton aa = null;
                if (automata != null)
                {
                    aa = automata[s];
                }
                if (aa == null && automaton_provider != null)
                {
                    try
                    {
                        aa = automaton_provider.GetAutomaton(s);
                    }
                    catch (System.IO.IOException e)
                    {
                        throw new System.ArgumentException(e.Message, e);
                    }
                }
                if (aa == null)
                {
                    throw new System.ArgumentException("'" + s + "' not found");
                }
                a = (Automaton)aa.Clone();     // always clone here (ignore allow_mutate)
                break;

            case Kind.REGEXP_INTERVAL:
                a = BasicAutomata.MakeInterval(Min, Max, Digits);
                break;
            }
            return(a);
        }
Exemplo n.º 54
0
 /// <summary>
 /// Reverses the language of the given (non-singleton) automaton while returning
 /// the set of new initial states.
 /// </summary>
 public static ISet<State> Reverse(Automaton a)
 {
     a.ExpandSingleton();
     // reverse all edges
     Dictionary<State, HashSet<Transition>> m = new Dictionary<State, HashSet<Transition>>();
     State[] states = a.NumberedStates;
     HashSet<State> accept = new HashSet<State>();
     foreach (State s in states)
     {
         if (s.Accept)
         {
             accept.Add(s);
         }
     }
     foreach (State r in states)
     {
         m[r] = new HashSet<Transition>();
         r.accept = false;
     }
     foreach (State r in states)
     {
         foreach (Transition t in r.Transitions)
         {
             m[t.To].Add(new Transition(t.Min_Renamed, t.Max_Renamed, r));
         }
     }
     foreach (State r in states)
     {
         HashSet<Transition> tr = m[r];
         r.Transitions = tr.ToArray(/*new Transition[tr.Count]*/);
     }
     // make new initial+final states
     a.Initial.accept = true;
     a.Initial = new State();
     foreach (State r in accept)
     {
         a.Initial.AddEpsilon(r); // ensures that all initial states are reachable
     }
     a.deterministic = false;
     a.ClearNumberedStates();
     return accept;
 }
Exemplo n.º 55
0
 /// <summary>
 /// Constructs a new <code>RunAutomaton</code> from a deterministic
 /// <code>Automaton</code>.
 /// </summary>
 /// <param name="a"> an automaton </param>
 /// <param name="maxInterval"></param>
 /// <param name="tableize"></param>
 protected RunAutomaton(Automaton a, int maxInterval, bool tableize)
 {
     this._maxInterval = maxInterval;
     a.Determinize();
     _points = a.StartPoints;
     State[] states = a.NumberedStates;
     Initial = a.Initial.Number;
     _size = states.Length;
     Accept = new bool[_size];
     Transitions = new int[_size * _points.Length];
     for (int n = 0; n < _size * _points.Length; n++)
     {
         Transitions[n] = -1;
     }
     foreach (State s in states)
     {
         int n = s.number;
         Accept[n] = s.accept;
         for (int c = 0; c < _points.Length; c++)
         {
             State q = s.Step(_points[c]);
             if (q != null)
             {
                 Transitions[n * _points.Length + c] = q.number;
             }
         }
     }
     /*
      * Set alphabet table for optimal run performance.
      */
     if (tableize)
     {
         _classmap = new int[maxInterval + 1];
         int i = 0;
         for (int j = 0; j <= maxInterval; j++)
         {
             if (i + 1 < _points.Length && j == _points[i + 1])
             {
                 i++;
             }
             _classmap[j] = i;
         }
     }
     else
     {
         _classmap = null;
     }
 }
Exemplo n.º 56
0
        /// <summary>
        /// Compute a DFA that accepts all strings within an edit distance of <code>n</code>.
        /// <p>
        /// All automata have the following properties:
        /// <ul>
        /// <li>They are deterministic (DFA).
        /// <li>There are no transitions to dead states.
        /// <li>They are not minimal (some transitions could be combined).
        /// </ul>
        /// </p>
        /// </summary>
        public virtual Automaton ToAutomaton(int n)
        {
            if (n == 0)
            {
                return BasicAutomata.MakeString(Word, 0, Word.Length);
            }

            if (n >= Descriptions.Length)
            {
                return null;
            }

            int range = 2 * n + 1;
            ParametricDescription description = Descriptions[n];
            // the number of states is based on the length of the word and n
            State[] states = new State[description.Size()];
            // create all states, and mark as accept states if appropriate
            for (int i = 0; i < states.Length; i++)
            {
                states[i] = new State();
                states[i].number = i;
                states[i].Accept = description.IsAccept(i);
            }
            // create transitions from state to state
            for (int k = 0; k < states.Length; k++)
            {
                int xpos = description.GetPosition(k);
                if (xpos < 0)
                {
                    continue;
                }
                int end = xpos + Math.Min(Word.Length - xpos, range);

                for (int x = 0; x < Alphabet.Length; x++)
                {
                    int ch = Alphabet[x];
                    // get the characteristic vector at this position wrt ch
                    int cvec = GetVector(ch, xpos, end);
                    int dest = description.Transition(k, xpos, cvec);
                    if (dest >= 0)
                    {
                        states[k].AddTransition(new Transition(ch, states[dest]));
                    }
                }
                // add transitions for all other chars in unicode
                // by definition, their characteristic vectors are always 0,
                // because they do not exist in the input string.
                int dest_ = description.Transition(k, xpos, 0); // by definition
                if (dest_ >= 0)
                {
                    for (int r = 0; r < NumRanges; r++)
                    {
                        states[k].AddTransition(new Transition(RangeLower[r], RangeUpper[r], states[dest_]));
                    }
                }
            }

            Automaton a = new Automaton(states[0]);
            a.Deterministic = true;
            // we create some useless unconnected states, and its a net-win overall to remove these,
            // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
            // so, while we could set our numberedStates here, its actually best not to, and instead to
            // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
            //a.setNumberedStates(states);
            a.Reduce();
            // we need not trim transitions to dead states, as they are not created.
            //a.restoreInvariant();
            return a;
        }
Exemplo n.º 57
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]         transitions1 = a1.GetSortedTransitions();
            Transition[][]         transitions2 = a2.GetSortedTransitions();
            LinkedList <StatePair> worklist     = new LinkedList <StatePair>();
            HashSet <StatePair>    visited      = new HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.AddLast(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.First.Value;
                worklist.Remove(p);
                if (p.S1.accept && !p.S2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.S1.number];
                Transition[] t2 = transitions2[p.S2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MAX_CODE_POINT)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MAX_CODE_POINT;
                            max1 = Character.MIN_CODE_POINT;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.AddLast(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }