Special automata operations. @lucene.experimental
Example #1
0
        // below are original, unoptimized implementations of DFA operations for testing.
        // These are from brics automaton, full license (BSD) below:


        /*
         * dk.brics.automaton
         *
         * Copyright (c) 2001-2009 Anders Moeller
         * All rights reserved.
         *
         * Redistribution and use in source and binary forms, with or without
         * modification, are permitted provided that the following conditions
         * are met:
         * 1. Redistributions of source code must retain the above copyright
         *    notice, this list of conditions and the following disclaimer.
         * 2. Redistributions in binary form must reproduce the above copyright
         *    notice, this list of conditions and the following disclaimer in the
         *    documentation and/or other materials provided with the distribution.
         * 3. The name of the author may not be used to endorse or promote products
         *    derived from this software without specific prior written permission.
         *
         * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
         * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
         * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
         * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
         * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
         * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
         * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
         * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
         * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         */

        /// <summary>
        /// Simple, original brics implementation of Brzozowski Minimize():
        /// calls DeterminizeSimple on <paramref name="a"/> with the result of
        /// SpecialOperations.Reverse(a), twice in a row. Singleton automata are
        /// returned untouched.
        /// </summary>
        /// <param name="a"> Automaton to minimize. </param>
        public static void MinimizeSimple(Automaton a)
        {
            if (!a.IsSingleton)
            {
                // Two reverse-then-determinize passes.
                for (int pass = 0; pass < 2; pass++)
                {
                    DeterminizeSimple(a, SpecialOperations.Reverse(a));
                }
            }
        }
Example #2
0
        // Below are the original, unoptimized implementations of DFA operations, kept for testing.
        // These are from the brics automaton library; the full (BSD) license follows:

        /*
         * dk.brics.automaton
         *
         * Copyright (c) 2001-2009 Anders Moeller
         * All rights reserved.
         *
         * Redistribution and use in source and binary forms, with or without
         * modification, are permitted provided that the following conditions
         * are met:
         * 1. Redistributions of source code must retain the above copyright
         *    notice, this list of conditions and the following disclaimer.
         * 2. Redistributions in binary form must reproduce the above copyright
         *    notice, this list of conditions and the following disclaimer in the
         *    documentation and/or other materials provided with the distribution.
         * 3. The name of the author may not be used to endorse or promote products
         *    derived from this software without specific prior written permission.
         *
         * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
         * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
         * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
         * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
         * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
         * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
         * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
         * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
         * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         */

        /// <summary>
        /// Simple, original brics implementation of Brzozowski minimize():
        /// calls DeterminizeSimple on <paramref name="a"/> with the result of
        /// SpecialOperations.Reverse(a), twice in a row. Singleton automata are
        /// returned untouched.
        /// </summary>
        /// <param name="a"> Automaton to minimize. </param>
        public static void MinimizeSimple(Automaton a)
        {
            // A singleton needs no work. Compare against null rather than using
            // IsNullOrEmpty: the singleton for the empty string is "" and must
            // still be recognized as a singleton.
            if (a.Singleton != null)
            {
                return;
            }
            DeterminizeSimple(a, SpecialOperations.Reverse(a));
            DeterminizeSimple(a, SpecialOperations.Reverse(a));
        }
Example #3
0
        /// <summary>
        /// Compares SpecialOperations.IsFinite against AutomatonTestUtil.IsFiniteSlow
        /// on AtLeast(200) automata produced by AutomatonTestUtil.RandomAutomaton.
        /// </summary>
        public virtual void TestIsFinite()
        {
            int remaining = AtLeast(200);

            while (remaining-- > 0)
            {
                Automaton original = AutomatonTestUtil.RandomAutomaton(Random);
                // IsFinite is given a clone; the slow reference check gets the original.
                Automaton copy = (Automaton)original.Clone();

                var expected = AutomatonTestUtil.IsFiniteSlow(original);
                var actual = SpecialOperations.IsFinite(copy);
                Assert.AreEqual(expected, actual);
            }
        }
Example #4
0
        /// <summary>
        /// Builds a DFA for some string, and checks all Lev automata
        /// up to some maximum distance. Two builders are used, one constructed
        /// with <c>false</c> and one with <c>true</c>; the inline comments call
        /// their results Lev(N) and LevT(N).
        /// </summary>
        /// <param name="s"> String the automata are built for. </param>
        /// <param name="maxDistance"> Largest distance to build and check (inclusive). </param>
        private void AssertLev(string s, int maxDistance)
        {
            LevenshteinAutomata plainBuilder = new LevenshteinAutomata(s, false);
            LevenshteinAutomata transBuilder = new LevenshteinAutomata(s, true);

            int count = maxDistance + 1;
            Automaton[] automata = new Automaton[count];
            Automaton[] tautomata = new Automaton[count];

            for (int n = 0; n < count; n++)
            {
                Automaton lev = plainBuilder.ToAutomaton(n);
                Automaton levT = transBuilder.ToAutomaton(n);
                automata[n] = lev;
                tautomata[n] = levT;

                // basic structural sanity of both automata
                Assert.IsNotNull(lev);
                Assert.IsNotNull(levT);
                Assert.IsTrue(lev.Deterministic);
                Assert.IsTrue(levT.Deterministic);
                Assert.IsTrue(SpecialOperations.IsFinite(lev));
                Assert.IsTrue(SpecialOperations.IsFinite(levT));
                AutomatonTestUtil.AssertNoDetachedStates(lev);
                AutomatonTestUtil.AssertNoDetachedStates(levT);

                // the dfa for n-1 must accept a subset of the dfa for n
                if (n > 0)
                {
                    Automaton prev = automata[n - 1];
                    Automaton prevT = tautomata[n - 1];
                    Assert.IsTrue(prev.SubsetOf(lev));
                    Assert.IsTrue(prev.SubsetOf(levT));
                    Assert.IsTrue(prevT.SubsetOf(lev));
                    Assert.IsTrue(prevT.SubsetOf(levT));
                    Assert.AreNotSame(prev, lev);
                }

                // Lev(N) must be a subset of LevT(N)
                Assert.IsTrue(lev.SubsetOf(levT));

                // distance-specific checks
                if (n == 0)
                {
                    // easy, matches the string itself
                    Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), lev));
                    Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), levT));
                }
                else if (n == 1)
                {
                    // generate a lev1 naively, and check the accepted lang is the same.
                    Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1(s), lev));
                    Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1T(s), levT));
                }
                else
                {
                    AssertBruteForce(s, lev, n);
                    AssertBruteForceT(s, levT, n);
                }
            }
        }
        /// <summary>
        /// Returns the longest string that is a suffix of all accepted strings and
        /// visits each state at most once.
        /// </summary>
        /// <param name="a"> Automaton to inspect; the reversal is performed on a clone. </param>
        /// <returns> Common suffix. </returns>
        public static string GetCommonSuffix(Automaton a)
        {
            if (!a.IsSingleton)
            {
                // Reverse the language on a copy, determinize it, then take the
                // common prefix of the reversed language and flip it back.
                Automaton reversed = (Automaton)a.Clone();
                Reverse(reversed);
                reversed.Determinize();

                string reversedSuffix = SpecialOperations.GetCommonPrefix(reversed);
                StringBuilder buffer = new StringBuilder(reversedSuffix);
                return buffer.Reverse().ToString();
            }

            // Singleton: the suffix is the string itself.
            return a.singleton;
        }
        /// <summary>
        /// BytesRef variant of the common-suffix computation: clones the automaton,
        /// reverses and determinizes the clone, takes its common prefix via
        /// GetCommonPrefixBytesRef and passes that result through ReverseBytes.
        /// For a singleton, the singleton string is wrapped in a new BytesRef.
        /// </summary>
        /// <param name="a"> Automaton to inspect; the reversal is performed on a clone. </param>
        /// <returns> Common suffix as a BytesRef. </returns>
        public static BytesRef GetCommonSuffixBytesRef(Automaton a)
        {
            if (!a.IsSingleton)
            {
                // Reverse the language on a copy, then reverse its common prefix.
                Automaton reversed = (Automaton)a.Clone();
                Reverse(reversed);
                reversed.Determinize();

                BytesRef suffix = SpecialOperations.GetCommonPrefixBytesRef(reversed);
                ReverseBytes(suffix);
                return suffix;
            }

            // Singleton: the suffix is the string itself.
            return new BytesRef(a.singleton);
        }
Example #7
0
        /// <summary>
        /// Unions the automata for "dog" and "duck", minimizes the result and checks
        /// that SpecialOperations.GetFiniteStrings (called with -1) returns exactly
        /// two entries, one per word.
        /// </summary>
        public virtual void TestFiniteStrings()
        {
            var dogAutomaton = BasicAutomata.MakeString("dog");
            var duckAutomaton = BasicAutomata.MakeString("duck");
            Automaton union = BasicOperations.Union(dogAutomaton, duckAutomaton);
            MinimizationOperations.Minimize(union);

            ISet<Int32sRef> accepted = SpecialOperations.GetFiniteStrings(union, -1);
            Assert.AreEqual(2, accepted.Count);

            // Each word, converted to an Int32sRef, must be in the result set.
            foreach (string word in new string[] { "dog", "duck" })
            {
                Int32sRef expected = new Int32sRef();
                Util.ToInt32sRef(new BytesRef(word), expected);
                Assert.IsTrue(accepted.Contains(expected));
            }
        }
 /// <summary>
 /// Unions the result of CollectionsHelper.Shuffle(a) into one automaton,
 /// determinizes it, and asserts that it is finite and accepts every entry of
 /// <paramref name="terms"/>, both directly (BasicOperations.Run) and through a
 /// ByteRunAutomaton fed the UTF-8 bytes of each term.
 /// </summary>
 /// <param name="a"> Automata to union. </param>
 /// <param name="terms"> Strings the union must accept. </param>
 public void AssertLexicon(List<Automaton> a, List<string> terms)
 {
     var lexicon = BasicOperations.Union(CollectionsHelper.Shuffle(a));
     lexicon.Determinize();
     Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

     // string-level acceptance
     terms.ForEach(term => Assert.IsTrue(BasicOperations.Run(lexicon, term)));

     // byte-level acceptance
     var byteLexicon = new ByteRunAutomaton(lexicon);
     foreach (string term in terms)
     {
         var utf8 = term.GetBytes(Encoding.UTF8);
         Assert.IsTrue(byteLexicon.Run(utf8, 0, utf8.Length));
     }
 }
Example #9
0
        /// <summary>
        /// Shuffles <c>automata</c>, unions them into one determinized automaton, and
        /// asserts that it is finite and accepts every entry of <c>terms</c>, both
        /// directly (BasicOperations.Run) and through a ByteRunAutomaton fed the
        /// UTF-8 bytes of each term.
        /// </summary>
        public void AssertLexicon()
        {
            Collections.Shuffle(automata, Random());

            Automaton lexicon = BasicOperations.Union(automata);
            lexicon.Determinize();
            Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

            // string-level acceptance
            foreach (string term in terms)
            {
                assertTrue(BasicOperations.Run(lexicon, term));
            }

            // byte-level acceptance
            ByteRunAutomaton byteLexicon = new ByteRunAutomaton(lexicon);
            foreach (string term in terms)
            {
                var utf8 = term.GetBytes(Encoding.UTF8);
                assertTrue(byteLexicon.Run(utf8, 0, utf8.Length));
            }
        }
Example #10
0
        /// <summary>
        /// Builds the compiled form of <paramref name="automaton"/>.
        /// <para/>
        /// When <paramref name="simplify"/> is true the automaton is first tested for
        /// four simple shapes: empty (NONE), total (ALL), a single fixed string (SINGLE)
        /// and a constant prefix followed by any string (PREFIX). If one matches, only
        /// Type and Term carry information; CommonSuffixRef, RunAutomaton,
        /// sortedTransitions and Finite are set to null and construction stops.
        /// Otherwise the type is NORMAL: the automaton is converted with UTF32ToUTF8 and
        /// a ByteRunAutomaton plus the sorted transitions are created from the result.
        /// </summary>
        /// <param name="automaton"> Automaton to compile. </param>
        /// <param name="finite"> Whether the automaton is finite; when null it is computed with
        /// SpecialOperations.IsFinite. Only consulted on the NORMAL path. </param>
        /// <param name="simplify"> Whether to test for the simple shapes first. </param>
        public CompiledAutomaton(Automaton automaton, bool? finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:

                // matches nothing
                if (BasicOperations.IsEmpty(automaton))
                {
                    Type = AUTOMATON_TYPE.NONE;
                    Term = null;
                    CommonSuffixRef = null;
                    RunAutomaton = null;
                    sortedTransitions = null;
                    this.Finite = null;
                    return;
                }

                // matches all possible strings
                if (BasicOperations.IsTotal(automaton))
                {
                    Type = AUTOMATON_TYPE.ALL;
                    Term = null;
                    CommonSuffixRef = null;
                    RunAutomaton = null;
                    sortedTransitions = null;
                    this.Finite = null;
                    return;
                }

                // Start from the automaton's own singleton representation. If it has
                // none, fall back to the common prefix, which is itself the single
                // accepted string when it is non-empty and has the same language.
                string singleton = automaton.Singleton;
                string commonPrefix = null;
                if (singleton == null)
                {
                    commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                    bool prefixIsWholeLanguage =
                        commonPrefix.Length > 0 &&
                        BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix));
                    if (prefixIsWholeLanguage)
                    {
                        singleton = commonPrefix;
                    }
                }

                // matches a fixed string in singleton or expanded representation
                if (singleton != null)
                {
                    Type = AUTOMATON_TYPE.SINGLE;
                    Term = new BytesRef(singleton);
                    CommonSuffixRef = null;
                    RunAutomaton = null;
                    sortedTransitions = null;
                    this.Finite = null;
                    return;
                }

                // matches a constant prefix
                var prefixThenAnything = BasicOperations.Concatenate(
                    BasicAutomata.MakeString(commonPrefix),
                    BasicAutomata.MakeAnyString());
                if (BasicOperations.SameLanguage(automaton, prefixThenAnything))
                {
                    Type = AUTOMATON_TYPE.PREFIX;
                    Term = new BytesRef(commonPrefix);
                    CommonSuffixRef = null;
                    RunAutomaton = null;
                    sortedTransitions = null;
                    this.Finite = null;
                    return;
                }
            }

            // General case: no simple form applied (or simplification was not requested).
            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            this.Finite = finite ?? SpecialOperations.IsFinite(automaton);

            Automaton utf8 = new UTF32ToUTF8().Convert(automaton);

            // The common suffix is only computed when the automaton is not known to be finite.
            CommonSuffixRef = (this.Finite == true)
                ? null
                : SpecialOperations.GetCommonSuffixBytesRef(utf8);
            RunAutomaton = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }
Example #11
0
 /// <summary>
 /// Gets the character class of the given codepoint by looking it up in
 /// <c>_points</c> with SpecialOperations.FindIndex.
 /// </summary>
 /// <param name="c"> Codepoint to classify. </param>
 /// <returns> The index FindIndex reports for <paramref name="c"/>. </returns>
 internal int GetCharClass(int c)
 {
     int classIndex = SpecialOperations.FindIndex(c, _points);
     return classIndex;
 }