/// <summary>
/// Creates one automaton per entry of <paramref name="strings"/> (via
/// <see cref="BasicAutomata.MakeString(string)"/> on the UTF-8 decoded text) and
/// returns the union of all of them.
/// </summary>
private static Automaton NaiveUnion(IList <BytesRef> strings)
        {
            Automaton[] perTerm = new Automaton[strings.Count];
            int slot = 0;

            foreach (BytesRef term in strings)
            {
                string decoded = term.Utf8ToString();
                perTerm[slot] = BasicAutomata.MakeString(decoded);
                slot++;
            }

            return BasicOperations.Union(perTerm);
        }
        /// <summary>
        /// Concatenates a singleton automaton with an NFA and checks that the result is
        /// reported as non-deterministic and accepts the same language as the concatenation
        /// built from the expanded clone of the singleton.
        /// </summary>
        public virtual void TestSingletonNFAConcatenate()
        {
            Automaton prefix = BasicAutomata.MakeString("prefix");
            Automaton prefixExpanded = prefix.CloneExpanded();

            // an NFA (two transitions for 't' from initial state)
            Automaton nfa = BasicOperations.Union(
                BasicAutomata.MakeString("this"),
                BasicAutomata.MakeString("three"));

            Automaton fromSingleton = BasicOperations.Concatenate(prefix, nfa);
            Assert.IsFalse(fromSingleton.IsDeterministic);

            Automaton fromExpanded = BasicOperations.Concatenate(prefixExpanded, nfa);
            Assert.IsTrue(BasicOperations.SameLanguage(fromExpanded, fromSingleton));
        }
        /// <summary>
        /// Checks that an automaton built from a string and its expanded clone accept the
        /// same language, for an ASCII string and for a string consisting of one surrogate pair.
        /// </summary>
        public virtual void TestSingleton()
        {
            string[] samples = { "foobar", "\ud801\udc1c" };

            foreach (string sample in samples)
            {
                Automaton compact = BasicAutomata.MakeString(sample);
                Automaton expanded = compact.CloneExpanded();

                Assert.IsTrue(BasicOperations.SameLanguage(compact, expanded));
            }
        }
Beispiel #4
0
        /// <summary>
        /// For a batch of random automata, minimizes a clone of each one and checks that
        /// the clone still accepts the same language as the untouched original.
        /// </summary>
        public virtual void Test()
        {
            for (int remaining = AtLeast(200); remaining > 0; remaining--)
            {
                Automaton original = AutomatonTestUtil.RandomAutomaton(Random);
                Automaton minimized = (Automaton)original.Clone();

                MinimizationOperations.Minimize(minimized);

                Assert.IsTrue(BasicOperations.SameLanguage(original, minimized));
            }
        }
Beispiel #5
0
        /// <summary>
        /// Builds the automata for <paramref name="s"/> at every distance from 0 up to
        /// <paramref name="maxDistance"/>, using two <see cref="LevenshteinAutomata"/> builders
        /// that differ only in their boolean constructor argument (referred to below as
        /// "Lev" for <c>false</c> and "LevT" for <c>true</c>), and verifies each of them.
        /// </summary>
        private void AssertLev(string s, int maxDistance)
        {
            LevenshteinAutomata levBuilder  = new LevenshteinAutomata(s, false);
            LevenshteinAutomata levTBuilder = new LevenshteinAutomata(s, true);

            int levels = maxDistance + 1;
            Automaton[] levByDistance  = new Automaton[levels];
            Automaton[] levTByDistance = new Automaton[levels];

            for (int n = 0; n < levels; n++)
            {
                Automaton lev = levBuilder.ToAutomaton(n);
                levByDistance[n] = lev;
                Automaton levT = levTBuilder.ToAutomaton(n);
                levTByDistance[n] = levT;

                // structural sanity checks on both automata
                Assert.IsNotNull(lev);
                Assert.IsNotNull(levT);
                Assert.IsTrue(lev.Deterministic);
                Assert.IsTrue(levT.Deterministic);
                Assert.IsTrue(SpecialOperations.IsFinite(lev));
                Assert.IsTrue(SpecialOperations.IsFinite(levT));
                AutomatonTestUtil.AssertNoDetachedStates(lev);
                AutomatonTestUtil.AssertNoDetachedStates(levT);

                // the automata for distance n-1 must accept a subset of those for distance n
                if (n > 0)
                {
                    Automaton previousLev  = levByDistance[n - 1];
                    Automaton previousLevT = levTByDistance[n - 1];

                    Assert.IsTrue(previousLev.SubsetOf(lev));
                    Assert.IsTrue(previousLev.SubsetOf(levT));
                    Assert.IsTrue(previousLevT.SubsetOf(lev));
                    Assert.IsTrue(previousLevT.SubsetOf(levT));
                    Assert.AreNotSame(previousLev, lev);
                }

                // Lev(N) must be a subset of LevT(N)
                Assert.IsTrue(lev.SubsetOf(levT));

                // distance-specific checks
                if (n == 0)
                {
                    // distance 0 accepts exactly the string itself
                    Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), lev));
                    Assert.IsTrue(BasicOperations.SameLanguage(BasicAutomata.MakeString(s), levT));
                }
                else if (n == 1)
                {
                    // compare against the naively constructed distance-1 automata
                    Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1(s), lev));
                    Assert.IsTrue(BasicOperations.SameLanguage(NaiveLev1T(s), levT));
                }
                else
                {
                    AssertBruteForce(s, lev, n);
                    AssertBruteForceT(s, levT, n);
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Builds an automaton accepting <paramref name="s"/> itself plus everything accepted
        /// by <c>InsertionsOf(s)</c>, <c>DeletionsOf(s)</c> and <c>SubstitutionsOf(s)</c>.
        /// The intermediate result is minimized after each union.
        /// </summary>
        private Automaton NaiveLev1(string s)
        {
            Automaton result = BasicOperations.Union(BasicAutomata.MakeString(s), InsertionsOf(s));
            MinimizationOperations.Minimize(result);

            result = BasicOperations.Union(result, DeletionsOf(s));
            MinimizationOperations.Minimize(result);

            result = BasicOperations.Union(result, SubstitutionsOf(s));
            MinimizationOperations.Minimize(result);

            return result;
        }
        /// <summary>
        /// Concatenates the empty-string automaton (both as created and as an expanded clone)
        /// with a character-range automaton and checks that the results are deterministic
        /// (for the non-expanded case) and accept the same language as the range automaton.
        /// </summary>
        public virtual void TestEmptySingletonConcatenate()
        {
            Automaton emptyString = BasicAutomata.MakeString("");
            Automaton emptyStringExpanded = emptyString.CloneExpanded();
            Automaton range = BasicAutomata.MakeCharRange('5', '7');

            Automaton viaExpanded = BasicOperations.Concatenate(emptyStringExpanded, range);
            Automaton viaSingleton = BasicOperations.Concatenate(emptyString, range);

            Assert.IsTrue(viaSingleton.IsDeterministic);
            Assert.IsTrue(BasicOperations.SameLanguage(viaExpanded, viaSingleton));

            foreach (Automaton concatenation in new[] { viaExpanded, viaSingleton })
            {
                Assert.IsTrue(BasicOperations.SameLanguage(range, concatenation));
            }
        }
Beispiel #8
0
        /// <summary>
        /// Minimizes random automata with <c>AutomatonTestUtil.MinimizeSimple</c> (presumably
        /// Brzozowski's algorithm, going by the test name) and with
        /// <c>MinimizationOperations.Minimize</c>, then checks both results agree on language,
        /// state count and transition count.
        /// </summary>
        public virtual void TestAgainstBrzozowski()
        {
            for (int remaining = AtLeast(200); remaining > 0; remaining--)
            {
                Automaton reference = AutomatonTestUtil.RandomAutomaton(Random);
                AutomatonTestUtil.MinimizeSimple(reference);

                Automaton candidate = (Automaton)reference.Clone();
                MinimizationOperations.Minimize(candidate);

                Assert.IsTrue(BasicOperations.SameLanguage(reference, candidate));
                Assert.AreEqual(reference.GetNumberOfStates(), candidate.GetNumberOfStates());
                Assert.AreEqual(reference.GetNumberOfTransitions(), candidate.GetNumberOfTransitions());
            }
        }
Beispiel #9
0
        /// <summary>
        /// Determinizes random automata with <c>AutomatonTestUtil.DeterminizeSimple</c> and with
        /// <c>Automaton.Determinize</c> (on a clone) and checks both accept the same language.
        /// </summary>
        public virtual void TestAgainstSimple()
        {
            int iterations = AtLeast(200);
            int completed = 0;

            while (completed < iterations)
            {
                Automaton viaSimple = AutomatonTestUtil.RandomAutomaton(Random());
                Automaton viaBuiltIn = (Automaton)viaSimple.Clone();

                AutomatonTestUtil.DeterminizeSimple(viaSimple);

                // clear the flag first so Determinize() is forced to run
                viaBuiltIn.Deterministic = false;
                viaBuiltIn.Determinize();

                // TODO: more verifications possible?
                Assert.IsTrue(BasicOperations.SameLanguage(viaSimple, viaBuiltIn));

                completed++;
            }
        }
        /// <summary>
        /// Builds a sorted list of random terms, then checks that
        /// <c>BasicAutomata.MakeStringUnion</c> yields a deterministic automaton accepting
        /// the same language as <c>NaiveUnion</c> over the same terms.
        /// </summary>
        public virtual void TestStringUnion()
        {
            List <BytesRef> terms = new List <BytesRef>();
            int termCount = RandomInts.RandomInt32Between(Random, 0, 1000);

            for (int added = 0; added < termCount; added++)
            {
                string text = TestUtil.RandomUnicodeString(Random);
                terms.Add(new BytesRef(text));
            }
            terms.Sort();

            Automaton built = BasicAutomata.MakeStringUnion(terms);

            Assert.IsTrue(built.IsDeterministic);
            Assert.IsTrue(BasicOperations.SameLanguage(built, NaiveUnion(terms)));
        }
 /// <summary>
 /// Unions the (shuffled) automata in <paramref name="a"/>, determinizes the result and
 /// checks that it is finite and accepts every entry of <paramref name="terms"/>, both
 /// when run on the string and when run as a <c>ByteRunAutomaton</c> on its UTF-8 bytes.
 /// </summary>
 public void AssertLexicon(List<Automaton> a, List<string> terms)
 {
     var shuffled = CollectionsHelper.Shuffle(a);
     var lexicon = BasicOperations.Union(shuffled);
     lexicon.Determinize();
     Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

     // character-level acceptance
     foreach (string term in terms)
     {
         bool accepted = BasicOperations.Run(lexicon, term);
         Assert.IsTrue(accepted);
     }

     // byte-level acceptance
     var byteLexicon = new ByteRunAutomaton(lexicon);
     foreach (string term in terms)
     {
         sbyte[] utf8 = term.GetBytes(Encoding.UTF8);
         bool accepted = byteLexicon.Run(utf8, 0, utf8.Length);
         Assert.IsTrue(accepted);
     }
 }
Beispiel #12
0
        /// <summary>
        /// Checks that <c>SpecialOperations.GetFiniteStrings</c> on the minimized union of
        /// "dog" and "duck" returns exactly two entries, one for each word.
        /// </summary>
        public virtual void TestFiniteStrings()
        {
            Automaton dogOrDuck = BasicOperations.Union(
                BasicAutomata.MakeString("dog"),
                BasicAutomata.MakeString("duck"));
            MinimizationOperations.Minimize(dogOrDuck);

            ISet <Int32sRef> accepted = SpecialOperations.GetFiniteStrings(dogOrDuck, -1);
            Assert.AreEqual(2, accepted.Count);

            foreach (string word in new[] { "dog", "duck" })
            {
                Int32sRef expected = new Int32sRef();
                Util.ToInt32sRef(new BytesRef(word), expected);

                Assert.IsTrue(accepted.Contains(expected));
            }
        }
Beispiel #13
0
        /// <summary>
        /// Builds a minimized automaton that is the union, over every position in
        /// <paramref name="s"/>, of: the text before that position, followed by any single
        /// character, followed by the text after that position.
        /// </summary>
        private Automaton SubstitutionsOf(string s)
        {
            IList <Automaton> variants = new List <Automaton>();

            int position = 0;
            while (position < s.Length)
            {
                // prefix + <any char> + suffix, with the character at 'position' left out
                Automaton prefixThenAny = BasicOperations.Concatenate(
                    BasicAutomata.MakeString(s.Substring(0, position)),
                    BasicAutomata.MakeAnyChar());
                Automaton variant = BasicOperations.Concatenate(
                    prefixThenAny,
                    BasicAutomata.MakeString(s.Substring(position + 1)));

                variants.Add(variant);
                position++;
            }

            Automaton union = BasicOperations.Union(variants);
            MinimizationOperations.Minimize(union);
            return union;
        }
Beispiel #14
0
        /// <summary>
        /// Shuffles <c>automata</c>, unions and determinizes them, and checks that the
        /// result is finite and accepts every entry of <c>terms</c>, both when run on the
        /// string and when run as a <c>ByteRunAutomaton</c> on its UTF-8 bytes.
        /// </summary>
        public void AssertLexicon()
        {
            Collections.Shuffle(automata, Random());

            var lexicon = BasicOperations.Union(automata);
            lexicon.Determinize();
            Assert.IsTrue(SpecialOperations.IsFinite(lexicon));

            // character-level acceptance
            foreach (string term in terms)
            {
                bool accepted = BasicOperations.Run(lexicon, term);
                assertTrue(accepted);
            }

            // byte-level acceptance
            var byteLexicon = new ByteRunAutomaton(lexicon);
            foreach (string term in terms)
            {
                var utf8 = term.GetBytes(Encoding.UTF8);
                bool accepted = byteLexicon.Run(utf8, 0, utf8.Length);
                assertTrue(accepted);
            }
        }
Beispiel #15
0
        /// <summary>
        /// Builds an automaton that accepts the union of the languages of every automaton
        /// in <paramref name="l"/>. Automata that are empty are skipped. The result is
        /// flagged as non-deterministic.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Union(ICollection <Automaton> l)
        {
            // When fewer distinct hash codes than entries are seen, the collection is
            // treated as containing aliases and every operand is expanded/cloned
            // unconditionally instead of only "if required".
            JCG.HashSet <int> seenHashes = new JCG.HashSet <int>();
            foreach (Automaton candidate in l)
            {
                seenHashes.Add(candidate.GetHashCode());
            }
            bool mayContainAliases = seenHashes.Count != l.Count;

            // New start state with an epsilon edge into each non-empty operand.
            State start = new State();
            foreach (Automaton operand in l)
            {
                if (!BasicOperations.IsEmpty(operand))
                {
                    Automaton expanded = mayContainAliases
                        ? operand.CloneExpanded()
                        : operand.CloneExpandedIfRequired();
                    start.AddEpsilon(expanded.initial);
                }
            }

            Automaton result = new Automaton();
            result.initial = start;
            result.deterministic = false;

            result.ClearNumberedStates();
            result.CheckMinimizeAlways();
            return result;
        }
Beispiel #16
0
 /// <summary>
 /// Returns a (deterministic) automaton accepting the strings of <paramref name="a1"/>
 /// that are not accepted by <paramref name="a2"/>, i.e. the intersection of
 /// <paramref name="a1"/> with the complement of <paramref name="a2"/>. As a side-effect,
 /// the automata may be determinized, if not already deterministic.
 /// <para/>
 /// Complexity: quadratic in number of states (if already deterministic).
 /// </summary>
 public static Automaton Minus(Automaton a1, Automaton a2)
 {
     // Nothing can remain when a1 is empty or both operands are the same automaton.
     bool nothingRemains = BasicOperations.IsEmpty(a1) || a1 == a2;
     if (nothingRemains)
     {
         return BasicAutomata.MakeEmpty();
     }

     // Subtracting the empty language leaves a1 unchanged.
     if (BasicOperations.IsEmpty(a2))
     {
         return a1.CloneIfRequired();
     }

     // General case.
     if (!a1.IsSingleton)
     {
         return Intersection(a1, a2.Complement());
     }

     // Singleton shortcut: the single string either survives or it does not.
     return BasicOperations.Run(a2, a1.singleton)
         ? BasicAutomata.MakeEmpty()
         : a1.CloneIfRequired();
 }
Beispiel #17
0
        /// <summary>
        /// For a number of random regular expressions, repeatedly draws a random accepted
        /// string from the corresponding automaton and verifies that the automaton accepts it.
        /// When an iteration fails, the regexp and the code points drawn (if any) are written
        /// to the console before the exception is rethrown.
        /// </summary>
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    // Declared outside the try so the catch block can dump it.
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random());
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        // Bare "throw" keeps the original stack trace; rethrowing the caught
                        // variable ("throw t;") would reset it to this line (CA2200).
                        throw;
                    }
                }
            }
        }
        /// <summary>
        /// For a number of random regular expressions, repeatedly draws a random accepted
        /// string from the corresponding automaton and verifies that the automaton accepts it.
        /// When an iteration fails, the regexp and the code points drawn (if any) are written
        /// to the console before the exception is rethrown.
        /// </summary>
        public virtual void TestGetRandomAcceptedString()
        {
            int regexpRounds = AtLeast(100);
            int samplesPerRegexp = AtLeast(100);

            for (int round = 0; round < regexpRounds; round++)
            {
                RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
                Automaton a = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                RandomAcceptedStrings sampler = new RandomAcceptedStrings(a);
                for (int sample = 0; sample < samplesPerRegexp; sample++)
                {
                    // Kept outside the try so the failure handler can report it.
                    int[] codePoints = null;
                    try
                    {
                        codePoints = sampler.GetRandomAcceptedString(Random);
                        string text = UnicodeUtil.NewString(codePoints, 0, codePoints.Length);
                        Assert.IsTrue(BasicOperations.Run(a, text));
                    }
                    catch (Exception)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (codePoints != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + codePoints.Length);
                            foreach (int codePoint in codePoints)
                            {
                                Console.WriteLine("  " + codePoint.ToString("x"));
                            }
                        }
                        // Bare rethrow so the original stack trace is preserved (CA2200).
                        throw;
                    }
                }
            }
        }
Beispiel #19
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Repeat(Automaton, int, int)"/>, passing this
 /// automaton together with <paramref name="min"/> and <paramref name="max"/>.
 /// </summary>
 public virtual Automaton Repeat(int min, int max) => BasicOperations.Repeat(this, min, max);
Beispiel #20
0
        /// <summary>
        /// Builds the automaton for this expression node by dispatching on <c>kind</c>.
        /// Composite kinds (union, concatenation, intersection, optional, the repeat forms
        /// and complement) build their operands recursively and minimize the result; the
        /// remaining kinds are created directly through <see cref="BasicAutomata"/> or, for
        /// <c>REGEXP_AUTOMATON</c>, looked up by name and cloned.
        /// </summary>
        /// <param name="automata">Optional map from name to automaton, consulted first for
        /// <c>REGEXP_AUTOMATON</c> nodes. May be <c>null</c>.</param>
        /// <param name="automaton_provider">Optional provider consulted when the name is not
        /// found in <paramref name="automata"/>. May be <c>null</c>.</param>
        /// <returns>The constructed automaton, or <c>null</c> if <c>kind</c> matches none of
        /// the handled cases.</returns>
        /// <exception cref="ArgumentException">A named automaton could not be found in either
        /// source, or the provider failed with an I/O exception.</exception>
        private Automaton ToAutomaton(IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
        {
            IList <Automaton> list;
            Automaton         a = null;

            switch (kind)
            {
            case Kind.REGEXP_UNION:
                // Flatten nested unions into one list so they are combined in a single step.
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
                a = BasicOperations.Union(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CONCATENATION:
                // Same flattening as for unions, applied to nested concatenations.
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                a = BasicOperations.Concatenate(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_INTERSECTION:
                a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_OPTIONAL:
                a = exp1.ToAutomaton(automata, automaton_provider).Optional();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MIN:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MINMAX:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_COMPLEMENT:
                a = exp1.ToAutomaton(automata, automaton_provider).Complement();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CHAR:
                a = BasicAutomata.MakeChar(c);
                break;

            case Kind.REGEXP_CHAR_RANGE:
                a = BasicAutomata.MakeCharRange(from, to);
                break;

            case Kind.REGEXP_ANYCHAR:
                a = BasicAutomata.MakeAnyChar();
                break;

            case Kind.REGEXP_EMPTY:
                a = BasicAutomata.MakeEmpty();
                break;

            case Kind.REGEXP_STRING:
                a = BasicAutomata.MakeString(s);
                break;

            case Kind.REGEXP_ANYSTRING:
                a = BasicAutomata.MakeAnyString();
                break;

            case Kind.REGEXP_AUTOMATON:
                Automaton aa = null;
                if (automata != null)
                {
                    // TryGetValue rather than the indexer: the IDictionary indexer throws
                    // KeyNotFoundException for a missing key, which would skip both the
                    // provider fallback and the "not found" error below.
                    automata.TryGetValue(s, out aa);
                }
                if (aa == null && automaton_provider != null)
                {
                    try
                    {
                        aa = automaton_provider.GetAutomaton(s);
                    }
                    catch (Exception e) when(e.IsIOException())
                    {
                        throw new ArgumentException(e.ToString(), e);
                    }
                }
                if (aa == null)
                {
                    throw new ArgumentException("'" + s + "' not found");
                }
                a = (Automaton)aa.Clone();     // always clone here (ignore allow_mutate)
                break;

            case Kind.REGEXP_INTERVAL:
                a = BasicAutomata.MakeInterval(min, max, digits);
                break;
            }
            return(a);
        }
Beispiel #21
0
        /// <summary>
        /// Classifies <paramref name="automaton"/> and initializes this instance accordingly.
        /// When <paramref name="simplify"/> is <c>true</c>, the automaton is first tested for
        /// four simple forms (empty, total, single fixed string, constant prefix); on a match
        /// <c>Type</c> and <c>Term</c> are set, the remaining members are set to <c>null</c>
        /// and the constructor returns early. Otherwise <c>Type</c> is <c>NORMAL</c> and the
        /// automaton is converted with <c>UTF32ToUTF8</c> to build <c>RunAutomaton</c> and
        /// <c>sortedTransitions</c>.
        /// </summary>
        /// <param name="automaton">The automaton to compile.</param>
        /// <param name="finite">Whether the automaton is finite; when <c>null</c> this is
        /// computed with <c>SpecialOperations.IsFinite</c> (NORMAL path only).</param>
        /// <param name="simplify">Whether to test for the simple forms before compiling.</param>
        public CompiledAutomaton(Automaton automaton, bool?finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:
                if (BasicOperations.IsEmpty(automaton))
                {
                    // matches nothing
                    Type              = AUTOMATON_TYPE.NONE;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else if (BasicOperations.IsTotal(automaton))
                {
                    // matches all possible strings
                    Type              = AUTOMATON_TYPE.ALL;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else
                {
                    string commonPrefix;
                    string singleton;
                    if (automaton.Singleton == null)
                    {
                        // Not stored in singleton form: it still matches exactly one string
                        // if its language equals that of its own (non-empty) common prefix.
                        commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                        if (commonPrefix.Length > 0 && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix)))
                        {
                            singleton = commonPrefix;
                        }
                        else
                        {
                            singleton = null;
                        }
                    }
                    else
                    {
                        // commonPrefix is left null here; this is safe because the
                        // non-null singleton makes the SINGLE branch below return first.
                        commonPrefix = null;
                        singleton    = automaton.Singleton;
                    }

                    if (singleton != null)
                    {
                        // matches a fixed string in singleton or expanded
                        // representation
                        Type              = AUTOMATON_TYPE.SINGLE;
                        Term              = new BytesRef(singleton);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                    else if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
                    {
                        // matches a constant prefix
                        Type              = AUTOMATON_TYPE.PREFIX;
                        Term              = new BytesRef(commonPrefix);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                }
            }

            // No simple form matched (or simplify was false): compile the general case.
            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            if (finite == null)
            {
                // Caller did not say whether the automaton is finite, so compute it.
                this.Finite = SpecialOperations.IsFinite(automaton);
            }
            else
            {
                this.Finite = finite;
            }
            Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

            // The common suffix is only computed when the automaton is not known to be finite.
            if (this.Finite == true)
            {
                CommonSuffixRef = null;
            }
            else
            {
                CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
            }
            RunAutomaton      = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }
Beispiel #22
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Union(ICollection{Automaton})"/> with the
 /// given collection and returns its result.
 /// </summary>
 public static Automaton Union(ICollection <Automaton> l) => BasicOperations.Union(l);
Beispiel #23
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Determinize(Automaton)"/>, passing this automaton.
 /// </summary>
 public virtual void Determinize() => BasicOperations.Determinize(this);
Beispiel #24
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Union(Automaton, Automaton)"/>, passing this
 /// automaton as the first operand and <paramref name="a"/> as the second.
 /// </summary>
 public virtual Automaton Union(Automaton a) => BasicOperations.Union(this, a);
Beispiel #25
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/>, passing this
 /// automaton as the first operand and <paramref name="a"/> as the second.
 /// </summary>
 public virtual bool SubsetOf(Automaton a) => BasicOperations.SubsetOf(this, a);
Beispiel #26
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>, passing
 /// this automaton as the first operand and <paramref name="a"/> as the second.
 /// </summary>
 public virtual Automaton Intersection(Automaton a) => BasicOperations.Intersection(this, a);
Beispiel #27
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Minus(Automaton, Automaton)"/>, passing this
 /// automaton as the first operand and <paramref name="a"/> as the second.
 /// </summary>
 public virtual Automaton Minus(Automaton a) => BasicOperations.Minus(this, a);
Beispiel #28
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Repeat(Automaton)"/>, passing this automaton.
 /// </summary>
 public virtual Automaton Repeat() => BasicOperations.Repeat(this);
Beispiel #29
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Optional(Automaton)"/>, passing this automaton.
 /// </summary>
 public virtual Automaton Optional() => BasicOperations.Optional(this);
Beispiel #30
0
 /// <summary>
 /// Delegates to <see cref="BasicOperations.Complement(Automaton)"/>, passing this automaton.
 /// </summary>
 public virtual Automaton Complement() => BasicOperations.Complement(this);