Exemplo n.º 1
0
        private static void AssertAutomaton(Automaton a)
        {
            Automaton clone = (Automaton)a.Clone();
            // complement(complement(a)) = a
            Automaton equivalent = BasicOperations.Complement(BasicOperations.Complement(a));

            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a union a = a
            equivalent = BasicOperations.Union(a, clone);
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a intersect a = a
            equivalent = BasicOperations.Intersection(a, clone);
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a minus a = empty
            Automaton empty = BasicOperations.Minus(a, clone);

            Assert.IsTrue(BasicOperations.IsEmpty(empty));

            // as long as don't accept the empty string
            // then optional(a) - empty = a
            if (!BasicOperations.Run(a, ""))
            {
                //System.out.println("test " + a);
                Automaton optional = BasicOperations.Optional(a);
                //System.out.println("optional " + optional);
                equivalent = BasicOperations.Minus(optional, BasicAutomata.MakeEmptyString());
                //System.out.println("equiv " + equivalent);
                Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));
            }
        }
Exemplo n.º 2
0
 public void AssertLexicon(List<Automaton> a, List<string> terms)
 {
     var automata = CollectionsHelper.Shuffle(a);
     var lex = BasicOperations.Union(automata);
     lex.Determinize();
     Assert.IsTrue(SpecialOperations.IsFinite(lex));
     foreach (string s in terms)
     {
         Assert.IsTrue(BasicOperations.Run(lex, s));
     }
     var lexByte = new ByteRunAutomaton(lex);
     foreach (string s in terms)
     {
         sbyte[] bytes = s.GetBytes(Encoding.UTF8);
         Assert.IsTrue(lexByte.Run(bytes, 0, bytes.Length));
     }
 }
Exemplo n.º 3
0
        public void AssertLexicon()
        {
            Collections.Shuffle(automata, Random());
            var lex = BasicOperations.Union(automata);

            lex.Determinize();
            Assert.IsTrue(SpecialOperations.IsFinite(lex));
            foreach (string s in terms)
            {
                assertTrue(BasicOperations.Run(lex, s));
            }
            var lexByte = new ByteRunAutomaton(lex);

            foreach (string s in terms)
            {
                var bytes = s.GetBytes(Encoding.UTF8);
                assertTrue(lexByte.Run(bytes, 0, bytes.Length));
            }
        }
Exemplo n.º 4
0
 /// <summary>
 /// Returns a (deterministic) automaton that accepts the intersection of the
 /// language of <paramref name="a1"/> and the complement of the language of
 /// <paramref name="a2"/>. As a side-effect, the automata may be determinized, if not
 /// already deterministic.
 /// <para/>
 /// Complexity: quadratic in number of states (if already deterministic).
 /// </summary>
 public static Automaton Minus(Automaton a1, Automaton a2)
 {
     if (BasicOperations.IsEmpty(a1) || a1 == a2)
     {
         return(BasicAutomata.MakeEmpty());
     }
     if (BasicOperations.IsEmpty(a2))
     {
         return(a1.CloneIfRequired());
     }
     if (a1.IsSingleton)
     {
         if (BasicOperations.Run(a2, a1.singleton))
         {
             return(BasicAutomata.MakeEmpty());
         }
         else
         {
             return(a1.CloneIfRequired());
         }
     }
     return(Intersection(a1, a2.Complement()));
 }
Exemplo n.º 5
0
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                RandomAcceptedStrings rx = new RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random);
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception /*t*/)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                    }
                }
            }
        }
Exemplo n.º 6
0
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random());
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception t)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw t;
                    }
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]         transitions1 = a1.GetSortedTransitions();
            Transition[][]         transitions2 = a2.GetSortedTransitions();
            LinkedList <StatePair> worklist     = new LinkedList <StatePair>();
            HashSet <StatePair>    visited      = new HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.AddLast(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.First.Value;
                worklist.Remove(p);
                if (p.S1.accept && !p.S2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.S1.number];
                Transition[] t2 = transitions2[p.S2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MAX_CODE_POINT)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MAX_CODE_POINT;
                            max1 = Character.MIN_CODE_POINT;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.AddLast(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Returns an automaton that accepts the intersection of the languages of the
        /// given automata. Never modifies the input automata languages.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static Automaton Intersection(Automaton a1, Automaton a2)
        {
            if (a1.IsSingleton)
            {
                if (BasicOperations.Run(a2, a1.singleton))
                {
                    return(a1.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a2.IsSingleton)
            {
                if (BasicOperations.Run(a1, a2.singleton))
                {
                    return(a2.CloneIfRequired());
                }
                else
                {
                    return(BasicAutomata.MakeEmpty());
                }
            }
            if (a1 == a2)
            {
                return(a1.CloneIfRequired());
            }
            Transition[][]                    transitions1 = a1.GetSortedTransitions();
            Transition[][]                    transitions2 = a2.GetSortedTransitions();
            Automaton                         c            = new Automaton();
            LinkedList <StatePair>            worklist     = new LinkedList <StatePair>();
            Dictionary <StatePair, StatePair> newstates    = new Dictionary <StatePair, StatePair>();
            StatePair                         p            = new StatePair(c.initial, a1.initial, a2.initial);

            worklist.AddLast(p);
            newstates[p] = p;
            while (worklist.Count > 0)
            {
                p = worklist.First.Value;
                worklist.Remove(p);
                p.s.accept = p.S1.accept && p.S2.accept;
                Transition[] t1 = transitions1[p.S1.number];
                Transition[] t2 = transitions2[p.S2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].max >= t1[n1].min)
                        {
                            StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                            StatePair r;
                            newstates.TryGetValue(q, out r);
                            if (r == null)
                            {
                                q.s = new State();
                                worklist.AddLast(q);
                                newstates[q] = q;
                                r            = q;
                            }
                            int min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min;
                            int max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max;
                            p.s.AddTransition(new Transition(min, max, r.s));
                        }
                    }
                }
            }
            c.deterministic = a1.deterministic && a2.deterministic;
            c.RemoveDeadTransitions();
            c.CheckMinimizeAlways();
            return(c);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Returns true if the language of <paramref name="a1"/> is a subset of the language
        /// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
        /// not already marked as deterministic.
        /// <para/>
        /// Complexity: quadratic in number of states.
        /// </summary>
        public static bool SubsetOf(Automaton a1, Automaton a2)
        {
            if (a1 == a2)
            {
                return(true);
            }
            if (a1.IsSingleton)
            {
                if (a2.IsSingleton)
                {
                    return(a1.singleton.Equals(a2.singleton, StringComparison.Ordinal));
                }
                return(BasicOperations.Run(a2, a1.singleton));
            }
            a2.Determinize();
            Transition[][]    transitions1 = a1.GetSortedTransitions();
            Transition[][]    transitions2 = a2.GetSortedTransitions();
            Queue <StatePair> worklist     = new Queue <StatePair>(); // LUCENENET specific - Queue is much more performant than LinkedList

            JCG.HashSet <StatePair> visited = new JCG.HashSet <StatePair>();
            StatePair p = new StatePair(a1.initial, a2.initial);

            worklist.Enqueue(p);
            visited.Add(p);
            while (worklist.Count > 0)
            {
                p = worklist.Dequeue();
                if (p.s1.accept && !p.s2.accept)
                {
                    return(false);
                }
                Transition[] t1 = transitions1[p.s1.number];
                Transition[] t2 = transitions2[p.s2.number];
                for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
                {
                    while (b2 < t2.Length && t2[b2].max < t1[n1].min)
                    {
                        b2++;
                    }
                    int min1 = t1[n1].min, max1 = t1[n1].max;

                    for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
                    {
                        if (t2[n2].min > min1)
                        {
                            return(false);
                        }
                        if (t2[n2].max < Character.MaxCodePoint)
                        {
                            min1 = t2[n2].max + 1;
                        }
                        else
                        {
                            min1 = Character.MaxCodePoint;
                            max1 = Character.MinCodePoint;
                        }
                        StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                        if (!visited.Contains(q))
                        {
                            worklist.Enqueue(q);
                            visited.Add(q);
                        }
                    }
                    if (min1 <= max1)
                    {
                        return(false);
                    }
                }
            }
            return(true);
        }