Beispiel #1
0
        private static void AssertAutomaton(Automaton a)
        {
            Automaton clone = (Automaton)a.Clone();
            // complement(complement(a)) = a
            Automaton equivalent = BasicOperations.Complement(BasicOperations.Complement(a));

            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a union a = a
            equivalent = BasicOperations.Union(a, clone);
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a intersect a = a
            equivalent = BasicOperations.Intersection(a, clone);
            Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));

            // a minus a = empty
            Automaton empty = BasicOperations.Minus(a, clone);

            Assert.IsTrue(BasicOperations.IsEmpty(empty));

            // as long as don't accept the empty string
            // then optional(a) - empty = a
            if (!BasicOperations.Run(a, ""))
            {
                //System.out.println("test " + a);
                Automaton optional = BasicOperations.Optional(a);
                //System.out.println("optional " + optional);
                equivalent = BasicOperations.Minus(optional, BasicAutomata.MakeEmptyString());
                //System.out.println("equiv " + equivalent);
                Assert.IsTrue(BasicOperations.SameLanguage(a, equivalent));
            }
        }
        public virtual void TestEmptyLanguageConcatenate()
        {
            Automaton a      = BasicAutomata.MakeString("a");
            Automaton concat = BasicOperations.Concatenate(a, BasicAutomata.MakeEmpty());

            Assert.IsTrue(BasicOperations.IsEmpty(concat));
        }
Beispiel #3
0
        /// <summary>
        /// Returns an automaton that accepts the union of the languages of the given
        /// automata.
        /// <para/>
        /// Complexity: linear in number of states.
        /// </summary>
        public static Automaton Union(ICollection <Automaton> l)
        {
            JCG.HashSet <int> ids = new JCG.HashSet <int>();
            foreach (Automaton a in l)
            {
                ids.Add(a.GetHashCode());
            }
            bool  has_aliases = ids.Count != l.Count;
            State s           = new State();

            foreach (Automaton b in l)
            {
                if (BasicOperations.IsEmpty(b))
                {
                    continue;
                }
                Automaton bb = b;
                if (has_aliases)
                {
                    bb = bb.CloneExpanded();
                }
                else
                {
                    bb = bb.CloneExpandedIfRequired();
                }
                s.AddEpsilon(bb.initial);
            }
            Automaton a_ = new Automaton
            {
                initial       = s,
                deterministic = false
            };

            //a.clearHashCode();
            a_.ClearNumberedStates();
            a_.CheckMinimizeAlways();
            return(a_);
        }
Beispiel #4
0
 /// <summary>
 /// Returns a (deterministic) automaton that accepts the intersection of the
 /// language of <paramref name="a1"/> and the complement of the language of
 /// <paramref name="a2"/>. As a side-effect, the automata may be determinized, if not
 /// already deterministic.
 /// <para/>
 /// Complexity: quadratic in number of states (if already deterministic).
 /// </summary>
 public static Automaton Minus(Automaton a1, Automaton a2)
 {
     if (BasicOperations.IsEmpty(a1) || a1 == a2)
     {
         return(BasicAutomata.MakeEmpty());
     }
     if (BasicOperations.IsEmpty(a2))
     {
         return(a1.CloneIfRequired());
     }
     if (a1.IsSingleton)
     {
         if (BasicOperations.Run(a2, a1.singleton))
         {
             return(BasicAutomata.MakeEmpty());
         }
         else
         {
             return(a1.CloneIfRequired());
         }
     }
     return(Intersection(a1, a2.Complement()));
 }
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                RandomAcceptedStrings rx = new RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random);
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception /*t*/)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                    }
                }
            }
        }
Beispiel #6
0
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random());
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception t)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw t;
                    }
                }
            }
        }
Beispiel #7
0
        public CompiledAutomaton(Automaton automaton, bool?finite, bool simplify)
        {
            if (simplify)
            {
                // Test whether the automaton is a "simple" form and
                // if so, don't create a runAutomaton.  Note that on a
                // large automaton these tests could be costly:
                if (BasicOperations.IsEmpty(automaton))
                {
                    // matches nothing
                    Type              = AUTOMATON_TYPE.NONE;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else if (BasicOperations.IsTotal(automaton))
                {
                    // matches all possible strings
                    Type              = AUTOMATON_TYPE.ALL;
                    Term              = null;
                    CommonSuffixRef   = null;
                    RunAutomaton      = null;
                    sortedTransitions = null;
                    this.Finite       = null;
                    return;
                }
                else
                {
                    string commonPrefix;
                    string singleton;
                    if (automaton.Singleton == null)
                    {
                        commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
                        if (commonPrefix.Length > 0 && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix)))
                        {
                            singleton = commonPrefix;
                        }
                        else
                        {
                            singleton = null;
                        }
                    }
                    else
                    {
                        commonPrefix = null;
                        singleton    = automaton.Singleton;
                    }

                    if (singleton != null)
                    {
                        // matches a fixed string in singleton or expanded
                        // representation
                        Type              = AUTOMATON_TYPE.SINGLE;
                        Term              = new BytesRef(singleton);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                    else if (BasicOperations.SameLanguage(automaton, BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString())))
                    {
                        // matches a constant prefix
                        Type              = AUTOMATON_TYPE.PREFIX;
                        Term              = new BytesRef(commonPrefix);
                        CommonSuffixRef   = null;
                        RunAutomaton      = null;
                        sortedTransitions = null;
                        this.Finite       = null;
                        return;
                    }
                }
            }

            Type = AUTOMATON_TYPE.NORMAL;
            Term = null;
            if (finite == null)
            {
                this.Finite = SpecialOperations.IsFinite(automaton);
            }
            else
            {
                this.Finite = finite;
            }
            Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

            if (this.Finite == true)
            {
                CommonSuffixRef = null;
            }
            else
            {
                CommonSuffixRef = SpecialOperations.GetCommonSuffixBytesRef(utf8);
            }
            RunAutomaton      = new ByteRunAutomaton(utf8, true);
            sortedTransitions = utf8.GetSortedTransitions();
        }
Beispiel #8
0
        /// <summary>
        /// Returns an automaton that accepts the concatenation of the languages of the
        /// given automata.
        /// <para/>
        /// Complexity: linear in total number of states.
        /// </summary>
        public static Automaton Concatenate(IList <Automaton> l)
        {
            if (l.Count == 0)
            {
                return(BasicAutomata.MakeEmptyString());
            }
            bool all_singleton = true;

            foreach (Automaton a in l)
            {
                if (!a.IsSingleton)
                {
                    all_singleton = false;
                    break;
                }
            }
            if (all_singleton)
            {
                StringBuilder b = new StringBuilder();
                foreach (Automaton a in l)
                {
                    b.Append(a.singleton);
                }
                return(BasicAutomata.MakeString(b.ToString()));
            }
            else
            {
                foreach (Automaton a in l)
                {
                    if (BasicOperations.IsEmpty(a))
                    {
                        return(BasicAutomata.MakeEmpty());
                    }
                }
                HashSet <int> ids = new HashSet <int>();
                foreach (Automaton a in l)
                {
                    ids.Add(a.GetHashCode());
                }
                bool      has_aliases = ids.Count != l.Count;
                Automaton b           = l[0];
                if (has_aliases)
                {
                    b = b.CloneExpanded();
                }
                else
                {
                    b = b.CloneExpandedIfRequired();
                }
                ISet <State> ac    = b.GetAcceptStates();
                bool         first = true;
                foreach (Automaton a in l)
                {
                    if (first)
                    {
                        first = false;
                    }
                    else
                    {
                        if (a.IsEmptyString)
                        {
                            continue;
                        }
                        Automaton aa = a;
                        if (has_aliases)
                        {
                            aa = aa.CloneExpanded();
                        }
                        else
                        {
                            aa = aa.CloneExpandedIfRequired();
                        }
                        ISet <State> ns = aa.GetAcceptStates();
                        foreach (State s in ac)
                        {
                            s.accept = false;
                            s.AddEpsilon(aa.initial);
                            if (s.accept)
                            {
                                ns.Add(s);
                            }
                        }
                        ac = ns;
                    }
                }
                b.deterministic = false;
                //b.clearHashCode();
                b.ClearNumberedStates();
                b.CheckMinimizeAlways();
                return(b);
            }
        }