ToAutomaton() public method

Constructs new Automaton from this RegExp. Same as toAutomaton(null) (empty automaton map).
public ToAutomaton ( ) : Automaton
return Automaton
Example #1
0
 private void FindLeaves(RegExp exp, Kind kind, IList <Automaton> list, IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
 {
     if (exp.kind == kind)
     {
         FindLeaves(exp.exp1, kind, list, automata, automaton_provider);
         FindLeaves(exp.exp2, kind, list, automata, automaton_provider);
     }
     else
     {
         list.Add(exp.ToAutomaton(automata, automaton_provider));
     }
 }
Example #2
0
        public void TestSpecialCase3()
        {
            RegExp                re        = new RegExp("(\\鯺)*(.)*\\Ӕ");
            string                input     = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
            Automaton             automaton = re.ToAutomaton();
            CharacterRunAutomaton cra       = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton      bra       = new ByteRunAutomaton(automaton);

            Assert.IsTrue(cra.Run(input));

            sbyte[] bytes = input.GetBytes(Encoding.UTF8);
            Assert.IsTrue(bra.Run(bytes, 0, bytes.Length));
        }
Example #3
0
        public void TestSpecialCase2()
        {
            RegExp                re        = new RegExp(".+\u0775");
            string                input     = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
            Automaton             automaton = re.ToAutomaton();
            CharacterRunAutomaton cra       = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton      bra       = new ByteRunAutomaton(automaton);

            Assert.IsTrue(cra.Run(input));

            sbyte[] bytes = input.GetBytes(Encoding.UTF8);
            Assert.IsTrue(bra.Run(bytes, 0, bytes.Length)); // this one fails!
        }
Example #4
0
        public void TestSpecialCase()
        {
            RegExp                re        = new RegExp(".?");
            Automaton             automaton = re.ToAutomaton();
            CharacterRunAutomaton cra       = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton      bra       = new ByteRunAutomaton(automaton);

            // make sure character dfa accepts empty string
            Assert.IsTrue(cra.IsAccept(cra.InitialState));
            Assert.IsTrue(cra.Run(""));
            Assert.IsTrue(cra.Run(new char[0], 0, 0));

            // make sure byte dfa accepts empty string
            Assert.IsTrue(bra.IsAccept(bra.InitialState));
            Assert.IsTrue(bra.Run(new byte[0], 0, 0));
        }
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random), RegExpSyntax.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                RandomAcceptedStrings rx = new RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random);
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception /*t*/)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                    }
                }
            }
        }
Example #6
0
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);

            for (int i = 0; i < ITER1; i++)
            {
                RegExp    re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
                Automaton a  = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random());
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception t)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw t;
                    }
                }
            }
        }
        public virtual void TestGetRandomAcceptedString()
        {
            int ITER1 = AtLeast(100);
            int ITER2 = AtLeast(100);
            for (int i = 0; i < ITER1; i++)
            {
                RegExp re = new RegExp(AutomatonTestUtil.RandomRegexp(Random()), RegExp.NONE);
                Automaton a = re.ToAutomaton();
                Assert.IsFalse(BasicOperations.IsEmpty(a));

                AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
                for (int j = 0; j < ITER2; j++)
                {
                    int[] acc = null;
                    try
                    {
                        acc = rx.GetRandomAcceptedString(Random());
                        string s = UnicodeUtil.NewString(acc, 0, acc.Length);
                        Assert.IsTrue(BasicOperations.Run(a, s));
                    }
                    catch (Exception t)
                    {
                        Console.WriteLine("regexp: " + re);
                        if (acc != null)
                        {
                            Console.WriteLine("fail acc re=" + re + " count=" + acc.Length);
                            for (int k = 0; k < acc.Length; k++)
                            {
                                Console.WriteLine("  " + acc[k].ToString("x"));
                            }
                        }
                        throw t;
                    }
                }
            }
        }
Example #8
0
        private Automaton ToAutomaton(IDictionary <string, Automaton> automata, IAutomatonProvider automaton_provider)
        {
            IList <Automaton> list;
            Automaton         a = null;

            switch (kind)
            {
            case Kind.REGEXP_UNION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
                a = BasicOperations.Union(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CONCATENATION:
                list = new List <Automaton>();
                FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
                a = BasicOperations.Concatenate(list);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_INTERSECTION:
                a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_OPTIONAL:
                a = exp1.ToAutomaton(automata, automaton_provider).Optional();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MIN:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_REPEAT_MINMAX:
                a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_COMPLEMENT:
                a = exp1.ToAutomaton(automata, automaton_provider).Complement();
                MinimizationOperations.Minimize(a);
                break;

            case Kind.REGEXP_CHAR:
                a = BasicAutomata.MakeChar(c);
                break;

            case Kind.REGEXP_CHAR_RANGE:
                a = BasicAutomata.MakeCharRange(from, to);
                break;

            case Kind.REGEXP_ANYCHAR:
                a = BasicAutomata.MakeAnyChar();
                break;

            case Kind.REGEXP_EMPTY:
                a = BasicAutomata.MakeEmpty();
                break;

            case Kind.REGEXP_STRING:
                a = BasicAutomata.MakeString(s);
                break;

            case Kind.REGEXP_ANYSTRING:
                a = BasicAutomata.MakeAnyString();
                break;

            case Kind.REGEXP_AUTOMATON:
                Automaton aa = null;
                if (automata != null)
                {
                    aa = automata[s];
                }
                if (aa == null && automaton_provider != null)
                {
                    try
                    {
                        aa = automaton_provider.GetAutomaton(s);
                    }
                    catch (Exception e) when(e.IsIOException())
                    {
                        throw new ArgumentException(e.ToString(), e);
                    }
                }
                if (aa == null)
                {
                    throw new ArgumentException("'" + s + "' not found");
                }
                a = (Automaton)aa.Clone();     // always clone here (ignore allow_mutate)
                break;

            case Kind.REGEXP_INTERVAL:
                a = BasicAutomata.MakeInterval(min, max, digits);
                break;
            }
            return(a);
        }
        public void TestSpecialCase()
        {
            RegExp re = new RegExp(".?");
            Automaton automaton = re.ToAutomaton();
            CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
            // make sure character dfa accepts empty string
            Assert.IsTrue(cra.IsAccept(cra.InitialState));
            Assert.IsTrue(cra.Run(""));
            Assert.IsTrue(cra.Run(new char[0], 0, 0));

            // make sure byte dfa accepts empty string
            Assert.IsTrue(bra.IsAccept(bra.InitialState));
            Assert.IsTrue(bra.Run(new byte[0], 0, 0));
        }
Example #10
0
        public void TestSpecialCase3()
        {
            RegExp re = new RegExp("(\\鯺)*(.)*\\Ӕ");
            string input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
            Automaton automaton = re.ToAutomaton();
            CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

            Assert.IsTrue(cra.Run(input));

            var bytes = input.GetBytes(Encoding.UTF8);
            Assert.IsTrue(bra.Run(bytes, 0, bytes.Length));
        }
Example #11
0
        public void TestSpecialCase2()
        {
            RegExp re = new RegExp(".+\u0775");
            string input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
            Automaton automaton = re.ToAutomaton();
            CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

            Assert.IsTrue(cra.Run(input));

            var bytes = input.GetBytes(Encoding.UTF8);
            Assert.IsTrue(bra.Run(bytes, 0, bytes.Length)); // this one fails!
        }
Example #12
0
 private void FindLeaves(RegExp exp, Kind kind, IList<Automaton> list, IDictionary<string, Automaton> automata, AutomatonProvider automaton_provider)
 {
     if (exp.kind == kind)
     {
         FindLeaves(exp.Exp1, kind, list, automata, automaton_provider);
         FindLeaves(exp.Exp2, kind, list, automata, automaton_provider);
     }
     else
     {
         list.Add(exp.ToAutomaton(automata, automaton_provider));
     }
 }
Example #13
0
 internal DumbRegexpQuery(Term term, RegExpSyntax flags)
     : base(term.Field)
 {
     RegExp re = new RegExp(term.Text, flags);
     automaton = re.ToAutomaton();
 }