예제 #1
0
                /// <summary>
                /// Creates the enum on top of <paramref name="tenum"/>, compiles the outer query's
                /// <c>automaton</c> into a <see cref="CharacterRunAutomaton"/>, and sets the initial
                /// seek term to an empty <see cref="BytesRef"/>.
                /// </summary>
                /// <param name="outerInstance">The enclosing query instance whose <c>automaton</c> member is compiled.</param>
                /// <param name="tenum">The terms enum forwarded to the base constructor.</param>
                internal SimpleAutomatonTermsEnum(TestRegexpRandom2.DumbRegexpQuery outerInstance, TermsEnum tenum)
                    : base(tenum)
                {
                    this.outerInstance = outerInstance;

                    // Compile the automaton once here so it can be reused for the lifetime of this enum.
                    runAutomaton = new CharacterRunAutomaton(outerInstance.automaton);
                    // Seek from the empty term.
                    SetInitialSeekTerm(new BytesRef(""));
                }
예제 #2
0
        /// <summary>
        /// Checks that the automaton built from the regular expression below accepts the sample
        /// input both when run over the string's characters and when run over its UTF-8 bytes.
        /// </summary>
        public void TestSpecialCase3()
        {
            RegExp                re        = new RegExp("(\\鯺)*(.)*\\Ӕ");
            string                input     = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
            Automaton             automaton = re.ToAutomaton();
            CharacterRunAutomaton cra       = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton      bra       = new ByteRunAutomaton(automaton);

            Assert.IsTrue(cra.Run(input));

            // Let the compiler pick the array type returned by GetBytes so it always matches
            // what ByteRunAutomaton.Run expects (the other tests pass byte arrays, not sbyte[]).
            var bytes = input.GetBytes(Encoding.UTF8);
            Assert.IsTrue(bra.Run(bytes, 0, bytes.Length));
        }
예제 #3
0
        /// <summary>
        /// Checks that the automaton built from the regular expression <c>.+\u0775</c> accepts the
        /// sample input both when run over the string's characters and when run over its UTF-8 bytes.
        /// </summary>
        public void TestSpecialCase2()
        {
            RegExp                re        = new RegExp(".+\u0775");
            string                input     = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
            Automaton             automaton = re.ToAutomaton();
            CharacterRunAutomaton cra       = new CharacterRunAutomaton(automaton);
            ByteRunAutomaton      bra       = new ByteRunAutomaton(automaton);

            Assert.IsTrue(cra.Run(input));

            // Let the compiler pick the array type returned by GetBytes so it always matches
            // what ByteRunAutomaton.Run expects (the other tests pass byte arrays, not sbyte[]).
            var bytes = input.GetBytes(Encoding.UTF8);
            Assert.IsTrue(bra.Run(bytes, 0, bytes.Length)); // this one fails!
        }
예제 #4
0
        /// <summary>
        /// Checks that the character-based and byte-based run automata built from the regular
        /// expression <c>.?</c> both accept empty input.
        /// </summary>
        public void TestSpecialCase()
        {
            Automaton optionalAny = new RegExp(".?").ToAutomaton();
            var charDfa = new CharacterRunAutomaton(optionalAny);
            var byteDfa = new ByteRunAutomaton(optionalAny);

            // The character DFA must accept the empty string, however it is supplied.
            Assert.IsTrue(charDfa.IsAccept(charDfa.InitialState));
            Assert.IsTrue(charDfa.Run(""));
            Assert.IsTrue(charDfa.Run(new char[0], 0, 0));

            // The byte DFA must accept the empty byte sequence as well.
            Assert.IsTrue(byteDfa.IsAccept(byteDfa.InitialState));
            Assert.IsTrue(byteDfa.Run(new byte[0], 0, 0));
        }
 /// <summary>
 /// Runs the base set-up and then initializes <c>jvmLetter</c> with a run automaton built from
 /// every code point in [0, 0x10FFFF] for which <c>Character.IsLetter</c> returns true.
 /// </summary>
 public override void SetUp()
 {
     base.SetUp();

     // Two-state automaton: one transition per letter code point leads to the accepting state.
     State start = new State();
     State letterSeen = new State { Accept = true };

     for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
     {
         if (!Character.IsLetter(codePoint))
         {
             continue;
         }

         start.AddTransition(new Transition(codePoint, codePoint, letterSeen));
     }

     Automaton oneLetter = new Automaton(start);
     oneLetter.Reduce();

     // The run automaton is compiled from the repetition of the single-letter automaton.
     jvmLetter = new CharacterRunAutomaton(BasicOperations.Repeat(oneLetter));
 }
 /// <summary>
 /// Brute-force check of <paramref name="dfa"/>: for every integer below
 /// 2^(input.Length + distance + 1), its base-2 string is run through the automaton, and the
 /// automaton must accept it exactly when <c>GetTDistance(input, candidate)</c> is at most
 /// <paramref name="distance"/>.
 /// </summary>
 /// <param name="input">The reference string the candidates are compared against.</param>
 /// <param name="dfa">The automaton under test.</param>
 /// <param name="distance">The maximum distance the automaton is expected to accept.</param>
 private void AssertBruteForceT(string input, Automaton dfa, int distance)
 {
     var runner = new CharacterRunAutomaton(dfa);
     int bitCount = input.Length + distance + 1;
     int limit = (int)Math.Pow(2, bitCount);

     for (int candidate = 0; candidate < limit; candidate++)
     {
         string binary = Convert.ToString(candidate, 2);
         bool accepted = runner.Run(binary);
         int actualDistance = GetTDistance(input, binary);

         // Accepted strings must be within the distance; rejected ones must exceed it.
         Assert.IsTrue(accepted ? actualDistance <= distance : actualDistance > distance);
     }
 }
예제 #7
0
        /// <summary>
        /// Exhaustively verifies <paramref name="dfa"/> against <c>GetTDistance</c>: every integer
        /// below 2^(input.Length + distance + 1) is rendered in base 2 and must be accepted by the
        /// automaton if and only if its distance to <paramref name="input"/> does not exceed
        /// <paramref name="distance"/>.
        /// </summary>
        /// <param name="input">The reference string the candidates are compared against.</param>
        /// <param name="dfa">The automaton under test.</param>
        /// <param name="distance">The maximum distance the automaton is expected to accept.</param>
        private void AssertBruteForceT(string input, Automaton dfa, int distance)
        {
            var automaton = new CharacterRunAutomaton(dfa);
            int upperBound = (int)Math.Pow(2, input.Length + distance + 1);

            for (int value = 0; value < upperBound; value++)
            {
                string bits = Convert.ToString(value, 2);
                bool isAccepted = automaton.Run(bits);
                bool isWithinDistance = GetTDistance(input, bits) <= distance;

                // Acceptance and "within distance" must always agree.
                Assert.IsTrue(isAccepted == isWithinDistance);
            }
        }
예제 #8
0
        /// <summary>
        /// Checks that the character-based and byte-based run automata built from
        /// <paramref name="automaton"/> agree on at least 1000 strings. Each string is either a
        /// random Unicode string or a string generated from the automaton's accepted language,
        /// and the byte automaton is fed the UTF-8 encoding of the same string.
        /// </summary>
        /// <param name="automaton">The automaton both run automata are compiled from.</param>
        private static void AssertAutomaton(Automaton automaton)
        {
            var cra = new CharacterRunAutomaton(automaton);
            var bra = new ByteRunAutomaton(automaton);
            var ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);

            int num = AtLeast(1000);

            for (int i = 0; i < num; i++)
            {
                string s;
                if (Random().NextBoolean())
                {
                    // likely not accepted
                    s = TestUtil.RandomUnicodeString(Random());
                }
                else
                {
                    // will be accepted
                    int[] codepoints = ras.GetRandomAcceptedString(Random());
                    try
                    {
                        s = UnicodeUtil.NewString(codepoints, 0, codepoints.Length);
                    }
                    catch (Exception)
                    {
                        // Dump the offending code points (in hex) to help diagnose the failure.
                        Console.WriteLine(codepoints.Length + " codepoints:");
                        for (int j = 0; j < codepoints.Length; j++)
                        {
                            Console.WriteLine("  " + codepoints[j].ToString("x"));
                        }
                        // Rethrow with "throw;" so the original stack trace is preserved (CA2200).
                        throw;
                    }
                }
                var bytes = s.GetBytes(Encoding.UTF8);
                Assert.AreEqual(cra.Run(s), bra.Run(bytes, 0, bytes.Length));
            }
        }
예제 #9
0
        /// <summary>
        /// Verifies that the character-based and byte-based run automata compiled from
        /// <paramref name="automaton"/> give the same answer for at least 1000 strings, each being
        /// either a random Unicode string or a string generated from the accepted language.
        /// </summary>
        /// <param name="automaton">The automaton both run automata are compiled from.</param>
        private static void AssertAutomaton(Automaton automaton)
        {
            var charRunner = new CharacterRunAutomaton(automaton);
            var byteRunner = new ByteRunAutomaton(automaton);
            var acceptedStrings = new RandomAcceptedStrings(automaton);

            int iterations = AtLeast(1000);
            for (int round = 0; round < iterations; round++)
            {
                string candidate;
                if (!Random.NextBoolean())
                {
                    // will be accepted
                    int[] codepoints = acceptedStrings.GetRandomAcceptedString(Random);
                    try
                    {
                        candidate = UnicodeUtil.NewString(codepoints, 0, codepoints.Length);
                    }
                    catch (Exception)
                    {
                        // Print the code points (in hex) that could not be turned into a string.
                        Console.WriteLine(codepoints.Length + " codepoints:");
                        foreach (int codepoint in codepoints)
                        {
                            Console.WriteLine("  " + codepoint.ToString("x"));
                        }
                        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                    }
                }
                else
                {
                    // likely not accepted
                    candidate = TestUtil.RandomUnicodeString(Random);
                }

                var utf8 = candidate.GetBytes(Encoding.UTF8);
                bool charResult = charRunner.Run(candidate);
                bool byteResult = byteRunner.Run(utf8, 0, utf8.Length);
                Assert.AreEqual(charResult, byteResult);
            }
        }
예제 #10
0
        /// <summary>
        /// Checks that the automaton built from the regular expression below accepts the sample
        /// text both as a character sequence and as its UTF-8 byte encoding.
        /// </summary>
        public void TestSpecialCase3()
        {
            string text = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
            Automaton compiled = new RegExp("(\\鯺)*(.)*\\Ӕ").ToAutomaton();
            var charRunner = new CharacterRunAutomaton(compiled);
            var byteRunner = new ByteRunAutomaton(compiled);

            // Character-level match.
            Assert.IsTrue(charRunner.Run(text));

            // Byte-level match over the UTF-8 encoding of the same text.
            var utf8 = text.GetBytes(Encoding.UTF8);
            Assert.IsTrue(byteRunner.Run(utf8, 0, utf8.Length));
        }
예제 #11
0
 /// <summary>
 /// Parses three queries with an analyzer whose stop set is built from the regular expression
 /// <c>the|foo</c> and checks the type and clause count of each resulting query.
 /// </summary>
 public virtual void TestStopwords()
 {
     Automaton stopWordAutomaton = new RegExp("the|foo").ToAutomaton();
     CharacterRunAutomaton stopSet = new CharacterRunAutomaton(stopWordAutomaton);
     var analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);
     ICommonQueryParserConfiguration qp = GetParserConfig(analyzer);

     // Both terms are stop words: a BooleanQuery without clauses is expected.
     Query result = GetQuery("field:the OR field:foo", qp);
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result is BooleanQuery);
     int clauseCount = ((BooleanQuery)result).Clauses.Length;
     assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

     // Only one term is a stop word: a TermQuery is expected.
     result = GetQuery("field:woo OR field:the", qp);
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a TermQuery", result is TermQuery);

     // Two groups joined by AND: a BooleanQuery with two clauses is expected.
     result = GetQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result is BooleanQuery);
     if (VERBOSE)
     {
         Console.WriteLine("Result: " + result);
     }
     clauseCount = ((BooleanQuery)result).Clauses.Length;
     assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
 }
예제 #12
0
        /// <summary>
        /// Parses three queries with a <see cref="StandardQueryParser"/> whose analyzer uses a stop
        /// set built from the regular expression <c>the|foo</c>, and checks the type and clause
        /// count of each resulting query.
        /// </summary>
        public void TestStopwords()
        {
            var qp = new StandardQueryParser();
            Automaton stopWordAutomaton = new RegExp("the|foo").ToAutomaton();
            var stopSet = new CharacterRunAutomaton(stopWordAutomaton);
            qp.Analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            // Both terms are stop words: a BooleanQuery without clauses is expected.
            Query result = qp.Parse("a:the OR a:foo", "a");
            assertNotNull("result is null and it shouldn't be", result);
            assertTrue("result is not a BooleanQuery", result is BooleanQuery);
            int clauseCount = ((BooleanQuery)result).Clauses.size();
            assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

            // Only one term is a stop word: a TermQuery is expected.
            result = qp.Parse("a:woo OR a:the", "a");
            assertNotNull("result is null and it shouldn't be", result);
            assertTrue("result is not a TermQuery", result is TermQuery);

            // Two groups joined by AND: a BooleanQuery with two clauses is expected.
            result = qp.Parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
            assertNotNull("result is null and it shouldn't be", result);
            assertTrue("result is not a BooleanQuery", result is BooleanQuery);
            if (VERBOSE)
            {
                Console.WriteLine("Result: " + result);
            }
            clauseCount = ((BooleanQuery)result).Clauses.size();
            assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
        }
예제 #13
0
        /// <summary>
        /// Parses boosted queries with a <see cref="StandardQueryParser"/>: first with an analyzer
        /// whose only stop word is "on", then with one using <c>MockTokenFilter.ENGLISH_STOPSET</c>.
        /// Checks that parsing never yields null and that the boosts are as expected.
        /// </summary>
        public void TestBoost()
        {
            var stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
            Analyzer oneStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);
            var qp = new StandardQueryParser { Analyzer = oneStopAnalyzer };

            // A boosted stop word must still produce a non-null query.
            Query q = qp.Parse("on^1.0", "field");
            assertNotNull(q);

            // A boosted phrase and a boosted term must both carry the boost.
            q = qp.Parse("\"hello\"^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, 2.0f, 0.5f);

            q = qp.Parse("hello^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, 2.0f, 0.5f);

            q = qp.Parse("\"on\"^1.0", "field");
            assertNotNull(q);

            var qp2 = new StandardQueryParser
            {
                Analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)
            };

            // "the" is a stop word so the result is an empty query:
            q = qp2.Parse("the^3", "field");
            assertNotNull(q);
            assertEquals("", q.toString());
            assertEquals(1.0f, q.Boost, 0.01f);
        }
예제 #14
0
        /// <summary>
        /// With position increments enabled and "stop" (in any letter case) configured as a stop
        /// word, checks that parsing the phrase <c>"1 stop 2"</c> yields a <see cref="PhraseQuery"/>
        /// holding "1" and "2", the latter added at position 2.
        /// </summary>
        public virtual void TestPhraseQueryPositionIncrements()
        {
            Automaton stopAutomaton = new RegExp("[sS][tT][oO][pP]").ToAutomaton();
            var stopStopList = new CharacterRunAutomaton(stopAutomaton);

            // The configuration is requested twice, each time with its own analyzer; only the
            // second one is used below.
            ICommonQueryParserConfiguration qp =
                GetParserConfig(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopStopList));
            qp = GetParserConfig(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopStopList));
            qp.EnablePositionIncrements = true;

            // Expected query: the second term is added at position 2.
            var expected = new PhraseQuery();
            expected.Add(new Term("field", "1"));
            expected.Add(new Term("field", "2"), 2);

            assertEquals(expected, GetQuery("\"1 stop 2\"", qp));
        }
예제 #15
0
        /// <summary>
        /// Checks that the character-based and byte-based run automata built from the regular
        /// expression <c>.?</c> both accept empty input.
        /// </summary>
        public void TestSpecialCase()
        {
            Automaton optionalAny = new RegExp(".?").ToAutomaton();
            var charDfa = new CharacterRunAutomaton(optionalAny);
            var byteDfa = new ByteRunAutomaton(optionalAny);

            // Empty input must be accepted by the character DFA, however it is supplied.
            Assert.IsTrue(charDfa.IsAccept(charDfa.InitialState));
            Assert.IsTrue(charDfa.Run(""));
            Assert.IsTrue(charDfa.Run(new char[0], 0, 0));

            // Empty input must be accepted by the byte DFA as well.
            Assert.IsTrue(byteDfa.IsAccept(byteDfa.InitialState));
            Assert.IsTrue(byteDfa.Run(new byte[0], 0, 0));
        }
예제 #16
0
        /// <summary>
        /// Parses boosted queries through <c>GetParserConfig</c>/<c>GetQuery</c>: first with an
        /// analyzer whose only stop word is "on", then with one using
        /// <c>MockTokenFilter.ENGLISH_STOPSET</c>. Checks that parsing never yields null and that
        /// the boosts are as expected.
        /// </summary>
        public virtual void TestBoost()
        {
            var stopWords = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
            Analyzer oneStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopWords);
            ICommonQueryParserConfiguration qp = GetParserConfig(oneStopAnalyzer);

            // A boosted stop word must still produce a non-null query.
            Query q = GetQuery("on^1.0", qp);
            assertNotNull(q);

            // A boosted phrase and a boosted term must both carry the boost.
            q = GetQuery("\"hello\"^2.0", qp);
            assertNotNull(q);
            assertEquals(q.Boost, 2.0f, 0.5f);

            q = GetQuery("hello^2.0", qp);
            assertNotNull(q);
            assertEquals(q.Boost, 2.0f, 0.5f);

            q = GetQuery("\"on\"^1.0", qp);
            assertNotNull(q);

            Analyzer a2 = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
            ICommonQueryParserConfiguration qp2 = GetParserConfig(a2);

            // "the" is a stop word so the result is an empty query:
            q = GetQuery("the^3", qp2);
            assertNotNull(q);
            assertEquals("", q.toString());
            assertEquals(1.0f, q.Boost, 0.01f);
        }
예제 #17
0
        /// <summary>
        /// Checks that the automaton built from the regular expression <c>.+\u0775</c> accepts the
        /// sample text both as a character sequence and as its UTF-8 byte encoding.
        /// </summary>
        public void TestSpecialCase2()
        {
            string text = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
            Automaton compiled = new RegExp(".+\u0775").ToAutomaton();
            var charRunner = new CharacterRunAutomaton(compiled);
            var byteRunner = new ByteRunAutomaton(compiled);

            // Character-level match.
            Assert.IsTrue(charRunner.Run(text));

            // Byte-level match over the UTF-8 encoding of the same text.
            var utf8 = text.GetBytes(Encoding.UTF8);
            Assert.IsTrue(byteRunner.Run(utf8, 0, utf8.Length)); // this one fails!
        }
예제 #18
0
        /// <summary>
        /// Checks that the character-based and byte-based run automata built from
        /// <paramref name="automaton"/> agree on at least 1000 strings. Each string is either a
        /// random Unicode string or a string generated from the automaton's accepted language,
        /// and the byte automaton is fed the UTF-8 encoding of the same string.
        /// </summary>
        /// <param name="automaton">The automaton both run automata are compiled from.</param>
        private static void AssertAutomaton(Automaton automaton)
        {
            var cra = new CharacterRunAutomaton(automaton);
            var bra = new ByteRunAutomaton(automaton);
            var ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);

            int num = AtLeast(1000);
            for (int i = 0; i < num; i++)
            {
                string s;
                if (Random().NextBoolean())
                {
                    // likely not accepted
                    s = TestUtil.RandomUnicodeString(Random());
                }
                else
                {
                    // will be accepted
                    int[] codepoints = ras.GetRandomAcceptedString(Random());
                    try
                    {
                        s = UnicodeUtil.NewString(codepoints, 0, codepoints.Length);
                    }
                    catch (Exception)
                    {
                        // Dump the offending code points (in hex) to help diagnose the failure.
                        Console.WriteLine(codepoints.Length + " codepoints:");
                        for (int j = 0; j < codepoints.Length; j++)
                        {
                            Console.WriteLine("  " + codepoints[j].ToString("x"));
                        }
                        // Rethrow with "throw;" so the original stack trace is preserved (CA2200).
                        throw;
                    }
                }
                var bytes = s.GetBytes(Encoding.UTF8);
                Assert.AreEqual(cra.Run(s), bra.Run(bytes, 0, bytes.Length));
            }
        }
예제 #19
0
        /// <summary>
        /// Parses boosted queries with a <see cref="PrecedenceQueryParser"/> whose analyzer treats
        /// "on" as its only stop word, then parses a boosted stop word with a parser obtained from
        /// <c>GetParser</c> using <c>MockTokenFilter.ENGLISH_STOPSET</c>. Checks that parsing never
        /// yields null and that explicit boosts are carried onto the query.
        /// </summary>
        public void TestBoost()
        {
            var stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
            Analyzer oneStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            var qp = new PrecedenceQueryParser { Analyzer = oneStopAnalyzer };

            // A boosted stop word must still produce a non-null query.
            Query q = qp.Parse("on^1.0", "field");
            assertNotNull(q);

            // A boosted phrase and a boosted term must both carry the boost.
            q = qp.Parse("\"hello\"^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, 2.0f, 0.5f);

            q = qp.Parse("hello^2.0", "field");
            assertNotNull(q);
            assertEquals(q.Boost, 2.0f, 0.5f);

            q = qp.Parse("\"on\"^1.0", "field");
            assertNotNull(q);

            // Same check with a parser built on the English stop set.
            var englishStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
            var englishParser = GetParser(englishStopAnalyzer);
            q = englishParser.Parse("the^3", "field");
            assertNotNull(q);
        }