/// <summary>
/// Wraps <paramref name="tenum"/>, builds a <see cref="CharacterRunAutomaton"/> from the
/// automaton held by <paramref name="outerInstance"/>, and sets the initial seek term to
/// an empty <see cref="BytesRef"/>.
/// </summary>
/// <param name="outerInstance">The query whose <c>automaton</c> field drives this enum.</param>
/// <param name="tenum">The underlying terms enum handed to the base constructor.</param>
internal SimpleAutomatonTermsEnum(TestRegexpRandom2.DumbRegexpQuery outerInstance, TermsEnum tenum)
    : base(tenum)
{
    this.outerInstance = outerInstance;
    this.runAutomaton = new CharacterRunAutomaton(this.outerInstance.automaton);

    BytesRef emptyTerm = new BytesRef("");
    SetInitialSeekTerm(emptyTerm);
}
/// <summary>
/// Regression case: the character-based and byte-based run automata built from the same
/// regular expression must both accept a fixed input that contains surrogate pairs.
/// </summary>
public void TestSpecialCase3()
{
    RegExp re = new RegExp("(\\鯺)*(.)*\\Ӕ");
    string input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
    Automaton automaton = re.ToAutomaton();
    CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
    ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

    Assert.IsTrue(cra.Run(input));

    // Let the compiler pick the array type returned by GetBytes instead of forcing sbyte[];
    // the same array is handed straight to the byte automaton.
    var bytes = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(bra.Run(bytes, 0, bytes.Length));
}
/// <summary>
/// Regression case: the character-based and byte-based run automata built from
/// <c>.+\u0775</c> must both accept a fixed input that contains surrogate pairs and
/// U+FFFD replacement characters.
/// </summary>
public void TestSpecialCase2()
{
    RegExp re = new RegExp(".+\u0775");
    string input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
    Automaton automaton = re.ToAutomaton();
    CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
    ByteRunAutomaton bra = new ByteRunAutomaton(automaton);

    Assert.IsTrue(cra.Run(input));

    // Let the compiler pick the array type returned by GetBytes instead of forcing sbyte[];
    // the same array is handed straight to the byte automaton.
    var bytes = input.GetBytes(Encoding.UTF8);
    Assert.IsTrue(bra.Run(bytes, 0, bytes.Length)); // this one fails!
}
/// <summary>
/// Checks that both run automata built from the regular expression <c>.?</c>
/// accept the empty input.
/// </summary>
public void TestSpecialCase()
{
    Automaton optionalChar = new RegExp(".?").ToAutomaton();
    var charDfa = new CharacterRunAutomaton(optionalChar);
    var byteDfa = new ByteRunAutomaton(optionalChar);

    // Character DFA: the initial state is accepting, and both the string and the
    // char[] overloads accept zero-length input.
    Assert.IsTrue(charDfa.IsAccept(charDfa.InitialState));
    Assert.IsTrue(charDfa.Run(""));
    char[] noChars = new char[0];
    Assert.IsTrue(charDfa.Run(noChars, 0, 0));

    // Byte DFA: same expectations for the byte[] overload.
    Assert.IsTrue(byteDfa.IsAccept(byteDfa.InitialState));
    byte[] noBytes = new byte[0];
    Assert.IsTrue(byteDfa.Run(noBytes, 0, 0));
}
/// <summary>
/// Runs the base set-up, then builds <c>jvmLetter</c>: a run automaton that accepts any
/// repetition of code points for which <c>Character.IsLetter</c> returns true.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    State start = new State();
    State letterSeen = new State();
    letterSeen.Accept = true;

    // One single-code-point transition per letter across the whole Unicode range.
    for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
    {
        if (!Character.IsLetter(codePoint))
        {
            continue;
        }

        start.AddTransition(new Transition(codePoint, codePoint, letterSeen));
    }

    Automaton oneLetter = new Automaton(start);
    oneLetter.Reduce();

    jvmLetter = new CharacterRunAutomaton(BasicOperations.Repeat(oneLetter));
}
/// <summary>
/// Brute-force check of <paramref name="dfa"/>: for every integer below
/// 2^(input.Length + distance + 1), its base-2 string must be accepted by the automaton
/// exactly when <c>GetTDistance(input, candidate)</c> is at most <paramref name="distance"/>.
/// </summary>
/// <param name="input">The reference string the distance is measured against.</param>
/// <param name="dfa">The automaton under test.</param>
/// <param name="distance">The maximum distance the automaton is expected to accept.</param>
private void AssertBruteForceT(string input, Automaton dfa, int distance)
{
    var runner = new CharacterRunAutomaton(dfa);
    int longest = input.Length + distance + 1;
    int candidateCount = (int)Math.Pow(2, longest);

    for (int value = 0; value < candidateCount; value++)
    {
        string candidate = Convert.ToString(value, 2);
        bool accepted = runner.Run(candidate);
        var actualDistance = GetTDistance(input, candidate);

        // Accepted strings must be within the distance; rejected ones must be beyond it.
        Assert.IsTrue(accepted ? actualDistance <= distance : actualDistance > distance);
    }
}
/// <summary>
/// Checks that the character-based and byte-based run automata built from
/// <paramref name="automaton"/> agree on a series of strings. Each iteration uses either
/// a random Unicode string or a string generated from the automaton's accepted language.
/// </summary>
/// <param name="automaton">The automaton both run automata are built from.</param>
private static void AssertAutomaton(Automaton automaton)
{
    var cra = new CharacterRunAutomaton(automaton);
    var bra = new ByteRunAutomaton(automaton);
    var ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);

    int num = AtLeast(1000);
    for (int i = 0; i < num; i++)
    {
        string s;
        if (Random().NextBoolean())
        {
            // likely not accepted
            s = TestUtil.RandomUnicodeString(Random());
        }
        else
        {
            // will be accepted
            int[] codepoints = ras.GetRandomAcceptedString(Random());
            try
            {
                s = UnicodeUtil.NewString(codepoints, 0, codepoints.Length);
            }
            catch (Exception)
            {
                // Dump the offending code points before propagating the failure.
                Console.WriteLine(codepoints.Length + " codepoints:");
                for (int j = 0; j < codepoints.Length; j++)
                {
                    Console.WriteLine(" " + codepoints[j].ToString("x"));
                }

                // Bare rethrow keeps the original stack trace (CA2200); "throw e;" would reset it.
                throw;
            }
        }

        var bytes = s.GetBytes(Encoding.UTF8);
        Assert.AreEqual(cra.Run(s), bra.Run(bytes, 0, bytes.Length));
    }
}
/// <summary>
/// Checks that the character-based and byte-based run automata built from
/// <paramref name="automaton"/> give the same answer for a series of strings, drawn
/// either at random or from the automaton's accepted language.
/// </summary>
/// <param name="automaton">The automaton both run automata are built from.</param>
private static void AssertAutomaton(Automaton automaton)
{
    var charRunner = new CharacterRunAutomaton(automaton);
    var byteRunner = new ByteRunAutomaton(automaton);
    var acceptedStrings = new RandomAcceptedStrings(automaton);

    int iterations = AtLeast(1000);
    for (int round = 0; round < iterations; round++)
    {
        string candidate;
        bool useRandomString = Random.NextBoolean();
        if (!useRandomString)
        {
            // Generated from the automaton, so it will be accepted.
            int[] codepoints = acceptedStrings.GetRandomAcceptedString(Random);
            try
            {
                candidate = UnicodeUtil.NewString(codepoints, 0, codepoints.Length);
            }
            catch (Exception)
            {
                // Dump the code points that could not be turned into a string.
                Console.WriteLine(codepoints.Length + " codepoints:");
                foreach (int codepoint in codepoints)
                {
                    Console.WriteLine(" " + codepoint.ToString("x"));
                }

                throw; // LUCENENET: CA2200: bare rethrow preserves stack details
            }
        }
        else
        {
            // Arbitrary Unicode, so it is likely not accepted.
            candidate = TestUtil.RandomUnicodeString(Random);
        }

        var utf8 = candidate.GetBytes(Encoding.UTF8);
        bool charResult = charRunner.Run(candidate);
        bool byteResult = byteRunner.Run(utf8, 0, utf8.Length);
        Assert.AreEqual(charResult, byteResult);
    }
}
/// <summary>
/// Regression case: both run automata built from the same regular expression must
/// accept a fixed input that contains surrogate pairs.
/// </summary>
public void TestSpecialCase3()
{
    const string text = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";

    Automaton compiled = new RegExp("(\\鯺)*(.)*\\Ӕ").ToAutomaton();
    var charRunner = new CharacterRunAutomaton(compiled);
    var byteRunner = new ByteRunAutomaton(compiled);

    // Character-level match on the UTF-16 string.
    bool charAccepted = charRunner.Run(text);
    Assert.IsTrue(charAccepted);

    // Byte-level match on the UTF-8 encoding of the same string.
    var utf8 = text.GetBytes(Encoding.UTF8);
    bool byteAccepted = byteRunner.Run(utf8, 0, utf8.Length);
    Assert.IsTrue(byteAccepted);
}
/// <summary>
/// Parses queries through an analyzer whose stop set matches "the" and "foo", and checks
/// the type and clause count of each resulting query.
/// </summary>
public virtual void TestStopwords()
{
    var stopSet = new CharacterRunAutomaton(new RegExp("the|foo").ToAutomaton());
    var analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);
    ICommonQueryParserConfiguration qp = GetParserConfig(analyzer);

    // Every term is a stop word: expect a BooleanQuery with no clauses.
    Query result = GetQuery("field:the OR field:foo", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result is BooleanQuery);
    int emptyCount = ((BooleanQuery)result).Clauses.Length;
    assertTrue(emptyCount + " does not equal: " + 0, emptyCount == 0);

    // One surviving term: expect a TermQuery.
    result = GetQuery("field:woo OR field:the", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a TermQuery", result is TermQuery);

    // Two top-level groups: expect a BooleanQuery with two clauses.
    result = GetQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result is BooleanQuery);
    if (VERBOSE)
    {
        Console.WriteLine("Result: " + result);
    }
    int groupedCount = ((BooleanQuery)result).Clauses.Length;
    assertTrue(groupedCount + " does not equal: " + 2, groupedCount == 2);
}
/// <summary>
/// Parses queries with a <see cref="StandardQueryParser"/> whose analyzer uses a stop set
/// matching "the" and "foo", and checks the type and clause count of each resulting query.
/// </summary>
public void TestStopwords()
{
    StandardQueryParser parser = new StandardQueryParser();
    var stopSet = new CharacterRunAutomaton(new RegExp("the|foo").ToAutomaton());
    parser.Analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

    // Every term is a stop word: expect a BooleanQuery with no clauses.
    Query result = parser.Parse("a:the OR a:foo", "a");
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result is BooleanQuery);
    int emptyCount = ((BooleanQuery)result).Clauses.size();
    assertTrue(emptyCount + " does not equal: " + 0, emptyCount == 0);

    // One surviving term: expect a TermQuery.
    result = parser.Parse("a:woo OR a:the", "a");
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a TermQuery", result is TermQuery);

    // Two top-level groups: expect a BooleanQuery with two clauses.
    result = parser.Parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result is BooleanQuery);
    if (VERBOSE)
    {
        Console.WriteLine("Result: " + result);
    }
    int groupedCount = ((BooleanQuery)result).Clauses.size();
    assertTrue(groupedCount + " does not equal: " + 2, groupedCount == 2);
}
/// <summary>
/// Parses boosted queries with a <see cref="StandardQueryParser"/> and checks the boost
/// on the results, using one analyzer that stops "on" and one that uses
/// <c>MockTokenFilter.ENGLISH_STOPSET</c>.
/// </summary>
public void TestBoost()
{
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);
    StandardQueryParser qp = new StandardQueryParser();
    qp.Analyzer = oneStopAnalyzer;

    Query q = qp.Parse("on^1.0", "field");
    assertNotNull(q);

    // Expected value goes first so a failure message reports expected/actual correctly.
    q = qp.Parse("\"hello\"^2.0", "field");
    assertNotNull(q);
    assertEquals(2.0f, q.Boost, 0.5f);

    q = qp.Parse("hello^2.0", "field");
    assertNotNull(q);
    assertEquals(2.0f, q.Boost, 0.5f);

    q = qp.Parse("\"on\"^1.0", "field");
    assertNotNull(q);

    StandardQueryParser qp2 = new StandardQueryParser();
    qp2.Analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);

    q = qp2.Parse("the^3", "field");
    // "the" is a stop word so the result is an empty query:
    assertNotNull(q);
    assertEquals("", q.toString());
    assertEquals(1.0f, q.Boost, 0.01f);
}
/// <summary>
/// With position increments enabled and a stop set matching "stop" in any letter case,
/// the phrase <c>"1 stop 2"</c> must parse to a phrase query with "1" followed by "2"
/// at position 2.
/// </summary>
public virtual void TestPhraseQueryPositionIncrements()
{
    var stopMatcher = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").ToAutomaton());

    // NOTE(review): the first configuration is replaced immediately; both constructions
    // are kept so the sequence of Random() calls is unchanged.
    ICommonQueryParserConfiguration qp =
        GetParserConfig(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopMatcher));
    qp = GetParserConfig(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopMatcher));
    qp.EnablePositionIncrements = true;

    PhraseQuery expected = new PhraseQuery();
    expected.Add(new Term("field", "1"));
    expected.Add(new Term("field", "2"), 2);

    Query parsed = GetQuery("\"1 stop 2\"", qp);
    assertEquals(expected, parsed);
}
/// <summary>
/// Parses boosted queries through <c>GetParserConfig</c>/<c>GetQuery</c> and checks the
/// boost on the results, using one analyzer that stops "on" and one that uses
/// <c>MockTokenFilter.ENGLISH_STOPSET</c>.
/// </summary>
public virtual void TestBoost()
{
    CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopWords);
    ICommonQueryParserConfiguration qp = GetParserConfig(oneStopAnalyzer);

    Query q = GetQuery("on^1.0", qp);
    assertNotNull(q);

    // Expected value goes first so a failure message reports expected/actual correctly.
    q = GetQuery("\"hello\"^2.0", qp);
    assertNotNull(q);
    assertEquals(2.0f, q.Boost, 0.5f);

    q = GetQuery("hello^2.0", qp);
    assertNotNull(q);
    assertEquals(2.0f, q.Boost, 0.5f);

    q = GetQuery("\"on\"^1.0", qp);
    assertNotNull(q);

    Analyzer a2 = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    ICommonQueryParserConfiguration qp2 = GetParserConfig(a2);

    q = GetQuery("the^3", qp2);
    // "the" is a stop word so the result is an empty query:
    assertNotNull(q);
    assertEquals("", q.toString());
    assertEquals(1.0f, q.Boost, 0.01f);
}
/// <summary>
/// Regression case: both run automata built from <c>.+\u0775</c> must accept a fixed
/// input that contains surrogate pairs and U+FFFD replacement characters.
/// </summary>
public void TestSpecialCase2()
{
    const string text = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";

    Automaton compiled = new RegExp(".+\u0775").ToAutomaton();
    var charRunner = new CharacterRunAutomaton(compiled);
    var byteRunner = new ByteRunAutomaton(compiled);

    // Character-level match on the UTF-16 string.
    bool charAccepted = charRunner.Run(text);
    Assert.IsTrue(charAccepted);

    // Byte-level match on the UTF-8 encoding of the same string.
    var utf8 = text.GetBytes(Encoding.UTF8);
    bool byteAccepted = byteRunner.Run(utf8, 0, utf8.Length);
    Assert.IsTrue(byteAccepted); // this one fails!
}
/// <summary>
/// Parses boosted queries with a <see cref="PrecedenceQueryParser"/> whose analyzer stops
/// "on" and checks the boost on the results, then parses <c>the^3</c> with a parser
/// obtained from <c>GetParser</c> using <c>MockTokenFilter.ENGLISH_STOPSET</c>.
/// </summary>
public void TestBoost()
{
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);
    PrecedenceQueryParser qp = new PrecedenceQueryParser();
    qp.Analyzer = oneStopAnalyzer;

    Query q = qp.Parse("on^1.0", "field");
    assertNotNull(q);

    // Expected value goes first so a failure message reports expected/actual correctly.
    q = qp.Parse("\"hello\"^2.0", "field");
    assertNotNull(q);
    assertEquals(2.0f, q.Boost, 0.5f);

    q = qp.Parse("hello^2.0", "field");
    assertNotNull(q);
    assertEquals(2.0f, q.Boost, 0.5f);

    q = qp.Parse("\"on\"^1.0", "field");
    assertNotNull(q);

    q = GetParser(new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
        .Parse("the^3", "field");
    assertNotNull(q);
}