/// <summary>
/// Regression test tagged Lucene-5033. Indexes "abcdef" and "segment", then verifies
/// that a query for "abcxxxx" finds nothing with a minimum similarity of 3f and
/// exactly one document with 4f (prefix length 0 in both cases).
/// </summary>
public void TestFuzzinessLong2()
{
    //Lucene-5033
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    foreach (string value in new string[] { "abcdef", "segment" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // 3f is not permissive enough for "abcxxxx" to reach any indexed value.
    SlowFuzzyQuery tooStrict = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
    ScoreDoc[] found = indexSearcher.Search(tooStrict, null, 1000).ScoreDocs;
    assertEquals(0, found.Length);

    // 4f lets exactly one document match.
    SlowFuzzyQuery relaxed = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
    found = indexSearcher.Search(relaxed, null, 1000).ScoreDocs;
    assertEquals(1, found.Length);

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Regression test tagged Lucene-5033. Indexes "abcdef" and "segment", then verifies
/// that a query for "abcxxxx" finds nothing with a minimum similarity of 3f and
/// exactly one document with 4f (prefix length 0 in both cases).
/// </summary>
public void TestFuzzinessLong2()
{
    //Lucene-5033
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    foreach (string value in new string[] { "abcdef", "segment" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.Reader;
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // 3f is not permissive enough for "abcxxxx" to reach any indexed value.
    SlowFuzzyQuery tooStrict = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
    ScoreDoc[] found = indexSearcher.Search(tooStrict, null, 1000).ScoreDocs;
    assertEquals(0, found.Length);

    // 4f lets exactly one document match.
    SlowFuzzyQuery relaxed = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
    found = indexSearcher.Search(relaxed, null, 1000).ScoreDocs;
    assertEquals(1, found.Length);

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes "Lucene" twice and "Lucenne" once, searches for "lucene" using a
/// <c>TopTermsBoostOnlyBooleanQueryRewrite(50)</c> rewrite, and checks that all three
/// documents are returned with both "Lucene" documents ranked ahead of "Lucenne".
/// </summary>
public void TestBoostOnlyRewrite()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    foreach (string value in new string[] { "Lucene", "Lucene", "Lucenne" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "lucene"));
    fuzzy.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50);
    ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
    assertEquals(3, found.Length);

    // normally, 'Lucenne' would be the first result as IDF will skew the score.
    string[] expectedRanking = { "Lucene", "Lucene", "Lucenne" };
    for (int rank = 0; rank < expectedRanking.Length; rank++)
    {
        assertEquals(expectedRanking[rank], indexReader.Document(found[rank].Doc).Get("field"));
    }

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes "Lucene" twice and "Lucenne" once, searches for "lucene" using a
/// <c>TopTermsBoostOnlyBooleanQueryRewrite(50)</c> rewrite, and checks that all three
/// documents are returned with both "Lucene" documents ranked ahead of "Lucenne".
/// </summary>
public void TestBoostOnlyRewrite()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    foreach (string value in new string[] { "Lucene", "Lucene", "Lucenne" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.Reader;
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "lucene"));
    fuzzy.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
    ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
    assertEquals(3, found.Length);

    // normally, 'Lucenne' would be the first result as IDF will skew the score.
    string[] expectedRanking = { "Lucene", "Lucene", "Lucenne" };
    for (int rank = 0; rank < expectedRanking.Length; rank++)
    {
        assertEquals(expectedRanking[rank], indexReader.Document(found[rank].Doc).Get("field"));
    }

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes "foobar", "test" and "working" and runs a series of queries whose second
/// constructor argument is 2, 3, 4f or 6f, asserting for each query both the number of
/// hits and the stored "field" value of every hit, in ranked order.
/// </summary>
public void TestDistanceAsEditsSearching()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    foreach (string value in new string[] { "foobar", "test", "working" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // "fouba" with 2 -> only "foobar"
    SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
    ScoreDoc[] found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("foobar", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "foubara" with 2 -> only "foobar"
    fuzzy = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("foobar", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "t" with 3 -> only "test"
    fuzzy = new SlowFuzzyQuery(new Term("field", "t"), 3);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("test", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "a" with 4f -> only "test"
    fuzzy = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("test", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "a" with 6f -> "test" first, then "foobar"
    fuzzy = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(2, found.Length);
    assertEquals("test", indexSearcher.Doc(found[0].Doc).Get("field"));
    assertEquals("foobar", indexSearcher.Doc(found[1].Doc).Get("field"));

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds two separate indexes, searches them together through a <c>MultiReader</c>
/// with a fuzzy query for "z123456" (arguments 1f, 0, 2), and expects a total of
/// five hits.
/// </summary>
public void TestTieBreaker()
{
    // First index: a, c, d, e.
    Directory firstDir = NewDirectory();
    RandomIndexWriter firstWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, firstDir);
    foreach (string value in new string[] { "a123456", "c123456", "d123456", "e123456" })
    {
        addDoc(value, firstWriter);
    }

    // Second index: a, b (three times), c, f.
    Directory secondDir = NewDirectory();
    RandomIndexWriter secondWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, secondDir);
    foreach (string value in new string[] { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" })
    {
        addDoc(value, secondWriter);
    }

    IndexReader firstReader = firstWriter.GetReader();
    IndexReader secondReader = secondWriter.GetReader();

    MultiReader combined = new MultiReader(firstReader, secondReader);
    IndexSearcher indexSearcher = NewSearcher(combined);
    SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
    TopDocs top = indexSearcher.Search(fuzzy, 2);
    assertEquals(5, top.TotalHits); // 5 docs, from the a and b's

    combined.Dispose();
    firstReader.Dispose();
    secondReader.Dispose();
    firstWriter.Dispose();
    secondWriter.Dispose();
    firstDir.Dispose();
    secondDir.Dispose();
}
/// <summary>
/// Indexes "foobar", "test" and "working" and runs a series of queries whose second
/// constructor argument is 2, 3, 4f or 6f, asserting for each query both the number of
/// hits and the stored "field" value of every hit, in ranked order.
/// </summary>
public void TestDistanceAsEditsSearching()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    foreach (string value in new string[] { "foobar", "test", "working" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.Reader;
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // "fouba" with 2 -> only "foobar"
    SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
    ScoreDoc[] found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("foobar", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "foubara" with 2 -> only "foobar"
    fuzzy = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("foobar", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "t" with 3 -> only "test"
    fuzzy = new SlowFuzzyQuery(new Term("field", "t"), 3);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("test", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "a" with 4f -> only "test"
    fuzzy = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("test", indexSearcher.Doc(found[0].Doc).Get("field"));

    // "a" with 6f -> "test" first, then "foobar"
    fuzzy = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
    found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(2, found.Length);
    assertEquals("test", indexSearcher.Doc(found[0].Doc).Get("field"));
    assertEquals("foobar", indexSearcher.Doc(found[1].Doc).Get("field"));

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes "12345678911" and "segment" and checks the hit count of four queries at a
/// minimum similarity of 0.9f, using search terms of different lengths (7, 10, 11 and
/// 12 characters).
/// </summary>
public void TestTokenLengthOpt()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    addDoc("12345678911", indexWriter);
    addDoc("segment", indexWriter);

    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // Search terms and the hit count expected for each, in execution order.
    string[] searchTerms =
    {
        "1234569",      // term not over 10 chars, so optimization shortcuts
        "1234567891",   // 10 chars, so no optimization
        "12345678911",  // over 10 chars, so no optimization
        "sdfsdfsdfsdf", // over 10 chars, no match
    };
    int[] expectedCounts = { 0, 0, 1, 0 };

    for (int i = 0; i < searchTerms.Length; i++)
    {
        Query fuzzy = new SlowFuzzyQuery(new Term("field", searchTerms[i]), 0.9f);
        ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
        assertEquals(expectedCounts[i], found.Length);
    }

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes thirteen short titles and names, then checks that a fuzzy query for "giga"
/// at a minimum similarity of 0.9f returns exactly one hit whose stored field is
/// "Giga byte".
/// </summary>
public void TestGiga()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    string[] contents =
    {
        "Lucene in Action",
        "Lucene for Dummies",
        // "Giga" is intentionally not indexed (it was commented out in the original list)
        "Giga byte",
        "ManagingGigabytesManagingGigabyte",
        "ManagingGigabytesManagingGigabytes",
        "The Art of Computer Science",
        "J. K. Rowling",
        "JK Rowling",
        "Joanne K Roling",
        "Bruce Willis",
        "Willis bruce",
        "Brute willis",
        "B. willis",
    };
    foreach (string value in contents)
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.GetReader();
    indexWriter.Dispose();

    Query fuzzy = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);

    // 3. search
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    ScoreDoc[] found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("Giga byte", indexSearcher.Doc(found[0].Doc).Get("field"));

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Determines whether <paramref name="obj"/> is equal to this query.
/// </summary>
/// <param name="obj">The object to compare with.</param>
/// <returns>
/// <c>true</c> when <paramref name="obj"/> is this same instance, or when the base
/// class considers it equal, it has exactly the same runtime type, and its minimum
/// similarity (compared via <c>Number.FloatToIntBits</c>), prefix length and term all
/// match; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    // Identity short-circuit.
    if (ReferenceEquals(this, obj))
    {
        return true;
    }

    // The base-class comparison runs first; the exact-type check guards the cast below.
    if (!base.Equals(obj) || GetType() != obj.GetType())
    {
        return false;
    }

    SlowFuzzyQuery that = (SlowFuzzyQuery)obj;

    // Evaluated left to right with short-circuiting, mirroring the original check order.
    return Number.FloatToIntBits(minimumSimilarity) == Number.FloatToIntBits(that.minimumSimilarity)
        && prefixLength == that.prefixLength
        && (term == null ? that.term == null : term.Equals(that.term));
}
/// <summary>
/// Indexes "12345678911" and "segment" and checks the hit count of four queries at a
/// minimum similarity of 0.9f, using search terms of different lengths (7, 10, 11 and
/// 12 characters).
/// </summary>
public void TestTokenLengthOpt()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    addDoc("12345678911", indexWriter);
    addDoc("segment", indexWriter);

    IndexReader indexReader = indexWriter.Reader;
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // Search terms and the hit count expected for each, in execution order.
    string[] searchTerms =
    {
        "1234569",      // term not over 10 chars, so optimization shortcuts
        "1234567891",   // 10 chars, so no optimization
        "12345678911",  // over 10 chars, so no optimization
        "sdfsdfsdfsdf", // over 10 chars, no match
    };
    int[] expectedCounts = { 0, 0, 1, 0 };

    for (int i = 0; i < searchTerms.Length; i++)
    {
        Query fuzzy = new SlowFuzzyQuery(new Term("field", searchTerms[i]), 0.9f);
        ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
        assertEquals(expectedCounts[i], found.Length);
    }

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Determines whether <paramref name="obj"/> is equal to this query.
/// </summary>
/// <param name="obj">The object to compare with.</param>
/// <returns>
/// <c>true</c> when <paramref name="obj"/> is this same instance, or when the base
/// class considers it equal, it has exactly the same runtime type, and its minimum
/// similarity (compared via <c>J2N.BitConversion.SingleToInt32Bits</c>), prefix length
/// and term all match; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    // Identity short-circuit.
    if (ReferenceEquals(this, obj))
    {
        return true;
    }

    // The base-class comparison runs first; the exact-type check guards the cast below.
    if (!base.Equals(obj) || GetType() != obj.GetType())
    {
        return false;
    }

    SlowFuzzyQuery that = (SlowFuzzyQuery)obj;

    // Evaluated left to right with short-circuiting, mirroring the original check order.
    return J2N.BitConversion.SingleToInt32Bits(minimumSimilarity) == J2N.BitConversion.SingleToInt32Bits(that.minimumSimilarity)
        && prefixLength == that.prefixLength
        && (m_term == null ? that.m_term == null : m_term.Equals(that.m_term));
}
/// <summary>
/// Indexes thirteen short titles and names, then checks that a fuzzy query for "giga"
/// at a minimum similarity of 0.9f returns exactly one hit whose stored field is
/// "Giga byte".
/// </summary>
public void TestGiga()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    string[] contents =
    {
        "Lucene in Action",
        "Lucene for Dummies",
        // "Giga" is intentionally not indexed (it was commented out in the original list)
        "Giga byte",
        "ManagingGigabytesManagingGigabyte",
        "ManagingGigabytesManagingGigabytes",
        "The Art of Computer Science",
        "J. K. Rowling",
        "JK Rowling",
        "Joanne K Roling",
        "Bruce Willis",
        "Willis bruce",
        "Brute willis",
        "B. willis",
    };
    foreach (string value in contents)
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.Reader;
    indexWriter.Dispose();

    Query fuzzy = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);

    // 3. search
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    ScoreDoc[] found = indexSearcher.Search(fuzzy, 10).ScoreDocs;
    assertEquals(1, found.Length);
    assertEquals("Giga byte", indexSearcher.Doc(found[0].Doc).Get("field"));

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds two separate indexes, searches them together through a <c>MultiReader</c>
/// with a fuzzy query for "z123456" (arguments 1f, 0, 2), and expects a total of
/// five hits.
/// </summary>
public void TestTieBreaker()
{
    // First index: a, c, d, e.
    Directory firstDir = NewDirectory();
    RandomIndexWriter firstWriter = new RandomIndexWriter(Random(), firstDir, Similarity, TimeZone);
    foreach (string value in new string[] { "a123456", "c123456", "d123456", "e123456" })
    {
        addDoc(value, firstWriter);
    }

    // Second index: a, b (three times), c, f.
    Directory secondDir = NewDirectory();
    RandomIndexWriter secondWriter = new RandomIndexWriter(Random(), secondDir, Similarity, TimeZone);
    foreach (string value in new string[] { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" })
    {
        addDoc(value, secondWriter);
    }

    IndexReader firstReader = firstWriter.Reader;
    IndexReader secondReader = secondWriter.Reader;

    MultiReader combined = new MultiReader(firstReader, secondReader);
    IndexSearcher indexSearcher = NewSearcher(combined);
    SlowFuzzyQuery fuzzy = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
    TopDocs top = indexSearcher.Search(fuzzy, 2);
    assertEquals(5, top.TotalHits); // 5 docs, from the a and b's

    combined.Dispose();
    firstReader.Dispose();
    secondReader.Dispose();
    firstWriter.Dispose();
    secondWriter.Dispose();
    firstDir.Dispose();
    secondDir.Dispose();
}
/// <summary>
/// Indexes seven five-letter values ("aaaaa" through "bbbbb", plus "ddddd") and runs a
/// long series of queries at <c>SlowFuzzyQuery.defaultMinSimilarity</c> with varying
/// prefix lengths, asserting hit counts and, where checked, the ranked field values.
/// </summary>
public void TestFuzziness()
{
    //every test with SlowFuzzyQuery.defaultMinSimilarity
    //is exercising the Automaton, not the brute force linear method

    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    foreach (string value in new string[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.Reader;
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // Runs a default-similarity fuzzy query with the given prefix length and returns its hits.
    Func<string, string, int, ScoreDoc[]> run = (fieldName, text, prefix) =>
        indexSearcher.Search(
            new SlowFuzzyQuery(new Term(fieldName, text), SlowFuzzyQuery.defaultMinSimilarity, prefix),
            null,
            1000).ScoreDocs;

    // Asserts the hit count first, then each hit's stored field value in ranked order.
    Action<ScoreDoc[], string[]> verify = (found, expected) =>
    {
        assertEquals(expected.Length, found.Length);
        for (int i = 0; i < expected.Length; i++)
        {
            assertEquals(indexSearcher.Doc(found[i].Doc).Get("field"), expected[i]);
        }
    };

    string[] none = new string[0];
    string[] topThree = { "aaaaa", "aaaab", "aaabb" };
    string[] topTwo = { "aaaaa", "aaaab" };
    string[] onlyD = { "ddddd" };

    // "aaaaa" with prefix lengths 0..6 (prefix 0 first, then "same with prefix")
    int[] countsByPrefix = { 3, 3, 3, 3, 2, 1, 1 };
    for (int prefix = 0; prefix < countsByPrefix.Length; prefix++)
    {
        assertEquals(countsByPrefix[prefix], run("field", "aaaaa", prefix).Length);
    }

    // test scoring
    ScoreDoc[] hits = run("field", "bbbbb", 0);
    assertEquals("3 documents should match", 3, hits.Length);
    string[] order = { "bbbbb", "abbbb", "aabbb" };
    for (int i = 0; i < hits.Length; i++)
    {
        string term = indexSearcher.Doc(hits[i].Doc).Get("field");
        //System.out.println(hits[i].score);
        assertEquals(order[i], term);
    }

    // test pq size by supplying maxExpansions=2
    // This query would normally return 3 documents, because 3 terms match (see above):
    SlowFuzzyQuery limited = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
    hits = indexSearcher.Search(limited, null, 1000).ScoreDocs;
    assertEquals("only 2 documents should match", 2, hits.Length);
    order = new string[] { "bbbbb", "abbbb" };
    for (int i = 0; i < hits.Length; i++)
    {
        string term = indexSearcher.Doc(hits[i].Doc).Get("field");
        //System.out.println(hits[i].score);
        assertEquals(order[i], term);
    }

    // not similar enough:
    verify(run("field", "xxxxx", 0), none);
    verify(run("field", "aaccc", 0), none); // edit distance to "aaaaa" = 3

    // query identical to a word in the index:
    // default allows for up to two edits:
    verify(run("field", "aaaaa", 0), topThree);

    // query similar to a word in the index:
    verify(run("field", "aaaac", 0), topThree);

    // now with prefix
    verify(run("field", "aaaac", 1), topThree);
    verify(run("field", "aaaac", 2), topThree);
    verify(run("field", "aaaac", 3), topThree);
    verify(run("field", "aaaac", 4), topTwo);
    verify(run("field", "aaaac", 5), none);

    verify(run("field", "ddddX", 0), onlyD);

    // now with prefix
    verify(run("field", "ddddX", 1), onlyD);
    verify(run("field", "ddddX", 2), onlyD);
    verify(run("field", "ddddX", 3), onlyD);
    verify(run("field", "ddddX", 4), onlyD);
    verify(run("field", "ddddX", 5), none);

    // different field = no match:
    verify(run("anotherfield", "ddddX", 0), none);

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes seven five-letter values ("aaaaa" through "bbbbb", plus "ddddd") and runs a
/// long series of queries at <c>SlowFuzzyQuery.defaultMinSimilarity</c> with varying
/// prefix lengths, asserting hit counts and, where checked, the ranked field values.
/// </summary>
public void TestFuzziness()
{
    //every test with SlowFuzzyQuery.defaultMinSimilarity
    //is exercising the Automaton, not the brute force linear method

    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    foreach (string value in new string[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
    {
        addDoc(value, indexWriter);
    }

    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // Runs a default-similarity fuzzy query with the given prefix length and returns its hits.
    Func<string, string, int, ScoreDoc[]> run = (fieldName, text, prefix) =>
        indexSearcher.Search(
            new SlowFuzzyQuery(new Term(fieldName, text), SlowFuzzyQuery.defaultMinSimilarity, prefix),
            null,
            1000).ScoreDocs;

    // Asserts the hit count first, then each hit's stored field value in ranked order.
    Action<ScoreDoc[], string[]> verify = (found, expected) =>
    {
        assertEquals(expected.Length, found.Length);
        for (int i = 0; i < expected.Length; i++)
        {
            assertEquals(indexSearcher.Doc(found[i].Doc).Get("field"), expected[i]);
        }
    };

    string[] none = new string[0];
    string[] topThree = { "aaaaa", "aaaab", "aaabb" };
    string[] topTwo = { "aaaaa", "aaaab" };
    string[] onlyD = { "ddddd" };

    // "aaaaa" with prefix lengths 0..6 (prefix 0 first, then "same with prefix")
    int[] countsByPrefix = { 3, 3, 3, 3, 2, 1, 1 };
    for (int prefix = 0; prefix < countsByPrefix.Length; prefix++)
    {
        assertEquals(countsByPrefix[prefix], run("field", "aaaaa", prefix).Length);
    }

    // test scoring
    ScoreDoc[] hits = run("field", "bbbbb", 0);
    assertEquals("3 documents should match", 3, hits.Length);
    IList<String> order = new string[] { "bbbbb", "abbbb", "aabbb" };
    for (int i = 0; i < hits.Length; i++)
    {
        string term = indexSearcher.Doc(hits[i].Doc).Get("field");
        //System.out.println(hits[i].score);
        assertEquals(order[i], term);
    }

    // test pq size by supplying maxExpansions=2
    // This query would normally return 3 documents, because 3 terms match (see above):
    SlowFuzzyQuery limited = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
    hits = indexSearcher.Search(limited, null, 1000).ScoreDocs;
    assertEquals("only 2 documents should match", 2, hits.Length);
    order = new string[] { "bbbbb", "abbbb" };
    for (int i = 0; i < hits.Length; i++)
    {
        string term = indexSearcher.Doc(hits[i].Doc).Get("field");
        //System.out.println(hits[i].score);
        assertEquals(order[i], term);
    }

    // not similar enough:
    verify(run("field", "xxxxx", 0), none);
    verify(run("field", "aaccc", 0), none); // edit distance to "aaaaa" = 3

    // query identical to a word in the index:
    // default allows for up to two edits:
    verify(run("field", "aaaaa", 0), topThree);

    // query similar to a word in the index:
    verify(run("field", "aaaac", 0), topThree);

    // now with prefix
    verify(run("field", "aaaac", 1), topThree);
    verify(run("field", "aaaac", 2), topThree);
    verify(run("field", "aaaac", 3), topThree);
    verify(run("field", "aaaac", 4), topTwo);
    verify(run("field", "aaaac", 5), none);

    verify(run("field", "ddddX", 0), onlyD);

    // now with prefix
    verify(run("field", "ddddX", 1), onlyD);
    verify(run("field", "ddddX", 2), onlyD);
    verify(run("field", "ddddX", 3), onlyD);
    verify(run("field", "ddddX", 4), onlyD);
    verify(run("field", "ddddX", 5), none);

    // different field = no match:
    verify(run("anotherfield", "ddddX", 0), none);

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes "aaaaaaa" and "segment" and checks hit counts for queries at minimum
/// similarities of 0.5f and 0.6f with various prefix lengths. Finally verifies that
/// constructing a query with a minimum similarity of 1.1f or -0.1f throws.
/// </summary>
public void TestFuzzinessLong()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    addDoc("aaaaaaa", indexWriter);
    addDoc("segment", indexWriter);

    IndexReader indexReader = indexWriter.GetReader();
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    indexWriter.Dispose();

    // Runs a fuzzy query against "field" and returns its hits.
    Func<string, float, int, ScoreDoc[]> run = (text, minSimilarity, prefix) =>
        indexSearcher.Search(
            new SlowFuzzyQuery(new Term("field", text), minSimilarity, prefix),
            null,
            1000).ScoreDocs;

    // not similar enough:
    ScoreDoc[] found = run("xxxxx", 0.5f, 0);
    assertEquals(0, found.Length);

    // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
    // in testDefaultFuzziness so a bigger difference is allowed:
    found = run("aaaaccc", 0.5f, 0);
    assertEquals(1, found.Length);
    assertEquals(indexSearcher.Doc(found[0].Doc).Get("field"), ("aaaaaaa"));

    // now with prefix
    foreach (int prefix in new int[] { 1, 4 })
    {
        found = run("aaaaccc", 0.5f, prefix);
        assertEquals(1, found.Length);
        assertEquals(indexSearcher.Doc(found[0].Doc).Get("field"), ("aaaaaaa"));
    }
    found = run("aaaaccc", 0.5f, 5);
    assertEquals(0, found.Length);

    // no match, more than half of the characters is wrong:
    found = run("aaacccc", 0.5f, 0);
    assertEquals(0, found.Length);

    // now with prefix
    found = run("aaacccc", 0.5f, 2);
    assertEquals(0, found.Length);

    // "student" and "stellent" are indeed similar to "segment" by default:
    // (prefix 0), then "now with prefix": still one hit at prefix 1, none at prefix 2.
    int[] expectedByPrefix = { 1, 1, 0 };
    for (int prefix = 0; prefix < expectedByPrefix.Length; prefix++)
    {
        found = run("student", 0.5f, prefix);
        assertEquals(expectedByPrefix[prefix], found.Length);
        found = run("stellent", 0.5f, prefix);
        assertEquals(expectedByPrefix[prefix], found.Length);
    }

    // "student" doesn't match anymore thanks to increased minimum similarity:
    found = run("student", 0.6f, 0);
    assertEquals(0, found.Length);

    try
    {
        SlowFuzzyQuery tooHigh = new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
        fail("Expected IllegalArgumentException");
    }
    catch (Exception e) when (e.IsIllegalArgumentException())
    {
        // expecting exception
    }

    try
    {
        SlowFuzzyQuery negative = new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
        fail("Expected IllegalArgumentException");
    }
    catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
    {
        // expecting exception
    }

    indexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Replays the expectations stored in the <c>fuzzyTestData.txt</c> resource
/// against a freshly built index.
/// <para/>
/// As parsed below, the file starts with a line holding an integer
/// <c>bits</c>; 2^bits documents are indexed, document <c>i</c> containing
/// <c>MapInt(codePointTable, i)</c>. Each following record consists of:
/// a line <c>termIndex,prefix,pqSize,minScore</c>, a line with the expected
/// number of results, and then one <c>doc,score</c> line per expected result.
/// </summary>
/// <param name="codePointTable">Table handed to <c>MapInt</c> to turn an
/// integer into the indexed term / query text.</param>
public void assertFromTestData(int[] codePointTable)
{
    if (Verbose)
    {
        // Join the elements: concatenating the array itself only prints its type name.
        Console.WriteLine("TEST: codePointTable=" + string.Join(", ", codePointTable));
    }

    Stream stream = GetType().getResourceAsStream("fuzzyTestData.txt");
    // Dispose the reader (and with it the resource stream) even when an assertion fails.
    using (TextReader reader = new StreamReader(stream, Encoding.UTF8))
    {
        int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
        int terms = (int)Math.Pow(2, bits);

        Store.Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

        // A single Document/Field pair is reused; only the field value changes per term.
        Document doc = new Document();
        Field field = NewTextField("field", "", Field.Store.NO);
        doc.Add(field);

        for (int i = 0; i < terms; i++)
        {
            field.SetStringValue(MapInt(codePointTable, i));
            writer.AddDocument(doc);
        }

        IndexReader r = writer.GetReader();
        IndexSearcher searcher = NewSearcher(r);
        if (Verbose)
        {
            Console.WriteLine("TEST: searcher=" + searcher);
        }
        // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
        // otherwise scores are different!
        searcher.Similarity = (new DefaultSimilarity());

        writer.Dispose();

        String line;
        while ((line = reader.ReadLine()) != null)
        {
            // Record header: termIndex,prefix,pqSize,minScore
            String[] @params = line.Split(',').TrimEnd();
            String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
            int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
            int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
            float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
            SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
            q.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize);

            // Next line: number of hits expected for this query.
            int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
            TopDocs docs = searcher.Search(q, expectedResults);
            assertEquals(expectedResults, docs.TotalHits);

            // Then one "doc,score" line per expected hit, in rank order.
            for (int i = 0; i < expectedResults; i++)
            {
                String[] scoreDoc = reader.ReadLine().Split(',').TrimEnd();
                assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
                assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
            }
        }

        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Replays the expectations stored in the embedded <c>fuzzyTestData.txt</c>
/// resource against a freshly built index.
/// <para/>
/// As parsed below, the file starts with a line holding an integer
/// <c>bits</c>; 2^bits documents are indexed, document <c>i</c> containing
/// <c>MapInt(codePointTable, i)</c>. Each following record consists of:
/// a line <c>termIndex,prefix,pqSize,minScore</c>, a line with the expected
/// number of results, and then one <c>doc,score</c> line per expected result.
/// </summary>
/// <param name="codePointTable">Table handed to <c>MapInt</c> to turn an
/// integer into the indexed term / query text.</param>
public void assertFromTestData(int[] codePointTable)
{
    if (VERBOSE)
    {
        // Join the elements: concatenating the array itself only prints its type name.
        Console.WriteLine("TEST: codePointTable=" + string.Join(", ", codePointTable));
    }

    // The data file is looked up in this type's assembly by its full manifest resource name.
    Stream stream = GetType().Assembly.GetManifestResourceStream("Lucene.Net.Sandbox.Queries.fuzzyTestData.txt");
    // Dispose the reader (and with it the resource stream) even when an assertion fails.
    using (TextReader reader = new StreamReader(stream, Encoding.UTF8))
    {
        int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
        int terms = (int)Math.Pow(2, bits);

        Store.Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

        // A single Document/Field pair is reused; only the field value changes per term.
        Document doc = new Document();
        Field field = NewTextField("field", "", Field.Store.NO);
        doc.Add(field);

        for (int i = 0; i < terms; i++)
        {
            field.StringValue = (MapInt(codePointTable, i));
            writer.AddDocument(doc);
        }

        IndexReader r = writer.Reader;
        IndexSearcher searcher = NewSearcher(r);
        if (VERBOSE)
        {
            Console.WriteLine("TEST: searcher=" + searcher);
        }
        // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
        // otherwise scores are different!
        searcher.Similarity = (new DefaultSimilarity());

        writer.Dispose();

        String line;
        while ((line = reader.ReadLine()) != null)
        {
            // Record header: termIndex,prefix,pqSize,minScore
            String[] @params = line.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
            int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
            int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
            float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
            SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
            q.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));

            // Next line: number of hits expected for this query.
            int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
            TopDocs docs = searcher.Search(q, expectedResults);
            assertEquals(expectedResults, docs.TotalHits);

            // Then one "doc,score" line per expected hit, in rank order.
            for (int i = 0; i < expectedResults; i++)
            {
                String[] scoreDoc = reader.ReadLine().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
                assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
                assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
            }
        }

        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Indexes the terms "aaaaaaa" and "segment" and checks how many hits
/// <see cref="SlowFuzzyQuery"/> returns for 7- and 8-character query terms
/// at different minimum similarities and prefix lengths. Also checks that
/// constructing a query with a minimum similarity of 1.1 or -0.1 throws
/// an <see cref="ArgumentException"/>.
/// </summary>
public void TestFuzzinessLong()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    addDoc("aaaaaaa", writer);
    addDoc("segment", writer);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    SlowFuzzyQuery query;

    // not similar enough:
    query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
    // in testDefaultFuzziness so a bigger difference is allowed:
    query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);
    // Expected value goes first, matching the other assertEquals calls in this test.
    assertEquals("aaaaaaa", searcher.Doc(hits[0].Doc).Get("field"));

    // now with prefix
    query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);
    assertEquals("aaaaaaa", searcher.Doc(hits[0].Doc).Get("field"));

    query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);
    assertEquals("aaaaaaa", searcher.Doc(hits[0].Doc).Get("field"));

    // A prefix of 5 reaches into the differing "ccc" tail, so nothing matches.
    query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    // no match, more than half of the characters is wrong:
    query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    // now with prefix
    query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    // "student" and "stellent" are indeed similar to "segment" by default:
    query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);

    query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);

    // now with prefix
    query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);

    query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(1, hits.Length);

    query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    // "student" doesn't match anymore thanks to increased minimum similarity:
    query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    assertEquals(0, hits.Length);

    // A minimum similarity above the accepted range must be rejected by the constructor.
    try
    {
        query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
        fail("Expected IllegalArgumentException");
    }
    catch (ArgumentException) // no variable declared, so no unused-variable warning to suppress
    {
        // expecting exception
    }

    // A negative minimum similarity must be rejected as well.
    try
    {
        query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
        fail("Expected IllegalArgumentException");
    }
    catch (ArgumentException)
    {
        // expecting exception
    }

    reader.Dispose();
    directory.Dispose();
}