/// <summary>
/// Searches the index for <paramref name="term"/> across the "hottext",
/// "text" and "examples" fields and records the result.
/// </summary>
/// <param name="term">User-supplied search term; lower-cased before querying.</param>
/// <param name="count">Maximum number of hits to return.</param>
/// <param name="start">Offset of the first hit (paging).</param>
/// <returns>The <see cref="Result"/> for this search, or null when no index exists.</returns>
public Result Search (string term, int count, int start)
{
	try {
		// Lower-case with the invariant culture so that index lookups do not
		// depend on the current thread culture (avoids e.g. the Turkish 'I'
		// problem that plain ToLower() has).
		term = term.ToLowerInvariant ();

		// "hottext" matches: fuzzy, exact (boosted) and prefix (boosted),
		// combined with a DisjunctionMax so only the best sub-score counts.
		Term htTerm = new Term ("hottext", term);
		Query qq1 = new FuzzyQuery (htTerm);
		Query qq2 = new TermQuery (htTerm);
		qq2.Boost = 10f;
		Query qq3 = new PrefixQuery (htTerm);
		qq3.Boost = 10f;
		DisjunctionMaxQuery q1 = new DisjunctionMaxQuery (0f);
		q1.Add (qq1);
		q1.Add (qq2);
		q1.Add (qq3);

		// Full-text and example matches carry a smaller boost.
		Query q2 = new TermQuery (new Term ("text", term));
		q2.Boost = 3f;
		Query q3 = new TermQuery (new Term ("examples", term));
		q3.Boost = 3f;

		DisjunctionMaxQuery q = new DisjunctionMaxQuery (0f);
		q.Add (q1);
		q.Add (q2);
		q.Add (q3);

		TopDocs top = SearchInternal (q, count, start);
		Result r = new Result (term, searcher, top.ScoreDocs);
		Results.Add (r);
		return r;
	} catch (IOException) {
		// The index directory could not be read; report and signal "no result".
		Console.WriteLine ("No index in {0}", dir);
		return null;
	}
}
public virtual void Test2()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false));

    // Index a fixed vocabulary of surnames plus many "WEB*" variants.
    foreach (string value in new[]
    {
        "LANGE", "LUETH", "PIRSING", "RIEGEL", "TRZECZIAK", "WALKER",
        "WBR", "WE", "WEB", "WEBE", "WEBER", "WEBERE", "WEBREE",
        "WEBEREI", "WBRE", "WITTKOPF", "WOJNAROWSKI", "WRICKE",
    })
    {
        AddDoc(value, writer);
    }

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // Fuzzy match on "WEBER" with maxEdits=2 and prefixLength=1 must hit
    // exactly the eight terms within distance 2 that share the 'W' prefix.
    FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(8, hits.Length);

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestTokenLengthOpt()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc("12345678911", writer);
    AddDoc("segment", writer);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory);

    // Term shorter than 10 chars: the length-based optimization may shortcut.
    Query fuzzy = new FuzzyQuery(new Term("field", "1234569"), 0.9f);
    ScoreDoc[] matches = searcher.Search(fuzzy, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length);

    // Exactly 10 chars: no optimization, still no match.
    fuzzy = new FuzzyQuery(new Term("field", "1234567891"), 0.9f);
    matches = searcher.Search(fuzzy, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length);

    // Over 10 chars and identical to an indexed term: one hit.
    fuzzy = new FuzzyQuery(new Term("field", "12345678911"), 0.9f);
    matches = searcher.Search(fuzzy, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length);

    // Over 10 chars but dissimilar: no hit.
    fuzzy = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
    matches = searcher.Search(fuzzy, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length);
}
public virtual void TestBoostOnlyRewrite()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    AddDoc("Lucene", writer);
    AddDoc("Lucene", writer);
    AddDoc("Lucenne", writer);
    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // With the boost-only rewrite, IDF must not reorder results: the exact
    // "Lucene" matches come first even though "Lucenne" is the rarer term.
    FuzzyQuery query = new FuzzyQuery(new Term("field", "lucene"));
    query.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
    ScoreDoc[] scoreDocs = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, scoreDocs.Length);
    Assert.AreEqual("Lucene", reader.Document(scoreDocs[0].Doc).Get("field"));
    Assert.AreEqual("Lucene", reader.Document(scoreDocs[1].Doc).Get("field"));
    Assert.AreEqual("Lucenne", reader.Document(scoreDocs[2].Doc).Get("field"));

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Verifies that a FuzzyQuery for "wuzza" matches both near terms ("fuzzy"
/// and "wuzzy") and ranks the closer term ("wuzzy") first.
/// </summary>
public void FuzzyQueryTest()
{
    // NOTE: an unused local ('texto' = "texto") was removed — only the title
    // field participates in this test.
    string titulo = "titulo";

    using (var diretorio = new RAMDirectory())
    {
        IndexarArquivosEmDocumento(diretorio, new Field[]
        {
            new Field(titulo, "fuzzy", Field.Store.YES, Field.Index.ANALYZED),
            new Field(titulo, "wuzzy", Field.Store.YES, Field.Index.ANALYZED)
        });

        using (var searcher = new IndexSearcher(diretorio, true))
        {
            var query = new FuzzyQuery(new Term(titulo, "wuzza"));
            var matches = searcher.Search(query, 10);

            Assert.AreEqual(2, matches.TotalHits, "both close enough");
            // The scores must differ so the ranking below is meaningful.
            Assert.IsTrue(matches.ScoreDocs[0].Score != matches.ScoreDocs[1].Score, "wuzzy closer then fuzzy");

            var doc = searcher.Doc(matches.ScoreDocs[0].Doc);
            Assert.AreEqual("wuzzy", doc.Get(titulo), "wazza bear");
        }
    }
}
public virtual void TestBoostOnlyRewrite()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    AddDoc("Lucene", writer);
    AddDoc("Lucene", writer);
    AddDoc("Lucenne", writer);
    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // Boost-only rewrite: scoring ignores IDF, so the exact "Lucene" matches
    // must rank ahead of the rarer "Lucenne".
    FuzzyQuery query = new FuzzyQuery(new Term("field", "lucene"));
    query.MultiTermRewriteMethod = (new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
    ScoreDoc[] scoreDocs = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, scoreDocs.Length);
    Assert.AreEqual("Lucene", reader.Document(scoreDocs[0].Doc).Get("field"));
    Assert.AreEqual("Lucene", reader.Document(scoreDocs[1].Doc).Get("field"));
    Assert.AreEqual("Lucenne", reader.Document(scoreDocs[2].Doc).Get("field"));

    reader.Dispose();
    directory.Dispose();
}
// Two FuzzyQuery instances are equal when the base query matches and both
// the minimum similarity and the prefix length agree.
public override bool Equals(System.Object o)
{
    if (this == o)
    {
        return true;
    }

    var other = o as FuzzyQuery;
    if (other == null || !base.Equals(o))
    {
        return false;
    }

    // Plain float comparison: NaN similarities never compare equal, matching
    // the original '!=' checks.
    return minimumSimilarity == other.minimumSimilarity
        && prefixLength == other.prefixLength;
}
// Appends a readable "FuzzyQ(...)" representation of the visited query to
// the dump, including minimum similarity and boost, and passes the visit
// result through unchanged.
public override Query VisitFuzzyQuery(FuzzyQuery fuzzyq)
{
    _dump.Append("FuzzyQ(");
    var visited = base.VisitFuzzyQuery(fuzzyq);

    var asFuzzy = visited as FuzzyQuery;
    if (asFuzzy != null)
    {
        _dump.Append(", minSimilarity:");
        _dump.Append(asFuzzy.GetMinSimilarity());
    }

    _dump.Append(BoostToString(visited));
    _dump.Append(")");
    return visited;
}
// Value equality: same runtime type, equal base state, and every fuzzy
// parameter (maxEdits, prefixLength, maxExpansions, transpositions) plus the
// term must match.
public override bool Equals(object obj)
{
    if (this == obj)
    {
        return true;
    }
    if (!base.Equals(obj) || this.GetType() != obj.GetType())
    {
        return false;
    }

    FuzzyQuery other = (FuzzyQuery)obj;
    if (maxEdits != other.maxEdits
        || prefixLength != other.prefixLength
        || maxExpansions != other.maxExpansions
        || transpositions != other.transpositions)
    {
        return false;
    }

    // Terms are equal when both are null or both compare equal.
    return term == null ? other.term == null : term.Equals(other.term);
}
public virtual void TestTieBreaker()
{
    // First index: a, c, d, e variants.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    foreach (string value in new[] { "a123456", "c123456", "d123456", "e123456" })
    {
        AddDoc(value, writer);
    }

    // Second index: a, three b's, c, f.
    Directory directory2 = NewDirectory();
    RandomIndexWriter writer2 = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory2);
    foreach (string value in new[] { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" })
    {
        AddDoc(value, writer2);
    }

    IndexReader ir1 = writer.GetReader();
    IndexReader ir2 = writer2.GetReader();
    MultiReader mr = new MultiReader(ir1, ir2);
    IndexSearcher searcher = NewSearcher(mr);

    // maxEdits=1, no prefix, maxExpansions=2, no transpositions.
    FuzzyQuery fq = new FuzzyQuery(new Term("field", "z123456"), 1, 0, 2, false);
    TopDocs docs = searcher.Search(fq, 2);
    Assert.AreEqual(5, docs.TotalHits); // 5 docs, from the a and b's

    mr.Dispose();
    ir1.Dispose();
    ir2.Dispose();
    writer.Dispose();
    writer2.Dispose();
    directory.Dispose();
    directory2.Dispose();
}
// Value equality over the renamed backing fields: same runtime type, equal
// base state, and matching maxEdits / prefixLength / maxExpansions /
// transpositions plus the term.
public override bool Equals(object obj)
{
    if (this == obj)
    {
        return true;
    }
    if (!base.Equals(obj) || this.GetType() != obj.GetType())
    {
        return false;
    }

    FuzzyQuery other = (FuzzyQuery)obj;
    if (MaxEdits_Renamed != other.MaxEdits_Renamed
        || PrefixLength_Renamed != other.PrefixLength_Renamed
        || MaxExpansions != other.MaxExpansions
        || Transpositions_Renamed != other.Transpositions_Renamed)
    {
        return false;
    }

    // Terms are equal when both are null or both compare equal.
    return _term == null ? other._term == null : _term.Equals(other._term);
}
/// <summary>
/// Fuzzily matches <paramref name="searchQuery"/> against the "word" field
/// (minimum similarity 0.5) and streams back the stored "word" value of the
/// top hits in relevance order.
/// </summary>
/// <param name="searchQuery">Term to match.</param>
/// <returns>Up to five matching words.</returns>
public IEnumerable<string> Search(string searchQuery)
{
    const int HitsLimit = 5;

    // NOTE: this is a lazy iterator — the search runs when the caller starts
    // enumerating. A Stopwatch whose result was never used (and which only
    // ran on full enumeration anyway) was removed as dead code.
    var query = new FuzzyQuery(new Term("word", searchQuery), 0.5f);
    var docs = SearchHandle.Searcher.Search(query, null, HitsLimit, Sort.RELEVANCE);

    foreach (var hit in docs.ScoreDocs)
    {
        var doc = SearchHandle.Searcher.Doc(hit.Doc);
        yield return doc.Get("word");
    }
}
public virtual void TestDistanceAsEditsSearching()
{
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, index);
    AddDoc("foobar", w);
    AddDoc("test", w);
    AddDoc("working", w);
    IndexReader reader = w.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    w.Dispose();

    // Two edits away ("fouba" vs "foobar"): still a match.
    FuzzyQuery q = new FuzzyQuery(new Term("field", "fouba"), 2);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // Longer pattern, still two edits ("foubara" vs "foobar").
    q = new FuzzyQuery(new Term("field", "foubara"), 2);
    hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // Edit distance 3 is rejected at construction time.
    try
    {
        q = new FuzzyQuery(new Term("field", "t"), 3);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    reader.Dispose();
    index.Dispose();
}
/// <summary>
/// "Giga" regression test: a 0-edit (exact) fuzzy query over a multi-term
/// index must hit only the document containing the exact term.
/// </summary>
public virtual void TestGiga()
{
    // Removed: an unused MockAnalyzer local that was never passed anywhere.
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, index);
    AddDoc("Lucene in Action", w);
    AddDoc("Lucene for Dummies", w);
    //addDoc("Giga", w);
    AddDoc("Giga byte", w);
    AddDoc("ManagingGigabytesManagingGigabyte", w);
    AddDoc("ManagingGigabytesManagingGigabytes", w);
    AddDoc("The Art of Computer Science", w);
    AddDoc("J. K. Rowling", w);
    AddDoc("JK Rowling", w);
    AddDoc("Joanne K Roling", w);
    AddDoc("Bruce Willis", w);
    AddDoc("Willis bruce", w);
    AddDoc("Brute willis", w);
    AddDoc("B. willis", w);
    IndexReader r = w.GetReader();
    w.Dispose();

    // maxEdits = 0: behaves like an exact term match on "giga".
    Query q = new FuzzyQuery(new Term("field", "giga"), 0);

    IndexSearcher searcher = NewSearcher(r);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));

    r.Dispose();
    index.Dispose();
}
public virtual void Test2()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false));

    // Index a fixed vocabulary of surnames plus many "WEB*" variants.
    foreach (string value in new[]
    {
        "LANGE", "LUETH", "PIRSING", "RIEGEL", "TRZECZIAK", "WALKER",
        "WBR", "WE", "WEB", "WEBE", "WEBER", "WEBERE", "WEBREE",
        "WEBEREI", "WBRE", "WITTKOPF", "WOJNAROWSKI", "WRICKE",
    })
    {
        AddDoc(value, writer);
    }

    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // Fuzzy match on "WEBER" with maxEdits=2 and prefixLength=1 must hit
    // exactly the eight terms within distance 2 that share the 'W' prefix.
    FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(8, hits.Length);

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestEquals()
{
    WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
    WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
    WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a"));

    // Symmetry.
    Assert.AreEqual(wq1, wq2);
    Assert.AreEqual(wq2, wq1);
    // Transitivity.
    Assert.AreEqual(wq2, wq3);
    Assert.AreEqual(wq1, wq3);
    // Never equal to null.
    Assert.IsFalse(wq1.Equals(null));

    // A FuzzyQuery over the same term is a different query type and must not
    // compare equal in either direction.
    FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a"));
    Assert.IsFalse(wq1.Equals(fq));
    Assert.IsFalse(fq.Equals(wq1));
}
public virtual void TestDistanceAsEditsSearching()
{
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, index);
    AddDoc("foobar", w);
    AddDoc("test", w);
    AddDoc("working", w);
    IndexReader reader = w.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    w.Dispose();

    // Two edits away ("fouba" vs "foobar"): still a match.
    FuzzyQuery q = new FuzzyQuery(new Term("field", "fouba"), 2);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // Longer pattern, still two edits ("foubara" vs "foobar").
    q = new FuzzyQuery(new Term("field", "foubara"), 2);
    hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // Edit distance 3 is rejected at construction time.
    try
    {
        q = new FuzzyQuery(new Term("field", "t"), 3);
        Assert.Fail();
    }
    catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
    {
        // expected
    }

    reader.Dispose();
    index.Dispose();
}
public virtual void TestEquals()
{
    // Three structurally identical wildcard queries.
    WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
    WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
    WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a"));

    // Equality must be symmetric and transitive.
    Assert.AreEqual(wq1, wq2);
    Assert.AreEqual(wq2, wq1);
    Assert.AreEqual(wq2, wq3);
    Assert.AreEqual(wq1, wq3);

    // Comparing against null is always false.
    Assert.IsFalse(wq1.Equals(null));

    // Different query types over the same term are never equal.
    FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a"));
    Assert.IsFalse(wq1.Equals(fq));
    Assert.IsFalse(fq.Equals(wq1));
}
/// <summary>
/// Suggests a previously searched text that fuzzily matches
/// <paramref name="pattern"/>, based on the search-history index.
/// </summary>
/// <param name="pattern">The (possibly misspelled) search text.</param>
/// <returns>The best fuzzy match from the history, or "" when there is no
/// match or the history index cannot be read.</returns>
public string DidYouMean(string pattern)
{
    IndexSearcher searcher = null;
    try
    {
        searcher = new IndexSearcher(m_HistoryPath);
        Term t = new Term(Constants.SearchedText, pattern);
        FuzzyQuery query = new FuzzyQuery(t);
        Hits hits = searcher.Search(query);
        if (hits.Length() != 0)
            return hits.Doc(0).Get(Constants.SearchedText);
        else
            return "";
    }
    catch (Exception)
    {
        // Best-effort suggestion: any failure (missing index, IO error)
        // degrades to "no suggestion".
        return "";
    }
    finally
    {
        // Close the searcher deterministically — the original leaked it.
        if (searcher != null)
            searcher.Close();
    }
}
public virtual void TestTieBreaker()
{
    // First index: a, c, d, e variants.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    foreach (string value in new[] { "a123456", "c123456", "d123456", "e123456" })
    {
        AddDoc(value, writer);
    }

    // Second index: a, three b's, c, f.
    Directory directory2 = NewDirectory();
    RandomIndexWriter writer2 = new RandomIndexWriter(Random(), directory2, Similarity, TimeZone);
    foreach (string value in new[] { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" })
    {
        AddDoc(value, writer2);
    }

    IndexReader ir1 = writer.Reader;
    IndexReader ir2 = writer2.Reader;
    MultiReader mr = new MultiReader(ir1, ir2);
    IndexSearcher searcher = NewSearcher(mr);

    // maxEdits=1, no prefix, maxExpansions=2, no transpositions.
    FuzzyQuery fq = new FuzzyQuery(new Term("field", "z123456"), 1, 0, 2, false);
    TopDocs docs = searcher.Search(fq, 2);
    Assert.AreEqual(5, docs.TotalHits); // 5 docs, from the a and b's

    mr.Dispose();
    ir1.Dispose();
    ir2.Dispose();
    writer.Dispose();
    writer2.Dispose();
    directory.Dispose();
    directory2.Dispose();
}
public virtual void TestDistanceAsEditsSearching()
{
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), index);
    AddDoc("foobar", w);
    AddDoc("test", w);
    AddDoc("working", w);
    IndexReader reader = w.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    w.Dispose();

    // Two edits away ("fouba" vs "foobar"): still a match.
    FuzzyQuery q = new FuzzyQuery(new Term("field", "fouba"), 2);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // Longer pattern, still two edits ("foubara" vs "foobar").
    q = new FuzzyQuery(new Term("field", "foubara"), 2);
    hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // Edit distance 3 is rejected at construction time.
    try
    {
        q = new FuzzyQuery(new Term("field", "t"), 3);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    reader.Dispose();
    index.Dispose();
}
public override bool Equals(System.Object obj)
{
    if (this == obj)
    {
        return true;
    }
    if (!base.Equals(obj) || GetType() != obj.GetType())
    {
        return false;
    }

    FuzzyQuery other = (FuzzyQuery)obj;

    // Compare the similarity values bit-for-bit (mirrors Java's
    // Float.floatToIntBits-based equality rather than '==' on floats).
    int thisBits = BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0);
    int otherBits = BitConverter.ToInt32(BitConverter.GetBytes(other.minimumSimilarity), 0);
    if (thisBits != otherBits)
    {
        return false;
    }
    if (prefixLength != other.prefixLength)
    {
        return false;
    }

    // Terms are equal when both are null or both compare equal.
    return Term == null ? other.Term == null : Term.Equals(other.Term);
}
public virtual void TestFuzziness()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);
    foreach (string value in new[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
    {
        AddDoc(value, writer);
    }
    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // "aaaaa": three indexed terms lie within the default edit distance.
    FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // Prefix lengths 1..3 still cover all three matching terms...
    for (int prefix = 1; prefix <= 3; prefix++)
    {
        query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, prefix);
        hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(3, hits.Length);
    }

    // ...then longer prefixes start excluding terms.
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 4);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 6);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Scoring: closer terms must rank higher.
    query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length, "3 documents should match");
    IList<string> expectedOrder = new List<string> { "bbbbb", "abbbb", "aabbb" };
    for (int i = 0; i < hits.Length; i++)
    {
        Assert.AreEqual(expectedOrder[i], searcher.Doc(hits[i].Doc).Get("field"));
    }

    // maxExpansions=2 caps the candidate queue: only the two best terms
    // survive even though three would otherwise match (see above).
    query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "only 2 documents should match");
    expectedOrder = new List<string> { "bbbbb", "abbbb" };
    for (int i = 0; i < hits.Length; i++)
    {
        Assert.AreEqual(expectedOrder[i], searcher.Doc(hits[i].Doc).Get("field"));
    }

    // Not similar enough: no hits at all.
    query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Query identical to an indexed word: default allows up to two edits.
    query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
    Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

    // Query one edit from an indexed word, with prefix lengths 0..3: the
    // same three terms match in the same order.
    for (int prefix = 0; prefix <= 3; prefix++)
    {
        query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, prefix);
        hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(3, hits.Length);
        Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
        Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
        Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
    }
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 4);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
    query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // "ddddX" vs "ddddd": matches for prefix lengths 0..4, then none.
    for (int prefix = 0; prefix <= 4; prefix++)
    {
        query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, prefix);
        hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);
        Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
    }
    query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 5);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Different field: no match.
    query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    reader.Dispose();
    directory.Dispose();
}
// Runs the full-text search for this.Query, fills the Results table and binds
// it to the repeater, and records timing/paging metadata for the UI.
private void search()
{
    DateTime startedAt = DateTime.Now;

    // Shape of the result table bound to the repeater below.
    this.Results.Columns.Add("title", typeof(string));
    this.Results.Columns.Add("sample", typeof(string));
    this.Results.Columns.Add("path", typeof(string));
    this.Results.Columns.Add("url", typeof(string));
    this.Results.Columns.Add("Type", typeof(string));

    // The index lives in the "index" subdirectory of App_Data.
    string indexDirectory = Server.MapPath("~/App_Data/index");
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));

    BooleanQuery bquery = new BooleanQuery();
    List<string> searchFields = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };

    List<string> projects = new List<string>();
    if (Session["ProjectList"] != null)
    {
        projects = (List<string>)Session["ProjectList"];
    }

    // When a known page type is requested, forbid documents of the *other*
    // types via both exact and fuzzy EXTPRP clauses.
    List<string> allType = new List<string> { "A", "B", "C" };
    if (this.Request.QueryString["Page"] != null)
    {
        if (allType.Contains(this.Request.QueryString["Page"].ToString()))
        {
            allType.Remove(this.Request.QueryString["Page"]);
            foreach (string type in allType)
            {
                bquery.Add(new TermQuery(new Term("EXTPRP", type)), Occur.MUST_NOT);
                bquery.Add(new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0), Occur.MUST_NOT);
            }
        }
    }

    // The title field gets a strong exact-match boost; all other fields are
    // searched fuzzily with a low boost.
    foreach (string field in searchFields)
    {
        if (field == "title")
        {
            TermQuery titleQuery = new TermQuery(new Term(field, this.Query));
            titleQuery.Boost = 5f;
            bquery.Add(titleQuery, Occur.SHOULD);
        }
        else
        {
            FuzzyQuery fieldQuery = new FuzzyQuery(new Term(field, this.Query), 0.5f, 0);
            fieldQuery.Boost = 0.1f;
            bquery.Add(fieldQuery, Occur.SHOULD);
        }
    }

    TopDocs hits = searcher.Search(bquery, null, 10000);

    // Restrict the hits to the user's projects (matched by directory name).
    if (projects.Count() != 0)
    {
        hits.ScoreDocs = hits.ScoreDocs
            .Where(obj => projects.Contains(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))
            .Distinct()
            .ToArray();
    }

    this.total = hits.ScoreDocs.Count();
    this.startAt = InitStartAt();
    int resultsCount = Math.Min(total, this.maxResults + this.startAt);

    // Highlighter wraps matched fragments in a bold/yellow span.
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(200);
    QueryScorer scorer = new QueryScorer(bquery);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;

    for (int i = startAt; i < resultsCount; i++)
    {
        Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        String path = doc.Get("path");
        string getExtension = doc.Get("Extension");
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));

        // Best-effort highlighted sample; image files get an empty sample.
        String sample = "";
        try
        {
            string document = doc.Get("text");
            bool isImage = getExtension.ToLower() == ".png"
                || getExtension.ToLower() == ".jpg"
                || getExtension.ToLower() == ".gif"
                || getExtension.ToLower() == ".bmp";
            if (!isImage)
            {
                sample = highlighter.GetBestFragment(stream, document);
            }
        }
        catch (Exception)
        {
            // Highlighting failures degrade to an empty sample.
        }

        DataRow row = this.Results.NewRow();
        string publicUrl = "http://sagitec-1629/KNBASE/" + path.Replace(@"\", "/").Replace("//SAGITEC-1629/Soogle/", "");
        row["title"] = doc.Get("title");
        row["path"] = publicUrl;
        row["url"] = publicUrl;
        row["sample"] = sample;
        if (path.Contains('.'))
        {
            row["Type"] = GetMIMEType(path);
        }
        this.Results.Rows.Add(row);
    }

    Repeater1.DataSource = Results;
    Repeater1.DataBind();
    searcher.Dispose();

    // Result metadata for the UI.
    this.duration = DateTime.Now - startedAt;
    this.fromItem = startAt + 1;
    this.toItem = Math.Min(startAt + maxResults, total);
}
// Visits the term of a FuzzyQuery and rebuilds the query if the visitor
// rewrote the term:
//   - term unchanged -> return the original query instance
//   - term removed   -> return null (the query is dropped)
//   - term rewritten -> return a new FuzzyQuery over the rewritten term
// NOTE(review): the rebuilt query uses the single-argument constructor, so the
// original query's minimum-similarity/prefix settings and boost are not
// carried over — confirm that falling back to the defaults is intended.
public override Query VisitFuzzyQuery(FuzzyQuery fuzzyq) { var term = fuzzyq.GetTerm(); var visited = VisitTerm(term); if (term == visited) return fuzzyq; if (visited == null) return null; return new FuzzyQuery(visited); }
public virtual void TestDistanceAsEditsSearching()
{
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), index);
    AddDoc("foobar", w);
    AddDoc("test", w);
    AddDoc("working", w);
    IndexReader reader = w.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    w.Dispose();

    // "fouba" is within two edits of "foobar".
    FuzzyQuery q = new FuzzyQuery(new Term("field", "fouba"), 2);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // So is the longer pattern "foubara".
    q = new FuzzyQuery(new Term("field", "foubara"), 2);
    hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("foobar", searcher.Doc(hits[0].Doc).Get("field"));

    // An edit distance of 3 must be rejected by the constructor.
    try
    {
        q = new FuzzyQuery(new Term("field", "t"), 3);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    reader.Dispose();
    index.Dispose();
}
public virtual void TestBoostOnlyRewrite()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
    AddDoc("Lucene", writer);
    AddDoc("Lucene", writer);
    AddDoc("Lucenne", writer);
    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // Boost-only rewrite keeps the exact "Lucene" matches ahead of the rarer
    // "Lucenne", which IDF would otherwise promote.
    FuzzyQuery query = new FuzzyQuery(new Term("field", "lucene"));
    query.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
    ScoreDoc[] scoreDocs = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, scoreDocs.Length);
    Assert.AreEqual("Lucene", reader.Document(scoreDocs[0].Doc).Get("field"));
    Assert.AreEqual("Lucene", reader.Document(scoreDocs[1].Doc).Get("field"));
    Assert.AreEqual("Lucenne", reader.Document(scoreDocs[2].Doc).Get("field"));

    reader.Dispose();
    directory.Dispose();
}
// FuzzyQuery is treated as a plain TermQuery — this backend performs no
// actual fuzzy matching.
protected override ParameterizedSql BuildQuery(FuzzyQuery query)
{
    var exactTerm = new Term(FieldPlaceholder, query.Term.Text);
    return BuildQuery(new TermQuery(exactTerm));
}
/// <summary>
/// Runs a fuzzy search (min similarity 0.45) for <paramref name="search"/> in
/// <paramref name="field"/> and appends the distinct manual ids of the top 100
/// hits to <paramref name="manualIds"/>.
/// </summary>
/// <param name="field">Index field to search in.</param>
/// <param name="search">Term to match fuzzily.</param>
/// <param name="manualIds">Accumulator; ids already present are not re-added.</param>
/// <returns>The same <paramref name="manualIds"/> list, for chaining.</returns>
private List<int> SearchManualField(string field, string search, List<int> manualIds)
{
    IndexReader reader = IndexReader.Open(directoryTemp, true);
    Searcher searcher = new IndexSearcher(reader);
    try
    {
        // NOTE(review): the original built a StandardAnalyzer and QueryParser here
        // but never used either; removed as dead setup.
        var query = new FuzzyQuery(new Term(field, search), 0.45f);
        TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
        searcher.Search(query, collector);
        foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
        {
            Document document = searcher.Doc(scoreDoc.Doc);
            int manualId = int.Parse(document.Get("Id"));
            if (!manualIds.Contains(manualId))
            {
                manualIds.Add(manualId);
            }
        }
    }
    finally
    {
        // Dispose even when the search throws (the original leaked on exceptions).
        reader.Dispose();
        searcher.Dispose();
    }
    return manualIds;
}
/// <summary>
/// FuzzyQuery on longer terms (legacy API): a longer term tolerates a larger
/// absolute edit distance at the default minimum similarity, a prefix length
/// reaching into the differing region removes the match, and similarity values
/// outside [0, 1) are rejected at construction.
/// </summary>
public virtual void TestFuzzinessLong()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc("aaaaaaa", writer);
    AddDoc("segment", writer);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory);
    FuzzyQuery query;
    // not similar enough:
    query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
    // in testDefaultFuzziness so a bigger difference is allowed:
    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].doc).Get("field"), ("aaaaaaa"));
    // now with prefix
    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].doc).Get("field"), ("aaaaaaa"));
    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(searcher.Doc(hits[0].doc).Get("field"), ("aaaaaaa"));
    // prefix of 5 reaches into the differing region, so the match disappears:
    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    // no match, more than half of the characters is wrong:
    query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    // now with prefix
    query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    // "student" and "stellent" are indeed similar to "segment" by default:
    query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    // now with prefix
    query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 2);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    // "student" doesn't match anymore thanks to increased minimum similarity:
    query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
    // Out-of-range similarities must throw. (Replaces the original
    // try/Assert.Fail/catch blocks with unused exception variables, matching
    // the Assert.Throws style used by the newer port of this test.)
    Assert.Throws<System.ArgumentException>(() => new FuzzyQuery(new Term("field", "student"), 1.1f));
    Assert.Throws<System.ArgumentException>(() => new FuzzyQuery(new Term("field", "student"), -0.1f));
    searcher.Close();
    directory.Close();
}
//********************************************************************************************************************************
/// <summary>
/// Executes the search typed into TextBoxQuery against the Lucene index, fills
/// the Results DataTable (title/sample/path/url/Type) with highlighted snippets,
/// and binds the paged results to Repeater1. Does nothing when the box is empty.
/// </summary>
private void search()
{
    if (TextBoxQuery.Text != "")
    {
        DateTime start = DateTime.Now;
        // create the result DataTable
        this.Results.Columns.Add("title", typeof(string));
        this.Results.Columns.Add("sample", typeof(string));
        this.Results.Columns.Add("path", typeof(string));
        this.Results.Columns.Add("url", typeof(string));
        this.Results.Columns.Add("Type", typeof(string));
        // create the searcher
        // index is placed in "index" subdirectory
        string indexDirectory = Server.MapPath(IndexDirPath);
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        // List<string> STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS_SET.ToList<string>();
        IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
        BooleanQuery bquery = new BooleanQuery();
        //var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "text", analyzer);
        // Fields searched, plus the session-scoped project filter (if present).
        List<string> SearchTerm = new List<string> { "text", "path", "title", "Extension", "EXTPRP" };
        List<string> Projects = new List<string>();
        if (Session["ProjectList"] != null)
        {
            Projects = (List<string>)Session["ProjectList"];
        }
        List<string> allType = null;
        if (hnkClickLink.Value == "")
        {
            allType = new List<string>();
        }
        else
        {
            allType = new List<string> { "Doc", "Code", "Images", "Other" };
        }
        // When paging within a selected type, exclude every OTHER type from the
        // EXTPRP field via MUST_NOT clauses (both exact and fuzzy).
        if (this.Request.QueryString["Page"] != null)
        {
            if (allType.Contains(Convert.ToString(hnkClickLink.Value)))
            {
                allType.Remove(Convert.ToString(hnkClickLink.Value));
                foreach (string type in allType)
                {
                    TermQuery termq1 = new TermQuery(new Term("EXTPRP", type));
                    bquery.Add(termq1, Occur.MUST_NOT);
                    FuzzyQuery termq = new FuzzyQuery(new Term("EXTPRP", type), 0.5f, 0);
                    bquery.Add(termq, Occur.MUST_NOT);
                }
            }
        }
        //Query query = parser.Parse(this.Query);
        // Exact term clauses: title matches are boosted well above the other fields.
        foreach (string term in SearchTerm)
        {
            if (term == "title")
            {
                TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                termq.Boost = 5f;
                bquery.Add(termq, Occur.SHOULD);
            }
            else
            {
                TermQuery termq = new TermQuery(new Term(term, this.Query.ToLower()));
                termq.Boost = 0.1f;
                bquery.Add(termq, Occur.SHOULD);
            }
        }
        // Second pass: queries containing '.' are treated as extension lookups
        // (dot stripped); otherwise the title gets an additional fuzzy clause.
        foreach (string term in SearchTerm)
        {
            if (this.Query.Contains("."))
            {
                string SearchKeyword = this.Query.Replace(".", "");
                if (term == "Extension")
                {
                    TermQuery termq = new TermQuery(new Term(term, SearchKeyword.ToLower()));
                    termq.Boost = 5f;
                    bquery.Add(termq, Occur.SHOULD);
                }
            }
            else
            {
                if (term == "title")
                {
                    FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query.ToLower()));
                    termq.Boost = 5f;
                    bquery.Add(termq, Occur.SHOULD);
                }
                else
                {
                    //FuzzyQuery termq = new FuzzyQuery(new Term(term, this.Query), 0.5f, 0);
                    //termq.Boost = 0.1f;
                    //bquery.Add(termq, Occur.SHOULD);
                }
            }
        }
        TopDocs hits = searcher.Search(bquery, null, 10000);
        //TopDocs hits = new TopDocs(TempArrList.Count(), TempArrList.ToArray(), hitsWithText.MaxScore);
        //hits.ScoreDocs.CopyTo(hits.ScoreDocs, 0);
        //hits.ScoreDocs = hits.ScoreDocs.OrderBy(obj => searcher.Doc(obj.Doc).Get("path")).ToArray();
        // Post-filter hits to the session's selected projects, keyed off the
        // directory component of each document's "path" field.
        if (Projects.Count() != 0)
        {
            hits.ScoreDocs = hits.ScoreDocs.Where(obj => Projects.Contains(SplitPath(Path.GetDirectoryName(searcher.Doc(obj.Doc).Get("path"))))).Distinct().ToArray();
        }
        this.total = hits.ScoreDocs.Count();
        this.startAt = InitStartAt();
        int resultsCount = Math.Min(total, this.maxResults + this.startAt);
        // create highlighter
        IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;background-color:yellow;\">", "</span>");
        SimpleFragmenter fragmenter = new SimpleFragmenter(200);
        QueryScorer scorer = new QueryScorer(bquery);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.TextFragmenter = fragmenter;
        //highlighter.MaxDocCharsToAnalyze=200;
        //for (int i = startAt; i < resultsCount; i++)
        //{
        //    Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        //    String path = doc.Get("path");
        //    string getExtension = doc.Get("Extension");
        //    TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        //    String sample = "";
        //    try
        //    {
        //        string document = doc.Get("text");
        //        if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
        //        {
        //            sample = "";
        //        }
        //        else
        //        {
        //            string outp = highlighter.GetBestFragment(stream, document);
        //            if (outp != null)
        //                sample = ReplaceSpecialChar(outp.Trim()); //, 2, "...");
        //            else
        //                sample = Limit(doc.Get("text").Trim(), 200);
        //        }
        //    }
        //    catch (Exception ex)
        //    {
        //    }
        //    // create a new row with the result data
        //    DataRow row = this.Results.NewRow();
        //    row["title"] = doc.Get("title");
        //    row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
        //    row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
        //    row["sample"] = sample;
        //    if (path.Contains('.'))
        //    {
        //        row["Type"] = GetMIMEType(path);
        //    }
        //    this.Results.Rows.Add(row);
        //}
        // Build one result row per hit. NOTE(review): this iterates ALL hits, not
        // just the current page; paging is applied later by PagedDataSource.
        for (int i = 0; i < this.total; i++)
        {
            Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
            String path = doc.Get("path");
            string getExtension = doc.Get("Extension");
            TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
            String sample = "";
            try
            {
                string document = doc.Get("text");
                // Image documents get no text snippet.
                if (getExtension.ToLower() == ".png" || getExtension.ToLower() == ".jpg" || getExtension.ToLower() == ".gif" || getExtension.ToLower() == ".bmp" || getExtension.ToLower() == ".jpeg")
                {
                    sample = "";
                }
                else
                {
                    // Prefer a highlighted fragment; fall back to the leading text.
                    string outp = highlighter.GetBestFragment(stream, document);
                    if (outp != null)
                        sample = Limit(outp.Trim(), 200); //, 2, "...");
                    else
                        sample = Limit(doc.Get("text").Trim(), 200);
                }
            }
            catch (Exception ex)
            {
                // NOTE(review): highlighting failures are swallowed, leaving the
                // sample empty — confirm this is the intended best-effort behavior.
            }
            // create a new row with the result data
            DataRow row = Results.NewRow();
            row["title"] = doc.Get("title");
            row["path"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
            row["url"] = ApplicationPath + path.Replace(@"\", "/").Replace(VirtualPath, "");
            row["sample"] = sample;
            if (path.Contains('.'))
            {
                row["Type"] = GetMIMEType(path);
            }
            Results.Rows.Add(row);
        }
        //****************************** Logic for Paging for Repeater Control****************************************
        PagedDataSource pgitems = new PagedDataSource();
        DataView dv = new DataView(Results);
        pgitems.DataSource = dv;
        pgitems.AllowPaging = true;
        pgitems.PageSize = 10;//You can set the number of items here using some logic.
        pgitems.CurrentPageIndex = PageNumber;
        btnPrev.Visible = !pgitems.IsFirstPage;
        btnNext.Visible = !pgitems.IsLastPage;
        // Show a pager strip of up to five page links starting at the current page.
        if (pgitems.PageCount > 1)
        {
            rptPages.Visible = true;
            ArrayList pages = new ArrayList();
            for (int i = PageNumber; i < 5 + PageNumber; i++)
                pages.Add((i + 1).ToString());
            rptPages.DataSource = pages;
            rptPages.DataBind();
        }
        else
            rptPages.Visible = false;
        Repeater1.DataSource = pgitems;
        Repeater1.DataBind();
        //*************************************************************************************************************
        //Repeater1.DataSource = Results;
        //Repeater1.DataBind();
        searcher.Dispose();
        // result information
        this.duration = DateTime.Now - start;
        this.fromItem = startAt + 1;
        this.toItem = Math.Min(startAt + maxResults, total);
    }
}
/// <summary>
/// Builds the "name" field query for a user-entered term: terms shorter than
/// three characters use a prefix query only; longer terms combine a prefix
/// query and a fuzzy query as optional (SHOULD) clauses.
/// </summary>
private static Query BuildTermQuery(string termQuery)
{
    var nameTerm = new Term("name", termQuery);
    if (termQuery.Length < 3)
    {
        // Too short for useful fuzzy matching — prefix match only.
        return new PrefixQuery(nameTerm);
    }
    var combined = new BooleanQuery();
    combined.Add(new PrefixQuery(nameTerm), BooleanClause.Occur.SHOULD);
    combined.Add(new FuzzyQuery(nameTerm), BooleanClause.Occur.SHOULD);
    return combined;
}
/// <summary>
/// Checks the token-length optimization boundary for fuzzy matching at minimum
/// similarity 0.9 against an indexed 11-character term: shorter query terms
/// shortcut to no match, terms of 10+ characters run the full comparison.
/// </summary>
public virtual void TestTokenLengthOpt()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc("12345678911", writer);
    AddDoc("segment", writer);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory, true);
    Query query;

    // term not over 10 chars, so optimization shortcuts
    query = new FuzzyQuery(new Term("field", "1234569"), 0.9f);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // 10 chars, so no optimization
    query = new FuzzyQuery(new Term("field", "1234567891"), 0.9f);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // over 10 chars, so no optimization
    query = new FuzzyQuery(new Term("field", "12345678911"), 0.9f);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // over 10 chars, no match
    query = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Release index resources (the original leaked both; the sibling tests in
    // this file close their searcher and directory).
    searcher.Close();
    directory.Close();
}
/// <summary>
/// FuzzyQuery on longer terms: a longer term tolerates a larger absolute edit
/// distance at the default minimum similarity, the prefix length interacts with
/// where the edits occur, and similarities outside [0, 1) throw.
/// </summary>
public virtual void TestFuzzinessLong()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc("aaaaaaa", w);
    AddDoc("segment", w);
    w.Optimize();
    w.Close();
    IndexSearcher s = new IndexSearcher(dir, true);

    // Runs a fuzzy search over "field" and returns the raw hits.
    Func<string, float, int, ScoreDoc[]> run = (text, minSim, prefix) =>
        s.Search(new FuzzyQuery(new Term("field", text), minSim, prefix), null, 1000).ScoreDocs;

    // not similar enough:
    Assert.AreEqual(0, run("xxxxx", FuzzyQuery.defaultMinSimilarity, 0).Length);

    // edit distance to "aaaaaaa" = 3; allowed because the term is longer than in
    // testDefaultFuzziness, so a bigger difference is tolerated:
    ScoreDoc[] hits = run("aaaaccc", FuzzyQuery.defaultMinSimilarity, 0);
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));

    // prefix lengths that stay inside the shared "aaaa" keep the match:
    hits = run("aaaaccc", FuzzyQuery.defaultMinSimilarity, 1);
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
    hits = run("aaaaccc", FuzzyQuery.defaultMinSimilarity, 4);
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
    // a prefix of 5 crosses into the differing region:
    Assert.AreEqual(0, run("aaaaccc", FuzzyQuery.defaultMinSimilarity, 5).Length);

    // no match, more than half of the characters is wrong:
    Assert.AreEqual(0, run("aaacccc", FuzzyQuery.defaultMinSimilarity, 0).Length);
    Assert.AreEqual(0, run("aaacccc", FuzzyQuery.defaultMinSimilarity, 2).Length);

    // "student" and "stellent" are indeed similar to "segment" by default:
    Assert.AreEqual(1, run("student", FuzzyQuery.defaultMinSimilarity, 0).Length);
    Assert.AreEqual(1, run("stellent", FuzzyQuery.defaultMinSimilarity, 0).Length);
    Assert.AreEqual(1, run("student", FuzzyQuery.defaultMinSimilarity, 1).Length);
    Assert.AreEqual(1, run("stellent", FuzzyQuery.defaultMinSimilarity, 1).Length);
    Assert.AreEqual(0, run("student", FuzzyQuery.defaultMinSimilarity, 2).Length);
    Assert.AreEqual(0, run("stellent", FuzzyQuery.defaultMinSimilarity, 2).Length);

    // "student" doesn't match anymore thanks to increased minimum similarity:
    Assert.AreEqual(0, run("student", 0.6f, 0).Length);

    // similarity must lie in [0, 1):
    Assert.Throws<ArgumentException>(() => new FuzzyQuery(new Term("field", "student"), 1.1f), "Expected ArgumentException");
    Assert.Throws<ArgumentException>(() => new FuzzyQuery(new Term("field", "student"), -0.1f), "Expected ArgumentException");

    s.Close();
    dir.Close();
}
/// <summary>
/// Writes the fuzzy-specific parts of <paramref name="query"/> — minimum
/// similarity, prefix length, and the term itself — to the query log writer.
/// </summary>
private static void MultiTermQuery(FuzzyQuery query, AzureQueryLogger.IndentedTextWriter writer)
{
    object minSimilarity = query.MinSimilarity;
    object prefixLength = query.PrefixLength;
    writer.WriteLine("MinSimilarity: {0}", minSimilarity);
    writer.WriteLine("PrefixLength: {0}", prefixLength);
    AzureQueryLogger.VisitTerm(query.Term, "Fuzzy Term", writer);
}
/// <summary>
/// Sets up and adds a fuzzy query object allowing the search for an explicit term in the field
/// </summary>
/// <param name="fieldName">The field name to search within</param>
/// <param name="fieldValue">The value to match</param>
/// <param name="occur">Whether it must, must not or should occur in the field</param>
/// <param name="boost">A boost multiplier (1 is default / normal).</param>
/// <param name="key">The dictionary key to allow reference beyond the initial scope</param>
/// <param name="caseSensitive">A boolean denoting whether or not to retain case</param>
/// <returns>The generated fuzzy query object</returns>
public virtual FuzzyQuery Fuzzy(string fieldName, string fieldValue, BooleanClause.Occur occur = null, float? boost = null, string key = null, bool? caseSensitive = null)
{
    // Build the term (honoring case sensitivity), wrap it, then register the
    // query with the requested occurrence, boost and lookup key.
    FuzzyQuery fuzzy = new FuzzyQuery(GetTerm(fieldName, fieldValue, caseSensitive));
    SetBoostValue(fuzzy, boost);
    Add(fuzzy, occur, key);
    return fuzzy;
}
/// <summary>
/// FuzzyQuery checks on five-character terms (legacy API): match counts as the
/// required prefix grows, exact and near-exact queries with expected result
/// ordering, and field isolation.
/// </summary>
public virtual void TestFuzziness()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    foreach (string text in new[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
    {
        AddDoc(text, w);
    }
    w.Optimize();
    w.Close();
    IndexSearcher s = new IndexSearcher(dir);

    // Runs a fuzzy search at the default minimum similarity and returns the hits.
    Func<string, string, int, ScoreDoc[]> run = (field, text, prefix) =>
        s.Search(new FuzzyQuery(new Term(field, text), FuzzyQuery.defaultMinSimilarity, prefix), null, 1000).scoreDocs;

    // "aaaaa" matches itself plus its one- and two-edit neighbours; the count
    // only drops once the required prefix exceeds the shared prefix:
    Assert.AreEqual(3, run("field", "aaaaa", 0).Length);
    Assert.AreEqual(3, run("field", "aaaaa", 1).Length);
    Assert.AreEqual(3, run("field", "aaaaa", 2).Length);
    Assert.AreEqual(3, run("field", "aaaaa", 3).Length);
    Assert.AreEqual(2, run("field", "aaaaa", 4).Length);
    Assert.AreEqual(1, run("field", "aaaaa", 5).Length);
    Assert.AreEqual(1, run("field", "aaaaa", 6).Length);

    // not similar enough:
    Assert.AreEqual(0, run("field", "xxxxx", 0).Length);
    Assert.AreEqual(0, run("field", "aaccc", 0).Length); // edit distance to "aaaaa" = 3

    // query identical to a word in the index (default allows for up to two edits):
    ScoreDoc[] hits = run("field", "aaaaa", 0);
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(s.Doc(hits[1].doc).Get("field"), ("aaaab"));
    Assert.AreEqual(s.Doc(hits[2].doc).Get("field"), ("aaabb"));

    // query similar to a word in the index, with growing prefix lengths:
    for (int prefix = 0; prefix <= 3; prefix++)
    {
        hits = run("field", "aaaac", prefix);
        Assert.AreEqual(3, hits.Length);
        Assert.AreEqual(s.Doc(hits[0].doc).Get("field"), ("aaaaa"));
        Assert.AreEqual(s.Doc(hits[1].doc).Get("field"), ("aaaab"));
        Assert.AreEqual(s.Doc(hits[2].doc).Get("field"), ("aaabb"));
    }
    hits = run("field", "aaaac", 4);
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(s.Doc(hits[1].doc).Get("field"), ("aaaab"));
    Assert.AreEqual(0, run("field", "aaaac", 5).Length);

    // "ddddX" matches "ddddd" for any prefix that stays inside the shared "dddd":
    for (int prefix = 0; prefix <= 4; prefix++)
    {
        hits = run("field", "ddddX", prefix);
        Assert.AreEqual(1, hits.Length);
        Assert.AreEqual(s.Doc(hits[0].doc).Get("field"), ("ddddd"));
    }
    Assert.AreEqual(0, run("field", "ddddX", 5).Length);

    // different field = no match:
    Assert.AreEqual(0, run("anotherfield", "ddddX", 0).Length);

    s.Close();
    dir.Close();
}
/// <summary>
/// FuzzyQuery behaviour on five-character terms (current API): prefix
/// interaction, result ordering by score, BooleanQuery.MaxClauseCount limiting
/// of the rewritten query, and field isolation.
/// </summary>
public virtual void TestFuzziness()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    foreach (string text in new[] { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" })
    {
        AddDoc(text, w);
    }
    w.Optimize();
    w.Close();
    IndexSearcher s = new IndexSearcher(dir, true);

    // Runs a fuzzy search at the default minimum similarity and returns the hits.
    Func<string, string, int, ScoreDoc[]> run = (field, text, prefix) =>
        s.Search(new FuzzyQuery(new Term(field, text), FuzzyQuery.defaultMinSimilarity, prefix), null, 1000).ScoreDocs;

    // "aaaaa" plus neighbours within two edits; count drops as the prefix grows:
    Assert.AreEqual(3, run("field", "aaaaa", 0).Length);
    Assert.AreEqual(3, run("field", "aaaaa", 1).Length);
    Assert.AreEqual(3, run("field", "aaaaa", 2).Length);
    Assert.AreEqual(3, run("field", "aaaaa", 3).Length);
    Assert.AreEqual(2, run("field", "aaaaa", 4).Length);
    Assert.AreEqual(1, run("field", "aaaaa", 5).Length);
    Assert.AreEqual(1, run("field", "aaaaa", 6).Length);

    // test scoring: closer terms come back first
    ScoreDoc[] hits = run("field", "bbbbb", 0);
    Assert.AreEqual(3, hits.Length, "3 documents should match");
    List<String> order = new List<string>(new[] { "bbbbb", "abbbb", "aabbb" });
    for (int i = 0; i < hits.Length; i++)
    {
        String term = s.Doc(hits[i].Doc).Get("field");
        Assert.AreEqual(order[i], term);
    }

    // test BooleanQuery.maxClauseCount: with only 2 clauses allowed, only the
    // 2 best of the 3 matching terms survive the rewrite.
    int savedClauseCount = BooleanQuery.MaxClauseCount;
    try
    {
        BooleanQuery.MaxClauseCount = 2;
        // This query would normally return 3 documents, because 3 terms match (see above):
        hits = run("field", "bbbbb", 0);
        Assert.AreEqual(2, hits.Length, "only 2 documents should match");
        order = new List<string>(new[] { "bbbbb", "abbbb" });
        for (int i = 0; i < hits.Length; i++)
        {
            String term = s.Doc(hits[i].Doc).Get("field");
            Assert.AreEqual(order[i], term);
        }
    }
    finally
    {
        BooleanQuery.MaxClauseCount = savedClauseCount;
    }

    // not similar enough:
    Assert.AreEqual(0, run("field", "xxxxx", 0).Length);
    Assert.AreEqual(0, run("field", "aaccc", 0).Length); // edit distance to "aaaaa" = 3

    // query identical to a word in the index (default allows for up to two edits):
    hits = run("field", "aaaaa", 0);
    Assert.AreEqual(3, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(s.Doc(hits[1].Doc).Get("field"), ("aaaab"));
    Assert.AreEqual(s.Doc(hits[2].Doc).Get("field"), ("aaabb"));

    // query similar to a word in the index, with growing prefix lengths:
    for (int prefix = 0; prefix <= 3; prefix++)
    {
        hits = run("field", "aaaac", prefix);
        Assert.AreEqual(3, hits.Length);
        Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
        Assert.AreEqual(s.Doc(hits[1].Doc).Get("field"), ("aaaab"));
        Assert.AreEqual(s.Doc(hits[2].Doc).Get("field"), ("aaabb"));
    }
    hits = run("field", "aaaac", 4);
    Assert.AreEqual(2, hits.Length);
    Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(s.Doc(hits[1].Doc).Get("field"), ("aaaab"));
    Assert.AreEqual(0, run("field", "aaaac", 5).Length);

    // "ddddX" matches "ddddd" while the prefix stays inside the shared "dddd":
    for (int prefix = 0; prefix <= 4; prefix++)
    {
        hits = run("field", "ddddX", prefix);
        Assert.AreEqual(1, hits.Length);
        Assert.AreEqual(s.Doc(hits[0].Doc).Get("field"), ("ddddd"));
    }
    Assert.AreEqual(0, run("field", "ddddX", 5).Length);

    // different field = no match:
    Assert.AreEqual(0, run("anotherfield", "ddddX", 0).Length);

    s.Close();
    dir.Close();
}
public virtual void TestFuzzy()
{
    // "foobar~2" parses to a FuzzyQuery with 2 edits; malformed or missing
    // edit distances ("~", "~a", "~1a") degrade to a plain TermQuery.
    Query regular = new TermQuery(new Term("field", "foobar"));
    Query expected = new FuzzyQuery(new Term("field", "foobar"), 2);
    assertEquals(expected, Parse("foobar~2"));
    assertEquals(regular, Parse("foobar~"));
    assertEquals(regular, Parse("foobar~a"));
    assertEquals(regular, Parse("foobar~1a"));

    // A distance above the supported maximum (the input below concatenates to
    // "foo~21") still parses as a fuzzy query at MAXIMUM_SUPPORTED_DISTANCE.
    BooleanQuery expectedBool = new BooleanQuery();
    expectedBool.Add(new FuzzyQuery(new Term("field", "foo"), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE), BooleanClause.Occur.MUST);
    expectedBool.Add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST);
    assertEquals(expectedBool, Parse("foo~" + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + 1 + " bar"));
}
public virtual void TestTieBreaker()
{
    // Two separate indexes merged through a MultiReader: a fuzzy query with a
    // small maxExpansions cap must still count matches across both sub-readers.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
    foreach (string doc in new[] { "a123456", "c123456", "d123456", "e123456" })
    {
        AddDoc(doc, writer);
    }

    Directory directory2 = NewDirectory();
    RandomIndexWriter writer2 = new RandomIndexWriter(Random(), directory2);
    foreach (string doc in new[] { "a123456", "b123456", "b123456", "b123456", "c123456", "f123456" })
    {
        AddDoc(doc, writer2);
    }

    IndexReader ir1 = writer.Reader;
    IndexReader ir2 = writer2.Reader;
    MultiReader mr = new MultiReader(ir1, ir2);
    IndexSearcher searcher = NewSearcher(mr);

    // Args: maxEdits=1, prefixLength=0, maxExpansions=2, transpositions=false.
    FuzzyQuery fq = new FuzzyQuery(new Term("field", "z123456"), 1, 0, 2, false);
    TopDocs docs = searcher.Search(fq, 2);
    Assert.AreEqual(5, docs.TotalHits); // 5 docs, from the a and b's

    mr.Dispose();
    ir1.Dispose();
    ir2.Dispose();
    writer.Dispose();
    writer2.Dispose();
    directory.Dispose();
    directory2.Dispose();
}
public virtual void TestFuzziness()
{
    // Indexes seven short terms and checks FuzzyQuery (minimum-similarity,
    // Hits API) hit counts and hit order across several prefix lengths.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    string[] indexed = { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" };
    foreach (string doc in indexed)
    {
        AddDoc(doc, writer);
    }
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory);

    // "aaaaa" with prefix lengths 0..6: matches shrink as the mandatory
    // prefix excludes nearby terms.
    int[] countsForAaaaa = { 3, 3, 3, 3, 2, 1, 1 };
    for (int prefix = 0; prefix < countsForAaaaa.Length; prefix++)
    {
        FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, prefix);
        Hits hits = searcher.Search(query);
        Assert.AreEqual(countsForAaaaa[prefix], hits.Length());
    }

    // Not similar enough:
    Hits none = searcher.Search(new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0));
    Assert.AreEqual(0, none.Length());
    // Edit distance to "aaaaa" = 3 — too far for a five-letter term:
    none = searcher.Search(new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0));
    Assert.AreEqual(0, none.Length());

    // Query identical to a word in the index; default allows up to two edits.
    Hits ordered = searcher.Search(new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0));
    Assert.AreEqual(3, ordered.Length());
    Assert.AreEqual(ordered.Doc(0).Get("field"), ("aaaaa"));
    Assert.AreEqual(ordered.Doc(1).Get("field"), ("aaaab"));
    Assert.AreEqual(ordered.Doc(2).Get("field"), ("aaabb"));

    // "aaaac" (one edit from "aaaaa") across prefix lengths 0..5: the same
    // three hits while the prefix fits, fewer once it passes shared letters.
    string[] expectedOrder = { "aaaaa", "aaaab", "aaabb" };
    for (int prefix = 0; prefix <= 5; prefix++)
    {
        ordered = searcher.Search(new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, prefix));
        int expectedCount = prefix <= 3 ? 3 : (prefix == 4 ? 2 : 0);
        Assert.AreEqual(expectedCount, ordered.Length());
        for (int i = 0; i < expectedCount; i++)
        {
            Assert.AreEqual(ordered.Doc(i).Get("field"), (expectedOrder[i]));
        }
    }

    // "ddddX" only matches "ddddd" while the prefix avoids the trailing X.
    for (int prefix = 0; prefix <= 5; prefix++)
    {
        ordered = searcher.Search(new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, prefix));
        if (prefix <= 4)
        {
            Assert.AreEqual(1, ordered.Length());
            Assert.AreEqual(ordered.Doc(0).Get("field"), ("ddddd"));
        }
        else
        {
            Assert.AreEqual(0, ordered.Length());
        }
    }

    // Different field = no match:
    none = searcher.Search(new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0));
    Assert.AreEqual(0, none.Length());

    searcher.Close();
    directory.Close();
}
public virtual void TestGiga()
{
    // An exact-match (0-edit) fuzzy query for "giga" must hit only the
    // document containing the standalone token "Giga".
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), index);
    string[] titles =
    {
        "Lucene in Action",
        "Lucene for Dummies",
        // "Giga" alone is deliberately not indexed.
        "Giga byte",
        "ManagingGigabytesManagingGigabyte",
        "ManagingGigabytesManagingGigabytes",
        "The Art of Computer Science",
        "J. K. Rowling",
        "JK Rowling",
        "Joanne K Roling",
        "Bruce Willis",
        "Willis bruce",
        "Brute willis",
        "B. willis",
    };
    foreach (string title in titles)
    {
        AddDoc(title, w);
    }
    IndexReader r = w.Reader;
    w.Dispose();

    Query q = new FuzzyQuery(new Term("field", "giga"), 0);

    // 3. search
    IndexSearcher searcher = NewSearcher(r);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
    r.Dispose();
    index.Dispose();
}
public virtual void TestFuzzinessLong()
{
    // Longer terms tolerate more edits under the length-scaled default
    // similarity; also verifies that out-of-range similarities are rejected.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    AddDoc("aaaaaaa", writer);
    AddDoc("segment", writer);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory);

    // Not similar enough:
    FuzzyQuery query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
    Hits hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length());

    // Edit distance to "aaaaaaa" = 3; this matches because the string is
    // longer than in testDefaultFuzziness, so a bigger difference is allowed.
    foreach (int prefix in new[] { 0, 1, 4 })
    {
        query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, prefix);
        hits = searcher.Search(query);
        Assert.AreEqual(1, hits.Length());
        Assert.AreEqual(hits.Doc(0).Get("field"), ("aaaaaaa"));
    }
    query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length());

    // No match — more than half of the characters differ:
    foreach (int prefix in new[] { 0, 2 })
    {
        query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, prefix);
        hits = searcher.Search(query);
        Assert.AreEqual(0, hits.Length());
    }

    // "student" and "stellent" are similar enough to "segment" by default,
    // but only while the required prefix is below 2.
    foreach (string candidate in new[] { "student", "stellent" })
    {
        for (int prefix = 0; prefix <= 2; prefix++)
        {
            query = new FuzzyQuery(new Term("field", candidate), FuzzyQuery.defaultMinSimilarity, prefix);
            hits = searcher.Search(query);
            Assert.AreEqual(prefix < 2 ? 1 : 0, hits.Length());
        }
    }

    // "student" doesn't match anymore thanks to increased minimum similarity:
    query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length());

    // Out-of-range similarity values must be rejected by the constructor:
    foreach (float badSimilarity in new[] { 1.1f, -0.1f })
    {
        try
        {
            query = new FuzzyQuery(new Term("field", "student"), badSimilarity);
            Assert.Fail("Expected IllegalArgumentException");
        }
        catch (System.ArgumentException)
        {
            // expecting exception
        }
    }

    searcher.Close();
    directory.Close();
}
public virtual void TestFuzziness()
{
    // Indexes seven short terms and exercises FuzzyQuery (maxEdits API):
    // prefix lengths, scoring order, and the maxExpansions cap.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory);
    string[] indexed = { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" };
    foreach (string doc in indexed)
    {
        AddDoc(doc, writer);
    }
    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    writer.Dispose();

    // "aaaaa" with prefix lengths 0..6: matches shrink as the mandatory
    // prefix excludes nearby terms.
    int[] countsForAaaaa = { 3, 3, 3, 3, 2, 1, 1 };
    for (int prefix = 0; prefix < countsForAaaaa.Length; prefix++)
    {
        FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, prefix);
        ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(countsForAaaaa[prefix], hits.Length);
    }

    // Test scoring: closer terms are ranked first.
    FuzzyQuery scoring = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
    ScoreDoc[] scored = searcher.Search(scoring, null, 1000).ScoreDocs;
    Assert.AreEqual(3, scored.Length, "3 documents should match");
    IList<string> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");
    for (int i = 0; i < scored.Length; i++)
    {
        Assert.AreEqual(order[i], searcher.Doc(scored[i].Doc).Get("field"));
    }

    // Test pq size by supplying maxExpansions=2: this query would normally
    // return 3 documents (see above), but only the two best terms survive.
    scoring = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
    scored = searcher.Search(scoring, null, 1000).ScoreDocs;
    Assert.AreEqual(2, scored.Length, "only 2 documents should match");
    order = Arrays.AsList("bbbbb", "abbbb");
    for (int i = 0; i < scored.Length; i++)
    {
        Assert.AreEqual(order[i], searcher.Doc(scored[i].Doc).Get("field"));
    }

    // Not similar enough:
    ScoreDoc[] none = searcher.Search(new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(0, none.Length);
    // Edit distance to "aaaaa" = 3:
    none = searcher.Search(new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(0, none.Length);

    // Query identical to a word in the index; default allows up to two edits.
    ScoreDoc[] ordered = searcher.Search(new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(3, ordered.Length);
    Assert.AreEqual(searcher.Doc(ordered[0].Doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(searcher.Doc(ordered[1].Doc).Get("field"), ("aaaab"));
    Assert.AreEqual(searcher.Doc(ordered[2].Doc).Get("field"), ("aaabb"));

    // "aaaac" (one edit from "aaaaa") across prefix lengths 0..5.
    string[] expectedOrder = { "aaaaa", "aaaab", "aaabb" };
    for (int prefix = 0; prefix <= 5; prefix++)
    {
        ordered = searcher.Search(new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, prefix), null, 1000).ScoreDocs;
        int expectedCount = prefix <= 3 ? 3 : (prefix == 4 ? 2 : 0);
        Assert.AreEqual(expectedCount, ordered.Length);
        for (int i = 0; i < expectedCount; i++)
        {
            Assert.AreEqual(searcher.Doc(ordered[i].Doc).Get("field"), (expectedOrder[i]));
        }
    }

    // "ddddX" only matches "ddddd" while the prefix avoids the trailing X.
    for (int prefix = 0; prefix <= 5; prefix++)
    {
        ordered = searcher.Search(new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, prefix), null, 1000).ScoreDocs;
        if (prefix <= 4)
        {
            Assert.AreEqual(1, ordered.Length);
            Assert.AreEqual(searcher.Doc(ordered[0].Doc).Get("field"), ("ddddd"));
        }
        else
        {
            Assert.AreEqual(0, ordered.Length);
        }
    }

    // Different field = no match:
    none = searcher.Search(new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(0, none.Length);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds the parameterized SQL representation of the given fuzzy query.
/// Implementations are supplied by derived classes.
/// </summary>
/// <param name="query">The fuzzy query to translate.</param>
/// <returns>The SQL translation of <paramref name="query"/>.</returns>
protected abstract ParameterizedSql BuildQuery(FuzzyQuery query);
/// <summary>
/// Factory method to generate a fuzzy query: one boosted fuzzy clause per
/// weighted field, combined as alternatives (SHOULD) and then simplified.
/// </summary>
/// <param name="text">The term text to match approximately.</param>
/// <param name="fuzziness">The fuzziness passed to each per-field <see cref="FuzzyQuery"/>.</param>
/// <returns>The simplified disjunction over all fields in <c>weights</c>.</returns>
protected virtual Query NewFuzzyQuery(string text, int fuzziness)
{
    // true = disableCoord (see Lucene's BooleanQuery constructor).
    BooleanQuery bq = new BooleanQuery(true);
    foreach (var entry in weights)
    {
        // The original guarded with `if (q != null)`, but `new` can never
        // return null in C# — the check was dead code and has been removed.
        Query q = new FuzzyQuery(new Term(entry.Key, text), fuzziness);
        q.Boost = entry.Value;
        bq.Add(q, BooleanClause.Occur.SHOULD);
    }
    return Simplify(bq);
}
private ParameterizedSql BuildFuzzy(FuzzyQuery fuzzyQuery)
{
    // Rebuild the fuzzy query around a copied term, preserving the original
    // similarity and prefix length; give up when the term cannot be copied.
    Term copied = CopyTerm(fuzzyQuery.Term);
    if (copied == null)
    {
        return null;
    }
    FuzzyQuery rebuilt = new FuzzyQuery(copied, fuzzyQuery.MinSimilarity, fuzzyQuery.PrefixLength);
    return BuildQuery(rebuilt);
}
public virtual void TestFuzziness()
{
    // Indexes seven short terms and checks FuzzyQuery (minimum-similarity,
    // ScoreDocs API): prefix lengths, scoring order, and how
    // BooleanQuery.MaxClauseCount truncates the rewritten query.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    string[] indexed = { "aaaaa", "aaaab", "aaabb", "aabbb", "abbbb", "bbbbb", "ddddd" };
    foreach (string doc in indexed)
    {
        AddDoc(doc, writer);
    }
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory, true);

    // "aaaaa" with prefix lengths 0..6: matches shrink as the mandatory
    // prefix excludes nearby terms.
    int[] countsForAaaaa = { 3, 3, 3, 3, 2, 1, 1 };
    for (int prefix = 0; prefix < countsForAaaaa.Length; prefix++)
    {
        FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, prefix);
        ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(countsForAaaaa[prefix], hits.Length);
    }

    // Test scoring: closer terms are ranked first.
    FuzzyQuery scoring = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
    ScoreDoc[] scored = searcher.Search(scoring, null, 1000).ScoreDocs;
    Assert.AreEqual(3, scored.Length, "3 documents should match");
    List<String> order = new List<string>(new[] { "bbbbb", "abbbb", "aabbb" });
    for (int i = 0; i < scored.Length; i++)
    {
        Assert.AreEqual(order[i], searcher.Doc(scored[i].Doc).Get("field"));
    }

    // Test BooleanQuery.MaxClauseCount: with only 2 clauses allowed, the
    // fuzzy rewrite keeps just the two best terms.
    int savedClauseCount = BooleanQuery.MaxClauseCount;
    try
    {
        BooleanQuery.MaxClauseCount = 2;
        // This query would normally return 3 documents, because 3 terms match (see above):
        scoring = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0);
        scored = searcher.Search(scoring, null, 1000).ScoreDocs;
        Assert.AreEqual(2, scored.Length, "only 2 documents should match");
        order = new List<string>(new[] { "bbbbb", "abbbb" });
        for (int i = 0; i < scored.Length; i++)
        {
            Assert.AreEqual(order[i], searcher.Doc(scored[i].Doc).Get("field"));
        }
    }
    finally
    {
        BooleanQuery.MaxClauseCount = savedClauseCount;
    }

    // Not similar enough:
    ScoreDoc[] none = searcher.Search(new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(0, none.Length);
    // Edit distance to "aaaaa" = 3:
    none = searcher.Search(new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(0, none.Length);

    // Query identical to a word in the index; default allows up to two edits.
    ScoreDoc[] ordered = searcher.Search(new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(3, ordered.Length);
    Assert.AreEqual(searcher.Doc(ordered[0].Doc).Get("field"), ("aaaaa"));
    Assert.AreEqual(searcher.Doc(ordered[1].Doc).Get("field"), ("aaaab"));
    Assert.AreEqual(searcher.Doc(ordered[2].Doc).Get("field"), ("aaabb"));

    // "aaaac" (one edit from "aaaaa") across prefix lengths 0..5.
    string[] expectedOrder = { "aaaaa", "aaaab", "aaabb" };
    for (int prefix = 0; prefix <= 5; prefix++)
    {
        ordered = searcher.Search(new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, prefix), null, 1000).ScoreDocs;
        int expectedCount = prefix <= 3 ? 3 : (prefix == 4 ? 2 : 0);
        Assert.AreEqual(expectedCount, ordered.Length);
        for (int i = 0; i < expectedCount; i++)
        {
            Assert.AreEqual(searcher.Doc(ordered[i].Doc).Get("field"), (expectedOrder[i]));
        }
    }

    // "ddddX" only matches "ddddd" while the prefix avoids the trailing X.
    for (int prefix = 0; prefix <= 5; prefix++)
    {
        ordered = searcher.Search(new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, prefix), null, 1000).ScoreDocs;
        if (prefix <= 4)
        {
            Assert.AreEqual(1, ordered.Length);
            Assert.AreEqual(searcher.Doc(ordered[0].Doc).Get("field"), ("ddddd"));
        }
        else
        {
            Assert.AreEqual(0, ordered.Length);
        }
    }

    // Different field = no match:
    none = searcher.Search(new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0), null, 1000).ScoreDocs;
    Assert.AreEqual(0, none.Length);

    searcher.Close();
    directory.Close();
}
/// <summary>
/// Runs a fuzzy search for <paramref name="keyword"/> over both the
/// "fileName" and "fileContent" fields, returning up to 1000 hits.
/// </summary>
/// <param name="keyword">The raw user keyword to match approximately.</param>
/// <returns>The top documents matching either field.</returns>
public static TopDocs Query(string keyword)
{
    var indexSearcher = IndexManager.GenerateSearcher();

    // One fuzzy clause per field; matching either field is enough (SHOULD),
    // and the pair is required as a whole (MUST).
    //
    // NOTE(review): the original code computed a PanGu-tokenized form of the
    // keyword (GetKeyWordsSplitBySpace(keyword, new PanGuTokenizer())) but
    // never used it — the raw keyword is what was actually queried. The dead
    // computation has been removed; if the tokenized form was the intent,
    // switch the Term text below to it and confirm against the index analyzer.
    var keywordQuery = new BooleanQuery();
    foreach (string field in new[] { "fileName", "fileContent" })
    {
        keywordQuery.Add(new FuzzyQuery(new Term(field, keyword)), BooleanClause.Occur.SHOULD);
    }

    var boolQuery = new BooleanQuery();
    boolQuery.Add(keywordQuery, BooleanClause.Occur.MUST);
    return indexSearcher.Search(boolQuery, null, 1000);
}