/// <summary>
/// Visits each term of a phrase query (dumping a textual representation into
/// <c>_dump</c>) and, if any term was rewritten by <c>VisitTerm</c>, rebuilds
/// the phrase with the visited terms.
/// </summary>
/// <param name="phraseq">The phrase query to visit; returned unchanged when no term was rewritten.</param>
/// <returns>The original query, or a rebuilt <see cref="PhraseQuery"/> with the visited terms.</returns>
public override Query VisitPhraseQuery(PhraseQuery phraseq)
{
    _dump.Append("PhraseQ(");
    var terms = phraseq.GetTerms();
    // Positions run parallel to terms; they must be carried over when the
    // phrase is rebuilt, otherwise explicit position gaps would be lost.
    var positions = phraseq.GetPositions();
    PhraseQuery newQuery = null;
    int index = 0;
    int count = terms.Length;
    while (index < count)
    {
        var visitedTerm = VisitTerm(terms[index]);
        if (newQuery != null)
        {
            newQuery.Add(visitedTerm, positions[index]);
        }
        else if (visitedTerm != terms[index])
        {
            // First rewritten term: clone the already-visited prefix verbatim,
            // then append the rewritten term at its original position.
            newQuery = new PhraseQuery();
            for (int i = 0; i < index; i++)
                newQuery.Add(terms[i], positions[i]);
            newQuery.Add(visitedTerm, positions[index]);
        }
        index++;
        if (index < count)
            _dump.Append(", ");
    }
    _dump.Append(", Slop:").Append(phraseq.GetSlop()).Append(BoostToString(phraseq)).Append(")");
    if (newQuery != null)
    {
        // BUG FIX: the rebuilt query previously dropped the original slop and
        // boost (the dump printed them, but the returned query used defaults).
        newQuery.SetSlop(phraseq.GetSlop());
        newQuery.SetBoost(phraseq.GetBoost());
        return newQuery;
    }
    return phraseq;
}
public virtual void TestPhrase()
{
    // Exact phrase "seventy seven" must hit every document numbered x77.
    PhraseQuery phrase = new PhraseQuery();
    foreach (string word in new[] { "seventy", "seven" })
    {
        phrase.Add(new Term("Field", word));
    }
    int[] expectedDocs = { 77, 177, 277, 377, 477, 577, 677, 777, 877, 977 };
    CheckHits(phrase, expectedDocs);
}
public virtual void Test1()
{
    // Composite scoring test: a top-level BooleanQuery mixing a required sloppy
    // phrase, optional span-near clauses, boosted filtered/constant-score
    // clauses, nested DisjunctionMaxQuerys and a minimum-should-match
    // BooleanQuery; expects docs 0-2 to match (verified via Qtest).
    BooleanQuery q = new BooleanQuery();

    // Required sloppy phrase "w1 w2" (slop 1).
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Slop = 1;
    phraseQuery.Add(new Term(FIELD, "w1"));
    phraseQuery.Add(new Term(FIELD, "w2"));
    q.Add(phraseQuery, Occur.MUST);

    // Optional span-near clauses (Snear/St/Sor/Sf are test-base helpers).
    q.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
    q.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

    // Heavily boosted filtered term query restricted to docs 1 and 3.
    Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")), new ItemizedFilter(new int[] { 1, 3 }));
    t.Boost = 1000;
    q.Add(t, Occur.SHOULD);

    // Constant-score clause over docs 0 and 2.
    t = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
    t.Boost = 30;
    q.Add(t, Occur.SHOULD);

    // Disjunction-max (tie-breaker 0.2) over span, term and boolean sub-queries.
    DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f);
    dm.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
    dm.Add(new TermQuery(new Term(FIELD, "QQ")));

    BooleanQuery xxYYZZ = new BooleanQuery();
    xxYYZZ.Add(new TermQuery(new Term(FIELD, "xx")), Occur.SHOULD);
    xxYYZZ.Add(new TermQuery(new Term(FIELD, "yy")), Occur.SHOULD);
    xxYYZZ.Add(new TermQuery(new Term(FIELD, "zz")), Occur.MUST_NOT);
    dm.Add(xxYYZZ);

    BooleanQuery xxW1 = new BooleanQuery();
    xxW1.Add(new TermQuery(new Term(FIELD, "xx")), Occur.MUST_NOT);
    xxW1.Add(new TermQuery(new Term(FIELD, "w1")), Occur.MUST_NOT);
    dm.Add(xxW1);

    // Nested dismax with tie-breaker 0.5.
    DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f);
    dm2.Add(new TermQuery(new Term(FIELD, "w1")));
    dm2.Add(new TermQuery(new Term(FIELD, "w2")));
    dm2.Add(new TermQuery(new Term(FIELD, "w3")));
    dm.Add(dm2);
    q.Add(dm, Occur.SHOULD);

    // At least two of the three near clauses must match.
    BooleanQuery b = new BooleanQuery();
    b.MinimumNumberShouldMatch = 2;
    b.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
    b.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
    b.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);
    q.Add(b, Occur.SHOULD);

    Qtest(q, new int[] { 0, 1, 2 });
}
public virtual void TestANDPhrase()
{
    // Two adjacent quoted phrases parse as a conjunction of two PhraseQuerys.
    BooleanQuery expected = new BooleanQuery();
    string[][] phraseWords = { new[] { "foo", "bar" }, new[] { "star", "wars" } };
    foreach (string[] words in phraseWords)
    {
        PhraseQuery phrase = new PhraseQuery();
        foreach (string word in words)
        {
            phrase.Add(new Term("field", word));
        }
        expected.Add(phrase, BooleanClause.Occur.MUST);
    }
    assertEquals(expected, Parse("\"foo bar\"+\"star wars\""));
}
// This is a simplified query builder which works for single Terms and single Phrases.
// Returns an empty BooleanQuery (0 tokens), a TermQuery (1 token), or a
// PhraseQuery (2+ tokens).  NOTE(review): the old header claimed "null,
// TermQuery, or PhraseQuery", but no code path returns null.
public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
{
    // NOTE(review): neither stream nor filter is closed/disposed here — confirm
    // whether the caller or the analyzer owns the TokenStream lifetime.
    TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));
    TokenFilter filter = new CachingTokenFilter(stream);
    filter.Reset();

    // This attribute way of getting token properties isn't very good, but it's the non-obsolete one.
    var attr1 = filter.GetAttribute<ITermAttribute>();
    Func<string> getText = () => attr1 != null ? attr1.Term : null;

    // Position increments are optional on the stream; default to 1 when absent.
    Func<int> getPositionIncrement;
    if (filter.HasAttribute<IPositionIncrementAttribute>())
    {
        var attr = filter.GetAttribute<IPositionIncrementAttribute>();
        getPositionIncrement = () => attr.PositionIncrement;
    }
    else
    {
        getPositionIncrement = () => 1;
    }

    // 0 tokens
    if (!filter.IncrementToken())
    {
        return new BooleanQuery();
    }

    // 1 token?
    string token1 = getText();
    int position = 0;
    if (!filter.IncrementToken())
    {
        return new TermQuery(new Term(field, token1));
    }

    // many tokens - handle first token
    PhraseQuery ret = new PhraseQuery();
    ret.Add(new Term(field, token1));

    do
    {
        // handle rest of tokens: accumulate position increments so analyzer
        // gaps (increment > 1) are preserved as phrase positions.
        string tokenNext = getText();
        position += getPositionIncrement();
        ret.Add(new Term(field, tokenNext), position);
    } while (filter.IncrementToken());

    return ret;
}
public virtual void TestIncreasingSloppiness()
{
    // A phrase with slop i must match a subset of what the same phrase
    // matches with slop i + 1.
    Term first = RandomTerm();
    Term second = RandomTerm();
    PhraseQuery tighter = new PhraseQuery();
    PhraseQuery looser = new PhraseQuery();
    foreach (PhraseQuery pq in new[] { tighter, looser })
    {
        pq.Add(first);
        pq.Add(second);
    }
    for (int slop = 0; slop < 10; slop++)
    {
        tighter.Slop = slop;
        looser.Slop = slop + 1;
        AssertSubsetOf(tighter, looser);
    }
}
// Handles a search-preview request: runs the "wd" keyword as a sloppy phrase
// query over the "tag" field and writes the matching (highlighted) tag names
// back as JSON.
public void ProcessRequest(HttpContext context)
{
    context.Response.ContentType = "text/plain";
    string searchKey = context.Request["wd"];
    string indexPath = context.Server.MapPath("../IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Search condition.
    PhraseQuery query = new PhraseQuery();
    // Tokenize the user-supplied keyword; each token becomes a phrase term.
    foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
    {
        query.Add(new Term("tag", word));
    }
    //query.Add(new Term("content", "C#"));// multiple conditions are ANDed together
    query.SetSlop(100); // maximum distance allowed between the phrase terms
    // TopScoreDocCollector is the container holding the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector);// run the query; hits land in the collector
    // TopDocs(0, 10) takes the first 10 hits; TopDocs(20, 10) would fetch hits 20-30 (paging).
    ScoreDoc[] docs = collector.TopDocs(0,10).scoreDocs;
    // Result entities to serialize.
    var tagModels = new List<Picture.Model.TagModel>();
    for (int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].doc;// Lucene's internally assigned document id
        Document doc = searcher.Doc(docId);// fetch the full Document by id
        Picture.Model.TagModel tag = new Picture.Model.TagModel();
        //picture.ImgSummary = doc.Get("summary");
        tag.TagName= Picture.Utility.SplitContent.HightLight(searchKey, doc.Get("tag"));
        //book.ContentDescription = doc.Get("content");// without highlighting
        // Keyword highlighting uses the Pan Gu highlight plug-in:
        //book.ContentDescription = Picture.Utility.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
        tag.TId = Convert.ToInt32(doc.Get("id"));
        tagModels.Add(tag);
    }
    SearchPreviewResult result = new SearchPreviewResult() { q=searchKey, p=false };
    foreach (var item in tagModels)
    {
        result.s.Add(item.TagName);
    }
    System.Web.Script.Serialization.JavaScriptSerializer jss = new System.Web.Script.Serialization.JavaScriptSerializer();
    context.Response.Write(jss.Serialize(result));
}
protected void AddExactFieldValueClause(Index index, BooleanQuery query, string fieldName, string fieldValue)
{
    // Normalize GUID formatting in the value; field names are stored lower-cased.
    //if (String.IsNullOrEmpty(fieldValue)) return;
    string processedValue = IdHelper.ProcessGUIDs(fieldValue);
    var exactMatch = new PhraseQuery();
    exactMatch.Add(new Term(fieldName.ToLowerInvariant(), processedValue));
    query.Add(exactMatch, BooleanClause.Occur.MUST);
}
/// <summary>
/// Searches the index for the given keyword across the "msg" and "title"
/// fields and binds the results (with highlighted snippets) to the repeater.
/// </summary>
protected void SearchContent(string kw)
{
    string indexPath = @"D:\lucenedir";
    kw = kw.ToLower();// Pan Gu segmentation is case sensitive by default, so search lower-cased
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Search condition for the article body.
    PhraseQuery queryMsg = new PhraseQuery();
    foreach (string word in Common.WebCommon.PanGuSplit(kw))// let segmentation split the input into words, e.g. "computer science" -> "computer", "science"
    {
        queryMsg.Add(new Term("msg", word));// search on the article content
    }
    //query.Add(new Term("body","语言"));-- further conditions can be added; they are ANDed and order-independent
    //query.Add(new Term("body", "大学生"));
    queryMsg.SetSlop(100);// maximum distance between the query words; words too far apart in an article are a meaningless match
    // Same condition against the title field.
    PhraseQuery queryTitle = new PhraseQuery();
    foreach (string word in Common.WebCommon.PanGuSplit(kw))
    {
        queryTitle.Add(new Term("title", word));
    }
    queryTitle.SetSlop(100);
    // Either the body clause OR the title clause may match.
    BooleanQuery query = new BooleanQuery();
    query.Add(queryMsg, BooleanClause.Occur.SHOULD);
    query.Add(queryTitle, BooleanClause.Occur.SHOULD);
    // TopScoreDocCollector is the container holding the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector);// run the query; hits land in the collector
    // All hits; GetTotalHits() is the total count. TopDocs(300, 20) would fetch
    // hits 300-320 and can be used to implement paging.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    List<SearchResult> list = new List<SearchResult>();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc[] only carries document ids, so the matching Documents are
        // not all loaded into memory at once; fetch details per id via
        // searcher.Doc, which keeps memory pressure low.
        int docId = docs[i].doc;// Lucene's internally assigned document id
        Document doc = searcher.Doc(docId);// full document details for that id
        SearchResult result = new SearchResult();
        result.ContentDescription = WebCommon.Highlight(kw,WebCommon.CutString(doc.Get("msg"),150));// highlighted snippet
        result.Title = doc.Get("title");
        result.Id = Convert.ToInt32(doc.Get("id"));
        result.PublishDate = Convert.ToDateTime(doc.Get("PublishDate"));
        result.ISBN = doc.Get("ISBN");
        result.Author = doc.Get("Author");
        result.UnitPrice = Convert.ToDecimal(doc.Get("UnitPrice"));
        list.Add(result);
    }
    this.BookListRepeater.DataSource = list;
    this.BookListRepeater.DataBind();
    AddKeyWord(kw);
}
public string GetSearchQuery(params ISearchableMetadataValue[] searchableMetadataValues)
{
    // Build one phrase term per metadata value, then delegate the rendered
    // filter string to the overload.
    var phrase = new PhraseQuery();
    foreach (var metadataValue in searchableMetadataValues)
    {
        var term = new Term(metadataValue.Metadata.SearchName, metadataValue.Value);
        phrase.Add(term);
    }
    return GetSearchQuery(phrase.ToString(), new SearchableMetadata[0]);
}
public static Query BuildExactFieldValueClause(Index index, string fieldName, string fieldValue)
{
    Assert.ArgumentNotNull(index, "Index");

    // Nothing to match on when either part is missing.
    if (string.IsNullOrEmpty(fieldName) || string.IsNullOrEmpty(fieldValue))
    {
        return null;
    }

    // GUIDs are normalized and both field name and value are lower-cased to
    // match how the index stores them.
    var normalizedValue = IdHelper.ProcessGUIDs(fieldValue).ToLowerInvariant();
    var exactMatch = new PhraseQuery();
    exactMatch.Add(new Term(fieldName.ToLowerInvariant(), normalizedValue));
    return exactMatch;
}
protected bool PhaseQueryHasHits(string[] phrases, int i)
{
    using (var dir = FSDirectory.Open(TestEnvironment.TestIndexDirectory))
    using (var indexSearcher = new IndexSearcher(dir))
    {
        // Exact (slop 0) phrase over all supplied tokens.
        var exactPhrase = new PhraseQuery { Slop = 0 };
        foreach (var token in phrases)
        {
            exactPhrase.Add(new Term("field", token));
        }

        // Search, without subcategories
        var topDocs = indexSearcher.Search(exactPhrase, 10);
        return topDocs.TotalHits > 0;
    }
}
/// <summary>
/// Searches the index library for the request's "SearchKey" keyword and binds
/// the matching books (with highlighted content) to the repeater.
/// </summary>
private void SearchFromIndexData()
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Search condition.
    PhraseQuery query = new PhraseQuery();
    // Tokenize the user-supplied keyword; each token becomes a phrase term.
    foreach(string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
    {
        query.Add(new Term("content", word));
    }
    //query.Add(new Term("content", "C#"));// multiple conditions are ANDed together
    query.SetSlop(100); // maximum distance allowed between the phrase terms
    // TopScoreDocCollector is the container holding the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector);// run the query; hits land in the collector
    // TopDocs(0, GetTotalHits()) returns every hit; TopDocs(20, 10) would fetch
    // hits 20-30 and so implement paging.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    // Result entities to render.
    List<PZYM.Shop.Model.Books> bookResult = new List<PZYM.Shop.Model.Books>();
    for(int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].doc;// Lucene's internally assigned document id
        Document doc = searcher.Doc(docId);// fetch the full Document by id
        PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
        book.Title = doc.Get("title");
        //book.ContentDescription = doc.Get("content");// without highlighting
        // Keyword highlighting via the Pan Gu highlight plug-in.
        book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
        book.Id = Convert.ToInt32(doc.Get("id"));
        bookResult.Add(book);
    }
    Repeater1.DataSource = bookResult;
    Repeater1.DataBind();
}
/// <summary>
/// Adds a content sub query to the given boolean query, choosing the most
/// appropriate Lucene query type for the value and match variant.
/// </summary>
/// <param name="query">The boolean query to extend.</param>
/// <param name="key">The field key.</param>
/// <param name="value">The field value.</param>
/// <param name="matchVariant">The match variant.</param>
/// <param name="condition">The condition that controls the clause occurrence.</param>
private void AddContentSubQuery(LuceneSearch.BooleanQuery query, string key, string value, MatchVariant matchVariant, QueryCondition condition)
{
    // NotEquals is always a prohibited exact term, regardless of condition.
    if (matchVariant == MatchVariant.NotEquals)
    {
        query.Add(new LuceneSearch.TermQuery(new Term(key, value)), LuceneSearch.Occur.MUST_NOT);
        return;
    }

    LuceneSearch.Occur occurrence = this.GetOccur(condition);

    // Range-style variants are delegated to GetRangeQuery; a non-null result
    // means the value was a range expression.
    LuceneSearch.TermRangeQuery rangeQuery = this.GetRangeQuery(key, value, matchVariant);
    if (rangeQuery != null)
    {
        query.Add(rangeQuery, occurrence);
        return;
    }

    // Multi-word values become a phrase; otherwise Like becomes a prefix
    // wildcard and anything else an exact term.
    string[] keywords = value.Split(' ');
    if (keywords.Length > 1)
    {
        LuceneSearch.PhraseQuery phraseQuery = new Lucene.Net.Search.PhraseQuery();
        foreach (string keyword in keywords)
        {
            phraseQuery.Add(new Term(key, keyword));
        }
        query.Add(phraseQuery, occurrence);
    }
    else if (matchVariant == MatchVariant.Like)
    {
        query.Add(new LuceneSearch.WildcardQuery(new Term(key, value + "*")), occurrence);
    }
    else
    {
        query.Add(new LuceneSearch.TermQuery(new Term(key, value)), occurrence);
    }
}
public void AutomaticallyClosesDanglingQuotes()
{
    // arrange: the unterminated quote should be parsed as if closed at end of input
    var queryText = "title:\"dot NET version:1.2.3";
    var phraseQuery = new PhraseQuery();
    foreach (var token in new[] { "dot", "net", "version", "1", "2", "3" })
    {
        phraseQuery.Add(new Term("Title", token));
    }
    var expected = new BooleanQuery
    {
        new BooleanClause(new BooleanQuery
        {
            new BooleanClause(new BooleanQuery
            {
                new BooleanClause(phraseQuery, Occur.SHOULD)
            }, Occur.SHOULD)
        }, Occur.MUST)
    };

    // act
    var actual = NuGetQuery.MakeQuery(queryText);

    // assert
    Assert.Equal(expected, actual);
}
public virtual void TestPhraseQuery()
{
    // Runs phrase "b c" on field f through a counting collector and checks the
    // per-document hit counts recorded for the query.
    PhraseQuery q = new PhraseQuery();
    q.Add(new Term("f", "b"));
    q.Add(new Term("f", "c"));
    CountingCollector c = new CountingCollector(TopScoreDocCollector.Create(10, true));
    s.Search(q, null, c);
    int maxDocs = s.IndexReader.MaxDoc;
    Assert.AreEqual(maxDocs, c.DocCounts.Count);
    for (int i = 0; i < maxDocs; i++)
    {
        // Note: the ++i below makes this loop consume docs in pairs —
        // even-indexed docs expect a count of 2.0 and the following
        // odd-indexed docs 1.0 (presumably reflecting how the fixture index
        // was built; see the test setup — TODO confirm).
        IDictionary<Query, float?> doc0 = c.DocCounts[i];
        Assert.AreEqual(1, doc0.Count);
        Assert.AreEqual(2.0F, doc0[q], FLOAT_TOLERANCE);
        IDictionary<Query, float?> doc1 = c.DocCounts[++i];
        Assert.AreEqual(1, doc1.Count);
        Assert.AreEqual(1.0F, doc1[q], FLOAT_TOLERANCE);
    }
}
public virtual void TestPalyndrome3()
{
    // Verifies exact vs. sloppy phrase scoring on a 3-term phrase, then that a
    // palindrome field matches the phrase both ordered and reversed at slop 4.
    // ("query" and "searcher" are fixture fields initialized in the test setup.)

    // search on non palyndrome, find phrase with no slop, using exact phrase scorer
    query.Slop = 0; // to use exact phrase scorer
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "three"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
    float score0 = hits[0].Score;
    //System.out.println("(exact) field: one two three: "+score0);
    QueryUtils.Check(query, searcher);

    // search on non palyndrome, find phrase with slop 3, though no slop required here.
    query.Slop = 4; // to use sloppy scorer
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score1 = hits[0].Score;
    //System.out.println("(sloppy) field: one two three: "+score1);
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(query, searcher);

    // search ordered in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 4; // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "one"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "three"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score2 = hits[0].Score;
    //System.out.println("palindrome: one two three: "+score2);
    QueryUtils.Check(query, searcher);

    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue(score1+SCORE_COMP_THRESH<score2,"ordered scores higher in palindrome");

    // search reveresed in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 4; // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "three"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "one"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score3 = hits[0].Score;
    //System.out.println("palindrome: three two one: "+score3);
    QueryUtils.Check(query, searcher);

    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue(score1+SCORE_COMP_THRESH<score3,"reversed scores higher in palindrome");
    //Assert.AreEqual(score2, score3, SCORE_COMP_THRESH, "ordered or reversed does not matter");
}
public virtual void TestSlopScoring()
{
    // Index three documents where "firstname" and "lastname" are separated by
    // 0, 1 and 2 filler terms, then verify that a phrase query with unlimited
    // slop ranks the closer pairs higher.
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    string[] contents =
    {
        "foo firstname lastname foo",
        "foo firstname xxx lastname foo",
        "foo firstname xxx yyy lastname foo"
    };
    foreach (string content in contents)
    {
        Document document = new Document();
        document.Add(new Field("field", content, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(directory, true);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.Slop = System.Int32.MaxValue;
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // Make sure that those matches where the terms appear closer to
    // each other get a higher score (so docs come back in order 0, 1, 2):
    double[] expectedScores = { 0.71, 0.44, 0.31 };
    for (int rank = 0; rank < hits.Length; rank++)
    {
        Assert.AreEqual(expectedScores[rank], hits[rank].Score, 0.01);
        Assert.AreEqual(rank, hits[rank].Doc);
    }
    QueryUtils.Check(query, searcher);
}
public virtual void TestPhraseQueryWithStopAnalyzer()
{
    // Index one document through StopAnalyzer and check phrase matching both
    // away from and across the removed stop word ("the").
    RAMDirectory directory = new RAMDirectory();
    StopAnalyzer stopAnalyzer = new StopAnalyzer(Util.Version.LUCENE_24);
    IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory, true);

    // valid exact phrase query
    PhraseQuery exact = new PhraseQuery();
    exact.Add(new Term("field", "stop"));
    exact.Add(new Term("field", "words"));
    ScoreDoc[] exactHits = searcher.Search(exact, null, 1000).ScoreDocs;
    Assert.AreEqual(1, exactHits.Length);
    QueryUtils.Check(exact, searcher);

    // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
    PhraseQuery acrossStopWord = new PhraseQuery();
    acrossStopWord.Add(new Term("field", "words"));
    acrossStopWord.Add(new Term("field", "here"));
    ScoreDoc[] holeHits = searcher.Search(acrossStopWord, null, 1000).ScoreDocs;
    Assert.AreEqual(1, holeHits.Length);
    QueryUtils.Check(acrossStopWord, searcher);

    searcher.Close();
}
public virtual void TestBasic()
{
    // Two docs both containing "wizard" and "oz": a plain BooleanQuery ranks
    // doc 0 first, but rescoring with a sloppy PhraseQuery (and again with a
    // SpanNearQuery) promotes doc 1, where the two terms are adjacent.
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.Reader;
    w.Dispose();

    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = GetSearcher(r);
    searcher.Similarity = new DefaultSimilarity();
    TopDocs hits = searcher.Search(bq, 10);
    Assert.AreEqual(2, hits.TotalHits);
    Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

    // Now, resort using PhraseQuery:
    PhraseQuery pq = new PhraseQuery();
    pq.Slop = 5;
    pq.Add(new Term("field", "wizard"));
    pq.Add(new Term("field", "oz"));
    TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);

    // Resorting changed the order:
    Assert.AreEqual(2, hits2.TotalHits);
    Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

    // Resort using SpanNearQuery:
    SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
    SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);
    TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);

    // Resorting changed the order:
    Assert.AreEqual(2, hits3.TotalHits);
    Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));

    r.Dispose();
    dir.Dispose();
}
public virtual void TestSlopScoring()
{
    // Indexes three docs with "firstname ... lastname" at increasing distances
    // and checks that a max-slop phrase query scores closer matches higher.
    // Written against the pre-3.0 Lucene.Net API (SetSlop, Hits, TOKENIZED).
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    Lucene.Net.Documents.Document doc2 = new Lucene.Net.Documents.Document();
    doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc2);
    Lucene.Net.Documents.Document doc3 = new Lucene.Net.Documents.Document();
    doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc3);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(directory);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.SetSlop(System.Int32.MaxValue);
    Hits hits = searcher.Search(query);
    Assert.AreEqual(3, hits.Length());
    // Make sure that those matches where the terms appear closer to
    // each other get a higher score:
    Assert.AreEqual(0.71, hits.Score(0), 0.01);
    Assert.AreEqual(0, hits.Id(0));
    Assert.AreEqual(0.44, hits.Score(1), 0.01);
    Assert.AreEqual(1, hits.Id(1));
    Assert.AreEqual(0.31, hits.Score(2), 0.01);
    Assert.AreEqual(2, hits.Id(2));
    QueryUtils.Check(query, searcher);
}
public virtual void TestNonExistingPhrase()
{
    // Phrases containing a term absent from the index: repetitions of existing
    // terms can still match within the slop, but a phrase that repeats a
    // non-existing term can never match, no matter how large the slop.
    // ("query"/"searcher" are fixture fields; the "nonexist" field is pre-indexed.)

    // phrase without repetitions that exists in 2 docs
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "found"));
    query.Slop = 2; // would be found this way
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    // phrase with repetitions that exists in 2 docs
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Slop = 1; // would be found
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    // phrase I with repetitions that does not exist in any doc
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "phrase"));
    query.Slop = 1000; // would not be found no matter how high the slop is
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    // phrase II with repetitions that does not exist in any doc
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Slop = 1000; // would not be found no matter how high the slop is
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
}
public virtual void TestNullOrSubScorer()
{
    // Exercises BooleanQuery scoring when sub-clauses produce no scorer (terms
    // or phrases absent from the index): such clauses must still contribute to
    // the coord factor, and a required clause with a null scorer must yield
    // zero hits.
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    doc.Add(NewTextField("field", "a b c d", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    IndexSearcher s = NewSearcher(r);
    // this test relies upon coord being the default implementation,
    // otherwise scores are different!
    s.Similarity = new DefaultSimilarity();

    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

    // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
    float score = s.Search(q, 10).MaxScore;
    Query subQuery = new TermQuery(new Term("field", "not_in_index"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.SHOULD);
    float score2 = s.Search(q, 10).MaxScore;
    Assert.AreEqual(score * .5F, score2, 1e-6);

    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery)q.Clone();
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "not_in_index"));
    phrase.Add(new Term("field", "another_not_in_index"));
    phrase.Boost = 0;
    qq.Add(phrase, Occur.SHOULD);
    score2 = s.Search(qq, 10).MaxScore;
    Assert.AreEqual(score * (1 / 3F), score2, 1e-6);

    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.MUST);
    score2 = s.Search(q, 10).MaxScore;
    Assert.AreEqual(score * (2 / 3F), score2, 1e-6);

    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);

    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    q.Add(pq, Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);

    // The empty phrase inside a DisjunctionMaxQuery must not suppress the
    // other (matching) clause.
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);

    r.Dispose();
    w.Dispose();
    dir.Dispose();
}
public virtual void TestRandomPhrases()
{
    // Builds random documents large enough that their postings span multiple
    // chunks (> 4096 terms), sometimes reusing sub-phrases from earlier docs,
    // then verifies that a PhraseQuery built from a random contiguous slice of
    // a document finds that document.
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
    IList <IList <string> > docs = new List <IList <string> >();
    Documents.Document d = new Documents.Document();
    Field f = NewTextField("f", "", Field.Store.NO);
    d.Add(f);
    Random r = Random();
    int NUM_DOCS = AtLeast(10);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        // must be > 4096 so it spans multiple chunks
        int termCount = TestUtil.NextInt(Random(), 4097, 8200);
        IList <string> doc = new List <string>();
        StringBuilder sb = new StringBuilder();
        while (doc.Count < termCount)
        {
            if (r.Next(5) == 1 || docs.Count == 0)
            {
                // make new non-empty-string term
                string term;
                while (true)
                {
                    term = TestUtil.RandomUnicodeString(r);
                    if (term.Length > 0)
                    {
                        break;
                    }
                }
                // Run the term through the analyzer; every token it produces
                // becomes one word of the document being built.
                IOException priorException = null;
                TokenStream ts = analyzer.TokenStream("ignore", new StringReader(term));
                try
                {
                    ICharTermAttribute termAttr = ts.AddAttribute <ICharTermAttribute>();
                    ts.Reset();
                    while (ts.IncrementToken())
                    {
                        string text = termAttr.ToString();
                        doc.Add(text);
                        sb.Append(text).Append(' ');
                    }
                    ts.End();
                }
                catch (IOException e)
                {
                    priorException = e;
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(priorException, ts);
                }
            }
            else
            {
                // pick existing sub-phrase
                IList <string> lastDoc = docs[r.Next(docs.Count)];
                int len = TestUtil.NextInt(r, 1, 10);
                int start = r.Next(lastDoc.Count - len);
                for (int k = start; k < start + len; k++)
                {
                    string t = lastDoc[k];
                    doc.Add(t);
                    sb.Append(t).Append(' ');
                }
            }
        }
        docs.Add(doc);
        f.StringValue = sb.ToString();
        w.AddDocument(d);
    }

    IndexReader reader = w.Reader;
    IndexSearcher s = NewSearcher(reader);
    w.Dispose();

    // now search: build a phrase from a random slice of a random doc and
    // assert that doc is among the hits.
    int num = AtLeast(10);
    for (int i = 0; i < num; i++)
    {
        int docID = r.Next(docs.Count);
        IList <string> doc = docs[docID];
        int numTerm = TestUtil.NextInt(r, 2, 20);
        int start = r.Next(doc.Count - numTerm);
        PhraseQuery pq = new PhraseQuery();
        StringBuilder sb = new StringBuilder();
        for (int t = start; t < start + numTerm; t++)
        {
            pq.Add(new Term("f", doc[t]));
            sb.Append(doc[t]).Append(' ');
        }
        TopDocs hits = s.Search(pq, NUM_DOCS);
        bool found = false;
        for (int j = 0; j < hits.ScoreDocs.Length; j++)
        {
            if (hits.ScoreDocs[j].Doc == docID)
            {
                found = true;
                break;
            }
        }
        Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
    }

    reader.Dispose();
    dir.Dispose();
}
public virtual void TestPalyndrome3()
{
    // Variant of the palindrome test using the fixture fields Query/Searcher:
    // exact vs. sloppy scoring on "one two three", then ordered and reversed
    // matches inside the palindrome field at slop 4. Also calls Explain to make
    // sure explanation generation does not throw.

    // search on non palyndrome, find phrase with no slop, using exact phrase scorer
    Query.Slop = 0; // to use exact phrase scorer
    Query.Add(new Term("field", "one"));
    Query.Add(new Term("field", "two"));
    Query.Add(new Term("field", "three"));
    ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
    float score0 = hits[0].Score;
    //System.out.println("(exact) field: one two three: "+score0);
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // just make sure no exc:
    Searcher.Explain(Query, 0);

    // search on non palyndrome, find phrase with slop 3, though no slop required here.
    Query.Slop = 4; // to use sloppy scorer
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score1 = hits[0].Score;
    //System.out.println("(sloppy) field: one two three: "+score1);
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // search ordered in palyndrome, find it twice
    Query = new PhraseQuery();
    Query.Slop = 4; // must be at least four for both ordered and reversed to match
    Query.Add(new Term("palindrome", "one"));
    Query.Add(new Term("palindrome", "two"));
    Query.Add(new Term("palindrome", "three"));
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;

    // just make sure no exc:
    Searcher.Explain(Query, 0);

    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    //float score2 = hits[0].Score;
    //System.out.println("palindrome: one two three: "+score2);
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);

    // search reveresed in palyndrome, find it twice
    Query = new PhraseQuery();
    Query.Slop = 4; // must be at least four for both ordered and reversed to match
    Query.Add(new Term("palindrome", "three"));
    Query.Add(new Term("palindrome", "two"));
    Query.Add(new Term("palindrome", "one"));
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    //float score3 = hits[0].Score;
    //System.out.println("palindrome: three two one: "+score3);
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
    //Assert.AreEqual("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
}
/// <summary>
/// Verifies that a PhraseQuery behaves correctly when combined with other
/// MUST clauses inside a BooleanQuery (i.e. when driven by a conjunction
/// scorer), against two separately built indexes.
/// </summary>
public virtual void TestPhraseQueryInConjunctionScorer()
{
    Directory directory = NewDirectory();
    // First index: two docs, both with source:"marketing info", one also with contents:"foobar".
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    Documents.Document doc = new Documents.Document();
    doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "foobar", Field.Store.YES));
    doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
    writer.AddDocument(doc);
    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);
    // The phrase alone matches both documents.
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(Random(), phraseQuery, searcher, Similarity);
    // Conjoined with the term query, only the second document matches.
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(Random(), termQuery, searcher, Similarity);
    reader.Dispose();
    // Second index: rebuild from scratch with three "map ... entry ... woo" docs.
    writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES));
    writer.AddDocument(doc);
    doc = new Documents.Document();
    doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES));
    writer.AddDocument(doc);
    reader = writer.Reader;
    writer.Dispose();
    searcher = NewSearcher(reader);
    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));
    // "woo" occurs in all three docs; the exact phrase "map entry" only in two.
    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    // The conjunction must give the same result regardless of clause order.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(Random(), booleanQuery, searcher, Similarity);
    reader.Dispose();
    directory.Dispose();
}
public virtual void TestNotCloseEnough() { Query.Slop = 2; Query.Add(new Term("field", "one")); Query.Add(new Term("field", "five")); ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); QueryUtils.Check(Random(), Query, Searcher, Similarity); }
// Random rnd is passed in so that the exact same random query may be created // more than once. public static BooleanQuery RandBoolQuery(Random rnd, bool allowMust, int level, string field, string[] vals, Callback cb) { BooleanQuery current = new BooleanQuery(rnd.Next() < 0); for (int i = 0; i < rnd.Next(vals.Length) + 1; i++) { int qType = 0; // term query if (level > 0) { qType = rnd.Next(10); } Query q; if (qType < 3) { q = new TermQuery(new Term(field, vals[rnd.Next(vals.Length)])); } else if (qType < 4) { Term t1 = new Term(field, vals[rnd.Next(vals.Length)]); Term t2 = new Term(field, vals[rnd.Next(vals.Length)]); PhraseQuery pq = new PhraseQuery(); pq.Add(t1); pq.Add(t2); pq.Slop = 10; // increase possibility of matching q = pq; } else if (qType < 7) { q = new WildcardQuery(new Term(field, "w*")); } else { q = RandBoolQuery(rnd, allowMust, level - 1, field, vals, cb); } int r = rnd.Next(10); BooleanClause.Occur occur; if (r < 2) { occur = BooleanClause.Occur.MUST_NOT; } else if (r < 5) { if (allowMust) { occur = BooleanClause.Occur.MUST; } else { occur = BooleanClause.Occur.SHOULD; } } else { occur = BooleanClause.Occur.SHOULD; } current.Add(q, occur); } if (cb != null) { cb.PostCreate(current); } return(current); }
/// <summary>
/// Verifies that a PhraseQuery behaves correctly when combined with other
/// MUST clauses inside a BooleanQuery (conjunction scorer), against two
/// separately built RAMDirectory indexes. Uses the pre-4.0 IndexWriter API.
/// </summary>
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory directory = new RAMDirectory();
    // First index: two docs with source:"marketing info"; the second also has contents:"foobar".
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory);
    // The phrase alone matches both documents.
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(phraseQuery, searcher);
    // Conjoined with the term query, only the second document matches.
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(termQuery, searcher);
    searcher.Close();
    // Second index: rebuild (create=true) with three "map ... entry ... woo" docs.
    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory);
    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));
    // "woo" occurs in all three docs; the exact phrase "map entry" only in two.
    hits = searcher.Search(termQuery, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);
    // The conjunction must give the same result regardless of clause order.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(booleanQuery, searcher);
    searcher.Close();
    directory.Close();
}
public virtual void TestExact() { // slop is zero by default query.Add(new Term("field", "four")); query.Add(new Term("field", "five")); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "exact match"); QueryUtils.Check(query, searcher); query = new PhraseQuery(); query.Add(new Term("field", "two")); query.Add(new Term("field", "one")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length, "reverse not exact"); QueryUtils.Check(query, searcher); }
/// <summary>
/// Verifies that a PhraseQuery behaves correctly when combined with other
/// MUST clauses inside a BooleanQuery (conjunction scorer). This is the
/// oldest ported variant, using the deprecated Hits-based Search API.
/// </summary>
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory directory = new RAMDirectory();
    // First index: two docs with source:"marketing info"; the second also has contents:"foobar".
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory);
    // The phrase alone matches both documents.
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    Hits hits = searcher.Search(phraseQuery);
    Assert.AreEqual(2, hits.Length());
    QueryUtils.Check(phraseQuery, searcher);
    // Conjoined with the term query, only the second document matches.
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery);
    Assert.AreEqual(1, hits.Length());
    QueryUtils.Check(termQuery, searcher);
    searcher.Close();
    // Second index: rebuild (create=true) with three "map ... entry ... woo" docs.
    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory);
    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));
    // "woo" occurs in all three docs; the exact phrase "map entry" only in two.
    hits = searcher.Search(termQuery);
    Assert.AreEqual(3, hits.Length());
    hits = searcher.Search(phraseQuery);
    Assert.AreEqual(2, hits.Length());
    // The conjunction must give the same result regardless of clause order.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery);
    Assert.AreEqual(2, hits.Length());
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
    hits = searcher.Search(booleanQuery);
    Assert.AreEqual(2, hits.Length());
    QueryUtils.Check(booleanQuery, searcher);
    searcher.Close();
    directory.Close();
}
public virtual void TestOrderDoesntMatter() { query.Slop = 2; // must be at least two for reverse order match query.Add(new Term("field", "two")); query.Add(new Term("field", "one")); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "just sloppy enough"); QueryUtils.Check(query, searcher); query = new PhraseQuery(); query.Slop = 2; query.Add(new Term("field", "three")); query.Add(new Term("field", "one")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length, "not sloppy enough"); QueryUtils.Check(query, searcher); }
protected void ApplyLanguageClause(BooleanQuery query, string language, BooleanClause.Occur occurance) { if (String.IsNullOrEmpty(language)) return; var phraseQuery = new PhraseQuery(); phraseQuery.Add(new Term(BuiltinFields.Language, language.ToLowerInvariant())); query.Add(phraseQuery, occurance); }
/// <summary>
/// Rescores a BooleanQuery result set with a PhraseQuery-based Rescorer and
/// checks both the reordered hits and the textual Explanation output: the
/// top (rescored) hit must report a combined first/second pass score, and
/// the non-matching hit must report "no second pass score" / "NON-MATCH".
/// </summary>
public virtual void TestExplain()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.Reader;
    w.Dispose();
    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = GetSearcher(r);
    TopDocs hits = searcher.Search(bq, 10);
    Assert.AreEqual(2, hits.TotalHits);
    Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));
    // Now, resort using PhraseQuery:
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "wizard"));
    pq.Add(new Term("field", "oz"));
    Rescorer rescorer = new QueryRescorerAnonymousInnerClassHelper2(this, pq);
    TopDocs hits2 = rescorer.Rescore(searcher, hits, 10);
    // Resorting changed the order:
    Assert.AreEqual(2, hits2.TotalHits);
    Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));
    // Explain the top hit: it matched the phrase, so the explanation must
    // include a second-pass score combined with the first-pass score.
    int docID = hits2.ScoreDocs[0].Doc;
    Explanation explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
    string s = explain.ToString();
    Assert.IsTrue(s.Contains("TestQueryRescorer+"));
    Assert.IsTrue(s.Contains("combined first and second pass score"));
    Assert.IsTrue(s.Contains("first pass score"));
    Assert.IsTrue(s.Contains("= second pass score"));
    Assert.AreEqual(hits2.ScoreDocs[0].Score, explain.Value, 0.0f);
    // Explain the bottom hit: it did NOT match the phrase, so the
    // explanation must flag the missing second-pass score.
    docID = hits2.ScoreDocs[1].Doc;
    explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
    s = explain.ToString();
    Assert.IsTrue(s.Contains("TestQueryRescorer+"));
    Assert.IsTrue(s.Contains("combined first and second pass score"));
    Assert.IsTrue(s.Contains("first pass score"));
    Assert.IsTrue(s.Contains("no second pass score"));
    Assert.IsFalse(s.Contains("= second pass score"));
    Assert.IsTrue(s.Contains("NON-MATCH"));
    Assert.IsTrue(Math.Abs(hits2.ScoreDocs[1].Score - explain.Value) < 0.0000001f);
    r.Dispose();
    dir.Dispose();
}
public virtual void TestSlop1() { // Ensures slop of 1 works with terms in order. query.Slop = 1; query.Add(new Term("field", "one")); query.Add(new Term("field", "two")); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "in order"); QueryUtils.Check(query, searcher); // Ensures slop of 1 does not work for phrases out of order; // must be at least 2. query = new PhraseQuery(); query.Slop = 1; query.Add(new Term("field", "two")); query.Add(new Term("field", "one")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length, "reversed, slop not 2 or more"); QueryUtils.Check(query, searcher); }
public virtual void TestNotCloseEnough() { query.SetSlop(2); query.Add(new Term("field", "one")); query.Add(new Term("field", "five")); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(0, hits.Length); QueryUtils.Check(query, searcher); }
public virtual void TestMulipleTerms() { query.Slop = 2; query.Add(new Term("field", "one")); query.Add(new Term("field", "three")); query.Add(new Term("field", "five")); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "two total moves"); QueryUtils.Check(query, searcher); query = new PhraseQuery(); query.Slop = 5; // it takes six moves to match this phrase query.Add(new Term("field", "five")); query.Add(new Term("field", "three")); query.Add(new Term("field", "one")); hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length, "slop of 5 not close enough"); QueryUtils.Check(query, searcher); query.Slop = 6; hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "slop of 6 just right"); QueryUtils.Check(query, searcher); }
public virtual void TestNotCloseEnough() { query.Slop = 2; query.Add(new Term("field", "one")); query.Add(new Term("field", "five")); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, query, searcher); }
/// <summary>
/// Verifies that a PhraseQuery behaves correctly when combined with other
/// MUST clauses inside a BooleanQuery (conjunction scorer), against two
/// separately built RAMDirectory indexes. 3.0-era API (read-only searcher).
/// </summary>
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory directory = new RAMDirectory();
    // First index: two docs with source:"marketing info"; the second also has contents:"foobar".
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory, true);
    // The phrase alone matches both documents.
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(phraseQuery, searcher);
    // Conjoined with the term query, only the second document matches.
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(termQuery, searcher);
    searcher.Close();
    // Second index: rebuild (create=true) with three "map ... entry ... woo" docs.
    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory, true);
    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));
    // "woo" occurs in all three docs; the exact phrase "map entry" only in two.
    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    // The conjunction must give the same result regardless of clause order.
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, Occur.MUST);
    booleanQuery.Add(termQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(booleanQuery, searcher);
    searcher.Close();
    directory.Close();
}
/// <summary>
/// Indexes one document with a custom analyzer (built by the anonymous
/// helper) that emits tokens at explicit positions, then verifies that
/// PhraseQuery honors explicitly supplied term positions — including gaps
/// and multiple terms at the same position — and that MultiPhraseQuery can
/// match when only one of several same-position terms exists.
/// </summary>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    Directory store = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), store, analyzer, Similarity, TimeZone);
    Document d = new Document();
    d.Add(NewTextField("field", "bogus", Field.Store.YES));
    writer.AddDocument(d);
    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = NewSearcher(reader);
    // Sanity-check the positions the analyzer actually produced.
    DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
    pos.NextDoc();
    // first token should be at position 0
    Assert.AreEqual(0, pos.NextPosition());
    pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
    pos.NextDoc();
    // second token should be at position 2
    Assert.AreEqual(2, pos.NextPosition());
    PhraseQuery q;
    ScoreDoc[] hits;
    // Terms "1" and "2" are NOT adjacent (positions 0 and 2), so a default
    // phrase (implicit consecutive positions) must not match.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    // same as previous, just specify positions explicitely.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    // specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    // phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    // phrase query should fail for non existing searched term
    // even if there exist another searched terms in the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    // multi-phrase query should succed for non existing searched term
    // because there exist another searched terms in the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    hits = searcher.Search(mq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    // Remaining cases exercise implicit-position phrases across the gaps
    // produced by the analyzer's position increments.
    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);
    reader.Dispose();
    store.Dispose();
}
public virtual void TestNonExistingPhrase() { // phrase without repetitions that exists in 2 docs query.Add(new Term("nonexist", "phrase")); query.Add(new Term("nonexist", "notexist")); query.Add(new Term("nonexist", "found")); query.Slop = 2; // would be found this way ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs"); QueryUtils.Check(query, searcher); // phrase with repetitions that exists in 2 docs query = new PhraseQuery(); query.Add(new Term("nonexist", "phrase")); query.Add(new Term("nonexist", "exist")); query.Add(new Term("nonexist", "exist")); query.Slop = 1; // would be found hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs"); QueryUtils.Check(query, searcher); // phrase I with repetitions that does not exist in any doc query = new PhraseQuery(); query.Add(new Term("nonexist", "phrase")); query.Add(new Term("nonexist", "notexist")); query.Add(new Term("nonexist", "phrase")); query.Slop = 1000; // would not be found no matter how high the slop is hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc"); QueryUtils.Check(query, searcher); // phrase II with repetitions that does not exist in any doc query = new PhraseQuery(); query.Add(new Term("nonexist", "phrase")); query.Add(new Term("nonexist", "exist")); query.Add(new Term("nonexist", "exist")); query.Add(new Term("nonexist", "exist")); query.Slop = 1000; // would not be found no matter how high the slop is hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc"); QueryUtils.Check(query, searcher); }
/// <summary>
/// LUCENE-2617 regression test: clauses whose terms are not in the index
/// still have to contribute to the score via the coord factor, and clauses
/// that produce a null scorer must be handled correctly — a SHOULD null
/// scorer is ignored (for hit counting), a MUST null scorer yields zero
/// hits, and DisjunctionMaxQuery tolerates a null-scorer sub-query.
/// </summary>
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    w.AddDocument(doc, null);
    IndexReader r = w.GetReader(null);
    IndexSearcher s = new IndexSearcher(r);
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
    float score = s.Search(q, 10, null).MaxScore;
    Query subQuery = new TermQuery(new Term("field", "not_in_index"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.SHOULD);
    float score2 = s.Search(q, 10, null).MaxScore;
    // One of two SHOULD clauses matched -> coord halves the score.
    Assert.AreEqual(score * .5, score2, 1e-6);
    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery)q.Clone();
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "not_in_index"));
    phrase.Add(new Term("field", "another_not_in_index"));
    phrase.Boost = 0;
    qq.Add(phrase, Occur.SHOULD);
    score2 = s.Search(qq, 10, null).MaxScore;
    // One of three clauses matched -> coord is 1/3.
    Assert.AreEqual(score * (1.0 / 3), score2, 1e-6);
    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.MUST);
    score2 = s.Search(q, 10, null).MaxScore;
    // Two of three clauses matched -> coord is 2/3.
    Assert.AreEqual(score * (2.0 / 3), score2, 1e-6);
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10, null).TotalHits);
    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    q.Add(pq, Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10, null).TotalHits);
    // DisjunctionMax must still match via its other sub-query.
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10, null).TotalHits);
    r.Close();
    w.Close();
    dir.Close();
}
public void TestShingleAnalyzerWrapperPhraseQuery() { Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2); Searcher = SetUpSearcher(analyzer); var q = new PhraseQuery(); var ts = analyzer.TokenStream("content", new StringReader("this sentence")); var j = -1; var posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>(); var termAtt = ts.AddAttribute<ITermAttribute>(); while (ts.IncrementToken()) { j += posIncrAtt.PositionIncrement; var termText = termAtt.Term; q.Add(new Term("content", termText), j); } var hits = Searcher.Search(q, null, 1000).ScoreDocs; var ranks = new[] {0}; CompareRanks(hits, ranks); }
/// <summary>
/// Rescoring when one first-pass hit has no second-pass score: both a
/// PhraseQuery and an equivalent ordered SpanNearQuery, applied through
/// QueryRescorer.Rescore, must promote the document where "wizard" and
/// "oz" are adjacent and demote the one where they are not.
/// </summary>
public virtual void TestMissingSecondPassScore()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    w.Dispose();
    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = GetSearcher(r);
    TopDocs hits = searcher.Search(bq, 10);
    Assert.AreEqual(2, hits.TotalHits);
    Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));
    // Now, resort using PhraseQuery, no slop:
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "wizard"));
    pq.Add(new Term("field", "oz"));
    TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);
    // Resorting changed the order:
    Assert.AreEqual(2, hits2.TotalHits);
    Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));
    // Resort using SpanNearQuery:
    SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
    SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);
    TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);
    // Resorting changed the order:
    Assert.AreEqual(2, hits3.TotalHits);
    Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));
    r.Dispose();
    dir.Dispose();
}
public virtual void TestShingleAnalyzerWrapperPhraseQuery() { PhraseQuery q = new PhraseQuery(); TokenStream ts = analyzer.TokenStream("content", "this sentence"); try { int j = -1; IPositionIncrementAttribute posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>(); ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>(); ts.Reset(); while (ts.IncrementToken()) { j += posIncrAtt.PositionIncrement; string termText = termAtt.ToString(); q.Add(new Term("content", termText), j); } ts.End(); } finally { IOUtils.CloseWhileHandlingException(ts); } ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs; int[] ranks = new int[] { 0 }; CompareRanks(hits, ranks); }
/// <summary>
/// Two-term phrase scoring: verifies the exact phrase scorer and the sloppy
/// phrase scorer produce the same score when no slop is needed, and that a
/// palindromic field matches "two three" in both orders once slop is at
/// least 2. The conditional arguments support builds where test data is
/// instance-initialized.
/// </summary>
public virtual void TestPalyndrome2()
{
    // search on non palyndrome, find phrase with no slop, using exact phrase scorer
    query.Slop = 0; // to use exact phrase scorer
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "three"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
    float score0 = hits[0].Score;
    //System.out.println("(exact) field: two three: "+score0);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    // search on non palyndrome, find phrase with slop 2, though no slop required here.
    query.Slop = 2; // to use sloppy scorer
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score1 = hits[0].Score;
    //System.out.println("(sloppy) field: two three: "+score1);
    // Slop that is never consumed must not change the score.
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    // search ordered in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 2; // must be at least two for both ordered and reversed to match
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "three"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    //float score2 = hits[0].Score;
    //System.out.println("palindrome: two three: "+score2);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
    // search reveresed in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 2; // must be at least two for both ordered and reversed to match
    query.Add(new Term("palindrome", "three"));
    query.Add(new Term("palindrome", "two"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    //float score3 = hits[0].Score;
    //System.out.println("palindrome: three two: "+score3);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
    //Assert.AreEqual("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
}
/// <summary>
/// Tokenizes <paramref name="queryText"/> with the configured analyzer and builds the
/// simplest query covering the resulting terms:
/// <list type="bullet">
/// <item><description>no tokens: <c>null</c></description></item>
/// <item><description>one token: a <c>TermQuery</c></description></item>
/// <item><description>several tokens all at one position: a <c>BooleanQuery</c> of SHOULD clauses</description></item>
/// <item><description>stacked tokens across positions: a <c>MultiPhraseQuery</c></description></item>
/// <item><description>otherwise: a <c>PhraseQuery</c> using <c>phraseSlop</c></description></item>
/// </list>
/// </summary>
/// <param name="field">index field the terms are created against</param>
/// <param name="queryText">raw text to analyze into terms</param>
/// <returns>the constructed query, or <c>null</c> when analysis produces no tokens</returns>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
{
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count.
    TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
    System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    Lucene.Net.Analysis.Token t;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;

    while (true)
    {
        try
        {
            t = source.Next();
        }
        catch (System.IO.IOException) // omit unused variable (was CS0168); a failed read ends tokenization like end-of-stream
        {
            t = null;
        }
        if (t == null)
            break;
        v.Add(t);
        // A zero position increment means this token is stacked on the previous one.
        if (t.GetPositionIncrement() != 0)
            positionCount += t.GetPositionIncrement();
        else
            severalTokensAtSamePosition = true;
    }
    try
    {
        source.Close();
    }
    catch (System.IO.IOException)
    {
        // ignore: nothing useful can be done if the stream fails to close
    }

    if (v.Count == 0)
        return null;
    else if (v.Count == 1)
    {
        t = (Lucene.Net.Analysis.Token) v[0];
        return new TermQuery(new Term(field, t.TermText()));
    }
    else
    {
        if (severalTokensAtSamePosition)
        {
            if (positionCount == 1)
            {
                // no phrase query: every token occupies the same single position,
                // so OR them together (coord disabled by the 'true' argument)
                BooleanQuery q = new BooleanQuery(true);
                for (int i = 0; i < v.Count; i++)
                {
                    t = (Lucene.Net.Analysis.Token) v[i];
                    TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
                    q.Add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            }
            else
            {
                // phrase query: tokens sharing a position become one MultiPhraseQuery slot
                MultiPhraseQuery mpq = new MultiPhraseQuery();
                System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
                for (int i = 0; i < v.Count; i++)
                {
                    t = (Lucene.Net.Analysis.Token) v[i];
                    if (t.GetPositionIncrement() == 1 && multiTerms.Count > 0)
                    {
                        // position advanced: flush the stacked terms collected so far
                        mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                        multiTerms.Clear();
                    }
                    multiTerms.Add(new Term(field, t.TermText()));
                }
                // flush the final group of stacked terms
                mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                return mpq;
            }
        }
        else
        {
            // Plain multi-term phrase, one term per position.
            PhraseQuery q = new PhraseQuery();
            q.SetSlop(phraseSlop);
            for (int i = 0; i < v.Count; i++)
            {
                q.Add(new Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
            }
            return q;
        }
    }
}