public virtual void TestPhrase()
{
    // "seventy seven" as an exact phrase; every *77 document should hit.
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("Field", "seventy"));
    phrase.Add(new Term("Field", "seven"));
    CheckHits(phrase, new int[] { 77, 177, 277, 377, 477, 577, 677, 777, 877, 977 });
}
// Visits each term of a phrase query, dumping a trace, and rebuilds the query
// only when some term was rewritten by VisitTerm (copy-on-write).
// FIX: the rebuilt query previously lost the original query's slop and boost;
// they are now copied onto the replacement.
// NOTE(review): explicit term positions (GetPositions) are still not copied —
// confirm whether phrases with holes can reach this visitor.
public override Query VisitPhraseQuery(PhraseQuery phraseq)
{
    _dump.Append("PhraseQ(");
    var terms = phraseq.GetTerms();
    PhraseQuery newQuery = null;
    int index = 0;
    int count = terms.Length;
    while (index < count)
    {
        var visitedTerm = VisitTerm(terms[index]);
        if (newQuery != null)
        {
            // Already diverged: keep appending visited terms.
            newQuery.Add(visitedTerm);
        }
        else if (visitedTerm != terms[index])
        {
            // First rewritten term: materialize a copy with all prior terms.
            newQuery = new PhraseQuery();
            for (int i = 0; i < index; i++)
                newQuery.Add(terms[i]);
            newQuery.Add(visitedTerm);
        }
        index++;
        if (index < count)
            _dump.Append(", ");
    }
    _dump.Append(", Slop:").Append(phraseq.GetSlop()).Append(BoostToString(phraseq)).Append(")");
    if (newQuery != null)
    {
        // Carry over slop and boost so the rewritten phrase behaves like the original.
        newQuery.SetSlop(phraseq.GetSlop());
        newQuery.SetBoost(phraseq.GetBoost());
        return newQuery;
    }
    return phraseq;
}
// Scorer for exact (slop = 0) phrase matches. One ChunkState is tracked per
// phrase term; each term's positions are offset by -position so a document
// matches when all terms line up at the same adjusted position.
// Assumes postings[0] is the rarest term (lowest doc freq) — TODO confirm
// the caller sorts postings by ascending doc freq.
internal ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer) : base(weight)
{
    this.DocScorer = docScorer;
    ChunkStates = new ChunkState[postings.Length];
    EndMinus1 = postings.Length - 1;
    // min(cost)
    Cost_Renamed = postings[0].Postings.Cost();
    for (int i = 0; i < postings.Length; i++)
    {
        // Coarse optimization: advance(target) is fairly
        // costly, so, if the relative freq of the 2nd
        // rarest term is not that much (> 1/5th) rarer than
        // the first term, then we just use .nextDoc() when
        // ANDing. this buys ~15% gain for phrases where
        // freq of rarest 2 terms is close:
        bool useAdvance = postings[i].DocFreq > 5 * postings[0].DocFreq;
        ChunkStates[i] = new ChunkState(postings[i].Postings, -postings[i].Position, useAdvance);
        // If any later term has no documents at all, the conjunction can
        // never match — flag it and stop initializing.
        if (i > 0 && postings[i].Postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
        {
            NoDocs = true;
            return;
        }
    }
}
// Creates an empty phrase query, resolves the effective occur value, attaches
// the phrase to the parent of the given query, and returns it so the caller
// can populate it with terms.
public static PhraseQuery Phrase(this BooleanQuery inputQuery, BooleanClause.Occur occur = null)
{
    BooleanQuery parent = GetParentQuery(inputQuery);
    PhraseQuery phrase = new PhraseQuery();
    SetOccurValue(inputQuery, ref occur);
    parent.Add(phrase, occur);
    return phrase;
}
// HTTP handler entry point: runs a phrase search over the "tag" field of the
// image index and writes the matching (highlighted) tag names back as JSON.
// NOTE(review): directory/reader/searcher are never closed — presumably
// acceptable for a demo handler, but a production handler should dispose them.
public void ProcessRequest(HttpContext context)
{
    context.Response.ContentType = "text/plain";
    string searchKey = context.Request["wd"];
    string indexPath = context.Server.MapPath("../IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Search condition
    PhraseQuery query = new PhraseQuery();
    // Tokenize the user's keyword and add each token as a phrase term
    foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
    {
        query.Add(new Term("tag", word));
    }
    //query.Add(new Term("content", "C#")); // multiple conditions are ANDed
    query.SetSlop(100); // maximum distance allowed between the phrase terms
    // TopScoreDocCollector is the container holding the search results
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; results go into the collector
    // TopDocs(0, n) selects a result page; e.g. TopDocs(20, 10) would return docs 20-30 (paging)
    ScoreDoc[] docs = collector.TopDocs(0, 10).scoreDocs;
    // Entity objects for display
    var tagModels = new List<Picture.Model.TagModel>();
    for (int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].doc; // Lucene's internally assigned document id
        Document doc = searcher.Doc(docId); // fetch the Document by its id
        Picture.Model.TagModel tag = new Picture.Model.TagModel();
        //picture.ImgSummary = doc.Get("summary");
        tag.TagName = Picture.Utility.SplitContent.HightLight(searchKey, doc.Get("tag"));
        //book.ContentDescription = doc.Get("content"); // without highlighting
        // Keyword highlighting via the Pan Gu highlighting plug-in:
        //book.ContentDescription = Picture.Utility.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
        tag.TId = Convert.ToInt32(doc.Get("id"));
        tagModels.Add(tag);
    }
    SearchPreviewResult result = new SearchPreviewResult() { q = searchKey, p = false };
    foreach (var item in tagModels)
    {
        result.s.Add(item.TagName);
    }
    System.Web.Script.Serialization.JavaScriptSerializer jss = new System.Web.Script.Serialization.JavaScriptSerializer();
    context.Response.Write(jss.Serialize(result));
}
public virtual void Test1()
{
    BooleanQuery root = new BooleanQuery();

    // Required: the sloppy phrase "w1 w2"~1.
    PhraseQuery phrase = new PhraseQuery();
    phrase.Slop = 1;
    phrase.Add(new Term(FIELD, "w1"));
    phrase.Add(new Term(FIELD, "w2"));
    root.Add(phrase, Occur.MUST);

    // Optional span-near clauses.
    root.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
    root.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

    // Optional filtered clause with a large boost.
    Query filtered = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")), new ItemizedFilter(new int[] { 1, 3 }));
    filtered.Boost = 1000;
    root.Add(filtered, Occur.SHOULD);

    // Optional constant-score clause.
    Query constant = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
    constant.Boost = 30;
    root.Add(constant, Occur.SHOULD);

    // Disjunction-max (tie-break 0.2) over several sub-queries.
    DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f);
    dm.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
    dm.Add(new TermQuery(new Term(FIELD, "QQ")));

    BooleanQuery xxYYnotZZ = new BooleanQuery();
    xxYYnotZZ.Add(new TermQuery(new Term(FIELD, "xx")), Occur.SHOULD);
    xxYYnotZZ.Add(new TermQuery(new Term(FIELD, "yy")), Occur.SHOULD);
    xxYYnotZZ.Add(new TermQuery(new Term(FIELD, "zz")), Occur.MUST_NOT);
    dm.Add(xxYYnotZZ);

    BooleanQuery notXXnotW1 = new BooleanQuery();
    notXXnotW1.Add(new TermQuery(new Term(FIELD, "xx")), Occur.MUST_NOT);
    notXXnotW1.Add(new TermQuery(new Term(FIELD, "w1")), Occur.MUST_NOT);
    dm.Add(notXXnotW1);

    DisjunctionMaxQuery innerDm = new DisjunctionMaxQuery(0.5f);
    innerDm.Add(new TermQuery(new Term(FIELD, "w1")));
    innerDm.Add(new TermQuery(new Term(FIELD, "w2")));
    innerDm.Add(new TermQuery(new Term(FIELD, "w3")));
    dm.Add(innerDm);

    root.Add(dm, Occur.SHOULD);

    // At least two of the three near-clauses must match.
    BooleanQuery atLeastTwo = new BooleanQuery();
    atLeastTwo.MinimumNumberShouldMatch = 2;
    atLeastTwo.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
    atLeastTwo.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
    atLeastTwo.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);
    root.Add(atLeastTwo, Occur.SHOULD);

    Qtest(root, new int[] { 0, 1, 2 });
}
// Adds a required exact-match clause for the given field/value pair.
// FIX: restored the (previously commented-out) empty-value guard — an empty
// value would otherwise produce a phrase containing an empty term, which can
// never match and only bloats the query.
// NOTE(review): the field name is lower-cased but the value is not;
// BuildExactFieldValueClause lower-cases both — confirm which is intended.
protected void AddExactFieldValueClause(Index index, BooleanQuery query, string fieldName, string fieldValue)
{
    if (string.IsNullOrEmpty(fieldValue)) return;
    fieldValue = IdHelper.ProcessGUIDs(fieldValue);
    var phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term(fieldName.ToLowerInvariant(), fieldValue));
    query.Add(phraseQuery, BooleanClause.Occur.MUST);
}
public virtual void TestExactPhraseVersusBooleanAnd()
{
    // An exact phrase over two terms can only match documents that a
    // boolean AND over the same terms also matches.
    Term first = RandomTerm();
    Term second = RandomTerm();

    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(first);
    phrase.Add(second);

    BooleanQuery conjunction = new BooleanQuery();
    conjunction.Add(new TermQuery(first), Occur.MUST);
    conjunction.Add(new TermQuery(second), Occur.MUST);

    AssertSubsetOf(phrase, conjunction);
}
/// <summary>
/// Search: runs the (Pan Gu tokenized) keyword as phrase queries against both
/// the "msg" and "title" fields and binds the hits to the repeater.
/// NOTE(review): directory/reader/searcher are never closed — consider
/// disposing them.
/// </summary>
protected void SearchContent(string kw)
{
    string indexPath = @"D:\lucenedir";
    kw = kw.ToLower(); // Pan Gu segmentation is case-sensitive by default, so search in lower case
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Search condition
    PhraseQuery queryMsg = new PhraseQuery();
    foreach (string word in Common.WebCommon.PanGuSplit(kw)) // the user separates words with spaces, e.g. "computer major"
    {
        queryMsg.Add(new Term("msg", word)); // search within the article body
    }
    //query.Add(new Term("body","语言")); -- extra conditions can be added; they are ANDed and order-independent
    //query.Add(new Term("body", "大学生"));
    queryMsg.SetSlop(100); // max distance between query terms — words too far apart in an article are meaningless matches (e.g. "student" and "resume" far apart)
    PhraseQuery queryTitle = new PhraseQuery();
    foreach (string word in Common.WebCommon.PanGuSplit(kw))
    {
        queryTitle.Add(new Term("title", word));
    }
    queryTitle.SetSlop(100);
    // Either field may match: OR the two phrase queries together.
    BooleanQuery query = new BooleanQuery();
    query.Add(queryMsg, BooleanClause.Occur.SHOULD);
    query.Add(queryTitle, BooleanClause.Occur.SHOULD);
    // TopScoreDocCollector is the container holding the search results
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; results go into the collector
    // All hit documents; GetTotalHits() is the total count. TopDocs(300, 20) would return docs 300-320 — usable for paging.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    List<SearchResult> list = new List<SearchResult>();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc[] only carries document ids, so result Documents are not all
        // loaded into memory at once; fetch details per id via searcher.Doc.
        int docId = docs[i].doc; // Lucene's internally assigned document id
        Document doc = searcher.Doc(docId); // fetch the document details for this id
        SearchResult result = new SearchResult();
        result.ContentDescription = WebCommon.Highlight(kw, WebCommon.CutString(doc.Get("msg"), 150)); // highlighted snippet
        result.Title = doc.Get("title");
        result.Id = Convert.ToInt32(doc.Get("id"));
        result.PublishDate = Convert.ToDateTime(doc.Get("PublishDate"));
        result.ISBN = doc.Get("ISBN");
        result.Author = doc.Get("Author");
        result.UnitPrice = Convert.ToDecimal(doc.Get("UnitPrice"));
        list.Add(result);
    }
    this.BookListRepeater.DataSource = list;
    this.BookListRepeater.DataBind();
    AddKeyWord(kw);
}
// Folds every metadata value into a single phrase query, then reuses that
// query's string form as the filter expression for the string-based overload.
public string GetSearchQuery(params ISearchableMetadataValue[] searchableMetadataValues)
{
    var phrase = new PhraseQuery();
    foreach (var metadataValue in searchableMetadataValues)
    {
        phrase.Add(new Term(metadataValue.Metadata.SearchName, metadataValue.Value));
    }
    return GetSearchQuery(phrase.ToString(), new SearchableMetadata[] { });
}
// This is a simplified query builder which works for single Terms and single Phrases
// Returns null, TermQuery, or PhraseQuery
// NOTE(review): the summary above says "null", but the zero-token path
// actually returns an empty BooleanQuery — confirm which contract callers expect.
public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
{
    TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));
    TokenFilter filter = new CachingTokenFilter(stream);
    filter.Reset();

    // This attribute way of getting token properties isn't very good, but it's the non-obsolete one.
    var attr1 = filter.GetAttribute<ITermAttribute>();
    Func<string> getText = () => attr1 != null ? attr1.Term : null;

    Func<int> getPositionIncrement;
    if (filter.HasAttribute<IPositionIncrementAttribute>())
    {
        var attr = filter.GetAttribute<IPositionIncrementAttribute>();
        getPositionIncrement = () => attr.PositionIncrement;
    }
    else
    {
        // No increment attribute: assume every token advances one position.
        getPositionIncrement = () => 1;
    }

    // 0 tokens
    if (!filter.IncrementToken())
    {
        return new BooleanQuery();
    }

    // 1 token?
    string token1 = getText();
    int position = 0;
    if (!filter.IncrementToken())
    {
        return new TermQuery(new Term(field, token1));
    }

    // many tokens - handle first token
    PhraseQuery ret = new PhraseQuery();
    ret.Add(new Term(field, token1));

    do
    {
        // handle rest of tokens; increments accumulate so stop-word holes
        // are preserved in the phrase positions.
        string tokenNext = getText();
        position += getPositionIncrement();
        ret.Add(new Term(field, tokenNext), position);
    } while (filter.IncrementToken());

    // NOTE(review): stream/filter are not disposed, and the first token's own
    // position increment is ignored — presumably acceptable here; confirm.
    return ret;
}
public virtual void TestANDPhrase()
{
    // "foo bar"+"star wars" must parse to two required phrase clauses.
    PhraseQuery fooBar = new PhraseQuery();
    fooBar.Add(new Term("field", "foo"));
    fooBar.Add(new Term("field", "bar"));

    PhraseQuery starWars = new PhraseQuery();
    starWars.Add(new Term("field", "star"));
    starWars.Add(new Term("field", "wars"));

    BooleanQuery expected = new BooleanQuery();
    expected.Add(fooBar, BooleanClause.Occur.MUST);
    expected.Add(starWars, BooleanClause.Occur.MUST);

    assertEquals(expected, Parse("\"foo bar\"+\"star wars\""));
}
public virtual void SetUp()
{
    // Build a tiny single-document in-memory index and start every test
    // with a fresh, empty phrase query.
    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Document document = new Document();
    document.Add(Field.Text("Field", "one two three four five"));
    indexWriter.AddDocument(document);
    indexWriter.Optimize();
    indexWriter.Close();
    searcher = new IndexSearcher(directory);
    query = new PhraseQuery();
}
// Builds a single-term "phrase" matching the exact (lower-cased, GUID-processed)
// field value; returns null when either the field name or value is missing.
public static Query BuildExactFieldValueClause(Index index, string fieldName, string fieldValue)
{
    Assert.ArgumentNotNull(index, "Index");
    if (string.IsNullOrEmpty(fieldName) || string.IsNullOrEmpty(fieldValue))
    {
        return null;
    }

    var exactMatch = new PhraseQuery();
    exactMatch.Add(new Term(fieldName.ToLowerInvariant(), IdHelper.ProcessGUIDs(fieldValue).ToLowerInvariant()));
    return exactMatch;
}
public virtual void TestNonExistingPhrase()
{
    // Phrase without repetitions that exists in 2 docs.
    Query.Add(new Term("nonexist", "phrase"));
    Query.Add(new Term("nonexist", "notexist"));
    Query.Add(new Term("nonexist", "found"));
    Query.Slop = 2; // found at this slop
    ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // Phrase with a repeated term that exists in 2 docs.
    Query = new PhraseQuery();
    Query.Add(new Term("nonexist", "phrase"));
    Query.Add(new Term("nonexist", "exist"));
    Query.Add(new Term("nonexist", "exist"));
    Query.Slop = 1; // found at this slop
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // Phrase I with repetitions that matches no document at any slop.
    Query = new PhraseQuery();
    Query.Add(new Term("nonexist", "phrase"));
    Query.Add(new Term("nonexist", "notexist"));
    Query.Add(new Term("nonexist", "phrase"));
    Query.Slop = 1000; // not found no matter how large the slop
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // Phrase II with repetitions that matches no document at any slop.
    Query = new PhraseQuery();
    Query.Add(new Term("nonexist", "phrase"));
    Query.Add(new Term("nonexist", "exist"));
    Query.Add(new Term("nonexist", "exist"));
    Query.Add(new Term("nonexist", "exist"));
    Query.Slop = 1000; // not found no matter how large the slop
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);
}
public virtual void TestNonExistingPhrase()
{
    // Phrase without repetitions that exists in 2 docs.
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "found"));
    query.SetSlop(2); // found at this slop
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs");
    QueryUtils.Check(query, searcher);

    // Phrase with a repeated term that exists in 2 docs.
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.SetSlop(1); // found at this slop
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs");
    QueryUtils.Check(query, searcher);

    // Phrase I with repetitions that matches no document at any slop.
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "phrase"));
    query.SetSlop(1000); // not found no matter how large the slop
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);

    // Phrase II with repetitions that matches no document at any slop.
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.SetSlop(1000); // not found no matter how large the slop
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);
}
public virtual void TestOrderDoesntMatter()
{
    // Adjacent terms in reverse order need a slop of at least two.
    Query.Slop = 2;
    Query.Add(new Term("field", "two"));
    Query.Add(new Term("field", "one"));
    ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // Terms two positions apart and reversed: slop 2 is not enough.
    Query = new PhraseQuery();
    Query.Slop = 2;
    Query.Add(new Term("field", "three"));
    Query.Add(new Term("field", "one"));
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "not sloppy enough");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);
}
public virtual void TestIncreasingSloppiness()
{
    // The matches of a phrase at slop i are a subset of the matches of the
    // same phrase at slop i + 1.
    Term first = RandomTerm();
    Term second = RandomTerm();

    PhraseQuery tighter = new PhraseQuery();
    tighter.Add(first);
    tighter.Add(second);

    PhraseQuery looser = new PhraseQuery();
    looser.Add(first);
    looser.Add(second);

    for (int slop = 0; slop < 10; slop++)
    {
        tighter.Slop = slop;
        looser.Slop = slop + 1;
        AssertSubsetOf(tighter, looser);
    }
}
public virtual void TestExact()
{
    // Slop defaults to zero: adjacent in-order terms match exactly.
    query.Add(new Term("field", "four"));
    query.Add(new Term("field", "five"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000, null).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "exact match");
    QueryUtils.Check(query, searcher);

    // Reversed order is not an exact match.
    query = new PhraseQuery();
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    hits = searcher.Search(query, null, 1000, null).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "reverse not exact");
    QueryUtils.Check(query, searcher);
}
public virtual void TestOrderDoesntMatter()
{
    // Adjacent terms in reverse order need a slop of at least two.
    query.SetSlop(2);
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    QueryUtils.Check(query, searcher);

    // Terms two positions apart and reversed: slop 2 is not enough.
    query = new PhraseQuery();
    query.SetSlop(2);
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "one"));
    hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length, "not sloppy enough");
    QueryUtils.Check(query, searcher);
}
// Returns true when the given words, taken as an exact (slop 0) phrase over
// the "field" field, match at least one document in the test index.
// NOTE: the `i` parameter is unused; it is kept for signature compatibility.
protected bool PhaseQueryHasHits(string[] phrases, int i)
{
    using (var dir = FSDirectory.Open(TestEnvironment.TestIndexDirectory))
    using (var indexSearcher = new IndexSearcher(dir))
    {
        var exactPhrase = new PhraseQuery { Slop = 0 };
        foreach (var word in phrases)
        {
            exactPhrase.Add(new Term("field", word));
        }

        // Search, without subcategories
        return indexSearcher.Search(exactPhrase, 10).TotalHits > 0;
    }
}
public virtual void TestRepetitiveIncreasingSloppinessWithHoles()
{
    // Repeated term with a positional hole (positions 0 and 2): results at
    // slop i are a subset of results at slop i + 1.
    Term repeated = RandomTerm();

    PhraseQuery tighter = new PhraseQuery();
    tighter.Add(repeated);
    tighter.Add(repeated, 2);

    PhraseQuery looser = new PhraseQuery();
    looser.Add(repeated);
    looser.Add(repeated, 2);

    for (int slop = 0; slop < 10; slop++)
    {
        tighter.Slop = slop;
        looser.Slop = slop + 1;
        AssertSubsetOf(tighter, looser);
    }
}
public virtual void TestMultiFieldBQofPQ2()
{
    // The same two-word phrase on two different fields, ORed together.
    BooleanQuery disjunction = new BooleanQuery();

    PhraseQuery onField = new PhraseQuery();
    onField.Add(new Term(FIELD, "w1"));
    onField.Add(new Term(FIELD, "w3"));
    disjunction.Add(onField, Occur.SHOULD);

    PhraseQuery onAltField = new PhraseQuery();
    onAltField.Add(new Term(ALTFIELD, "w1"));
    onAltField.Add(new Term(ALTFIELD, "w3"));
    disjunction.Add(onAltField, Occur.SHOULD);

    Qtest(disjunction, new int[] { 1, 3 });
}
public virtual void TestOrderDoesntMatter()
{
    SetUp();
    // Adjacent terms in reverse order need a slop of at least two.
    query.SetSlop(2);
    query.Add(new Term("Field", "two"));
    query.Add(new Term("Field", "one"));
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "just sloppy enough");

    // Terms two positions apart and reversed: slop 2 is not enough.
    query = new PhraseQuery();
    query.SetSlop(2);
    query.Add(new Term("Field", "three"));
    query.Add(new Term("Field", "one"));
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "not sloppy enough");
}
public virtual void TestNegativeSlop()
{
    // Assigning a negative slop must be rejected with an ArgumentException.
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "two"));
    phrase.Add(new Term("field", "one"));
    try
    {
        phrase.Slop = -2;
        Assert.Fail("didn't get expected exception");
    }
#pragma warning disable 168
    catch (System.ArgumentException expected)
#pragma warning restore 168
    {
        // expected exception
    }
}
public virtual void TestSlopScoring()
{
    // Index three docs where the phrase terms are 1, 2 and 3 positions
    // apart; with unlimited slop, closer matches must score higher.
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));

    Documents.Document closest = new Documents.Document();
    closest.Add(NewTextField("field", "foo firstname lastname foo", Field.Store.YES));
    writer.AddDocument(closest);

    Documents.Document middle = new Documents.Document();
    middle.Add(NewTextField("field", "foo firstname zzz lastname foo", Field.Store.YES));
    writer.AddDocument(middle);

    Documents.Document farthest = new Documents.Document();
    farthest.Add(NewTextField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES));
    writer.AddDocument(farthest);

    IndexReader reader = writer.Reader;
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new DefaultSimilarity();

    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.Slop = int.MaxValue; // any distance may match; only the score differs

    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    // Closer term proximity => higher score.
    Assert.AreEqual(0.71, hits[0].Score, 0.01);
    Assert.AreEqual(0, hits[0].Doc);
    Assert.AreEqual(0.44, hits[1].Score, 0.01);
    Assert.AreEqual(1, hits[1].Doc);
    Assert.AreEqual(0.31, hits[2].Score, 0.01);
    Assert.AreEqual(2, hits[2].Doc);
    QueryUtils.Check(Random(), query, searcher, Similarity);

    reader.Dispose();
    directory.Dispose();
}
public virtual void TestSlopWithHoles()
{
    // Documents repeat "drug" with varying numbers of "druggy" fillers; the
    // query itself carries a positional hole (positions 1 and 4).
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    Field lyrics = new Field("lyrics", "", customType);
    Document doc = new Document();
    doc.Add(lyrics);
    lyrics.SetStringValue("drug drug");
    iw.AddDocument(doc);
    lyrics.SetStringValue("drug druggy drug");
    iw.AddDocument(doc);
    lyrics.SetStringValue("drug druggy druggy drug");
    iw.AddDocument(doc);
    lyrics.SetStringValue("drug druggy drug druggy drug");
    iw.AddDocument(doc);
    IndexReader ir = iw.GetReader();
    iw.Dispose();
    IndexSearcher searcher = NewSearcher(ir);

    PhraseQuery pq = new PhraseQuery();
    // "drug the drug"~1
    pq.Add(new Term("lyrics", "drug"), 1);
    pq.Add(new Term("lyrics", "drug"), 4);

    pq.Slop = 0;
    Assert.AreEqual(0, searcher.Search(pq, 4).TotalHits);
    pq.Slop = 1;
    Assert.AreEqual(3, searcher.Search(pq, 4).TotalHits);
    pq.Slop = 2;
    Assert.AreEqual(4, searcher.Search(pq, 4).TotalHits);

    ir.Dispose();
    dir.Dispose();
}
public virtual void TestSlop1()
{
    // Slop of 1 matches in-order terms.
    Query.Slop = 1;
    Query.Add(new Term("field", "one"));
    Query.Add(new Term("field", "two"));
    ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "in order");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    // Slop of 1 cannot match out-of-order terms; that needs at least 2.
    Query = new PhraseQuery();
    Query.Slop = 1;
    Query.Add(new Term("field", "two"));
    Query.Add(new Term("field", "one"));
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "reversed, slop not 2 or more");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);
}
public virtual void TestMultiFieldBQofPQ4()
{
    // The same sloppy (slop 1) phrase on two different fields, ORed together.
    BooleanQuery disjunction = new BooleanQuery();

    PhraseQuery onField = new PhraseQuery();
    onField.Slop = 1;
    onField.Add(new Term(FIELD, "w2"));
    onField.Add(new Term(FIELD, "w3"));
    disjunction.Add(onField, BooleanClause.Occur.SHOULD);

    PhraseQuery onAltField = new PhraseQuery();
    onAltField.Slop = 1;
    onAltField.Add(new Term(ALTFIELD, "w2"));
    onAltField.Add(new Term(ALTFIELD, "w3"));
    disjunction.Add(onAltField, BooleanClause.Occur.SHOULD);

    Qtest(disjunction, new int[] { 0, 1, 2, 3 });
}
public virtual void TestSlopScoring()
{
    // Index three docs where the phrase terms are 1, 2 and 3 positions
    // apart; with unlimited slop, closer matches must score higher.
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document closest = new Document();
    closest.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(closest);

    Document middle = new Document();
    middle.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(middle);

    Document farthest = new Document();
    farthest.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(farthest);

    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(directory, true);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.Slop = System.Int32.MaxValue; // any distance may match; only the score differs

    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    // Closer term proximity => higher score.
    Assert.AreEqual(0.71, hits[0].Score, 0.01);
    Assert.AreEqual(0, hits[0].Doc);
    Assert.AreEqual(0.44, hits[1].Score, 0.01);
    Assert.AreEqual(1, hits[1].Doc);
    Assert.AreEqual(0.31, hits[2].Score, 0.01);
    Assert.AreEqual(2, hits[2].Doc);
    QueryUtils.Check(query, searcher);
}
public virtual void TestSlop1()
{
    SetUp();
    // Slop of 1 matches in-order terms.
    query.SetSlop(1);
    query.Add(new Term("Field", "one"));
    query.Add(new Term("Field", "two"));
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "in order");

    // Slop of 1 cannot match out-of-order terms; that needs at least 2.
    query = new PhraseQuery();
    query.SetSlop(1);
    query.Add(new Term("Field", "two"));
    query.Add(new Term("Field", "one"));
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "reversed, slop not 2 or more");
}
public virtual void TestDemo()
{
    // End-to-end smoke test: index one document in memory, then run term
    // and phrase searches against it. (Direct Java port — note the
    // lower-camel API casing, which is preserved here.)
    Analyzer analyzer = new MockAnalyzer(random());

    // Store the index in memory:
    Directory directory = newDirectory();
    // To store an index on disk, use this instead:
    // Directory directory = FSDirectory.open(new File("/tmp/testindex"));
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, analyzer);
    Document doc = new Document();
    string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    string text = "this is the text to be indexed. " + longTerm;
    doc.add(newTextField("fieldname", text, Field.Store.YES));
    writer.addDocument(doc);
    writer.close();

    // Now search the index:
    IndexReader reader = DirectoryReader.open(directory); // read-only=true
    IndexSearcher searcher = newSearcher(reader);
    Assert.AreEqual(1, searcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = searcher.search(query, null, 1);
    Assert.AreEqual(1, hits.totalHits);

    // Iterate through the results:
    for (int i = 0; i < hits.scoreDocs.length; i++)
    {
        Document hitDoc = searcher.doc(hits.scoreDocs[i].doc);
        Assert.AreEqual(text, hitDoc.get("fieldname"));
    }

    // Test simple phrase query
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.add(new Term("fieldname", "to"));
    phraseQuery.add(new Term("fieldname", "be"));
    Assert.AreEqual(1, searcher.search(phraseQuery, null, 1).totalHits);

    reader.close();
    directory.close();
}
public virtual void TestSloppyPhraseVersusBooleanAnd()
{
    Term t1 = RandomTerm();
    // semantics differ from SpanNear: SloppyPhrase handles repeats,
    // so we must ensure t1 != t2
    Term t2;
    do
    {
        t2 = RandomTerm();
    } while (t1.Equals(t2));

    // With unlimited slop, the phrase degenerates to a boolean AND.
    PhraseQuery sloppy = new PhraseQuery();
    sloppy.Add(t1);
    sloppy.Add(t2);
    sloppy.Slop = int.MaxValue;

    BooleanQuery conjunction = new BooleanQuery();
    conjunction.Add(new TermQuery(t1), Occur.MUST);
    conjunction.Add(new TermQuery(t2), Occur.MUST);

    AssertSameSet(sloppy, conjunction);
}
// Runs `iter` random sloppy phrase queries against the searcher and returns
// the accumulated hit statistic from the counting collector.
public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
{
    int total = 0;
    for (int i = 0; i < iter; i++)
    {
        int nClauses = Random.Next(maxClauses - 1) + 2; // min 2 clauses
        PhraseQuery phrase = new PhraseQuery();
        for (int j = 0; j < nClauses; j++)
        {
            // Terms are single letters starting at 'A'; each pinned at position j.
            int tnum = Random.Next(termsInIndex);
            phrase.Add(new Term("f", char.ToString((char)(tnum + 'A'))), j);
        }
        phrase.Slop = termsInIndex; // this could be random too

        CountingHitCollector collector = new CountingHitCollector();
        s.Search(phrase, collector);
        total += collector.Sum;
    }
    return total;
}
/// <summary>
/// Adds a sub query for the given field/value pair to the boolean query:
/// a prohibited term for NotEquals, a range query when the value describes a
/// range, a phrase for multi-word values, a wildcard for Like, and a plain
/// term query otherwise.
/// </summary>
/// <param name="query">The boolean query being composed.</param>
/// <param name="key">The field key.</param>
/// <param name="value">The field value.</param>
/// <param name="matchVariant">The match variant.</param>
/// <param name="condition">The condition that determines the clause occurrence.</param>
private void AddContentSubQuery(LuceneSearch.BooleanQuery query, string key, string value, MatchVariant matchVariant, QueryCondition condition)
{
    if (matchVariant == MatchVariant.NotEquals)
    {
        query.Add(new LuceneSearch.TermQuery(new Term(key, value)), LuceneSearch.Occur.MUST_NOT);
        return;
    }

    LuceneSearch.Occur occurrence = this.GetOccur(condition);

    LuceneSearch.TermRangeQuery rangeQuery = this.GetRangeQuery(key, value, matchVariant);
    if (rangeQuery != null)
    {
        query.Add(rangeQuery, occurrence);
        return;
    }

    string[] keywords = value.Split(' ');
    if (keywords.Length > 1)
    {
        // Multi-word value: match it as a phrase.
        LuceneSearch.PhraseQuery phraseQuery = new Lucene.Net.Search.PhraseQuery();
        foreach (string keyword in keywords)
        {
            phraseQuery.Add(new Term(key, keyword));
        }
        query.Add(phraseQuery, occurrence);
    }
    else if (matchVariant == MatchVariant.Like)
    {
        // Prefix wildcard for LIKE semantics.
        query.Add(new LuceneSearch.WildcardQuery(new Term(key, value + "*")), occurrence);
    }
    else
    {
        query.Add(new LuceneSearch.TermQuery(new Term(key, value)), occurrence);
    }
}
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = new IndexSearcher(reader);

    // A PhraseQuery with no terms yields a null scorer; as an optional
    // clause it must not suppress the other SHOULD clause's match.
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, BooleanClause.Occur.SHOULD);
    Assert.AreEqual(1, searcher.Search(q, 10).totalHits);

    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    q.Add(pq, BooleanClause.Occur.MUST);
    Assert.AreEqual(0, searcher.Search(q, 10).totalHits);

    // Inside a DisjunctionMax the empty phrase is likewise ignored.
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, searcher.Search(dmq, 10).totalHits);

    reader.Close();
    writer.Close();
    dir.Close();
}
/// <summary>
/// Searches the index store for the request's keyword and binds the matching
/// books (with highlighted snippets) to the repeater.
/// NOTE(review): directory/reader/searcher are never closed — consider
/// disposing them.
/// </summary>
private void SearchFromIndexData()
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Search condition
    PhraseQuery query = new PhraseQuery();
    // Tokenize the user's keyword and add each token as a phrase term
    foreach(string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
    {
        query.Add(new Term("content", word));
    }
    //query.Add(new Term("content", "C#")); // multiple conditions are ANDed
    query.SetSlop(100); // maximum distance allowed between the phrase terms
    // TopScoreDocCollector is the container holding the search results
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; results go into the collector
    // TopDocs(0, GetTotalHits()) selects every hit; TopDocs(20, 10) would return docs 20-30 (paging)
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    // Entity objects for display
    List<PZYM.Shop.Model.Books> bookResult = new List<PZYM.Shop.Model.Books>();
    for(int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].doc; // Lucene's internally assigned document id
        Document doc = searcher.Doc(docId); // fetch the Document by its id
        PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
        book.Title = doc.Get("title");
        //book.ContentDescription = doc.Get("content"); // without highlighting
        // Keyword highlighting via the Pan Gu highlighting plug-in
        book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
        book.Id = Convert.ToInt32(doc.Get("id"));
        bookResult.Add(book);
    }
    Repeater1.DataSource = bookResult;
    Repeater1.DataBind();
}
// Tests sloppy matching of a phrase query that itself contains position
// "holes": the two query terms are added at positions 1 and 4, so documents
// need increasing slop to absorb the gap and any extra in-document terms.
public virtual void TestSlopWithHoles()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true; // keep scoring independent of field length
    Field f = new Field("lyrics", "", customType);
    Document doc = new Document();
    doc.Add(f);
    // Four documents with 0..3 filler terms between the "drug" occurrences.
    f.StringValue = "drug drug";
    iw.AddDocument(doc);
    f.StringValue = "drug druggy drug";
    iw.AddDocument(doc);
    f.StringValue = "drug druggy druggy drug";
    iw.AddDocument(doc);
    f.StringValue = "drug druggy drug druggy drug";
    iw.AddDocument(doc);
    IndexReader ir = iw.Reader;
    iw.Dispose();
    IndexSearcher @is = NewSearcher(ir);

    PhraseQuery pq = new PhraseQuery();
    // "drug the drug"~1
    pq.Add(new Term("lyrics", "drug"), 1);
    pq.Add(new Term("lyrics", "drug"), 4);
    pq.Slop = 0;
    Assert.AreEqual(0, @is.Search(pq, 4).TotalHits);
    pq.Slop = 1;
    Assert.AreEqual(3, @is.Search(pq, 4).TotalHits);
    pq.Slop = 2;
    Assert.AreEqual(4, @is.Search(pq, 4).TotalHits);
    ir.Dispose();
    dir.Dispose();
}
// Checks that CountingCollector records exactly one query entry per document
// for the phrase "b c", with docs alternating between phrase frequency 2.0
// and 1.0. NOTE: the loop consumes two documents per iteration via the
// inner ++i.
public virtual void TestPhraseQuery()
{
    PhraseQuery q = new PhraseQuery();
    q.Add(new Term("f", "b"));
    q.Add(new Term("f", "c"));
    CountingCollector c = new CountingCollector(TopScoreDocCollector.Create(10, true));
    s.Search(q, null, c);
    int maxDocs = s.IndexReader.MaxDoc;
    Assert.AreEqual(maxDocs, c.DocCounts.Count);
    for (int i = 0; i < maxDocs; i++)
    {
        // even-indexed doc: phrase occurs twice
        IDictionary<Query, float?> doc0 = c.DocCounts[i];
        Assert.AreEqual(1, doc0.Count);
        Assert.AreEqual(2.0F, doc0[q], FLOAT_TOLERANCE);
        // the following doc: phrase occurs once
        IDictionary<Query, float?> doc1 = c.DocCounts[++i];
        Assert.AreEqual(1, doc1.Count);
        Assert.AreEqual(1.0F, doc1[q], FLOAT_TOLERANCE);
    }
}
// Verifies that a query whose opening quote is never closed is parsed as if
// the quote were closed at the end of the input: all trailing tokens become
// part of the quoted phrase.
public void AutomaticallyClosesDanglingQuotes()
{
    // arrange: the quoted phrase after "title:" is never terminated
    var queryText = "title:\"dot NET version:1.2.3";

    var phraseQuery = new PhraseQuery();
    foreach (var token in new[] { "dot", "net", "version", "1", "2", "3" })
    {
        phraseQuery.Add(new Term("Title", token));
    }

    // The parser wraps the phrase in three levels of BooleanQuery.
    var innermost = new BooleanQuery { new BooleanClause(phraseQuery, Occur.SHOULD) };
    var middle = new BooleanQuery { new BooleanClause(innermost, Occur.SHOULD) };
    var expected = new BooleanQuery { new BooleanClause(middle, Occur.MUST) };

    // act
    var actual = NuGetQuery.MakeQuery(queryText);

    // assert
    Assert.Equal(expected, actual);
}
// Sloppy phrase over "one two three four five": matching "one three five"
// in order needs slop 2 (skip "two" and "four"); matching the reversed
// phrase takes six position moves, so slop 5 fails and slop 6 succeeds.
public virtual void TestMulipleTerms()
{
    query.Slop = 2;
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "five"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "two total moves");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    query = new PhraseQuery();
    query.Slop = 5; // it takes six moves to match this phrase
    query.Add(new Term("field", "five"));
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "one"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "slop of 5 not close enough");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    query.Slop = 6;
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "slop of 6 just right");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
}
// Exact (slop 0) phrase matching: adjacent in-order terms match, a reversed
// pair does not.
public virtual void TestExact()
{
    // slop is zero by default
    query.Add(new Term("field", "four"));
    query.Add(new Term("field", "five"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "exact match");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    // reversed order must not match with zero slop
    query = new PhraseQuery();
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "reverse not exact");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
}
// Property test: for a three-term phrase with random positional gaps, the
// match set at slop N must be a subset of the match set at slop N+1.
public virtual void TestIncreasingSloppiness3WithHoles()
{
    Term t1 = RandomTerm();
    Term t2 = RandomTerm();
    Term t3 = RandomTerm();
    // random holes between the terms' positions
    int pos1 = 1 + Random.Next(3);
    int pos2 = pos1 + 1 + Random.Next(3);

    PhraseQuery q1 = new PhraseQuery();
    PhraseQuery q2 = new PhraseQuery();
    foreach (PhraseQuery q in new[] { q1, q2 })
    {
        q.Add(t1);
        q.Add(t2, pos1);
        q.Add(t3, pos2);
    }

    for (int slop = 0; slop < 10; slop++)
    {
        q1.Slop = slop;
        q2.Slop = slop + 1;
        AssertSubsetOf(q1, q2);
    }
}
// Indexes a single document into a fresh in-memory index, runs the given
// phrase query at the given slop, asserts the expected hit count, and
// returns the query's max score.
private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
{
    query.SetSlop(slop);

    // Build a throwaway index containing just this one document.
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), MaxFieldLength.UNLIMITED);
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(ramDir);
    TopDocs td = searcher.Search(query, null, 10);
    string failure = "slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits";
    Assert.AreEqual(expectedNumResults, td.TotalHits, failure);
    //QueryUtils.check(query,searcher);

    float maxScore = td.GetMaxScore();
    searcher.Close();
    ramDir.Close();
    return maxScore;
}
// Sloppy phrase over "one two three four five" using the property-based
// fixture (Query/Searcher): slop 2 matches in order; the reversed phrase
// needs six moves, so slop 5 fails and slop 6 succeeds.
public virtual void TestMulipleTerms()
{
    Query.Slop = 2;
    Query.Add(new Term("field", "one"));
    Query.Add(new Term("field", "three"));
    Query.Add(new Term("field", "five"));
    ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "two total moves");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    Query = new PhraseQuery();
    Query.Slop = 5; // it takes six moves to match this phrase
    Query.Add(new Term("field", "five"));
    Query.Add(new Term("field", "three"));
    Query.Add(new Term("field", "one"));
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "slop of 5 not close enough");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);

    Query.Slop = 6;
    hits = Searcher.Search(Query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "slop of 6 just right");
    QueryUtils.Check(Random(), Query, Searcher, Similarity);
}
// Legacy (Hits-based API) version of the sloppy multiple-terms test.
// NOTE(review): calls SetUp() explicitly instead of relying on framework
// setup — presumably intentional in this old port; confirm before changing.
public virtual void TestMulipleTerms()
{
    SetUp();
    query.SetSlop(2);
    query.Add(new Term("Field", "one"));
    query.Add(new Term("Field", "three"));
    query.Add(new Term("Field", "five"));
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "two total moves");

    query = new PhraseQuery();
    query.SetSlop(5); // it takes six moves to match this phrase
    query.Add(new Term("Field", "five"));
    query.Add(new Term("Field", "three"));
    query.Add(new Term("Field", "one"));
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "slop of 5 not close enough");

    query.SetSlop(6);
    hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "slop of 6 just right");
}
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
{
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
    System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    Lucene.Net.Analysis.Token t;
    int positionCount = 0;                       // number of distinct positions seen
    bool severalTokensAtSamePosition = false;    // true if any token has increment 0 (e.g. synonyms)
    while (true)
    {
        try
        {
            t = source.Next();
        }
        catch (System.IO.IOException e)
        {
            t = null; // treat a read failure as end-of-stream
        }
        if (t == null)
            break;
        v.Add(t);
        if (t.GetPositionIncrement() != 0)
            positionCount += t.GetPositionIncrement();
        else
            severalTokensAtSamePosition = true;
    }
    try
    {
        source.Close();
    }
    catch (System.IO.IOException e)
    {
        // ignore
    }

    if (v.Count == 0)
        return null;
    else if (v.Count == 1)
    {
        // single token -> plain TermQuery
        t = (Lucene.Net.Analysis.Token) v[0];
        return new TermQuery(new Term(field, t.TermText()));
    }
    else
    {
        if (severalTokensAtSamePosition)
        {
            if (positionCount == 1)
            {
                // no phrase query:
                // all tokens share one position -> OR them together
                BooleanQuery q = new BooleanQuery(true);
                for (int i = 0; i < v.Count; i++)
                {
                    t = (Lucene.Net.Analysis.Token) v[i];
                    TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
                    q.Add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            }
            else
            {
                // phrase query:
                // multiple positions with synonyms -> MultiPhraseQuery, grouping
                // same-position tokens into one Term[] slot
                MultiPhraseQuery mpq = new MultiPhraseQuery();
                System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
                for (int i = 0; i < v.Count; i++)
                {
                    t = (Lucene.Net.Analysis.Token) v[i];
                    if (t.GetPositionIncrement() == 1 && multiTerms.Count > 0)
                    {
                        // position advanced: flush the synonyms gathered so far
                        mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                        multiTerms.Clear();
                    }
                    multiTerms.Add(new Term(field, t.TermText()));
                }
                mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                return mpq;
            }
        }
        else
        {
            // one token per position -> ordinary PhraseQuery
            PhraseQuery q = new PhraseQuery();
            q.SetSlop(phraseSlop);
            for (int i = 0; i < v.Count; i++)
            {
                q.Add(new Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
            }
            return q;
        }
    }
}
// Builds a phrase query from the shingle analyzer's token stream for
// "this sentence", using position increments to place each token (shingles
// can share a position with their first word), then checks the ranking.
public virtual void TestShingleAnalyzerWrapperPhraseQuery()
{
    PhraseQuery q = new PhraseQuery();
    TokenStream ts = analyzer.TokenStream("content", "this sentence");
    try
    {
        int j = -1; // running token position
        IPositionIncrementAttribute posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
        ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            j += posIncrAtt.PositionIncrement;
            string termText = termAtt.ToString();
            q.Add(new Term("content", termText), j);
        }
        ts.End();
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(ts);
    }
    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
    int[] ranks = new int[] { 0 }; // expect only doc 0 to match
    CompareRanks(hits, ranks);
}
// Checks that CountingCollector records exactly one query entry per document
// for the phrase "b c", with documents alternating between phrase frequency
// 2.0 and 1.0. NOTE: the loop consumes two docs per iteration via the
// inner ++i.
public virtual void TestPhraseQuery()
{
    PhraseQuery q = new PhraseQuery();
    q.Add(new Term("f", "b"));
    q.Add(new Term("f", "c"));
    CountingCollector c = new CountingCollector(TopScoreDocCollector.Create(10, true));
    s.Search(q, null, c);
    int maxDocs = s.IndexReader.MaxDoc;
    Assert.AreEqual(maxDocs, c.DocCounts.Count);
    for (int i = 0; i < maxDocs; i++)
    {
        // even-indexed doc: phrase occurs twice
        IDictionary<Query, float?> doc0 = c.DocCounts[i];
        Assert.AreEqual(1, doc0.Count);
        Assert.AreEqual(2.0F, doc0[q], FLOAT_TOLERANCE);
        // the following doc: phrase occurs once
        IDictionary<Query, float?> doc1 = c.DocCounts[++i];
        Assert.AreEqual(1, doc1.Count);
        Assert.AreEqual(1.0F, doc1[q], FLOAT_TOLERANCE);
    }
}
// Legacy (3.x-style) variant: builds a phrase query from a bigram shingle
// analyzer's tokens for "this sentence" and checks the ranking.
// NOTE(review): the token stream is never Reset(), End()-ed or closed —
// presumably allowed by the legacy TokenStream contract; confirm.
public void TestShingleAnalyzerWrapperPhraseQuery()
{
    Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);
    Searcher = SetUpSearcher(analyzer);
    var q = new PhraseQuery();
    var ts = analyzer.TokenStream("content", new StringReader("this sentence"));
    var j = -1; // running token position
    var posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
    var termAtt = ts.AddAttribute<ITermAttribute>();
    while (ts.IncrementToken())
    {
        j += posIncrAtt.PositionIncrement;
        var termText = termAtt.Term;
        q.Add(new Term("content", termText), j);
    }
    var hits = Searcher.Search(q, null, 1000).ScoreDocs;
    var ranks = new[] { 0 }; // expect only doc 0 to match
    CompareRanks(hits, ranks);
}
// Creates the shared index used by the phrase tests:
//  - doc 1: "field" = "one two three four five", a two-part "repeated"
//    field, and a palindromic "palindrome" field;
//  - docs 2 and 3: identical "nonexist" fields (with a repeated "exist"
//    term) for the repetition tests.
// Also initializes the searcher and an empty PhraseQuery.
public override void SetUp()
{
    base.SetUp();
    directory = new RAMDirectory();
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    Document doc = new Document();
    doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
    IFieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
    doc.Add(repeatedField);
    doc.Add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory, true);
    query = new PhraseQuery();
}
// Three-term phrase against the palindromic field "one two three two one":
// exact and sloppy scorers must agree when slop is irrelevant, and both the
// ordered and reversed phrases must match the palindrome with slop >= 4.
public virtual void TestPalyndrome3()
{
    // search on non palyndrome, find phrase with no slop, using exact phrase scorer
    query.Slop = 0; // to use exact phrase scorer
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "three"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
    float score0 = hits[0].Score;
    //System.out.println("(exact) field: one two three: "+score0);
    QueryUtils.Check(query, searcher);

    // search on non palyndrome, find phrase with slop 3, though no slop required here.
    query.Slop = 4; // to use sloppy scorer
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score1 = hits[0].Score;
    //System.out.println("(sloppy) field: one two three: "+score1);
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(query, searcher);

    // search ordered in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 4; // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "one"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "three"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score2 = hits[0].Score;
    //System.out.println("palindrome: one two three: "+score2);
    QueryUtils.Check(query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue(score1+SCORE_COMP_THRESH<score2,"ordered scores higher in palindrome");

    // search reveresed in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 4; // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "three"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "one"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score3 = hits[0].Score;
    //System.out.println("palindrome: three two one: "+score3);
    QueryUtils.Check(query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue(score1+SCORE_COMP_THRESH<score3,"reversed scores higher in palindrome");
    //Assert.AreEqual(score2, score3, SCORE_COMP_THRESH, "ordered or reversed does not matter");
}
// Sloppy phrases over the "nonexist" docs ("phrase exist notexist exist
// found"): phrases whose term sequence exists (with or without repetitions)
// are found with modest slop; phrases that cannot occur in any document are
// not found even with unbounded slop.
public virtual void TestNonExistingPhrase()
{
    // phrase without repetitions that exists in 2 docs
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "found"));
    query.Slop = 2; // would be found this way
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs");
    QueryUtils.Check(query, searcher);

    // phrase with repetitions that exists in 2 docs
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Slop = 1; // would be found
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs");
    QueryUtils.Check(query, searcher);

    // phrase I with repetitions that does not exist in any doc
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "phrase"));
    query.Slop = 1000; // would not be found no matter how high the slop is
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);

    // phrase II with repetitions that does not exist in any doc
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Slop = 1000; // would not be found no matter how high the slop is
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);
}
// Newer variant of the palindrome test: exact and sloppy scorers must agree
// when slop does not matter, both ordered and reversed phrases match the
// palindrome with slop >= 4, and Explain() must not throw.
public virtual void TestPalyndrome3()
{
    // search on non palyndrome, find phrase with no slop, using exact phrase scorer
    query.Slop = 0; // to use exact phrase scorer
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "three"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
    float score0 = hits[0].Score;
    //System.out.println("(exact) field: one two three: "+score0);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    // just make sure no exc:
    searcher.Explain(query, 0);

    // search on non palyndrome, find phrase with slop 3, though no slop required here.
    query.Slop = 4; // to use sloppy scorer
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    float score1 = hits[0].Score;
    //System.out.println("(sloppy) field: one two three: "+score1);
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);

    // search ordered in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 4; // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "one"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "three"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    // just make sure no exc:
    searcher.Explain(query, 0);
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    //float score2 = hits[0].Score;
    //System.out.println("palindrome: one two three: "+score2);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);

    // search reveresed in palyndrome, find it twice
    query = new PhraseQuery();
    query.Slop = 4; // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "three"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "one"));
    hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length, "just sloppy enough");
    //float score3 = hits[0].Score;
    //System.out.println("palindrome: three two one: "+score3);
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
    //Assert.AreEqual("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
}
// Randomized end-to-end test: indexes documents of >4096 terms (so postings
// span multiple chunks), where terms are either fresh random strings or
// sub-phrases copied from earlier docs; then verifies that a phrase query
// built from a random contiguous slice of a document finds that document.
public virtual void TestRandomPhrases()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random);
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
    IList<IList<string>> docs = new List<IList<string>>(); // per-doc token lists, kept for later slicing
    Documents.Document d = new Documents.Document();
    Field f = NewTextField("f", "", Field.Store.NO);
    d.Add(f);

    Random r = Random;
    int NUM_DOCS = AtLeast(10);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        // must be > 4096 so it spans multiple chunks
        int termCount = TestUtil.NextInt32(Random, 4097, 8200);
        IList<string> doc = new List<string>();
        StringBuilder sb = new StringBuilder();
        while (doc.Count < termCount)
        {
            if (r.Next(5) == 1 || docs.Count == 0)
            {
                // make new non-empty-string term
                string term;
                while (true)
                {
                    term = TestUtil.RandomUnicodeString(r);
                    if (term.Length > 0)
                    {
                        break;
                    }
                }
                Exception priorException = null; // LUCENENET: No need to cast to IOExcpetion
                // run the term through the analyzer so the doc list holds the
                // same tokens the index will contain
                TokenStream ts = analyzer.GetTokenStream("ignore", new StringReader(term));
                try
                {
                    ICharTermAttribute termAttr = ts.AddAttribute<ICharTermAttribute>();
                    ts.Reset();
                    while (ts.IncrementToken())
                    {
                        string text = termAttr.ToString();
                        doc.Add(text);
                        sb.Append(text).Append(' ');
                    }
                    ts.End();
                }
                catch (Exception e) when (e.IsIOException())
                {
                    priorException = e;
                }
                finally
                {
                    IOUtils.DisposeWhileHandlingException(priorException, ts);
                }
            }
            else
            {
                // pick existing sub-phrase
                IList<string> lastDoc = docs[r.Next(docs.Count)];
                int len = TestUtil.NextInt32(r, 1, 10);
                int start = r.Next(lastDoc.Count - len);
                for (int k = start; k < start + len; k++)
                {
                    string t = lastDoc[k];
                    doc.Add(t);
                    sb.Append(t).Append(' ');
                }
            }
        }
        docs.Add(doc);
        f.SetStringValue(sb.ToString());
        w.AddDocument(d);
    }

    IndexReader reader = w.GetReader();
    IndexSearcher s = NewSearcher(reader);
    w.Dispose();

    // now search
    int num = AtLeast(10);
    for (int i = 0; i < num; i++)
    {
        // pick a random contiguous slice of a random doc as the phrase
        int docID = r.Next(docs.Count);
        IList<string> doc = docs[docID];
        int numTerm = TestUtil.NextInt32(r, 2, 20);
        int start = r.Next(doc.Count - numTerm);
        PhraseQuery pq = new PhraseQuery();
        StringBuilder sb = new StringBuilder();
        for (int t = start; t < start + numTerm; t++)
        {
            pq.Add(new Term("f", doc[t]));
            sb.Append(doc[t]).Append(' ');
        }
        TopDocs hits = s.Search(pq, NUM_DOCS);
        bool found = false;
        for (int j = 0; j < hits.ScoreDocs.Length; j++)
        {
            if (hits.ScoreDocs[j].Doc == docID)
            {
                found = true;
                break;
            }
        }
        Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
    }
    reader.Dispose();
    dir.Dispose();
}
// Stores a reference to the enclosing PhraseQuery for this nested class
// (legacy Java-port pattern for inner-class "outer this" access).
private void InitBlock(PhraseQuery enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}
// Exercises a PhraseQuery nested inside a BooleanQuery (ConjunctionScorer):
// first with a two-doc index where only one doc satisfies both MUST clauses,
// then with a rebuilt index checking clause-order independence.
public virtual void TestPhraseQueryInConjunctionScorer()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document doc = new Document();
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory, true);

    // the phrase alone matches both docs
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("source", "marketing"));
    phraseQuery.Add(new Term("source", "info"));
    ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(phraseQuery, searcher);

    // ANDed with the term query, only the second doc matches
    TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    QueryUtils.Check(termQuery, searcher);

    searcher.Close();

    // rebuild the index (create=true) with three "map ... entry" docs
    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory, true);

    termQuery = new TermQuery(new Term("contents", "woo"));
    phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("contents", "map"));
    phraseQuery.Add(new Term("contents", "entry"));

    hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);
    hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length); // "map foobarword entry" is not adjacent

    // conjunction must give the same result regardless of clause order
    booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(phraseQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);

    booleanQuery = new BooleanQuery();
    booleanQuery.Add(phraseQuery, Occur.MUST);
    booleanQuery.Add(termQuery, Occur.MUST);
    hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits.Length);
    QueryUtils.Check(booleanQuery, searcher);

    searcher.Close();
    directory.Close();
}
// Weight for the enclosing PhraseQuery: captures the outer instance, then
// caches the searcher's Similarity and the combined idf of the phrase terms.
public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
{
    InitBlock(enclosingInstance);
    this.similarity = Enclosing_Instance.GetSimilarity(searcher);
    idf = similarity.Idf(Enclosing_Instance.terms, searcher);
}