public virtual void TestMulipleTerms()
{
    // Terms sit two positions apart each, so two total "moves" are needed.
    query.SetSlop(2);
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "five"));
    ScoreDoc[] matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "two total moves");
    QueryUtils.Check(query, searcher);

    // Reversed order needs six moves, so a slop of 5 must miss...
    query = new PhraseQuery();
    query.SetSlop(5); // it takes six moves to match this phrase
    query.Add(new Term("field", "five"));
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "one"));
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length, "slop of 5 not close enough");
    QueryUtils.Check(query, searcher);

    // ...while a slop of 6 is exactly enough.
    query.SetSlop(6);
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "slop of 6 just right");
    QueryUtils.Check(query, searcher);
}
public virtual void TestNotCloseEnough()
{
    // "one" and "five" are four positions apart, so a slop of 2 cannot match.
    query.SetSlop(2);
    query.Add(new Term("Field", "one"));
    query.Add(new Term("Field", "five"));
    Hits matches = searcher.Search(query);
    Assert.AreEqual(0, matches.Length());
}
/// <summary>
/// HTTP handler: searches the tag index for the keyword in the "wd" request
/// parameter and writes the matching tag names back as JSON.
/// </summary>
public void ProcessRequest(HttpContext context)
{
    context.Response.ContentType = "text/plain";
    string searchKey = context.Request["wd"];
    string indexPath = context.Server.MapPath("../IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Build a phrase query from the segmented keyword; multiple Add()ed
        // terms are AND-ed within the phrase.
        PhraseQuery query = new PhraseQuery();
        foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
        {
            query.Add(new Term("tag", word));
        }
        query.SetSlop(100); // maximum allowed distance between the phrase terms

        // Collect up to 1000 hits; only the first page (top 10) is rendered.
        // TopDocs(20, 10) would instead return hits 20-30 for paging.
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);
        ScoreDoc[] docs = collector.TopDocs(0, 10).scoreDocs;

        var tagModels = new List<Picture.Model.TagModel>();
        for (int i = 0; i < docs.Length; i++)
        {
            int docId = docs[i].doc; // Lucene's internal document id
            Document doc = searcher.Doc(docId); // fetch the full document lazily
            Picture.Model.TagModel tag = new Picture.Model.TagModel();
            // Highlight the keyword in the tag using the Pan Gu highlighter.
            tag.TagName = Picture.Utility.SplitContent.HightLight(searchKey, doc.Get("tag"));
            tag.TId = Convert.ToInt32(doc.Get("id"));
            tagModels.Add(tag);
        }

        SearchPreviewResult result = new SearchPreviewResult() { q = searchKey, p = false };
        foreach (var item in tagModels)
        {
            result.s.Add(item.TagName);
        }
        System.Web.Script.Serialization.JavaScriptSerializer jss = new System.Web.Script.Serialization.JavaScriptSerializer();
        context.Response.Write(jss.Serialize(result));
    }
    finally
    {
        // Fix: the original leaked these handles on every request.
        searcher.Close();
        reader.Close();
        directory.Close();
    }
}
public virtual void TestPalyndrome3()
{
    // Non-palindrome field, slop 0: exercises the exact phrase scorer.
    query.SetSlop(0); // to use exact phrase scorer
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "three"));
    ScoreDoc[] matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "phrase found with exact phrase scorer");
    float score0 = matches[0].score;
    //System.out.println("(exact) field: one two three: "+score0);
    QueryUtils.Check(query, searcher);

    // Same phrase with slop 4 (sloppy scorer); no slop is actually consumed,
    // so the score must equal the exact scorer's.
    query.SetSlop(4); // to use sloppy scorer
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "just sloppy enough");
    float score1 = matches[0].score;
    //System.out.println("(sloppy) field: one two three: "+score1);
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(query, searcher);

    // Ordered phrase in the palindrome field.
    query = new PhraseQuery();
    query.SetSlop(4); // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "one"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "three"));
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "just sloppy enough");
    float score2 = matches[0].score;
    //System.out.println("palindrome: one two three: "+score2);
    QueryUtils.Check(query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue(score1+SCORE_COMP_THRESH<score2,"ordered scores higher in palindrome");

    // Reversed phrase in the palindrome field.
    query = new PhraseQuery();
    query.SetSlop(4); // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "three"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "one"));
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "just sloppy enough");
    float score3 = matches[0].score;
    //System.out.println("palindrome: three two one: "+score3);
    QueryUtils.Check(query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //Assert.IsTrue(score1+SCORE_COMP_THRESH<score3,"reversed scores higher in palindrome");
    //Assert.AreEqual(score2, score3, SCORE_COMP_THRESH, "ordered or reversed does not matter");
}
/// <summary>
/// Searches the index for the given keyword over both the "msg" and "title"
/// fields and binds the results to BookListRepeater.
/// </summary>
protected void SearchContent(string kw)
{
    string indexPath = @"D:\lucenedir";
    // Pan Gu tokens are indexed lower-cased, so the query must be lower-cased too.
    kw = kw.ToLower();
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Fix: segment the keyword once and reuse it for both fields
        // (the original called PanGuSplit twice with the same input).
        var words = Common.WebCommon.PanGuSplit(kw);

        PhraseQuery queryMsg = new PhraseQuery();
        foreach (string word in words)
        {
            queryMsg.Add(new Term("msg", word)); // match against the article body
        }
        // Cap the distance between query terms; words that far apart in a
        // document are no longer a meaningful phrase match.
        queryMsg.SetSlop(100);

        PhraseQuery queryTitle = new PhraseQuery();
        foreach (string word in words)
        {
            queryTitle.Add(new Term("title", word));
        }
        queryTitle.SetSlop(100);

        // Either field may satisfy the search (OR semantics).
        BooleanQuery query = new BooleanQuery();
        query.Add(queryMsg, BooleanClause.Occur.SHOULD);
        query.Add(queryTitle, BooleanClause.Occur.SHOULD);

        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);
        // TopDocs(0, total) fetches every hit; TopDocs(300, 20) would fetch
        // hits 300-320 and could be used to implement paging.
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

        List<SearchResult> list = new List<SearchResult>();
        for (int i = 0; i < docs.Length; i++)
        {
            // ScoreDoc only carries the internal document id; fetch the full
            // Document on demand to keep memory pressure low.
            int docId = docs[i].doc;
            Document doc = searcher.Doc(docId);
            SearchResult result = new SearchResult();
            result.ContentDescription = WebCommon.Highlight(kw, WebCommon.CutString(doc.Get("msg"), 150)); // highlighted snippet
            result.Title = doc.Get("title");
            result.Id = Convert.ToInt32(doc.Get("id"));
            result.PublishDate = Convert.ToDateTime(doc.Get("PublishDate"));
            result.ISBN = doc.Get("ISBN");
            result.Author = doc.Get("Author");
            result.UnitPrice = Convert.ToDecimal(doc.Get("UnitPrice"));
            list.Add(result);
        }
        this.BookListRepeater.DataSource = list;
        this.BookListRepeater.DataBind();
        AddKeyWord(kw);
    }
    finally
    {
        // Fix: the original leaked these handles; release them even on error.
        searcher.Close();
        reader.Close();
        directory.Close();
    }
}
/// <summary>
/// Indexes two small documents with a custom SimpleSimilarity and runs
/// term, boolean and phrase searches through anonymous hit collectors,
/// which perform the score assertions.
/// </summary>
public virtual void TestSimilarity_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
    writer.SetSimilarity(new SimpleSimilarity());
    Document d1 = new Document();
    d1.Add(Field.Text("Field", "a c"));
    Document d2 = new Document();
    d2.Add(Field.Text("Field", "a b c"));
    writer.AddDocument(d1);
    writer.AddDocument(d2);
    writer.Optimize();
    writer.Close();

    // Fix: removed an unused local ("float[] scores = new float[4];") that
    // was never read or written anywhere in the method.

    Searcher searcher = new IndexSearcher(store);
    searcher.SetSimilarity(new SimpleSimilarity());
    Term a = new Term("Field", "a");
    Term b = new Term("Field", "b");
    Term c = new Term("Field", "c");

    searcher.Search(new TermQuery(b), new AnonymousClassHitCollector(this));

    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(a), false, false);
    bq.Add(new TermQuery(b), false, false);
    //System.out.println(bq.toString("Field"));
    searcher.Search(bq, new AnonymousClassHitCollector1(this));

    PhraseQuery pq = new PhraseQuery();
    pq.Add(a);
    pq.Add(c);
    //System.out.println(pq.toString("Field"));
    searcher.Search(pq, new AnonymousClassHitCollector2(this));

    pq.SetSlop(2);
    //System.out.println(pq.toString("Field"));
    searcher.Search(pq, new AnonymousClassHitCollector3(this));
}
public virtual void TestMulipleTerms()
{
    SetUp();
    // Terms sit two positions apart each: two total moves needed.
    query.SetSlop(2);
    query.Add(new Term("Field", "one"));
    query.Add(new Term("Field", "three"));
    query.Add(new Term("Field", "five"));
    Hits matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "two total moves");

    // Reversed order requires six moves: slop 5 misses, slop 6 matches.
    query = new PhraseQuery();
    query.SetSlop(5); // it takes six moves to match this phrase
    query.Add(new Term("Field", "five"));
    query.Add(new Term("Field", "three"));
    query.Add(new Term("Field", "one"));
    matches = searcher.Search(query);
    Assert.AreEqual(0, matches.Length(), "slop of 5 not close enough");

    query.SetSlop(6);
    matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "slop of 6 just right");
}
/// <summary>
/// Indexes two small documents under a custom SimpleSimilarity and runs
/// term, boolean and phrase searches; the anonymous collectors hold the
/// score assertions.
/// </summary>
public virtual void TestSimilarity_Renamed()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(new SimpleSimilarity());

    Document d1 = new Document();
    d1.Add(new Field("field", "a c", Field.Store.YES, Field.Index.ANALYZED));
    Document d2 = new Document();
    d2.Add(new Field("field", "a b c", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);
    writer.AddDocument(d2);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(store);
    searcher.SetSimilarity(new SimpleSimilarity());
    Term a = new Term("field", "a");
    Term b = new Term("field", "b");
    Term c = new Term("field", "c");

    // Single term.
    searcher.Search(new TermQuery(b), new AnonymousClassCollector(this));

    // Boolean OR of two terms.
    BooleanQuery boolQuery = new BooleanQuery();
    boolQuery.Add(new TermQuery(a), BooleanClause.Occur.SHOULD);
    boolQuery.Add(new TermQuery(b), BooleanClause.Occur.SHOULD);
    //System.out.println(bq.toString("field"));
    searcher.Search(boolQuery, new AnonymousClassCollector1(this));

    // Exact phrase, then the same phrase with slop 2.
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(a);
    phraseQuery.Add(c);
    //System.out.println(pq.toString("field"));
    searcher.Search(phraseQuery, new AnonymousClassCollector2(this));

    phraseQuery.SetSlop(2);
    //System.out.println(pq.toString("field"));
    searcher.Search(phraseQuery, new AnonymousClassCollector3(this));
}
public virtual void TestNonExistingPhrase()
{
    // Phrase without repetitions that exists in 2 docs.
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "found"));
    query.SetSlop(2); // would be found this way
    ScoreDoc[] matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(2, matches.Length, "phrase without repetitions exists in 2 docs");
    QueryUtils.Check(query, searcher);

    // Phrase with repetitions that exists in 2 docs.
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.SetSlop(1); // would be found
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(2, matches.Length, "phrase with repetitions exists in two docs");
    QueryUtils.Check(query, searcher);

    // Phrase I with repetitions that does not exist in any doc.
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "notexist"));
    query.Add(new Term("nonexist", "phrase"));
    query.SetSlop(1000); // would not be found no matter how high the slop is
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);

    // Phrase II with repetitions that does not exist in any doc.
    query = new PhraseQuery();
    query.Add(new Term("nonexist", "phrase"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.Add(new Term("nonexist", "exist"));
    query.SetSlop(1000); // would not be found no matter how high the slop is
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length, "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);
}
public virtual void TestOrderDoesntMatter()
{
    SetUp();
    // Adjacent terms in reverse order need a slop of at least two.
    query.SetSlop(2); // must be at least two for reverse order match
    query.Add(new Term("Field", "two"));
    query.Add(new Term("Field", "one"));
    Hits matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "just sloppy enough");

    // Terms two apart in reverse order need more than a slop of two.
    query = new PhraseQuery();
    query.SetSlop(2);
    query.Add(new Term("Field", "three"));
    query.Add(new Term("Field", "one"));
    matches = searcher.Search(query);
    Assert.AreEqual(0, matches.Length(), "not sloppy enough");
}
public virtual void TestOrderDoesntMatter()
{
    // Adjacent terms in reverse order need a slop of at least two.
    query.SetSlop(2); // must be at least two for reverse order match
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    ScoreDoc[] matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "just sloppy enough");
    QueryUtils.Check(query, searcher);

    // Terms two apart in reverse order need more than a slop of two.
    query = new PhraseQuery();
    query.SetSlop(2);
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "one"));
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length, "not sloppy enough");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// With an effectively unlimited slop, all three documents match, but the
/// ones whose terms are closer together must score higher.
/// </summary>
public virtual void TestSlopScoring()
{
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Three docs with 0, 1 and 2 filler words between the phrase terms.
    Document doc = new Document();
    doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    Document doc2 = new Document();
    doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc2);
    Document doc3 = new Document();
    doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc3);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(directory);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.SetSlop(System.Int32.MaxValue);
    ScoreDoc[] results = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, results.Length);

    // Make sure that those matches where the terms appear closer to
    // each other get a higher score:
    Assert.AreEqual(0.71, results[0].score, 0.01);
    Assert.AreEqual(0, results[0].doc);
    Assert.AreEqual(0.44, results[1].score, 0.01);
    Assert.AreEqual(1, results[1].doc);
    Assert.AreEqual(0.31, results[2].score, 0.01);
    Assert.AreEqual(2, results[2].doc);
    QueryUtils.Check(query, searcher);
}
public virtual void TestSlop1()
{
    SetUp();
    // Ensures slop of 1 works with terms in order.
    query.SetSlop(1);
    query.Add(new Term("Field", "one"));
    query.Add(new Term("Field", "two"));
    Hits matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "in order");

    // Ensures slop of 1 does not work for phrases out of order;
    // must be at least 2.
    query = new PhraseQuery();
    query.SetSlop(1);
    query.Add(new Term("Field", "two"));
    query.Add(new Term("Field", "one"));
    matches = searcher.Search(query);
    Assert.AreEqual(0, matches.Length(), "reversed, slop not 2 or more");
}
public virtual void TestSlop1()
{
    // Ensures slop of 1 works with terms in order.
    query.SetSlop(1);
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    ScoreDoc[] matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, matches.Length, "in order");
    QueryUtils.Check(query, searcher);

    // Ensures slop of 1 does not work for phrases out of order;
    // must be at least 2.
    query = new PhraseQuery();
    query.SetSlop(1);
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length, "reversed, slop not 2 or more");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Runs <paramref name="iter"/> random sloppy phrase queries against the
/// searcher and returns the accumulated hit-count sum.
/// </summary>
public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
{
    int total = 0;
    for (int i = 0; i < iter; i++)
    {
        // At least 2 and at most maxClauses phrase terms.
        int clauseCount = r.Next(maxClauses - 1) + 2; // min 2 clauses
        PhraseQuery q = new PhraseQuery();
        for (int j = 0; j < clauseCount; j++)
        {
            // Random single-letter term ('A', 'B', ...) placed at position j.
            int termIndex = r.Next(termsInIndex);
            q.Add(new Term("f", System.Convert.ToString((char)(termIndex + 'A'))), j);
        }
        q.SetSlop(termsInIndex); // this could be random too
        CountingHitCollector collector = new CountingHitCollector();
        s.Search(q, collector);
        total += collector.GetSum();
    }
    return (total);
}
/// <summary>
/// Searches the index for the keyword given in the "SearchKey" query-string
/// parameter and binds the matching books to Repeater1.
/// </summary>
private void SearchFromIndexData()
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Build the phrase query from the segmented search key; multiple
        // Add()ed terms are AND-ed within the phrase.
        PhraseQuery query = new PhraseQuery();
        foreach (string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
        {
            query.Add(new Term("content", word));
        }
        query.SetSlop(100); // maximum allowed distance between the phrase terms

        // Collect up to 1000 hits and fetch them all; TopDocs(20, 10) would
        // instead return hits 20-30 for paging.
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

        List<PZYM.Shop.Model.Books> bookResult = new List<PZYM.Shop.Model.Books>();
        for (int i = 0; i < docs.Length; i++)
        {
            int docId = docs[i].doc; // Lucene's internal document id
            Document doc = searcher.Doc(docId); // fetch the full document lazily
            PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
            book.Title = doc.Get("title");
            // Highlight the search key in the content using the Pan Gu highlighter.
            book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
            book.Id = Convert.ToInt32(doc.Get("id"));
            bookResult.Add(book);
        }
        Repeater1.DataSource = bookResult;
        Repeater1.DataBind();
    }
    finally
    {
        // Fix: the original leaked these handles on every request.
        searcher.Close();
        reader.Close();
        directory.Close();
    }
}
/// <summary>
/// Indexes the single document into a fresh RAM directory, runs the phrase
/// query with the given slop, asserts the hit count, and returns the max score.
/// </summary>
private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
{
    query.SetSlop(slop);

    // One-document throwaway index.
    RAMDirectory dir = new RAMDirectory();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, MaxFieldLength.UNLIMITED);
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(dir);
    TopDocs topDocs = searcher.Search(query, null, 10);
    //System.out.println("slop: "+slop+" query: "+query+" doc: "+doc+" Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore());
    Assert.AreEqual(expectedNumResults, topDocs.TotalHits, "slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits");
    //QueryUtils.check(query,searcher);
    searcher.Close();
    dir.Close();
    return (topDocs.GetMaxScore());
}
/// <summary>
/// Searches the comment index at <paramref name="IndexSavePath"/> for
/// <paramref name="searchKey"/> and returns the matching comments.
/// </summary>
private static List<CommentSet> SearchFromIndexData(string IndexSavePath, string searchKey)
{
    string indexPath = IndexSavePath;
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Build the phrase query from the segmented search key; multiple
        // Add()ed terms are AND-ed within the phrase.
        PhraseQuery query = new PhraseQuery();
        foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
        {
            query.Add(new Term("content", word));
        }
        query.SetSlop(100); // maximum allowed distance between the phrase terms

        // Collect up to 1000 hits and fetch them all; TopDocs(20, 10) would
        // instead return hits 20-30 for paging.
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

        var commontModels = new List<CommentSet>();
        for (int i = 0; i < docs.Length; i++)
        {
            int docId = docs[i].doc; // Lucene's internal document id
            Document doc = searcher.Doc(docId); // fetch the full document lazily
            CommentSet commont = new CommentSet();
            commont.Content = doc.Get("content");
            commont.Id = Convert.ToInt32(doc.Get("id"));
            commontModels.Add(commont);
        }
        return commontModels;
    }
    finally
    {
        // Fix: the original leaked these handles on every call.
        searcher.Close();
        reader.Close();
        directory.Close();
    }
}
/// <summary>
/// Convenience factory: builds a PhraseQuery over the given words in the
/// given field with the supplied slop and boost.
/// </summary>
protected Query Pq(float boost, int slop, String field, params String[] texts)
{
    PhraseQuery phrase = new PhraseQuery();
    foreach (String word in texts)
    {
        phrase.Add(new Term(field, word));
    }
    phrase.SetBoost(boost);
    phrase.SetSlop(slop);
    return phrase;
}
public void CompositeTermPhraseWildcardTests()
{
    // Hand-build the reference query: MUST term + MUST phrase + SHOULD wildcard.
    BooleanQuery originalQuery = new BooleanQuery();

    Term term = new Term("_name", "value");
    TermQuery termQuery = new TermQuery(term);
    originalQuery.Add(termQuery, BooleanClause.Occur.MUST);

    PhraseQuery phraseQuery = new PhraseQuery();
    Term phraseTerm = new Term("_name", "phrase");
    phraseQuery.SetSlop(2);
    phraseQuery.Add(phraseTerm);
    originalQuery.Add(phraseQuery, BooleanClause.Occur.MUST);

    Term wildcardTerm = new Term("_name", "*wildcard*");
    WildcardQuery wildcardQuery = new WildcardQuery(wildcardTerm);
    originalQuery.Add(wildcardQuery, BooleanClause.Occur.SHOULD);

    string queryString = originalQuery.ToString();

    // Build the same query through the fluent QueryBuilder API.
    QueryBuilder builder = new QueryBuilder();
    builder.Setup
    (
        x => x.Term("_name", "value"),
        x => x.Phrase(2).AddTerm("_name", "phrase"),
        x => x.WildCard("_name", "*wildcard*", BooleanClause.Occur.SHOULD)
    );
    Query replacementQuery = builder.Build();
    string newQueryString = replacementQuery.ToString();

    // Both construction paths must serialize identically.
    Assert.AreEqual(queryString, newQueryString);
    Console.Write(queryString);
}
public void BoostedCaseInsensitivePhrase()
{
    // Hand-build the reference: a boosted, sloppy, MUST phrase clause.
    BooleanQuery originalQuery = new BooleanQuery();
    Term term = new Term("_name", "value");
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.SetSlop(2);
    phraseQuery.Add(term);
    phraseQuery.SetBoost(10);
    originalQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
    string queryString = originalQuery.ToString();

    // Builder gets mixed-case input; it is expected to normalize the case.
    QueryBuilder builder = new QueryBuilder();
    builder.Setup(x => x.Phrase(2, 10).AddTerm("_name", "Value"));
    Query replacementQuery = builder.Build();
    string newQueryString = replacementQuery.ToString();

    Assert.AreEqual(queryString, newQueryString);
    Console.Write(queryString);
}
/*
 * Check if src and dest have overlapped part and if it is, create PhraseQueries and add expandQueries.
 *
 * ex1) src="a b", dest="c d" => no overlap
 * ex2) src="a b", dest="a b c" => no overlap
 * ex3) src="a b", dest="b c" => overlap; expandQueries={"a b c"}
 * ex4) src="a b c", dest="b c d" => overlap; expandQueries={"a b c d"}
 * ex5) src="a b c", dest="b c" => no overlap
 * ex6) src="a b c", dest="b" => no overlap
 * ex7) src="a a a a", dest="a a a" => overlap;
 *      expandQueries={"a a a a a","a a a a a a"}
 * ex8) src="a b c d", dest="b c" => no overlap
 */
private void CheckOverlap(Dictionary<Query,Query> expandQueries, Term[] src, Term[] dest, int slop, float boost)
{
    // beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
    // is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
    // converts PhraseQuery to TermQuery)
    for (int i = 1; i < src.Length; i++)
    {
        // Does the suffix of src starting at position i match (term text only)
        // the corresponding prefix of dest?
        bool overlap = true;
        for (int j = i; j < src.Length; j++)
        {
            if ((j - i) < dest.Length && !src[j].Text().Equals(dest[j - i].Text()))
            {
                overlap = false;
                break;
            }
        }
        // Only expand when dest extends past the end of src (ex5/ex6 above are
        // rejected here); otherwise the merged phrase adds nothing new.
        if (overlap && src.Length - i < dest.Length)
        {
            // Merged phrase = all of src followed by dest's non-overlapping tail.
            PhraseQuery pq = new PhraseQuery();
            foreach (Term srcTerm in src)
                pq.Add(srcTerm);
            for (int k = src.Length - i; k < dest.Length; k++)
            {
                pq.Add(new Term(src[0].Field(), dest[k].Text()));
            }
            pq.SetSlop(slop);
            pq.SetBoost(boost);
            // Dictionary doubles as a set: only record each expansion once.
            if (!expandQueries.ContainsKey(pq))
                expandQueries.Add(pq,pq);
        }
    }
}
public virtual void TestNotCloseEnough()
{
    // "one" and "five" are four positions apart, so a slop of 2 cannot match.
    query.SetSlop(2);
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "five"));
    ScoreDoc[] matches = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length);
    QueryUtils.Check(query, searcher);
}
// Builds the Lucene query for a string-valued token: numbers/strings become a
// term, fuzzy or phrase query depending on how the text analyzes; wildcard
// strings become a wildcard or (when cheaper) a prefix query.
private Query CreateStringValueQuery(QueryFieldValue value, FieldInfo currentField)
{
    switch (value.Token)
    {
        case SnLucLexer.Token.Number:
        case SnLucLexer.Token.String:
            // Marker value for "empty": match the raw marker term as-is.
            if(value.StringValue == ContentQuery.EmptyText)
                return new TermQuery(new Term(currentField.Name, value.StringValue));
            // Marker for an empty inner query: Id 0 — presumably no content
            // ever has Id 0, so this matches nothing. TODO confirm.
            if (value.StringValue == ContentQuery.EmptyInnerQueryText)
                return new TermQuery(new Term("Id", NumericUtils.IntToPrefixCoded(0)));
            var words = GetAnalyzedText(currentField.Name, value.StringValue);
            // Analysis may swallow the text entirely; fall back to an
            // empty-string term rather than returning null.
            if (words.Length == 0)
                words = new String[] { String.Empty }; //return null;
            if (words.Length == 1)
            {
                // Single analyzed word: plain term query, or fuzzy when a
                // fuzzy value (similarity) was supplied.
                var term = new Term(currentField.Name, words[0]);
                if(value.FuzzyValue == null)
                    return new TermQuery(term);
                return new FuzzyQuery(term, Convert.ToSingle(value.FuzzyValue));
            }
            // Multiple analyzed words: phrase query; here the fuzzy value is
            // reinterpreted as the phrase slop.
            var phraseQuery = new PhraseQuery();
            foreach(var word in words)
                phraseQuery.Add(new Term(currentField.Name, word));
            if (value.FuzzyValue != null)
            {
                var slop = Convert.ToInt32(value.FuzzyValue.Value);
                phraseQuery.SetSlop(slop);
            }
            return phraseQuery;
        case SnLucLexer.Token.WildcardString:
            // A pattern whose only wildcard is a trailing '*' can use the
            // cheaper PrefixQuery; anything else needs a full WildcardQuery.
            if (!value.StringValue.EndsWith("*"))
                return new WildcardQuery(new Term(currentField.Name, value.StringValue));
            var s = value.StringValue.TrimEnd('*');
            if (s.Contains('?') || s.Contains('*'))
                return new WildcardQuery(new Term(currentField.Name, value.StringValue));
            return new PrefixQuery(new Term(currentField.Name, s));
        default:
            throw new NotImplementedException("CreateValueQuery with Token: " + value.Token);
    }
}
/// <summary>
/// Runs <paramref name="iter"/> random sloppy phrase queries against the
/// searcher and returns the accumulated hit-count sum.
/// </summary>
public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
{
    int total = 0;
    for (int i = 0; i < iter; i++)
    {
        // At least 2 and at most maxClauses phrase terms.
        int clauseCount = r.Next(maxClauses - 1) + 2; // min 2 clauses
        PhraseQuery q = new PhraseQuery();
        for (int j = 0; j < clauseCount; j++)
        {
            // Random single-letter term ('A', 'B', ...) placed at position j.
            int termIndex = r.Next(termsInIndex);
            q.Add(new Term("f", System.Convert.ToString((char)(termIndex + 'A'))), j);
        }
        q.SetSlop(termsInIndex); // this could be random too
        CountingHitCollector collector = new CountingHitCollector();
        s.Search(q, collector);
        total += collector.GetSum();
    }
    return total;
}
/// <summary>
/// Indexes a single document and verifies that the given phrase is found
/// within the allowed word distance (slop).
/// </summary>
public void PhraseQueryText(string[] frase, string textoParaProcurar, int distanciaEntrePalavras)
{
    const string texto = "texto";
    using (var directory = new RAMDirectory())
    {
        // Index one whitespace-analyzed document containing the target text.
        using (var indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var doc = new Document();
            doc.Add(new Field(texto, textoParaProcurar, Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(doc);
        }

        using (var searcher = new IndexSearcher(directory, true))
        {
            // Build the phrase query with the allowed slop.
            var phraseQuery = new PhraseQuery();
            phraseQuery.SetSlop(distanciaEntrePalavras);
            foreach (var palavra in frase)
                phraseQuery.Add(new Term(texto, palavra));

            var matches = searcher.Search(phraseQuery, 10);
            var encontrou = matches.TotalHits > 0;
            var textoResultado = NaoEncontrou(textoParaProcurar, distanciaEntrePalavras, frase);
            Assert.IsTrue(encontrou, textoResultado);
        }
    }
}
public virtual void TestPalyndrome3()
{
    // Non-palindrome field, slop 0: exercises the exact phrase scorer.
    query.SetSlop(0); // to use exact phrase scorer
    query.Add(new Term("field", "one"));
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "three"));
    Hits matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "phrase found with exact phrase scorer");
    float score0 = matches.Score(0);
    //System.out.println("(exact) field: one two three: "+score0);
    QueryUtils.Check(query, searcher);

    // Same phrase with slop 4 (sloppy scorer); no slop is actually consumed,
    // so the score must equal the exact scorer's.
    query.SetSlop(4); // to use sloppy scorer
    matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "just sloppy enough");
    float score1 = matches.Score(0);
    //System.out.println("(sloppy) field: one two three: "+score1);
    Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
    QueryUtils.Check(query, searcher);

    // Ordered phrase in the palindrome field.
    query = new PhraseQuery();
    query.SetSlop(4); // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "one"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "three"));
    matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "just sloppy enough");
    float score2 = matches.Score(0);
    //System.out.println("palindrome: one two three: "+score2);
    QueryUtils.Check(query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);

    // Reversed phrase in the palindrome field.
    query = new PhraseQuery();
    query.SetSlop(4); // must be at least four for both ordered and reversed to match
    query.Add(new Term("palindrome", "three"));
    query.Add(new Term("palindrome", "two"));
    query.Add(new Term("palindrome", "one"));
    matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "just sloppy enough");
    float score3 = matches.Score(0);
    //System.out.println("palindrome: three two one: "+score3);
    QueryUtils.Check(query, searcher);
    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
    //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
    //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
}
/// <summary>
/// With an effectively unlimited slop, all three documents match, but the
/// ones whose terms are closer together must score higher.
/// </summary>
public virtual void TestSlopScoring()
{
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    // Three docs with 0, 1 and 2 filler words between the phrase terms.
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    Lucene.Net.Documents.Document doc2 = new Lucene.Net.Documents.Document();
    doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc2);
    Lucene.Net.Documents.Document doc3 = new Lucene.Net.Documents.Document();
    doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc3);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(directory);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.SetSlop(System.Int32.MaxValue);
    Hits results = searcher.Search(query);
    Assert.AreEqual(3, results.Length());

    // Make sure that those matches where the terms appear closer to
    // each other get a higher score:
    Assert.AreEqual(0.71, results.Score(0), 0.01);
    Assert.AreEqual(0, results.Id(0));
    Assert.AreEqual(0.44, results.Score(1), 0.01);
    Assert.AreEqual(1, results.Id(1));
    Assert.AreEqual(0.31, results.Score(2), 0.01);
    Assert.AreEqual(2, results.Id(2));
    QueryUtils.Check(query, searcher);
}
public virtual void TestOrderDoesntMatter()
{
    // Adjacent terms in reverse order need a slop of at least two.
    query.SetSlop(2); // must be at least two for reverse order match
    query.Add(new Term("field", "two"));
    query.Add(new Term("field", "one"));
    Hits matches = searcher.Search(query);
    Assert.AreEqual(1, matches.Length(), "just sloppy enough");
    QueryUtils.Check(query, searcher);

    // Terms two apart in reverse order need more than a slop of two.
    query = new PhraseQuery();
    query.SetSlop(2);
    query.Add(new Term("field", "three"));
    query.Add(new Term("field", "one"));
    matches = searcher.Search(query);
    Assert.AreEqual(0, matches.Length(), "not sloppy enough");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Indexes <paramref name="doc"/> into a fresh RAM directory, runs
/// <paramref name="query"/> with the given slop and asserts the hit count.
/// </summary>
/// <param name="doc">The single document to index.</param>
/// <param name="query">The phrase query to execute (slop is overwritten).</param>
/// <param name="slop">The slop to apply to the query.</param>
/// <param name="expectedNumResults">The expected total number of hits.</param>
/// <returns>The maximum score of the result set.</returns>
private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
{
    query.SetSlop(slop);
    RAMDirectory ramDir = new RAMDirectory();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(ramDir, analyzer, MaxFieldLength.UNLIMITED);
    writer.AddDocument(doc);
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(ramDir);
    try
    {
        TopDocs td = searcher.Search(query, null, 10);
        //System.out.println("slop: "+slop+" query: "+query+" doc: "+doc+" Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore());
        Assert.AreEqual(expectedNumResults, td.totalHits, "slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits");
        //QueryUtils.check(query,searcher);
        return td.GetMaxScore();
    }
    finally
    {
        // Close the searcher and directory even when the assertion above
        // throws; the original code leaked both on a failing test.
        searcher.Close();
        ramDir.Close();
    }
}
/// <summary>
/// Adds a PhraseQuery over a single field to the builder.
/// </summary>
/// <param name="fieldName">Name of the field to search.</param>
/// <param name="phrase">The phrase to search for.</param>
/// <param name="boostLevel">Optional boost level for the query.</param>
/// <param name="asFilter">Whether the query is added as a filter condition.</param>
/// <returns>This LuceneSearchBuilder, for chaining.</returns>
public LuceneSearchBuilder WithPhrase(string fieldName, string phrase, BoostLevel? boostLevel = null, bool asFilter = false)
{
    string scrubbed = ClauseScrubber.LuceneKeywordsScrub(phrase);
    if (string.IsNullOrEmpty(scrubbed))
        return this;

    // A single character cannot form a phrase; fall back to a plain field query.
    if (scrubbed.Length == 1)
        return WithField(fieldName, scrubbed, false, boostLevel, asFilter);

    PhraseQuery phraseQuery = new PhraseQuery();
    foreach (string segment in ClauseScrubber.SegmentForPhraseQuery(scrubbed))
        phraseQuery.Add(new Term(fieldName, segment));
    phraseQuery.SetSlop(PhraseQuerySlop);

    if (boostLevel.HasValue)
        SetBoost(phraseQuery, boostLevel.Value);

    // Route the clause to the filter list or the ordinary clause list.
    if (asFilter)
        filters.Add(new BooleanClause(phraseQuery, BooleanClause.Occur.MUST));
    else
        clauses.Add(new BooleanClause(phraseQuery, BooleanClause.Occur.MUST));
    return this;
}
/// <summary>
/// Creates a PhraseQuery with the given slop, applies the optional boost and
/// registers the query with this builder. Append the actual phrase terms
/// afterwards via the AddTerm() query extension.
/// </summary>
/// <param name="slop">The allowed distance between the terms.</param>
/// <param name="boost">A boost multiplier (1 is default / normal).</param>
/// <param name="occur">Whether it must, must not or should occur in the field.</param>
/// <param name="key">The dictionary key to allow reference beyond the initial scope.</param>
/// <returns>The generated phrase query object.</returns>
public virtual PhraseQuery Phrase(int slop, float? boost = null, BooleanClause.Occur occur = null, string key = null)
{
    PhraseQuery query = new PhraseQuery();
    query.SetSlop(slop);
    SetBoostValue(query, boost);
    Add(query, occur, key);
    return query;
}
/// <summary>
/// Adds a PhraseQuery for the same phrase across several fields, each with
/// its own boost level, combined into one BooleanQuery.
/// </summary>
/// <param name="fieldNameAndBoosts">Field names mapped to their boost levels.</param>
/// <param name="phrase">The phrase to search for.</param>
/// <param name="occur">How the per-field queries combine.</param>
/// <param name="asFilter">Whether the query is added as a filter condition.</param>
/// <returns>This LuceneSearchBuilder, for chaining.</returns>
public LuceneSearchBuilder WithPhrases(Dictionary<string, BoostLevel> fieldNameAndBoosts, string phrase, BooleanClause.Occur occur, bool asFilter = false)
{
    string scrubbed = ClauseScrubber.LuceneKeywordsScrub(phrase);
    if (string.IsNullOrEmpty(scrubbed))
        return this;

    string[] segments = ClauseScrubber.SegmentForPhraseQuery(scrubbed);

    // A single segment is no phrase; delegate to the plain multi-field form.
    if (segments.Length == 1)
        return WithFields(fieldNameAndBoosts, segments[0], false, occur, asFilter);

    BooleanQuery query = new BooleanQuery();
    foreach (KeyValuePair<string, BoostLevel> entry in fieldNameAndBoosts)
    {
        PhraseQuery phraseQuery = new PhraseQuery();
        foreach (string segment in segments)
            phraseQuery.Add(new Term(entry.Key, segment));
        phraseQuery.SetSlop(PhraseQuerySlop);
        SetBoost(phraseQuery, entry.Value);
        query.Add(phraseQuery, occur);
    }

    if (asFilter)
        filters.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
    else
        clauses.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
    return this;
}
// Runs a phrase search over the "msg" field for the keywords typed into the
// txtContent form field and binds the hits to SearchRepeater.
protected void SearchContent()
{
    string indexPath = @"D:\lucenedir";
    // Guard against a missing form field: the original ToLower() call threw a
    // NullReferenceException when txtContent was absent from the request.
    // PanGu indexing is case sensitive, so the keyword is lower-cased to
    // match the indexed terms.
    string kw = (Request.Form["txtContent"] ?? string.Empty).ToLower();
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Each token from the PanGu splitter becomes one phrase term; the
        // terms are AND-ed and their order does not matter.
        PhraseQuery query = new PhraseQuery();
        foreach (string word in Common.WebCommon.PanGuSplit(kw))
        {
            query.Add(new Term("msg", word));
        }
        // Maximum allowed distance between the query terms: terms that are
        // far apart in an article are considered unrelated.
        query.SetSlop(100);

        // TopScoreDocCollector gathers the raw hits; TopDocs(m, n) could be
        // used for paging (documents m through m+n).
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

        List<SearchResult> list = new List<SearchResult>();
        for (int i = 0; i < docs.Length; i++)
        {
            // ScoreDoc only carries the internal document id; the full
            // Document is loaded on demand, keeping memory pressure low.
            int docId = docs[i].doc;
            Document doc = searcher.Doc(docId);
            SearchResult result = new SearchResult();
            //result.Msg = Common.WebCommon.Highlight(kw, doc.Get("msg"));
            //result.Title = doc.Get("title");
            //result.Url = "/BookDeatail.apsx?id=" + doc.Get("id");
            list.Add(result);
        }
        this.SearchRepeater.DataSource = list;
        this.SearchRepeater.DataBind();
    }
    finally
    {
        // Release index resources even when searching or binding throws; the
        // original code never closed the searcher or the reader.
        searcher.Close();
        reader.Close();
    }
}
/// <summary>
/// Adds, per field, one BooleanQuery that ORs a PhraseQuery (or a PrefixQuery
/// for single-character phrases) for every supplied phrase.
/// </summary>
/// <param name="fieldNameAndBoosts">Field names mapped to their boost levels.</param>
/// <param name="phrases">The phrases to search for.</param>
/// <param name="occur">How each per-field query relates to the other clauses.</param>
/// <param name="asFilter">Whether the queries are added as filter conditions.</param>
/// <returns>This LuceneSearchBuilder, for chaining.</returns>
public LuceneSearchBuilder WithPhrases(Dictionary<string, BoostLevel> fieldNameAndBoosts, IEnumerable<string> phrases, BooleanClause.Occur occur, bool asFilter = false)
{
    foreach (KeyValuePair<string, BoostLevel> entry in fieldNameAndBoosts)
    {
        BooleanQuery fieldQuery = new BooleanQuery();
        foreach (string phrase in phrases)
        {
            string scrubbed = ClauseScrubber.LuceneKeywordsScrub(phrase);
            if (string.IsNullOrEmpty(scrubbed))
                continue;

            if (scrubbed.Length == 1)
            {
                // A single character cannot form a phrase; use a prefix match.
                Query prefixQuery = new PrefixQuery(new Term(entry.Key, scrubbed));
                SetBoost(prefixQuery, entry.Value);
                fieldQuery.Add(prefixQuery, BooleanClause.Occur.SHOULD);
            }
            else
            {
                PhraseQuery phraseQuery = new PhraseQuery();
                foreach (string segment in ClauseScrubber.SegmentForPhraseQuery(scrubbed))
                    phraseQuery.Add(new Term(entry.Key, segment));
                phraseQuery.SetSlop(PhraseQuerySlop);
                SetBoost(phraseQuery, entry.Value);
                fieldQuery.Add(phraseQuery, BooleanClause.Occur.SHOULD);
            }
        }

        BooleanClause clause = new BooleanClause(fieldQuery, occur);
        if (asFilter)
            filters.Add(clause);
        else
            clauses.Add(clause);
    }
    return this;
}
public virtual void TestSlop1()
{
    // Ensures slop of 1 works with terms in order.
    query.SetSlop(1);
    foreach (string term in new string[] { "one", "two" })
        query.Add(new Term("field", term));
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "in order");
    QueryUtils.Check(query, searcher);

    // Ensures slop of 1 does not work for phrases out of order;
    // must be at least 2.
    query = new PhraseQuery();
    query.SetSlop(1);
    foreach (string term in new string[] { "two", "one" })
        query.Add(new Term("field", term));
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "reversed, slop not 2 or more");
    QueryUtils.Check(query, searcher);
}
public virtual void TestMulipleTerms()
{
    // "one .. three .. five" requires two moves, so slop 2 matches.
    query.SetSlop(2);
    foreach (string term in new string[] { "one", "three", "five" })
        query.Add(new Term("field", term));
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "two total moves");
    QueryUtils.Check(query, searcher);

    // Fully reversed order: it takes six moves to match this phrase.
    query = new PhraseQuery();
    query.SetSlop(5);
    foreach (string term in new string[] { "five", "three", "one" })
        query.Add(new Term("field", term));
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "slop of 5 not close enough");
    QueryUtils.Check(query, searcher);

    // Raising the slop to exactly six makes it match.
    query.SetSlop(6);
    hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "slop of 6 just right");
    QueryUtils.Check(query, searcher);
}
public virtual void TestSlopScoring()
{
    // Build a three-document index where the phrase terms ("firstname",
    // "lastname") sit at increasing distances from each other.
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    string[] contents = new string[]
    {
        "foo firstname lastname foo",
        "foo firstname xxx lastname foo",
        "foo firstname xxx yyy lastname foo"
    };
    foreach (string content in contents)
    {
        Document d = new Document();
        d.Add(new Field("field", content, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(d);
    }
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(directory);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "firstname"));
    query.Add(new Term("field", "lastname"));
    query.SetSlop(System.Int32.MaxValue);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // Make sure that those matches where the terms appear closer to
    // each other get a higher score.
    double[] expectedScores = new double[] { 0.71, 0.44, 0.31 };
    for (int rank = 0; rank < expectedScores.Length; rank++)
    {
        Assert.AreEqual(expectedScores[rank], hits[rank].Score, 0.01);
        Assert.AreEqual(rank, hits[rank].Doc);
    }
    QueryUtils.Check(query, searcher);
}
public virtual void TestNonExistingPhrase()
{
    // phrase without repetitions that exists in 2 docs
    foreach (string term in new string[] { "phrase", "notexist", "found" })
        query.Add(new Term("nonexist", term));
    query.SetSlop(2); // would be found this way
    Hits hits = searcher.Search(query);
    Assert.AreEqual(2, hits.Length(), "phrase without repetitions exists in 2 docs");
    QueryUtils.Check(query, searcher);

    // phrase with repetitions that exists in 2 docs
    query = new PhraseQuery();
    foreach (string term in new string[] { "phrase", "exist", "exist" })
        query.Add(new Term("nonexist", term));
    query.SetSlop(1); // would be found
    hits = searcher.Search(query);
    Assert.AreEqual(2, hits.Length(), "phrase with repetitions exists in two docs");
    QueryUtils.Check(query, searcher);

    // phrase I with repetitions that does not exist in any doc
    query = new PhraseQuery();
    foreach (string term in new string[] { "phrase", "notexist", "phrase" })
        query.Add(new Term("nonexist", term));
    query.SetSlop(1000); // would not be found no matter how high the slop is
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);

    // phrase II with repetitions that does not exist in any doc
    query = new PhraseQuery();
    foreach (string term in new string[] { "phrase", "exist", "exist", "exist" })
        query.Add(new Term("nonexist", term));
    query.SetSlop(1000); // would not be found no matter how high the slop is
    hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length(), "nonexisting phrase with repetitions does not exist in any doc");
    QueryUtils.Check(query, searcher);
}
/// <summary>
/// Builds a query for one field from free text: the analyzer tokenizes
/// <c>queryText</c> and the resulting token stream decides the query shape —
/// a TermQuery for one token, a BooleanQuery or MultiPhraseQuery when tokens
/// overlap positions, a PhraseQuery otherwise, or null when no tokens remain.
/// </summary>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
{
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
    System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    Lucene.Net.Analysis.Token t;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;
    while (true)
    {
        try
        {
            t = source.Next();
        }
        catch (System.IO.IOException e)
        {
            // A failed read is treated the same as end-of-stream.
            t = null;
        }
        if (t == null)
            break;
        v.Add(t);
        // A zero position increment means this token overlaps the previous
        // one (e.g. a synonym emitted at the same position).
        if (t.GetPositionIncrement() != 0)
            positionCount += t.GetPositionIncrement();
        else
            severalTokensAtSamePosition = true;
    }
    try
    {
        source.Close();
    }
    catch (System.IO.IOException e)
    {
        // ignore
    }

    if (v.Count == 0)
        return null;
    else if (v.Count == 1)
    {
        // Single token: a plain TermQuery suffices.
        t = (Lucene.Net.Analysis.Token) v[0];
        return new TermQuery(new Term(field, t.TermText()));
    }
    else
    {
        if (severalTokensAtSamePosition)
        {
            if (positionCount == 1)
            {
                // All tokens are stacked on a single position — no phrase
                // query; any one of them may match (the "true" argument
                // disables coord scoring).
                BooleanQuery q = new BooleanQuery(true);
                for (int i = 0; i < v.Count; i++)
                {
                    t = (Lucene.Net.Analysis.Token) v[i];
                    TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
                    q.Add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            }
            else
            {
                // Overlapping tokens spread over several positions — build a
                // phrase query that accepts alternatives at each position.
                MultiPhraseQuery mpq = new MultiPhraseQuery();
                System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
                for (int i = 0; i < v.Count; i++)
                {
                    t = (Lucene.Net.Analysis.Token) v[i];
                    // An increment of 1 starts a new position: flush the
                    // alternatives collected for the previous position.
                    if (t.GetPositionIncrement() == 1 && multiTerms.Count > 0)
                    {
                        mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                        multiTerms.Clear();
                    }
                    multiTerms.Add(new Term(field, t.TermText()));
                }
                mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
                return mpq;
            }
        }
        else
        {
            // Ordinary multi-token text: an exact phrase with the parser's
            // configured slop.
            PhraseQuery q = new PhraseQuery();
            q.SetSlop(phraseSlop);
            for (int i = 0; i < v.Count; i++)
            {
                q.Add(new 
                Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
            }
            return q;
        }
    }
}
/// <summary>
/// Runs a phrase search over the "content" field for the user-supplied
/// keywords, records the keywords for hot-word statistics, and renders the
/// hits (with highlighting) on the Index view.
/// </summary>
/// <returns>The Index view with the result list in ViewData, or a redirect
/// to the index-generation action when btnCreate was posted.</returns>
public ActionResult SearchContent()
{
    if (Request["btnCreate"] != null)
    {
        return Redirect("/Search/GenrateSearchLibrary");
    }
    string indexPath = ConfigurationManager.AppSettings["LuceneNetPath"];
    string searchStr = Request["txtSearchContent"] ?? "";
    if (searchStr == "")
    {
        return View("Index");
    }
    // Record the query so it can feed the hot-keyword statistics.
    SearchDetailsBll.AddEntity(new SearchDetails() { Id = Guid.NewGuid(), KeyWords = searchStr, SearchDateTime = DateTime.Now });
    List<string> stringList = Common.SearchHelper.ChangeStringToSegment(searchStr);
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Each segmented token becomes one phrase term on the content field.
        PhraseQuery query = new PhraseQuery();
        foreach (string word in stringList)
        {
            query.Add(new Term("content", word));
        }
        // Maximum allowed distance between the query terms: terms that are
        // far apart in an article are considered unrelated.
        query.SetSlop(100);

        // TopScoreDocCollector gathers the raw hits; TopDocs(m, n) could be
        // used for paging (documents m through m+n).
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

        List<SearchResultViewModel> list = new List<SearchResultViewModel>();
        for (int i = 0; i < docs.Length; i++)
        {
            // ScoreDoc only carries the internal document id; the full
            // Document is loaded on demand, keeping memory pressure low.
            int docId = docs[i].doc;
            Document doc = searcher.Doc(docId);
            list.Add(new SearchResultViewModel()
            {
                Id = doc.Get("id"),
                Title = doc.Get("title"),
                Content = Common.SearchHelper.ChangeStringToHighLight(searchStr, doc.Get("content"))
            });
        }
        ViewData["list"] = list;
        ViewData["searchContent"] = searchStr;
        return View("Index");
    }
    finally
    {
        // Release index resources even when searching or rendering throws;
        // the original code never closed the searcher or the reader.
        searcher.Close();
        reader.Close();
    }
}