public virtual void TestSpanNearVersusPhrase()
{
    // An in-order SpanNearQuery with zero slop must match the same document
    // set as the equivalent two-term PhraseQuery.
    Term first = RandomTerm();
    Term second = RandomTerm();

    SpanQuery[] clauses = new SpanQuery[]
    {
        new SpanTermQuery(first),
        new SpanTermQuery(second)
    };
    SpanNearQuery spanQuery = new SpanNearQuery(clauses, 0, true);

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(first);
    phraseQuery.Add(second);

    AssertSameSet(spanQuery, phraseQuery);
}
/// <summary>
/// Runs <paramref name="text"/> through <paramref name="analyzer"/> and builds
/// the simplest matching query: a TermQuery for a single token, a PhraseQuery
/// when every position holds exactly one token, or a MultiPhraseQuery when
/// some position holds several tokens (e.g. synonyms emitted with a position
/// increment of 0).
/// </summary>
static Query ExecuteAnalyzer(Analyzer analyzer, string field, string text)
{
    // NOTE(review): the token stream is never closed/disposed here — confirm
    // whether the analyzer implementation requires it.
    TokenStream tokenStream = analyzer.TokenStream(field, new StringReader(text));
    ITermAttribute termAttribute = tokenStream.AddAttribute<ITermAttribute>();
    IPositionIncrementAttribute positionIncrementAttribute = tokenStream.AddAttribute<IPositionIncrementAttribute>();

    // Group tokens by position: a positive position increment starts a new
    // group; an increment of 0 adds the token to the current group.
    List<List<Term>> terms = new List<List<Term>>();
    List<Term> current = null;
    while (tokenStream.IncrementToken())
    {
        if (positionIncrementAttribute.PositionIncrement > 0)
        {
            current = new List<Term>();
            terms.Add(current);
        }
        if (current != null) // guards against a leading 0-increment token
        {
            current.Add(new Term(field, termAttribute.Term));
        }
    }

    if (terms.Count == 1 && terms[0].Count == 1)
    {
        // Exactly one token overall: a plain term query suffices.
        return(new TermQuery(terms[0][0]));
    }
    else if (terms.Select(l => l.Count).Sum() == terms.Count)
    {
        // Every position has exactly one token: an ordinary phrase query.
        PhraseQuery phraseQuery = new PhraseQuery();
        foreach (var positionList in terms)
        {
            phraseQuery.Add(positionList[0]);
        }
        return(phraseQuery);
    }
    else
    {
        // At least one position has multiple tokens: use a multi-phrase query.
        MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
        foreach (var positionList in terms)
        {
            multiPhraseQuery.Add(positionList.ToArray());
        }
        return(multiPhraseQuery);
    }
}
/// <summary>
/// Serializes a PhraseQuery into the visitor's text buffer using Lucene query
/// syntax — field:"term term ..."~slop^boost — then delegates to the base visitor.
/// </summary>
public override Query VisitPhraseQuery(PhraseQuery phraseq)
{
    var terms = phraseq.GetTerms();
    // All terms of a phrase query share one field; take it from the first term.
    var field = terms[0].Field();
    _text.Append(field);
    _text.Append(":\"");

    // NOTE(review): positions are filled with sequential indices rather than
    // the query's actual term positions, so the piece-merging loop below never
    // combines multiple terms into one position ("t1|t2") and never leaves a
    // gap — confirm whether phraseq's real positions should be used instead.
    var positions = new int[terms.Length];
    for (var i = 0; i < positions.Length; i++)
        positions[i] = i;

    // Merge terms that share a position, separated by '|'.
    var pieces = new string[terms.Length];
    for (var i = 0; i < terms.Length; i++)
    {
        var pos = positions[i];
        var s = pieces[pos];
        if (s == null)
            s = (terms[i]).Text();
        else
            s += "|" + (terms[i]).Text();
        pieces[pos] = s;
    }

    // Emit the pieces; '?' marks an empty position (cannot occur with the
    // sequential positions above).
    for (var i = 0; i < pieces.Length; i++)
    {
        if (i > 0)
            _text.Append(' ');
        var s = pieces[i];
        if (s == null)
            _text.Append('?');
        else
            _text.Append(s);
    }
    _text.Append("\"");

    // Append the slop suffix only when non-default, then the boost suffix.
    var slop = phraseq.GetSlop();
    if (slop != 0)
    {
        _text.Append("~");
        _text.Append(slop);
    }
    _text.Append(BoostToString(phraseq.GetBoost()));
    return base.VisitPhraseQuery(phraseq);
}
public virtual void TestORPhrase()
{
    // "foo bar"|"star wars" must parse into two SHOULD phrase clauses.
    PhraseQuery fooBar = new PhraseQuery();
    fooBar.Add(new Term("field", "foo"));
    fooBar.Add(new Term("field", "bar"));

    PhraseQuery starWars = new PhraseQuery();
    starWars.Add(new Term("field", "star"));
    starWars.Add(new Term("field", "wars"));

    BooleanQuery expected = new BooleanQuery();
    expected.Add(fooBar, BooleanClause.Occur.SHOULD);
    expected.Add(starWars, BooleanClause.Occur.SHOULD);

    assertEquals(expected, Parse("\"foo bar\"|\"star wars\""));
}
private QueryProvider<TLuceneEntity> _Phrase<TResult>(string value, Expression<Func<TLuceneEntity, TResult>> selector, Occur occur)
{
    // Normalize the term; an empty/null result makes this call a no-op so the
    // builder stays chainable.
    TrimTerm(ref value);
    if (IsNull(ref value))
    {
        return this;
    }

    // Single-term phrase against the field named by the selector expression.
    var phrase = new PhraseQuery();
    phrase.Add(new Term(selector.GetName(), value));
    _current.Add(phrase, occur);
    return this;
}
public static Query BuildExactFieldValueClause(Index index, string fieldName, string fieldValue)
{
    Assert.ArgumentNotNull(index, "Index");

    // Missing field or value: no clause to build (caller treats null as "skip").
    if (string.IsNullOrEmpty(fieldName) || string.IsNullOrEmpty(fieldValue))
    {
        return null;
    }

    // Normalize any GUIDs in the value, then match the lower-cased value
    // exactly against the lower-cased field name.
    fieldValue = IdHelper.ProcessGUIDs(fieldValue);
    var exactMatch = new PhraseQuery();
    exactMatch.Add(new Term(fieldName.ToLowerInvariant(), fieldValue.ToLowerInvariant()));
    return exactMatch;
}
public virtual void TestANDPhrase()
{
    // "foo bar"+"star wars" must parse into two MUST phrase clauses.
    PhraseQuery fooBar = new PhraseQuery();
    fooBar.Add(new Term("field", "foo"));
    fooBar.Add(new Term("field", "bar"));

    PhraseQuery starWars = new PhraseQuery();
    starWars.Add(new Term("field", "star"));
    starWars.Add(new Term("field", "wars"));

    BooleanQuery expected = new BooleanQuery();
    expected.Add(fooBar, Occur.MUST);
    expected.Add(starWars, Occur.MUST);

    assertEquals(expected, Parse("\"foo bar\"+\"star wars\""));
}
/// <summary>
/// Searches the book index for the words in Request["txtContent"] (tokenized
/// by Pan Gu word segmentation), logs the search in the details table, and
/// returns the matching documents with the keywords highlighted.
/// </summary>
private List<Models.ViewSearchContentModel> SearchBookContent()
{
    // NOTE(review): hard-coded local index path — should come from configuration.
    var indexPath = @"C:\Users\Victor\Desktop\LuceneNetDir";
    var kw = Common.WebCommon.GetPanGuWord(Request["txtContent"]);
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search condition: one phrase term per segmented word.
    PhraseQuery query = new PhraseQuery();
    foreach (var word in kw) // the segmenter splits the input into words, e.g. "computer science"
    {
        query.Add(new Term("body", word));
    }

    // WITH_POSITIONS_OFFSETS -> Pan Gu recorded term positions at index time;
    // Lucene.NET only has to match them here.
    query.Slop = 100; // max distance between the query words — words too far apart make a meaningless match

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
    searcher.Search(query, null, collector); // run the query; hits go into the collector

    // All hits; collector.TotalHits is the total count. TopDocs(300, 20) would
    // return hits 300-320 (usable for paging).
    ScoreDoc[] docs = collector.TopDocs(0, collector.TotalHits).ScoreDocs;

    var list = new List<Models.ViewSearchContentModel>(); // can be used to implement paging
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc[] only carries document ids, so the result Documents are not
        // all loaded into memory at once; the full Document is fetched on
        // demand via searcher.Doc, keeping memory pressure low.
        int docId = docs[i].Doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docId); // fetch the document details by id
        list.Add(new Models.ViewSearchContentModel
        {
            Id = doc.Get("Id"),
            Title = doc.Get("Title"),
            Content = Common.WebCommon.CreateHighLight(Request["txtContent"], doc.Get("Content")) // highlight the search keywords
        });
    }

    // Record this search in the details table.
    SearchDetailsBll.AddEntity(new SearchDetails
    {
        Id = Guid.NewGuid(),
        KeyWords = Request["txtContent"],
        SearchDateTime = DateTime.Now,
    });
    return(list);
}
/// <summary>
/// Builds a Lucene <see cref="PhraseQuery"/> from an Elasticsearch-style
/// "match_phrase" JSON query. Returns null for any other query type so other
/// providers can handle it.
/// </summary>
public Query CreateQuery(ILuceneQueryService builder, LuceneQueryContext context, string type, JObject query)
{
    if (type != "match_phrase")
    {
        return null;
    }

    var first = query.Properties().First();

    var phraseQuery = new PhraseQuery();

    JToken value;

    switch (first.Value.Type)
    {
        case JTokenType.String:
            // Short form: { "field": "some phrase" }
            value = first.Value;
            break;
        case JTokenType.Object:
            // Long form: { "field": { "value": "...", "slop": n } }
            var obj = (JObject)first.Value;

            if (!obj.TryGetValue("value", out value))
            {
                throw new ArgumentException("Missing value in match phrase query");
            }

            // TODO: read "analyzer" property

            if (obj.TryGetValue("slop", out var slop))
            {
                phraseQuery.Slop = slop.Value<int>();
            }

            break;
        default:
            // BUGFIX: the message previously said "Invalid wildcard query" —
            // a copy/paste from another provider; this handler is match_phrase.
            throw new ArgumentException("Invalid match phrase query");
    }

    // Analyze the phrase with the default analyzer and add each resulting
    // token, in order, to the phrase query.
    foreach (var term in LuceneQueryService.Tokenize(first.Name, value.Value<string>(), context.DefaultAnalyzer))
    {
        phraseQuery.Add(new Term(first.Name, term));
    }

    return phraseQuery;
}
/// <summary>
/// Create the quoted query: a phrase query over the lower-cased words of the
/// quoted text.
/// </summary>
/// <param name="text">The quoted text.</param>
/// <param name="searchFieldName">The name of the field to search.</param>
/// <returns>The query result.</returns>
private PhraseQuery CreateQuotedQuery(string text, string searchFieldName)
{
    // Create the query.
    PhraseQuery query = new PhraseQuery();
    // NOTE(review): a slop of 2 allows terms up to two positions apart, so this
    // is not a strictly exact quoted match despite the comment — confirm intent.
    query.Slop = 2; // Quoted search exact phase.

    // One lower-cased term per word of the quoted text.
    // NOTE(review): ToLower() is culture-sensitive; verify this matches the
    // analyzer used at index time.
    string[] words = text.Words();
    for (int i = 0; i < words.Length; i++)
    {
        // Add the query.
        query.Add(new Term(searchFieldName, words[i].Trim().ToLower()));
    }

    // Return the query.
    return(query);
}
/// <summary>
/// Match a multi-word phrase exactly. (This is like how QueryParser handles
/// quoted phrases.)
/// </summary>
/// <param name="field">Field to match against.</param>
/// <param name="phrase">Phrase text; tokenized with the builder's analyzer.</param>
/// <param name="slop">Allowed edit distance between phrase terms (0 = exact).</param>
/// <returns>This builder, for chaining.</returns>
public QueryBuilder MatchPhrase(string field, string phrase, int slop = 0)
{
    // A blank phrase contributes nothing; keep the builder chainable.
    if (string.IsNullOrWhiteSpace(phrase))
    {
        return this;
    }

    var phraseQuery = new PhraseQuery();
    foreach (var token in _analyzer.TokenListFromString(phrase))
    {
        phraseQuery.Add(new Term(field, token));
    }
    phraseQuery.SetSlop(slop);

    return AddSubQuery(phraseQuery);
}
public void TestSmallerFragSizeThanPhraseQuery()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    // Two terms whose combined match is longer than the minimum fragment size.
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term(F, "abcdefgh"));
    query.Add(new Term(F, "jklmnopqrs"));

    FieldFragList ffl = sflb.CreateFieldFragList(fpl(query, "abcdefgh jklmnopqrs"), sflb.minFragCharSize);

    // The single fragment must be widened to cover the entire phrase match.
    assertEquals(1, ffl.FragInfos.size());
    if (VERBOSE)
    {
        Console.WriteLine(ffl.FragInfos[0].toString());
    }
    assertEquals("subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.FragInfos[0].toString());
}
public void TestSmallerFragSizeThanPhraseQuery()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    // Two terms whose combined match is longer than the minimum fragment size.
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term(F, "abcdefgh"));
    query.Add(new Term(F, "jklmnopqrs"));

    FieldFragList ffl = sflb.CreateFieldFragList(fpl(query, "abcdefgh jklmnopqrs"), sflb.minFragCharSize);

    // The single fragment must be widened to cover the entire phrase match.
    assertEquals(1, ffl.FragInfos.size());
    if (Verbose)
    {
        Console.WriteLine(ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
    }
    assertEquals("subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.FragInfos[0].ToString(CultureInfo.InvariantCulture)); // LUCENENET specific: use invariant culture, since we are culture-aware
}
/// <summary>
/// End-to-end smoke test: index one document into an in-memory directory,
/// then verify that long-term, single-term, and phrase searches all find it.
/// </summary>
public virtual void TestDemo()
{
    Analyzer analyzer = new MockAnalyzer(Random());

    // Store the index in memory:
    using (Directory directory = NewDirectory())
    {
        string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
        string text = "this is the text to be indexed. " + longTerm;

        // To store an index on disk, use this instead:
        // Directory directory = FSDirectory.open(new File("/tmp/testindex"));
        using (RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, analyzer)))
        {
            Documents.Document doc = new Documents.Document();
            doc.Add(NewTextField("fieldname", text, Field.Store.YES));
            iwriter.AddDocument(doc);
        } // disposing the writer commits the document

        // Now search the index:
        using (IndexReader ireader = DirectoryReader.Open(directory)) // read-only=true
        {
            IndexSearcher isearcher = NewSearcher(ireader);

            // A very long term must still be indexed and searchable.
            Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);

            Query query = new TermQuery(new Term("fieldname", "text"));
            TopDocs hits = isearcher.Search(query, null, 1);
            Assert.AreEqual(1, hits.TotalHits);

            // Iterate through the results:
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Documents.Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
            }

            // Test simple phrase query
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("fieldname", "to"));
            phraseQuery.Add(new Term("fieldname", "be"));
            Assert.AreEqual(1, isearcher.Search(phraseQuery, null, 1).TotalHits);
        }
    }
}
/// <summary>
/// Fetches the search results: queries the "msg" field for the entered
/// keyword and binds the highlighted hits to the result repeater.
/// </summary>
protected void btnGetSearchResult_Click(object sender, EventArgs e)
{
    string keyword = txtKeyWords.Text;
    string indexPath = Context.Server.MapPath("~/Index"); // where the index documents are stored

    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Query condition — roughly equivalent to: where contains("msg", kw)
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("msg", keyword));
    // Terms more than 100 positions apart (empirical value) are excluded,
    // because matches that far apart are no longer relevant.
    query.SetSlop(100);

    // TopScoreDocCollector: the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    // Search with the query; hits go into the collector.
    searcher.Search(query, null, collector);

    // Take hits m..n from the results;
    // collector.GetTotalHits() is the total number of hits.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

    // Walk the hits.
    IList<SearchResult> resultList = new List<SearchResult>();
    for (int i = 0; i < docs.Length; i++)
    {
        // Only the document id is held here, because a full Document can use a
        // lot of memory (think DataSet vs. DataReader).
        int docId = docs[i].doc;
        // Second lookup: Documents went in, so Documents come back out.
        Document doc = searcher.Doc(docId);
        SearchResult result = new SearchResult();
        result.Id = Convert.ToInt32(doc.Get("id"));
        result.Msg = HighlightHelper.HighLight(keyword, doc.Get("msg"));
        resultList.Add(result);
    }

    // Bind to the repeater.
    rptSearchResult.DataSource = resultList;
    rptSearchResult.DataBind();
}
/// <summary>
/// Searches the index for <paramref name="searchKey"/> over the "title" and
/// "content" fields (OR-ed) and returns the matching records.
/// </summary>
public static List<Record> SearchFromIndex(string searchKey)
{
    string indexPath = HttpContext.Current.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search conditions: one phrase query per field, combined with SHOULD (OR).
    BooleanQuery bQuery = new BooleanQuery();
    PhraseQuery tQuery = new PhraseQuery();
    PhraseQuery cQuery = new PhraseQuery();
    // Segment the user's keyword input into words.
    foreach (string word in SplitWords(searchKey))
    {
        tQuery.Add(new Term("title", word));
        cQuery.Add(new Term("content", word));
    }
    cQuery.Slop = 100; // maximum distance allowed between the keywords
    bQuery.Add(tQuery, Occur.SHOULD);
    bQuery.Add(cQuery, Occur.SHOULD);

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
    searcher.Search(bQuery, null, collector); // run the query; hits go into the collector

    // TopDocs(0, TotalHits) returns every hit; TopDocs(20, 10) would return
    // hits 20-30, giving a paging effect.
    ScoreDoc[] docs = collector.TopDocs(0, collector.TotalHits).ScoreDocs;

    // Materialize the result entities.
    List<Record> records = new List<Record>();
    for (int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].Doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docId); // fetch the Document by its id
        var record = new Record();
        record.Title = doc.Get("title");
        record.Summary = doc.Get("summary");
        record.ID = Guid.Parse(doc.Get("id"));
        records.Add(record);
    }
    return(records);
}
/// <summary>
/// Builds the fixed set of benchmark queries: boolean ORs and exact phrases
/// over the "contents" field.
/// </summary>
private IList<Query> BuildQueries()
{
    IList<Query> queries = new List<Query>();

    // a OR b
    BooleanQuery orAB = new BooleanQuery();
    orAB.Add(new TermQuery(new Term("contents", "a")), Occur.SHOULD);
    orAB.Add(new TermQuery(new Term("contents", "b")), Occur.SHOULD);
    queries.Add(orAB);

    // "a b"
    PhraseQuery abPhrase = new PhraseQuery();
    abPhrase.Add(new Term("contents", "a"));
    abPhrase.Add(new Term("contents", "b"));
    queries.Add(abPhrase);

    // "a b c"
    PhraseQuery abcPhrase = new PhraseQuery();
    abcPhrase.Add(new Term("contents", "a"));
    abcPhrase.Add(new Term("contents", "b"));
    abcPhrase.Add(new Term("contents", "c"));
    queries.Add(abcPhrase);

    // a OR c
    BooleanQuery orAC = new BooleanQuery();
    orAC.Add(new TermQuery(new Term("contents", "a")), Occur.SHOULD);
    orAC.Add(new TermQuery(new Term("contents", "c")), Occur.SHOULD);
    queries.Add(orAC);

    // "a c"
    PhraseQuery acPhrase = new PhraseQuery();
    acPhrase.Add(new Term("contents", "a"));
    acPhrase.Add(new Term("contents", "c"));
    queries.Add(acPhrase);

    // "a c e"
    PhraseQuery acePhrase = new PhraseQuery();
    acePhrase.Add(new Term("contents", "a"));
    acePhrase.Add(new Term("contents", "c"));
    acePhrase.Add(new Term("contents", "e"));
    queries.Add(acePhrase);

    return queries;
}
// This is a simplified query builder which works for single Terms and single Phrases
// Returns null, TermQuery, or PhraseQuery
// (In practice: an empty BooleanQuery for zero tokens, a TermQuery for one
// token, and a PhraseQuery for several. Phrase positions follow the stream's
// position increments, so gaps — e.g. removed stop words — are preserved.)
public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
{
    TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));
    TokenFilter filter = new CachingTokenFilter(stream);

    filter.Reset();

    // This attribute way of getting token properties sucks, but it's the non-obsolete one.
    var attr1 = (TermAttribute)filter.GetAttribute(typeof(TermAttribute));
    var attr2 = (PositionIncrementAttribute)filter.GetAttribute(typeof(PositionIncrementAttribute));

    // Null-safe accessors: a stream lacking these attributes yields null text
    // and a default position increment of 1.
    Func<string> getText = () => attr1 != null ? attr1.Term() : null;
    Func<int> getPositionIncrement = () => attr2 != null ? attr2.GetPositionIncrement() : 1;

    // 0 tokens
    if (!filter.IncrementToken())
    {
        return(new BooleanQuery());
    }

    // 1 token?
    string token1 = getText();
    int position = 0;
    if (!filter.IncrementToken())
    {
        return(new TermQuery(new Term(field, token1)));
    }

    // many tokens - handle first token
    PhraseQuery ret = new PhraseQuery();
    ret.Add(new Term(field, token1));

    do
    {
        // handle rest of tokens
        string tokenNext = getText();
        position += getPositionIncrement();
        ret.Add(new Term(field, tokenNext), position);
    } while (filter.IncrementToken());

    return(ret);
}
/// <summary>
/// Searches the index library for the keywords in the "SearchKey" query-string
/// parameter and binds the matching books (content highlighted) to Repeater1.
/// </summary>
private void SearchFromIndexData()
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search condition: one phrase term per segmented word of the user input.
    PhraseQuery query = new PhraseQuery();
    foreach (string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
    {
        query.Add(new Term("content", word));
    }
    //query.Add(new Term("content", "C#"));// multiple conditions are AND-ed together
    query.SetSlop(100); // maximum distance allowed between the keywords

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; hits go into the collector

    // TopDocs(0, GetTotalHits()) returns every hit; TopDocs(20, 10) would
    // return hits 20-30, giving a paging effect.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

    // Materialize the result entities.
    List<PZYM.Shop.Model.Books> bookResult = new List<PZYM.Shop.Model.Books>();
    for (int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docId); // fetch the Document by its id
        PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
        book.Title = doc.Get("title");
        //book.ContentDescription = doc.Get("content");// without highlighting
        // Highlight the search keywords using the Pan Gu highlighter plug-in.
        book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
        book.Id = Convert.ToInt32(doc.Get("id"));
        bookResult.Add(book);
    }

    Repeater1.DataSource = bookResult;
    Repeater1.DataBind();
}
/// <summary>
/// Searches the "Title" field of the index for the Pan-Gu-segmented words of
/// <paramref name="msg"/> and returns the matching view models.
/// </summary>
public static List<ViewModelContent> ShowSearchContent(HttpRequestBase Request, string msg)
{
    string indexPath = ConfigurationManager.AppSettings["lucenedirPath"];
    List<string> list = Common.WebCommon.PanGuSplitWord(msg);// segment the user's search input
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search condition: one phrase term per segmented word, e.g. "computer science".
    PhraseQuery query = new PhraseQuery();
    foreach (string word in list)
    {
        query.Add(new Term("Title", word));
    }
    //query.Add(new Term("body","语言"));-- extra conditions may be added; they are AND-ed and order does not matter
    // query.Add(new Term("body", "大学生"));
    // query.Add(new Term("body", kw));// articles whose body contains kw
    query.SetSlop(100);// max distance between the query words — words too far apart make a meaningless match

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; hits go into the collector

    // All hits; GetTotalHits() is the total count. TopDocs(300, 20) would
    // return hits 300-320 — usable for implementing paging.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

    List<Models.ViewModelContent> viewModelList = new List<Models.ViewModelContent>();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc[] only carries document ids, so results are not all loaded
        // into memory at once; the full Document is fetched on demand via
        // searcher.Doc, keeping memory pressure low.
        Models.ViewModelContent viewModel = new Models.ViewModelContent();
        int docId = docs[i].doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docId); // fetch the document details by id
        viewModel.Id = Convert.ToInt32(doc.Get("Id")); // read the stored field values
        viewModel.Title = doc.Get("Title");
        // viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content"));// highlight the search keywords
        viewModelList.Add(viewModel);
    }
    // (Original notes: delete all rows from the summary table, then insert the
    // searched words into the detail table.)
    return(viewModelList);
}
/// <summary>
/// Builds the final boolean query for <paramref name="QueryString"/>.
/// With <paramref name="PreProcess"/> enabled, detected phrases are added as
/// boosted sloppy phrase clauses on the abstract and title fields; individual
/// tokens are then added as fuzzy ("~") clauses. At least two SHOULD clauses
/// must match.
/// </summary>
public Query ProcessQuery(string QueryString, bool PreProcess)
{
    BooleanQuery FinalQuery = new BooleanQuery();
    InitializeMultiFieldQueryParser(PreProcess);
    if (PreProcess)
    {
        //Extract all phrases
        List<string> PhraseList = _PosTaggerLexicalParser.FindPhrases(QueryString);
        QueryString = _PosTaggerLexicalParser.Parse(QueryString);
        foreach (string phrase in PhraseList)
        {
            PhraseQuery abstractPhraseQuery = new PhraseQuery();
            PhraseQuery titlePhraseQuery = new PhraseQuery();
            // NOTE(review): each phrase is added as a single term — this
            // assumes whole phrases are indexed as terms in these fields;
            // confirm against the indexing code.
            abstractPhraseQuery.Add(new Term(SEDocument.ABSTRACT_FN, phrase));
            titlePhraseQuery.Add(new Term(SEDocument.TITLE_FN, phrase));

            // Abstract matches: modest boost, slop of 3.
            abstractPhraseQuery.Boost = 1.2F;
            abstractPhraseQuery.Slop = 3;
            FinalQuery.Add(abstractPhraseQuery, Occur.SHOULD);

            // Title matches: strongest boost, slop of 3.
            titlePhraseQuery.Boost = 4.0F;
            titlePhraseQuery.Slop = 3;
            FinalQuery.Add(titlePhraseQuery, Occur.SHOULD);
        }
    }
    else
    {
        FinalQuery.Add(_MultiFieldQueryParser.Parse(QueryString), Occur.SHOULD);
    }

    // Strip quotes/brackets, then add each token as a fuzzy clause (any
    // existing "~" is removed before appending the fuzzy operator).
    string[] tokens = _PosTaggerLexicalParser.TokeniseString(QueryString.Replace('\"', ' ').Replace('[', ' ').Replace(']', ' '));
    foreach (string term in tokens)
    {
        FinalQuery.Add(_MultiFieldQueryParser.Parse(term.Replace("~", "") + "~"), Occur.SHOULD);
    }
    FinalQuery.MinimumNumberShouldMatch = 2;
    return(FinalQuery);
}
/// <summary>
/// Searches the forum index for the keywords in the "SearchKey" query-string
/// parameter and collects the matching forum entries (content highlighted).
/// </summary>
public void SearchFromIndexData()
{
    string indexPath = System.Web.HttpContext.Current.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search condition: one phrase term per segmented word of the user input.
    PhraseQuery query = new PhraseQuery();
    foreach (string word in WitKeyDu.Site.Web.SplitContent.SplitWords(Request.QueryString["SearchKey"]))
    {
        query.Add(new Term("ForumContent", word));
    }
    //query.Add(new Term("content", "C#"));// multiple conditions are AND-ed together
    query.SetSlop(100); // maximum distance allowed between the keywords

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; hits go into the collector

    // TopDocs(0, GetTotalHits()) returns every hit; TopDocs(20, 10) would
    // return hits 20-30, giving a paging effect.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

    // Materialize the result entities.
    List<Forum> ForumResult = new List<Forum>();
    for (int i = 0; i < docs.Length; i++)
    {
        int docId = docs[i].doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docId); // fetch the Document by its id
        Forum forum = new Forum();
        forum.ForumName = doc.Get("ForumName");
        //book.Title = doc.Get("title");
        ////book.ContentDescription = doc.Get("content");// without highlighting
        //// highlight the keywords using the Pan Gu highlighter plug-in
        forum.ForumContent = WitKeyDu.Site.Web.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("ForumContent"));
        forum.ForumTypeID = Convert.ToInt32(doc.Get("ID"));
        ForumResult.Add(forum);
    }
    // NOTE(review): ForumResult is built but neither returned nor bound to any
    // control — confirm how the results are meant to be consumed.
}
/// <summary>
/// End-to-end smoke test (Java-style port): index one document, then verify
/// that long-term, single-term, and phrase searches all find it, closing the
/// writer, reader, and directory explicitly.
/// </summary>
public virtual void TestDemo()
{
    Analyzer analyzer = new MockAnalyzer(random());

    // Store the index in memory:
    Directory directory = newDirectory();
    // To store an index on disk, use this instead:
    // Directory directory = FSDirectory.open(new File("/tmp/testindex"));
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, analyzer);
    Document doc = new Document();
    string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    string text = "this is the text to be indexed. " + longTerm;
    doc.add(newTextField("fieldname", text, Field.Store.YES));
    iwriter.addDocument(doc);
    iwriter.close(); // commit and release the writer before opening a reader

    // Now search the index:
    IndexReader ireader = DirectoryReader.open(directory); // read-only=true
    IndexSearcher isearcher = newSearcher(ireader);

    // A very long term must still be indexed and searchable.
    Assert.AreEqual(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);

    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.search(query, null, 1);
    Assert.AreEqual(1, hits.totalHits);

    // Iterate through the results:
    for (int i = 0; i < hits.scoreDocs.length; i++)
    {
        Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
        Assert.AreEqual(text, hitDoc.get("fieldname"));
    }

    // Test simple phrase query
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.add(new Term("fieldname", "to"));
    phraseQuery.add(new Term("fieldname", "be"));
    Assert.AreEqual(1, isearcher.search(phraseQuery, null, 1).totalHits);

    ireader.close();
    directory.close();
}
/// <summary>
/// Visits each term of a PhraseQuery. If VisitTerm leaves every term
/// unchanged, the original query instance is returned; otherwise a new
/// PhraseQuery is built from the surviving terms. Returns null when every
/// term is removed.
/// </summary>
public override Query VisitPhraseQuery(PhraseQuery phraseq)
{
    var terms = phraseq.GetTerms();
    PhraseQuery newQuery = null;

    int index = 0;
    int count = terms.Length;
    while (index < count)
    {
        var visitedTerm = VisitTerm(terms[index]);
        if (newQuery != null)
        {
            // Already rebuilding: append every surviving term.
            if (visitedTerm != null)
            {
                newQuery.Add(visitedTerm);
            }
        }
        else if (visitedTerm != terms[index])
        {
            // First modified (or removed) term: start a new query and replay
            // the unchanged prefix before appending the visited term.
            newQuery = new PhraseQuery();
            for (int i = 0; i < index; i++)
            {
                newQuery.Add(terms[i]);
            }
            if (visitedTerm != null)
            {
                newQuery.Add(visitedTerm);
            }
        }
        index++;
    }

    if (newQuery != null)
    {
        if (newQuery.GetTerms().Length > 0)
        {
            // BUGFIX: carry over slop and boost — previously both were
            // silently reset to their defaults on the rebuilt query.
            // NOTE(review): original term positions are still not preserved
            // (terms are re-added sequentially); confirm whether any caller
            // builds phrase queries with explicit positions.
            newQuery.SetSlop(phraseq.GetSlop());
            newQuery.SetBoost(phraseq.GetBoost());
            return(newQuery);
        }
        // Every term was removed: drop the whole phrase query.
        return(null);
    }
    // Nothing changed: keep the original instance.
    return(phraseq);
}
/// <summary>
/// Full-text search: queries the "body" field for the segmented keywords and
/// returns one page of results.
/// </summary>
/// <param name="keyword">User-entered search text.</param>
/// <param name="startRowIndex">Zero-based index of the first hit to return.</param>
/// <param name="pageSize">Number of hits per page.</param>
/// <param name="totalCount">Total number of matching documents.</param>
/// <returns>The search results for the requested page.</returns>
public static List<SearchResult> DoSearch(string keyword, int startRowIndex, int pageSize, out int totalCount)
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    // IndexSearcher is the class that performs the search.
    var searcher = new IndexSearcher(reader);
    var query = new PhraseQuery();
    foreach (string word in GetKeyWords(keyword))
    {
        query.Add(new Term("body", word));
    }
    query.SetSlop(100); // only words within 100 positions of each other count as a match

    TopScoreDocCollector collector = TopScoreDocCollector.create(1024, true); // at most 1024 hits
    searcher.Search(query, null, collector);
    totalCount = collector.GetTotalHits(); // total number of hits

    // One page of hits; the start index is zero-based.
    ScoreDoc[] docs = collector.TopDocs(startRowIndex, pageSize).scoreDocs;
    var list = new List<SearchResult>();
    for (int i = 0; i < docs.Length; i++)
    {
        // Hits only carry the internal document id (assigned by Lucene.NET);
        // the full Document is fetched on demand via searcher.Doc, which keeps
        // memory usage low.
        int docId = docs[i].doc;
        Document doc = searcher.Doc(docId);
        string number = doc.Get("number");
        string title = doc.Get("title");
        string fullPath = doc.Get("fullPath");
        string body = doc.Get("body");
        var searchResult = new SearchResult
        {
            Number = number,
            Title = title,
            FullPath = fullPath,
            BodyPreview = Preview(body, keyword)
        };
        list.Add(searchResult);
    }
    return(list);
}
/// <summary>
/// Adds a batch of PhraseQuery clauses — one per field.
/// </summary>
/// <param name="fieldNameAndBoosts">Field names and their boost levels.</param>
/// <param name="phrase">The phrase to search for.</param>
/// <param name="occur">How the per-field clauses relate to each other.</param>
/// <param name="asFilter">Whether to add the combined clause as a filter condition.</param>
/// <returns>This builder, for chaining.</returns>
public LuceneSearchBuilder WithPhrases(Dictionary<string, BoostLevel> fieldNameAndBoosts, string phrase, BooleanClause.Occur occur, bool asFilter = false)
{
    // Scrub Lucene keywords/special characters; an empty result is a no-op.
    string filteredPhrase = ClauseScrubber.LuceneKeywordsScrub(phrase);
    if (string.IsNullOrEmpty(filteredPhrase))
    {
        return(this);
    }

    string[] nameSegments = ClauseScrubber.SegmentForPhraseQuery(filteredPhrase);
    if (nameSegments.Length == 1)
    {
        // A single segment cannot form a phrase — fall back to plain field queries.
        return(WithFields(fieldNameAndBoosts, nameSegments[0], false, occur, asFilter));
    }
    else
    {
        // One boosted, sloppy phrase query per field, combined under `occur`.
        BooleanQuery query = new BooleanQuery();
        foreach (var fieldNameAndBoost in fieldNameAndBoosts)
        {
            PhraseQuery phraseQuery = new PhraseQuery();
            foreach (var nameSegment in nameSegments)
            {
                phraseQuery.Add(new Term(fieldNameAndBoost.Key, nameSegment));
            }
            phraseQuery.SetSlop(PhraseQuerySlop);
            SetBoost(phraseQuery, fieldNameAndBoost.Value);
            query.Add(phraseQuery, occur);
        }

        // The combined per-field query always participates as a MUST clause,
        // either on the filter list or the scoring clause list.
        if (asFilter)
        {
            filters.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
        }
        else
        {
            clauses.Add(new BooleanClause(query, BooleanClause.Occur.MUST));
        }
        return(this);
    }
}
/// <summary>
/// Searches the news index for <paramref name="searchkey"/> over the TITLE
/// field and appends the matches (title and content highlighted) to modResult.
/// </summary>
private void SearchFromIndexData(string searchkey)
{
    string indexPath = Context.Server.MapPath("~/IndexData");
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search condition: one phrase term per segmented word of the input.
    PhraseQuery query = new PhraseQuery();
    foreach (string word in SplitContent.SplitWords(searchkey))
    {
        query.Add(new Term("TITLE", word));
    }
    //query.Add(new Term("content", "C#"));// multiple conditions are AND-ed together
    query.SetSlop(100); // maximum distance allowed between the keywords

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; hits go into the collector

    // TopDocs(0, GetTotalHits()) returns every hit; TopDocs(20, 10) would
    // return hits 20-30, giving a paging effect.
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; //collector.GetTotalHits()

    // Materialize the result entities.
    for (int i = 0; i < docs.Length; i++)
    {
        int docID = docs[i].doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docID); // fetch the Document by its id
        SUC_NEWS mod = new SUC_NEWS();
        mod.TITLE = SplitContent.HightLight(searchkey, doc.Get("TITLE"));
        // Fall back to the raw value when highlighting produces nothing.
        mod.TITLE = string.IsNullOrEmpty(mod.TITLE) ? doc.Get("TITLE") : mod.TITLE;
        //book.ContentDESCRPTION = doc.Get("content");// without highlighting
        // Highlight the keywords using the Pan Gu highlighter plug-in.
        mod.CONTENT = SplitContent.HightLight(searchkey, doc.Get("CONTENT"));
        mod.CONTENT = string.IsNullOrEmpty(mod.CONTENT) ? doc.Get("CONTENT") : mod.CONTENT;
        mod.CONTENT = mod.CONTENT.Replace("<b>", "");
        mod.ID = Convert.ToInt32(doc.Get("ID"));
        mod.pandaWebUrl = doc.Get("URL");
        modResult.Add(mod);
    }
}
/// <summary>
/// Recursively flattens <paramref name="sourceQuery"/> into the
/// <paramref name="flatQueries"/> set, keeping only the primitive query types
/// the highlighter understands: TermQuery and multi-term PhraseQuery.
/// Single-term phrases are converted to term queries; all other query types
/// are discarded.
/// </summary>
public void flatten(Query sourceQuery, Dictionary<Query, Query> flatQueries)
{
    if (sourceQuery is BooleanQuery)
    {
        // Descend into every non-prohibited (non-NOT) clause.
        BooleanQuery bq = (BooleanQuery)sourceQuery;
        foreach (BooleanClause clause in bq.GetClauses())
        {
            if (!clause.IsProhibited())
                flatten(clause.GetQuery(), flatQueries);
        }
    }
    else if (sourceQuery is DisjunctionMaxQuery)
    {
        // Descend into each disjunct.
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
        System.Collections.IEnumerator en = dmq.Iterator();
        while (en.MoveNext())
        {
            Query query = (Query)en.Current;
            flatten(query, flatQueries);
        }
    }
    else if (sourceQuery is TermQuery)
    {
        // Deduplicate via the dictionary key.
        if (!flatQueries.ContainsKey(sourceQuery))
            flatQueries.Add(sourceQuery, sourceQuery);
    }
    else if (sourceQuery is PhraseQuery)
    {
        if (!flatQueries.ContainsKey(sourceQuery))
        {
            PhraseQuery pq = (PhraseQuery)sourceQuery;
            if (pq.GetTerms().Length > 1)
                flatQueries.Add(pq, pq);
            else if (pq.GetTerms().Length == 1)
            {
                // A one-term phrase behaves like a plain term query.
                Query q = new TermQuery(pq.GetTerms()[0]);
                flatQueries.Add(q, q);
            }
        }
    }
    // else discard queries
}
// Search for items whose title contains the keywords.
/// <summary>
/// Searches the "Title" field for the Pan-Gu-segmented words of
/// <paramref name="kw"/> and returns one page of results with highlighted titles.
/// </summary>
/// <param name="kw">User-entered search text (lower-cased before segmentation).</param>
/// <param name="index">Page start indicator; passed as index - 1 to TopDocs.</param>
/// <param name="skipCount">Number of hits returned for this page.</param>
public static List<JobSerach> SearchContent(string kw, int index, int skipCount)
{
    //string indexPath = lucenePath;// ideally this would live in a configuration file
    kw = kw.ToLower();
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(lucenePath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Search condition: one phrase term per segmented word of the input.
    PhraseQuery query = new PhraseQuery();
    foreach (string word in SplitWord(kw)) // the input has been Pan-Gu-segmented
    {
        query.Add(new Term("Title", word));
        //query.Add(new Term("Content", word));
        //query.Add(new Term("MaiDian", word));
    }
    query.SetSlop(100);// max distance between the query words — words too far apart make a meaningless match

    // TopScoreDocCollector is the container that holds the search results.
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query; hits go into the collector

    // NOTE(review): TopDocs(start, howMany) — here "index - 1" is the start
    // offset and "skipCount" the page size; confirm this matches the caller's
    // paging intent.
    ScoreDoc[] docs = collector.TopDocs(index - 1, skipCount).scoreDocs;

    // Materialize the result entities (usable for paging).
    List<JobSerach> list = new List<JobSerach>();
    for (int i = 0; i < docs.Length; i++)
    {
        // Hits only carry document ids; the full Document is fetched on demand
        // via searcher.Doc, keeping memory pressure low.
        int docId = docs[i].doc; // internal id assigned by Lucene
        Document doc = searcher.Doc(docId); // fetch the document details by id
        JobSerach result = new JobSerach();
        result.Title = Highlight(kw, doc.Get("Title"));
        result.Id = Convert.ToInt32(doc.Get("Id"));
        result.ImageAddress = doc.Get("ImageAddress");
        result.MaiDian = doc.Get("MaiDian");
        result.Price = double.Parse(doc.Get("Price"));
        result.Content = doc.Get("Content");
        list.Add(result);
    }
    return(list);
}
/// <summary>
/// Builds a boosted query over <paramref name="field"/>: a TermQuery for a
/// single term, otherwise an exact PhraseQuery over all terms.
/// </summary>
private Query clause(String field, float boost, params String[] terms)
{
    Query query;
    if (terms.Length == 1)
    {
        query = new TermQuery(new Term(field, terms[0]));
    }
    else
    {
        PhraseQuery phrase = new PhraseQuery();
        foreach (String t in terms)
        {
            phrase.Add(new Term(field, t));
        }
        query = phrase;
    }
    query.Boost = (boost);
    return query;
}
public void TestPhraseQuery()
{
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();

    PhraseQuery query = new PhraseQuery();
    query.Add(new Term(F, "a"));
    query.Add(new Term(F, "b"));

    // Neither term present: no fragments.
    FieldFragList ffl = sflb.CreateFieldFragList(fpl(query, "c d e"), 20);
    assertEquals(0, ffl.FragInfos.size());

    // Both terms present but not adjacent: still no phrase match.
    ffl = sflb.CreateFieldFragList(fpl(query, "a c b"), 20);
    assertEquals(0, ffl.FragInfos.size());

    // Adjacent "a b": exactly one fragment covering the phrase match.
    ffl = sflb.CreateFieldFragList(fpl(query, "a b c"), 20);
    assertEquals(1, ffl.FragInfos.size());
    assertEquals("subInfos=(ab((0,3)))/1.0(0,20)", ffl.FragInfos[0].toString());
}
public virtual void TestWithPendingDeletes3()
{
    Directory dir = NewDirectory(); // main directory
    Directory aux = NewDirectory(); // auxiliary directory to be added
    SetUpDirs(dir, aux);

    IndexWriter writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));

    // Add 10 docs, then overwrite each of them once, which queues
    // 10 pending deletes.
    for (int i = 0; i < 20; i++)
    {
        string id = "" + (i % 10);
        Document doc = new Document();
        doc.Add(NewStringField("id", id, Field.Store.NO));
        doc.Add(NewTextField("content", "bbb " + i, Field.Store.NO));
        writer.UpdateDocument(new Term("id", id), doc);
    }

    // Delete the single document containing "bbb 14", leaving 9 of the 10.
    PhraseQuery q = new PhraseQuery();
    q.Add(new Term("content", "bbb"));
    q.Add(new Term("content", "14"));
    writer.DeleteDocuments(q);

    writer.AddIndexes(aux);
    writer.ForceMerge(1);
    writer.Commit();

    VerifyNumDocs(dir, 1039);
    VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
    VerifyTermDocs(dir, new Term("content", "bbb"), 9);

    writer.Dispose();
    dir.Dispose();
    aux.Dispose();
}
public void TestGetBestFragmentsFilteredPhraseQuery()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;

        // Phrase "john kennedy", restricted by a range filter on "john".
        var phrase = new PhraseQuery();
        phrase.Add(new Term("contents", "john"));
        phrase.Add(new Term("contents", "kennedy"));
        var filter = new TermRangeFilter("contents", "john", "john", true, true);
        var filtered = new FilteredQuery(phrase, filter);

        DoSearching(filtered);
        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);

        // Currently highlights "John" and "Kennedy" separately
        Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
/// <summary>
/// Runs a handful of representative term and phrase queries against the
/// searcher as a quick sanity check.
/// </summary>
/// <param name="s">Searcher over the index under test.</param>
protected internal virtual void SmokeTestSearcher(IndexSearcher s)
{
    RunQuery(s, new TermQuery(new Term("body", "united")));
    RunQuery(s, new TermQuery(new Term("titleTokenized", "states")));

    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("body", "united"));
    phrase.Add(new Term("body", "states"));
    RunQuery(s, phrase);
}
public virtual void TestPositionIncrementMultiFields()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    writer.AddDocument(MakeDocumentWithFields());

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // Phrase across two values of the same field; positions must increment
    // across field instances for this to match.
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("indexed_not_tokenized", "test1"));
    phrase.Add(new Term("indexed_not_tokenized", "test2"));

    ScoreDoc[] hits = searcher.Search(phrase, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    DoAssert(searcher.Doc(hits[0].Doc), true);

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestCJKSloppyPhrase()
{
    // SimpleCJKAnalyzer emits each CJK character as an individual term.
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(this);

    PhraseQuery expected = new PhraseQuery { Slop = 3 };
    expected.Add(new Term("field", "中"));
    expected.Add(new Term("field", "国"));

    QueryBuilder builder = new QueryBuilder(analyzer);
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国", 3));
}
public virtual void TestPhraseQueryPositionIncrements()
{
    // "stop" is removed by the analyzer, so "2" must land at position 2.
    PhraseQuery expected = new PhraseQuery();
    expected.Add(new Term("field", "1"));
    expected.Add(new Term("field", "2"), 2);

    CharacterRunAutomaton stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

    QueryBuilder builder = new QueryBuilder(analyzer);
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
}
public virtual void TestPhrase()
{
    // The phrase "seventy seven" occurs in every document numbered x77.
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "seventy"));
    query.Add(new Term("field", "seven"));

    int[] expectedDocs =
    {
        77, 177, 277, 377, 477, 577, 677, 777, 877, 977,
        1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977
    };
    CheckHits(query, expectedDocs);
}
public virtual void TestPhrase2()
{
    // Nonsense terms: the phrase must match no documents.
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "seventish"));
    query.Add(new Term("field", "sevenon"));
    CheckHits(query, new int[] { });
}
/*
 * This shows how to construct a phrase query containing shingles.
 */
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testShingleAnalyzerWrapperPhraseQuery() throws Exception
public virtual void testShingleAnalyzerWrapperPhraseQuery()
{
    PhraseQuery q = new PhraseQuery();

    // Tokenize the text with the shingle analyzer and add every emitted token
    // (unigrams and shingles alike) to the phrase at its absolute position.
    TokenStream ts = analyzer.tokenStream("content", "this sentence");
    try
    {
        // j accumulates position increments; tokens with a 0 increment
        // (stacked shingles) share the same phrase position.
        int j = -1;
        PositionIncrementAttribute posIncrAtt = ts.addAttribute(typeof(PositionIncrementAttribute));
        CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
        ts.reset();
        while (ts.incrementToken())
        {
            j += posIncrAtt.PositionIncrement;
            string termText = termAtt.ToString();
            q.add(new Term("content", termText), j);
        }
        ts.end();
    }
    finally
    {
        // Always release the TokenStream, even when iteration throws.
        IOUtils.closeWhileHandlingException(ts);
    }

    // The shingled phrase should match exactly document 0.
    ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
    int[] ranks = new int[] {0};
    compareRanks(hits, ranks);
}
/// <summary>
/// Adds a standard type clause to this instance
/// </summary>
/// <param name="term">Term to add to this query.</param>
/// <param name="occurrence">Defines how the term is added to this query.</param>
/// <param name="slop">The amount of allowed slop in a phrase query.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="term"/> is null.</exception>
/// <remarks>
/// Slop is the amount of movement each word is allowed in a non-exact phrase query.
/// For instance if you search for "Adobe Systems Incorporated" and the slop is set to 0 then
/// only results with that term is allowed. If you set the slop to 2 then two movements can be
/// made, max, for each word. In the same example with slop set to 2 results would be returned
/// for "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated",
/// and "Systems Incorporated Adobe".
/// </remarks>
public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
{
    if (term == null)
        throw new ArgumentNullException("term", "term cannot be null");
    IncrementTotalClauses(1);
    if (term.IsPhrase)
    {
        PhraseQuery phraseQuery = new PhraseQuery();
        // NOTE(review): only one Lucene term is added to the phrase here;
        // presumably SearchTerm.GetLuceneTerm() carries the whole phrase text —
        // confirm against SearchTerm's implementation.
        phraseQuery.Add(term.GetLuceneTerm());
        phraseQuery.SetSlop(slop);
        phraseQuery.SetBoost(term.Boost);
        this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
        // Cleanup: the dead "phraseQuery = null;" assignment was removed —
        // locals are collectible once they go out of scope.
    }
    else
    {
        TermQuery termQuery = new TermQuery(term.GetLuceneTerm());
        termQuery.SetBoost(term.Boost);
        this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
    }
}