/// <summary>
/// Searches the document content field for the words of <paramref name="text"/>,
/// combined into a single phrase query.
/// </summary>
/// <param name="text">Text to look for; it is tokenized with <c>SplitContent.SplitWords</c>.</param>
/// <returns>
/// The matching documents ordered by descending score, with only <c>Id</c> populated.
/// At most 300 hits are collected. The list is empty when the index does not exist,
/// when the text yields no words, or when an exception was caught and logged.
/// </returns>
public List<Doc> SearchDocumentContentAndNotSplitWord(string text)
{
    List<Doc> result = new List<Doc>();
    try
    {
        if (!IndexReader.IndexExists(this._fsDir))
        {
            return result;
        }

        // NOTE(review): the reader kept in _indexReader is replaced on every call and is
        // not closed in this method - confirm that it is released elsewhere.
        this._indexReader = IndexReader.Open(this._fsDir, false);
        IndexSearcher searcher = new IndexSearcher(this._indexReader);

        // Build one phrase out of every word of the input.
        PhraseQuery phrase = new PhraseQuery();
        bool hasTerms = false;
        foreach (string word in SplitContent.SplitWords(text))
        {
            phrase.Add(new Term(DocStruct.CONTENT, word));
            hasTerms = true;
        }

        // The phrase is registered exactly once. Adding it inside the loop produced one
        // duplicate MUST clause per word, which overflows the BooleanQuery clause limit
        // on long inputs. An input without words keeps the query empty, as before.
        BooleanQuery query = new BooleanQuery();
        if (hasTerms)
        {
            query.Add(phrase, BooleanClause.Occur.MUST);
        }

        MultiSearcher multiSearch = new MultiSearcher(new[] { searcher });

        // TopScoreDocCollector is the container that receives the hits.
        TopScoreDocCollector collector = TopScoreDocCollector.create(300, true);
        multiSearch.Search(query, collector);

        ScoreDoc[] hits = collector.TopDocs(0, collector.GetTotalHits())
                                   .scoreDocs
                                   .OrderByDescending(t => t.score)
                                   .ToArray();

        foreach (ScoreDoc hit in hits)
        {
            // hit.doc is the id Lucene assigned internally; use it to load the stored document.
            Document doc = searcher.Doc(hit.doc);
            var d = new Doc();
            d.Id = doc.Get(DocStruct.ID);
            result.Add(d);
        }
    }
    catch (Exception ex)
    {
        // Best effort: failures are logged and whatever was collected so far is returned.
        LogHelper.WriteError(this.GetType(), ex);
    }

    return result;
}
/// <summary>
/// Searches the title, content and category fields for the words of <paramref name="text"/>.
/// </summary>
/// <param name="text">
/// User input; it is tokenized with <c>SplitContent.SplitWords</c>. When the whole input is
/// wrapped in double quotes, every word is required inside each per-field query; otherwise
/// each word is optional.
/// </param>
/// <returns>
/// The matching documents ordered by descending score, with only <c>Id</c> populated.
/// At most 300 hits are collected. The list is empty when the index does not exist or
/// when an exception was caught and logged.
/// </returns>
public List<Doc> Search(string text)
{
    List<Doc> result = new List<Doc>();
    try
    {
        if (!IndexReader.IndexExists(this._fsDir))
        {
            return result;
        }

        // NOTE(review): the reader kept in _indexReader is replaced on every call and is
        // not closed in this method - confirm that it is released elsewhere.
        this._indexReader = IndexReader.Open(this._fsDir, false);
        IndexSearcher searcher = new IndexSearcher(this._indexReader);

        // The quote test depends only on the raw input, so it is evaluated once instead
        // of once per word. The null guard keeps a null input from throwing here.
        bool isQuoted = text != null && Regex.IsMatch(text, "^\".*\"$");

        // Words of a quoted input must appear; otherwise they are optional.
        BooleanClause.Occur wordOccur = isQuoted
            ? BooleanClause.Occur.MUST
            : BooleanClause.Occur.SHOULD;

        // One sub-query per searched field.
        BooleanQuery titleQuery = new BooleanQuery();
        BooleanQuery contentQuery = new BooleanQuery();
        BooleanQuery categoryQuery = new BooleanQuery();

        // Tokenize the keywords typed by the user.
        foreach (string word in SplitContent.SplitWords(text))
        {
            PhraseQuery titlePhrase = new PhraseQuery();
            PhraseQuery contentPhrase = new PhraseQuery();
            PhraseQuery categoryPhrase = new PhraseQuery();

            titlePhrase.Add(new Term(DocStruct.TITLE, word));
            contentPhrase.Add(new Term(DocStruct.CONTENT, word));
            categoryPhrase.Add(new Term(DocStruct.CATEGORY, word));

            titleQuery.Add(titlePhrase, wordOccur);
            contentQuery.Add(contentPhrase, wordOccur);
            categoryQuery.Add(categoryPhrase, wordOccur);
        }

        // A document qualifies when any of the three field queries matches.
        BooleanQuery shouldQuery = new BooleanQuery();
        shouldQuery.Add(titleQuery, BooleanClause.Occur.SHOULD);
        shouldQuery.Add(contentQuery, BooleanClause.Occur.SHOULD);
        shouldQuery.Add(categoryQuery, BooleanClause.Occur.SHOULD);

        MultiSearcher multiSearch = new MultiSearcher(new[] { searcher });

        // TopScoreDocCollector is the container that receives the hits.
        TopScoreDocCollector collector = TopScoreDocCollector.create(300, true);
        multiSearch.Search(shouldQuery, collector);

        // TopDocs(0, GetTotalHits()) asks for every collected hit; a call such as
        // TopDocs(20, 10) would return the hits 20-30 instead, which allows paging.
        ScoreDoc[] hits = collector.TopDocs(0, collector.GetTotalHits())
                                   .scoreDocs
                                   .OrderByDescending(t => t.score)
                                   .ToArray();

        foreach (ScoreDoc hit in hits)
        {
            // hit.doc is the id Lucene assigned internally; use it to load the stored document.
            Document doc = searcher.Doc(hit.doc);
            var d = new Doc();
            d.Id = doc.Get(DocStruct.ID);
            result.Add(d);
        }
    }
    catch (Exception ex)
    {
        // Best effort: failures are logged and whatever was collected so far is returned.
        LogHelper.WriteError(this.GetType(), ex);
    }

    return result;
}
/// <summary>
/// Searches the code index for entries of the given language whose search text contains
/// any word of <paramref name="text"/>.
/// </summary>
/// <param name="text">User input; it is tokenized with <c>SplitContent.SplitWords</c>.</param>
/// <param name="language">Value the language field of a hit must equal.</param>
/// <param name="size">Maximum number of hits to collect.</param>
/// <returns>
/// The hits ordered by descending score. Enumeration stops right after the first hit whose
/// trimmed search text equals the trimmed input (ignoring case); hits ranked before it stay
/// in the list. The list is empty when the index does not exist, when the input is empty,
/// or when an exception was caught and logged.
/// </returns>
public List<CodeIndex> Search(string text, string language, int size = 300)
{
    List<CodeIndex> result = new List<CodeIndex>();
    try
    {
        bool isExistIndex = this._codeIndexLucene.IndexExists();
        if (!isExistIndex || text.IsNullOrEmpty())
        {
            return result;
        }

        // NOTE(review): the reader obtained from OpenReader() is not closed in this
        // method - confirm who owns it.
        IndexSearcher searcher = new IndexSearcher(this._codeIndexLucene.OpenReader());

        // The language is mandatory.
        BooleanQuery allQuery = new BooleanQuery();
        allQuery.Add(new TermQuery(new Term(IndexField.Language, language)), BooleanClause.Occur.MUST);

        // Each word of the tokenized input is optional, but the group as a whole is required.
        BooleanQuery shouldQuery = new BooleanQuery();
        foreach (string word in SplitContent.SplitWords(text))
        {
            PhraseQuery wordQuery = new PhraseQuery();
            wordQuery.Add(new Term(IndexField.SearchText, word));
            shouldQuery.Add(wordQuery, BooleanClause.Occur.SHOULD);
        }
        allQuery.Add(shouldQuery, BooleanClause.Occur.MUST);

        MultiSearcher multiSearch = new MultiSearcher(new[] { searcher });
        TopScoreDocCollector collector = TopScoreDocCollector.create(size, true);
        multiSearch.Search(allQuery, collector);

        ScoreDoc[] hits = collector.TopDocs(0, collector.GetTotalHits())
                                   .scoreDocs
                                   .OrderByDescending(t => t.score)
                                   .ToArray();

        // The input side of the exact-match test never changes, so trim it once.
        string wanted = text?.Trim();

        foreach (ScoreDoc hit in hits)
        {
            // hit.doc is the id Lucene assigned internally; use it to load the stored document.
            Document doc = searcher.Doc(hit.doc);
            var m = new CodeIndex
            {
                Id = doc.Get(IndexField.Id),
                SearchText = doc.Get(IndexField.SearchText),
                CodeBody = doc.Get(IndexField.CodeBody),
                UserId = doc.Get(IndexField.UserId),
                Language = doc.Get(IndexField.Language)
            };

            // NOTE(review): m.FileId is not assigned by the initializer above, so GetDoc
            // receives whatever CodeIndex.FileId defaults to - confirm this is intended.
            var codeDoc = this._codeFileLucene.GetDoc(m.FileId);
            if (codeDoc != null)
            {
                m.FileContent = codeDoc.Get(nameof(CodeFile.FileContent));
            }

            result.Add(m);

            // Stop at an exact match. The comparison is ordinal and case-insensitive so
            // the outcome does not depend on the current culture.
            if (wanted != null
                && m.SearchText != null
                && string.Equals(m.SearchText.Trim(), wanted, StringComparison.OrdinalIgnoreCase))
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: failures are logged and whatever was collected so far is returned.
        LogHelper.WriteError(this.GetType(), ex);
    }

    return result;
}