public virtual void TestSpanNearVersusPhrase()
{
    Term t1 = RandomTerm();
    Term t2 = RandomTerm();
    SpanQuery[] subquery = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
    SpanNearQuery q1 = new SpanNearQuery(subquery, 0, true);
    PhraseQuery q2 = new PhraseQuery();
    q2.Add(t1);
    q2.Add(t2);
    AssertSameSet(q1, q2);
}
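// A minimal concrete sketch of the equivalence the test above checks, with an
// assumed "body" field and fixed terms (the test itself uses random terms): an
// in-order SpanNearQuery with slop 0 accepts exactly the documents an exact
// PhraseQuery accepts.
private static void SpanNearVersusPhraseSketch()
{
    SpanQuery[] clauses = new SpanQuery[]
    {
        new SpanTermQuery(new Term("body", "united")),
        new SpanTermQuery(new Term("body", "states"))
    };
    SpanNearQuery spanNear = new SpanNearQuery(clauses, 0, true); // slop 0, in order

    PhraseQuery phrase = new PhraseQuery(); // slop defaults to 0
    phrase.Add(new Term("body", "united"));
    phrase.Add(new Term("body", "states"));
    // Both queries match documents containing "united" immediately followed by "states".
}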
public void TestGetBestFragmentsFilteredPhraseQuery()
{
    var helper = new TestHighlightRunner();
    helper.TestAction = () =>
    {
        numHighlights = 0;
        var rf = new TermRangeFilter("contents", "john", "john", true, true);
        var pq = new PhraseQuery();
        pq.Add(new Term("contents", "john"));
        pq.Add(new Term("contents", "kennedy"));
        var fq = new FilteredQuery(pq, rf);
        DoSearching(fq);
        helper.DoStandardHighlights(analyzer, searcher, hits, query, this);
        // Currently highlights "John" and "Kennedy" separately
        Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
    };
    helper.Start();
}
public virtual void TestCJKSloppyPhrase()
{
    // individual CJK chars as terms
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(this);
    PhraseQuery expected = new PhraseQuery();
    expected.Slop = 3;
    expected.Add(new Term("field", "中"));
    expected.Add(new Term("field", "国"));
    QueryBuilder builder = new QueryBuilder(analyzer);
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国", 3));
}
public virtual void TestPhraseQueryPositionIncrements()
{
    PhraseQuery expected = new PhraseQuery();
    expected.Add(new Term("field", "1"));
    expected.Add(new Term("field", "2"), 2);
    CharacterRunAutomaton stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);
    QueryBuilder builder = new QueryBuilder(analyzer);
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
}
/// <summary>
/// Searches the specified query.
/// </summary>
/// <param name="query">The query.</param>
/// <param name="searchType">Type of the search.</param>
/// <param name="entities">The entities.</param>
/// <param name="fieldCriteria">The field criteria.</param>
/// <param name="size">The size.</param>
/// <param name="from">From.</param>
/// <param name="totalResultsAvailable">The total results available.</param>
/// <returns></returns>
public override List<IndexModelBase> Search(string query, SearchType searchType, List<int> entities, SearchFieldCriteria fieldCriteria, int? size, int? from, out long totalResultsAvailable)
{
    List<IndexModelBase> documents = new List<IndexModelBase>();
    totalResultsAvailable = 0;
    bool allEntities = false;

    BooleanQuery queryContainer = new BooleanQuery();
    List<string> combinedFields = new List<string>();
    List<Type> indexModelTypes = new List<Type>();
    Dictionary<string, Analyzer> combinedFieldAnalyzers = new Dictionary<string, Analyzer>();

    using (RockContext rockContext = new RockContext())
    {
        var entityTypeService = new EntityTypeService(rockContext);
        if (entities == null || entities.Count == 0)
        {
            // add all entities
            allEntities = true;
            entities = entities ?? new List<int>(); // guard: the original called Add on a possibly null list
            var selectedEntityTypes = EntityTypeCache.All().Where(e => e.IsIndexingSupported && e.IsIndexingEnabled && e.FriendlyName != "Site");
            foreach (var entityTypeCache in selectedEntityTypes)
            {
                entities.Add(entityTypeCache.Id);
            }
        }

        foreach (var entityId in entities)
        {
            // get the entity's search model name
            var entityType = entityTypeService.GetNoTracking(entityId);
            indexModelTypes.Add(entityType.IndexModelType);

            // if this is the person model we need to add two model types: one for people and one for businesses
            // wish there was a cleaner way to do this
            if (entityType.Guid == SystemGuid.EntityType.PERSON.AsGuid())
            {
                indexModelTypes.Add(typeof(BusinessIndex));
            }
        }

        indexModelTypes = indexModelTypes.Distinct().ToList();
        CombineIndexTypes(indexModelTypes, out combinedFields, out combinedFieldAnalyzers);

        if (entities != null && entities.Count != 0 && !allEntities)
        {
            var indexModelTypesQuery = new BooleanQuery();
            indexModelTypes.ForEach(f => indexModelTypesQuery.Add(new TermQuery(new Term("type", f.Name.ToLower())), Occur.SHOULD));
            queryContainer.Add(indexModelTypesQuery, Occur.MUST);
        }
    }

    TopDocs topDocs = null;

    // Use the analyzer in fieldAnalyzers if that field is in that dictionary, otherwise use StandardAnalyzer.
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer: new StandardAnalyzer(_matchVersion), fieldAnalyzers: combinedFieldAnalyzers);

    if (fieldCriteria != null && fieldCriteria.FieldValues?.Count > 0)
    {
        Occur occur = fieldCriteria.SearchType == CriteriaSearchType.And ? Occur.MUST : Occur.SHOULD;
        foreach (var match in fieldCriteria.FieldValues)
        {
            BooleanClause booleanClause = new BooleanClause(new TermQuery(new Term(match.Field, match.Value)), occur);
            booleanClause.Query.Boost = match.Boost;
            queryContainer.Add(booleanClause);
        }
    }

    switch (searchType)
    {
        case SearchType.ExactMatch:
        {
            var wordQuery = new BooleanQuery();

            if (!string.IsNullOrWhiteSpace(query))
            {
                var words = query.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (var word in words)
                {
                    var innerQuery = new BooleanQuery();
                    combinedFields.ForEach(f => innerQuery.Add(new PrefixQuery(new Term(f, word.ToLower())), Occur.SHOULD));
                    wordQuery.Add(innerQuery, Occur.SHOULD);
                }
            }

            if (wordQuery.Count() != 0)
            {
                queryContainer.Add(wordQuery, Occur.MUST);
            }

            // special logic to support emails
            if (query.Contains("@"))
            {
                queryContainer.Add(new BooleanClause(new TermQuery(new Term("Email", query)), Occur.SHOULD));
            }

            // special logic to support phone search
            if (query.IsDigitsOnly())
            {
                queryContainer.Add(new BooleanClause(new WildcardQuery(new Term("PhoneNumbers", "*" + query + "*")), Occur.SHOULD));
            }

            // add a search for all the words as one single search term
            foreach (var field in combinedFields)
            {
                var phraseQuery = new PhraseQuery();
                phraseQuery.Add(new Term(field, query.ToLower()));
                queryContainer.Add(phraseQuery, Occur.SHOULD);
            }

            break;
        }
        case SearchType.Fuzzy:
        {
            foreach (var field in combinedFields)
            {
                queryContainer.Add(new FuzzyQuery(new Term(field, query.ToLower())), Occur.SHOULD);
            }

            break;
        }
        case SearchType.Wildcard:
        {
            bool enablePhraseSearch = true;

            if (!string.IsNullOrWhiteSpace(query))
            {
                BooleanQuery wildcardQuery = new BooleanQuery();

                // break each search term into a separate query and add the * to the end of each
                var queryTerms = query.Split(' ').Select(p => p.Trim()).ToList();

                // special logic to support emails
                if (queryTerms.Count == 1 && query.Contains("@"))
                {
                    wildcardQuery.Add(new WildcardQuery(new Term("Email", "*" + query.ToLower() + "*")), Occur.SHOULD);
                    enablePhraseSearch = false;
                }
                else
                {
                    foreach (var queryTerm in queryTerms)
                    {
                        if (!string.IsNullOrWhiteSpace(queryTerm))
                        {
                            var innerQuery = new BooleanQuery();
                            combinedFields.ForEach(f => innerQuery.Add(new PrefixQuery(new Term(f, queryTerm.ToLower())), Occur.SHOULD));
                            wildcardQuery.Add(innerQuery, Occur.MUST);
                        }
                    }

                    // add special logic to help boost last names
                    if (queryTerms.Count() > 1 && (indexModelTypes.Contains(typeof(PersonIndex)) || indexModelTypes.Contains(typeof(BusinessIndex))))
                    {
                        BooleanQuery nameQuery = new BooleanQuery
                        {
                            { new PrefixQuery(new Term("FirstName", queryTerms.First().ToLower())), Occur.MUST },
                            { new PrefixQuery(new Term("LastName", queryTerms.Last().ToLower())) { Boost = 30 }, Occur.MUST }
                        };
                        wildcardQuery.Add(nameQuery, Occur.SHOULD);

                        nameQuery = new BooleanQuery
                        {
                            { new PrefixQuery(new Term("NickName", queryTerms.First().ToLower())), Occur.MUST },
                            { new PrefixQuery(new Term("LastName", queryTerms.Last().ToLower())) { Boost = 30 }, Occur.MUST }
                        };
                        wildcardQuery.Add(nameQuery, Occur.SHOULD);
                    }

                    // special logic to support phone search
                    if (query.IsDigitsOnly())
                    {
                        wildcardQuery.Add(new PrefixQuery(new Term("PhoneNumbers", queryTerms.First().ToLower())), Occur.SHOULD);
                    }
                }

                queryContainer.Add(wildcardQuery, Occur.MUST);
            }

            // add a search for all the words as one single search term
            if (enablePhraseSearch)
            {
                foreach (var field in combinedFields)
                {
                    var phraseQuery = new PhraseQuery();
                    phraseQuery.Add(new Term(field, query.ToLower()));
                    queryContainer.Add(phraseQuery, Occur.SHOULD);
                }
            }

            break;
        }
    }

    int returnSize = 10;
    if (size.HasValue)
    {
        returnSize = size.Value;
    }

    OpenReader();

    if (from.HasValue)
    {
        TopScoreDocCollector collector = TopScoreDocCollector.Create(returnSize * 10, true); // Search for 10 pages with returnSize entries in each page
        _indexSearcher.Search(queryContainer, collector);
        topDocs = collector.GetTopDocs(from.Value, returnSize);
    }
    else
    {
        topDocs = _indexSearcher.Search(queryContainer, returnSize);
    }

    totalResultsAvailable = topDocs.TotalHits;

    if (topDocs != null)
    {
        foreach (var hit in topDocs.ScoreDocs)
        {
            var document = LuceneDocToIndexModel(queryContainer, hit);
            if (document != null)
            {
                documents.Add(document);
            }
        }
    }

    return documents;
}
protected internal virtual void SmokeTestSearcher(IndexSearcher s)
{
    RunQuery(s, new TermQuery(new Term("body", "united")));
    RunQuery(s, new TermQuery(new Term("titleTokenized", "states")));
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("body", "united"));
    pq.Add(new Term("body", "states"));
    RunQuery(s, pq);
}
/// <summary>
/// Search
/// </summary>
protected void SearchContent(int page)
{
    string indexPath = @"C:\lucenedir"; // create the lucenedir folder if it does not exist; placing it under the site root is fine for testing, but use a dedicated location in production
    string[] kw = BookShop.Common.WebCommon.PanGuSplit(Request["searchText"]);
    //string kw = "面向对象";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // search conditions
    PhraseQuery query = new PhraseQuery();
    foreach (string word in kw) // let the user separate words with spaces; each space-delimited token is a term, e.g. "计算机 专业"
    {
        //query.Add(new Term("title", word));
        query.Add(new Term("msg", word));
    }
    //query.Add(new Term("body", "语言")); // more conditions can be added; they are combined with AND and order does not matter
    //query.Add(new Term("body", "大学生"));
    //query.Add(new Term("body", kw)); // articles whose body contains kw
    query.SetSlop(100); // maximum distance between the query terms; terms that sit too far apart in an article are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is useless)

    // TopScoreDocCollector is the container that holds the search results
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(query, null, collector); // run the query and put the results into the collector

    // paging
    int pageSize = 12;
    page = page < 1 ? 1 : page;
    ScoreDoc[] docs = collector.TopDocs((page - 1) * pageSize, pageSize).scoreDocs;
    int pageCount = (int)Math.Ceiling((double)collector.GetTotalHits() / pageSize);
    page = page > pageCount ? pageCount : page;
    //ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all matching documents; GetTotalHits() is the total count. TopDocs(300, 20) returns documents 300 through 320, so this can be used for paging.

    //this.listBox1.Items.Clear();
    List<SearchContent> list = new List<SearchContent>();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc[] only carries document ids, so the matching Documents are not all loaded into memory at once,
        // which keeps memory pressure low; fetch the full Document by id via searcher.Doc when the details are needed.
        SearchContent viewmodel = new SearchContent();
        int docId = docs[i].doc;            // the document id assigned internally by Lucene
        Document doc = searcher.Doc(docId); // load the full document for that id
        viewmodel.Id = Convert.ToInt32(doc.Get("id")); // read back the stored field values
        viewmodel.Title = doc.Get("title");
        viewmodel.ISBN = doc.Get("isbn");
        viewmodel.Price = Convert.ToDecimal(doc.Get("price"));
        viewmodel.Discount = Convert.ToInt32(doc.Get("discount"));
        viewmodel.Msg = Common.WebCommon.CreateHightLight(Request["searchText"], doc.Get("msg"));
        list.Add(viewmodel);
    }

    // every search inserts one record into the database
    SearchDatails searchmodel = new SearchDatails();
    searchmodel.Id = Guid.NewGuid();
    searchmodel.KeyWords = Request["searchText"];
    searchmodel.SearchDateTime = DateTime.Now;
    sbll.AddEntity(searchmodel);

    ViewData["list"] = list;
    ViewBag.PageIndex = page;
    ViewBag.PageCount = pageCount;
    ViewData["booktop"] = "<div class='bottom-grid'>";
    ViewData["bookfoot"] = "<div class='clearfix'></div></div>";
    ViewData["searchname"] = Request["searchText"];
}
public virtual void TestPhrase()
{
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "seventy"));
    query.Add(new Term("field", "seven"));
    CheckHits(query, new int[] { 77, 177, 277, 377, 477, 577, 677, 777, 877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977 });
}
public static Query GenerateQuery(string fieldName, string query, Analyzer analyzer)
{
    if (query == null)
    {
        return null;
    }

    var resultQuery = new BooleanQuery();
    var phraseQuery = new PhraseQuery { Slop = 0 };

    // not much to search, only do exact match
    if (query.Length < 4)
    {
        phraseQuery.Add(new Term(fieldName, query));
        resultQuery.Add(phraseQuery, Occur.MUST);
        return resultQuery;
    }

    // add phrase match with boost; we will add the terms to the phrase below
    phraseQuery.Boost = 20;
    resultQuery.Add(phraseQuery, Occur.SHOULD);

    var tokenStream = analyzer.TokenStream("SearchText", new StringReader(query));
    var termAttribute = tokenStream.AddAttribute<ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        var term = termAttribute.Term;
        phraseQuery.Add(new Term(fieldName, term));

        var exactMatch = new TermQuery(new Term(fieldName, term));

        // if the term is 3 characters or longer, we'll do both exact match and wildcard/prefix
        if (term.Length >= 3)
        {
            var innerQuery = new BooleanQuery();

            // add exact match with boost
            exactMatch.Boost = 10;
            innerQuery.Add(exactMatch, Occur.SHOULD);

            // add wildcard
            var pq = new PrefixQuery(new Term(fieldName, term));
            // needed so that wildcard searches will return a score
            pq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE; //new ErrorCheckingScoringBooleanQueryRewrite();
            innerQuery.Add(pq, Occur.SHOULD);

            resultQuery.Add(innerQuery, Occur.MUST);
        }
        else
        {
            resultQuery.Add(exactMatch, Occur.MUST);
        }
    }

    return resultQuery.Clauses.Count > 0 ? resultQuery : null;
}
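// Hypothetical usage sketch of GenerateQuery above (the analyzer choice and input
// string are assumptions, not part of the original): input shorter than 4 characters
// falls back to an exact phrase MUST clause, while longer input gets a boosted
// whole-phrase SHOULD clause plus, per analyzed term, an exact TermQuery (boost 10)
// OR a PrefixQuery as a MUST clause.
private static Query GenerateQueryUsageSketch()
{
    // assumed analyzer; any Lucene.Net 3.x Analyzer works here
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    // "lucene highlighting" is long enough to take the analyzed path
    return GenerateQuery("SearchText", "lucene highlighting", analyzer);
}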
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random);
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));
    StringBuilder sb = new StringBuilder(265);
    string term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document doc = new Document();
        sb.Append(term).Append(" ");
        string content = sb.ToString();
        Field noTf = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
        doc.Add(noTf);
        Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
        doc.Add(tf);
        writer.AddDocument(doc);
        //System.out.println(d);
    }

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    /*
     * Verify the index
     */
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d);

    PhraseQuery pq = new PhraseQuery();
    pq.Add(a);
    pq.Add(c);
    try
    {
        searcher.Search(pq, 10);
        Assert.Fail("did not hit expected exception");
    }
    catch (Exception e)
    {
        Exception cause = e;
        // If the searcher uses an executor service, the IAE is wrapped into other exceptions
        while (cause.InnerException != null)
        {
            cause = cause.InnerException;
        }
        if (!(cause is InvalidOperationException))
        {
            throw new InvalidOperationException("Expected an IAE", e);
        } // else OK because positions are not indexed
    }

    searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this));
    Assert.AreEqual(15, CountingHitCollector.Count);

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Creates a query from the analysis chain.
/// <para/>
/// Expert: this is more useful for subclasses such as queryparsers.
/// If using this class directly, just use <see cref="CreateBooleanQuery(string, string)"/>
/// and <see cref="CreatePhraseQuery(string, string)"/>. </summary>
/// <param name="analyzer"> Analyzer used for this query. </param>
/// <param name="operator"> Default boolean operator used for this query. </param>
/// <param name="field"> Field to create queries against. </param>
/// <param name="queryText"> Text to be passed to the analysis chain. </param>
/// <param name="quoted"> <c>true</c> if phrases should be generated when terms occur at more than one position. </param>
/// <param name="phraseSlop"> Slop factor for phrase/multiphrase queries. </param>
protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
{
    Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);

    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    CachingTokenFilter buffer = null;
    ITermToBytesRefAttribute termAtt = null;
    IPositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;
    bool hasMoreTokens = false;

    TokenStream source = null;
    try
    {
        source = analyzer.GetTokenStream(field, new StringReader(queryText));
        source.Reset();
        buffer = new CachingTokenFilter(source);
        buffer.Reset();

        if (buffer.HasAttribute<ITermToBytesRefAttribute>())
        {
            termAtt = buffer.GetAttribute<ITermToBytesRefAttribute>();
        }
        if (buffer.HasAttribute<IPositionIncrementAttribute>())
        {
            posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
        }

        if (termAtt != null)
        {
            try
            {
                hasMoreTokens = buffer.IncrementToken();
                while (hasMoreTokens)
                {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    if (positionIncrement != 0)
                    {
                        positionCount += positionIncrement;
                    }
                    else
                    {
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.IncrementToken();
                }
            }
            catch (System.IO.IOException)
            {
                // ignore
            }
        }
    }
    catch (System.IO.IOException e)
    {
        throw new Exception("Error analyzing query text", e);
    }
    finally
    {
        IOUtils.DisposeWhileHandlingException(source);
    }

    // rewind the buffer stream
    buffer.Reset();

    BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

    if (numTokens == 0)
    {
        return null;
    }
    else if (numTokens == 1)
    {
        try
        {
            bool hasNext = buffer.IncrementToken();
            Debug.Assert(hasNext == true);
            termAtt.FillBytesRef();
        }
        catch (System.IO.IOException)
        {
            // safe to ignore, because we know the number of tokens
        }
        return NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
    }
    else
    {
        if (severalTokensAtSamePosition || (!quoted))
        {
            if (positionCount == 1 || (!quoted))
            {
                // no phrase query:
                if (positionCount == 1)
                {
                    // simple case: only one position, with synonyms
                    BooleanQuery q = NewBooleanQuery(true);
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        q.Add(currentQuery, Occur.SHOULD);
                    }
                    return q;
                }
                else
                {
                    // multiple positions
                    BooleanQuery q = NewBooleanQuery(false);
                    Query currentQuery = null;
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                        {
                            if (!(currentQuery is BooleanQuery))
                            {
                                Query t = currentQuery;
                                currentQuery = NewBooleanQuery(true);
                                ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                            }
                            ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                        }
                        else
                        {
                            if (currentQuery != null)
                            {
                                q.Add(currentQuery, @operator);
                            }
                            currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    q.Add(currentQuery, @operator);
                    return q;
                }
            }
            else
            {
                // phrase query:
                MultiPhraseQuery mpq = NewMultiPhraseQuery();
                mpq.Slop = phraseSlop;
                IList<Term> multiTerms = new List<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++)
                {
                    int positionIncrement = 1;
                    try
                    {
                        bool hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        termAtt.FillBytesRef();
                        if (posIncrAtt != null)
                        {
                            positionIncrement = posIncrAtt.PositionIncrement;
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (positionIncrement > 0 && multiTerms.Count > 0)
                    {
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        multiTerms.Clear();
                    }
                    position += positionIncrement;
                    multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
                if (enablePositionIncrements)
                {
                    mpq.Add(multiTerms.ToArray(), position);
                }
                else
                {
                    mpq.Add(multiTerms.ToArray());
                }
                return mpq;
            }
        }
        else
        {
            PhraseQuery pq = NewPhraseQuery();
            pq.Slop = phraseSlop;
            int position = -1;
            for (int i = 0; i < numTokens; i++)
            {
                int positionIncrement = 1;
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                    if (posIncrAtt != null)
                    {
                        positionIncrement = posIncrAtt.PositionIncrement;
                    }
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements)
                {
                    position += positionIncrement;
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                }
                else
                {
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
            }
            return pq;
        }
    }
}
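// A short, hedged usage sketch of the public entry points the summary above refers
// to (CreateBooleanQuery / CreatePhraseQuery), which delegate to the protected
// CreateFieldQuery worker. The analyzer parameter and "field"/"foo bar" values are
// assumptions for illustration.
private static void QueryBuilderUsageSketch(Analyzer analyzer)
{
    QueryBuilder builder = new QueryBuilder(analyzer);
    Query boolQuery = builder.CreateBooleanQuery("field", "foo bar");  // foo OR bar (default SHOULD operator)
    Query phrase = builder.CreatePhraseQuery("field", "foo bar");      // exact phrase "foo bar"
    Query sloppy = builder.CreatePhraseQuery("field", "foo bar", 2);   // phrase with slop 2
}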
/// <summary>
/// Get the search results
/// </summary>
/// <returns></returns>
private List<ViewModelContent> ShowSearchContent()
{
    //string indexPath = @"C:\lucenedir";
    string indexPath = Server.MapPath("~/lucenedir");
    List<string> list = Common.WebCommon.PanGuSplitWord(Request["txtSearch"].Trim()); // split the user's search input into terms

    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // search conditions; an earlier, simpler version searched only the Content field
    // and was replaced by the combined Title + Content search below
    PhraseQuery query = new PhraseQuery();
    foreach (string word in list) // let the user separate words with spaces; each space-delimited token is a term, e.g. "计算机 专业"
    {
        query.Add(new Term("Content", word)); // match on the content
    }
    query.SetSlop(100); // maximum distance between the query terms; terms that sit too far apart in an article are meaningless (e.g. if "大学生" and "简历" are separated by too many words, the match is useless)

    PhraseQuery queryTitle = new PhraseQuery();
    foreach (string word in list)
    {
        queryTitle.Add(new Term("Title", word)); // match on the title
    }
    queryTitle.SetSlop(100);

    // combine the search conditions
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(query, BooleanClause.Occur.SHOULD);
    booleanQuery.Add(queryTitle, BooleanClause.Occur.SHOULD);
    //query.Add(new Term("body", "语言")); // more conditions can be added; they are combined with AND and order does not matter
    //query.Add(new Term("body", "大学生"));
    //query.Add(new Term("body", kw)); // articles whose body contains kw

    // TopScoreDocCollector is the container that holds the search results
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    searcher.Search(booleanQuery, null, collector); // run the query and put the results into the collector
    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all matching documents; GetTotalHits() is the total count. TopDocs(300, 20) returns documents 300 through 320, so this can be used for paging.

    List<ViewModelContent> viewModelList = new List<ViewModelContent>();
    for (int i = 0; i < docs.Length; i++)
    {
        // ScoreDoc[] only carries document ids, so the matching Documents are not all loaded into memory at once,
        // which keeps memory pressure low; fetch the full Document by id via searcher.Doc when the details are needed.
        ViewModelContent viewModel = new ViewModelContent();
        int docId = docs[i].doc;            // the document id assigned internally by Lucene
        Document doc = searcher.Doc(docId); // load the full document for that id
        viewModel.Id = Convert.ToInt32(doc.Get("Id")); // read back the stored field values
        //viewModel.Title = doc.Get("Title");
        viewModel.Title = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Title"));     // highlight the search keywords
        viewModel.Content = Common.WebCommon.CreateHightLight(Request["txtSearch"], doc.Get("Content")); // highlight the search keywords
        viewModelList.Add(viewModel);
    }

    // record the search term in the detail table first
    SearchDetail searchDetail = new SearchDetail();
    //searchDetail.Id = Guid.NewGuid();
    searchDetail.KeyWords = Request["txtSearch"].Trim();
    searchDetail.SearchDateTime = DateTime.Now;
    Db.SearchDetail.Add(searchDetail);
    Db.SaveChanges();

    return viewModelList;
}
public static Query AnalyzedTerm(string fieldName, string term, LuceneTermType type, Analyzer analyzer, float? boost = null, float? similarity = null)
{
    if (type != LuceneTermType.String && type != LuceneTermType.Prefix && type != LuceneTermType.WildCard)
    {
        throw new InvalidOperationException("Analyzed terms can only be created from string values.");
    }

    if (boost.HasValue == false)
    {
        boost = 1;
    }

    if (type == LuceneTermType.WildCard)
    {
        return new WildcardQuery(GetAnalyzedWildcardTerm(fieldName, term, analyzer))
        {
            Boost = boost.Value
        };
    }

    var tokenStream = analyzer.ReusableTokenStream(fieldName, new StringReader(term));
    var terms = new List<string>();
    while (tokenStream.IncrementToken())
    {
        var attribute = (TermAttribute)tokenStream.GetAttribute<ITermAttribute>();
        terms.Add(attribute.Term);
    }

    if (type == LuceneTermType.Prefix)
    {
        if (terms.Count != 0)
        {
            var first = terms[0];
            var actualTerm = first[first.Length - 1] == AsteriskChar ? first.Substring(0, first.Length - 1) : first;
            return new PrefixQuery(new Term(fieldName, actualTerm))
            {
                Boost = boost.Value
            };
        }

        // if the term that we are trying to prefix has been removed entirely by the analyzer, then we are going
        // to cheat a bit, and check for both the term as specified and the term in lower case format so we can
        // find it regardless of casing
        var removeStar = term.Substring(0, term.Length - 1);
        var booleanQuery = new BooleanQuery
        {
            Clauses =
            {
                new BooleanClause(new PrefixQuery(new Term(fieldName, removeStar)), Occur.SHOULD),
                new BooleanClause(new PrefixQuery(new Term(fieldName, removeStar.ToLowerInvariant())), Occur.SHOULD)
            },
            Boost = boost.Value
        };
        return booleanQuery;
    }

    if (terms.Count == 1)
    {
        return new TermQuery(new Term(fieldName, terms[0]))
        {
            Boost = boost.Value
        };
    }

    var pq = new PhraseQuery
    {
        Boost = boost.Value
    };
    foreach (var t in terms)
    {
        pq.Add(new Term(fieldName, t));
    }
    return pq;
}
/// <summary>
/// Adds a standard type clause to this instance
/// </summary>
/// <param name="term">Term to add to this query.</param>
/// <param name="occurrence">Defines how the term is added to this query.</param>
/// <param name="slop">The amount of allowed slop in a phrase query.</param>
/// <remarks>
/// Slop is the amount of movement each word is allowed in a non-exact phrase query.
/// For instance if you search for "Adobe Systems Incorporated" and the slop is set to 0 then
/// only results with that exact phrase are allowed. If you set the slop to 2 then two movements can be
/// made, max, for each word. In the same example with slop set to 2, results would be returned
/// for "Adobe Systems Incorporated", "Adobe Incorporated Systems", "Systems Adobe Incorporated",
/// and "Systems Incorporated Adobe".
/// </remarks>
public void AddBooleanClause(SearchTerm term, ClauseOccurrence occurrence, int slop)
{
    if (term == null)
        throw new ArgumentNullException("term", "term cannot be null");
    IncrementTotalClauses(1);
    if (term.IsPhrase)
    {
        PhraseQuery phraseQuery = new PhraseQuery();
        phraseQuery.Add(term.GetLuceneTerm());
        phraseQuery.SetSlop(slop);
        phraseQuery.SetBoost(term.Boost);
        this.luceneQuery.Add(phraseQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
        phraseQuery = null;
    }
    else
    {
        TermQuery termQuery = new TermQuery(term.GetLuceneTerm());
        termQuery.SetBoost(term.Boost);
        this.luceneQuery.Add(termQuery, TypeConverter.ConvertToLuceneClauseOccurrence(occurrence));
        termQuery = null;
    }
}
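// A minimal sketch of the slop semantics described in the remarks above, using the
// same older Lucene.Net API surface (SetSlop); the field name and terms are
// assumptions for illustration.
private static PhraseQuery SlopSketch()
{
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("contents", "adobe"));
    pq.Add(new Term("contents", "systems"));
    pq.Add(new Term("contents", "incorporated"));
    pq.SetSlop(0); // exact order only: "adobe systems incorporated"
    // SetSlop(2) would also match reordered forms such as "systems adobe incorporated",
    // scored lower the farther the terms have to move.
    return pq;
}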
public void TestPhraseHighlightTest()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();
    Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
    Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
    doc.Add(longTermField);
    doc.Add(noLongTermField);
    writer.AddDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.Open(writer, true);
    int docId = 0;
    String field = "no_long_term";
    {
        BooleanQuery query = new BooleanQuery();
        query.Add(new TermQuery(new Term(field, "test")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    {
        BooleanQuery query = new BooleanQuery();
        PhraseQuery pq = new PhraseQuery();
        pq.Add(new Term(field, "test"));
        pq.Add(new Term(field, "foo"));
        pq.Add(new Term(field, "highlighed"));
        pq.Slop = 5;
        query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(pq, Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(0, bestFragments.Length);
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    {
        PhraseQuery query = new PhraseQuery();
        query.Add(new Term(field, "test"));
        query.Add(new Term(field, "foo"));
        query.Add(new Term(field, "highlighed"));
        query.Slop = 3;
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(0, bestFragments.Length);
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    {
        PhraseQuery query = new PhraseQuery();
        query.Add(new Term(field, "test"));
        query.Add(new Term(field, "foo"));
        query.Add(new Term(field, "highlighted"));
        query.Slop = 30;
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.Length);
    }
    {
        BooleanQuery query = new BooleanQuery();
        PhraseQuery pq = new PhraseQuery();
        pq.Add(new Term(field, "test"));
        pq.Add(new Term(field, "foo"));
        pq.Add(new Term(field, "highlighed"));
        pq.Slop = 5;
        BooleanQuery inner = new BooleanQuery();
        inner.Add(pq, Occur.MUST);
        inner.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(inner, Occur.MUST);
        query.Add(pq, Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.Length);
        bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
    }
    field = "long_term";
    {
        BooleanQuery query = new BooleanQuery();
        query.Add(new TermQuery(new Term(field, "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.Add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.GetFieldQuery(query, reader);
        String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.Length);
        assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>", bestFragments[0]);
    }
    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestPositionIncrementMultiFields()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    writer.AddDocument(MakeDocumentWithFields());
    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("indexed_not_tokenized", "test1"));
    query.Add(new Term("indexed_not_tokenized", "test2"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    DoAssert(searcher.Doc(hits[0].Doc), true);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public void TestBooleanPhraseWithSynonym()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
    type.StoreTermVectorOffsets = true;
    type.StoreTermVectorPositions = true;
    type.StoreTermVectors = true;
    type.Freeze();
    Token syn = new Token("httpwwwfacebookcom", 6, 29);
    syn.PositionIncrement = 0;
    CannedTokenStream ts = new CannedTokenStream(
        new Token("test", 0, 4),
        new Token("http", 6, 10),
        syn,
        new Token("www", 13, 16),
        new Token("facebook", 17, 25),
        new Token("com", 26, 29)
    );
    Field field = new Field("field", ts, type);
    doc.Add(field);
    doc.Add(new StoredField("field", "Test: http://www.facebook.com"));
    writer.AddDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();

    IndexReader reader = DirectoryReader.Open(writer, true);
    int docId = 0;

    // query1: match
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "test"));
    pq.Add(new Term("field", "http"));
    pq.Add(new Term("field", "www"));
    pq.Add(new Term("field", "facebook"));
    pq.Add(new Term("field", "com"));
    FieldQuery fieldQuery = highlighter.GetFieldQuery(pq, reader);
    String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

    // query2: match
    PhraseQuery pq2 = new PhraseQuery();
    pq2.Add(new Term("field", "test"));
    pq2.Add(new Term("field", "httpwwwfacebookcom"));
    pq2.Add(new Term("field", "www"));
    pq2.Add(new Term("field", "facebook"));
    pq2.Add(new Term("field", "com"));
    fieldQuery = highlighter.GetFieldQuery(pq2, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

    // query3: OR query1 and query2 together
    BooleanQuery bq = new BooleanQuery();
    bq.Add(pq, Occur.SHOULD);
    bq.Add(pq2, Occur.SHOULD);
    fieldQuery = highlighter.GetFieldQuery(bq, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestPhrase2()
{
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "seventish"));
    query.Add(new Term("field", "sevenon"));
    CheckHits(query, new int[] { });
}
private Query CreateStringValueQuery(QueryFieldValue value, FieldInfo currentField)
{
    switch (value.Token)
    {
        case SnLucLexer.Token.Number:
        case SnLucLexer.Token.String:
            if (value.StringValue == ContentQuery.EmptyText)
            {
                return new TermQuery(new Term(currentField.Name, value.StringValue));
            }
            if (value.StringValue == ContentQuery.EmptyInnerQueryText)
            {
                return new TermQuery(new Term("Id", NumericUtils.IntToPrefixCoded(0)));
            }

            var words = GetAnalyzedText(currentField.Name, value.StringValue);

            if (words.Length == 0)
            {
                words = new String[] { String.Empty }; //return null;
            }
            if (words.Length == 1)
            {
                var term = new Term(currentField.Name, words[0]);
                if (value.FuzzyValue == null)
                {
                    return new TermQuery(term);
                }
                return new FuzzyQuery(term, Convert.ToSingle(value.FuzzyValue));
            }

            var phraseQuery = new PhraseQuery();
            foreach (var word in words)
            {
                phraseQuery.Add(new Term(currentField.Name, word));
            }

            if (value.FuzzyValue != null)
            {
                var slop = Convert.ToInt32(value.FuzzyValue.Value);
                phraseQuery.SetSlop(slop);
            }
            return phraseQuery;
        case SnLucLexer.Token.WildcardString:
            if (!value.StringValue.EndsWith("*"))
            {
                return new WildcardQuery(new Term(currentField.Name, value.StringValue));
            }
            var s = value.StringValue.TrimEnd('*');
            if (s.Contains('?') || s.Contains('*'))
            {
                return new WildcardQuery(new Term(currentField.Name, value.StringValue));
            }
            return new PrefixQuery(new Term(currentField.Name, s));
        default:
            throw new NotImplementedException("CreateValueQuery with Token: " + value.Token);
    }
}
public virtual void TestWithPendingDeletes3()
{
    // main directory
    Directory dir = NewDirectory();
    // auxiliary directory
    Directory aux = NewDirectory();
    SetUpDirs(dir, aux);
    IndexWriter writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));

    // Adds 10 docs, then replaces them with another 10
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("id", "" + (i % 10), Field.Store.NO));
        doc.Add(NewTextField("content", "bbb " + i, Field.Store.NO));
        writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);
    }

    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery();
    q.Add(new Term("content", "bbb"));
    q.Add(new Term("content", "14"));
    writer.DeleteDocuments(q);

    writer.AddIndexes(aux);
    writer.ForceMerge(1);
    writer.Commit();

    VerifyNumDocs(dir, 1039);
    VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
    VerifyTermDocs(dir, new Term("content", "bbb"), 9);

    writer.Dispose();
    dir.Dispose();
    aux.Dispose();
}
// OR search
public List<SearchResult> LenuceOrSearch(string kw, int pageNo, int pageLen, out int recCount)
{
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NoLockFactory());
    IndexReader reader = IndexReader.Open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    List<PhraseQuery> lstQuery = new List<PhraseQuery>();
    List<string> lstkw = PanGuSplitWord(kw); // split the user's search input into terms
    foreach (string word in lstkw)
    {
        PhraseQuery query = new PhraseQuery(); // query condition
        query.Slop = 100; // if two terms are more than 100 positions apart (an empirical value), drop the match; relevance is too low at that distance
        query.Add(new Term("Content", word)); // contains("Content", word)

        PhraseQuery titleQuery = new PhraseQuery(); // query condition
        titleQuery.Add(new Term("Title", word));

        lstQuery.Add(query);
        lstQuery.Add(titleQuery);
    }

    BooleanQuery bq = new BooleanQuery();
    foreach (var v in lstQuery)
    {
        // Occur.SHOULD means OR, Occur.MUST means AND
        bq.Add(v, Occur.SHOULD);
    }

    TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true); // container that holds the search results
    searcher.Search(bq, null, collector); // run the query and put the results into the collector
    recCount = collector.TotalHits; // total number of hits
    ScoreDoc[] docs = collector.TopDocs((pageNo - 1) * pageLen, pageLen).ScoreDocs; // take one page of results; the second argument is the page size, not an end index

    List<SearchResult> resultList = new List<SearchResult>();
    string msg = string.Empty;
    string title = string.Empty;
    for (int i = 0; i < docs.Length; i++) // walk the results
    {
        // the results only carry document ids, because a full Document can use a lot of memory
        // (think of the difference between DataSet and DataReader); the content needs a second lookup
        int docId = docs[i].Doc;
        Document doc = searcher.Doc(docId); // fetch the content by id; a Document goes in, a Document comes out
        SearchResult result = new SearchResult();
        result.Id = Convert.ToInt32(doc.Get("Id"));
        msg = doc.Get("Content"); // only fields stored with Field.Store.YES can be read back with Get
        title = doc.Get("Title");
        // highlight the search keywords
        foreach (string word in lstkw)
        {
            title = title.Replace(word, "<span style='color:red;'>" + word + "</span>");
        }
        result.Msg = CreateHightLight(kw, msg);
        result.Title = title;
        result.CreateTime = Convert.ToDateTime(doc.Get("CreateTime"));
        result.Url = "/Article/Details?Id=" + result.Id + "&kw=" + kw;
        resultList.Add(result);
    }
    return resultList;
}