public T ExecuteScalar<T>(QueryModel queryModel)
{
    var luceneQueryModel = PrepareQuery(queryModel);
    var searcherHandle = CheckoutSearcher();

    using (searcherHandle)
    {
        var searcher = searcherHandle.Searcher;
        var skipResults = luceneQueryModel.SkipResults;
        var maxResults = Math.Min(luceneQueryModel.MaxResults, searcher.MaxDoc - skipResults);

        TopFieldDocs hits;

        if (maxResults > 0)
        {
            var executionContext = new QueryExecutionContext(searcher, luceneQueryModel.Query, luceneQueryModel.Filter);
            PrepareSearchSettings(executionContext);
            hits = searcher.Search(executionContext.Query, executionContext.Filter, maxResults, luceneQueryModel.Sort);
        }
        else
        {
            hits = new TopFieldDocs(0, new ScoreDoc[0], new SortField[0], 0);
        }

        var handler = ScalarResultHandlerRegistry.Instance.GetItem(luceneQueryModel.ResultSetOperator.GetType());
        return handler.Execute<T>(luceneQueryModel, hits);
    }
}
public Task<SearchResult> Search(string searchQuery, int skip, int limit, string searchField = "")
{
    if (_isRebuilding || string.IsNullOrWhiteSpace(searchQuery.Replace("*", string.Empty).Replace("?", string.Empty)))
    {
        return new SearchResult(new List<SearchItem>(), 0).AsTask();
    }

    using var dir = FSDirectory.Open(FileSystemLayout.SearchIndexFolder);
    using var reader = DirectoryReader.Open(dir);
    var searcher = new IndexSearcher(reader);
    int hitsLimit = skip + limit;
    using var analyzer = new StandardAnalyzer(AppLuceneVersion);
    QueryParser parser = !string.IsNullOrWhiteSpace(searchField)
        ? new QueryParser(AppLuceneVersion, searchField, analyzer)
        : new MultiFieldQueryParser(AppLuceneVersion, new[] { TitleField }, analyzer);
    parser.AllowLeadingWildcard = true;
    Query query = ParseQuery(searchQuery, parser);
    var filter = new DuplicateFilter(TitleAndYearField);
    var sort = new Sort(new SortField(SortTitleField, SortFieldType.STRING));
    TopFieldDocs topDocs = searcher.Search(query, filter, hitsLimit, sort, true, true);
    IEnumerable<ScoreDoc> selectedHits = topDocs.ScoreDocs.Skip(skip).Take(limit);

    var searchResult = new SearchResult(
        selectedHits.Map(d => ProjectToSearchItem(searcher.Doc(d.Doc))).ToList(),
        topDocs.TotalHits);

    searchResult.PageMap = GetSearchPageMap(searcher, query, filter, sort, limit);

    return searchResult.AsTask();
}
static ScoreDoc[] SearchTime(IndexSearcher searcher, string queryString, string field, int numHit, bool inOrder)
{
    //TopScoreDocCollector collector = TopScoreDocCollector.create(numHit, inOrder);
    Analyzer analyser = new PanGuAnalyzer();
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyser);
    var querys = queryString.Split('&');
    // String.Split never returns null, so only the length check matters; the
    // original `querys != null || querys.Length > 1` was always true.
    if (querys.Length > 1)
    {
        BooleanQuery query = new BooleanQuery();
        foreach (var str in querys)
        {
            query.Add(parser.Parse(str), BooleanClause.Occur.MUST);
        }
        TopFieldDocs topField = searcher.Search(query, null, 20, new Sort(new SortField("Time", SortField.STRING_VAL, true)));
        return topField.scoreDocs;
    }
    else
    {
        Query query = parser.Parse(queryString);
        TopFieldDocs topField = searcher.Search(query, null, 20, new Sort(new SortField("Time", SortField.STRING_VAL, true)));
        //searcher.Search(query, collector);
        return topField.scoreDocs;
    }
}
/// <summary>
/// Search the Lucene index and return the documents on the requested page.
/// </summary>
/// <param name="directoryPath">Path of the index directory.</param>
/// <param name="query">Query to execute.</param>
/// <param name="sort">Optional sort; pass null for relevance order.</param>
/// <param name="pagerInfo">Paging state; RecordCount is set to the total hit count.</param>
/// <param name="callback">Optional callback invoked for each returned document.</param>
/// <returns>The documents on the requested page.</returns>
public static List<Document> SearchLuceneData(string directoryPath, Query query, Sort sort, PagerInfo pagerInfo, Action<Document> callback)
{
    List<Document> list = new List<Document>();
    FSDirectory directory = FSDirectory.Open(new System.IO.DirectoryInfo(directoryPath), new NoLockFactory());
    IndexReader indexReader = IndexReader.Open(directory, true);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    ScoreDoc[] docs;
    int totalCount;

    if (sort != null)
    {
        TopFieldDocs resultFieldDocs = indexSearcher.Search(query, null, indexSearcher.MaxDoc(), sort);
        totalCount = resultFieldDocs.totalHits;
        docs = resultFieldDocs.scoreDocs;
    }
    else
    {
        TopDocs resultFieldDocs = indexSearcher.Search(query, null, indexSearcher.MaxDoc());
        totalCount = resultFieldDocs.totalHits;
        docs = resultFieldDocs.scoreDocs;
    }

    // Paging arithmetic is identical for both branches, so it is done once here.
    pagerInfo.RecordCount = totalCount;
    int startOffset = (pagerInfo.PageIndex - 1) * pagerInfo.PageSize;
    int endOffset = Math.Min(pagerInfo.PageIndex * pagerInfo.PageSize, totalCount);

    if (totalCount > 0)
    {
        for (int i = startOffset; i < endOffset; i++)
        {
            ScoreDoc hit = docs[i];
            Document doc = indexSearcher.Doc(hit.doc);
            list.Add(doc);
            if (callback != null)
            {
                callback(doc);
            }
        }
    }

    indexSearcher.Close();
    directory.Close();
    return list;
}
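A hedged usage sketch of the pager above, following its 2.x/3.x-era Lucene.Net API. The index path, the "Title"/"AddTime" field names, and the assumption that PagerInfo exposes settable PageIndex/PageSize properties are illustrative, not taken from the original source:

// Hypothetical caller: fetch page 2 (25 docs per page), newest first.
var pager = new PagerInfo { PageIndex = 2, PageSize = 25 };
List<Document> page = SearchLuceneData(
    @"C:\indexes\articles",                              // assumed index folder
    new TermQuery(new Term("Title", "lucene")),          // assumed field name
    new Sort(new SortField("AddTime", SortField.LONG, true)),
    pager,
    doc => Console.WriteLine(doc.Get("Title")));
// After the call, pager.RecordCount holds the total number of hits.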
// TODO: ideally we'd test > Short.MAX_VALUE too, but compilation is currently recursive.
// so if we want to test such huge expressions, we need to instead change parser to use an explicit Stack
/// <exception cref="System.Exception"></exception>
private void DoTestLotsOfBindings(int n)
{
    SimpleBindings bindings = new SimpleBindings();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < n; i++)
    {
        if (i > 0)
        {
            sb.Append("+");
        }
        sb.Append("x" + i);
        bindings.Add(new SortField("x" + i, SortFieldType.SCORE));
    }
    var expr = JavascriptCompiler.Compile(sb.ToString());
    var sort = new Sort(expr.GetSortField(bindings, true));
    Query query = new TermQuery(new Term("body", "contents"));
    TopFieldDocs td = searcher.Search(query, null, 3, sort, true, true);
    for (int i = 0; i < 3; i++)
    {
        FieldDoc d = (FieldDoc)td.ScoreDocs[i];
        float expected = n * d.Score;
        float actual = (float)((double)d.Fields[0]);
        AreEqual(expected, actual, CheckHits.ExplainToleranceDelta(expected, actual));
    }
}
/// <summary>
/// Query log entries, sorted by time descending.
/// Note: for an AND search, join the terms with '&amp;'; for an OR search, separate them with spaces.
/// E.g. to search "1 and 123", pass "1&amp;123"; to search "1 or 123", pass "1 123".
/// </summary>
/// <param name="searcher"></param>
/// <param name="queryString"></param>
/// <param name="field"></param>
/// <param name="numHit"></param>
/// <param name="inOrder"></param>
/// <returns></returns>
static ScoreDoc[] SearchTime(IndexSearcher searcher, string queryString, string field, int numHit, bool inOrder)
{
    //TopScoreDocCollector collector = TopScoreDocCollector.create(numHit, inOrder);
    Analyzer analyser = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    //Analyzer analyser = new PanGuAnalyzer();
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, field, analyser);
    var querys = queryString.Split('&');
    if (querys != null && querys.Length > 1)
    {
        BooleanQuery query = new BooleanQuery();
        foreach (var str in querys)
        {
            if (String.IsNullOrWhiteSpace(str))
            {
                continue;
            }
            query.Add(parser.Parse(str), Occur.MUST);
        }
        TopFieldDocs topField = searcher.Search(query, null, 20, new Sort(new SortField("Time", SortField.STRING_VAL, true)));
        return topField.ScoreDocs;
    }
    else
    {
        Query query = parser.Parse(queryString);
        TopFieldDocs topField = searcher.Search(query, null, 20, new Sort(new SortField("Time", SortField.STRING_VAL, true)));
        return topField.ScoreDocs;
    }
}
public IList<TEntity> GetAll(Query query = null, Filter filter = null, Sort sort = null)
{
    TopFieldDocs topDocs = IndexSearcher.Search(query, filter, int.MaxValue, sort ?? Sort.RELEVANCE);
    IEnumerable<TEntity> entities = Definition.Convert(topDocs.ScoreDocs.Select(doc => IndexSearcher.Doc(doc.Doc)));
    return entities.ToList();
}
public IList<TSubclass> GetAll<TSubclass>(Query query = null, Filter filter = null, Sort sort = null)
    where TSubclass : TEntity
{
    BooleanQuery booleanQuery = UpdateQuery<TSubclass>(query);
    TopFieldDocs topDocs = IndexSearcher.Search(booleanQuery, filter, int.MaxValue, sort ?? Sort.RELEVANCE);
    IEnumerable<TSubclass> entities = Definition.Convert<TSubclass>(topDocs.ScoreDocs.Select(doc => IndexSearcher.Doc(doc.Doc)));
    return entities.ToList();
}
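Both GetAll overloads above treat int.MaxValue as "fetch everything" and fall back to Sort.RELEVANCE when no sort is supplied. A hedged usage sketch; the repository instance, the Article/BlogArticle entity types, and the field names are illustrative assumptions, not part of the original source:

// Hypothetical repository built on the GetAll overloads above.
IList<Article> matches = repository.GetAll(
    query: new TermQuery(new Term("title", "lucene")),
    sort: new Sort(new SortField("publishDate", SortFieldType.INT64, true)));

// The subclass overload narrows the query via UpdateQuery<TSubclass>():
IList<BlogArticle> posts = repository.GetAll<BlogArticle>(new MatchAllDocsQuery());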
public int[] Search(string searchText, string[] columnNames)
{
    Query query = MultiFieldQueryParser.Parse(
        Lucene.Net.Util.Version.LUCENE_30,
        new List<string>(Enumerable.Repeat<string>(searchText, columnNames.Length)).ToArray(),
        columnNames,
        this.analyzer);
    TopFieldDocs topFieldDocs = this.searcher.Search(query, null, this.IndexLength, Sort.INDEXORDER);
    int[] array = new int[topFieldDocs.ScoreDocs.Length];
    for (int i = 0; i < topFieldDocs.ScoreDocs.Length; i++)
    {
        // Shift the zero-based Lucene doc id to a one-based index for the caller.
        array[i] = topFieldDocs.ScoreDocs[i].Doc + 1;
    }
    return array;
}
public List<News> Search(string keywords)
{
    Directory dir = FSDirectory.Open(new io.DirectoryInfo(HttpContext.Current.Server.MapPath("/Indexs/")), new SimpleFSLockFactory());
    IndexReader reader = IndexReader.Open(dir, true);
    IndexSearcher search = new IndexSearcher(reader);

    MultiFieldQueryParser multifield = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Title", "Content" }, new PanGuAnalyzer());
    multifield.PhraseSlop = 3;
    multifield.DefaultOperator = QueryParser.Operator.AND;
    Query muqu = multifield.Parse(keywords);

    //MultiPhraseQuery multi = new MultiPhraseQuery();
    //multi.Add(new Term[] { new Term("Content", "中国"), new Term("Content", "智慧"), new Term("Title", "中国"), new Term("Title", "智慧") });
    //PhraseQuery query = new PhraseQuery();
    //query.Add(new Term("Content", keywords));

    NumericRangeFilter<int> filter = NumericRangeFilter.NewIntRange("NewsId", 1, 10, true, true);
    Sort sort = new Sort();
    sort.SetSort(new SortField("OrderId", SortField.LONG, true));

    TopFieldDocs fields = search.Search(muqu, filter, 1000, sort);
    ScoreDoc[] docs = fields.ScoreDocs;
    List<News> newslist = new List<News>();
    for (int i = 0; i < docs.Length; i++)
    {
        News news = new News();
        Document doc = search.Doc(docs[i].Doc);
        news.NewsId = Convert.ToInt32(doc.Get("NewsId"));
        news.Title = doc.Get("Title");

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style=\"color:red\">", "</span>");
        Highlighter high = new Highlighter(formatter, new PanGu.Segment());
        high.FragmentSize = 120;
        news.Content = high.GetBestFragment(keywords, doc.Get("Content"));

        news.AddTime = Convert.ToDateTime(doc.Get("Date"));
        news.OrderId = Convert.ToInt64(doc.Get("OrderId"));
        newslist.Add(news);
    }
    return newslist;
}
public T ExecuteScalar<T>(QueryModel queryModel)
{
    var watch = new Stopwatch();
    watch.Start();

    var luceneQueryModel = PrepareQuery(queryModel);
    var searcherHandle = CheckoutSearcher();

    using (searcherHandle)
    {
        var searcher = searcherHandle.Searcher;
        var skipResults = luceneQueryModel.SkipResults;
        var maxResults = Math.Min(luceneQueryModel.MaxResults, searcher.MaxDoc - skipResults);
        var executionContext = new QueryExecutionContext(searcher, luceneQueryModel.Query, luceneQueryModel.Filter);

        TopFieldDocs hits;
        TimeSpan elapsedPreparationTime;
        TimeSpan elapsedSearchTime;

        if (maxResults > 0)
        {
            PrepareSearchSettings(executionContext);
            elapsedPreparationTime = watch.Elapsed;
            hits = searcher.Search(executionContext.Query, executionContext.Filter, maxResults, luceneQueryModel.Sort);
            elapsedSearchTime = watch.Elapsed - elapsedPreparationTime;
        }
        else
        {
            hits = new TopFieldDocs(0, new ScoreDoc[0], new SortField[0], 0);
            elapsedPreparationTime = watch.Elapsed;
            elapsedSearchTime = TimeSpan.Zero;
        }

        executionContext.Phase = QueryExecutionPhase.ConvertResults;
        executionContext.Hits = hits;

        var handler = ScalarResultHandlerRegistry.Instance.GetItem(luceneQueryModel.ResultSetOperator.GetType());
        var result = handler.Execute<T>(luceneQueryModel, hits);
        var elapsedRetrievalTime = watch.Elapsed - elapsedPreparationTime - elapsedSearchTime;

        RaiseStatisticsCallback(luceneQueryModel, executionContext, elapsedPreparationTime, elapsedSearchTime, elapsedRetrievalTime, 0, 0);

        return result;
    }
}
public IPagedList<TEntity> Search(Query query, int pageNumber, int? pageSize = null, Filter filter = null, Sort sort = null)
{
    int size = pageSize ?? _siteSettings.DefaultPageSize;
    TopFieldDocs topDocs = IndexSearcher.Search(query, filter, pageNumber * size, sort ?? Sort.RELEVANCE);
    IEnumerable<TEntity> entities = Definition.Convert(topDocs.ScoreDocs
        .Skip((pageNumber - 1) * size)
        .Take(size)
        .Select(doc => IndexSearcher.Doc(doc.Doc)));
    return new StaticPagedList<TEntity>(entities, pageNumber, size, topDocs.TotalHits);
}
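Note that the method above fetches pageNumber * size hits and then discards everything before the requested page with Skip/Take, so deep pages cost proportionally more. A hedged usage sketch; the repository instance, Product type, and field names are assumptions:

// Hypothetical call: third page of 20 products, newest first.
IPagedList<Product> page = repository.Search(
    new TermQuery(new Term("category", "books")),
    pageNumber: 3,
    pageSize: 20,
    sort: new Sort(new SortField("created", SortFieldType.INT64, true)));
Console.WriteLine($"{page.Count} of {page.TotalItemCount} items");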
public string[] Search(string s, int MaxDoc = 10)
{
    // MaxDoc = Searcher.MaxDoc;
    Query q = Parser.Parse(s);
    TopFieldDocs hits = Searcher.Search(q, null, MaxDoc, Sort);
    ScoreDoc[] scoreDocs = hits.ScoreDocs;
    int docCount = scoreDocs.Length;
    string[] result = new string[docCount];
    for (int i = 0; i < docCount; i += 1)
    {
        result[i] = Searcher.Doc(scoreDocs[i].Doc).Get(FieldName);
    }
    return result;
}
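Most snippets on this page assume an already-built Searcher, Parser, and Sort. For orientation, a minimal self-contained sketch (assuming Lucene.Net 4.8; every name in it is illustrative) that indexes three documents and runs the same Search(query, filter, n, sort) overload used throughout:

using System;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;

class SortedSearchSketch
{
    static void Main()
    {
        const LuceneVersion version = LuceneVersion.LUCENE_48;
        using var dir = new RAMDirectory();
        using (var writer = new IndexWriter(dir, new IndexWriterConfig(version, new StandardAnalyzer(version))))
        {
            foreach (var title in new[] { "beta", "gamma", "alpha" })
            {
                var doc = new Document { new StringField("title", title, Field.Store.YES) };
                writer.AddDocument(doc);
            }
        }

        using var reader = DirectoryReader.Open(dir);
        var searcher = new IndexSearcher(reader);
        var sort = new Sort(new SortField("title", SortFieldType.STRING));

        // Search(query, filter, n, sort) returns TopFieldDocs with per-hit sort values.
        TopFieldDocs hits = searcher.Search(new MatchAllDocsQuery(), null, 10, sort);
        foreach (ScoreDoc sd in hits.ScoreDocs)
        {
            Console.WriteLine(searcher.Doc(sd.Doc).Get("title")); // alpha, beta, gamma
        }
    }
}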
public Task<SearchResult> Search(string searchQuery, int skip, int limit, string searchField = "")
{
    if (string.IsNullOrWhiteSpace(searchQuery.Replace("*", string.Empty).Replace("?", string.Empty)))
    {
        return new SearchResult(new List<SearchItem>(), 0).AsTask();
    }

    using DirectoryReader reader = _writer.GetReader(true);
    var searcher = new IndexSearcher(reader);
    int hitsLimit = limit == 0 ? searcher.IndexReader.MaxDoc : skip + limit;
    using var analyzer = new StandardAnalyzer(AppLuceneVersion);
    var customAnalyzers = new Dictionary<string, Analyzer>
    {
        { ContentRatingField, new KeywordAnalyzer() },
        { StateField, new KeywordAnalyzer() }
    };
    using var analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, customAnalyzers);
    QueryParser parser = !string.IsNullOrWhiteSpace(searchField)
        ? new CustomQueryParser(AppLuceneVersion, searchField, analyzerWrapper)
        : new CustomMultiFieldQueryParser(AppLuceneVersion, new[] { TitleField }, analyzerWrapper);
    parser.AllowLeadingWildcard = true;
    Query query = ParseQuery(searchQuery, parser);
    var filter = new DuplicateFilter(TitleAndYearField);
    var sort = new Sort(new SortField(SortTitleField, SortFieldType.STRING));
    TopFieldDocs topDocs = searcher.Search(query, filter, hitsLimit, sort, true, true);
    IEnumerable<ScoreDoc> selectedHits = topDocs.ScoreDocs.Skip(skip);
    if (limit > 0)
    {
        selectedHits = selectedHits.Take(limit);
    }

    var searchResult = new SearchResult(
        selectedHits.Map(d => ProjectToSearchItem(searcher.Doc(d.Doc))).ToList(),
        topDocs.TotalHits);

    if (limit > 0)
    {
        searchResult.PageMap = GetSearchPageMap(searcher, query, filter, sort, limit);
    }

    return searchResult.AsTask();
}
public virtual void TestDistanceSort()
{
    var distance = JavascriptCompiler.Compile("haversin(40.7143528,-74.0059731,latitude,longitude)");

    SimpleBindings bindings = new SimpleBindings();
    bindings.Add(new SortField("latitude", SortFieldType.DOUBLE));
    bindings.Add(new SortField("longitude", SortFieldType.DOUBLE));

    Sort sort = new Sort(distance.GetSortField(bindings, false));
    TopFieldDocs td = searcher.Search(new MatchAllDocsQuery(), null, 3, sort);

    FieldDoc d = (FieldDoc)td.ScoreDocs[0];
    AreEqual(0.4619D, (double)d.Fields[0], 1E-4);

    d = (FieldDoc)td.ScoreDocs[1];
    AreEqual(1.0546D, (double)d.Fields[0], 1E-4);

    d = (FieldDoc)td.ScoreDocs[2];
    AreEqual(5.2842D, (double)d.Fields[0], 1E-4);
}
public virtual void TestSortValues()
{
    var expr = JavascriptCompiler.Compile("sqrt(_score)");

    SimpleBindings bindings = new SimpleBindings();
    bindings.Add(new SortField("_score", SortFieldType.SCORE));

    Sort sort = new Sort(expr.GetSortField(bindings, true));
    Query query = new TermQuery(new Term("body", "contents"));
    TopFieldDocs td = searcher.Search(query, null, 3, sort, true, true);
    for (int i = 0; i < 3; i++)
    {
        FieldDoc d = (FieldDoc)td.ScoreDocs[i];
        float expected = (float)Math.Sqrt(d.Score);
        float actual = (float)((double)d.Fields[0]);
        AreEqual(expected, actual, CheckHits.ExplainToleranceDelta(expected, actual));
    }
}
public virtual void TestTwoOfSameBinding()
{
    var expr = JavascriptCompiler.Compile("_score + _score");

    SimpleBindings bindings = new SimpleBindings();
    bindings.Add(new SortField("_score", SortFieldType.SCORE));

    Sort sort = new Sort(expr.GetSortField(bindings, true));
    Query query = new TermQuery(new Term("body", "contents"));
    TopFieldDocs td = searcher.Search(query, null, 3, sort, true, true);
    for (int i = 0; i < 3; i++)
    {
        FieldDoc d = (FieldDoc)td.ScoreDocs[i];
        float expected = 2 * d.Score;
        float actual = ((J2N.Numerics.Double)d.Fields[0]).ToSingle();
        Assert.AreEqual(expected, actual, CheckHits.ExplainToleranceDelta(expected, actual));
    }
}
public List<Product> Search(string keyWords)
{
    List<Product> product = new List<Product>();
    Query query = new TermQuery(new Term("Content", keyWords));
    System.IO.DirectoryInfo IndexDir = new System.IO.DirectoryInfo(path);
    Directory dict = FSDirectory.Open(IndexDir, new SimpleFSLockFactory());
    // Searcher
    IndexSearcher search = new IndexSearcher(dict, true);
    // Filter (note: built here but not passed to Search below, which receives null)
    NumericRangeFilter<int> filter = NumericRangeFilter.NewIntRange("ProductId", 1, 6, true, true);
    // Sort field
    Sort sort = new Sort(new SortField("OrderId", SortField.LONG, true));
    // Run the search
    TopFieldDocs docs = search.Search(query, null, 1000, sort);
    foreach (var p in docs.ScoreDocs)
    {
        Product pro = new Product();
        Document doc = search.Doc(p.Doc);
        pro.ProductId = Convert.ToInt32(doc.Get("ProductId"));
        pro.ProductName = $"{doc.Get("ProductName")} (doc ID: {p.Doc}, internal ID: {pro.ProductId})";

        SimpleHTMLFormatter html = new SimpleHTMLFormatter("<span style=\"color:red\">", "</span>");
        Highlighter high = new Highlighter(html, new PanGu.Segment());
        high.FragmentSize = 120;
        // Use the highlighted fragment when available; fall back to the raw content.
        pro.Detail = high.GetBestFragment(keyWords, doc.Get("Content"));
        if (string.IsNullOrEmpty(pro.Detail))
        {
            pro.Detail = doc.Get("Content");
        }

        pro.CreateTime = doc.Get("CreateTime");
        pro.OrderId = doc.Get("OrderId");
        product.Add(pro);
    }
    search.Dispose();
    dict.Dispose();
    return product;
}
public string[] MultiSearch(string s1, string s2, int MaxDoc = 5)
{
    // MaxDoc = Searcher.MaxDoc;
    BooleanQuery q = new BooleanQuery();
    q.Add(Parser.Parse(s1), Occur.MUST);
    q.Add(Parser.Parse(s2), Occur.MUST);
    TopFieldDocs hits = Searcher.Search(q, null, MaxDoc, Sort);
    ScoreDoc[] scoreDocs = hits.ScoreDocs;
    int docCount = scoreDocs.Length;
    string[] result = new string[docCount];
    for (int i = 0; i < docCount; i += 1)
    {
        result[i] = Searcher.Doc(scoreDocs[i].Doc).Get(FieldName);
    }
    return result;
}
public List<News> Search(string Keywords)
{
    List<News> newsList = new List<News>();
    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(Server.MapPath("/Index")), new SimpleFSLockFactory());
    IndexSearcher search = new IndexSearcher(dir);

    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("Content", Keywords));
    //query.Slop = 8;
    NumericRangeFilter<int> range = NumericRangeFilter.NewIntRange("NewsId", 1, 10, true, true);
    Sort sort = new Sort(new SortField("OrderId", SortField.LONG, true));

    TopFieldDocs fields = search.Search(query, range, 1000, sort);
    ScoreDoc[] docs = fields.ScoreDocs;
    for (int i = 0; i < docs.Length; i++)
    {
        News news = new News();
        int docid = docs[i].Doc;
        Document doc = search.Doc(docid);
        news.NewsId = Convert.ToInt32(doc.Get("NewsId"));
        news.Title = doc.Get("Title");

        SimpleHTMLFormatter html = new SimpleHTMLFormatter("<span style=\"color:red\">", "</span>");
        Highlighter high = new Highlighter(html, new PanGu.Segment());
        high.FragmentSize = 120;
        news.Content = high.GetBestFragment(Keywords, doc.Get("Content"));

        news.AddTime = Convert.ToDateTime(doc.Get("AddTime"));
        news.OrderId = Convert.ToInt64(doc.Get("OrderId"));
        newsList.Add(news);
    }
    return newsList;
}
public virtual void TestExpressionRefersToExpression()
{
    var expr1 = JavascriptCompiler.Compile("_score");
    var expr2 = JavascriptCompiler.Compile("2*expr1");

    var bindings = new SimpleBindings();
    bindings.Add(new SortField("_score", SortFieldType.SCORE));
    bindings.Add("expr1", expr1);

    Sort sort = new Sort(expr2.GetSortField(bindings, true));
    Query query = new TermQuery(new Term("body", "contents"));
    TopFieldDocs td = searcher.Search(query, null, 3, sort, true, true);
    for (int i = 0; i < 3; i++)
    {
        FieldDoc d = (FieldDoc)td.ScoreDocs[i];
        float expected = 2 * d.Score;
        float actual = (float)((double)d.Fields[0]);
        Assert.AreEqual(expected, actual, CheckHits.ExplainToleranceDelta(expected, actual));
    }
}
public List<ArticleLinkInfo> Search(String queryString, int fetchTopResult = -1)
{
    List<ArticleLinkInfo> result = new List<ArticleLinkInfo>();
    try
    {
        QueryParser queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Title", "ScrapTime" }, new ChineseAnalyzer());
        Query query = queryParser.Parse(queryString);
        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.Add(query, Occur.MUST);

        List<SortField> sortFieldList = new List<SortField>();
        sortFieldList.Add(new SortField("ScrapTime", SortField.DOUBLE, true));
        Sort sort = new Sort(sortFieldList.ToArray());

        using (IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(this.indexDir))))
        {
            if (fetchTopResult < 0)
            {
                fetchTopResult = indexSearcher.MaxDoc;
            }
            TopFieldDocs docs = indexSearcher.Search(booleanQuery, null, fetchTopResult, sort);
            //hits = docs.TotalHits;
            foreach (var scoreDoc in docs.ScoreDocs)
            {
                Document doc = indexSearcher.Doc(scoreDoc.Doc);
                Console.WriteLine("{0}|{1}|{2}", doc.Get(scraptime_field), doc.Get(id_field), doc.Get(title_field));
                result.Add(new ArticleLinkInfo(doc.Get(id_field), doc.Get(scraptime_field), doc.Get(title_field), doc.Get(url_field)));
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }
    return result;
}
private static ScoreDoc[] TopDocs(int start, int limit, TopFieldDocs docs)
{
    var da = docs.ScoreDocs;
    // Clamp to the hits actually returned; TotalHits can exceed da.Length
    // when fewer than TotalHits docs were requested from the searcher.
    int hc = Math.Min(docs.TotalHits, da.Length);
    int endIndex = (hc - start > limit) ? start + limit : hc;

    List<ScoreDoc> dl = new List<ScoreDoc>();
    for (int i = start; i < endIndex; i++)
    {
        dl.Add(da[i]);
    }
    return dl.ToArray();
}
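A hedged usage sketch of the helper above: the window is clamped to the available hits, so asking for a page past the end simply yields fewer (or zero) results. The searcher, query, and sort are assumed to exist:

// Hypothetical caller: slice hits 10..19 (the second page of 10)
// out of a larger sorted result set.
TopFieldDocs docs = searcher.Search(query, null, 100, sort);
ScoreDoc[] secondPage = TopDocs(start: 10, limit: 10, docs);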
/// <summary>
/// Create the results based on the search hits.
/// Can be overridden by subclass to add particular behavior (e.g. weight transformation) </summary>
/// <exception cref="System.IO.IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
protected internal virtual List<LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable<string> matchedTokens, string prefixToken)
{
    BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields?  they compress...
    BinaryDocValues payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
    List<LookupResult> results = new List<LookupResult>();
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
        textDV.Get(fd.Doc, scratch);
        string text = scratch.Utf8ToString();
        long score = (long)fd.Fields[0];

        BytesRef payload;
        if (payloadsDV != null)
        {
            payload = new BytesRef();
            payloadsDV.Get(fd.Doc, payload);
        }
        else
        {
            payload = null;
        }

        // Must look up sorted-set by segment:
        int segment = ReaderUtil.SubIndex(fd.Doc, leaves);
        SortedSetDocValues contextsDV = leaves[segment].AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);
        HashSet<BytesRef> contexts;
        if (contextsDV != null)
        {
            contexts = new HashSet<BytesRef>();
            contextsDV.Document = fd.Doc - leaves[segment].DocBase;
            long ord;
            while ((ord = contextsDV.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
            {
                BytesRef context = new BytesRef();
                contextsDV.LookupOrd(ord, context);
                contexts.Add(context);
            }
        }
        else
        {
            contexts = null;
        }

        LookupResult result;
        if (doHighlight)
        {
            object highlightKey = Highlight(text, matchedTokens, prefixToken);
            result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts);
        }
        else
        {
            result = new LookupResult(text, score, payload, contexts);
        }

        results.Add(result);
    }
    return results;
}
/// <summary>
/// Create the results based on the search hits.
/// Can be overridden by subclass to add particular behavior (e.g. weight transformation) </summary>
/// <exception cref="System.IO.IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
protected internal virtual IList<LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable<string> matchedTokens, string prefixToken)
{
    BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields?  they compress...
    BinaryDocValues payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
    List<LookupResult> results = new List<LookupResult>();
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
        textDV.Get(fd.Doc, scratch);
        string text = scratch.Utf8ToString();
        long score = (long)fd.Fields[0];

        BytesRef payload;
        if (payloadsDV != null)
        {
            payload = new BytesRef();
            payloadsDV.Get(fd.Doc, payload);
        }
        else
        {
            payload = null;
        }

        // Must look up sorted-set by segment:
        int segment = ReaderUtil.SubIndex(fd.Doc, leaves);
        SortedSetDocValues contextsDV = leaves[segment].AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);
        HashSet<BytesRef> contexts;
        if (contextsDV != null)
        {
            contexts = new HashSet<BytesRef>();
            contextsDV.SetDocument(fd.Doc - leaves[segment].DocBase);
            long ord;
            while ((ord = contextsDV.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
            {
                BytesRef context = new BytesRef();
                contextsDV.LookupOrd(ord, context);
                contexts.Add(context);
            }
        }
        else
        {
            contexts = null;
        }

        LookupResult result;
        if (doHighlight)
        {
            object highlightKey = Highlight(text, matchedTokens, prefixToken);
            result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts);
        }
        else
        {
            result = new LookupResult(text, score, payload, contexts);
        }

        results.Add(result);
    }
    return results;
}
/// <summary>
/// Retrieve suggestions, specifying whether all terms
/// must match (<paramref name="allTermsRequired"/>) and whether the hits
/// should be highlighted (<paramref name="doHighlight"/>).
/// </summary>
public virtual IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
{
    if (m_searcherMgr == null)
    {
        throw new InvalidOperationException("suggester was not built");
    }

    Occur occur;
    if (allTermsRequired)
    {
        occur = Occur.MUST;
    }
    else
    {
        occur = Occur.SHOULD;
    }

    TokenStream ts = null;
    BooleanQuery query;
    var matchedTokens = new HashSet<string>();
    string prefixToken = null;

    try
    {
        ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));

        //long t0 = System.currentTimeMillis();
        ts.Reset();
        var termAtt = ts.AddAttribute<ICharTermAttribute>();
        var offsetAtt = ts.AddAttribute<IOffsetAttribute>();
        string lastToken = null;
        query = new BooleanQuery();
        int maxEndOffset = -1;
        matchedTokens = new HashSet<string>();
        while (ts.IncrementToken())
        {
            if (lastToken != null)
            {
                matchedTokens.Add(lastToken);
                query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.ToString();
            if (lastToken != null)
            {
                maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
            }
        }
        ts.End();

        if (lastToken != null)
        {
            Query lastQuery;
            if (maxEndOffset == offsetAtt.EndOffset)
            {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = GetLastTokenQuery(lastToken);
                prefixToken = lastToken;
            }
            else
            {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.Add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }

            if (lastQuery != null)
            {
                query.Add(lastQuery, occur);
            }
        }

        if (contexts != null)
        {
            BooleanQuery sub = new BooleanQuery();
            query.Add(sub, Occur.MUST);
            foreach (BytesRef context in contexts)
            {
                // NOTE: we "should" wrap this in
                // ConstantScoreQuery, or maybe send this as a
                // Filter instead to search, but since all of
                // these are MUST'd, the change to the score won't
                // affect the overall ranking.  Since we indexed
                // as DOCS_ONLY, the perf should be the same
                // either way (no freq int[] blocks to decode):

                // TODO: if we had a BinaryTermField we could fix
                // this "must be valid ut8f" limitation:
                sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
            }
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(ts);
    }

    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:
    Query finalQuery = FinishQuery(query, allTermsRequired);

    //System.out.println("finalQuery=" + query);

    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);

    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    ICollector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    IndexSearcher searcher = m_searcherMgr.Acquire();
    IList<LookupResult> results = null;
    try
    {
        //System.out.println("got searcher=" + searcher);
        searcher.Search(finalQuery, c2);

        TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();

        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    }
    finally
    {
        m_searcherMgr.Release(searcher);
    }

    //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
    //System.out.println(results);

    return results;
}
public void TestNestedSorting()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

    // Local helpers to cut down the repetitive document construction;
    // behavior is identical to adding the fields one by one.
    Document Child(string field2, string filter1)
    {
        var d = new Document();
        d.Add(new StringField("field2", field2, Field.Store.NO));
        d.Add(new StringField("filter_1", filter1, Field.Store.NO));
        return d;
    }

    Document Parent(string field1)
    {
        var d = new Document();
        d.Add(new StringField("__type", "parent", Field.Store.NO));
        d.Add(new StringField("field1", field1, Field.Store.NO));
        return d;
    }

    // Each block is three child docs followed by their parent doc.
    w.AddDocuments(new[] { Child("a", "T"), Child("b", "T"), Child("c", "T"), Parent("a") });
    w.Commit();
    w.AddDocuments(new[] { Child("c", "T"), Child("d", "T"), Child("e", "T"), Parent("b") });
    w.AddDocuments(new[] { Child("e", "T"), Child("f", "T"), Child("g", "T"), Parent("c") });
    w.AddDocuments(new[] { Child("g", "T"), Child("h", "F"), Child("i", "F"), Parent("d") });
    w.Commit();
    w.AddDocuments(new[] { Child("i", "F"), Child("j", "F"), Child("k", "F"), Parent("f") });
    w.AddDocuments(new[] { Child("k", "T"), Child("l", "T"), Child("m", "T"), Parent("g") });

    // This doc will not be included, because it doesn't have nested docs
    w.AddDocument(Parent("h"));

    w.AddDocuments(new[] { Child("m", "T"), Child("n", "F"), Child("o", "F"), Parent("i") });
    w.Commit();

    // Some garbage docs, just to check if the NestedFieldComparer can deal with this.
    for (int i = 0; i < 3; i++)
    {
        Document garbage = new Document();
        garbage.Add(new StringField("fieldXXX", "x", Field.Store.NO));
        w.AddDocument(garbage);
    }

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.IndexWriter, false));
    w.Dispose();

    Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
    Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);

    // Sort by field ascending, order first
    ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, Wrap(parentFilter), Wrap(childFilter));
    Sort sort = new Sort(sortField);
    TopFieldDocs topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(3, topDocs.ScoreDocs[0].Doc);
    assertEquals("a", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[1].Doc);
    assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[4].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field ascending, order last
    sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(3, topDocs.ScoreDocs[0].Doc);
    assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[1].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[4].Doc);
    assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field descending, order last
    sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(28, topDocs.ScoreDocs[0].Doc);
    assertEquals("o", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(23, topDocs.ScoreDocs[1].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[2].Doc);
    assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[4].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field descending, order last, sort filter (filter_1:T)
    childFilter = new QueryWrapperFilter(new TermQuery(new Term("filter_1", "T")));
    query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
    sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    assertEquals(6, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(23, topDocs.ScoreDocs[0].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(28, topDocs.ScoreDocs[1].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[4].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    searcher.IndexReader.Dispose();
    dir.Dispose();
}
protected override object Execute(TopFieldDocs hits)
{
    return hits.ScoreDocs.Length > 0;
}
protected abstract object Execute(TopFieldDocs hits);
public T Execute<T>(TopFieldDocs hits)
{
    return (T)Convert.ChangeType(Execute(hits), typeof(T));
}
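The three members above form a small template method: a concrete handler returns a raw object (here, "are there any hits"), and the generic Execute<T> converts it to the caller's requested type via Convert.ChangeType. A minimal sketch of another concrete handler in the same style; the base-class name ScalarResultHandler is an assumption, not taken from the source:

// Sketch only: assumes the abstract Execute(TopFieldDocs) and the generic
// Execute<T> above live in a base class, here called ScalarResultHandler.
public class CountResultHandler : ScalarResultHandler
{
    protected override object Execute(TopFieldDocs hits)
    {
        // TotalHits counts every matching doc, not just the ScoreDocs returned.
        return hits.TotalHits;
    }
}

// Usage: new CountResultHandler().Execute<long>(hits) converts the boxed
// int to the requested long through Convert.ChangeType.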
protected internal override IList<Lookup.LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string key, bool doHighlight, ICollection<string> matchedTokens, string prefixToken)
{
    BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);
    Debug.Assert(textDV != null);

    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields? they compress...
    BinaryDocValues payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");

    JCG.SortedSet<Lookup.LookupResult> results = new JCG.SortedSet<Lookup.LookupResult>(LOOKUP_COMP);

    // we reduce the num to the one initially requested
    int actualNum = num / numFactor;

    BytesRef scratch = new BytesRef();
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
        textDV.Get(fd.Doc, scratch);
        string text = scratch.Utf8ToString();
        long weight = (long)fd.Fields[0];

        BytesRef payload;
        if (payloadsDV != null)
        {
            payload = new BytesRef();
            payloadsDV.Get(fd.Doc, payload);
        }
        else
        {
            payload = null;
        }

        double coefficient;
        if (text.StartsWith(key, StringComparison.Ordinal))
        {
            // if hit starts with the key, we don't change the score
            coefficient = 1;
        }
        else
        {
            coefficient = CreateCoefficient(searcher, fd.Doc, matchedTokens, prefixToken);
        }

        long score = (long)(weight * coefficient);

        LookupResult result;
        if (doHighlight)
        {
            object highlightKey = Highlight(text, matchedTokens, prefixToken);
            result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload);
        }
        else
        {
            result = new LookupResult(text, score, payload);
        }

        BoundedTreeAdd(results, result, actualNum);
    }

    return new List<LookupResult>(results.Reverse());
}