public void TestEarlyTerminationDifferentSorter()
{
    // test that the collector works correctly when the index was sorted by a
    // different sorter than the one specified in the ctor.
    CreateRandomIndexes(5);
    int numHits = TestUtil.NextInt32(Random, 1, numDocs / 10);
    Sort sort = new Sort(new SortField("ndv2", SortFieldType.INT64, false));
    bool fillFields = Random.nextBoolean();
    bool trackDocScores = Random.nextBoolean();
    bool trackMaxScore = Random.nextBoolean();
    bool inOrder = Random.nextBoolean();

    // LUCENENET specific:
    // we are changing this test to use Lucene.Net 4.9-like behavior rather than going through all of the effort to
    // fix a hard-to-find null reference exception problem.
    // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912
    //TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
    //TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

    IndexSearcher searcher = NewSearcher(reader);
    int iters = AtLeast(5);
    for (int i = 0; i < iters; ++i)
    {
        // LUCENENET specific:
        // we are changing this test to use Lucene.Net 4.9-like behavior rather than going through all of the effort to
        // fix a hard-to-find null reference exception problem.
        // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912
        TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
        TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

        TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
        searcher.Search(query, collector1);
        Sort different = new Sort(new SortField("ndv2", SortFieldType.INT64));
        searcher.Search(query, new EarlyTerminatingSortingCollectorHelper(collector2, different, numHits));

        assertTrue(collector1.TotalHits >= collector2.TotalHits);
        AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
    }
}
public IEnumerable<ISearchHit> Search()
{
    var query = CreateQuery();

    IndexSearcher searcher;
    try
    {
        searcher = new IndexSearcher(_directory, true);
    }
    catch
    {
        // index might not exist if it has been rebuilt
        Logger.Information("Attempted to read a non-existing index");
        return Enumerable.Empty<ISearchHit>();
    }

    using (searcher)
    {
        var sort = String.IsNullOrEmpty(_sort)
            ? Sort.RELEVANCE
            : new Sort(new SortField(_sort, _comparer, _sortDescending));
        var collector = TopFieldCollector.Create(sort, _count + _skip, false, true, false, true);

        Logger.Debug("Searching: {0}", query.ToString());
        searcher.Search(query, collector);

        var results = collector.TopDocs().ScoreDocs
            .Skip(_skip)
            .Select(scoreDoc => new LuceneSearchHit(searcher.Doc(scoreDoc.Doc), scoreDoc.Score))
            .ToList();

        Logger.Debug("Search results: {0}", results.Count);
        return results;
    }
}
/// <summary>
/// Performs the search and retrieves the number of items in the result.
/// </summary>
/// <param name="query"></param>
/// <param name="filter"></param>
/// <returns></returns>
private int SearchCount(Query query, Filter filter)
{
    var reader = CreateReader();
    var searcher = new IndexSearcher(reader);
    try
    {
        if (query != null)
        {
            var weight = query.Weight(searcher);
            var collector = TopFieldCollector.create(
                new global::Lucene.Net.Search.Sort(), 1, true, false, false,
                !weight.ScoresDocsOutOfOrder());
            searcher.Search(weight, filter, collector);
            return collector.GetTotalHits();
        }
    }
    finally
    {
        searcher.Close();
        reader.Close();
    }
    return 0;
}
/// <summary>
/// Search, sorting by <see cref="Sort"/>, and computing
/// drill down and sideways counts.
/// </summary>
public virtual DrillSidewaysResult Search(DrillDownQuery query, Filter filter, FieldDoc after, int topN, Sort sort, bool doDocScores, bool doMaxScore)
{
    if (filter != null)
    {
        query = new DrillDownQuery(m_config, filter, query);
    }
    if (sort != null)
    {
        int limit = m_searcher.IndexReader.MaxDoc;
        if (limit == 0)
        {
            limit = 1; // the collector does not allow numHits = 0
        }
        topN = Math.Min(topN, limit);
        TopFieldCollector hitCollector = TopFieldCollector.Create(sort, topN, after, true, doDocScores, doMaxScore, true);
        DrillSidewaysResult r = Search(query, hitCollector);
        return new DrillSidewaysResult(r.Facets, hitCollector.GetTopDocs());
    }
    else
    {
        return Search(after, query, topN);
    }
}
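A minimal usage sketch of the sorted overload above (not from the source): the searcher, taxonomy reader, and FacetsConfig are assumed to already exist, and the "Author" dimension and "weight" field are hypothetical.

// Hedged usage sketch for the sorted DrillSideways overload above.
// `searcher`, `taxoReader`, and `config` are assumed to exist;
// the "Author" dimension and "weight" field are hypothetical.
private static TopDocs SortedDrillSideways(IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config)
{
    var ds = new DrillSideways(searcher, config, taxoReader);
    var ddq = new DrillDownQuery(config);
    ddq.Add("Author", "Lisa"); // drill down on one dimension value
    var sort = new Sort(new SortField("weight", SortFieldType.INT64, true));
    // topN sorted hits plus sideways facet counts for the drilled-down dimension:
    DrillSidewaysResult result = ds.Search(ddq, null, null, 10, sort, false, false);
    return result.Hits;
}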
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private org.apache.lucene.search.TopDocs toTopDocs(org.apache.lucene.search.Query query, org.neo4j.index.lucene.QueryContext context, org.apache.lucene.search.IndexSearcher searcher) throws java.io.IOException
private TopDocs ToTopDocs(Query query, QueryContext context, IndexSearcher searcher)
{
    Sort sorting = context != null ? context.Sorting : null;
    TopDocs topDocs;
    if (sorting == null && context != null)
    {
        topDocs = searcher.search(query, context.Top);
    }
    else
    {
        if (context == null || !context.TradeCorrectnessForSpeed)
        {
            TopFieldCollector collector = LuceneDataSource.ScoringCollector(sorting, context.Top);
            searcher.search(query, collector);
            topDocs = collector.topDocs();
        }
        else
        {
            topDocs = searcher.search(query, null, context.Top, sorting);
        }
    }
    return topDocs;
}
public void TestEarlyTermination_()
{
    CreateRandomIndexes(5);
    int numHits = TestUtil.NextInt32(Random, 1, numDocs / 10);
    Sort sort = new Sort(new SortField("ndv1", SortFieldType.INT64, false));
    bool fillFields = Random.nextBoolean();
    bool trackDocScores = Random.nextBoolean();
    bool trackMaxScore = Random.nextBoolean();
    bool inOrder = Random.nextBoolean();
    TopFieldCollector collector1 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
    TopFieldCollector collector2 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

    IndexSearcher searcher = NewSearcher(reader);
    int iters = AtLeast(5);
    for (int i = 0; i < iters; ++i)
    {
        TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
        searcher.Search(query, collector1);
        searcher.Search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
    }
    assertTrue(collector1.TotalHits >= collector2.TotalHits);
    AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
}
/// <summary>
/// Returns the grouped results. Returns null if the
/// number of groups collected is <= groupOffset.
///
/// <para>
/// <b>NOTE</b>: This collector is unable to compute
/// the groupValue per group so it will always be null.
/// This is normally not a problem, as you can obtain the
/// value just like you obtain other values for each
/// matching document (eg, via stored fields, via
/// FieldCache, etc.)
/// </para>
/// </summary>
/// <typeparam name="TGroupValue">The expected return type for group value</typeparam>
/// <param name="withinGroupSort">
/// The <see cref="Sort"/> used to sort
/// documents within each group. Passing null is
/// allowed, to sort by relevance.
/// </param>
/// <param name="groupOffset">Which group to start from</param>
/// <param name="withinGroupOffset">
/// Which document to start from within each group
/// </param>
/// <param name="maxDocsPerGroup">
/// How many top documents to keep within each group.
/// </param>
/// <param name="fillSortFields">
/// If true then the Comparable values for the sort fields will be set
/// </param>
public virtual ITopGroups<TGroupValue> GetTopGroups<TGroupValue>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
{
    //if (queueFull) {
    //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
    //}
    if (subDocUpto != 0)
    {
        ProcessGroup();
    }
    if (groupOffset >= groupQueue.Count)
    {
        return null;
    }
    int totalGroupedHitCount = 0;

    FakeScorer fakeScorer = new FakeScorer();

    float maxScore = float.Epsilon; // LUCENENET: Epsilon in .NET is the same as MIN_VALUE in Java

    GroupDocs<TGroupValue>[] groups = new GroupDocs<TGroupValue>[groupQueue.Count - groupOffset];
    for (int downTo = groupQueue.Count - groupOffset - 1; downTo >= 0; downTo--)
    {
        OneGroup og = groupQueue.Pop();

        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        ITopDocsCollector collector;
        if (withinGroupSort == null)
        {
            // Sort by score
            if (!needsScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);
        }

        collector.SetScorer(fakeScorer);
        collector.SetNextReader(og.readerContext);
        for (int docIDX = 0; docIDX < og.count; docIDX++)
        {
            int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores)
            {
                fakeScorer.score = og.scores[docIDX];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += og.count;

        object[] groupSortValues;

        if (fillSortFields)
        {
            groupSortValues = new IComparable[comparers.Length];
            for (int sortFieldIDX = 0; sortFieldIDX < comparers.Length; sortFieldIDX++)
            {
                groupSortValues[sortFieldIDX] = comparers[sortFieldIDX][og.comparerSlot];
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs = collector.GetTopDocs(withinGroupOffset, maxDocsPerGroup);

        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<TGroupValue>(float.NaN, topDocs.MaxScore, og.count, topDocs.ScoreDocs,
            default(TGroupValue), groupSortValues); // tail completed from the 3.x variant of this method below
        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    return new TopGroups<TGroupValue>(new TopGroups<TGroupValue>(groupSort.GetSort(),
        withinGroupSort == null ? null : withinGroupSort.GetSort(),
        totalHitCount, totalGroupedHitCount, groups, maxScore), totalGroupCount);
}
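For context, a hedged sketch (not from the source) of how this collector is typically driven. It assumes documents were indexed in contiguous blocks and that a "groupEnd" marker field identifies the last document of each block; all field names are hypothetical.

// Hedged sketch: driving BlockGroupingCollector and reading its top groups.
// Assumes block-indexed documents; "groupEnd" and "content" are hypothetical fields.
Filter lastDocInBlock = new CachingWrapperFilter(
    new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));
Sort groupSort = Sort.RELEVANCE; // how the groups themselves are ranked
var c = new BlockGroupingCollector(groupSort, 10, true, lastDocInBlock);
searcher.Search(new TermQuery(new Term("content", "lucene")), c);
// null withinGroupSort = sort docs inside each group by relevance;
// the returned group value is always null for this collector (see note above):
ITopGroups<object> groups = c.GetTopGroups<object>(null, 0, 0, 5, true);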
public override int DoLogic()
{
    int res = 0;

    // open reader or use existing one
    IndexSearcher searcher = RunData.GetIndexSearcher();

    IndexReader reader;

    bool closeSearcher;
    if (searcher == null)
    {
        // open our own reader
        Directory dir = RunData.Directory;
        reader = DirectoryReader.Open(dir);
        searcher = new IndexSearcher(reader);
        closeSearcher = true;
    }
    else
    {
        // use existing one; this passes +1 ref to us
        reader = searcher.IndexReader;
        closeSearcher = false;
    }

    // optionally warm and add num docs traversed to count
    if (WithWarm)
    {
        Document doc = null;
        IBits liveDocs = MultiFields.GetLiveDocs(reader);
        for (int m = 0; m < reader.MaxDoc; m++)
        {
            if (null == liveDocs || liveDocs.Get(m))
            {
                doc = reader.Document(m);
                res += (doc == null ? 0 : 1);
            }
        }
    }

    if (WithSearch)
    {
        res++;
        Query q = queryMaker.MakeQuery();
        Sort sort = Sort;
        TopDocs hits = null;
        int numHits = NumHits;
        if (numHits > 0)
        {
            if (WithCollector == false)
            {
                if (sort != null)
                {
                    // TODO: instead of always passing false we
                    // should detect based on the query; if we make
                    // the IndexSearcher search methods that take
                    // Weight public again, we can go back to
                    // pulling the Weight ourselves:
                    TopFieldCollector collector = TopFieldCollector.Create(sort, numHits, true, WithScore, WithMaxScore, false);
                    searcher.Search(q, null, collector);
                    hits = collector.GetTopDocs();
                }
                else
                {
                    hits = searcher.Search(q, numHits);
                }
            }
            else
            {
                ICollector collector = CreateCollector();
                searcher.Search(q, null, collector);
                //hits = collector.topDocs();
            }

            string printHitsField = RunData.Config.Get("print.hits.field", null);
            if (hits != null && printHitsField != null && printHitsField.Length > 0)
            {
                Console.WriteLine("totalHits = " + hits.TotalHits);
                Console.WriteLine("maxDoc()  = " + reader.MaxDoc);
                Console.WriteLine("numDocs() = " + reader.NumDocs);
                for (int i = 0; i < hits.ScoreDocs.Length; i++)
                {
                    int docID = hits.ScoreDocs[i].Doc;
                    Document doc = reader.Document(docID);
                    Console.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
                }
            }

            if (WithTraverse)
            {
                ScoreDoc[] scoreDocs = hits.ScoreDocs;
                int traversalSize = Math.Min(scoreDocs.Length, TraversalSize);

                if (traversalSize > 0)
                {
                    bool retrieve = WithRetrieve;
                    int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
                    Analyzer analyzer = RunData.Analyzer;
                    BenchmarkHighlighter highlighter = null;
                    if (numHighlight > 0)
                    {
                        highlighter = GetBenchmarkHighlighter(q);
                    }
                    for (int m = 0; m < traversalSize; m++)
                    {
                        int id = scoreDocs[m].Doc;
                        res++;
                        if (retrieve)
                        {
                            Document document = RetrieveDoc(reader, id);
                            res += document != null ? 1 : 0;
                            if (numHighlight > 0 && m < numHighlight)
                            {
                                ICollection<string> fieldsToHighlight = GetFieldsToHighlight(document);
                                foreach (string field in fieldsToHighlight)
                                {
                                    string text = document.Get(field);
                                    res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    if (closeSearcher)
    {
        reader.Dispose();
    }
    else
    {
        // Release our +1 ref from above
        reader.DecRef();
    }
    return res;
}
public SearcherCallableWithSort(ReentrantLock @lock, IndexSearcher searcher, LeafSlice slice, Weight weight, FieldDoc after, int nDocs, TopFieldCollector hq, Sort sort, bool doDocScores, bool doMaxScore)
{
    this.@lock = @lock;
    this.searcher = searcher;
    this.weight = weight;
    this.nDocs = nDocs;
    this.hq = hq;
    this.sort = sort;
    this.slice = slice;
    this.after = after;
    this.doDocScores = doDocScores;
    this.doMaxScore = doMaxScore;
}
/// <summary>
/// Retrieve suggestions, specifying whether all terms
/// must match (<paramref name="allTermsRequired"/>) and whether the hits
/// should be highlighted (<paramref name="doHighlight"/>).
/// </summary>
public virtual IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
{
    if (m_searcherMgr == null)
    {
        throw new InvalidOperationException("suggester was not built");
    }

    Occur occur;
    if (allTermsRequired)
    {
        occur = Occur.MUST;
    }
    else
    {
        occur = Occur.SHOULD;
    }

    TokenStream ts = null;
    BooleanQuery query;
    var matchedTokens = new HashSet<string>();
    string prefixToken = null;

    try
    {
        ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));

        //long t0 = System.currentTimeMillis();
        ts.Reset();
        var termAtt = ts.AddAttribute<ICharTermAttribute>();
        var offsetAtt = ts.AddAttribute<IOffsetAttribute>();
        string lastToken = null;
        query = new BooleanQuery();
        int maxEndOffset = -1;
        matchedTokens = new HashSet<string>();
        while (ts.IncrementToken())
        {
            if (lastToken != null)
            {
                matchedTokens.Add(lastToken);
                query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.ToString();
            if (lastToken != null)
            {
                maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
            }
        }
        ts.End();

        if (lastToken != null)
        {
            Query lastQuery;
            if (maxEndOffset == offsetAtt.EndOffset)
            {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = GetLastTokenQuery(lastToken);
                prefixToken = lastToken;
            }
            else
            {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.Add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }

            if (lastQuery != null)
            {
                query.Add(lastQuery, occur);
            }
        }

        if (contexts != null)
        {
            BooleanQuery sub = new BooleanQuery();
            query.Add(sub, Occur.MUST);
            foreach (BytesRef context in contexts)
            {
                // NOTE: we "should" wrap this in
                // ConstantScoreQuery, or maybe send this as a
                // Filter instead to search, but since all of
                // these are MUST'd, the change to the score won't
                // affect the overall ranking.  Since we indexed
                // as DOCS_ONLY, the perf should be the same
                // either way (no freq int[] blocks to decode):

                // TODO: if we had a BinaryTermField we could fix
                // this "must be valid ut8f" limitation:
                sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
            }
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(ts);
    }

    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:

    Query finalQuery = FinishQuery(query, allTermsRequired);

    //System.out.println("finalQuery=" + query);

    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);

    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    ICollector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    IndexSearcher searcher = m_searcherMgr.Acquire();
    IList<LookupResult> results = null;
    try
    {
        //System.out.println("got searcher=" + searcher);
        searcher.Search(finalQuery, c2);

        TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();

        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    }
    finally
    {
        m_searcherMgr.Release(searcher);
    }

    //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
    //System.out.println(results);

    return results;
}
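A hedged usage sketch for DoLookup above (not from the source); it assumes `suggester` is an already-built AnalyzingInfixSuggester instance.

// Hedged usage sketch: looking up infix suggestions.
// Assumes `suggester` was previously built from an input source.
IList<LookupResult> results = suggester.DoLookup("lu", null, 5,
    allTermsRequired: true, doHighlight: true);
foreach (LookupResult r in results)
{
    // Key is the suggestion text, Value its indexed weight
    Console.WriteLine(r.Key + " => " + r.Value);
}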
/// <summary>
/// Returns the grouped results. Returns null if the
/// number of groups collected is <= groupOffset.
///
/// <para>
/// <b>NOTE</b>: This collector is unable to compute
/// the groupValue per group so it will always be null.
/// This is normally not a problem, as you can obtain the
/// value just like you obtain other values for each
/// matching document (eg, via stored fields, via
/// FieldCache, etc.)
/// </para>
/// </summary>
/// <typeparam name="TGroupValue">The expected return type for group value</typeparam>
/// <param name="withinGroupSort">
/// The <see cref="Sort"/> used to sort
/// documents within each group. Passing null is
/// allowed, to sort by relevance.
/// </param>
/// <param name="groupOffset">Which group to start from</param>
/// <param name="withinGroupOffset">
/// Which document to start from within each group
/// </param>
/// <param name="maxDocsPerGroup">
/// How many top documents to keep within each group.
/// </param>
/// <param name="fillSortFields">
/// If true then the Comparable values for the sort fields will be set
/// </param>
public virtual ITopGroups<TGroupValue> GetTopGroups<TGroupValue>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
{
    //if (queueFull) {
    //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
    //}
    if (subDocUpto != 0)
    {
        ProcessGroup();
    }
    if (groupOffset >= groupQueue.Size())
    {
        return null;
    }
    int totalGroupedHitCount = 0;

    FakeScorer fakeScorer = new FakeScorer();

    float maxScore = float.MinValue;

    GroupDocs<TGroupValue>[] groups = new GroupDocs<TGroupValue>[groupQueue.Size() - groupOffset];
    for (int downTo = groupQueue.Size() - groupOffset - 1; downTo >= 0; downTo--)
    {
        OneGroup og = groupQueue.Pop();

        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        ITopDocsCollector collector;
        if (withinGroupSort == null)
        {
            // Sort by score
            if (!needsScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);
        }

        collector.Scorer = fakeScorer;
        collector.NextReader = og.readerContext;
        for (int docIDX = 0; docIDX < og.count; docIDX++)
        {
            int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores)
            {
                fakeScorer.score = og.scores[docIDX];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += og.count;

        object[] groupSortValues;

        if (fillSortFields)
        {
            groupSortValues = new IComparable[comparators.Length];
            for (int sortFieldIDX = 0; sortFieldIDX < comparators.Length; sortFieldIDX++)
            {
                groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].Value(og.comparatorSlot);
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs = collector.TopDocs(withinGroupOffset, maxDocsPerGroup);

        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<TGroupValue>(float.NaN, topDocs.MaxScore, og.count, topDocs.ScoreDocs,
            default(TGroupValue), groupSortValues);

        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    /*
    while (groupQueue.size() != 0) {
      final OneGroup og = groupQueue.pop();
      //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
      totalGroupedHitCount += og.count;
    }
    */

    return new TopGroups<TGroupValue>(new TopGroups<TGroupValue>(groupSort.GetSort(),
        withinGroupSort == null ? null : withinGroupSort.GetSort(),
        totalHitCount, totalGroupedHitCount, groups, maxScore), totalGroupCount);
}
/// <summary>
/// Grouped statistics on a field (with paging support)
/// </summary>
/// <param name="indexSearcher"></param>
/// <param name="pageSize"></param>
/// <param name="pageIndex"></param>
/// <param name="query"></param>
/// <param name="recordCount"></param>
/// <param name="groupKeyValueList">grouping result</param>
/// <param name="filter"></param>
/// <param name="sortFields"></param>
/// <returns></returns>
public static Dictionary<Document, ScoreDoc> SelectGroup(IndexSearcher indexSearcher, int pageSize, int pageIndex, Query query, out int recordCount, out GroupKeyValueList groupKeyValueList, Filter filter = null, params SortField[] sortFields)
{
    recordCount = 0;
    groupKeyValueList = null;
    Dictionary<Document, ScoreDoc> dictPager = new Dictionary<Document, ScoreDoc>();
    int maxDoc = indexSearcher.IndexReader.MaxDoc;
    if (maxDoc == 0)
    {   // MaxDoc is the largest doc id available in the index; 0 means the index is empty
        return dictPager;
    }

    TopDocs docs = null;
    string key = string.Format(CACHE_KEY, query.ToString(),
        string.Join("_", sortFields.Select(item => item.ToString())),
        filter == null ? string.Empty : filter.ToString());
    string listKey = key + string.Format(",PAGE_INDEX:{0},PAGE_SIZE:{1}", pageIndex, pageSize);
    string groupKey = "GROUP:::" + key;
    docs = MemCache.MemoryCacheBus.Get(listKey) as TopDocs;
    groupKeyValueList = MemCache.MemoryCacheBus.Get(groupKey) as GroupKeyValueList;
    if (docs == null || groupKeyValueList == null)
    {
        //https://searchcode.com/codesearch/view/7233825/
        int start = pageIndex * pageSize;
        start = Math.Min(start, maxDoc);
        using (GroupCollectorField groupCollectorField = new GroupCollectorField("NameValue"))
        {
            if (sortFields.Length > 0)
            {   // sort first, then page
                Sort sort = new Sort();
                sort.SetSort(sortFields);
                TopFieldCollector topFieldCollector = TopFieldCollector.Create(sort, start, true, false, false,
                    !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                GroupCollectorWrapper groupCollectorWrapper = new GroupCollectorWrapper(start, topFieldCollector, groupCollectorField);
                indexSearcher.Search(query, filter, groupCollectorWrapper);
                start = start - pageSize;
                if (start < 0)
                {
                    start = 0;
                }
                docs = topFieldCollector.TopDocs(start, pageSize); // return one page of records starting at `start`
            }
            else
            {   // no sorting, paging only
                TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(start + 1,
                    !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                GroupCollectorWrapper groupCollectorWrapper = new GroupCollectorWrapper(start, topScoreDocCollector, groupCollectorField);
                indexSearcher.Search(query, filter, groupCollectorWrapper);
                start = start - pageSize;
                if (start < 0)
                {
                    start = 0;
                }
                docs = topScoreDocCollector.TopDocs(start, pageSize); // return one page of records starting at `start`
            }

            groupCollectorField.GroupKeyValueDocCountList.Sort(); // sort the group counts
            groupKeyValueList = ObjectExtensions.Clone(groupCollectorField.GroupKeyValueDocCountList);
            if (docs != null && groupKeyValueList != null)
            {
                TimeSpan timeSpan = TimeSpan.FromMinutes(CACHE_TIME);
                MemCache.MemoryCacheBus.Insert(groupKey, groupKeyValueList, timeSpan);
                MemCache.MemoryCacheBus.Insert(listKey, docs, timeSpan);
            }
        }
    }

    #region Return the search results
    if (docs != null)
    {
        recordCount = docs.TotalHits;          // total number of search results
        ScoreDoc[] scoreDocs = docs.ScoreDocs; // the search result set
        if (scoreDocs != null)
        {
            foreach (ScoreDoc scoreDoc in scoreDocs)
            {
                if (scoreDoc.Doc != int.MaxValue && scoreDoc.Score != System.Single.NegativeInfinity)
                {
                    dictPager.Add(indexSearcher.Doc(scoreDoc.Doc), scoreDoc);
                }
            }
        }
    }
    if (dictPager.Count == 0)
    {   // if no matching results were retrieved, delete the cache. wyp
        MemCache.MemoryCacheBus.Delete(listKey);
    }
    if (groupKeyValueList.Count == 0)
    {   // if no matching results were retrieved, delete the cache. wyp
        MemCache.MemoryCacheBus.Delete(groupKey);
    }
    #endregion

    groupKeyValueList = groupKeyValueList ?? new GroupKeyValueList(0);
    return dictPager;
}
public IEnumerable<IRow> Read()
{
    var reader = _readerFactory.Create();
    var numDocs = reader.NumDocs();
    var selector = new MapFieldSelector(_fields.Select(f => f.Name).ToArray());

    using (var searcher = _searcherFactory.Create())
    {
        // read from input?  consider filters, and field names
        if (_readFrom == ReadFrom.Input)
        {
            if (_context.Entity.Filter.Any())
            {
                var queryFields = _context.Entity.Filter.Select(f => f.Field).ToArray();
                var query = string.Join(" ", _context.Entity.Filter.Select(f => "(" + (string.IsNullOrEmpty(f.Expression) ? f.Field + ":" + f.Value : f.Expression) + ") " + f.Continuation.ToUpper()));
                query = query.Remove(query.Length - 3);

                var topFieldCollector = TopFieldCollector.Create(Sort.INDEXORDER, numDocs, false, false, false, false);
                searcher.Search(new MultiFieldQueryParser(V, queryFields, _analyzer).Parse(query), topFieldCollector);
                var topDocs = topFieldCollector.TopDocs();
                if (topDocs == null)
                {
                    yield break;
                }
                for (var i = 0; i < topDocs.TotalHits; i++)
                {
                    var row = _rowFactory.Create();
                    var doc = searcher.Doc(i, selector);
                    foreach (var field in _fields)
                    {
                        row[field] = field.Convert(doc.Get(field.Name));
                    }
                    yield return row;
                }
            }
            else
            {
                for (var i = 0; i < numDocs; i++)
                {
                    if (reader.IsDeleted(i))
                    {
                        continue;
                    }
                    var doc = reader.Document(i, selector);
                    var row = _rowFactory.Create();
                    foreach (var field in _fields)
                    {
                        row[field] = field.Convert(doc.Get(field.Name));
                    }
                    yield return row;
                }
            }
        }
        else // read from output? consider tfldeleted and field aliases
        {
            var tflDeleted = _context.Entity.TflDeleted();
            var collector = TopFieldCollector.Create(Sort.INDEXORDER, numDocs, false, false, false, false);
            searcher.Search(LuceneConversion.TypeSearch(tflDeleted, tflDeleted.Alias, false), collector);

            var topDocs = collector.TopDocs();
            if (topDocs == null)
            {
                yield break;
            }
            for (var i = 0; i < topDocs.TotalHits; i++)
            {
                var row = _rowFactory.Create();
                var doc = searcher.Doc(i, selector);
                foreach (var field in _fields)
                {
                    row[field] = field.Convert(doc.Get(field.Alias));
                }
                yield return row;
            }
        }
    }
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: static org.apache.lucene.search.TopFieldCollector scoringCollector(org.apache.lucene.search.Sort sorting, int n) throws java.io.IOException
internal static TopFieldCollector ScoringCollector(Sort sorting, int n)
{
    return TopFieldCollector.create(sorting, n, false, true, false);
}
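Because the boolean flags in these TopFieldCollector factory calls are easy to misread, here is the same call with named arguments. This is purely illustrative C# syntax against the five-argument overload the Lucene 5.x-era Java code above targets; it is not from the source.

// Illustrative only: the same call with the flags named.
TopFieldCollector collector = TopFieldCollector.Create(
    sorting,
    n,
    fillFields: false,     // don't keep per-hit sort-field values
    trackDocScores: true,  // do compute a relevance score per hit
    trackMaxScore: false); // don't track the overall max score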
private void DoSearch(Query query, IEnumerable<SortField> sortField, int maxResults, int? skip = null, int? take = null)
{
    var extractTermsSupported = CheckQueryForExtractTerms(query);

    if (extractTermsSupported)
    {
        //This try catch is because analyzers strip out stop words and sometimes leave the query
        //with null values. This simply tries to extract terms; if it fails with a null
        //reference then it's an invalid null query, and NotSupportedException occurs when the query is
        //valid but the type of query can't extract terms.
        //This IS a work-around; theoretically Lucene itself should check for null query parameters
        //before throwing exceptions.
        try
        {
            var set = new HashSet<Term>();
            query.ExtractTerms(set);
        }
        catch (NullReferenceException)
        {
            //this means that an analyzer has stripped out stop words and now there are
            //no words left to search on
            //it could also mean that potentially an IIndexFieldValueType is throwing a null ref
            TotalItemCount = 0;
            return;
        }
        catch (NotSupportedException)
        {
            //swallow this exception, we should continue if this occurs.
        }
    }

    maxResults = maxResults >= 1 ? Math.Min(maxResults, LuceneSearcher.MaxDoc) : LuceneSearcher.MaxDoc;

    Collector topDocsCollector;
    var sortFields = sortField as SortField[] ?? sortField.ToArray();
    if (sortFields.Length > 0)
    {
        topDocsCollector = TopFieldCollector.Create(new Sort(sortFields), maxResults, false, false, false, false);
    }
    else
    {
        topDocsCollector = TopScoreDocCollector.Create(maxResults, true);
    }

    LuceneSearcher.Search(query, topDocsCollector);

    if (!skip.HasValue)
    {
        TopDocs = sortFields.Length > 0
            ? ((TopFieldCollector)topDocsCollector).TopDocs()
            : ((TopScoreDocCollector)topDocsCollector).TopDocs();
    }
    else
    {
        if (sortFields.Length > 0 && take != null && take.Value >= 0)
        {
            TopDocs = ((TopFieldCollector)topDocsCollector).TopDocs(skip.Value, take.Value);
        }
        else if (sortFields.Length > 0 && (take == null || take.Value < 0))
        {
            TopDocs = ((TopFieldCollector)topDocsCollector).TopDocs(skip.Value);
        }
        else if (take != null && take.Value >= 0)
        {
            TopDocs = ((TopScoreDocCollector)topDocsCollector).TopDocs(skip.Value, take.Value);
        }
        else
        {
            TopDocs = ((TopScoreDocCollector)topDocsCollector).TopDocs(skip.Value);
        }
    }

    TotalItemCount = TopDocs.TotalHits;
    ExecutionCount++;
}
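A minimal sketch of the skip/take paging pattern used above (assumptions: the 3.x-style API shown above; `searcher`, `query`, and `sortFields` already exist). The idea is to collect skip + take hits once, then slice the requested page out of the collector:

// Hedged sketch of skip/take paging with a TopFieldCollector.
int skip = 20, take = 10;
var collector = TopFieldCollector.Create(new Sort(sortFields), skip + take,
    false, false, false, false);
searcher.Search(query, collector);
TopDocs page = collector.TopDocs(skip, take); // hits 20..29 of the sorted result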
public SnTopFieldCollector(int size, SearchParams searchParams, Sort sort)
    : base()
{
    _searchParams = searchParams;
    _wrapped = TopFieldCollector.Create(sort, size, false, true, false, false);
}
/// <summary>
/// Federated search across multiple index files (with paging)
/// </summary>
/// <typeparam name="T">result data type</typeparam>
/// <param name="indexPaths">collection of index file paths</param>
/// <param name="query">Query</param>
/// <param name="sort">sort order</param>
/// <param name="pageIndex">current result page</param>
/// <param name="pageSize">number of hits per page</param>
/// <param name="count">total number of search results</param>
/// <returns></returns>
public static List<T> Search<T>(string[] indexPaths, Query query, Sort sort, int pageIndex, int pageSize, out int count) where T : BaseIndexModel
{
    count = 0;
    if (null == query)
    {
        return null;
    }
    if (pageIndex < 1)
    {
        pageIndex = 1;
    }
    if (pageSize < 1)
    {
        pageSize = 1;
    }
    // start position of the search
    int start = (pageIndex - 1) * pageSize;
    if (null == indexPaths || indexPaths.Length < 1)
    {
        return null;
    }

    List<IndexSearcher> searchers = new List<IndexSearcher>();
    foreach (var indexPath in indexPaths)
    {
        if (string.IsNullOrWhiteSpace(indexPath))
        {
            continue;
        }
        // open the index directory
        FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
        if (null == directory)
        {
            continue;
        }
        // check whether the index exists
        bool isExist = IndexReader.IndexExists(directory);
        if (!isExist)
        {
            continue;
        }
        // create a read-only index reader
        IndexReader reader = IndexReader.Open(directory, true);
        // create an IndexSearcher over that reader
        IndexSearcher searcher = new IndexSearcher(reader);
        searchers.Add(searcher);
    }
    if (searchers.Count < 1)
    {
        return null;
    }

    MultiSearcher multiSearcher = new MultiSearcher(searchers.ToArray());
    // Collector
    TopFieldCollector results = TopFieldCollector.Create(sort, start + pageSize, false, false, false, false);
    // search
    multiSearcher.Search(query, results);
    // total hits (exact count)
    count = results.TotalHits;
    // get the documents of the current page
    var docs = results.TopDocs(start, pageSize).ScoreDocs;
    // result list
    List<T> list = new List<T>();
    // iterate the current page's documents and convert them to the requested result type
    foreach (var scoreDoc in docs)
    {
        Document doc = multiSearcher.Doc(scoreDoc.Doc);
        var data = new IndexFactory(doc).Result as T;
        if (null != data)
        {
            list.Add(data);
        }
    }
    return list;
}
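A hedged usage sketch for the multi-index helper above (not from the source): the paths, field names, and ProductIndexModel (assumed to be a BaseIndexModel subclass) are hypothetical.

// Hedged usage sketch: page 2 (20 hits per page) across two indexes.
string[] paths = { @"D:\indexes\products", @"D:\indexes\articles" };
Query query = new TermQuery(new Term("Title", "lucene"));
Sort sort = new Sort(new SortField("CreateTime", SortField.STRING, true));
int total;
List<ProductIndexModel> page = Search<ProductIndexModel>(paths, query, sort, 2, 20, out total);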
public IList<T> Retrieve<T>(string keyword, out int totalCount, int pageIndex = 1, int pageSize = 10) where T : class, new()
{
    using (IndexSearcher searcher = new IndexSearcher(directory, true))
    {
        List<string> queries = new List<string>();
        List<string> fields = new List<string>();
        List<Occur> flags = new List<Occur>();
        List<SortField> sortFields = new List<SortField>();

        PropertyInfo[] properties = typeof(T).GetProperties();
        foreach (var property in properties)
        {
            if (property.IsDefined(typeof(OccurAttribute), false))
            {
                OccurAttribute attribute = property.GetCustomAttribute(typeof(OccurAttribute)) as OccurAttribute;
                Occur occur = attribute.Occur;
                if (!occur.Equals(Occur.MUST_NOT))
                {
                    // queries, fields and flags correspond one-to-one here;
                    // see the MultiFieldQueryParser.Parse documentation
                    queries.Add(keyword);
                    fields.Add(property.Name);
                    flags.Add(occur);
                }
            }
            if (property.IsDefined(typeof(SortAttribute), false))
            {
                SortAttribute attribute = property.GetCustomAttribute(typeof(SortAttribute)) as SortAttribute;
                int sortField = attribute.Type;
                bool reverse = attribute.Reverse;
                sortFields.Add(new SortField(property.Name, sortField, reverse));
            }
        }

        Query query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_30, queries?.ToArray(), fields?.ToArray(), flags?.ToArray(), analyzer);
        //Query queryR = new TermRangeQuery()
        TopDocs tds;
        int startRowIndex = (pageIndex - 1) * pageSize;

        // paging
        if (sortFields.Count > 0)
        {
            Sort sort = new Sort(sortFields?.ToArray());
            TopFieldCollector collector = TopFieldCollector.Create(sort, pageIndex * pageSize, false, false, false, false);
            searcher.Search(query, collector);
            // slice the requested page out of the collected results
            tds = collector.TopDocs(startRowIndex, pageSize);
        }
        else
        {
            TopScoreDocCollector collector = TopScoreDocCollector.Create(pageIndex * pageSize, false);
            searcher.Search(query, collector);
            tds = collector.TopDocs(startRowIndex, pageSize);
        }

        totalCount = tds.TotalHits;
        IList<T> list = new List<T>();
        foreach (ScoreDoc sd in tds.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            T searchResult = new T();
            foreach (var property in properties)
            {
                string value = doc.Get(property.Name);
                if (!string.IsNullOrEmpty(value))
                {
                    Action<object, object> setValue = ReappearMember.CreatePropertySetter(property);
                    if (property.IsDefined(typeof(OccurAttribute), false))
                    {
                        setValue(searchResult, Preview(value, keyword));
                    }
                    else
                    {
                        setValue(searchResult, value);
                    }
                }
            }
            list.Add(searchResult);
        }
        return list;
    }
}
/// <summary>
/// Executes the search.
/// </summary>
/// <returns></returns>
private Collector ExecuteSearch(int pageIndex)
{
    Collector collector = null;
    TopDocsCollector result = null;
    var totalHits = _options.RandomResult ? 1 : (pageIndex + 1) * (_options.HitsPerPage > 0 ? _options.HitsPerPage : 50);
    if (totalHits == 0)
    {
        totalHits = 1;
    }

    global::Lucene.Net.Search.Query query = _query;
    if (_sort != null && query == null)
    {
        var parser = new global::Lucene.Net.QueryParsers.QueryParser(
            global::Lucene.Net.Util.Version.LUCENE_29, "test",
            new global::Lucene.Net.Analysis.Standard.StandardAnalyzer(
                global::Lucene.Net.Util.Version.LUCENE_29, Stopwords.PORTUGUESE_SET));
        query = parser.Parse("test:1");
        result = TopFieldCollector.create(_sort, totalHits, true, false, false, true);
    }

    if (query != null)
    {
        var weight = query.Weight(Searcher);
        collector = result = TopFieldCollector.create(_sort, totalHits, true, false, false, !weight.ScoresDocsOutOfOrder());
        if (_options.RandomResult)
        {
            collector = new RandomCollectorWrapper(this, this.Searcher, result);
        }
        else if (_channelFields == null || _channelFields.Length == 0)
        {
            collector = new SummaryCollectorWrapper(this, this.Searcher, result);
        }
        this.Searcher.Search(weight, _filter, collector);

        if (collector is SummaryCollectorWrapper)
        {
            var sCollector = ((SummaryCollectorWrapper)collector);
            _summaries = sCollector.GetSummaries().ToList();
            _channelFields = sCollector.GetChannelsFields().ToArray();
        }
        if (collector is RandomCollectorWrapper)
        {
            var wrapper = (RandomCollectorWrapper)collector;
            var wrapperResult = wrapper.GetResult();
            _result = wrapperResult is Element[] ? (Element[])wrapperResult : wrapperResult.ToArray();
            _count = _result.Length;
            OnLoad();
            return null;
        }
        _count = result.GetTotalHits();
        OnLoad();
        return result;
    }
    else
    {
        SummaryCollectorWrapper wrapper = null;
        if (_options.RandomResult)
        {
            wrapper = new RandomCollectorWrapper(this);
            _result = ((RandomCollectorWrapper)wrapper).GetResult().ToArray();
        }
        else
        {
            collector = wrapper = new SummaryCollectorWrapper(this);
            _channelFields = wrapper.GetChannelsFields().ToArray();
        }
        _count = wrapper.GetTotalHits();
        _summaries = wrapper.GetSummaries().ToList();
        OnLoad();
        return wrapper;
    }
}
/// <summary>
/// Accumulates groups for the BlockJoinQuery specified by its slot.
/// </summary>
/// <param name="slot"> Search query's slot </param>
/// <param name="offset"> Parent docs offset </param>
/// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
/// <param name="withinGroupOffset"> Offset within each group of child docs </param>
/// <param name="withinGroupSort"> Sort criteria within groups </param>
/// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
/// <returns> <see cref="ITopGroups{T}"/> for the query specified by slot </returns>
/// <exception cref="IOException"> if there is a low-level I/O error </exception>
private ITopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
{
    var groups = new GroupDocs<int>[sortedGroups.Length - offset];
    var fakeScorer = new FakeScorer();

    int totalGroupedHitCount = 0;
    //System.out.println("slot=" + slot);

    for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
    {
        OneGroup og = sortedGroups[groupIdx];
        int numChildDocs;
        if (slot == -1 || slot >= og.counts.Length)
        {
            numChildDocs = 0;
        }
        else
        {
            numChildDocs = og.counts[slot];
        }

        // Number of documents in group should be bounded to prevent redundant memory allocation
        int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
        //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);

        // At this point we hold all docs w/ in each group, unsorted; we now sort them:
        ICollector collector;
        if (withinGroupSort == null)
        {
            //System.out.println("sort by score");
            // Sort by score
            if (!trackScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
            }
            collector = TopScoreDocCollector.Create(numDocsInGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
        }

        collector.SetScorer(fakeScorer);
        collector.SetNextReader(og.readerContext);
        for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
        {
            //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
            int doc = og.docs[slot][docIdx];
            fakeScorer.doc = doc;
            if (trackScores)
            {
                fakeScorer._score = og.scores[slot][docIdx];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += numChildDocs;

        object[] groupSortValues;

        if (fillSortFields)
        {
            groupSortValues = new object[comparers.Length];
            for (int sortFieldIdx = 0; sortFieldIdx < comparers.Length; sortFieldIdx++)
            {
                groupSortValues[sortFieldIdx] = comparers[sortFieldIdx][og.Slot];
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs;
        if (withinGroupSort == null)
        {
            var tempCollector = (TopScoreDocCollector)collector;
            topDocs = tempCollector.GetTopDocs(withinGroupOffset, numDocsInGroup);
        }
        else
        {
            var tempCollector = (TopFieldCollector)collector;
            topDocs = tempCollector.GetTopDocs(withinGroupOffset, numDocsInGroup);
        }

        groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
    }

    return new TopGroups<int>(new TopGroups<int>(sort.GetSort(),
        withinGroupSort == null ? null : withinGroupSort.GetSort(),
        0, totalGroupedHitCount, groups, maxScore), totalHitCount);
}
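For context, a hedged sketch (not from the source) of the block-join collector that ultimately drives AccumulateGroups. It assumes parent and child documents were indexed together as blocks; the "docType" and "skill" fields are hypothetical.

// Hedged sketch: a parent/child block-join search with grouped results.
Filter parents = new FixedBitSetCachingWrapperFilter(
    new QueryWrapperFilter(new TermQuery(new Term("docType", "parent"))));
var childQuery = new TermQuery(new Term("skill", "java"));
var joinQuery = new ToParentBlockJoinQuery(childQuery, parents, ScoreMode.Avg);

var c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
searcher.Search(joinQuery, c);
// null withinGroupSort = rank child docs inside each group by relevance:
ITopGroups<int> groups = c.GetTopGroups(joinQuery, null, 0, 5, 0, true);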
/// <summary>
/// Paged query (high efficiency). wyp
/// http://blog.csdn.net/smallearth/article/details/7980226
/// </summary>
/// <param name="indexSearcher"></param>
/// <param name="pageSize"></param>
/// <param name="pageIndex"></param>
/// <param name="query"></param>
/// <param name="recordCount"></param>
/// <param name="culture"></param>
/// <param name="filter"></param>
/// <param name="sortFields"></param>
/// <returns></returns>
public static Dictionary<Document, ScoreDoc> SelectAfter(IndexSearcher indexSearcher, int pageSize, int pageIndex, Query query, out int recordCount, string culture = "", Filter filter = null, params SortField[] sortFields)
{
    recordCount = 0;
    Dictionary<Document, ScoreDoc> dictPager = new Dictionary<Document, ScoreDoc>(pageSize);
    int maxDoc = indexSearcher.IndexReader.MaxDoc;
    if (maxDoc == 0)
    {   // MaxDoc is the largest doc id available in the index; 0 means the index is empty
        return dictPager;
    }

    TopDocs docs = null;
    string key = string.Format(CACHE_KEY, query.ToString(),
        string.Join("_", sortFields.Select(item => item.ToString())),
        filter == null ? string.Empty : filter.ToString(), culture.ToUpper());

#if LAST_SCORE_DOC
    #region Fetch the last document of page (pageIndex), then continue from that document and take the next pageSize documents
    //http://blog.csdn.net/smallearth/article/details/7980226
    ScoreDoc lastScoreDoc = GetLastScoreDoc(indexSearcher, pageSize, pageIndex, query, filter, culture, sortFields);
    if (lastScoreDoc == null)
    {   // lastScoreDoc == null means we need the first page of data. wyp
        key += string.Format(",PAGE_SIZE:{0}", pageSize);
        docs = MemCache.MemoryCacheBus.Get(key) as TopDocs;
        if (docs == null)
        {
            if (sortFields.Length > 0)
            {   // with sorting
                Sort sort = new Sort();
                sort.SetSort(sortFields);
                docs = indexSearcher.Search(query, filter, pageSize, sort); // only return the first pageSize records
            }
            else
            {   // without sorting
                docs = indexSearcher.Search(query, filter, pageSize); // only return the first pageSize records
            }
            if (docs != null)
            {
                MemCache.MemoryCacheBus.Insert(key, docs, TimeSpan.FromMinutes(CACHE_TIME));
            }
        }
    }
    else
    {
        if (lastScoreDoc.Doc < maxDoc)
        {
            key += string.Format(",DOC:{0},PAGE_INDEX:{1},PAGE_SIZE:{2}", lastScoreDoc.Doc, pageIndex, pageSize);
            docs = MemCache.MemoryCacheBus.Get(key) as TopDocs;
            if (docs == null)
            {
                if (sortFields.Length > 0)
                {   // sort first, then page
                    int start = pageIndex * pageSize;
                    start = Math.Min(start, maxDoc);
                    Sort sort = new Sort();
                    sort.SetSort(sortFields);
                    TopFieldCollector topFieldCollector = TopFieldCollector.Create(sort, start, true, false, false,
                        !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                    indexSearcher.Search(query, filter, topFieldCollector);
                    start = start - pageSize;
                    if (start < 0)
                    {
                        start = 0;
                    }
                    docs = topFieldCollector.TopDocs(start, pageSize); // return one page of records starting at `start`
                }
                else
                {   // no sorting, paging only
                    //http://search-lucene.com/c/Lucene:core/src/java/org/apache/lucene/search/IndexSearcher.java||IndexSearcher line 482
                    TopScoreDocCollectorEx topScoreDocCollectorEx = TopScoreDocCollectorEx.Create(pageSize, lastScoreDoc,
                        !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
                    indexSearcher.Search(query, filter, topScoreDocCollectorEx);
                    docs = topScoreDocCollectorEx.TopDocs();
                }
                if (docs != null)
                {
                    MemCache.MemoryCacheBus.Insert(key, docs, TimeSpan.FromMinutes(CACHE_TIME));
                }
            }
        }
    }
    #endregion
#else
    #region Fetch the first (pageIndex + 1) pages of documents, then take the last page out of that result
    key += string.Format(",PAGE_INDEX:{0},PAGE_SIZE:{1}", pageIndex, pageSize);
    docs = WebCache.DataCacheBus.Get(key) as TopDocs;
    if (docs == null)
    {
        //https://searchcode.com/codesearch/view/7233825/
        int start = pageIndex * pageSize;
        start = Math.Min(start, maxDoc);
        if (sortFields.Length > 0)
        {   // sort first, then page
            Sort sort = new Sort();
            sort.SetSort(sortFields);
            TopFieldCollector topFieldCollector = TopFieldCollector.Create(sort, start, true, false, false,
                !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
            indexSearcher.Search(query, filter, topFieldCollector);
            start = start - pageSize;
            if (start < 0)
            {
                start = 0;
            }
            docs = topFieldCollector.TopDocs(start, pageSize); // return one page of records starting at `start`
        }
        else
        {   // no sorting, paging only
            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(start + 1,
                !query.CreateWeight(indexSearcher).GetScoresDocsOutOfOrder());
            indexSearcher.Search(query, filter, topScoreDocCollector);
            start = start - pageSize;
            if (start < 0)
            {
                start = 0;
            }
            docs = topScoreDocCollector.TopDocs(start, pageSize); // return one page of records starting at `start`
        }
        if (docs != null)
        {
            WebCache.DataCacheBus.Insert(key, docs, TimeSpan.FromMinutes(CACHE_TIME));
        }
    }
    #endregion
#endif

    #region Return the search results
    if (docs != null)
    {
        recordCount = docs.TotalHits;          // total number of search results
        ScoreDoc[] scoreDocs = docs.ScoreDocs; // the search result set
        if (scoreDocs != null)
        {
            foreach (ScoreDoc scoreDoc in scoreDocs)
            {
                if (scoreDoc.Doc != int.MaxValue && scoreDoc.Score != System.Single.NegativeInfinity)
                {
#if LAST_SCORE_DOC
                    lastScoreDoc = scoreDoc; // remember the last ScoreDoc of the current page
#endif
                    dictPager.Add(indexSearcher.Doc(scoreDoc.Doc), scoreDoc);
                }
            }
        }
    }
    if (dictPager.Count == 0)
    {   // if no matching results were retrieved, delete the cache. wyp
        MemCache.MemoryCacheBus.Delete(key);
    }
#if LAST_SCORE_DOC
    else if (lastScoreDoc != null)
    {
        // Cache the last ScoreDoc of the current page ahead of time, so it can be
        // used as the starting point when the user searches for the next page. wyp
        key = string.Format(CACHE_KEY, query.ToString(),
            string.Join("_", sortFields.Select(item => item.ToString())),
            filter == null ? string.Empty : filter.ToString(), culture.ToUpper())
            + string.Format(",PAGE_INDEX:{0},PAGE_SIZE:{1}", pageIndex + 1, pageSize);
        // put the lastScoreDoc needed for the next page into the cache in advance. wyp
        MemCache.MemoryCacheBus.Insert(key, lastScoreDoc, TimeSpan.FromMinutes(CACHE_TIME));
    }
#endif
    #endregion

    return dictPager;
}
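On Lucene(.NET) versions that provide it (4.x and later), IndexSearcher.SearchAfter implements the same "continue from the last ScoreDoc" paging natively. A hedged sketch (not from the source), assuming `searcher`, `query`, and `pageSize` already exist:

// Hedged sketch: built-in search-after paging instead of the manual cache above.
ScoreDoc last = null;
for (int page = 0; page < 3; page++)
{
    TopDocs docs = last == null
        ? searcher.Search(query, pageSize)
        : searcher.SearchAfter(last, query, pageSize);
    if (docs.ScoreDocs.Length == 0) break;
    last = docs.ScoreDocs[docs.ScoreDocs.Length - 1]; // remember for the next page
    // ... render docs.ScoreDocs ...
}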