public override Explanation Explain(IndexSearcher searcher, Explanation firstPassExplanation, int docID)
{
    Explanation result = base.Explain(searcher, firstPassExplanation, docID);
    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
    int subReader = ReaderUtil.SubIndex(docID, leaves);
    AtomicReaderContext readerContext = leaves[subReader];
    int docIDInSegment = docID - readerContext.DocBase;
    var context = new Dictionary<string, object>();

    var fakeScorer = new FakeScorer { score = firstPassExplanation.Value, doc = docIDInSegment };
    context["scorer"] = fakeScorer;

    foreach (string variable in expression.variables)
    {
        result.AddDetail(new Explanation(
            (float)bindings.GetValueSource(variable).GetValues(context, readerContext).DoubleVal(docIDInSegment),
            "variable \"" + variable + "\""));
    }

    return result;
}
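All of these call sites lean on the same small helper: a FakeScorer whose doc and score are simply whatever the caller last assigned, so that collectors and ValueSources that expect a real Scorer can be fed precomputed values. Below is a minimal sketch of such a helper, assuming the Lucene.NET 4.8 Scorer base class; exact member names (e.g. GetScore vs. Score) and extra constructor arguments differ between the copies used by the modules above.

using System;
using Lucene.Net.Search;

// Sketch only: a stand-in Scorer that reports whatever doc/score the caller set.
internal sealed class FakeScorer : Scorer
{
    internal float score;   // last score pushed in by the caller
    internal int doc = -1;  // last doc id pushed in by the caller
    internal int freq = 1;

    public FakeScorer()
        : base(null) // no real Weight backs this scorer
    {
    }

    public override int DocID => doc;
    public override int Freq => freq;
    public override float GetScore() => score;
    public override long GetCost() => 1;

    // The owning collector/scorer drives iteration itself, so the
    // DocIdSetIterator methods are intentionally unsupported.
    public override int NextDoc() => throw new NotSupportedException("FakeScorer does not support NextDoc()");
    public override int Advance(int target) => throw new NotSupportedException("FakeScorer does not support Advance(int)");
}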
private void SumValues(IList<MatchingDocs> matchingDocs, bool keepScores, ValueSource valueSource)
{
    FakeScorer scorer = new FakeScorer();
    IDictionary context = new Dictionary<string, Scorer>();
    if (keepScores)
    {
        context["scorer"] = scorer;
    }
    Int32sRef scratch = new Int32sRef();
    foreach (MatchingDocs hits in matchingDocs)
    {
        OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.GetReader(hits.Context);
        int scoresIdx = 0;
        float[] scores = hits.Scores;
        FunctionValues functionValues = valueSource.GetValues(context, hits.Context);
        DocIdSetIterator docs = hits.Bits.GetIterator();
        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            ords.Get(doc, scratch);
            if (keepScores)
            {
                scorer.docID = doc;
                scorer.score = scores[scoresIdx++];
            }
            float value = (float)functionValues.DoubleVal(doc);
            for (int i = 0; i < scratch.Length; i++)
            {
                m_values[scratch.Int32s[i]] += value;
            }
        }
    }
    Rollup();
}
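SumValues is the aggregation loop inside TaxonomyFacetSumValueSource: it sums a ValueSource per facet ordinal, and only registers the FakeScorer in the context when keepScores is true, so score-based ValueSources can read the hit's score. A hedged usage sketch of the outer flow, assuming the Lucene.NET 4.8 facet API; searcher, query, taxoReader and config are assumed to be set up elsewhere, and the "popularity" field and "Category" dimension are hypothetical.

// keepScores: true, so SumValues has real per-hit scores to hand to the FakeScorer
FacetsCollector fc = new FacetsCollector(true);
searcher.Search(query, fc);

// Sum the "popularity" field value per facet ordinal instead of counting hits.
Facets facets = new TaxonomyFacetSumValueSource(
    taxoReader, config, fc, new DoubleFieldSource("popularity"));
FacetResult topCategories = facets.GetTopChildren(10, "Category");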
/// <summary>
/// Returns the grouped results. Returns null if the
/// number of groups collected is &lt;= groupOffset.
///
/// <para>
/// <b>NOTE</b>: This collector is unable to compute
/// the groupValue per group, so it will always be null.
/// This is normally not a problem, as you can obtain the
/// value just like you obtain other values for each
/// matching document (e.g., via stored fields, via
/// FieldCache, etc.)
/// </para>
/// </summary>
/// <typeparam name="TGroupValue">The expected return type for the group value</typeparam>
/// <param name="withinGroupSort">
/// The <see cref="Sort"/> used to sort
/// documents within each group. Passing null is
/// allowed, to sort by relevance.
/// </param>
/// <param name="groupOffset">Which group to start from</param>
/// <param name="withinGroupOffset">
/// Which document to start from within each group
/// </param>
/// <param name="maxDocsPerGroup">
/// How many top documents to keep within each group.
/// </param>
/// <param name="fillSortFields">
/// If true then the Comparable values for the sort fields will be set
/// </param>
public virtual ITopGroups<TGroupValue> GetTopGroups<TGroupValue>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
{
    //if (queueFull) {
    //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
    //}
    if (subDocUpto != 0)
    {
        ProcessGroup();
    }
    if (groupOffset >= groupQueue.Count)
    {
        return null;
    }
    int totalGroupedHitCount = 0;

    FakeScorer fakeScorer = new FakeScorer();

    float maxScore = float.Epsilon; // LUCENENET: Epsilon in .NET is the same as MIN_VALUE in Java

    GroupDocs<TGroupValue>[] groups = new GroupDocs<TGroupValue>[groupQueue.Count - groupOffset];
    for (int downTo = groupQueue.Count - groupOffset - 1; downTo >= 0; downTo--)
    {
        OneGroup og = groupQueue.Pop();

        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        ITopDocsCollector collector;
        if (withinGroupSort == null)
        {
            // Sort by score
            if (!needsScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);
        }

        collector.SetScorer(fakeScorer);
        collector.SetNextReader(og.readerContext);
        for (int docIDX = 0; docIDX < og.count; docIDX++)
        {
            int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores)
            {
                fakeScorer.score = og.scores[docIDX];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += og.count;

        object[] groupSortValues;
        if (fillSortFields)
        {
            groupSortValues = new IComparable[comparers.Length];
            for (int sortFieldIDX = 0; sortFieldIDX < comparers.Length; sortFieldIDX++)
            {
                groupSortValues[sortFieldIDX] = comparers[sortFieldIDX][og.comparerSlot];
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs = collector.GetTopDocs(withinGroupOffset, maxDocsPerGroup);

        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<TGroupValue>(float.NaN, topDocs.MaxScore, og.count, topDocs.ScoreDocs,
                                                    default(TGroupValue), groupSortValues);
        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    return new TopGroups<TGroupValue>(new TopGroups<TGroupValue>(groupSort.GetSort(),
                                                                 withinGroupSort == null ? null : withinGroupSort.GetSort(),
                                                                 totalHitCount, totalGroupedHitCount, groups, maxScore),
                                      totalGroupCount);
}
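This GetTopGroups overload belongs to BlockGroupingCollector, which assumes each group's documents were indexed together as one contiguous block. A hedged usage sketch of the calling side, assuming the Lucene.NET 4.8 grouping API; searcher and query are assumed to exist, and the "blockEnd" marker term is hypothetical.

// Filter matching the last document of every indexed block; the marker field/term is application-defined.
Filter lastDocInBlock = new CachingWrapperFilter(
    new QueryWrapperFilter(new TermQuery(new Term("blockEnd", "true"))));

var groupCollector = new BlockGroupingCollector(Sort.RELEVANCE, 10, true, lastDocInBlock);
searcher.Search(query, groupCollector);

// withinGroupSort == null sorts docs inside each group by relevance, which requires needsScores == true above.
// The group value is always default/null for this collector (see the NOTE in the doc comment).
ITopGroups<object> topGroups = groupCollector.GetTopGroups<object>(null, 0, 0, 5, true);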
/// <summary>
/// Accumulates groups for the BlockJoinQuery specified by its slot.
/// </summary>
/// <param name="slot"> Search query's slot </param>
/// <param name="offset"> Parent docs offset </param>
/// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
/// <param name="withinGroupOffset"> Offset within each group of child docs </param>
/// <param name="withinGroupSort"> Sort criteria within groups </param>
/// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
/// <returns> TopGroups for the query specified by slot </returns>
/// <exception cref="IOException"> if there is a low-level I/O error </exception>
private TopGroups<int> AccumulateGroups(int slot, int offset, int maxDocsPerGroup, int withinGroupOffset, Sort withinGroupSort, bool fillSortFields)
{
    var groups = new GroupDocs<int>[sortedGroups.Length - offset];
    var fakeScorer = new FakeScorer();
    int totalGroupedHitCount = 0;
    //System.out.println("slot=" + slot);

    for (int groupIdx = offset; groupIdx < sortedGroups.Length; groupIdx++)
    {
        OneGroup og = sortedGroups[groupIdx];
        int numChildDocs;
        if (slot == -1 || slot >= og.counts.Length)
        {
            numChildDocs = 0;
        }
        else
        {
            numChildDocs = og.counts[slot];
        }

        // Number of documents in group should be bounded to prevent redundant memory allocation
        int numDocsInGroup = Math.Max(1, Math.Min(numChildDocs, maxDocsPerGroup));
        //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);

        // At this point we hold all docs w/ in each group, unsorted; we now sort them:
        Collector collector;
        if (withinGroupSort == null)
        {
            //System.out.println("sort by score");
            // Sort by score
            if (!trackScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: trackScores=false");
            }
            collector = TopScoreDocCollector.Create(numDocsInGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
        }

        collector.Scorer = fakeScorer;
        collector.NextReader = og.readerContext;
        for (int docIdx = 0; docIdx < numChildDocs; docIdx++)
        {
            //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
            int doc = og.docs[slot][docIdx];
            fakeScorer.doc = doc;
            if (trackScores)
            {
                fakeScorer._score = og.scores[slot][docIdx];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += numChildDocs;

        object[] groupSortValues;
        if (fillSortFields)
        {
            groupSortValues = new object[comparators.Length];
            for (int sortFieldIdx = 0; sortFieldIdx < comparators.Length; sortFieldIdx++)
            {
                groupSortValues[sortFieldIdx] = comparators[sortFieldIdx].Value(og.Slot);
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs;
        if (withinGroupSort == null)
        {
            var tempCollector = (TopScoreDocCollector)collector;
            topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
        }
        else
        {
            var tempCollector = (TopFieldCollector)collector;
            topDocs = tempCollector.TopDocs(withinGroupOffset, numDocsInGroup);
        }

        groups[groupIdx - offset] = new GroupDocs<int>(og.Score, topDocs.MaxScore, numChildDocs, topDocs.ScoreDocs, og.Doc, groupSortValues);
    }

    return new TopGroups<int>(new TopGroups<int>(sort.GetSort(), withinGroupSort == null ? null : withinGroupSort.GetSort(), 0, totalGroupedHitCount, groups, maxScore), totalHitCount);
}
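AccumulateGroups is private; applications reach it through ToParentBlockJoinCollector.GetTopGroups after running a block-join search. A hedged sketch of that outer flow, assuming the Lucene.NET block-join API mirrors the Java 4.x original; searcher is assumed to exist, and the "isParent" and "color" terms are hypothetical.

// Parent documents are identified by a marker term; block-join requires a FixedBitSet-producing filter.
Filter parentsFilter = new FixedBitSetCachingWrapperFilter(
    new QueryWrapperFilter(new TermQuery(new Term("isParent", "true"))));

Query childQuery = new TermQuery(new Term("color", "red")); // hypothetical child-side query
var childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

// trackScores and trackMaxScore both enabled so children can be sorted by relevance
var joinCollector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);
searcher.Search(childJoinQuery, joinCollector);

// null withinGroupSort => children within each parent group sorted by relevance
TopGroups<int> topGroups = joinCollector.GetTopGroups(childJoinQuery, null, 0, 5, 0, true);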
public override bool Score(ICollector collector, int maxDoc)
{
    if (maxDoc != int.MaxValue)
    {
        throw new ArgumentException("maxDoc must be System.Int32.MaxValue");
    }
    //if (DEBUG) {
    //  System.out.println("\nscore: reader=" + context.reader());
    //}
    //System.out.println("score r=" + context.reader());
    FakeScorer scorer = new FakeScorer(this);
    collector.SetScorer(scorer);
    if (drillDownCollector != null)
    {
        drillDownCollector.SetScorer(scorer);
        drillDownCollector.SetNextReader(context);
    }
    foreach (DocsAndCost dim in dims)
    {
        dim.sidewaysCollector.SetScorer(scorer);
        dim.sidewaysCollector.SetNextReader(context);
    }

    // TODO: if we ever allow null baseScorer ... it will
    // mean we DO score docs out of order ... hmm, or if we
    // change up the order of the conjunctions below
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(baseScorer != null);
    }

    // some scorers, e.g. ReqExclScorer, can hit NPE if cost is called after nextDoc
    long baseQueryCost = baseScorer.GetCost();

    int numDims = dims.Length;

    long drillDownCost = 0;
    for (int dim = 0; dim < numDims; dim++)
    {
        DocIdSetIterator disi = dims[dim].disi;
        if (dims[dim].bits == null && disi != null)
        {
            drillDownCost += disi.GetCost();
        }
    }

    long drillDownAdvancedCost = 0;
    if (numDims > 1 && dims[1].disi != null)
    {
        drillDownAdvancedCost = dims[1].disi.GetCost();
    }

    // Position all scorers to their first matching doc:
    baseScorer.NextDoc();
    int numBits = 0;
    foreach (DocsAndCost dim in dims)
    {
        if (dim.disi != null)
        {
            dim.disi.NextDoc();
        }
        else if (dim.bits != null)
        {
            numBits++;
        }
    }

    IBits[] bits = new IBits[numBits];
    ICollector[] bitsSidewaysCollectors = new ICollector[numBits];
    DocIdSetIterator[] disis = new DocIdSetIterator[numDims - numBits];
    ICollector[] sidewaysCollectors = new ICollector[numDims - numBits];
    int disiUpto = 0;
    int bitsUpto = 0;
    for (int dim = 0; dim < numDims; dim++)
    {
        DocIdSetIterator disi = dims[dim].disi;
        if (dims[dim].bits == null)
        {
            disis[disiUpto] = disi;
            sidewaysCollectors[disiUpto] = dims[dim].sidewaysCollector;
            disiUpto++;
        }
        else
        {
            bits[bitsUpto] = dims[dim].bits;
            bitsSidewaysCollectors[bitsUpto] = dims[dim].sidewaysCollector;
            bitsUpto++;
        }
    }

    /*
    System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
    System.out.println("  maxDoc=" + context.reader().maxDoc());
    System.out.println("  maxCost=" + maxCost);
    System.out.println("  dims[0].freq=" + dims[0].freq);
    if (numDims > 1) {
      System.out.println("  dims[1].freq=" + dims[1].freq);
    }
    */

    if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10)
    {
        //System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length);
        DoQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors);
    }
    else if (numDims > 1 && (dims[1].disi == null || drillDownAdvancedCost < baseQueryCost / 10))
    {
        //System.out.println("drillDownAdvance");
        DoDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
    }
    else
    {
        //System.out.println("union");
        DoUnionScoring(collector, disis, sidewaysCollectors);
    }

    return false;
}
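Score is the inner loop of DrillSidewaysScorer: it wires one FakeScorer into the main, drill-down and sideways collectors, then chooses between query-first, drill-down-advance and union scoring based on relative iterator costs. Applications normally never call it directly; they go through DrillSideways. A hedged usage sketch, assuming the Lucene.NET 4.8 facet API; searcher, config, taxoReader and baseQuery are assumed to exist, and the "Color"/"red" dimension values are hypothetical.

var drillSideways = new DrillSideways(searcher, config, taxoReader);

var drillDownQuery = new DrillDownQuery(config, baseQuery);
drillDownQuery.Add("Color", "red"); // constrain one dimension; sideways counts are still computed for it

DrillSidewaysResult result = drillSideways.Search(drillDownQuery, 10);
TopDocs hits = result.Hits;
FacetResult colorCounts = result.Facets.GetTopChildren(10, "Color");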
private void SumValues(IList<MatchingDocs> matchingDocs, bool keepScores, ValueSource valueSource)
{
    FakeScorer scorer = new FakeScorer();
    IDictionary context = new Dictionary<string, Scorer>();
    if (keepScores)
    {
        context["scorer"] = scorer;
    }
    IntsRef scratch = new IntsRef();
    foreach (MatchingDocs hits in matchingDocs)
    {
        OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.GetReader(hits.Context);
        int scoresIdx = 0;
        float[] scores = hits.Scores;
        FunctionValues functionValues = valueSource.GetValues(context, hits.Context);
        DocIdSetIterator docs = hits.Bits.GetIterator();
        int doc;
        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            ords.Get(doc, scratch);
            if (keepScores)
            {
                scorer.docID_Renamed = doc;
                scorer.score_Renamed = scores[scoresIdx++];
            }
            float value = (float)functionValues.DoubleVal(doc);
            for (int i = 0; i < scratch.Length; i++)
            {
                values[scratch.Ints[i]] += value;
            }
        }
    }
    Rollup();
}
public override bool Score(Collector collector, int max)
{
    FakeScorer fakeScorer = new FakeScorer();
    collector.Scorer = fakeScorer;
    if (_doc == -1)
    {
        _doc = NextDocOutOfOrder();
    }
    while (_doc < max)
    {
        fakeScorer.doc = _doc;
        fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]];
        collector.Collect(_doc);
        _doc = NextDocOutOfOrder();
    }
    return _doc != DocIdSetIterator.NO_MORE_DOCS;
}
/// <summary>
/// Returns the grouped results. Returns null if the
/// number of groups collected is &lt;= groupOffset.
///
/// <para>
/// <b>NOTE</b>: This collector is unable to compute
/// the groupValue per group, so it will always be null.
/// This is normally not a problem, as you can obtain the
/// value just like you obtain other values for each
/// matching document (e.g., via stored fields, via
/// FieldCache, etc.)
/// </para>
/// </summary>
/// <typeparam name="TGroupValue">The expected return type for the group value</typeparam>
/// <param name="withinGroupSort">
/// The <see cref="Sort"/> used to sort
/// documents within each group. Passing null is
/// allowed, to sort by relevance.
/// </param>
/// <param name="groupOffset">Which group to start from</param>
/// <param name="withinGroupOffset">
/// Which document to start from within each group
/// </param>
/// <param name="maxDocsPerGroup">
/// How many top documents to keep within each group.
/// </param>
/// <param name="fillSortFields">
/// If true then the Comparable values for the sort fields will be set
/// </param>
public virtual ITopGroups<TGroupValue> GetTopGroups<TGroupValue>(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, bool fillSortFields)
{
    //if (queueFull) {
    //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
    //}
    if (subDocUpto != 0)
    {
        ProcessGroup();
    }
    if (groupOffset >= groupQueue.Size())
    {
        return null;
    }
    int totalGroupedHitCount = 0;

    FakeScorer fakeScorer = new FakeScorer();

    float maxScore = float.MinValue;

    GroupDocs<TGroupValue>[] groups = new GroupDocs<TGroupValue>[groupQueue.Size() - groupOffset];
    for (int downTo = groupQueue.Size() - groupOffset - 1; downTo >= 0; downTo--)
    {
        OneGroup og = groupQueue.Pop();

        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        ITopDocsCollector collector;
        if (withinGroupSort == null)
        {
            // Sort by score
            if (!needsScores)
            {
                throw new ArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.Create(maxDocsPerGroup, true);
        }
        else
        {
            // Sort by fields
            collector = TopFieldCollector.Create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true);
        }

        collector.Scorer = fakeScorer;
        collector.NextReader = og.readerContext;
        for (int docIDX = 0; docIDX < og.count; docIDX++)
        {
            int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores)
            {
                fakeScorer.score = og.scores[docIDX];
            }
            collector.Collect(doc);
        }
        totalGroupedHitCount += og.count;

        object[] groupSortValues;
        if (fillSortFields)
        {
            groupSortValues = new IComparable[comparators.Length];
            for (int sortFieldIDX = 0; sortFieldIDX < comparators.Length; sortFieldIDX++)
            {
                groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].Value(og.comparatorSlot);
            }
        }
        else
        {
            groupSortValues = null;
        }

        TopDocs topDocs = collector.TopDocs(withinGroupOffset, maxDocsPerGroup);

        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<TGroupValue>(float.NaN, topDocs.MaxScore, og.count, topDocs.ScoreDocs,
                                                    default(TGroupValue), groupSortValues);
        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    /*
    while (groupQueue.size() != 0) {
      final OneGroup og = groupQueue.pop();
      //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
      totalGroupedHitCount += og.count;
    }
    */

    return new TopGroups<TGroupValue>(new TopGroups<TGroupValue>(groupSort.GetSort(),
                                                                 withinGroupSort == null ? null : withinGroupSort.GetSort(),
                                                                 totalHitCount, totalGroupedHitCount, groups, maxScore),
                                      totalGroupCount);
}