public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) { ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone(); Array.Sort(hits, new ComparerAnonymousInnerClassHelper(this)); IList <AtomicReaderContext> leaves = searcher.IndexReader.Leaves; Weight weight = searcher.CreateNormalizedWeight(query); // Now merge sort docIDs from hits, with reader's leaves: int hitUpto = 0; int readerUpto = -1; int endDoc = 0; int docBase = 0; Scorer scorer = null; while (hitUpto < hits.Length) { ScoreDoc hit = hits[hitUpto]; int docID = hit.Doc; AtomicReaderContext readerContext = null; while (docID >= endDoc) { readerUpto++; readerContext = leaves[readerUpto]; endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc; } if (readerContext != null) { // We advanced to another segment: docBase = readerContext.DocBase; scorer = weight.GetScorer(readerContext, null); } int targetDoc = docID - docBase; int actualDoc = scorer.DocID; if (actualDoc < targetDoc) { actualDoc = scorer.Advance(targetDoc); } if (actualDoc == targetDoc) { // Query did match this doc: hit.Score = Combine(hit.Score, true, scorer.GetScore()); } else { // Query did not match this doc: Debug.Assert(actualDoc > targetDoc); hit.Score = Combine(hit.Score, false, 0.0f); } hitUpto++; } // TODO: we should do a partial sort (of only topN) // instead, but typically the number of hits is // smallish: Array.Sort(hits, new ComparerAnonymousInnerClassHelper2(this)); if (topN < hits.Length) { ScoreDoc[] subset = new ScoreDoc[topN]; Array.Copy(hits, 0, subset, 0, topN); hits = subset; } return(new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score)); }
private void AssertAdvance(Scorer expected, Scorer actual, int amount) { if (actual == null) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, expected.NextDoc()); return; } int prevDoc = 0; int doc; while ((doc = expected.Advance(prevDoc + amount)) != DocIdSetIterator.NO_MORE_DOCS) { Assert.AreEqual(doc, actual.Advance(prevDoc + amount)); Assert.AreEqual(expected.Freq(), actual.Freq()); float expectedScore = expected.Score(); float actualScore = actual.Score(); Assert.AreEqual(expectedScore, actualScore, CheckHits.ExplainToleranceDelta(expectedScore, actualScore)); prevDoc = doc; } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, actual.Advance(prevDoc + amount)); }
public virtual void TestBS2DisjunctionNextVsAdvance() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); int numDocs = AtLeast(300); for (int docUpto = 0; docUpto < numDocs; docUpto++) { string contents = "a"; if (Random.Next(20) <= 16) { contents += " b"; } if (Random.Next(20) <= 8) { contents += " c"; } if (Random.Next(20) <= 4) { contents += " d"; } if (Random.Next(20) <= 2) { contents += " e"; } if (Random.Next(20) <= 1) { contents += " f"; } Document doc = new Document(); doc.Add(new TextField("field", contents, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); w.Dispose(); for (int iter = 0; iter < 10 * RandomMultiplier; iter++) { if (Verbose) { Console.WriteLine("iter=" + iter); } IList <string> terms = new List <string> { "a", "b", "c", "d", "e", "f" }; int numTerms = TestUtil.NextInt32(Random, 1, terms.Count); while (terms.Count > numTerms) { terms.RemoveAt(Random.Next(terms.Count)); } if (Verbose) { Console.WriteLine(" terms=" + terms); } BooleanQuery q = new BooleanQuery(); foreach (string term in terms) { q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD)); } Weight weight = s.CreateNormalizedWeight(q); Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null); // First pass: just use .NextDoc() to gather all hits IList <ScoreDoc> hits = new List <ScoreDoc>(); while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore())); } if (Verbose) { Console.WriteLine(" " + hits.Count + " hits"); } // Now, randomly next/advance through the list and // verify exact match: for (int iter2 = 0; iter2 < 10; iter2++) { weight = s.CreateNormalizedWeight(q); scorer = weight.GetScorer(s.m_leafContexts[0], null); if (Verbose) { Console.WriteLine(" iter2=" + iter2); } int upto = -1; while (upto < hits.Count) { int nextUpto; int nextDoc; int left = hits.Count - upto; if (left == 1 || Random.nextBoolean()) { // next nextUpto = 1 + upto; nextDoc = scorer.NextDoc(); } else { // advance int inc = TestUtil.NextInt32(Random, 1, left - 1); nextUpto = inc + upto; nextDoc = scorer.Advance(hits[nextUpto].Doc); } if (nextUpto == hits.Count) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc); } else { ScoreDoc hit = hits[nextUpto]; Assert.AreEqual(hit.Doc, nextDoc); // Test for precise float equality: Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore()); } upto = nextUpto; } } } r.Dispose(); d.Dispose(); }
public override int Advance(int target) { return(scorer.Advance(target)); }
public override int Advance(int target) { return(doc = countingSumScorer.Advance(target)); }
public override bool SkipTo(int docNr) { return(scorer.Advance(docNr) != NO_MORE_DOCS); }
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) { ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone(); Array.Sort(hits, Comparer <ScoreDoc> .Create((a, b) => a.Doc - b.Doc)); IList <AtomicReaderContext> leaves = searcher.IndexReader.Leaves; Weight weight = searcher.CreateNormalizedWeight(query); // Now merge sort docIDs from hits, with reader's leaves: int hitUpto = 0; int readerUpto = -1; int endDoc = 0; int docBase = 0; Scorer scorer = null; while (hitUpto < hits.Length) { ScoreDoc hit = hits[hitUpto]; int docID = hit.Doc; AtomicReaderContext readerContext = null; while (docID >= endDoc) { readerUpto++; readerContext = leaves[readerUpto]; endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc; } if (readerContext != null) { // We advanced to another segment: docBase = readerContext.DocBase; scorer = weight.GetScorer(readerContext, null); } int targetDoc = docID - docBase; int actualDoc = scorer.DocID; if (actualDoc < targetDoc) { actualDoc = scorer.Advance(targetDoc); } if (actualDoc == targetDoc) { // Query did match this doc: hit.Score = Combine(hit.Score, true, scorer.GetScore()); } else { // Query did not match this doc: if (Debugging.AssertsEnabled) { Debugging.Assert(actualDoc > targetDoc); } hit.Score = Combine(hit.Score, false, 0.0f); } hitUpto++; } // TODO: we should do a partial sort (of only topN) // instead, but typically the number of hits is // smallish: Array.Sort(hits, Comparer <ScoreDoc> .Create((a, b) => { // Sort by score descending, then docID ascending: // LUCENENET specific - compare bits rather than using equality operators to prevent these comparisons from failing in x86 in .NET Framework with optimizations enabled if (NumericUtils.SingleToSortableInt32(a.Score) > NumericUtils.SingleToSortableInt32(b.Score)) { return(-1); } else if (NumericUtils.SingleToSortableInt32(a.Score) < NumericUtils.SingleToSortableInt32(b.Score)) { return(1); } else { // this subtraction can't overflow int // because docIDs are >= 0: return(a.Doc - b.Doc); } })); if (topN < hits.Length) { ScoreDoc[] subset = new ScoreDoc[topN]; Array.Copy(hits, 0, subset, 0, topN); hits = subset; } return(new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score)); }