public virtual void Collect(int doc) { float score = scorer.GetScore(); try { long startMS = Environment.TickCount; for (int i = lastDoc[0] + 1; i <= doc; i++) { Weight w = s.CreateNormalizedWeight(q); Scorer scorer_ = w.GetScorer(context[leafPtr], liveDocs); Assert.IsTrue(scorer_.Advance(i) != DocIdSetIterator.NO_MORE_DOCS, "query collected " + doc + " but skipTo(" + i + ") says no more docs!"); Assert.AreEqual(doc, scorer_.DocID, "query collected " + doc + " but skipTo(" + i + ") got to " + scorer_.DocID); float skipToScore = scorer_.GetScore(); Assert.AreEqual(skipToScore, scorer_.GetScore(), maxDiff, "unstable skipTo(" + i + ") score!"); Assert.AreEqual(score, skipToScore, maxDiff, "query assigned doc " + doc + " a score of <" + score + "> but skipTo(" + i + ") has <" + skipToScore + ">!"); // Hurry things along if they are going slow (eg // if you got SimpleText codec this will kick in): if (i < doc && Environment.TickCount - startMS > 5) { i = doc - 1; } } lastDoc[0] = doc; } catch (IOException e) { throw new Exception(e.ToString(), e); } }
public virtual void SetNextReader(AtomicReaderContext context) { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS if (lastReader[0] != null) { AtomicReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.NewSearcher( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION luceneTestCase, #endif previousReader); indexSearcher.Similarity = s.Similarity; Weight w = indexSearcher.CreateNormalizedWeight(q); AtomicReaderContext ctx = (AtomicReaderContext)indexSearcher.TopReaderContext; Scorer scorer = w.GetScorer(ctx, ((AtomicReader)ctx.Reader).LiveDocs); if (scorer != null) { bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID); } leafPtr++; } lastReader[0] = (AtomicReader)context.Reader; Debug.Assert(readerContextArray[leafPtr].Reader == context.Reader); this.scorer = null; lastDoc[0] = -1; }
public virtual void Collect(int doc) { float score = scorer.GetScore(); try { long startMS = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results for (int i = lastDoc[0] + 1; i <= doc; i++) { Weight w = s.CreateNormalizedWeight(q); Scorer scorer_ = w.GetScorer(context[leafPtr], liveDocs); Assert.IsTrue(scorer_.Advance(i) != DocIdSetIterator.NO_MORE_DOCS, "query collected " + doc + " but skipTo(" + i + ") says no more docs!"); Assert.AreEqual(doc, scorer_.DocID, "query collected " + doc + " but skipTo(" + i + ") got to " + scorer_.DocID); float skipToScore = scorer_.GetScore(); Assert.AreEqual(skipToScore, scorer_.GetScore(), maxDiff, "unstable skipTo(" + i + ") score!"); Assert.AreEqual(score, skipToScore, maxDiff, "query assigned doc " + doc + " a score of <" + score + "> but skipTo(" + i + ") has <" + skipToScore + ">!"); // Hurry things along if they are going slow (eg // if you got SimpleText codec this will kick in): if (i < doc && (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - startMS > 5) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results { i = doc - 1; } } lastDoc[0] = doc; } catch (Exception e) when(e.IsIOException()) { throw RuntimeException.Create(e); } }
/// <summary> /// Check that first skip on just created scorers always goes to the right doc.</summary> public static void CheckFirstSkipTo(Query q, IndexSearcher s) { //System.out.println("checkFirstSkipTo: "+q); const float maxDiff = 1e-3f; int[] lastDoc = new int[] { -1 }; AtomicReader[] lastReader = new AtomicReader[] { null }; IList <AtomicReaderContext> context = s.TopReaderContext.Leaves; s.Search(q, new CollectorAnonymousClass2(q, s, maxDiff, lastDoc, lastReader, context)); if (lastReader[0] != null) { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS AtomicReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.NewSearcher(previousReader); indexSearcher.Similarity = s.Similarity; Weight w = indexSearcher.CreateNormalizedWeight(q); Scorer scorer = w.GetScorer((AtomicReaderContext)indexSearcher.TopReaderContext, previousReader.LiveDocs); if (scorer != null) { bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID); } } }
public override void Collect(int doc) { float score = scorer.Score(); try { long startMS = DateTime.Now.Millisecond; for (int i = LastDoc[0] + 1; i <= doc; i++) { Weight w = s.CreateNormalizedWeight(q); Scorer scorer_ = w.Scorer(Context[leafPtr], liveDocs); Assert.IsTrue(scorer_.Advance(i) != DocIdSetIterator.NO_MORE_DOCS, "query collected " + doc + " but skipTo(" + i + ") says no more docs!"); Assert.AreEqual(doc, scorer_.DocID(), "query collected " + doc + " but skipTo(" + i + ") got to " + scorer_.DocID()); float skipToScore = scorer_.Score(); Assert.AreEqual(skipToScore, scorer_.Score(), MaxDiff, "unstable skipTo(" + i + ") score!"); Assert.AreEqual(score, skipToScore, MaxDiff, "query assigned doc " + doc + " a score of <" + score + "> but skipTo(" + i + ") has <" + skipToScore + ">!"); // Hurry things along if they are going slow (eg // if you got SimpleText codec this will kick in): if (i < doc && DateTime.Now.Millisecond - startMS > 5) { i = doc - 1; } } LastDoc[0] = doc; } catch (IOException e) { throw new Exception(e.Message, e); } }
/// <summary> /// Alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc /// and ensure a hitcollector receives same docs and scores. /// </summary> /// <param name="luceneTestCase">The current test instance.</param> /// <param name="q"></param> /// <param name="s"></param> // LUCENENET specific // Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/> public static void CheckSkipTo(LuceneTestCase luceneTestCase, Query q, IndexSearcher s) #endif { //System.out.println("Checking "+q); IList <AtomicReaderContext> readerContextArray = s.TopReaderContext.Leaves; if (s.CreateNormalizedWeight(q).ScoresDocsOutOfOrder) // in this case order of skipTo() might differ from that of next(). { return; } const int skip_op = 0; const int next_op = 1; int[][] orders = new int[][] { new int[] { next_op }, new int[] { skip_op }, new int[] { skip_op, next_op }, new int[] { next_op, skip_op }, new int[] { skip_op, skip_op, next_op, next_op }, new int[] { next_op, next_op, skip_op, skip_op }, new int[] { skip_op, skip_op, skip_op, next_op, next_op } }; for (int k = 0; k < orders.Length; k++) { int[] order = orders[k]; // System.out.print("Order:");for (int i = 0; i < order.Length; i++) // System.out.print(order[i]==skip_op ? " skip()":" next()"); // System.out.println(); int[] opidx = new int[] { 0 }; int[] lastDoc = new int[] { -1 }; // FUTURE: ensure scorer.Doc()==-1 const float maxDiff = 1e-5f; AtomicReader[] lastReader = new AtomicReader[] { null }; s.Search(q, new CollectorAnonymousInnerClassHelper( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION luceneTestCase, #endif q, s, readerContextArray, skip_op, order, opidx, lastDoc, maxDiff, lastReader)); if (lastReader[0] != null) { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS AtomicReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.NewSearcher( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION luceneTestCase, #endif previousReader, false); indexSearcher.Similarity = s.Similarity; Weight w = indexSearcher.CreateNormalizedWeight(q); AtomicReaderContext ctx = (AtomicReaderContext)previousReader.Context; Scorer scorer = w.GetScorer(ctx, ((AtomicReader)ctx.Reader).LiveDocs); if (scorer != null) { bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID); } } } }
public virtual void TestSkipToFirsttimeMiss() { DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f); dq.Add(Tq("id", "d1")); dq.Add(Tq("dek", "DOES_NOT_EXIST")); QueryUtils.Check(Random(), dq, s, Similarity); Assert.IsTrue(s.TopReaderContext is AtomicReaderContext); Weight dw = s.CreateNormalizedWeight(dq); AtomicReaderContext context = (AtomicReaderContext)s.TopReaderContext; Scorer ds = dw.GetScorer(context, (context.AtomicReader).LiveDocs); bool skipOk = ds.Advance(3) != DocIdSetIterator.NO_MORE_DOCS; if (skipOk) { Assert.Fail("firsttime skipTo found a match? ... " + r.Document(ds.DocID).Get("id")); } }
public virtual void TestBooleanScorerMax() { Directory dir = NewDirectory(); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); int docCount = AtLeast(10000); for (int i = 0; i < docCount; i++) { Document doc = new Document(); doc.Add(NewField("field", "a", TextField.TYPE_NOT_STORED)); riw.AddDocument(doc); } riw.ForceMerge(1); IndexReader r = riw.Reader; riw.Dispose(); IndexSearcher s = NewSearcher(r); BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); Weight w = s.CreateNormalizedWeight(bq); Assert.AreEqual(1, s.IndexReader.Leaves.Count); BulkScorer scorer = w.BulkScorer(s.IndexReader.Leaves[0], false, null); FixedBitSet hits = new FixedBitSet(docCount); AtomicInteger end = new AtomicInteger(); Collector c = new CollectorAnonymousInnerClassHelper(this, scorer, hits, end); while (end.Get() < docCount) { int inc = TestUtil.NextInt(Random(), 1, 1000); end.AddAndGet(inc); scorer.Score(c, end.Get()); } Assert.AreEqual(docCount, hits.Cardinality()); r.Dispose(); dir.Dispose(); }
public virtual void Collect(int doc) { float score = sc.GetScore(); lastDoc[0] = doc; try { if (scorer == null) { Weight w = s.CreateNormalizedWeight(q); AtomicReaderContext context = readerContextArray[leafPtr]; scorer = w.GetScorer(context, (context.AtomicReader).LiveDocs); } int op = order[(opidx[0]++) % order.Length]; // System.out.println(op==skip_op ? // "skip("+(sdoc[0]+1)+")":"next()"); bool more = op == skip_op?scorer.Advance(scorer.DocID + 1) != DocIdSetIterator.NO_MORE_DOCS : scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS; int scorerDoc = scorer.DocID; float scorerScore = scorer.GetScore(); float scorerScore2 = scorer.GetScore(); float scoreDiff = Math.Abs(score - scorerScore); float scorerDiff = Math.Abs(scorerScore2 - scorerScore); if (!more || doc != scorerDoc || scoreDiff > maxDiff || scorerDiff > maxDiff) { StringBuilder sbord = new StringBuilder(); for (int i = 0; i < order.Length; i++) { sbord.Append(order[i] == skip_op ? " skip()" : " next()"); } throw new Exception("ERROR matching docs:" + "\n\t" + (doc != scorerDoc ? "--> " : "") + "doc=" + doc + ", scorerDoc=" + scorerDoc + "\n\t" + (!more ? "--> " : "") + "tscorer.more=" + more + "\n\t" + (scoreDiff > maxDiff ? "--> " : "") + "scorerScore=" + scorerScore + " scoreDiff=" + scoreDiff + " maxDiff=" + maxDiff + "\n\t" + (scorerDiff > maxDiff ? "--> " : "") + "scorerScore2=" + scorerScore2 + " scorerDiff=" + scorerDiff + "\n\thitCollector.Doc=" + doc + " score=" + score + "\n\t Scorer=" + scorer + "\n\t Query=" + q + " " + q.GetType().Name + "\n\t Searcher=" + s + "\n\t Order=" + sbord + "\n\t Op=" + (op == skip_op ? " skip()" : " next()")); } } catch (IOException e) { throw new Exception(e.ToString(), e); } }
public virtual void SetNextReader(AtomicReaderContext context) { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS if (lastReader[0] != null) { AtomicReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.NewSearcher(previousReader, similarity); indexSearcher.Similarity = s.Similarity; Weight w = indexSearcher.CreateNormalizedWeight(q); Scorer scorer = w.GetScorer((AtomicReaderContext)indexSearcher.TopReaderContext, previousReader.LiveDocs); if (scorer != null) { bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID); } leafPtr++; } lastReader[0] = (AtomicReader)context.Reader; lastDoc[0] = -1; liveDocs = ((AtomicReader)context.Reader).LiveDocs; }
[AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test fails on x86 on .NET Framework in Release mode only #endif public virtual void TestBS2DisjunctionNextVsAdvance() { Directory d = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, d); int numDocs = AtLeast(300); for (int docUpto = 0; docUpto < numDocs; docUpto++) { string contents = "a"; if (Random.Next(20) <= 16) { contents += " b"; } if (Random.Next(20) <= 8) { contents += " c"; } if (Random.Next(20) <= 4) { contents += " d"; } if (Random.Next(20) <= 2) { contents += " e"; } if (Random.Next(20) <= 1) { contents += " f"; } Document doc = new Document(); doc.Add(new TextField("field", contents, Field.Store.NO)); w.AddDocument(doc); } w.ForceMerge(1); IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); w.Dispose(); for (int iter = 0; iter < 10 * RandomMultiplier; iter++) { if (Verbose) { Console.WriteLine("iter=" + iter); } IList <string> terms = new List <string> { "a", "b", "c", "d", "e", "f" }; int numTerms = TestUtil.NextInt32(Random, 1, terms.Count); while (terms.Count > numTerms) { terms.RemoveAt(Random.Next(terms.Count)); } if (Verbose) { Console.WriteLine(" terms=" + terms); } BooleanQuery q = new BooleanQuery(); foreach (string term in terms) { q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD)); } Weight weight = s.CreateNormalizedWeight(q); Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null); // First pass: just use .NextDoc() to gather all hits IList <ScoreDoc> hits = new List <ScoreDoc>(); while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore())); } if (Verbose) { Console.WriteLine(" " + hits.Count + " hits"); } // Now, randomly next/advance through the list and // verify exact match: for (int iter2 = 0; iter2 < 10; iter2++) { weight = s.CreateNormalizedWeight(q); scorer = weight.GetScorer(s.m_leafContexts[0], null); if (Verbose) { Console.WriteLine(" iter2=" + iter2); } int upto = -1; while (upto < hits.Count) { int nextUpto; int nextDoc; int left = hits.Count - upto; if (left == 1 || Random.nextBoolean()) { // next nextUpto = 1 + upto; nextDoc = scorer.NextDoc(); } else { // advance int inc = TestUtil.NextInt32(Random, 1, left - 1); nextUpto = inc + upto; nextDoc = scorer.Advance(hits[nextUpto].Doc); } if (nextUpto == hits.Count) { Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc); } else { ScoreDoc hit = hits[nextUpto]; Assert.AreEqual(hit.Doc, nextDoc); // Test for precise float equality: Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore()); } upto = nextUpto; } } } r.Dispose(); d.Dispose(); }
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) { ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone(); Array.Sort(hits, new ComparerAnonymousInnerClassHelper(this)); IList <AtomicReaderContext> leaves = searcher.IndexReader.Leaves; Weight weight = searcher.CreateNormalizedWeight(query); // Now merge sort docIDs from hits, with reader's leaves: int hitUpto = 0; int readerUpto = -1; int endDoc = 0; int docBase = 0; Scorer scorer = null; while (hitUpto < hits.Length) { ScoreDoc hit = hits[hitUpto]; int docID = hit.Doc; AtomicReaderContext readerContext = null; while (docID >= endDoc) { readerUpto++; readerContext = leaves[readerUpto]; endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc; } if (readerContext != null) { // We advanced to another segment: docBase = readerContext.DocBase; scorer = weight.GetScorer(readerContext, null); } int targetDoc = docID - docBase; int actualDoc = scorer.DocID; if (actualDoc < targetDoc) { actualDoc = scorer.Advance(targetDoc); } if (actualDoc == targetDoc) { // Query did match this doc: hit.Score = Combine(hit.Score, true, scorer.GetScore()); } else { // Query did not match this doc: Debug.Assert(actualDoc > targetDoc); hit.Score = Combine(hit.Score, false, 0.0f); } hitUpto++; } // TODO: we should do a partial sort (of only topN) // instead, but typically the number of hits is // smallish: Array.Sort(hits, new ComparerAnonymousInnerClassHelper2(this)); if (topN < hits.Length) { ScoreDoc[] subset = new ScoreDoc[topN]; Array.Copy(hits, 0, subset, 0, topN); hits = subset; } return(new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score)); }
internal virtual void TestSort(bool useFrom) { IndexReader reader = null; Directory dir = null; int numDocs = AtLeast(1000); //final int numDocs = AtLeast(50); string[] tokens = new string[] { "a", "b", "c", "d", "e" }; if (VERBOSE) { Console.WriteLine("TEST: make index"); } { dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir); // w.setDoRandomForceMerge(false); // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100)); string[] content = new string[AtLeast(20)]; for (int contentIDX = 0; contentIDX < content.Length; contentIDX++) { StringBuilder sb = new StringBuilder(); int numTokens = TestUtil.NextInt(Random(), 1, 10); for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) { sb.Append(tokens[Random().Next(tokens.Length)]).Append(' '); } content[contentIDX] = sb.ToString(); } for (int docIDX = 0; docIDX < numDocs; docIDX++) { Document doc = new Document(); doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO)); doc.Add(NewTextField("text", content[Random().Next(content.Length)], Field.Store.NO)); doc.Add(new FloatField("float", (float)Random().NextDouble(), Field.Store.NO)); int intValue; if (Random().Next(100) == 17) { intValue = int.MinValue; } else if (Random().Next(100) == 17) { intValue = int.MaxValue; } else { intValue = Random().Next(); } doc.Add(new IntField("int", intValue, Field.Store.NO)); if (VERBOSE) { Console.WriteLine(" doc=" + doc); } w.AddDocument(doc); } reader = w.Reader; w.Dispose(); } // NOTE: sometimes reader has just one segment, which is // important to test IndexSearcher searcher = NewSearcher(reader); IndexReaderContext ctx = searcher.TopReaderContext; ShardSearcher[] subSearchers; int[] docStarts; if (ctx is AtomicReaderContext) { subSearchers = new ShardSearcher[1]; docStarts = new int[1]; subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx); docStarts[0] = 0; } else { CompositeReaderContext compCTX = (CompositeReaderContext)ctx; int size = compCTX.Leaves.Count; subSearchers = new ShardSearcher[size]; docStarts = new int[size]; int docBase = 0; for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++) { AtomicReaderContext leave = compCTX.Leaves[searcherIDX]; subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX); docStarts[searcherIDX] = docBase; docBase += leave.Reader.MaxDoc; } } IList <SortField> sortFields = new List <SortField>(); sortFields.Add(new SortField("string", SortField.Type_e.STRING, true)); sortFields.Add(new SortField("string", SortField.Type_e.STRING, false)); sortFields.Add(new SortField("int", SortField.Type_e.INT, true)); sortFields.Add(new SortField("int", SortField.Type_e.INT, false)); sortFields.Add(new SortField("float", SortField.Type_e.FLOAT, true)); sortFields.Add(new SortField("float", SortField.Type_e.FLOAT, false)); sortFields.Add(new SortField(null, SortField.Type_e.SCORE, true)); sortFields.Add(new SortField(null, SortField.Type_e.SCORE, false)); sortFields.Add(new SortField(null, SortField.Type_e.DOC, true)); sortFields.Add(new SortField(null, SortField.Type_e.DOC, false)); for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++) { // TODO: custom FieldComp... Query query = new TermQuery(new Term("text", tokens[Random().Next(tokens.Length)])); Sort sort; if (Random().Next(10) == 4) { // Sort by score sort = null; } else { SortField[] randomSortFields = new SortField[TestUtil.NextInt(Random(), 1, 3)]; for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++) { randomSortFields[sortIDX] = sortFields[Random().Next(sortFields.Count)]; } sort = new Sort(randomSortFields); } int numHits = TestUtil.NextInt(Random(), 1, numDocs + 5); //final int numHits = 5; if (VERBOSE) { Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits); } int from = -1; int size = -1; // First search on whole index: TopDocs topHits; if (sort == null) { if (useFrom) { TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random().NextBoolean()); searcher.Search(query, c); from = TestUtil.NextInt(Random(), 0, numHits - 1); size = numHits - from; TopDocs tempTopHits = c.TopDocs(); if (from < tempTopHits.ScoreDocs.Length) { // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount // than TopDocs#merge currently has ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)]; Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length); tempTopHits.ScoreDocs = newScoreDocs; topHits = tempTopHits; } else { topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore); } } else { topHits = searcher.Search(query, numHits); } } else { TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random().NextBoolean()); searcher.Search(query, c); if (useFrom) { from = TestUtil.NextInt(Random(), 0, numHits - 1); size = numHits - from; TopDocs tempTopHits = c.TopDocs(); if (from < tempTopHits.ScoreDocs.Length) { // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount // than TopDocs#merge currently has ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)]; Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length); tempTopHits.ScoreDocs = newScoreDocs; topHits = tempTopHits; } else { topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore); } } else { topHits = c.TopDocs(0, numHits); } } if (VERBOSE) { if (useFrom) { Console.WriteLine("from=" + from + " size=" + size); } Console.WriteLine(" top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore)); if (topHits.ScoreDocs != null) { for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++) { ScoreDoc sd = topHits.ScoreDocs[hitIDX]; Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } } } // ... then all shards: Weight w = searcher.CreateNormalizedWeight(query); TopDocs[] shardHits = new TopDocs[subSearchers.Length]; for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++) { TopDocs subHits; ShardSearcher subSearcher = subSearchers[shardIDX]; if (sort == null) { subHits = subSearcher.Search(w, numHits); } else { TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random().NextBoolean()); subSearcher.Search(w, c); subHits = c.TopDocs(0, numHits); } shardHits[shardIDX] = subHits; if (VERBOSE) { Console.WriteLine(" shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? "null" : subHits.ScoreDocs.Length.ToString())); if (subHits.ScoreDocs != null) { foreach (ScoreDoc sd in subHits.ScoreDocs) { Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } } } } // Merge: TopDocs mergedHits; if (useFrom) { mergedHits = TopDocs.Merge(sort, from, size, shardHits); } else { mergedHits = TopDocs.Merge(sort, numHits, shardHits); } if (mergedHits.ScoreDocs != null) { // Make sure the returned shards are correct: for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++) { ScoreDoc sd = mergedHits.ScoreDocs[hitIDX]; Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard"); } } TestUtil.AssertEquals(topHits, mergedHits); } reader.Dispose(); dir.Dispose(); }
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) { ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone(); Array.Sort(hits, Comparer <ScoreDoc> .Create((a, b) => a.Doc - b.Doc)); IList <AtomicReaderContext> leaves = searcher.IndexReader.Leaves; Weight weight = searcher.CreateNormalizedWeight(query); // Now merge sort docIDs from hits, with reader's leaves: int hitUpto = 0; int readerUpto = -1; int endDoc = 0; int docBase = 0; Scorer scorer = null; while (hitUpto < hits.Length) { ScoreDoc hit = hits[hitUpto]; int docID = hit.Doc; AtomicReaderContext readerContext = null; while (docID >= endDoc) { readerUpto++; readerContext = leaves[readerUpto]; endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc; } if (readerContext != null) { // We advanced to another segment: docBase = readerContext.DocBase; scorer = weight.GetScorer(readerContext, null); } int targetDoc = docID - docBase; int actualDoc = scorer.DocID; if (actualDoc < targetDoc) { actualDoc = scorer.Advance(targetDoc); } if (actualDoc == targetDoc) { // Query did match this doc: hit.Score = Combine(hit.Score, true, scorer.GetScore()); } else { // Query did not match this doc: if (Debugging.AssertsEnabled) { Debugging.Assert(actualDoc > targetDoc); } hit.Score = Combine(hit.Score, false, 0.0f); } hitUpto++; } // TODO: we should do a partial sort (of only topN) // instead, but typically the number of hits is // smallish: Array.Sort(hits, Comparer <ScoreDoc> .Create((a, b) => { // Sort by score descending, then docID ascending: // LUCENENET specific - compare bits rather than using equality operators to prevent these comparisons from failing in x86 in .NET Framework with optimizations enabled if (NumericUtils.SingleToSortableInt32(a.Score) > NumericUtils.SingleToSortableInt32(b.Score)) { return(-1); } else if (NumericUtils.SingleToSortableInt32(a.Score) < NumericUtils.SingleToSortableInt32(b.Score)) { return(1); } else { // this subtraction can't overflow int // because docIDs are >= 0: return(a.Doc - b.Doc); } })); if (topN < hits.Length) { ScoreDoc[] subset = new ScoreDoc[topN]; Array.Copy(hits, 0, subset, 0, topN); hits = subset; } return(new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score)); }
/// <summary> /// alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc /// and ensure a hitcollector receives same docs and scores /// </summary> public static void CheckSkipTo(Query q, IndexSearcher s) { //System.out.println("Checking "+q); IList<AtomicReaderContext> readerContextArray = s.TopReaderContext.Leaves; if (s.CreateNormalizedWeight(q).ScoresDocsOutOfOrder()) // in this case order of skipTo() might differ from that of next(). { return; } const int skip_op = 0; const int next_op = 1; int[][] orders = new int[][] { new int[] { next_op }, new int[] { skip_op }, new int[] { skip_op, next_op }, new int[] { next_op, skip_op }, new int[] { skip_op, skip_op, next_op, next_op }, new int[] { next_op, next_op, skip_op, skip_op }, new int[] { skip_op, skip_op, skip_op, next_op, next_op } }; for (int k = 0; k < orders.Length; k++) { int[] order = orders[k]; // System.out.print("Order:");for (int i = 0; i < order.Length; i++) // System.out.print(order[i]==skip_op ? " skip()":" next()"); // System.out.println(); int[] opidx = new int[] { 0 }; int[] lastDoc = new int[] { -1 }; // FUTURE: ensure scorer.Doc()==-1 const float maxDiff = 1e-5f; AtomicReader[] lastReader = new AtomicReader[] { null }; s.Search(q, new CollectorAnonymousInnerClassHelper(q, s, readerContextArray, skip_op, order, opidx, lastDoc, maxDiff, lastReader)); if (lastReader[0] != null) { // confirm that skipping beyond the last doc, on the // previous reader, hits NO_MORE_DOCS AtomicReader previousReader = lastReader[0]; IndexSearcher indexSearcher = LuceneTestCase.NewSearcher(previousReader, false); indexSearcher.Similarity = s.Similarity; Weight w = indexSearcher.CreateNormalizedWeight(q); AtomicReaderContext ctx = (AtomicReaderContext)previousReader.Context; Scorer scorer = w.Scorer(ctx, ((AtomicReader)ctx.Reader).LiveDocs); if (scorer != null) { bool more = scorer.Advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.IsFalse(more, "query's last doc was " + lastDoc[0] + " but skipTo(" + (lastDoc[0] + 1) + ") got to " + scorer.DocID()); } } } }
public override void CreateWeight(IDictionary context, IndexSearcher searcher) { Weight w = searcher.CreateNormalizedWeight(q); context[this] = w; }