public virtual void TestRandomQueries()
{
    System.Random rnd = NewRandom();

    System.String[] vals = new System.String[] { "w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz" };

    int tot = 0;

    BooleanQuery q1 = null;
    try
    {
        // increase number of iterations for more complete testing
        for (int i = 0; i < 1000; i++)
        {
            int level = rnd.Next(3);
            q1 = RandBoolQuery(new System.Random(rnd.Next(System.Int32.MaxValue)), rnd.Next(0, 2) == 0 ? false : true, level, field, vals, null);

            // Can't sort by relevance since floating point numbers may not quite
            // match up.
            Sort sort = Sort.INDEXORDER;

            QueryUtils.Check(q1, searcher);

            TopFieldCollector collector = TopFieldCollector.Create(sort, 1000, false, true, true, true);
            searcher.Search(q1, null, collector, null);
            ScoreDoc[] hits1 = collector.TopDocs().ScoreDocs;

            collector = TopFieldCollector.Create(sort, 1000, false, true, true, false);
            searcher.Search(q1, null, collector, null);
            ScoreDoc[] hits2 = collector.TopDocs().ScoreDocs;
            tot += hits2.Length;
            CheckHits.CheckEqual(q1, hits1, hits2);

            BooleanQuery q3 = new BooleanQuery();
            q3.Add(q1, Occur.SHOULD);
            q3.Add(new PrefixQuery(new Term("field2", "b")), Occur.SHOULD);
            TopDocs hits4 = bigSearcher.Search(q3, 1, null);
            Assert.AreEqual(mulFactor * collector.internalTotalHits + NUM_EXTRA_DOCS / 2, hits4.TotalHits);
        }
    }
    catch (System.Exception e)
    {
        // For easier debugging
        System.Console.Out.WriteLine("failed query: " + q1);
        throw e;
    }

    // System.out.println("Total hits:"+tot);
}
/// <summary> Just like {@link #Search(Weight, Filter, int, Sort)}, but you choose
/// whether or not the fields in the returned {@link FieldDoc} instances
/// should be set by specifying fillFields.<br/>
///
/// <p/>
/// NOTE: this does not compute scores by default. If you need scores, create
/// a {@link TopFieldCollector} instance by calling
/// {@link TopFieldCollector#create} and then pass that to
/// {@link #Search(Weight, Filter, Collector)}.
/// <p/>
/// </summary>
public virtual TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort, bool fillFields)
{
    nDocs = Math.Min(nDocs, reader.MaxDoc());

    SortField[] fields = sort.fields;
    bool legacy = false;
    for (int i = 0; i < fields.Length; i++)
    {
        SortField field = fields[i];
        System.String fieldname = field.GetField();
        int type = field.GetType();
        // Resolve AUTO into its true type
        if (type == SortField.AUTO)
        {
            int autotype = SortField.DetectFieldType(reader, fieldname);
            if (autotype == SortField.STRING)
            {
                fields[i] = new SortField(fieldname, field.GetLocale(), field.GetReverse());
            }
            else
            {
                fields[i] = new SortField(fieldname, autotype, field.GetReverse());
            }
        }

        if (field.GetUseLegacySearch())
        {
            legacy = true;
        }
    }

    if (legacy)
    {
        // Search the single top-level reader
        TopDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs);
        HitCollectorWrapper hcw = new HitCollectorWrapper(collector);
        hcw.SetNextReader(reader, 0);
        if (filter == null)
        {
            Scorer scorer = weight.Scorer(reader, true, true);
            if (scorer != null)
            {
                scorer.Score(hcw);
            }
        }
        else
        {
            SearchWithFilter(reader, weight, filter, hcw);
        }
        return (TopFieldDocs)collector.TopDocs();
    }

    TopFieldCollector collector2 = TopFieldCollector.Create(sort, nDocs, fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.ScoresDocsOutOfOrder());
    Search(weight, filter, collector2);
    return (TopFieldDocs)collector2.TopDocs();
}
/// <summary> Just like <see cref="Search(Weight, Filter, int, Sort)" />, but you choose
/// whether or not the fields in the returned <see cref="FieldDoc" /> instances
/// should be set by specifying fillFields.
/// <p/>
/// NOTE: this does not compute scores by default. If you need scores, create
/// a <see cref="TopFieldCollector" /> instance by calling
/// <see cref="TopFieldCollector.Create" /> and then pass that to
/// <see cref="Search(Weight, Filter, Collector)" />.
/// <p/>
/// </summary>
public virtual TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort, bool fillFields, IState state)
{
    nDocs = Math.Min(nDocs, reader.MaxDoc);

    TopFieldCollector collector2 = TopFieldCollector.Create(sort, nDocs, fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.GetScoresDocsOutOfOrder());
    Search(weight, filter, collector2, state);
    return (TopFieldDocs)collector2.TopDocs();
}
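// The NOTE in the two summaries above describes a caller-side pattern: the sort-based
// Search overloads do not track per-hit scores unless the searcher is configured to,
// so a caller that needs scores builds its own TopFieldCollector with score tracking
// enabled and hands it to the Collector-based Search. The sketch below illustrates
// that pattern under stated assumptions: `searcher`, `query`, and the "category"
// field are hypothetical, and the (Query, Filter, Collector) overload is assumed to
// be available (some variants above also take a trailing state argument).
Sort sort = new Sort(new SortField("category", SortField.STRING, false)); // hypothetical sort field
TopFieldCollector collector = TopFieldCollector.Create(
    sort,
    10,       // numHits
    true,     // fillFields: populate FieldDoc.fields on each returned hit
    true,     // trackDocScores: compute a score per hit
    true,     // trackMaxScore: compute TopFieldDocs.MaxScore
    false);   // docsScoredInOrder
searcher.Search(query, null, collector);               // Collector-based overload, no Filter
TopFieldDocs docs = (TopFieldDocs)collector.TopDocs(); // scores are now filled in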
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN)
{
    // Copy ScoreDoc[] and sort by ascending docID:
    ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone();
    Array.Sort(hits, new ComparatorAnonymousInnerClassHelper(this));

    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;

    TopFieldCollector collector = TopFieldCollector.Create(sort, topN, true, true, true, false);

    // Now merge sort docIDs from hits, with reader's leaves:
    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;
    int docBase = 0;

    FakeScorer fakeScorer = new FakeScorer();

    while (hitUpto < hits.Length)
    {
        ScoreDoc hit = hits[hitUpto];
        int docID = hit.Doc;
        AtomicReaderContext readerContext = null;
        while (docID >= endDoc)
        {
            readerUpto++;
            readerContext = leaves[readerUpto];
            endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc;
        }

        if (readerContext != null)
        {
            // We advanced to another segment:
            collector.NextReader = readerContext;
            collector.Scorer = fakeScorer;
            docBase = readerContext.DocBase;
        }

        fakeScorer.score = hit.Score;
        fakeScorer.doc = docID - docBase;

        collector.Collect(fakeScorer.doc);

        hitUpto++;
    }

    return collector.TopDocs();
}
/// <summary>
/// Just like <seealso cref="#search(Weight, int, Sort, boolean, boolean)"/>, but you choose
/// whether or not the fields in the returned <seealso cref="FieldDoc"/> instances should
/// be set by specifying fillFields.
/// </summary>
protected internal virtual TopFieldDocs Search(IList<AtomicReaderContext> leaves, Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore)
{
    // single thread
    int limit = Reader.MaxDoc;
    if (limit == 0)
    {
        limit = 1;
    }
    nDocs = Math.Min(nDocs, limit);

    TopFieldCollector collector = TopFieldCollector.Create(sort, nDocs, after, fillFields, doDocScores, doMaxScore, !weight.ScoresDocsOutOfOrder());
    Search(leaves, weight, collector);
    return (TopFieldDocs)collector.TopDocs();
}
/// <summary>
/// Just like <seealso cref="#search(Weight, int, Sort, boolean, boolean)"/>, but you choose
/// whether or not the fields in the returned <seealso cref="FieldDoc"/> instances should
/// be set by specifying fillFields.
/// </summary>
protected internal virtual TopFieldDocs Search(Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore)
{
    if (sort == null)
    {
        throw new System.NullReferenceException("Sort must not be null");
    }

    int limit = Reader.MaxDoc;
    if (limit == 0)
    {
        limit = 1;
    }
    nDocs = Math.Min(nDocs, limit);

    if (Executor == null)
    {
        // use all leaves here!
        return Search(LeafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore);
    }
    else
    {
        TopFieldCollector topCollector = TopFieldCollector.Create(sort, nDocs, after, fillFields, doDocScores, doMaxScore, false);

        ReentrantLock @lock = new ReentrantLock();
        ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(Executor);
        for (int i = 0; i < LeafSlices.Length; i++) // search each leaf slice
        {
            runner.Submit(new SearcherCallableWithSort(@lock, this, LeafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore));
        }

        int totalHits = 0;
        float maxScore = float.NegativeInfinity;
        foreach (TopFieldDocs topFieldDocs in runner)
        {
            if (topFieldDocs.TotalHits != 0)
            {
                totalHits += topFieldDocs.TotalHits;
                maxScore = Math.Max(maxScore, topFieldDocs.MaxScore);
            }
        }

        TopFieldDocs topDocs = (TopFieldDocs)topCollector.TopDocs();

        return new TopFieldDocs(totalHits, topDocs.ScoreDocs, topDocs.Fields, topDocs.MaxScore);
    }
}
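// The `after` parameter in the two overloads above is what enables deep paging: the
// collector produced by the FieldDoc-aware Create overload only keeps hits that sort
// after the given FieldDoc, so the bottom entry of one page can seed the next. A rough
// sketch of that usage, assuming an existing `searcher`, `query`, and `sort`
// (hypothetical names) and that a null `after` simply collects the first page:
FieldDoc lastBottom = null; // null => first page
for (int page = 0; page < 3; page++)
{
    TopFieldCollector pageCollector = TopFieldCollector.Create(
        sort, 20, lastBottom,
        true,    // fillFields: needed so the bottom hit carries its sort values
        false,   // doDocScores
        false,   // doMaxScore
        false);  // docsScoredInOrder
    searcher.Search(query, pageCollector);
    TopFieldDocs pageDocs = (TopFieldDocs)pageCollector.TopDocs();
    if (pageDocs.ScoreDocs.Length == 0)
    {
        break; // no more results
    }
    // The last FieldDoc of this page positions the next page:
    lastBottom = (FieldDoc)pageDocs.ScoreDocs[pageDocs.ScoreDocs.Length - 1];
}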
private void RunTest(IndexSearcher searcher, bool reversed)
{
    BooleanQuery newq = new BooleanQuery(false);
    TermQuery query = new TermQuery(new Term("title", "ipod"));

    newq.Add(query, Occur.SHOULD);
    newq.Add(GetElevatedQuery(new System.String[] { "id", "a", "id", "x" }), Occur.SHOULD);

    Sort sort = new Sort(new SortField("id", new ElevationComparatorSource(priority), false), new SortField(null, SortField.SCORE, reversed));

    TopFieldCollector topCollector = TopFieldCollector.Create(sort, 50, false, true, true, true);
    searcher.Search(newq, null, topCollector, null);

    TopDocs topDocs = topCollector.TopDocs(0, 10);
    int nDocsReturned = topDocs.ScoreDocs.Length;

    Assert.AreEqual(4, nDocsReturned);

    // 0 & 3 were elevated
    Assert.AreEqual(0, topDocs.ScoreDocs[0].Doc);
    Assert.AreEqual(3, topDocs.ScoreDocs[1].Doc);
    if (reversed)
    {
        Assert.AreEqual(2, topDocs.ScoreDocs[2].Doc);
        Assert.AreEqual(1, topDocs.ScoreDocs[3].Doc);
    }
    else
    {
        Assert.AreEqual(1, topDocs.ScoreDocs[2].Doc);
        Assert.AreEqual(2, topDocs.ScoreDocs[3].Doc);
    }

    /*
     * for (int i = 0; i < nDocsReturned; i++) {
     *     ScoreDoc scoreDoc = topDocs.scoreDocs[i];
     *     ids[i] = scoreDoc.doc;
     *     scores[i] = scoreDoc.score;
     *     documents[i] = searcher.doc(ids[i]);
     *     System.out.println("ids[i] = " + ids[i]);
     *     System.out.println("documents[i] = " + documents[i]);
     *     System.out.println("scores[i] = " + scores[i]);
     * }
     */
}
internal virtual void TestSort(bool useFrom)
{
    IndexReader reader = null;
    Directory dir = null;

    int numDocs = AtLeast(1000);
    //final int numDocs = AtLeast(50);

    string[] tokens = new string[] { "a", "b", "c", "d", "e" };

    if (VERBOSE)
    {
        Console.WriteLine("TEST: make index");
    }

    {
        dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
        // w.setDoRandomForceMerge(false);
        // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100));

        string[] content = new string[AtLeast(20)];
        for (int contentIDX = 0; contentIDX < content.Length; contentIDX++)
        {
            StringBuilder sb = new StringBuilder();
            int numTokens = TestUtil.NextInt(Random(), 1, 10);
            for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++)
            {
                sb.Append(tokens[Random().Next(tokens.Length)]).Append(' ');
            }
            content[contentIDX] = sb.ToString();
        }

        for (int docIDX = 0; docIDX < numDocs; docIDX++)
        {
            Document doc = new Document();
            doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO));
            doc.Add(NewTextField("text", content[Random().Next(content.Length)], Field.Store.NO));
            doc.Add(new FloatField("float", (float)Random().NextDouble(), Field.Store.NO));
            int intValue;
            if (Random().Next(100) == 17)
            {
                intValue = int.MinValue;
            }
            else if (Random().Next(100) == 17)
            {
                intValue = int.MaxValue;
            }
            else
            {
                intValue = Random().Next();
            }
            doc.Add(new IntField("int", intValue, Field.Store.NO));
            if (VERBOSE)
            {
                Console.WriteLine(" doc=" + doc);
            }
            w.AddDocument(doc);
        }

        reader = w.Reader;
        w.Dispose();
    }

    // NOTE: sometimes reader has just one segment, which is
    // important to test
    IndexSearcher searcher = NewSearcher(reader);
    IndexReaderContext ctx = searcher.TopReaderContext;

    ShardSearcher[] subSearchers;
    int[] docStarts;

    if (ctx is AtomicReaderContext)
    {
        subSearchers = new ShardSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx);
        docStarts[0] = 0;
    }
    else
    {
        CompositeReaderContext compCTX = (CompositeReaderContext)ctx;
        int size = compCTX.Leaves.Count;
        subSearchers = new ShardSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++)
        {
            AtomicReaderContext leave = compCTX.Leaves[searcherIDX];
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.Reader.MaxDoc;
        }
    }

    IList<SortField> sortFields = new List<SortField>();
    sortFields.Add(new SortField("string", SortField.Type_e.STRING, true));
    sortFields.Add(new SortField("string", SortField.Type_e.STRING, false));
    sortFields.Add(new SortField("int", SortField.Type_e.INT, true));
    sortFields.Add(new SortField("int", SortField.Type_e.INT, false));
    sortFields.Add(new SortField("float", SortField.Type_e.FLOAT, true));
    sortFields.Add(new SortField("float", SortField.Type_e.FLOAT, false));
    sortFields.Add(new SortField(null, SortField.Type_e.SCORE, true));
    sortFields.Add(new SortField(null, SortField.Type_e.SCORE, false));
    sortFields.Add(new SortField(null, SortField.Type_e.DOC, true));
    sortFields.Add(new SortField(null, SortField.Type_e.DOC, false));

    for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++)
    {
        // TODO: custom FieldComp...
        Query query = new TermQuery(new Term("text", tokens[Random().Next(tokens.Length)]));

        Sort sort;
        if (Random().Next(10) == 4)
        {
            // Sort by score
            sort = null;
        }
        else
        {
            SortField[] randomSortFields = new SortField[TestUtil.NextInt(Random(), 1, 3)];
            for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++)
            {
                randomSortFields[sortIDX] = sortFields[Random().Next(sortFields.Count)];
            }
            sort = new Sort(randomSortFields);
        }

        int numHits = TestUtil.NextInt(Random(), 1, numDocs + 5);
        //final int numHits = 5;

        if (VERBOSE)
        {
            Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
        }

        int from = -1;
        int size = -1;

        // First search on whole index:
        TopDocs topHits;
        if (sort == null)
        {
            if (useFrom)
            {
                TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random().NextBoolean());
                searcher.Search(query, c);
                from = TestUtil.NextInt(Random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.TopDocs();
                if (from < tempTopHits.ScoreDocs.Length)
                {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                    Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                    tempTopHits.ScoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                }
                else
                {
                    topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                }
            }
            else
            {
                topHits = searcher.Search(query, numHits);
            }
        }
        else
        {
            TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random().NextBoolean());
            searcher.Search(query, c);
            if (useFrom)
            {
                from = TestUtil.NextInt(Random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.TopDocs();
                if (from < tempTopHits.ScoreDocs.Length)
                {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                    Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                    tempTopHits.ScoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                }
                else
                {
                    topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                }
            }
            else
            {
                topHits = c.TopDocs(0, numHits);
            }
        }

        if (VERBOSE)
        {
            if (useFrom)
            {
                Console.WriteLine("from=" + from + " size=" + size);
            }
            Console.WriteLine(" top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore));
            if (topHits.ScoreDocs != null)
            {
                for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++)
                {
                    ScoreDoc sd = topHits.ScoreDocs[hitIDX];
                    Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score);
                }
            }
        }

        // ... then all shards:
        Weight w = searcher.CreateNormalizedWeight(query);

        TopDocs[] shardHits = new TopDocs[subSearchers.Length];
        for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
        {
            TopDocs subHits;
            ShardSearcher subSearcher = subSearchers[shardIDX];
            if (sort == null)
            {
                subHits = subSearcher.Search(w, numHits);
            }
            else
            {
                TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random().NextBoolean());
                subSearcher.Search(w, c);
                subHits = c.TopDocs(0, numHits);
            }

            shardHits[shardIDX] = subHits;
            if (VERBOSE)
            {
                Console.WriteLine(" shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? "null" : subHits.ScoreDocs.Length.ToString()));
                if (subHits.ScoreDocs != null)
                {
                    foreach (ScoreDoc sd in subHits.ScoreDocs)
                    {
                        Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score);
                    }
                }
            }
        }

        // Merge:
        TopDocs mergedHits;
        if (useFrom)
        {
            mergedHits = TopDocs.Merge(sort, from, size, shardHits);
        }
        else
        {
            mergedHits = TopDocs.Merge(sort, numHits, shardHits);
        }

        if (mergedHits.ScoreDocs != null)
        {
            // Make sure the returned shards are correct:
            for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++)
            {
                ScoreDoc sd = mergedHits.ScoreDocs[hitIDX];
                Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard");
            }
        }

        TestUtil.AssertEquals(topHits, mergedHits);
    }

    reader.Dispose();
    dir.Dispose();
}
public virtual void TestRandomQueries()
{
    string[] vals = new string[] { "w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz" };

    int tot = 0;

    BooleanQuery q1 = null;
    try
    {
        // increase number of iterations for more complete testing
        int num = AtLeast(20);
        for (int i = 0; i < num; i++)
        {
            int level = Random().Next(3);
            q1 = RandBoolQuery(new Random(Random().Next()), Random().NextBoolean(), level, field, vals, null);

            // Can't sort by relevance since floating point numbers may not quite
            // match up.
            Sort sort = Sort.INDEXORDER;

            QueryUtils.Check(Random(), q1, Searcher); // baseline sim
            try
            {
                // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
                Searcher.Similarity = BigSearcher.Similarity; // random sim
                QueryUtils.Check(Random(), q1, Searcher);
            }
            finally
            {
                Searcher.Similarity = new DefaultSimilarity(); // restore
            }

            TopFieldCollector collector = TopFieldCollector.Create(sort, 1000, false, true, true, true);
            Searcher.Search(q1, null, collector);
            ScoreDoc[] hits1 = collector.TopDocs().ScoreDocs;

            collector = TopFieldCollector.Create(sort, 1000, false, true, true, false);
            Searcher.Search(q1, null, collector);
            ScoreDoc[] hits2 = collector.TopDocs().ScoreDocs;
            tot += hits2.Length;
            CheckHits.CheckEqual(q1, hits1, hits2);

            BooleanQuery q3 = new BooleanQuery();
            q3.Add(q1, BooleanClause.Occur.SHOULD);
            q3.Add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
            TopDocs hits4 = BigSearcher.Search(q3, 1);
            Assert.AreEqual(MulFactor * collector.TotalHits + NUM_EXTRA_DOCS / 2, hits4.TotalHits);
        }
    }
    catch (Exception)
    {
        // For easier debugging
        Console.WriteLine("failed query: " + q1);
        throw;
    }

    // System.out.println("Total hits:"+tot);
}