/// <summary>
/// Runs <paramref name="query"/> against the searcher, mapping each hit through
/// <paramref name="func"/>. Returns an empty sequence when the searcher is not ready.
/// </summary>
/// <param name="query">The query to execute.</param>
/// <param name="count">Maximum number of hits to collect.</param>
/// <param name="sort">Sort order for the collected hits.</param>
/// <param name="func">Projection from (document, scoreDoc) to the result item.</param>
/// <returns>The projected hits, already fully materialized.</returns>
public IEnumerable<T> Search<T>(Query query, int count, Sort sort, Func<Document, ScoreDoc, T> func)
{
    // Bail out when the searcher has not been initialized yet.
    if (1 != Interlocked.Read(ref this.IsSearcherReady))
    {
        return Array.Empty<T>();
    }

    Interlocked.Increment(ref this.ActiveSearchCount);
    try
    {
        TopFieldCollector collector = TopFieldCollector.Create(sort, count, true, true, false, false);
        this.Searcher.Search(query, collector);

        // BUG FIX: materialize results before leaving the guarded region.
        // The original returned a *lazy* LINQ Select, so Searcher.Doc() only ran
        // when the caller enumerated — after ActiveSearchCount had already been
        // decremented in the finally block, defeating the active-search guard.
        return collector.GetTopDocs().ScoreDocs
            .Select(scoreDoc =>
            {
                var document = this.Searcher.Doc(scoreDoc.Doc);
                return func(document, scoreDoc);
            })
            .ToList();
    }
    finally
    {
        Interlocked.Decrement(ref this.ActiveSearchCount);
    }
}
/// <summary>
/// Parses <paramref name="pattern"/> and returns one page of matching pages
/// from the index stored under <paramref name="dataFolder"/>.
/// </summary>
/// <param name="pattern">Raw query text, parsed with an empty default field.</param>
/// <param name="dataFolder">Folder containing the page index.</param>
/// <param name="page">Zero-based page number; page size is <c>PageSize</c>.</param>
public static IEnumerable<ISearchItem> Search(string pattern, DirectoryInfo dataFolder, int page)
{
    using (Analyzer analyzer = new SimpleAnalyzer(LuceneVersion.LUCENE_48))
    using (IndexReader reader = new PagesReader(dataFolder))
    {
        QueryParser parser = new QueryParser(LuceneVersion.LUCENE_48, string.Empty, analyzer);
        Query query = parser.Parse(pattern);

        IndexSearcher searcher = new IndexSearcher(reader);

        // Collect up to NumHits in index order; no field filling or score tracking.
        TopFieldCollector collector = TopFieldCollector.Create(Sort.INDEXORDER, NumHits, false, false, false, false);
        searcher.Search(query, collector);

        // NOTE: hit highlighting (SimpleHTMLFormatter / QueryScorer / Highlighter
        // with TokenSources over the "body" field) was prototyped here and is
        // currently disabled.

        // Skip page * PageSize hits, then take one page worth.
        ScoreDoc[] pageHits = collector.GetTopDocs(page * PageSize, PageSize).ScoreDocs;
        return pageHits.Select(hit => new SearchItem(hit.Doc.ToString(), hit.Doc));
    }
}
public void SearchGenericWithCollector()
{
    // Index a fixed number of objects, then verify a sorted range search
    // through the generic Search<T> overload with an explicit collector.
    const int NumObjects = 10;
    const int MinNumberInclusive = 0;
    const int MaxNumberExclusive = 8;

    WriteTestObjects(NumObjects, obj => obj.ToDocument());
    Assert.AreEqual(NumObjects, writer.NumDocs);

    using (DirectoryReader reader = DirectoryReader.Open(dir))
    {
        IndexSearcher searcher = new IndexSearcher(reader);

        // Sort by "Number" descending; no field filling or score tracking.
        Sort sort = new Sort(new SortField("Number", SortFieldType.INT64, true));
        TopFieldCollector collector = TopFieldCollector.Create(sort, NumObjects, false, false, false, false);

        // Range query over [MinNumberInclusive, MaxNumberExclusive).
        Query rangeQuery = NumericRangeQuery.NewInt64Range("Number", MinNumberInclusive, MaxNumberExclusive, true, false);
        searcher.Search<TestObject>(rangeQuery, collector);

        TopDocs topDocs = collector.GetTopDocs();
        VerifyTopDocsTestObjects(searcher, topDocs, MinNumberInclusive, MaxNumberExclusive, true);
    }
}
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN)
{
    // Re-sorts the first-pass hits under this rescorer's Sort. The hits are
    // visited in ascending global docID order so the collector can be driven
    // segment-by-segment (SetNextReader) exactly like a normal search.

    // Copy ScoreDoc[] and sort by ascending docID:
    ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone();
    Array.Sort(hits, new ComparerAnonymousInnerClassHelper(this));

    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;

    // fillFields=true, trackDocScores=true, trackMaxScore=true, inOrder=false.
    TopFieldCollector collector = TopFieldCollector.Create(sort, topN, true, true, true, false);

    // Now merge sort docIDs from hits, with reader's leaves:
    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;   // exclusive global-docID upper bound of the current leaf
    int docBase = 0;  // global docID base of the current leaf

    // FakeScorer replays each hit's first-pass score to the collector.
    FakeScorer fakeScorer = new FakeScorer();

    while (hitUpto < hits.Length)
    {
        ScoreDoc hit = hits[hitUpto];
        int docID = hit.Doc;
        AtomicReaderContext readerContext = null;

        // Advance leaves until we reach the one containing this docID.
        while (docID >= endDoc)
        {
            readerUpto++;
            readerContext = leaves[readerUpto];
            endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc;
        }

        if (readerContext != null)
        {
            // We advanced to another segment:
            collector.SetNextReader(readerContext);
            collector.SetScorer(fakeScorer);
            docBase = readerContext.DocBase;
        }

        // Feed the original score and the leaf-local docID to the collector.
        fakeScorer.score = hit.Score;
        fakeScorer.doc = docID - docBase;

        collector.Collect(fakeScorer.doc);

        hitUpto++;
    }

    return (collector.GetTopDocs());
}
/// <summary>
/// Just like <see cref="Search(Weight, int, Sort, bool, bool)"/>, but you choose
/// whether or not the fields in the returned <see cref="FieldDoc"/> instances should
/// be set by specifying <paramref name="fillFields"/>.
/// </summary>
protected virtual TopFieldDocs Search(IList<AtomicReaderContext> leaves, Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore)
{
    // single thread

    // The collector does not accept numHits = 0, so clamp to at least one doc.
    int maxDocs = reader.MaxDoc;
    if (maxDocs == 0)
    {
        maxDocs = 1;
    }
    int numHits = Math.Min(nDocs, maxDocs);

    // In-order collection is only safe when the weight scores docs in order.
    bool docsInOrder = !weight.ScoresDocsOutOfOrder;
    TopFieldCollector collector = TopFieldCollector.Create(sort, numHits, after, fillFields, doDocScores, doMaxScore, docsInOrder);

    Search(leaves, weight, collector);
    return (TopFieldDocs)collector.GetTopDocs();
}
/// <summary>
/// Just like <see cref="Search(Weight, int, Sort, bool, bool)"/>, but you choose
/// whether or not the fields in the returned <see cref="FieldDoc"/> instances should
/// be set by specifying <paramref name="fillFields"/>.
/// </summary>
/// <exception cref="System.ArgumentNullException"><paramref name="sort"/> is <c>null</c>.</exception>
protected virtual TopFieldDocs Search(Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore)
{
    if (sort == null)
    {
        // BUG FIX: the original passed "Sort must not be null" to the one-arg
        // ArgumentNullException constructor, whose parameter is *paramName* —
        // yielding the misleading text "(Parameter 'Sort must not be null')".
        // Use the (paramName, message) overload instead.
        throw new System.ArgumentNullException(nameof(sort), "Sort must not be null");
    }

    // The collector does not allow numHits = 0, so clamp to at least 1.
    int limit = reader.MaxDoc;
    if (limit == 0)
    {
        limit = 1;
    }
    nDocs = Math.Min(nDocs, limit);

    if (executor == null)
    {
        // Single-threaded path: use all leaves here!
        return Search(m_leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore);
    }
    else
    {
        // Parallel path: one task per leaf slice, all feeding a shared collector.
        TopFieldCollector topCollector = TopFieldCollector.Create(sort, nDocs, after, fillFields, doDocScores, doMaxScore, false);

        ReentrantLock @lock = new ReentrantLock();
        ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(executor);
        for (int i = 0; i < m_leafSlices.Length; i++) // search each leaf slice
        {
            runner.Submit(new SearcherCallableWithSort(@lock, this, m_leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore));
        }

        // Merge per-slice hit counts / max scores; the shared collector has
        // already merged the actual top hits.
        int totalHits = 0;
        float maxScore = float.NegativeInfinity;
        foreach (TopFieldDocs topFieldDocs in runner)
        {
            if (topFieldDocs.TotalHits != 0)
            {
                totalHits += topFieldDocs.TotalHits;
                maxScore = Math.Max(maxScore, topFieldDocs.MaxScore);
            }
        }

        TopFieldDocs topDocs = (TopFieldDocs)topCollector.GetTopDocs();
        return new TopFieldDocs(totalHits, topDocs.ScoreDocs, topDocs.Fields, topDocs.MaxScore);
    }
}
public void TestEarlyTerminationDifferentSorter()
{
    // Test that the collector works correctly when the index was sorted by a
    // different sorter than the one specified in the ctor.
    CreateRandomIndexes(5);

    int numHits = TestUtil.NextInt32(Random, 1, numDocs / 10);
    Sort sort = new Sort(new SortField("ndv2", SortFieldType.INT64, false));
    bool fillFields = Random.nextBoolean();
    bool trackDocScores = Random.nextBoolean();
    bool trackMaxScore = Random.nextBoolean();
    bool inOrder = Random.nextBoolean();

    // LUCENENET specific: the collectors are created fresh inside the loop
    // (Lucene.Net 4.9-like behavior) rather than once up front, to avoid a
    // hard-to-find null reference exception.
    // https://github.com/apache/lucene-solr/commit/c59f13f9918faeeb4e69acd41731e674ce88f912

    IndexSearcher searcher = NewSearcher(reader);
    int iters = AtLeast(5);
    for (int i = 0; i < iters; ++i)
    {
        TopFieldCollector collector1 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
        TopFieldCollector collector2 = TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

        TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
        searcher.Search(query, collector1);

        // Drive the early-terminating collector with a *different* sort.
        Sort different = new Sort(new SortField("ndv2", SortFieldType.INT64));
        searcher.Search(query, new EarlyTerminatingSortingCollectorHelper(collector2, different, numHits));

        // Early termination can only reduce the hit count; top docs must match.
        assertTrue(collector1.TotalHits >= collector2.TotalHits);
        AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
    }
}
/// <summary>
/// Search, sorting by <see cref="Sort"/>, and computing
/// drill down and sideways counts.
/// </summary>
public virtual DrillSidewaysResult Search(DrillDownQuery query, Filter filter, FieldDoc after, int topN, Sort sort, bool doDocScores, bool doMaxScore)
{
    if (filter != null)
    {
        query = new DrillDownQuery(m_config, filter, query);
    }

    if (sort == null)
    {
        // No sort requested: delegate to the relevance-ordered overload.
        return Search(after, query, topN);
    }

    // The collector does not allow numHits = 0, so clamp topN to at least 1.
    int limit = m_searcher.IndexReader.MaxDoc;
    if (limit == 0)
    {
        limit = 1;
    }
    topN = Math.Min(topN, limit);

    TopFieldCollector hitCollector = TopFieldCollector.Create(sort, topN, after, true, doDocScores, doMaxScore, true);
    DrillSidewaysResult r = Search(query, hitCollector);
    return new DrillSidewaysResult(r.Facets, hitCollector.GetTopDocs());
}
public void TestEarlyTermination_()
{
    CreateRandomIndexes(5);

    int numHits = TestUtil.NextInt32(Random, 1, numDocs / 10);
    Sort sort = new Sort(new SortField("ndv1", SortFieldType.INT64, false));
    bool fillFields = Random.nextBoolean();
    bool trackDocScores = Random.nextBoolean();
    bool trackMaxScore = Random.nextBoolean();
    bool inOrder = Random.nextBoolean();

    // Identically configured collectors; collector2 is additionally wrapped in
    // an EarlyTerminatingSortingCollector inside the loop.
    TopFieldCollector collector1 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);
    TopFieldCollector collector2 = Search.TopFieldCollector.Create(sort, numHits, fillFields, trackDocScores, trackMaxScore, inOrder);

    IndexSearcher searcher = NewSearcher(reader);
    int iters = AtLeast(5);
    for (int i = 0; i < iters; ++i)
    {
        TermQuery query = new TermQuery(new Term("s", RandomPicks.RandomFrom(Random, terms)));
        searcher.Search(query, collector1);
        searcher.Search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
    }

    // Early termination may only ever reduce the hit count, and the top docs
    // must be identical across all iterations.
    assertTrue(collector1.TotalHits >= collector2.TotalHits);
    AssertTopDocsEquals(collector1.GetTopDocs().ScoreDocs, collector2.GetTopDocs().ScoreDocs);
}
/// <summary>
/// Retrieve suggestions, specifying whether all terms
/// must match (<paramref name="allTermsRequired"/>) and whether the hits
/// should be highlighted (<paramref name="doHighlight"/>).
/// </summary>
public virtual IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
{
    if (m_searcherMgr == null)
    {
        throw new InvalidOperationException("suggester was not built");
    }

    // Each fully-typed token becomes a MUST or SHOULD clause.
    Occur occur;
    if (allTermsRequired)
    {
        occur = Occur.MUST;
    }
    else
    {
        occur = Occur.SHOULD;
    }

    TokenStream ts = null;
    BooleanQuery query;
    var matchedTokens = new HashSet<string>();
    string prefixToken = null;

    try
    {
        // Tokenize the lookup key; every token except the last is added as an
        // exact TermQuery clause. The last token may become a prefix query.
        ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));

        //long t0 = System.currentTimeMillis();
        ts.Reset();
        var termAtt = ts.AddAttribute<ICharTermAttribute>();
        var offsetAtt = ts.AddAttribute<IOffsetAttribute>();
        string lastToken = null;
        query = new BooleanQuery();
        int maxEndOffset = -1;
        matchedTokens = new HashSet<string>();
        while (ts.IncrementToken())
        {
            if (lastToken != null)
            {
                matchedTokens.Add(lastToken);
                query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.ToString();
            if (lastToken != null)
            {
                maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
            }
        }
        ts.End();

        if (lastToken != null)
        {
            Query lastQuery;
            if (maxEndOffset == offsetAtt.EndOffset)
            {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = GetLastTokenQuery(lastToken);
                prefixToken = lastToken;
            }
            else
            {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.Add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }

            if (lastQuery != null)
            {
                query.Add(lastQuery, occur);
            }
        }

        if (contexts != null)
        {
            // Restrict hits to the given contexts via a MUST'd sub-query of
            // SHOULD'd context terms.
            BooleanQuery sub = new BooleanQuery();
            query.Add(sub, Occur.MUST);
            foreach (BytesRef context in contexts)
            {
                // NOTE: we "should" wrap this in
                // ConstantScoreQuery, or maybe send this as a
                // Filter instead to search, but since all of
                // these are MUST'd, the change to the score won't
                // affect the overall ranking. Since we indexed
                // as DOCS_ONLY, the perf should be the same
                // either way (no freq int[] blocks to decode):

                // TODO: if we had a BinaryTermField we could fix
                // this "must be valid ut8f" limitation:
                sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
            }
        }
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(ts);
    }

    // TODO: we could allow blended sort here, combining
    // weight w/ score. Now we ignore score and sort only
    // by weight:
    Query finalQuery = FinishQuery(query, allTermsRequired);

    //System.out.println("finalQuery=" + query);

    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);

    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    ICollector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    IndexSearcher searcher = m_searcherMgr.Acquire();
    IList<LookupResult> results = null;
    try
    {
        //System.out.println("got searcher=" + searcher);
        searcher.Search(finalQuery, c2);

        TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();

        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    }
    finally
    {
        // Always release the searcher back to the manager.
        m_searcherMgr.Release(searcher);
    }

    //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
    //System.out.println(results);

    return (results);
}
public override int DoLogic()
{
    // Benchmark task: optionally warms the reader, runs one search, optionally
    // prints/traverses/retrieves/highlights the hits. Returns a work counter.
    int res = 0;

    // open reader or use existing one
    IndexSearcher searcher = RunData.GetIndexSearcher();

    IndexReader reader;

    bool closeSearcher;
    if (searcher == null)
    {
        // open our own reader
        Directory dir = RunData.Directory;
        reader = DirectoryReader.Open(dir);
        searcher = new IndexSearcher(reader);
        closeSearcher = true;
    }
    else
    {
        // use existing one; this passes +1 ref to us
        reader = searcher.IndexReader;
        closeSearcher = false;
    }

    // optionally warm and add num docs traversed to count
    if (WithWarm)
    {
        Document doc = null;
        IBits liveDocs = MultiFields.GetLiveDocs(reader);
        for (int m = 0; m < reader.MaxDoc; m++)
        {
            // Touch every live document to warm the reader's caches.
            if (null == liveDocs || liveDocs.Get(m))
            {
                doc = reader.Document(m);
                res += (doc == null ? 0 : 1);
            }
        }
    }

    if (WithSearch)
    {
        res++;
        Query q = queryMaker.MakeQuery();
        Sort sort = Sort;
        TopDocs hits = null;
        int numHits = NumHits;
        if (numHits > 0)
        {
            if (WithCollector == false)
            {
                if (sort != null)
                {
                    // TODO: instead of always passing false we
                    // should detect based on the query; if we make
                    // the IndexSearcher search methods that take
                    // Weight public again, we can go back to
                    // pulling the Weight ourselves:
                    TopFieldCollector collector = TopFieldCollector.Create(sort, numHits, true, WithScore, WithMaxScore, false);
                    searcher.Search(q, null, collector);
                    hits = collector.GetTopDocs();
                }
                else
                {
                    hits = searcher.Search(q, numHits);
                }
            }
            else
            {
                // Custom collector path: hits intentionally stays null here.
                ICollector collector = CreateCollector();
                searcher.Search(q, null, collector);
                //hits = collector.topDocs();
            }

            string printHitsField = RunData.Config.Get("print.hits.field", null);
            if (hits != null && printHitsField != null && printHitsField.Length > 0)
            {
                Console.WriteLine("totalHits = " + hits.TotalHits);
                Console.WriteLine("maxDoc()  = " + reader.MaxDoc);
                Console.WriteLine("numDocs() = " + reader.NumDocs);
                for (int i = 0; i < hits.ScoreDocs.Length; i++)
                {
                    int docID = hits.ScoreDocs[i].Doc;
                    Document doc = reader.Document(docID);
                    Console.WriteLine("  " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
                }
            }

            // NOTE(review): if WithCollector is true, hits is still null here,
            // so WithTraverse would throw a NullReferenceException on
            // hits.ScoreDocs — confirm whether those two flags are ever
            // combined in benchmark configs.
            if (WithTraverse)
            {
                ScoreDoc[] scoreDocs = hits.ScoreDocs;
                int traversalSize = Math.Min(scoreDocs.Length, TraversalSize);

                if (traversalSize > 0)
                {
                    bool retrieve = WithRetrieve;
                    int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
                    Analyzer analyzer = RunData.Analyzer;
                    BenchmarkHighlighter highlighter = null;
                    if (numHighlight > 0)
                    {
                        highlighter = GetBenchmarkHighlighter(q);
                    }
                    for (int m = 0; m < traversalSize; m++)
                    {
                        int id = scoreDocs[m].Doc;
                        res++;
                        if (retrieve)
                        {
                            Document document = RetrieveDoc(reader, id);
                            res += document != null ? 1 : 0;
                            if (numHighlight > 0 && m < numHighlight)
                            {
                                // Highlight each configured field of the first
                                // numHighlight retrieved documents.
                                ICollection<string> fieldsToHighlight = GetFieldsToHighlight(document);
                                foreach (string field in fieldsToHighlight)
                                {
                                    string text = document.Get(field);
                                    res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    if (closeSearcher)
    {
        reader.Dispose();
    }
    else
    {
        // Release our +1 ref from above
        reader.DecRef();
    }
    return (res);
}