/// <summary>Returns the stored fields of the n<sup>th</sup> document in this set.
/// <p>Documents are cached, so that repeated requests for the same element may
/// return the same Document object. If the FieldSelector is changed, the new
/// fields will not be loaded.</p>
/// </summary>
public Document Doc(int n, FieldSelector fieldSelector)
{
    HitDoc hitDoc = HitDoc(n);

    // Update LRU cache of documents
    Remove(hitDoc);      // remove from list, if there
    AddToFront(hitDoc);  // add to front of list
    if (numDocs > maxDocs)
    {
        // if cache is full
        HitDoc oldLast = last;
        Remove(last);        // flush last
        oldLast.doc = null;  // let doc get gc'd
    }

    if (hitDoc.doc == null)
    {
        if (fieldSelector == null)
        {
            hitDoc.doc = searcher.Doc(hitDoc.id);                 // cache miss: read document
        }
        else
        {
            hitDoc.doc = searcher.Doc(hitDoc.id, fieldSelector);  // cache miss: read document
        }
    }

    return hitDoc.doc;
}
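// Hypothetical usage sketch for the Doc(int, FieldSelector) overload above; it is not part of
// the original source. It assumes the method lives on the classic Hits class and that
// Lucene.Net's MapFieldSelector is available; "hits", "searcher" and the "title" field are placeholders.
FieldSelector titleOnly = new MapFieldSelector("title");
for (int i = 0; i < hits.Length(); i++)
{
    // Repeated requests for the same index may return the same cached Document instance.
    Document doc = hits.Doc(i, titleOnly);  // loads only the "title" stored field
    System.Console.Out.WriteLine(doc.Get("title"));
}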
/*
 * public void testTermRepeatedQuery() throws IOException, ParseException {
 *     // TODO: this corner case yields different results.
 *     checkQuery("multi* multi* foo");
 * }
 */

/// <summary> Checks if a query yields the same result when executed on
/// a single IndexSearcher containing all documents and on a
/// MultiSearcher aggregating sub-searchers.
/// </summary>
/// <param name="queryStr">the query to check.</param>
/// <throws> IOException </throws>
/// <throws> ParseException </throws>
private void CheckQuery(System.String queryStr)
{
    // check result hit ranking
    if (verbose)
    {
        System.Console.Out.WriteLine("Query: " + queryStr);
    }
    QueryParser queryParser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    Query query = queryParser.Parse(queryStr);
    ScoreDoc[] multiSearcherHits = multiSearcher.Search(query, null, 1000).scoreDocs;
    ScoreDoc[] singleSearcherHits = singleSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(multiSearcherHits.Length, singleSearcherHits.Length);
    for (int i = 0; i < multiSearcherHits.Length; i++)
    {
        Document docMulti = multiSearcher.Doc(multiSearcherHits[i].doc);
        Document docSingle = singleSearcher.Doc(singleSearcherHits[i].doc);
        if (verbose)
        {
            System.Console.Out.WriteLine("Multi: " + docMulti.Get(FIELD_NAME) + " score=" + multiSearcherHits[i].score);
        }
        if (verbose)
        {
            System.Console.Out.WriteLine("Single: " + docSingle.Get(FIELD_NAME) + " score=" + singleSearcherHits[i].score);
        }
        Assert.AreEqual(multiSearcherHits[i].score, singleSearcherHits[i].score, 0.001f);
        Assert.AreEqual(docMulti.Get(FIELD_NAME), docSingle.Get(FIELD_NAME));
    }
    if (verbose)
    {
        System.Console.Out.WriteLine();
    }
}
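// Hypothetical setup for the two searchers compared by CheckQuery (a sketch under assumed
// Lucene.Net 2.9-era APIs, not taken from the original test): the same documents are indexed
// once into a combined directory for singleSearcher and split across two directories whose
// IndexSearchers are aggregated by a MultiSearcher.
Directory dirA = new RAMDirectory();
Directory dirB = new RAMDirectory();
Directory dirAll = new RAMDirectory();
// ... write half of the test documents to dirA, the other half to dirB, and all of them to dirAll ...
IndexSearcher singleSearcher = new IndexSearcher(dirAll, true);
MultiSearcher multiSearcher = new MultiSearcher(new Searchable[]
{
    new IndexSearcher(dirA, true),
    new IndexSearcher(dirB, true)
});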
protected internal virtual void PrintHits(System.String test, ScoreDoc[] h, Searcher searcher)
{
    System.Console.Error.WriteLine("------- " + test + " -------");
    for (int i = 0; i < h.Length; i++)
    {
        Document d = searcher.Doc(h[i].Doc, null);
        float score = h[i].Score;
        // print the score with six decimal places
        System.Console.Error.WriteLine("#" + i + ": " + score.ToString("0.000000") + " - " + d.Get("id", null) + " - " + d.Get("data", null));
    }
}
private static void GetResults(ref List<Airport> itemsList, TopDocs results, Searcher searcher)
{
    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        var item = new Airport();
        Document doc = searcher.Doc(scoreDoc.Doc);
        item.id = doc.Get("Code");
        item.label = doc.Get("CityName") + " - " + doc.Get("Name") + " (" + doc.Get("Code") + ")";
        item.value = doc.Get("CityName") + " - " + doc.Get("Name") + " (" + doc.Get("Code") + ")";
        itemsList.Add(item);
    }
}
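// Hypothetical call site for GetResults above (illustrative only; the directory, analyzer
// version and queried field are assumptions, not from the original code, and a
// Lucene.Net 3.0-style disposable IndexSearcher is assumed):
var airports = new List<Airport>();
using (var searcher = new IndexSearcher(directory, true))
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "CityName", analyzer);
    TopDocs results = searcher.Search(parser.Parse("london"), 20);
    GetResults(ref airports, results, searcher);
}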
/// <summary>
/// Creates result document collection from Lucene documents.
/// </summary>
/// <param name="searcher">The searcher.</param>
/// <param name="topDocs">The hits.</param>
private void CreateDocuments(Searcher searcher, TopDocs topDocs)
{
    // if no documents found return
    if (topDocs == null)
        return;

    var entries = new List<ResultDocument>();

    // get total hits
    var totalCount = topDocs.TotalHits;
    var recordsToRetrieve = Results.SearchCriteria.RecordsToRetrieve;
    var startIndex = Results.SearchCriteria.StartingRecord;
    if (recordsToRetrieve > totalCount)
        recordsToRetrieve = totalCount;

    for (var index = startIndex; index < startIndex + recordsToRetrieve; index++)
    {
        if (index >= totalCount)
            break;

        var document = searcher.Doc(topDocs.ScoreDocs[index].Doc);
        var doc = new ResultDocument();

        var documentFields = document.GetFields();
        using (var fi = documentFields.GetEnumerator())
        {
            while (fi.MoveNext())
            {
                if (fi.Current != null)
                {
                    var field = fi.Current;
                    doc.Add(new DocumentField(field.Name, field.StringValue));
                }
            }
        }

        entries.Add(doc);
    }

    var searchDocuments = new ResultDocumentSet
    {
        Name = "Items",
        Documents = entries.OfType<IDocument>().ToArray(),
        TotalCount = totalCount
    };

    Results.Documents = new[] { searchDocuments };
}
private System.Collections.Hashtable GetScores(ScoreDoc[] hits, Searcher searcher)
{
    System.Collections.Hashtable scoreMap = new System.Collections.Hashtable();
    int n = hits.Length;
    for (int i = 0; i < n; ++i)
    {
        Document doc = searcher.Doc(hits[i].Doc);
        System.String[] v = doc.GetValues("tracer");
        Assert.AreEqual(v.Length, 1);
        scoreMap[v[0]] = (float) hits[i].Score;
    }
    return scoreMap;
}
// make sure the documents returned by the search match the expected list
private void AssertMatches(Searcher searcher, Query query, Sort sort, System.String expectedResult)
{
    //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
    TopDocs hits = searcher.Search(query, null, expectedResult.Length, sort);
    ScoreDoc[] result = hits.ScoreDocs;
    Assert.AreEqual(hits.TotalHits, expectedResult.Length);
    System.Text.StringBuilder buff = new System.Text.StringBuilder(10);
    int n = result.Length;
    for (int i = 0; i < n; ++i)
    {
        Document doc = searcher.Doc(result[i].Doc);
        System.String[] v = doc.GetValues("tracer");
        for (int j = 0; j < v.Length; ++j)
        {
            buff.Append(v[j]);
        }
    }
    Assert.AreEqual(expectedResult, buff.ToString());
}
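// Hypothetical call site for AssertMatches above (a sketch, not from the original test). It
// assumes every indexed document stores a single one-letter "tracer" value, so the tracers
// concatenated in sort order can be compared against an expected string such as "ABC".
Sort byTracer = new Sort(new SortField("tracer", SortField.STRING));
AssertMatches(searcher, new MatchAllDocsQuery(), byTracer, "ABC");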
private void CheckHits(ScoreDoc[] hits, int expectedCount, Searcher searcher)
{
    Assert.AreEqual(expectedCount, hits.Length, "total results");
    for (int i = 0; i < hits.Length; i++)
    {
        if (i < 10 || (i > 94 && i < 105))
        {
            Document d = searcher.Doc(hits[i].Doc);
            Assert.AreEqual(System.Convert.ToString(i), d.Get(ID_FIELD), "check " + i);
        }
    }
}
private void PrintHits(System.IO.StreamWriter out_Renamed, ScoreDoc[] hits, Searcher searcher)
{
    out_Renamed.WriteLine(hits.Length + " total results\n");
    for (int i = 0; i < hits.Length; i++)
    {
        if (i < 10 || (i > 94 && i < 105))
        {
            Document d = searcher.Doc(hits[i].Doc);
            out_Renamed.WriteLine(i + " " + d.Get(ID_FIELD));
        }
    }
}
public virtual void TestEmptyIndex()
{
    // creating two directories for indices
    Directory indexStoreA = new MockRAMDirectory();
    Directory indexStoreB = new MockRAMDirectory();

    // creating a document to store
    Document lDoc = new Document();
    lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);

    //--------------------------------------------------------------------
    // scenario 1
    //--------------------------------------------------------------------

    // writing the documents to the first index
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.AddDocument(lDoc3);
    writerA.Optimize();
    writerA.Close();

    // closing the second index
    writerB.Close();

    // creating the query
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
    Query query = parser.Parse("handle:1");

    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB, true);
    searchers[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    Searcher mSearcher = GetMultiSearcherInstance(searchers);
    // performing the search
    ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs;

    Assert.AreEqual(3, hits.Length);

    // iterating over the hit documents
    for (int i = 0; i < hits.Length; i++)
    {
        mSearcher.Doc(hits[i].Doc);
    }
    mSearcher.Close();

    //--------------------------------------------------------------------
    // scenario 2
    //--------------------------------------------------------------------

    // adding one document to the empty index
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.AddDocument(lDoc);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB, true);
    searchers2[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);
    // performing the same search
    ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).ScoreDocs;

    Assert.AreEqual(4, hits2.Length);

    // iterating over the hit documents
    for (int i = 0; i < hits2.Length; i++)
    {
        // no exception should happen at this point
        mSearcher2.Doc(hits2[i].Doc);
    }

    // test the subSearcher() method:
    Query subSearcherQuery = parser.Parse("id:doc1");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(2, hits2.Length);
    Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[0]
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc)); // hit from searchers2[1]
    subSearcherQuery = parser.Parse("id:doc2");
    hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits2.Length);
    Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[1]
    mSearcher2.Close();

    //--------------------------------------------------------------------
    // scenario 3
    //--------------------------------------------------------------------

    // deleting the document just added, this will cause a different exception to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB, false);
    readerB.DeleteDocuments(term);
    readerB.Close();

    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers3 = new Searcher[2];
    searchers3[0] = new IndexSearcher(indexStoreB, true);
    searchers3[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);
    // performing the same search
    ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).ScoreDocs;

    Assert.AreEqual(3, hits3.Length);

    // iterating over the hit documents
    for (int i = 0; i < hits3.Length; i++)
    {
        mSearcher3.Doc(hits3[i].Doc);
    }
    mSearcher3.Close();
    indexStoreA.Close();
    indexStoreB.Close();
}
/// <summary>
/// Creates result document collection from Lucene documents.
/// </summary>
/// <param name="searcher">The searcher.</param>
/// <param name="topDocs">The hits.</param>
private void CreateDocuments(Searcher searcher, TopDocs topDocs)
{
    // if no documents found return
    if (topDocs == null)
        return;

    var entries = new List<ResultDocument>();

    // get total hits
    var totalCount = topDocs.TotalHits;
    var recordsToRetrieve = Results.SearchCriteria.RecordsToRetrieve;
    var startIndex = Results.SearchCriteria.StartingRecord;
    if (recordsToRetrieve > totalCount)
        recordsToRetrieve = totalCount;

    for (var index = startIndex; index < startIndex + recordsToRetrieve; index++)
    {
        if (index >= totalCount)
            break;

        var document = searcher.Doc(topDocs.ScoreDocs[index].Doc);
        var doc = new ResultDocument();

        var documentFields = document.GetFields();
        using (var fi = documentFields.GetEnumerator())
        {
            while (fi.MoveNext())
            {
                if (fi.Current != null)
                {
                    var field = fi.Current;

                    // make sure the document field doesn't already exist; if it does, simply add another value
                    if (doc.ContainsKey(field.Name))
                    {
                        var existingField = doc[field.Name] as DocumentField;
                        if (existingField != null)
                            existingField.AddValue(field.StringValue);
                    }
                    else // add new
                    {
                        doc.Add(new DocumentField(field.Name, field.StringValue));
                    }
                }
            }
        }

        entries.Add(doc);
    }

    var searchDocuments = new ResultDocumentSet
    {
        Name = "Items",
        Documents = entries.ToArray(),
        TotalCount = totalCount
    };

    Results.Documents = new[] { searchDocuments };
}
protected IEnumerable<LucObject> GetResultPage(ScoreDoc[] hits, Searcher searcher, int howMany, bool allVersions, out bool noMoreHits)
{
    var result = new List<LucObject>();
    noMoreHits = false;
    if (hits.Length == 0)
        return result;

    var user = this.LucQuery.User;
    var currentUser = AccessProvider.Current.GetCurrentUser();
    if (user == null)
        user = currentUser;
    var isCurrentUser = user.Id == currentUser.Id;

    var upperBound = hits.Length;
    var index = 0;
    while (true)
    {
        Document doc = searcher.Doc(hits[index].Doc);
        if (allVersions || IsPermitted(doc, user, isCurrentUser))
        {
            result.Add(new LucObject(doc));
            if (result.Count == howMany)
            {
                noMoreHits = false;
                break;
            }
        }
        if (++index >= upperBound)
        {
            noMoreHits = true;
            break;
        }
    }
    return result;

    //foreach (var hit in hits)
    //{
    //    Document doc = searcher.Doc(hit.doc);
    //    if (allVersions || IsPermitted(doc, user, isCurrentUser))
    //    {
    //        result.Add(new LucObject(doc));
    //        if (result.Count == howMany)
    //            break;
    //    }
    //}
    //return result;

    /*
    Logger.Write(this.LucQuery.QueryText);
    //var startIndex = this.StartIndex;
    var pageSize = this.LucQuery.PageSize;
    if (pageSize == 0)
        pageSize = Int32.MaxValue;
    var top = this.LucQuery.Top;
    if (top == 0)
        top = Int32.MaxValue;
    if (top < pageSize)
        pageSize = top;
    var countInPage = 0;
    var result = new List<LucObject>();
    var user = this.LucQuery.User;
    var currentUser = AccessProvider.Current.GetCurrentUser();
    if (user == null)
        user = currentUser;
    var isCurrentUser = user.Id == currentUser.Id;
    foreach (var hit in hits)
    {
        Document doc = searcher.Doc(hit.doc);
        if (allVersions || IsPermitted(doc, user, isCurrentUser))
        {
            if (countInPage++ >= pageSize)
                break;
            result.Add(new LucObject(doc));
        }
    }
    return result;
    */
}
private IEnumerable<LucObject> GetResultPage(ScoreDoc[] hits, Searcher searcher, bool allVersions)
{
    Logger.Write(this.LucQuery.QueryText);
    //using (var traceOperation = Logger.TraceOperation(allVersions ? "Query paging" : "Query paging and security"))
    //{
    var startIndex = this.LucQuery.StartIndex;
    var pageSize = this.LucQuery.PageSize;
    if (pageSize == 0)
        pageSize = Int32.MaxValue;
    var top = this.LucQuery.Top;
    if (top == 0)
        top = Int32.MaxValue;
    if (top < pageSize)
        pageSize = top;

    var count = 0;
    var countInPage = 0;
    var result = new List<LucObject>();
    var user = this.LucQuery.User;
    var currentUser = AccessProvider.Current.GetCurrentUser();
    if (user == null)
        user = currentUser;
    var isCurrentUser = user.Id == currentUser.Id;
    foreach (var hit in hits)
    {
        Document doc = searcher.Doc(hit.Doc);
        if (allVersions || IsPermitted(doc, user, isCurrentUser))
        {
            if (++count >= startIndex)
            {
                if (countInPage++ >= pageSize)
                    break;
                result.Add(new LucObject(doc));
            }
        }
    }
    //traceOperation.IsSuccessful = true;
    return result;
    //}
}
/// <summary> This demonstrates a typical paging search scenario, where the search engine presents
/// pages of size n to the user. The user can then go to the next page if interested in
/// the next hits.
///
/// When the query is executed for the first time, then only enough results are collected
/// to fill 5 result pages. If the user wants to page beyond this limit, then the query
/// is executed another time and all hits are collected.
/// </summary>
public static void DoPagingSearch(StreamReader input, Searcher searcher, Query query, int hitsPerPage, bool raw, bool interactive)
{
    // Collect enough docs to show 5 pages
    var collector = TopScoreDocCollector.Create(5 * hitsPerPage, false);
    searcher.Search(query, collector);
    var hits = collector.TopDocs().ScoreDocs;

    int numTotalHits = collector.TotalHits;
    Console.Out.WriteLine(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.Min(numTotalHits, hitsPerPage);

    while (true)
    {
        if (end > hits.Length)
        {
            Console.Out.WriteLine("Only results 1 - " + hits.Length + " of " + numTotalHits + " total matching documents collected.");
            Console.Out.WriteLine("Collect more (y/n) ?");
            String line = input.ReadLine();
            if (String.IsNullOrEmpty(line) || line[0] == 'n')
            {
                break;
            }

            collector = TopScoreDocCollector.Create(numTotalHits, false);
            searcher.Search(query, collector);
            hits = collector.TopDocs().ScoreDocs;
        }

        end = Math.Min(hits.Length, start + hitsPerPage);

        for (int i = start; i < end; i++)
        {
            if (raw)
            {
                // output raw format
                Console.Out.WriteLine("doc=" + hits[i].Doc + " score=" + hits[i].Score);
                continue;
            }

            Document doc = searcher.Doc(hits[i].Doc);
            String path = doc.Get("path");
            if (path != null)
            {
                Console.Out.WriteLine((i + 1) + ". " + path);
                String title = doc.Get("title");
                if (title != null)
                {
                    Console.Out.WriteLine(" Title: " + doc.Get("title"));
                }
            }
            else
            {
                Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive)
        {
            break;
        }

        if (numTotalHits >= end)
        {
            bool quit = false;
            while (true)
            {
                Console.Out.Write("Press ");
                if (start - hitsPerPage >= 0)
                {
                    Console.Out.Write("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits)
                {
                    Console.Out.Write("(n)ext page, ");
                }
                Console.Out.WriteLine("(q)uit or enter number to jump to a page.");

                String line = input.ReadLine();
                if (String.IsNullOrEmpty(line) || line[0] == 'q')
                {
                    quit = true;
                    break;
                }
                if (line[0] == 'p')
                {
                    start = Math.Max(0, start - hitsPerPage);
                    break;
                }
                else if (line[0] == 'n')
                {
                    if (start + hitsPerPage < numTotalHits)
                    {
                        start += hitsPerPage;
                    }
                    break;
                }
                else
                {
                    int page;
                    if (Int32.TryParse(line, out page))
                    {
                        if ((page - 1) * hitsPerPage < numTotalHits)
                        {
                            start = (page - 1) * hitsPerPage;
                            break;
                        }
                        else
                        {
                            Console.Out.WriteLine("No such page");
                        }
                    }
                    else
                    {
                        Console.Out.WriteLine("Unrecognized page number. Quitting.");
                        quit = true;
                        break;
                    }
                }
            }
            if (quit)
                break;
            end = Math.Min(numTotalHits, start + hitsPerPage);
        }
    }
}
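// Hypothetical driver for DoPagingSearch above (illustrative; the index path, field name and
// analyzer version are assumptions, not part of the original sample, and a Lucene.Net 3.0-style
// disposable IndexSearcher is assumed):
using (var searcher = new IndexSearcher(FSDirectory.Open(new System.IO.DirectoryInfo("index")), true))
using (var input = new StreamReader(Console.OpenStandardInput()))
{
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "contents", analyzer);
    Query query = parser.Parse("lucene");
    DoPagingSearch(input, searcher, query, 10, false, true);
}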
/// <summary> Checks to see if the hits are what we expected.
/// </summary>
/// <param name="query">the query to execute</param>
/// <param name="description">the description of the search</param>
/// <param name="expectedIds">the expected document ids of the hits</param>
/// <param name="expectedScores">the expected scores of the hits</param>
/// <throws> IOException </throws>
protected internal static void assertHits(Searcher s, Query query, System.String description, System.String[] expectedIds, float[] expectedScores)
{
    QueryUtils.Check(query, s);

    float tolerance = 1e-5f;

    // Hits hits = searcher.search(query);
    // hits normalizes and throws things off if one score is greater than 1.0
    TopDocs topdocs = s.Search(query, null, 10000);

    /***
    // display the hits
    System.out.println(hits.length() + " hits for search: \"" + description + '\"');
    for (int i = 0; i < hits.length(); i++)
    {
        System.out.println("  " + FIELD_ID + ':' + hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')');
    }
    *****/

    // did we get the hits we expected
    Assert.AreEqual(expectedIds.Length, topdocs.TotalHits);
    for (int i = 0; i < topdocs.TotalHits; i++)
    {
        //System.out.println(i + " exp: " + expectedIds[i]);
        //System.out.println(i + " field: " + hits.doc(i).get(FIELD_ID));

        int id = topdocs.ScoreDocs[i].Doc;
        float score = topdocs.ScoreDocs[i].Score;
        Document doc = s.Doc(id);
        Assert.AreEqual(expectedIds[i], doc.Get(FIELD_ID));
        bool scoreEq = System.Math.Abs(expectedScores[i] - score) < tolerance;
        if (!scoreEq)
        {
            System.Console.Out.WriteLine(i + " warning, expected score: " + expectedScores[i] + ", actual " + score);
            System.Console.Out.WriteLine(s.Explain(query, id));
        }
        Assert.AreEqual(expectedScores[i], score, tolerance);
        Assert.AreEqual(s.Explain(query, id).Value, score, tolerance);
    }
}
protected internal virtual void PrintHits(System.String test, ScoreDoc[] h, Searcher searcher)
{
    System.Console.Error.WriteLine("------- " + test + " -------");
    for (int i = 0; i < h.Length; i++)
    {
        Document d = searcher.Doc(h[i].Doc);
        float score = h[i].Score;
        // print the score with six decimal places
        System.Console.Error.WriteLine("#" + i + ": " + score.ToString("0.000000") + " - " + d.Get("id") + " - " + d.Get("data"));
    }
}
private IEnumerable<ContentSearchResult> EnumSearchResults(int start, Searcher searcher, TopDocsCollector collector, int limit)
{
    TopDocs results = collector.TopDocs();
    float max = results.GetMaxScore();
    ScoreDoc[] found = results.scoreDocs;
    limit = Math.Min(limit, found.Length);

    for (int i = start; i < limit; i++)
    {
        ScoreDoc doc = found[i];
        Document docInfo = searcher.Doc(doc.doc);

        ContentSearchResult.Builder builder = new ContentSearchResult.Builder();
        builder.SetRanking((uint) Math.Max(0, Math.Min(100, (int) (doc.score / max * 100f))));
        builder.SetUri(docInfo.GetField("uri").StringValue());
        builder.SetTitle(docInfo.GetField("title").StringValue());
        builder.SetBlurb(docInfo.GetField("blurb").StringValue());
        builder.SetModified(DateTime.ParseExact(docInfo.GetField("modified").StringValue(),
                                                "yyyy-MM-dd HH:mm:ss",
                                                CultureInfo.InvariantCulture,
                                                DateTimeStyles.None));

        ContentRecord record;
        if (TryGetValue(builder.Uri, out record))
        {
            builder.SetRecord(record);
        }

        yield return builder.Build();
    }
}