public Medidas(TopDocs hits, Searcher searcher, List<int> docs)
{
    this.qtdDocsRelevantesRetornados = DocsRelevantesRecuperados(hits, searcher, docs);
    this.precisao = _Precisao(hits.ScoreDocs.Length);
    this.cobetura = _Cobertura(docs.Count);
    this.fmeasure = _FMeasure();
}
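The private helpers wired together above are not shown in this snippet; the following is a hedged sketch of what they are presumed to compute, using the standard IR definitions (field types and zero-guards are assumptions, not the original code):

private double _Precisao(int totalRetrieved) =>
    totalRetrieved == 0 ? 0 : (double)qtdDocsRelevantesRetornados / totalRetrieved; // P = relevant retrieved / total retrieved
private double _Cobertura(int totalRelevant) =>
    totalRelevant == 0 ? 0 : (double)qtdDocsRelevantesRetornados / totalRelevant;   // R = relevant retrieved / total relevant
private double _FMeasure() =>
    (precisao + cobetura) == 0 ? 0 : 2 * precisao * cobetura / (precisao + cobetura); // F1 = harmonic mean of P and R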
private void btn_search_Click(object sender, EventArgs e)
{
    // Expand the raw query with synonyms for each token.
    String q = txt_query.Text;
    foreach (String token in txt_query.Text.Split(' ', '.', '?'))
    {
        if (!synonyms.ContainsKey(token))
            continue;
        foreach (String syn in synonyms[token])
            q = q + " " + syn;
    }

    var query = text_parser.Parse(q);
    var cat_query = text_parser.Parse(txt_query.Text);

    // Search the text field plus the category and subcategory fields.
    var queryString = String.Format("({0}) OR ({1}) OR ({2})",
        query,
        cat_query.ToString().Replace("text", "cat"),
        cat_query.ToString().Replace("text", "subcat"));
    try
    {
        query = text_parser.Parse(queryString);
    }
    catch (Exception)
    {
        // Malformed query; bail out rather than crash the UI.
        return;
    }

    result = searcher.Search(query, 20);
    ShowResult(result);
}
/// <summary>
/// Initializes a new instance of the <see cref="SearchResults" /> class.
/// </summary>
/// <param name="searcher">The searcher.</param>
/// <param name="reader">The reader.</param>
/// <param name="docs">The hits.</param>
/// <param name="criteria">The criteria.</param>
/// <param name="query">The query.</param>
public LuceneSearchResults(Searcher searcher, IndexReader reader, TopDocs docs, ISearchCriteria criteria, Query query)
{
    Results = new SearchResults(criteria, null);
    CreateDocuments(searcher, docs);
    CreateFacets(reader, query);
    CreateSuggestions(reader, criteria);
}
private static void GetResults(ref List<Airport> itemsList, TopDocs results, Searcher searcher)
{
    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        Document doc = searcher.Doc(scoreDoc.Doc);
        // label and value carry the same display string
        string display = doc.Get("CityName") + " - " + doc.Get("Name") + " (" + doc.Get("Code") + ")";
        var item = new Airport
        {
            id = doc.Get("Code"),
            label = display,
            value = display
        };
        itemsList.Add(item);
    }
}
public static bool HitsIncludeTitle(IndexSearcher searcher, TopDocs topDocs, String title)
{
    foreach (var scoreDoc in topDocs.ScoreDocs)
    {
        var doc = searcher.Doc(scoreDoc.Doc);
        var docTitle = doc.Get("title");
        var decodedTitle = DecodeEncodedNonAsciiCharacters(docTitle);
        if (decodedTitle.Equals(title))
        {
            return true;
        }
    }
    Console.WriteLine("title '" + title + "' not found");
    return false;
}
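A hedged usage sketch for the helper above; `searcher` is assumed to be an open IndexSearcher over an index whose documents carry a stored "title" field (the query and expected title are illustrative):

TopDocs topDocs = searcher.Search(new TermQuery(new Term("title", "lucene")), 10);
Assert.IsTrue(HitsIncludeTitle(searcher, topDocs, "lucene"));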
/// <summary>
/// Creates result document collection from Lucene documents.
/// </summary>
/// <param name="searcher">The searcher.</param>
/// <param name="topDocs">The hits.</param>
private void CreateDocuments(Searcher searcher, TopDocs topDocs)
{
    // if no documents found return
    if (topDocs == null)
        return;

    var entries = new List<ResultDocument>();

    // get total hits
    var totalCount = topDocs.TotalHits;
    var recordsToRetrieve = Results.SearchCriteria.RecordsToRetrieve;
    var startIndex = Results.SearchCriteria.StartingRecord;
    if (recordsToRetrieve > totalCount)
        recordsToRetrieve = totalCount;

    for (var index = startIndex; index < startIndex + recordsToRetrieve; index++)
    {
        if (index >= totalCount)
            break;

        var document = searcher.Doc(topDocs.ScoreDocs[index].Doc);
        var doc = new ResultDocument();

        var documentFields = document.GetFields();
        using (var fi = documentFields.GetEnumerator())
        {
            while (fi.MoveNext())
            {
                if (fi.Current != null)
                {
                    var field = fi.Current;
                    doc.Add(new DocumentField(field.Name, field.StringValue));
                }
            }
        }

        entries.Add(doc);
    }

    var searchDocuments = new ResultDocumentSet
    {
        Name = "Items",
        Documents = entries.OfType<IDocument>().ToArray(),
        TotalCount = totalCount
    };

    Results.Documents = new[] { searchDocuments };
}
public IEnumerable<Document> Search(string propertyNameToSearch, string searchTerm, int topCount = int.MaxValue)
{
    // The searcher and reader are disposed when the enumerator completes;
    // without the using blocks they would leak (this is a lazy iterator).
    using (LuceneSearch.IndexSearcher searcher = new LuceneSearch.IndexSearcher(IndexDirectory))
    using (IndexReader reader = IndexReader.Open(IndexDirectory, true))
    {
        QueryParser parser = new QueryParser(LuceneVersion, propertyNameToSearch, AnalyzerProvider());
        LuceneSearch.Query query = parser.Parse(searchTerm);
        LuceneSearch.TopDocs hits = searcher.Search(query, topCount);
        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            LuceneSearch.ScoreDoc scoreDoc = hits.ScoreDocs[i];
            Document doc = reader.Document(scoreDoc.Doc);
            yield return doc;
        }
    }
}
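For context, a minimal self-contained sketch of the same search-and-enumerate pattern against the plain Lucene.Net 3.x API; the "index" path and "title" field are illustrative assumptions, not part of the snippet above. Note that `searcher.Doc` can serve documents directly, so a separate IndexReader is not strictly needed:

using System;
using System.IO;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;

static class SearchSketch
{
    static void Main()
    {
        using (var dir = FSDirectory.Open(new DirectoryInfo("index")))
        using (var searcher = new IndexSearcher(dir, true)) // readOnly = true
        {
            var version = Lucene.Net.Util.Version.LUCENE_30;
            var parser = new QueryParser(version, "title", new StandardAnalyzer(version));
            Query query = parser.Parse("lucene");
            TopDocs hits = searcher.Search(query, 10);
            foreach (ScoreDoc sd in hits.ScoreDocs)
            {
                Document doc = searcher.Doc(sd.Doc); // no separate IndexReader needed
                Console.WriteLine(doc.Get("title"));
            }
        }
    }
}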
private int DocsRelevantesRecuperados(TopDocs hits, Searcher searcher, List<int> docs)
{
    int qtd = 0;
    foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
    {
        // Searcher exposes Doc(int); the original called a nonstandard GetDocument(scoreDoc).
        Document doc = searcher.Doc(scoreDoc.Doc);
        // File names look like "name(123).ext"; the number between the parentheses
        // is the document id used in the relevance judgments.
        string docId = doc.Get(Constants.FILE_NAME);
        string number = docId.Substring(docId.IndexOf('(') + 1,
                                        docId.IndexOf(')') - docId.IndexOf('(') - 1);
        int num = Int32.Parse(number);
        if (docs.Contains(num))
            qtd++;
    }
    return qtd;
}
public void Test_Store_RAMDirectory()
{
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();

    //Index 1 Doc
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    wr.AddDocument(doc);
    wr.Dispose();

    //now serialize it
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);

    //Close DIR
    ramDIR.Close();
    ramDIR = null;

    //now deserialize
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream);

    //Add 1 more doc
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    wr.AddDocument(doc);
    wr.Dispose();

    //Search
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_CURRENT, "field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT));
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();

    // NUnit's AreEqual takes the expected value first.
    Assert.AreEqual(2, topDocs.TotalHits, "See the issue: LUCENENET-174");
}
public void TestCustomExternalQuery()
{
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
    String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
    Query q1 = qp.Parse(qtxt);
    Query q = new CustomExternalQuery(q1);
    Log(q);

    IndexSearcher s = new IndexSearcher(dir);
    TopDocs hits = s.Search(q, 1000);
    Assert.AreEqual(N_DOCS, hits.TotalHits);
    for (int i = 0; i < N_DOCS; i++)
    {
        int doc = hits.ScoreDocs[i].Doc;
        float score = hits.ScoreDocs[i].Score;
        Assert.AreEqual(score, (float)1 + (4 * doc) % N_DOCS, 0.0001, "doc=" + doc);
    }
    s.Close();
}
public void WriteV2ResultTest(
    string indexName,
    int numDocs,
    Dictionary<string, string> commitUserData,
    int topDocsTotalHits,
    float topDocsMaxScore,
    int skip,
    int take,
    string expected)
{
    var searcher = new MockSearcher(indexName, numDocs, commitUserData, versions: Constants.VersionResults);
    var topDocs = new TopDocs(topDocsTotalHits, Constants.ScoreDocs, topDocsMaxScore);

    var sb = new StringBuilder();
    var sw = new StringWriter(sb);
    using (var writer = new JsonTextWriter(sw))
    {
        ResponseFormatter.WriteV2Result(writer, searcher, topDocs, skip, take);
        Assert.Equal(expected, sb.ToString());
    }
}
const string TEXT_PASSAGE = "Passage Text"; //Text fields. For the Passage Text column.

private void DisplayTopDoc(Lucene.Net.Search.TopDocs results)
{
    int rank = 0;
    var searcher = myLuceneApp.CreateSearcher();
    Lucene.Net.Documents.Document doc = null;

    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        rank++;
        // retrieve the document from the 'ScoreDoc' object
        doc = searcher.Doc(scoreDoc.Doc);
        // Get() already returns a string; the redundant ToString() calls
        // (which would throw on a missing field) have been dropped.
        string field_URL = doc.Get(TEXT_URL);
        string field_Text = doc.Get(TEXT_PASSAGE);

        ResultsUpdate("Rank #" + rank);
        ResultsUpdate("\n");
        ResultsUpdate("Rank #" + rank);
        ResultsUpdate("URL: " + field_URL);
        ResultsUpdate("Passage Text: ");
        ResultsUpdate(field_Text);
        ResultsUpdate("\n");
    }
}
void LUCENENET_100_ClientSearch()
{
    try
    {
        Lucene.Net.Search.Searchable s = (Lucene.Net.Search.Searchable)Activator.GetObject(
            typeof(Lucene.Net.Search.Searchable),
            @"tcp://localhost:" + ANYPORT + "/Searcher");
        Lucene.Net.Search.MultiSearcher searcher = new Lucene.Net.Search.MultiSearcher(new Lucene.Net.Search.Searchable[] { s });

        Lucene.Net.Search.Query q = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field1", "moon"));

        Lucene.Net.Search.Sort sort = new Lucene.Net.Search.Sort();
        sort.SetSort(new Lucene.Net.Search.SortField("field2", Lucene.Net.Search.SortField.INT));

        Lucene.Net.Search.TopDocs h = searcher.Search(q, null, 100, sort);
    }
    catch (Exception ex)
    {
        LUCENENET_100_Exception = ex;
    }
    finally
    {
        LUCENENET_100_testFinished = true;
    }
}
/// <summary> A search implementation which executes each
/// <see cref="Searchable"/> in its own thread and waits for each search to complete
/// and merge the results back together.
/// </summary>
public override TopDocs Search(Weight weight, Filter filter, int nDocs)
{
    HitQueue hq = new HitQueue(nDocs, false);
    object lockObj = new object();

    TopDocs[] results = new TopDocs[searchables.Length];
    //search each searchable
    Parallel.For(0, searchables.Length, (i) =>
        results[i] = MultiSearcherCallableNoSort(ThreadLock.MonitorLock, lockObj, searchables[i], weight, filter, nDocs, hq, i, starts));

    int totalHits = 0;
    float maxScore = float.NegativeInfinity;

    foreach (TopDocs topDocs in results)
    {
        totalHits += topDocs.TotalHits;
        maxScore = Math.Max(maxScore, topDocs.MaxScore);
    }

    ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()];
    for (int i = hq.Size() - 1; i >= 0; i--) // put docs in array
        scoreDocs[i] = hq.Pop();

    return new TopDocs(totalHits, scoreDocs, maxScore);
}
public virtual void TestSimple()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    Field field = NewTextField("foo", "", Field.Store.NO);
    doc.Add(field);
    Field dvField = new SingleDocValuesField("foo_boost", 0.0F);
    doc.Add(dvField);
    Field field2 = NewTextField("bar", "", Field.Store.NO);
    doc.Add(field2);

    field.SetStringValue("quick brown fox");
    field2.SetStringValue("quick brown fox");
    dvField.SetSingleValue(2f); // boost x2
    iw.AddDocument(doc);

    field.SetStringValue("jumps over lazy brown dog");
    field2.SetStringValue("jumps over lazy brown dog");
    dvField.SetSingleValue(4f); // boost x4
    iw.AddDocument(doc);

    IndexReader ir = iw.Reader;
    iw.Dispose();

    // no boosting
    IndexSearcher searcher1 = NewSearcher(ir, false, Similarity);
    Similarity @base = searcher1.Similarity;
    // boosting
    IndexSearcher searcher2 = NewSearcher(ir, false, Similarity);
    searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base);

    // in this case, we searched on field "foo". first document should have 2x the score.
    TermQuery tq = new TermQuery(new Term("foo", "quick"));
    QueryUtils.Check(Random(), tq, searcher1, Similarity);
    QueryUtils.Check(Random(), tq, searcher2, Similarity);

    TopDocs noboost = searcher1.Search(tq, 10);
    TopDocs boost = searcher2.Search(tq, 10);
    Assert.AreEqual(1, noboost.TotalHits);
    Assert.AreEqual(1, boost.TotalHits);

    //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc));
    Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON);

    // this query matches only the second document, which should have 4x the score.
    tq = new TermQuery(new Term("foo", "jumps"));
    QueryUtils.Check(Random(), tq, searcher1, Similarity);
    QueryUtils.Check(Random(), tq, searcher2, Similarity);

    noboost = searcher1.Search(tq, 10);
    boost = searcher2.Search(tq, 10);
    Assert.AreEqual(1, noboost.TotalHits);
    Assert.AreEqual(1, boost.TotalHits);

    Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON);

    // search on field bar just for kicks: nothing should happen, since we set up
    // our sim provider to only use foo_boost for field foo.
    tq = new TermQuery(new Term("bar", "quick"));
    QueryUtils.Check(Random(), tq, searcher1, Similarity);
    QueryUtils.Check(Random(), tq, searcher2, Similarity);

    noboost = searcher1.Search(tq, 10);
    boost = searcher2.Search(tq, 10);
    Assert.AreEqual(1, noboost.TotalHits);
    Assert.AreEqual(1, boost.TotalHits);

    Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON);

    ir.Dispose();
    dir.Dispose();
}
override public void Run()
{
    try
    {
        docs = (sort == null) ? searchable.Search(weight, filter, nDocs) : searchable.Search(weight, filter, nDocs, sort);
    }
    // Store the IOException for later use by the caller of this thread
    catch (System.IO.IOException ioe)
    {
        this.ioe = ioe;
    }
    if (this.ioe == null)
    {
        // if we are sorting by fields, we need to tell the field sorted hit queue
        // the actual type of fields, in case the original list contained AUTO.
        // if the searchable returns null for fields, we'll have problems.
        if (sort != null)
        {
            ((FieldDocSortedHitQueue) hq).SetFields(((TopFieldDocs) docs).fields);
        }
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            // merge scoreDocs into hq
            ScoreDoc scoreDoc = scoreDocs[j];
            scoreDoc.doc += starts[i]; // convert doc id to the multi-searcher's global space
            //it would be so nice if we had a thread-safe insert
            lock (hq)
            {
                if (!hq.Insert(scoreDoc))
                    break; // no more scores > minScore
            }
        }
    }
}
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN)
{
    ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone();
    Array.Sort(hits, Comparer<ScoreDoc>.Create((a, b) => a.Doc - b.Doc));

    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;

    Weight weight = searcher.CreateNormalizedWeight(query);

    // Now merge sort docIDs from hits, with reader's leaves:
    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;
    int docBase = 0;
    Scorer scorer = null;

    while (hitUpto < hits.Length)
    {
        ScoreDoc hit = hits[hitUpto];
        int docID = hit.Doc;
        AtomicReaderContext readerContext = null;
        while (docID >= endDoc)
        {
            readerUpto++;
            readerContext = leaves[readerUpto];
            endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc;
        }

        if (readerContext != null)
        {
            // We advanced to another segment:
            docBase = readerContext.DocBase;
            scorer = weight.GetScorer(readerContext, null);
        }

        int targetDoc = docID - docBase;
        int actualDoc = scorer.DocID;
        if (actualDoc < targetDoc)
        {
            actualDoc = scorer.Advance(targetDoc);
        }

        if (actualDoc == targetDoc)
        {
            // Query did match this doc:
            hit.Score = Combine(hit.Score, true, scorer.GetScore());
        }
        else
        {
            // Query did not match this doc:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(actualDoc > targetDoc);
            }
            hit.Score = Combine(hit.Score, false, 0.0f);
        }

        hitUpto++;
    }

    // TODO: we should do a partial sort (of only topN)
    // instead, but typically the number of hits is
    // smallish:
    Array.Sort(hits, Comparer<ScoreDoc>.Create((a, b) =>
    {
        // Sort by score descending, then docID ascending:
        // LUCENENET specific - compare bits rather than using equality operators to prevent
        // these comparisons from failing in x86 in .NET Framework with optimizations enabled
        if (NumericUtils.SingleToSortableInt32(a.Score) > NumericUtils.SingleToSortableInt32(b.Score))
        {
            return -1;
        }
        else if (NumericUtils.SingleToSortableInt32(a.Score) < NumericUtils.SingleToSortableInt32(b.Score))
        {
            return 1;
        }
        else
        {
            // this subtraction can't overflow int
            // because docIDs are >= 0:
            return a.Doc - b.Doc;
        }
    }));

    if (topN < hits.Length)
    {
        ScoreDoc[] subset = new ScoreDoc[topN];
        Array.Copy(hits, 0, subset, 0, topN);
        hits = subset;
    }

    return new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score);
}
internal virtual void AssertPage(int pageStart, TopDocs all, TopDocs paged)
{
    Assert.AreEqual(all.TotalHits, paged.TotalHits);
    for (int i = 0; i < paged.ScoreDocs.Length; i++)
    {
        ScoreDoc sd1 = all.ScoreDocs[pageStart + i];
        ScoreDoc sd2 = paged.ScoreDocs[i];
        if (VERBOSE)
        {
            Console.WriteLine("    hit " + (pageStart + i));
            Console.WriteLine("      expected id=" + Searcher.Doc(sd1.Doc).Get("id") + " " + sd1);
            Console.WriteLine("        actual id=" + Searcher.Doc(sd2.Doc).Get("id") + " " + sd2);
        }
        Assert.AreEqual(sd1.Doc, sd2.Doc);
        Assert.AreEqual(sd1.Score, sd2.Score, 0f);
        if (sd1 is FieldDoc)
        {
            Assert.IsTrue(sd2 is FieldDoc);
            Assert.AreEqual(((FieldDoc)sd1).Fields, ((FieldDoc)sd2).Fields);
        }
    }
}
public int RecordResultsAlreadySeenForDistinctQuery(TopDocs search, bool adjustStart, int pageSize, ref int start)
{
    int itemsSkipped = 0;
    if (min == -1)
    {
        min = start;
    }
    min = Math.Min(min, search.TotalHits);

    // we are paging, we need to check that we don't have duplicates in the previous pages
    // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
    if (isProjectionOrMapReduce == false)
    {
        for (int i = alreadyScannedPositions; i < min; i++)
        {
            if (i >= search.ScoreDocs.Length)
            {
                alreadyScannedPositions = i;
                var pageSizeIncreaseSize = min - search.ScoreDocs.Length;
                return pageSizeIncreaseSize;
            }
            var document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
            var id = document.Get(Constants.DocumentIdFieldName);
            if (documentsAlreadySeenInPreviousPage.Add(id) == false)
            {
                // already seen this, need to expand the range we are scanning because the user
                // didn't take this into account
                min = Math.Min(min + 1, search.TotalHits);
                itemsSkipped++;
            }
        }
        alreadyScannedPositions = min;
    }

    if (fieldsToFetch.IsDistinctQuery)
    {
        // add results that were already there in previous pages
        for (int i = alreadyScannedPositionsForDistinct; i < min; i++)
        {
            if (i >= search.ScoreDocs.Length)
            {
                alreadyScannedPositionsForDistinct = i;
                var pageSizeIncreaseSize = min - search.ScoreDocs.Length;
                return pageSizeIncreaseSize;
            }
            Document document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
            var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
            if (indexQueryResult.Projection.Count > 0 && // we don't consider empty projections to be relevant for distinct operations
                alreadyReturned.Add(indexQueryResult.Projection) == false)
            {
                min++; // we found a duplicate
                itemsSkipped++;
            }
        }
        alreadyScannedPositionsForDistinct = min;
    }

    if (adjustStart)
    {
        start += itemsSkipped;
    }
    return itemsSkipped;
}
public override TopFieldDocs Search(Query query, int numHits, Sort sort)
{
    Debug.Assert(sort != null);
    TopDocs[] shardHits = new TopDocs[NodeVersions.Length];
    for (int nodeID = 0; nodeID < NodeVersions.Length; nodeID++)
    {
        if (nodeID == MyNodeID)
        {
            // My node; run using local shard searcher we
            // already acquired:
            shardHits[nodeID] = LocalSearch(query, numHits, sort);
        }
        else
        {
            shardHits[nodeID] = OuterInstance.OuterInstance.SearchNode(nodeID, NodeVersions, query, sort, numHits, null);
        }
    }

    // Merge:
    return (TopFieldDocs)TopDocs.Merge(sort, numHits, shardHits);
}
public List<Document> GetTopDocuments(TopDocs topDocs)
{
    return topDocs == null
        ? null
        : (from ScoreDoc doc in topDocs.ScoreDocs select GetDocument(doc.doc)).ToList();
}
public override TopDocs Rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN)
{
    ScoreDoc[] hits = (ScoreDoc[])firstPassTopDocs.ScoreDocs.Clone();
    Array.Sort(hits, new ComparerAnonymousInnerClassHelper(this));

    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;

    Weight weight = searcher.CreateNormalizedWeight(query);

    // Now merge sort docIDs from hits, with reader's leaves:
    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;
    int docBase = 0;
    Scorer scorer = null;

    while (hitUpto < hits.Length)
    {
        ScoreDoc hit = hits[hitUpto];
        int docID = hit.Doc;
        AtomicReaderContext readerContext = null;
        while (docID >= endDoc)
        {
            readerUpto++;
            readerContext = leaves[readerUpto];
            endDoc = readerContext.DocBase + readerContext.Reader.MaxDoc;
        }

        if (readerContext != null)
        {
            // We advanced to another segment:
            docBase = readerContext.DocBase;
            scorer = weight.GetScorer(readerContext, null);
        }

        int targetDoc = docID - docBase;
        int actualDoc = scorer.DocID;
        if (actualDoc < targetDoc)
        {
            actualDoc = scorer.Advance(targetDoc);
        }

        if (actualDoc == targetDoc)
        {
            // Query did match this doc:
            hit.Score = Combine(hit.Score, true, scorer.GetScore());
        }
        else
        {
            // Query did not match this doc:
            Debug.Assert(actualDoc > targetDoc);
            hit.Score = Combine(hit.Score, false, 0.0f);
        }

        hitUpto++;
    }

    // TODO: we should do a partial sort (of only topN)
    // instead, but typically the number of hits is
    // smallish:
    Array.Sort(hits, new ComparerAnonymousInnerClassHelper2(this));

    if (topN < hits.Length)
    {
        ScoreDoc[] subset = new ScoreDoc[topN];
        Array.Copy(hits, 0, subset, 0, topN);
        hits = subset;
    }

    return new TopDocs(firstPassTopDocs.TotalHits, hits, hits[0].Score);
}
/// <summary>
/// Sugar API, calling <see cref="QueryRescorer.Rescore(IndexSearcher, TopDocs, int)"/> using a simple linear
/// combination of firstPassScore + <paramref name="weight"/> * secondPassScore
/// </summary>
public static TopDocs Rescore(IndexSearcher searcher, TopDocs topDocs, Query query, double weight, int topN)
{
    return new QueryRescorerAnonymousInnerClassHelper(query, weight).Rescore(searcher, topDocs, topN);
}
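A hedged usage sketch of the sugar API above: rescore the top first-pass hits with a PhraseQuery so that the final score is firstPassScore + 2.0 * phraseScore (here `searcher` and `userQuery` are assumed from context, and the field/terms are illustrative):

TopDocs firstPass = searcher.Search(userQuery, 100);
PhraseQuery phrase = new PhraseQuery();
phrase.Add(new Term("field", "wizard"));
phrase.Add(new Term("field", "oz"));
TopDocs rescored = QueryRescorer.Rescore(searcher, firstPass, phrase, 2.0, 10);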
public virtual void TestRandomQueries()
{
    System.Random rnd = NewRandom();

    System.String field = "data";
    System.String[] vals = new System.String[] { "1", "2", "3", "4", "5", "6", "A", "Z", "B", "Y", "Z", "X", "foo" };
    int maxLev = 4;

    // callback object to set a random setMinimumNumberShouldMatch
    TestBoolean2.Callback minNrCB = new AnonymousClassCallback(rnd, this);

    // increase number of iterations for more complete testing
    for (int i = 0; i < 1000; i++)
    {
        int lev = rnd.Next(maxLev);
        long seed = rnd.Next(System.Int32.MaxValue);
        BooleanQuery q1 = TestBoolean2.RandBoolQuery(new System.Random((System.Int32)seed), lev, field, vals, null);
        // BooleanQuery q2 = TestBoolean2.randBoolQuery(new Random(seed), lev, field, vals, minNrCB);
        BooleanQuery q2 = TestBoolean2.RandBoolQuery(new System.Random((System.Int32)seed), lev, field, vals, null);
        // only set minimumNumberShouldMatch on the top level query since setting
        // at a lower level can change the score.
        minNrCB.PostCreate(q2);

        // Can't use Hits because normalized scores will mess things
        // up. The non-sorting version of search() that returns TopDocs
        // will not normalize scores.
        TopDocs top1 = s.Search(q1, null, 100);
        TopDocs top2 = s.Search(q2, null, 100);
        QueryUtils.Check(q1, s);
        QueryUtils.Check(q2, s);

        // The constrained query
        // should be a subset of the unconstrained query.
        if (top2.totalHits > top1.totalHits)
        {
            Assert.Fail("Constrained results not a subset:\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q2.ToString());
        }

        for (int hit = 0; hit < top2.totalHits; hit++)
        {
            int id = top2.scoreDocs[hit].doc;
            float score = top2.scoreDocs[hit].score;
            bool found = false;
            // find this doc in other hits
            for (int other = 0; other < top1.totalHits; other++)
            {
                if (top1.scoreDocs[other].doc == id)
                {
                    found = true;
                    float otherScore = top1.scoreDocs[other].score;
                    // check if scores match
                    if (System.Math.Abs(otherScore - score) > 1.0e-6f)
                    {
                        Assert.Fail("Doc " + id + " scores don't match\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q2.ToString());
                    }
                }
            }

            // check if subset
            if (!found)
            {
                Assert.Fail("Doc " + id + " not found\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q2.ToString());
            }
        }
    }
    // System.out.println("Total hits:"+tot);
}
/// <summary>
/// Same as <see cref="Merge(Sort, int, TopDocs[])"/> but also slices the result at the same time based
/// on the provided start and size. The returned <c>TopDocs</c> will always have a scoreDocs with length of
/// at most <paramref name="size"/>.
/// </summary>
public static TopDocs Merge(Sort sort, int start, int size, TopDocs[] shardHits)
{
    Util.PriorityQueue<ShardRef> queue;
    if (sort == null)
    {
        queue = new ScoreMergeSortQueue(shardHits);
    }
    else
    {
        queue = new MergeSortQueue(sort, shardHits);
    }

    int totalHitCount = 0;
    int availHitCount = 0;
    float maxScore = float.MinValue;
    for (int shardIDX = 0; shardIDX < shardHits.Length; shardIDX++)
    {
        TopDocs shard = shardHits[shardIDX];
        // totalHits can be non-zero even if no hits were
        // collected, when searchAfter was used:
        totalHitCount += shard.TotalHits;
        if (shard.ScoreDocs != null && shard.ScoreDocs.Length > 0)
        {
            availHitCount += shard.ScoreDocs.Length;
            queue.Add(new ShardRef(shardIDX));
            maxScore = Math.Max(maxScore, shard.MaxScore);
            //System.out.println("  maxScore now " + maxScore + " vs " + shard.getMaxScore());
        }
    }

    if (availHitCount == 0)
    {
        maxScore = float.NaN;
    }

    ScoreDoc[] hits;
    if (availHitCount <= start)
    {
        hits = new ScoreDoc[0];
    }
    else
    {
        hits = new ScoreDoc[Math.Min(size, availHitCount - start)];
        int requestedResultWindow = start + size;
        int numIterOnHits = Math.Min(availHitCount, requestedResultWindow);
        int hitUpto = 0;
        while (hitUpto < numIterOnHits)
        {
            Debug.Assert(queue.Count > 0);
            ShardRef @ref = queue.Pop();
            ScoreDoc hit = shardHits[@ref.ShardIndex].ScoreDocs[@ref.HitIndex++];
            hit.ShardIndex = @ref.ShardIndex;
            if (hitUpto >= start)
            {
                hits[hitUpto - start] = hit;
            }

            //System.out.println("  hitUpto=" + hitUpto);
            //System.out.println("    doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);

            hitUpto++;

            if (@ref.HitIndex < shardHits[@ref.ShardIndex].ScoreDocs.Length)
            {
                // Not done with these TopDocs yet:
                queue.Add(@ref);
            }
        }
    }

    if (sort == null)
    {
        return new TopDocs(totalHitCount, hits, maxScore);
    }
    else
    {
        return new TopFieldDocs(totalHitCount, hits, sort.GetSort(), maxScore);
    }
}
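A hedged sketch of paging with the overload above: merge three per-shard result sets by score and keep only the second page of ten hits (the shard variables are assumed to come from running the same query against each shard; names are illustrative):

TopDocs[] shardHits = { shard0Hits, shard1Hits, shard2Hits };
// sort == null merges by descending score; start = 10, size = 10 slices out page 2
TopDocs page2 = TopDocs.Merge(null, 10, 10, shardHits);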
private void TestRandomTrieAndClassicRangeQuery(int precisionStep)
{
    System.Random rnd = NewRandom();
    System.String field = "field" + precisionStep;
    int termCountT = 0, termCountC = 0;
    for (int i = 0; i < 50; i++)
    {
        int lower = (int)(rnd.NextDouble() * noDocs * distance) + startOffset;
        int upper = (int)(rnd.NextDouble() * noDocs * distance) + startOffset;
        if (lower > upper)
        {
            int a = lower; lower = upper; upper = a;
        }

        // test inclusive range
        System.Int32 tempAux = (System.Int32)lower;
        System.Int32 tempAux2 = (System.Int32)upper;
        NumericRangeQuery tq = NumericRangeQuery.NewIntRange(field, precisionStep, tempAux, tempAux2, true, true);
        TermRangeQuery cq = new TermRangeQuery(field, NumericUtils.IntToPrefixCoded(lower), NumericUtils.IntToPrefixCoded(upper), true, true);
        TopDocs tTopDocs = searcher.Search(tq, 1);
        TopDocs cTopDocs = searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.totalHits, tTopDocs.totalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        termCountT += tq.GetTotalNumberOfTerms();
        termCountC += cq.GetTotalNumberOfTerms();

        // test exclusive range
        System.Int32 tempAux3 = (System.Int32)lower;
        System.Int32 tempAux4 = (System.Int32)upper;
        tq = NumericRangeQuery.NewIntRange(field, precisionStep, tempAux3, tempAux4, false, false);
        cq = new TermRangeQuery(field, NumericUtils.IntToPrefixCoded(lower), NumericUtils.IntToPrefixCoded(upper), false, false);
        tTopDocs = searcher.Search(tq, 1);
        cTopDocs = searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.totalHits, tTopDocs.totalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        termCountT += tq.GetTotalNumberOfTerms();
        termCountC += cq.GetTotalNumberOfTerms();

        // test left exclusive range
        System.Int32 tempAux5 = (System.Int32)lower;
        System.Int32 tempAux6 = (System.Int32)upper;
        tq = NumericRangeQuery.NewIntRange(field, precisionStep, tempAux5, tempAux6, false, true);
        cq = new TermRangeQuery(field, NumericUtils.IntToPrefixCoded(lower), NumericUtils.IntToPrefixCoded(upper), false, true);
        tTopDocs = searcher.Search(tq, 1);
        cTopDocs = searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.totalHits, tTopDocs.totalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        termCountT += tq.GetTotalNumberOfTerms();
        termCountC += cq.GetTotalNumberOfTerms();

        // test right exclusive range
        System.Int32 tempAux7 = (System.Int32)lower;
        System.Int32 tempAux8 = (System.Int32)upper;
        tq = NumericRangeQuery.NewIntRange(field, precisionStep, tempAux7, tempAux8, true, false);
        cq = new TermRangeQuery(field, NumericUtils.IntToPrefixCoded(lower), NumericUtils.IntToPrefixCoded(upper), true, false);
        tTopDocs = searcher.Search(tq, 1);
        cTopDocs = searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.totalHits, tTopDocs.totalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        termCountT += tq.GetTotalNumberOfTerms();
        termCountC += cq.GetTotalNumberOfTerms();
    }
    if (precisionStep == System.Int32.MaxValue)
    {
        Assert.AreEqual(termCountT, termCountC, "Total number of terms should be equal for unlimited precStep");
    }
    else
    {
        System.Console.Out.WriteLine("Average number of terms during random search on '" + field + "':");
        System.Console.Out.WriteLine(" Trie query: " + (((double)termCountT) / (50 * 4)));
        System.Console.Out.WriteLine(" Classical query: " + (((double)termCountC) / (50 * 4)));
    }
}
public virtual void TestZeroPosIncr()
{
    Directory dir = new RAMDirectory();
    Token[] tokens = new Token[3];
    tokens[0] = new Token();
    tokens[0].Append("a");
    tokens[0].PositionIncrement = 1;
    tokens[1] = new Token();
    tokens[1].Append("b");
    tokens[1].PositionIncrement = 0;
    tokens[2] = new Token();
    tokens[2].Append("c");
    tokens[2].PositionIncrement = 0;

    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    doc.Add(new TextField("field", new CannedTokenStream(tokens)));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new TextField("field", new CannedTokenStream(tokens)));
    writer.AddDocument(doc);
    IndexReader r = writer.GetReader();
    writer.Dispose();
    IndexSearcher s = NewSearcher(r);
    MultiPhraseQuery mpq = new MultiPhraseQuery();
    //mpq.setSlop(1);

    // NOTE: not great that if we do the else clause here we
    // get different scores! MultiPhraseQuery counts that
    // phrase as occurring twice per doc (it should be 1, I
    // think?). this is because MultipleTermPositions is able to
    // return the same position more than once (0, in this
    // case):
    if (true)
    {
        mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
        mpq.Add(new Term[] { new Term("field", "a") }, 0);
    }
    else
    {
#pragma warning disable 162
        mpq.Add(new Term[] { new Term("field", "a") }, 0);
        mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
#pragma warning restore 162
    }
    TopDocs hits = s.Search(mpq, 2);
    Assert.AreEqual(2, hits.TotalHits);
    Assert.AreEqual(hits.ScoreDocs[0].Score, hits.ScoreDocs[1].Score, 1e-5);
    /*
     * for(int hit=0;hit<hits.TotalHits;hit++) {
     *   ScoreDoc sd = hits.ScoreDocs[hit];
     *   System.out.println("  hit doc=" + sd.Doc + " score=" + sd.Score);
     * }
     */
    r.Dispose();
    dir.Dispose();
}
public virtual void TestCRTReopen()
{
    //test behaving badly

    //should be high enough
    int maxStaleSecs = 20;

    //build crap data just to store it.
    string s = " abcdefghijklmnopqrstuvwxyz ";
    char[] chars = s.ToCharArray();
    StringBuilder builder = new StringBuilder(2048);
    for (int i = 0; i < 2048; i++)
    {
        builder.Append(chars[Random.Next(chars.Length)]);
    }
    string content = builder.ToString();

    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
    IndexWriterConfig config = new IndexWriterConfig(
#pragma warning disable 612, 618
        Version.LUCENE_46,
#pragma warning restore 612, 618
        new MockAnalyzer(Random));
    config.SetIndexDeletionPolicy(sdp);
    config.SetOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(dir, config);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
    ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread =
        new ControlledRealTimeReopenThread<IndexSearcher>(tiw, sm, maxStaleSecs, 0);

    controlledRealTimeReopenThread.IsBackground = (true);
    controlledRealTimeReopenThread.Start();

    IList<ThreadJob> commitThreads = new List<ThreadJob>();

    for (int i = 0; i < 500; i++)
    {
        if (i > 0 && i % 50 == 0)
        {
            ThreadJob commitThread = new RunnableAnonymousClass(this, sdp, dir, iw);
            commitThread.Start();
            commitThreads.Add(commitThread);
        }
        Document d = new Document();
        d.Add(new TextField("count", i + "", Field.Store.NO));
        d.Add(new TextField("content", content, Field.Store.YES));
        long start = Environment.TickCount;
        long l = tiw.AddDocument(d);
        controlledRealTimeReopenThread.WaitForGeneration(l);
        long wait = Environment.TickCount - start;
        assertTrue("waited too long for generation " + wait, wait < (maxStaleSecs * 1000));
        IndexSearcher searcher = sm.Acquire();
        TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
        sm.Release(searcher);
        assertEquals(1, td.TotalHits);
    }

    foreach (ThreadJob commitThread in commitThreads)
    {
        commitThread.Join();
    }

    controlledRealTimeReopenThread.Dispose();
    sm.Dispose();
    iw.Dispose();
    dir.Dispose();
}
private void AssertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2)
{
    // The constrained query
    // should be a subset to the unconstrained query.
    if (top2.TotalHits > top1.TotalHits)
    {
        Assert.Fail("Constrained results not a subset:\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q.ToString());
    }

    for (int hit = 0; hit < top2.TotalHits; hit++)
    {
        int id = top2.ScoreDocs[hit].Doc;
        float score = top2.ScoreDocs[hit].Score;
        bool found = false;
        // find this doc in other hits
        for (int other = 0; other < top1.TotalHits; other++)
        {
            if (top1.ScoreDocs[other].Doc == id)
            {
                found = true;
                float otherScore = top1.ScoreDocs[other].Score;
                // check if scores match
                Assert.AreEqual(score, otherScore, CheckHits.ExplainToleranceDelta(score, otherScore),
                    "Doc " + id + " scores don't match\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q.ToString());
            }
        }

        // check if subset
        if (!found)
        {
            Assert.Fail("Doc " + id + " not found\n" + CheckHits.TopdocsString(top1, 0, 0) + CheckHits.TopdocsString(top2, 0, 0) + "for query:" + q.ToString());
        }
    }
}
public virtual void TestEnforceDeletions()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMergeScheduler(new SerialMergeScheduler())
            .SetMergePolicy(NewLogMergePolicy(10))); // asserts below require no unexpected merges

    // NOTE: cannot use writer.getReader because RIW (on
    // flipping a coin) may give us a newly opened reader,
    // but we use .reopen on this reader below and expect to
    // (must) get an NRT reader:
    DirectoryReader reader = DirectoryReader.Open(writer.w, true);
    // same reason we don't wrap?
    IndexSearcher searcher = NewSearcher(reader, false);

    // add a doc, refresh the reader, and check that it's there
    Document doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    writer.AddDocument(doc);

    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);

    TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
    Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

    Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

    CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.IsTrue(filter.SizeInBytes() > 0);

    Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

    Query constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

    // make sure we get a cache hit when we reopen reader
    // that had no change to deletions

    // fake delete (deletes nothing):
    writer.DeleteDocuments(new Term("foo", "bar"));

    IndexReader oldReader = reader;
    reader = RefreshReader(reader);
    Assert.IsTrue(reader == oldReader);
    int missCount = filter.MissCount;
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

    // cache hit:
    Assert.AreEqual(missCount, filter.MissCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.DeleteDocuments(new Term("id", "1"));

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry! Else the assert below may sometimes
    // fail:
    oldReader = reader;
    reader = RefreshReader(reader);

    searcher = NewSearcher(reader, false);

    missCount = filter.MissCount;
    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

    // cache hit
    Assert.AreEqual(missCount, filter.MissCount);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

    // apply deletes dynamically:
    filter = new CachingWrapperFilter(startFilter);
    writer.AddDocument(doc);
    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
    missCount = filter.MissCount;
    Assert.IsTrue(missCount > 0);
    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missCount, filter.MissCount);

    writer.AddDocument(doc);

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry! Else the assert below may sometimes
    // fail:
    oldReader = reader;
    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
    Assert.IsTrue(filter.MissCount > missCount);
    missCount = filter.MissCount;

    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missCount, filter.MissCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.DeleteDocuments(new Term("id", "1"));

    reader = RefreshReader(reader);
    searcher = NewSearcher(reader, false);

    docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
    Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
    // CWF reused the same entry (it dynamically applied the deletes):
    Assert.AreEqual(missCount, filter.MissCount);

    docs = searcher.Search(constantScore, 1);
    Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
    // CWF reused the same entry (it dynamically applied the deletes):
    Assert.AreEqual(missCount, filter.MissCount);

    // NOTE: silliness to make sure JRE does not eliminate
    // our holding onto oldReader to prevent
    // CachingWrapperFilter's WeakHashMap from dropping the
    // entry:
    Assert.IsTrue(oldReader != null);

    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
public void TestMultithreadedWaitForGeneration()
{
    Thread CreateWorker(int threadNum, ControlledRealTimeReopenThread<IndexSearcher> controlledReopen, long generation,
                        SearcherManager searcherManager, List<ThreadOutput> outputList)
    {
        ThreadStart threadStart = delegate
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            controlledReopen.WaitForGeneration(generation);
            stopwatch.Stop();
            double milliSecsWaited = stopwatch.Elapsed.TotalMilliseconds;

            int numRecs = 0;
            IndexSearcher indexSearcher = searcherManager.Acquire();
            try
            {
                TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 1);
                numRecs = topDocs.TotalHits;
            }
            finally
            {
                searcherManager.Release(indexSearcher);
            }

            lock (outputList)
            {
                outputList.Add(new ThreadOutput { ThreadNum = threadNum, NumRecs = numRecs, MilliSecsWaited = milliSecsWaited });
            }
        };

        return new Thread(threadStart);
    }

    int threadCount = 3;
    List<ThreadOutput> outputList = new List<ThreadOutput>();

    RAMDirectory indexDir = new RAMDirectory();
    Analyzer standardAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
    IndexWriterConfig indexConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, standardAnalyzer);
    IndexWriter indexWriter = new IndexWriter(indexDir, indexConfig);
    TrackingIndexWriter trackingWriter = new TrackingIndexWriter(indexWriter);

    //Add two documents
    Document doc = new Document();
    doc.Add(new Int32Field("id", 1, Field.Store.YES));
    doc.Add(new StringField("name", "Doc1", Field.Store.YES));
    long generation = trackingWriter.AddDocument(doc);

    doc.Add(new Int32Field("id", 2, Field.Store.YES));
    doc.Add(new StringField("name", "Doc3", Field.Store.YES));
    generation = trackingWriter.AddDocument(doc);

    SearcherManager searcherManager = new SearcherManager(indexWriter, applyAllDeletes: true, null);

    //Reopen SearcherManager every 2 secs via background thread if no thread waiting for newer generation.
    //Reopen SearcherManager after .2 secs if another thread IS waiting on a newer generation.
    double maxRefreshSecs = 2.0;
    double minRefreshSecs = .2;
    var controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(trackingWriter, searcherManager, maxRefreshSecs, minRefreshSecs);

    //Start() will start a separate thread that will invoke the object's Run(). However,
    //calling Run() directly would execute that code on the current thread rather than a new thread,
    //which would defeat the purpose of using controlledRealTimeReopenThread. This aspect of the API
    //is not as intuitive as it could be, i.e. call Start(), not Run().
    controlledRealTimeReopenThread.IsBackground = true; //Set as a background thread
    controlledRealTimeReopenThread.Name = "Controlled Real Time Reopen Thread";
    controlledRealTimeReopenThread.Priority = (ThreadPriority)Math.Min((int)Thread.CurrentThread.Priority + 2, (int)ThreadPriority.Highest);
    controlledRealTimeReopenThread.Start();

    //Create the threads that will run the searches
    List<Thread> threadList = new List<Thread>();
    for (int i = 1; i <= threadCount; i++)
    {
        threadList.Add(CreateWorker(i, controlledRealTimeReopenThread, generation, searcherManager, outputList));
    }

    //Start all the threads
    foreach (Thread thread in threadList)
    {
        thread.Start();
    }

    //Wait for the threads to finish.
    foreach (Thread thread in threadList)
    {
        thread.Join(); //will wait here until the thread terminates.
    }

    //Now make sure that no thread waited longer than our min refresh time
    //plus a small fudge factor. Also verify that all threads reported back and
    //each saw 2 records.

    //Verify all threads reported back a result.
    assertEquals(threadCount, outputList.Count);

    int millisecsPerSec = 1000;
    foreach (ThreadOutput output in outputList)
    {
        //Verify the thread saw exactly 2 docs
        assertEquals(2, output.NumRecs);

        //Verify the thread wait time was around what was expected.
        Assert.True(output.MilliSecsWaited <= (minRefreshSecs * millisecsPerSec) + 30); //30ms is a fudge factor to account for call overhead
    }

    controlledRealTimeReopenThread.Dispose(); //will kill and join to the thread
    Assert.False(controlledRealTimeReopenThread.IsAlive); //to prove that Dispose really does kill the thread.

    searcherManager.Dispose();
    indexWriter.Dispose();
    indexDir.Dispose();
}
public virtual void TestInfiniteValues()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    doc.Add(new SingleField("float", float.NegativeInfinity, Field.Store.NO));
    doc.Add(new Int32Field("int", int.MinValue, Field.Store.NO));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new SingleField("float", float.PositiveInfinity, Field.Store.NO));
    doc.Add(new Int32Field("int", int.MaxValue, Field.Store.NO));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new SingleField("float", 0.0f, Field.Store.NO));
    doc.Add(new Int32Field("int", 0, Field.Store.NO));
    writer.AddDocument(doc);

    foreach (float f in TestNumericUtils.FLOAT_NANs)
    {
        doc = new Document();
        doc.Add(new SingleField("float", f, Field.Store.NO));
        writer.AddDocument(doc);
    }

    writer.Dispose();

    IndexReader r = DirectoryReader.Open(dir);
    IndexSearcher s = NewSearcher(r);

    Query q = NumericRangeQuery.NewInt32Range("int", null, null, true, true);
    TopDocs topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewInt32Range("int", null, null, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewInt32Range("int", int.MinValue, int.MaxValue, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewInt32Range("int", int.MinValue, int.MaxValue, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewSingleRange("float", null, null, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewSingleRange("float", null, null, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewSingleRange("float", float.NegativeInfinity, float.PositiveInfinity, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewSingleRange("float", float.NegativeInfinity, float.PositiveInfinity, false, false);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

    q = NumericRangeQuery.NewSingleRange("float", float.NaN, float.NaN, true, true);
    topDocs = s.Search(q, 10);
    Assert.AreEqual(TestNumericUtils.FLOAT_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count");

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Since custom scoring modified the order of docs, map results
/// by doc ids so that we can later compare/verify them.
/// </summary>
/// <param name="td"></param>
/// <returns></returns>
private IDictionary<int, float> TopDocsToMap(TopDocs td)
{
    var h = new Dictionary<int, float>();
    // Note: indexing ScoreDocs by TotalHits assumes the search collected
    // every hit (i.e. n passed to Search() was at least TotalHits).
    for (int i = 0; i < td.TotalHits; i++)
    {
        h[td.ScoreDocs[i].Doc] = td.ScoreDocs[i].Score;
    }
    return h;
}
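A hedged example of how such a map is typically used: compare the same doc set score-by-score between a plain query and its custom-scored variant, independent of result order (`searcher`, `plainQ`, and `customQ` are assumed names, not from the snippet above):

IDictionary<int, float> plain = TopDocsToMap(searcher.Search(plainQ, 1000));
IDictionary<int, float> custom = TopDocsToMap(searcher.Search(customQ, 1000));
Assert.AreEqual(plain.Count, custom.Count); // same number of documents matched
foreach (KeyValuePair<int, float> kv in plain)
{
    Assert.IsTrue(custom.ContainsKey(kv.Key)); // order-independent comparison by doc id
}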
private void TestRandomTrieAndClassicRangeQuery(int precisionStep)
{
    string field = "field" + precisionStep;
    int totalTermCountT = 0, totalTermCountC = 0, termCountT, termCountC;
    int num = TestUtil.NextInt32(Random, 10, 20);
    for (int i = 0; i < num; i++)
    {
        int lower = (int)(Random.NextDouble() * NoDocs * Distance) + StartOffset;
        int upper = (int)(Random.NextDouble() * NoDocs * Distance) + StartOffset;
        if (lower > upper)
        {
            int a = lower; lower = upper; upper = a;
        }
        BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT32), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT32);
        NumericUtils.Int32ToPrefixCodedBytes(lower, 0, lowerBytes);
        NumericUtils.Int32ToPrefixCodedBytes(upper, 0, upperBytes);

        // test inclusive range
        NumericRangeQuery<int> tq = NumericRangeQuery.NewInt32Range(field, precisionStep, lower, upper, true, true);
        TermRangeQuery cq = new TermRangeQuery(field, lowerBytes, upperBytes, true, true);
        TopDocs tTopDocs = Searcher.Search(tq, 1);
        TopDocs cTopDocs = Searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        totalTermCountT += termCountT = CountTerms(tq);
        totalTermCountC += termCountC = CountTerms(cq);
        CheckTermCounts(precisionStep, termCountT, termCountC);

        // test exclusive range
        tq = NumericRangeQuery.NewInt32Range(field, precisionStep, lower, upper, false, false);
        cq = new TermRangeQuery(field, lowerBytes, upperBytes, false, false);
        tTopDocs = Searcher.Search(tq, 1);
        cTopDocs = Searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        totalTermCountT += termCountT = CountTerms(tq);
        totalTermCountC += termCountC = CountTerms(cq);
        CheckTermCounts(precisionStep, termCountT, termCountC);

        // test left exclusive range
        tq = NumericRangeQuery.NewInt32Range(field, precisionStep, lower, upper, false, true);
        cq = new TermRangeQuery(field, lowerBytes, upperBytes, false, true);
        tTopDocs = Searcher.Search(tq, 1);
        cTopDocs = Searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        totalTermCountT += termCountT = CountTerms(tq);
        totalTermCountC += termCountC = CountTerms(cq);
        CheckTermCounts(precisionStep, termCountT, termCountC);

        // test right exclusive range
        tq = NumericRangeQuery.NewInt32Range(field, precisionStep, lower, upper, true, false);
        cq = new TermRangeQuery(field, lowerBytes, upperBytes, true, false);
        tTopDocs = Searcher.Search(tq, 1);
        cTopDocs = Searcher.Search(cq, 1);
        Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal");
        totalTermCountT += termCountT = CountTerms(tq);
        totalTermCountC += termCountC = CountTerms(cq);
        CheckTermCounts(precisionStep, termCountT, termCountC);
    }

    CheckTermCounts(precisionStep, totalTermCountT, totalTermCountC);
    if (VERBOSE && precisionStep != int.MaxValue)
    {
        Console.WriteLine("Average number of terms during random search on '" + field + "':");
        Console.WriteLine(" Numeric query: " + (((double)totalTermCountT) / (num * 4)));
        Console.WriteLine(" Classical query: " + (((double)totalTermCountC) / (num * 4)));
    }
}
public virtual void TestRandom()
{
    Directory dir = NewDirectory();
    int numDocs = AtLeast(1000);
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    int[] idToNum = new int[numDocs];
    int maxValue = TestUtil.NextInt32(Random, 10, 1000000);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("id", "" + i, Field.Store.YES));
        int numTokens = TestUtil.NextInt32(Random, 1, 10);
        StringBuilder b = new StringBuilder();
        for (int j = 0; j < numTokens; j++)
        {
            b.Append("a ");
        }
        doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
        idToNum[i] = Random.Next(maxValue);
        doc.Add(new NumericDocValuesField("num", idToNum[i]));
        w.AddDocument(doc);
    }
    IndexReader r = w.GetReader();
    w.Dispose();

    IndexSearcher s = NewSearcher(r);
    int numHits = TestUtil.NextInt32(Random, 1, numDocs);
    bool reverse = Random.NextBoolean();

    TopDocs hits = s.Search(new TermQuery(new Term("field", "a")), numHits);

    Rescorer rescorer = new SortRescorer(new Sort(new SortField("num", SortFieldType.INT32, reverse)));
    TopDocs hits2 = rescorer.Rescore(s, hits, numHits);

    int[] expected = new int[numHits];
    for (int i = 0; i < numHits; i++)
    {
        expected[i] = hits.ScoreDocs[i].Doc;
    }

    int reverseInt = reverse ? -1 : 1;

    Array.Sort(expected, Comparer<int>.Create((a, b) =>
    {
        int av = idToNum[Convert.ToInt32(r.Document(a).Get("id"))];
        int bv = idToNum[Convert.ToInt32(r.Document(b).Get("id"))];
        if (av < bv)
        {
            return -reverseInt;
        }
        else if (bv < av)
        {
            return reverseInt;
        }
        else
        {
            // Tie break by docID, ascending
            return a - b;
        }
    }));

    bool fail = false;
    for (int i = 0; i < numHits; i++)
    {
        fail |= (int)expected[i] != hits2.ScoreDocs[i].Doc;
    }
    Assert.IsFalse(fail);

    r.Dispose();
    dir.Dispose();
}
public virtual void TestExplain()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);
    IndexReader r = w.Reader;
    w.Dispose();

    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = GetSearcher(r);

    TopDocs hits = searcher.Search(bq, 10);
    Assert.AreEqual(2, hits.TotalHits);
    Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

    // Now, resort using PhraseQuery:
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "wizard"));
    pq.Add(new Term("field", "oz"));

    Rescorer rescorer = new QueryRescorerAnonymousInnerClassHelper2(this, pq);

    TopDocs hits2 = rescorer.Rescore(searcher, hits, 10);

    // Resorting changed the order:
    Assert.AreEqual(2, hits2.TotalHits);
    Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
    Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

    int docID = hits2.ScoreDocs[0].Doc;
    Explanation explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
    string s = explain.ToString();
    Assert.IsTrue(s.Contains("TestQueryRescorer+"));
    Assert.IsTrue(s.Contains("combined first and second pass score"));
    Assert.IsTrue(s.Contains("first pass score"));
    Assert.IsTrue(s.Contains("= second pass score"));
    Assert.AreEqual(hits2.ScoreDocs[0].Score, explain.Value, 0.0f);

    docID = hits2.ScoreDocs[1].Doc;
    explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
    s = explain.ToString();
    Assert.IsTrue(s.Contains("TestQueryRescorer+"));
    Assert.IsTrue(s.Contains("combined first and second pass score"));
    Assert.IsTrue(s.Contains("first pass score"));
    Assert.IsTrue(s.Contains("no second pass score"));
    Assert.IsFalse(s.Contains("= second pass score"));
    Assert.IsTrue(s.Contains("NON-MATCH"));
    Assert.IsTrue(Math.Abs(hits2.ScoreDocs[1].Score - explain.Value) < 0.0000001f);

    r.Dispose();
    dir.Dispose();
}
public static System.String TopdocsString(TopDocs docs, int start, int end)
{
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    sb.Append("TopDocs totalHits=").Append(docs.totalHits).Append(" top=").Append(docs.scoreDocs.Length).Append('\n');
    if (end <= 0)
        end = docs.scoreDocs.Length;
    else
        end = System.Math.Min(end, docs.scoreDocs.Length);
    for (int i = start; i < end; i++)
    {
        sb.Append('\t');
        sb.Append(i);
        sb.Append(") doc=");
        sb.Append(docs.scoreDocs[i].doc);
        sb.Append("\tscore=");
        sb.Append(docs.scoreDocs[i].score);
        sb.Append('\n');
    }
    return sb.ToString();
}
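A hedged usage sketch: this helper is handy for dumping hits inline in an assertion message when two result sets disagree, as several of the tests above do (`s`, `q1`, and `q2` are assumed from the surrounding test fixture; passing end <= 0 prints all collected hits):

TopDocs top1 = s.Search(q1, null, 100);
TopDocs top2 = s.Search(q2, null, 100);
Assert.AreEqual(top1.totalHits, top2.totalHits,
    "hit counts differ:\n" + TopdocsString(top1, 0, 0) + TopdocsString(top2, 0, 0));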
public virtual void TestRandom()
{
    Directory dir = NewDirectory();
    int numDocs = AtLeast(1000);
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    int[] idToNum = new int[numDocs];
    int maxValue = TestUtil.NextInt(Random(), 10, 1000000);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("id", "" + i, Field.Store.YES));
        int numTokens = TestUtil.NextInt(Random(), 1, 10);
        StringBuilder b = new StringBuilder();
        for (int j = 0; j < numTokens; j++)
        {
            b.Append("a ");
        }
        doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
        idToNum[i] = Random().Next(maxValue);
        doc.Add(new NumericDocValuesField("num", idToNum[i]));
        w.AddDocument(doc);
    }
    IndexReader r = w.Reader;
    w.Dispose();

    IndexSearcher s = NewSearcher(r);
    int numHits = TestUtil.NextInt(Random(), 1, numDocs);
    bool reverse = Random().NextBoolean();
    //System.out.println("numHits=" + numHits + " reverse=" + reverse);

    TopDocs hits = s.Search(new TermQuery(new Term("field", "a")), numHits);

    TopDocs hits2 = new QueryRescorerAnonymousInnerClassHelper3(this, new FixedScoreQuery(idToNum, reverse))
        .Rescore(s, hits, numHits);

    int[] expected = new int[numHits];
    for (int i = 0; i < numHits; i++)
    {
        expected[i] = hits.ScoreDocs[i].Doc;
    }

    int reverseInt = reverse ? -1 : 1;

    Array.Sort(expected, new ComparerAnonymousInnerClassHelper(this, idToNum, r, reverseInt));

    bool fail = false;
    for (int i = 0; i < numHits; i++)
    {
        //System.out.println("expected=" + expected[i] + " vs " + hits2.ScoreDocs[i].Doc + " v=" + idToNum[Integer.parseInt(r.Document(expected[i]).Get("id"))]);
        if ((int)expected[i] != hits2.ScoreDocs[i].Doc)
        {
            //System.out.println("  diff!");
            fail = true;
        }
    }
    Assert.IsFalse(fail);

    r.Dispose();
    dir.Dispose();
}
private static string MakeResults(IndexSearcher searcher, TopDocs topDocs, int skip, int take, bool includeExplanation, Query query, long elapsed, IDictionary<string, int> rankings, PackageSearcherManager manager)
{
    // note the use of a StringBuilder because we have the response data already formatted as JSON in the fields in the index
    StringBuilder strBldr = new StringBuilder();

    string timestamp;
    if (!searcher.IndexReader.CommitUserData.TryGetValue("commit-time-stamp", out timestamp))
    {
        timestamp = null;
    }

    strBldr.AppendFormat("{{\"totalHits\":{0},\"timeTakenInMs\":{1},\"index\":\"{2}\"", topDocs.TotalHits, elapsed, manager.IndexName);
    if (!String.IsNullOrEmpty(timestamp))
    {
        strBldr.AppendFormat(",\"indexTimestamp\":\"{0}\"", timestamp);
    }
    if (includeExplanation)
    {
        // JsonConvert.Serialize does escaping and quoting.
        strBldr.AppendFormat(",\"executedQuery\":{0}", Newtonsoft.Json.JsonConvert.SerializeObject(query.ToString()));
    }
    strBldr.Append(",\"data\":[");

    bool hasResult = false;

    for (int i = skip; i < topDocs.ScoreDocs.Length; i++)
    {
        ScoreDoc scoreDoc = topDocs.ScoreDocs[i];
        Document doc = searcher.Doc(scoreDoc.Doc);
        string data = doc.Get("Data");

        string id = doc.Get("Id");
        NuGet.Versioning.NuGetVersion ngVersion = new Versioning.NuGetVersion(doc.Get("Version"));

        if (!String.IsNullOrEmpty(id) && ngVersion != null)
        {
            Tuple<int, int> countRecord = manager.GetDownloadCount(id, ngVersion.ToNormalizedString());
            if (countRecord != null)
            {
                // Patch the download counts into the JSON
                JObject parsed = JObject.Parse(data);
                parsed["DownloadCount"] = countRecord.Item1;
                parsed["PackageRegistration"]["DownloadCount"] = countRecord.Item2;
                data = parsed.ToString(Formatting.None);
            }
        }

        if (includeExplanation)
        {
            data = AddExplanation(searcher, data, query, scoreDoc, rankings);
        }

        strBldr.Append(data);
        strBldr.Append(",");

        hasResult = true;
    }

    // trim the trailing comma
    if (hasResult)
    {
        strBldr.Remove(strBldr.Length - 1, 1);
    }

    strBldr.Append("]}");

    string result = strBldr.ToString();
    return result;
}
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewStringField("id", "0", Field.Store.YES)); doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); // Do ordinary BooleanQuery: BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = GetSearcher(r); searcher.Similarity = new DefaultSimilarity(); TopDocs hits = searcher.Search(bq, 10); Assert.AreEqual(2, hits.TotalHits); Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id")); // Now, resort using PhraseQuery: PhraseQuery pq = new PhraseQuery(); pq.Slop = 5; pq.Add(new Term("field", "wizard")); pq.Add(new Term("field", "oz")); TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits2.TotalHits); Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id")); // Resort using SpanNearQuery: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard")); SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz")); SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true); TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits3.TotalHits); Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id")); r.Dispose(); dir.Dispose(); }
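The pattern TestBasic exercises generalizes to any cheap-first-pass / expensive-second-pass pairing. A minimal sketch, assuming the same Lucene.NET 4.8 `QueryRescorer.Rescore` overload used in the test; the queries and the 2.0 second-pass weight are illustrative, not prescribed:

using Lucene.Net.Search;

public static class RescoreSketch
{
    // Run a cheap first-pass query over the whole index, then re-rank only
    // the top N hits with a more expensive query (e.g. a sloppy PhraseQuery).
    public static TopDocs TwoPass(IndexSearcher searcher, Query cheap, Query expensive, int topN)
    {
        TopDocs firstPass = searcher.Search(cheap, topN);
        // 2.0 weights the second-pass score when it is combined with the first-pass score.
        return QueryRescorer.Rescore(searcher, firstPass, expensive, 2.0, topN);
    }
}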
public override void Run() { try { docs = (sort == null)?searchable.Search(weight, filter, nDocs):searchable.Search(weight, filter, nDocs, sort); } // Store the IOException for later use by the caller of this thread catch (System.IO.IOException ioe) { this.ioe = ioe; } if (this.ioe == null) { // if we are sorting by fields, we need to tell the field sorted hit queue // the actual type of fields, in case the original list contained AUTO. // if the searchable returns null for fields, we'll have problems. if (sort != null) { TopFieldDocs docsFields = (TopFieldDocs) docs; // If one of the Sort fields is FIELD_DOC, need to fix its values, so that // it will break ties by doc Id properly. Otherwise, it will compare to // 'relative' doc Ids, that belong to two different searchables. for (int j = 0; j < docsFields.fields.Length; j++) { if (docsFields.fields[j].GetType() == SortField.DOC) { // iterate over the score docs and change their fields value for (int j2 = 0; j2 < docs.scoreDocs.Length; j2++) { FieldDoc fd = (FieldDoc) docs.scoreDocs[j2]; fd.fields[j] = (System.Int32) (((System.Int32) fd.fields[j]) + starts[i]); } break; } } ((FieldDocSortedHitQueue) hq).SetFields(docsFields.fields); } ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; j < scoreDocs.Length; j++) { // merge scoreDocs into hq ScoreDoc scoreDoc = scoreDocs[j]; scoreDoc.doc += starts[i]; // convert doc //it would be so nice if we had a thread-safe insert lock (hq) { if (!hq.Insert(scoreDoc)) break; } // no more scores > minScore } } }
internal virtual void TestSort(bool useFrom, bool VERBOSE) { IndexReader reader = null; Directory dir = null; if (!VERBOSE) { Console.WriteLine("Verbosity disabled. Enable manually if needed."); } int numDocs = VERBOSE ? AtLeast(50) : AtLeast(1000); //final int numDocs = AtLeast(50); string[] tokens = new string[] { "a", "b", "c", "d", "e" }; if (VERBOSE) { Console.WriteLine("TEST: make index"); } { dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); // w.setDoRandomForceMerge(false); // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100)); string[] content = new string[AtLeast(20)]; for (int contentIDX = 0; contentIDX < content.Length; contentIDX++) { StringBuilder sb = new StringBuilder(); int numTokens = TestUtil.NextInt32(Random, 1, 10); for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) { sb.Append(tokens[Random.Next(tokens.Length)]).Append(' '); } content[contentIDX] = sb.ToString(); } for (int docIDX = 0; docIDX < numDocs; docIDX++) { Document doc = new Document(); doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO)); doc.Add(NewTextField("text", content[Random.Next(content.Length)], Field.Store.NO)); doc.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO)); int intValue; if (Random.Next(100) == 17) { intValue = int.MinValue; } else if (Random.Next(100) == 17) { intValue = int.MaxValue; } else { intValue = Random.Next(); } doc.Add(new Int32Field("int", intValue, Field.Store.NO)); if (VERBOSE) { Console.WriteLine(" doc=" + doc); } w.AddDocument(doc); } reader = w.GetReader(); w.Dispose(); } // NOTE: sometimes reader has just one segment, which is // important to test IndexSearcher searcher = NewSearcher(reader); IndexReaderContext ctx = searcher.TopReaderContext; ShardSearcher[] subSearchers; int[] docStarts; if (ctx is AtomicReaderContext) { subSearchers = new ShardSearcher[1]; docStarts = new int[1]; subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx); docStarts[0] = 0; } else { CompositeReaderContext compCTX = (CompositeReaderContext)ctx; int size = compCTX.Leaves.Count; subSearchers = new ShardSearcher[size]; docStarts = new int[size]; int docBase = 0; for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++) { AtomicReaderContext leave = compCTX.Leaves[searcherIDX]; subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX); docStarts[searcherIDX] = docBase; docBase += leave.Reader.MaxDoc; } } IList <SortField> sortFields = new List <SortField>(); sortFields.Add(new SortField("string", SortFieldType.STRING, true)); sortFields.Add(new SortField("string", SortFieldType.STRING, false)); sortFields.Add(new SortField("int", SortFieldType.INT32, true)); sortFields.Add(new SortField("int", SortFieldType.INT32, false)); sortFields.Add(new SortField("float", SortFieldType.SINGLE, true)); sortFields.Add(new SortField("float", SortFieldType.SINGLE, false)); sortFields.Add(new SortField(null, SortFieldType.SCORE, true)); sortFields.Add(new SortField(null, SortFieldType.SCORE, false)); sortFields.Add(new SortField(null, SortFieldType.DOC, true)); sortFields.Add(new SortField(null, SortFieldType.DOC, false)); for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++) { // TODO: custom FieldComp... 
Query query = new TermQuery(new Term("text", tokens[Random.Next(tokens.Length)])); Sort sort; if (Random.Next(10) == 4) { // Sort by score sort = null; } else { SortField[] randomSortFields = new SortField[TestUtil.NextInt32(Random, 1, 3)]; for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++) { randomSortFields[sortIDX] = sortFields[Random.Next(sortFields.Count)]; } sort = new Sort(randomSortFields); } int numHits = TestUtil.NextInt32(Random, 1, numDocs + 5); //final int numHits = 5; if (VERBOSE) { Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits); } int from = -1; int size = -1; // First search on whole index: TopDocs topHits; if (sort == null) { if (useFrom) { TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random.NextBoolean()); searcher.Search(query, c); from = TestUtil.NextInt32(Random, 0, numHits - 1); size = numHits - from; TopDocs tempTopHits = c.GetTopDocs(); if (from < tempTopHits.ScoreDocs.Length) { // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount // than TopDocs#merge currently has ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)]; Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length); tempTopHits.ScoreDocs = newScoreDocs; topHits = tempTopHits; } else { topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore); } } else { topHits = searcher.Search(query, numHits); } } else { TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean()); searcher.Search(query, c); if (useFrom) { from = TestUtil.NextInt32(Random, 0, numHits - 1); size = numHits - from; TopDocs tempTopHits = c.GetTopDocs(); if (from < tempTopHits.ScoreDocs.Length) { // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount // than TopDocs#merge currently has ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)]; Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length); tempTopHits.ScoreDocs = newScoreDocs; topHits = tempTopHits; } else { topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore); } } else { topHits = c.GetTopDocs(0, numHits); } } if (VERBOSE) { if (useFrom) { Console.WriteLine("from=" + from + " size=" + size); } Console.WriteLine(" top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore)); if (topHits.ScoreDocs != null) { for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++) { ScoreDoc sd = topHits.ScoreDocs[hitIDX]; Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } } } // ... then all shards: Weight w = searcher.CreateNormalizedWeight(query); TopDocs[] shardHits = new TopDocs[subSearchers.Length]; for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++) { TopDocs subHits; ShardSearcher subSearcher = subSearchers[shardIDX]; if (sort == null) { subHits = subSearcher.Search(w, numHits); } else { TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean()); subSearcher.Search(w, c); subHits = c.GetTopDocs(0, numHits); } shardHits[shardIDX] = subHits; if (VERBOSE) { Console.WriteLine(" shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? 
"null" : subHits.ScoreDocs.Length.ToString())); if (subHits.ScoreDocs != null) { foreach (ScoreDoc sd in subHits.ScoreDocs) { Console.WriteLine(" doc=" + sd.Doc + " score=" + sd.Score); } } } } // Merge: TopDocs mergedHits; if (useFrom) { mergedHits = TopDocs.Merge(sort, from, size, shardHits); } else { mergedHits = TopDocs.Merge(sort, numHits, shardHits); } if (mergedHits.ScoreDocs != null) { // Make sure the returned shards are correct: for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++) { ScoreDoc sd = mergedHits.ScoreDocs[hitIDX]; Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard"); } } TestUtil.AssertEquals(topHits, mergedHits); } reader.Dispose(); dir.Dispose(); }
public static string TopdocsString(TopDocs docs, int start, int end) { StringBuilder sb = new StringBuilder(); sb.Append("TopDocs totalHits=").Append(docs.TotalHits).Append(" top=").Append(docs.ScoreDocs.Length).Append('\n'); if (end <= 0) { end = docs.ScoreDocs.Length; } else { end = Math.Min(end, docs.ScoreDocs.Length); } for (int i = start; i < end; i++) { sb.Append('\t'); sb.Append(i); sb.Append(") doc="); sb.Append(docs.ScoreDocs[i].Doc); sb.Append("\tscore="); sb.Append(docs.ScoreDocs[i].Score); sb.Append('\n'); } return sb.ToString(); }
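An illustrative call (the `searcher`, field, and term are assumptions) showing the helper in a debugging session; per the code above, passing `end <= 0` means "all collected hits":

// Dump every collected hit of an arbitrary query, one line per hit.
TopDocs docs = searcher.Search(new TermQuery(new Term("field", "a")), 10);
Console.WriteLine(TopdocsString(docs, 0, 0));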
public virtual void TestRandomQueries() { string[] vals = new string[] { "w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz" }; int tot = 0; BooleanQuery q1 = null; try { // increase number of iterations for more complete testing int num = AtLeast(20); for (int i = 0; i < num; i++) { int level = Random.Next(3); q1 = RandBoolQuery(new Random(Random.Next()), Random.NextBoolean(), level, field, vals, null); // Can't sort by relevance since floating point numbers may not quite // match up. Sort sort = Sort.INDEXORDER; QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, q1, Searcher); // baseline sim try { // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop. Searcher.Similarity = BigSearcher.Similarity; // random sim QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, q1, Searcher); } finally { Searcher.Similarity = new DefaultSimilarity(); // restore } TopFieldCollector collector = TopFieldCollector.Create(sort, 1000, false, true, true, true); Searcher.Search(q1, null, collector); ScoreDoc[] hits1 = collector.GetTopDocs().ScoreDocs; collector = TopFieldCollector.Create(sort, 1000, false, true, true, false); Searcher.Search(q1, null, collector); ScoreDoc[] hits2 = collector.GetTopDocs().ScoreDocs; tot += hits2.Length; CheckHits.CheckEqual(q1, hits1, hits2); BooleanQuery q3 = new BooleanQuery(); q3.Add(q1, Occur.SHOULD); q3.Add(new PrefixQuery(new Term("field2", "b")), Occur.SHOULD); TopDocs hits4 = BigSearcher.Search(q3, 1); Assert.AreEqual(MulFactor * collector.TotalHits + NUM_EXTRA_DOCS / 2, hits4.TotalHits); } } catch (Exception) { // For easier debugging Console.WriteLine("failed query: " + q1); throw; } // System.out.println("Total hits:"+tot); }
public void TestEnforceDeletions() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); IndexReader reader = writer.GetReader(); IndexSearcher searcher = new IndexSearcher(reader); // add a doc, refresh the reader, and check that it's there Document doc = new Document(); doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); reader = RefreshReader(reader); searcher = new IndexSearcher(reader); TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1); Assert.AreEqual(1, docs.TotalHits, "Should find a hit..."); SpanFilter startFilter = new SpanQueryFilter(new SpanTermQuery(new Term("id", "1"))); // ignore deletions CachingSpanFilter filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit..."); ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // now delete the doc, refresh the reader, and see that it's not there writer.DeleteDocuments(new Term("id", "1")); reader = RefreshReader(reader); searcher = new IndexSearcher(reader); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit..."); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // still 1 hit: with DeletesMode.IGNORE the cached filter keeps matching the deleted doc // force cache to regenerate: filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); writer.AddDocument(doc); reader = RefreshReader(reader); searcher = new IndexSearcher(reader); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit..."); constantScore = new ConstantScoreQuery(filter); docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); // make sure we get a cache hit when we reopen readers // that had no new deletions IndexReader newReader = RefreshReader(reader); Assert.IsTrue(reader != newReader); reader = newReader; searcher = new IndexSearcher(reader); int missCount = filter.missCount; docs = searcher.Search(constantScore, 1); Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit..."); Assert.AreEqual(missCount, filter.missCount); // now delete the doc, refresh the reader, and see that it's not there writer.DeleteDocuments(new Term("id", "1")); reader = RefreshReader(reader); searcher = new IndexSearcher(reader); docs = searcher.Search(new MatchAllDocsQuery(), filter, 1); Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit..."); docs = searcher.Search(constantScore, 1); Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit..."); }
private void CompareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults) { assertEquals(results.TotalHits, joinResults.TotalHits); assertEquals(results.ScoreDocs.Length, joinResults.ScoreDocs.Length); for (int hitCount = 0; hitCount < results.ScoreDocs.Length; hitCount++) { ScoreDoc hit = results.ScoreDocs[hitCount]; ScoreDoc joinHit = joinResults.ScoreDocs[hitCount]; Document doc1 = r.Document(hit.Doc); Document doc2 = joinR.Document(joinHit.Doc); assertEquals("hit " + hitCount + " differs", doc1.Get("childID"), doc2.Get("childID")); // don't compare scores -- they are expected to differ assertTrue(hit is FieldDoc); assertTrue(joinHit is FieldDoc); FieldDoc hit0 = (FieldDoc)hit; FieldDoc joinHit0 = (FieldDoc)joinHit; assertArrayEquals(hit0.Fields, joinHit0.Fields); } }
private void Compare(TopDocs oldHits, TopDocs newHits) { Assert.AreEqual(oldHits.totalHits, newHits.totalHits); Assert.AreEqual(oldHits.scoreDocs.Length, newHits.scoreDocs.Length); ScoreDoc[] oldDocs = oldHits.scoreDocs; ScoreDoc[] newDocs = newHits.scoreDocs; for (int i = 0; i < oldDocs.Length; i++) { if (oldDocs[i] is FieldDoc) { System.Diagnostics.Debug.Assert(newDocs[i] is FieldDoc); FieldDoc oldHit = (FieldDoc) oldDocs[i]; FieldDoc newHit = (FieldDoc) newDocs[i]; Assert.AreEqual(oldHit.doc, newHit.doc, "hit " + i + " of " + oldDocs.Length + " differs: oldDoc=" + oldHit.doc + " vs newDoc=" + newHit.doc + " oldFields=" + _TestUtil.ArrayToString(oldHit.fields) + " newFields=" + _TestUtil.ArrayToString(newHit.fields)); Assert.AreEqual(oldHit.score, newHit.score, 0.00001); Assert.IsTrue(SupportClass.CollectionsHelper.Equals(oldHit.fields, newHit.fields)); } else { ScoreDoc oldHit = oldDocs[i]; ScoreDoc newHit = newDocs[i]; Assert.AreEqual(oldHit.doc, newHit.doc); Assert.AreEqual(oldHit.score, newHit.score, 0.00001); } } }
private IEnumerable<IAuditEntry> getResults(TopDocs ids, int page, IndexSearcher searcher) { const int pageSize = 20; int skip = page * pageSize; return ids.ScoreDocs.Reverse().Skip(skip).Take(pageSize).Select(x => new BasicAuditEntry(searcher.Doc(x.Doc), x.Doc)); }
// since custom scoring modifies the order of docs, map results // by doc ids so that we can later compare/verify them private System.Collections.Hashtable TopDocsToMap(TopDocs td) { System.Collections.Hashtable h = new System.Collections.Hashtable(); for (int i = 0; i < td.TotalHits; i++) { h[(System.Int32) td.ScoreDocs[i].Doc] = (float) td.ScoreDocs[i].Score; } return h; }
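A typed variant of the same helper, sketched with generics; iterating `ScoreDocs` directly also sidesteps the assumption above that `TotalHits` never exceeds the number of hits actually collected:

private static System.Collections.Generic.Dictionary<int, float> TopDocsToDictionary(TopDocs td)
{
    var map = new System.Collections.Generic.Dictionary<int, float>();
    // Key by doc id, value is score -- the same mapping as the Hashtable
    // version, but without boxing every int and float.
    foreach (ScoreDoc sd in td.ScoreDocs)
    {
        map[sd.Doc] = sd.Score;
    }
    return map;
}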
public void TestStraightForwardDemonstration() { RAMDirectory indexDir = new RAMDirectory(); Analyzer standardAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); IndexWriterConfig indexConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, standardAnalyzer); IndexWriter indexWriter = new IndexWriter(indexDir, indexConfig); TrackingIndexWriter trackingWriter = new TrackingIndexWriter(indexWriter); Document doc = new Document(); doc.Add(new Int32Field("id", 1, Field.Store.YES)); doc.Add(new StringField("name", "Doc1", Field.Store.YES)); trackingWriter.AddDocument(doc); SearcherManager searcherManager = new SearcherManager(indexWriter, applyAllDeletes: true, null); //Reopen SearcherManager every 1 sec via a background thread if no thread is waiting for a newer generation. //Reopen SearcherManager after .2 secs if another thread IS waiting on a newer generation. var controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(trackingWriter, searcherManager, 1, 0.2); //Start() will start a separate thread that will invoke the object's Run(). However, //calling Run() directly would execute that code on the current thread rather than a new thread, //which would defeat the purpose of using controlledRealTimeReopenThread. This aspect of the API //is not as intuitive as it could be, i.e. call Start(), not Run(). controlledRealTimeReopenThread.IsBackground = true; //Set as a background thread controlledRealTimeReopenThread.Name = "Controlled Real Time Reopen Thread"; controlledRealTimeReopenThread.Priority = (ThreadPriority)Math.Min((int)Thread.CurrentThread.Priority + 2, (int)ThreadPriority.Highest); controlledRealTimeReopenThread.Start(); //An indexSearcher acquired now only sees Doc1 IndexSearcher indexSearcher = searcherManager.Acquire(); try { TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 1); assertEquals(1, topDocs.TotalHits); //There is only one doc } finally { searcherManager.Release(indexSearcher); } //Add a 2nd document doc = new Document(); doc.Add(new Int32Field("id", 2, Field.Store.YES)); doc.Add(new StringField("name", "Doc2", Field.Store.YES)); trackingWriter.AddDocument(doc); //Demonstrate that we can only see the first doc because we haven't //waited 1 sec or called WaitForGeneration indexSearcher = searcherManager.Acquire(); try { TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 1); assertEquals(1, topDocs.TotalHits); //Still only Doc1; no refresh has happened yet } finally { searcherManager.Release(indexSearcher); } //Demonstrate that we can see both docs after we wait a little more //than 1 sec so that controlledRealTimeReopenThread's max interval is exceeded //and it calls MaybeRefresh Thread.Sleep(1100); //wait 1.1 secs (in ms) indexSearcher = searcherManager.Acquire(); try { TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 1); assertEquals(2, topDocs.TotalHits); //Can see both docs due to the auto refresh after 1.1 secs } finally { searcherManager.Release(indexSearcher); } //Add a 3rd document doc = new Document(); doc.Add(new Int32Field("id", 3, Field.Store.YES)); doc.Add(new StringField("name", "Doc3", Field.Store.YES)); long generation = trackingWriter.AddDocument(doc); //Demonstrate that if we call WaitForGeneration our wait will be // .2 secs or less (the min interval we set earlier) and then we will //see all 3 documents. 
Stopwatch stopwatch = Stopwatch.StartNew(); controlledRealTimeReopenThread.WaitForGeneration(generation); stopwatch.Stop(); assertTrue(stopwatch.Elapsed.TotalMilliseconds <= 200 + 30); //30ms is a fudge factor to account for call overhead. indexSearcher = searcherManager.Acquire(); try { TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 1); assertEquals(3, topDocs.TotalHits); //Can see all 3 docs now that WaitForGeneration has returned } finally { searcherManager.Release(indexSearcher); } controlledRealTimeReopenThread.Dispose(); searcherManager.Dispose(); indexWriter.Dispose(); indexDir.Dispose(); }
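Condensed to its essentials, the near-real-time pattern this test walks through is: index a change, wait for its generation, acquire, search, release. A minimal sketch against the same types; `doc` and the query are placeholders:

// 1. Index a change and remember the generation it belongs to.
long gen = trackingWriter.AddDocument(doc);
// 2. Block (at most roughly the configured min stale time) until a
//    searcher covering that generation has been opened.
controlledRealTimeReopenThread.WaitForGeneration(gen);
// 3. Acquire, search, release; never hold the searcher past Release.
IndexSearcher s = searcherManager.Acquire();
try
{
    TopDocs top = s.Search(new MatchAllDocsQuery(), 10);
}
finally
{
    searcherManager.Release(s);
}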
private void CompareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<int> joinResults) { // results is 'complete'; joinResults is a subset int resultUpto = 0; int joinGroupUpto = 0; ScoreDoc[] hits = results.ScoreDocs; IGroupDocs<int>[] groupDocs = joinResults.Groups; while (joinGroupUpto < groupDocs.Length) { IGroupDocs<int> group = groupDocs[joinGroupUpto++]; ScoreDoc[] groupHits = group.ScoreDocs; assertNotNull(group.GroupValue); Document parentDoc = joinR.Document(group.GroupValue); string parentID = parentDoc.Get("parentID"); //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc); assertNotNull(parentID); assertTrue(groupHits.Length > 0); for (int hitIDX = 0; hitIDX < groupHits.Length; hitIDX++) { Document nonJoinHit = r.Document(hits[resultUpto++].Doc); Document joinHit = joinR.Document(groupHits[hitIDX].Doc); assertEquals(parentID, nonJoinHit.Get("parentID")); assertEquals(joinHit.Get("childID"), nonJoinHit.Get("childID")); } if (joinGroupUpto < groupDocs.Length) { // Advance non-join hit to the next parentID: //System.out.println(" next joingroupUpto=" + joinGroupUpto + " gd.Length=" + groupDocs.Length + " parentID=" + parentID); while (true) { assertTrue(resultUpto < hits.Length); if (!parentID.Equals(r.Document(hits[resultUpto].Doc).Get("parentID"))) { break; } resultUpto++; } } } }
public virtual void TestRandom() { Directory dir = NewDirectory(); int numDocs = AtLeast(1000); RandomIndexWriter w = new RandomIndexWriter(Random, dir); int[] idToNum = new int[numDocs]; int maxValue = TestUtil.NextInt32(Random, 10, 1000000); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewStringField("id", "" + i, Field.Store.YES)); int numTokens = TestUtil.NextInt32(Random, 1, 10); StringBuilder b = new StringBuilder(); for (int j = 0; j < numTokens; j++) { b.Append("a "); } doc.Add(NewTextField("field", b.ToString(), Field.Store.NO)); idToNum[i] = Random.Next(maxValue); doc.Add(new NumericDocValuesField("num", idToNum[i])); w.AddDocument(doc); } IndexReader r = w.GetReader(); w.Dispose(); IndexSearcher s = NewSearcher(r); int numHits = TestUtil.NextInt32(Random, 1, numDocs); bool reverse = Random.NextBoolean(); //System.out.println("numHits=" + numHits + " reverse=" + reverse); TopDocs hits = s.Search(new TermQuery(new Term("field", "a")), numHits); TopDocs hits2 = new QueryRescorerAnonymousClass3(this, new FixedScoreQuery(idToNum, reverse)) .Rescore(s, hits, numHits); int[] expected = new int[numHits]; for (int i = 0; i < numHits; i++) { expected[i] = hits.ScoreDocs[i].Doc; } int reverseInt = reverse ? -1 : 1; Array.Sort(expected, Comparer <int> .Create((a, b) => { try { int av = idToNum[Convert.ToInt32(r.Document(a).Get("id"))]; int bv = idToNum[Convert.ToInt32(r.Document(b).Get("id"))]; if (av < bv) { return(-reverseInt); } else if (bv < av) { return(reverseInt); } else { // Tie break by docID, ascending return(a - b); } } catch (Exception ioe) when(ioe.IsIOException()) { throw RuntimeException.Create(ioe); } }) ); bool fail = false; for (int i = 0; i < numHits; i++) { //System.out.println("expected=" + expected[i] + " vs " + hits2.ScoreDocs[i].Doc + " v=" + idToNum[Integer.parseInt(r.Document(expected[i]).Get("id"))]); if ((int)expected[i] != hits2.ScoreDocs[i].Doc) { //System.out.println(" diff!"); fail = true; } } Assert.IsFalse(fail); r.Dispose(); dir.Dispose(); }
private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start) { if (fieldsToFetch.IsDistinctQuery == false) return; // add results that were already there in previous pages var min = Math.Min(start, search.TotalHits); for (int i = 0; i < min; i++) { Document document = indexSearcher.Doc(search.ScoreDocs[i].doc); var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i].score); alreadyReturned.Add(indexQueryResult.Projection); } }
public virtual void TestRandomPhrases() { Directory dir = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy())); IList <IList <string> > docs = new List <IList <string> >(); Documents.Document d = new Documents.Document(); Field f = NewTextField("f", "", Field.Store.NO); d.Add(f); Random r = Random; int NUM_DOCS = AtLeast(10); for (int i = 0; i < NUM_DOCS; i++) { // must be > 4096 so it spans multiple chunks int termCount = TestUtil.NextInt32(Random, 4097, 8200); IList <string> doc = new List <string>(); StringBuilder sb = new StringBuilder(); while (doc.Count < termCount) { if (r.Next(5) == 1 || docs.Count == 0) { // make new non-empty-string term string term; while (true) { term = TestUtil.RandomUnicodeString(r); if (term.Length > 0) { break; } } IOException priorException = null; TokenStream ts = analyzer.GetTokenStream("ignore", new StringReader(term)); try { ICharTermAttribute termAttr = ts.AddAttribute <ICharTermAttribute>(); ts.Reset(); while (ts.IncrementToken()) { string text = termAttr.ToString(); doc.Add(text); sb.Append(text).Append(' '); } ts.End(); } catch (IOException e) { priorException = e; } finally { IOUtils.DisposeWhileHandlingException(priorException, ts); } } else { // pick existing sub-phrase IList <string> lastDoc = docs[r.Next(docs.Count)]; int len = TestUtil.NextInt32(r, 1, 10); int start = r.Next(lastDoc.Count - len); for (int k = start; k < start + len; k++) { string t = lastDoc[k]; doc.Add(t); sb.Append(t).Append(' '); } } } docs.Add(doc); f.SetStringValue(sb.ToString()); w.AddDocument(d); } IndexReader reader = w.GetReader(); IndexSearcher s = NewSearcher(reader); w.Dispose(); // now search int num = AtLeast(10); for (int i = 0; i < num; i++) { int docID = r.Next(docs.Count); IList <string> doc = docs[docID]; int numTerm = TestUtil.NextInt32(r, 2, 20); int start = r.Next(doc.Count - numTerm); PhraseQuery pq = new PhraseQuery(); StringBuilder sb = new StringBuilder(); for (int t = start; t < start + numTerm; t++) { pq.Add(new Term("f", doc[t])); sb.Append(doc[t]).Append(' '); } TopDocs hits = s.Search(pq, NUM_DOCS); bool found = false; for (int j = 0; j < hits.ScoreDocs.Length; j++) { if (hits.ScoreDocs[j].Doc == docID) { found = true; break; } } Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start); } reader.Dispose(); dir.Dispose(); }
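For reference, the query type the test randomizes, in its simplest form; the `searcher`, field name, and terms are assumptions for illustration:

PhraseQuery pq = new PhraseQuery();
pq.Add(new Term("f", "wizard"));
pq.Add(new Term("f", "oz"));
pq.Slop = 1; // allow one position of slack between the terms
TopDocs hits = searcher.Search(pq, 10);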
public override TopDocs SearchAfter(ScoreDoc after, Query query, int numHits) { TopDocs[] shardHits = new TopDocs[NodeVersions.Length]; // Results are merged in that order: score, shardIndex, doc. Therefore we set // after to after.Score and depending on the nodeID we set doc to either: // - not collect any more documents with that score (only with worse score) // - collect more documents with that score (and worse) following the last collected document // - collect all documents with that score (and worse) ScoreDoc shardAfter = new ScoreDoc(after.Doc, after.Score); for (int nodeID = 0; nodeID < NodeVersions.Length; nodeID++) { if (nodeID < after.ShardIndex) { // all documents with after.Score were already collected, so collect // only documents with worse scores. NodeState.ShardIndexSearcher s = OuterInstance.OuterInstance.Nodes[nodeID].Acquire(NodeVersions); try { // Setting after.Doc to reader.MaxDoc-1 is a way to tell // TopScoreDocCollector that no more docs with that score should // be collected. Note that in practice the shard which sends the // request to a remote shard won't have reader.MaxDoc at hand, so // it will send some arbitrary value which will be fixed on the // other end. shardAfter.Doc = s.IndexReader.MaxDoc - 1; } finally { OuterInstance.OuterInstance.Nodes[nodeID].Release(s); } } else if (nodeID == after.ShardIndex) { // collect all documents following the last collected doc with // after.Score + documents with worse scores. shardAfter.Doc = after.Doc; } else { // all documents with after.Score (and worse) should be collected // because they didn't make it to top-N in the previous round. shardAfter.Doc = -1; } if (nodeID == MyNodeID) { // My node; run using the local shard searcher we // already acquired: shardHits[nodeID] = LocalSearchAfter(shardAfter, query, numHits); } else { shardHits[nodeID] = OuterInstance.OuterInstance.SearchNode(nodeID, NodeVersions, query, null, numHits, shardAfter); } //System.out.println(" node=" + nodeID + " totHits=" + shardHits[nodeID].TotalHits); } // Merge: return TopDocs.Merge(null, numHits, shardHits); }
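For contrast with the sharded version above, single-searcher deep paging needs no per-node doc fix-ups at all. A sketch assuming Lucene.NET 4.8's `IndexSearcher.SearchAfter`; `searcher` and `query` are placeholders:

ScoreDoc after = null;
for (int page = 0; page < 3; page++)
{
    TopDocs pageHits = (after == null)
        ? searcher.Search(query, 10)              // first page
        : searcher.SearchAfter(after, query, 10); // subsequent pages
    if (pageHits.ScoreDocs.Length == 0)
    {
        break; // ran out of results
    }
    // The last hit of this page becomes the cursor for the next one.
    after = pageHits.ScoreDocs[pageHits.ScoreDocs.Length - 1];
}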
static JObject AutoCompleteMakeVersionResult(NuGetSearcherManager searcherManager, bool includePrerelease, TopDocs topDocs) { var searcher = searcherManager.Get(); try { JObject result = new JObject(); result.Add("@context", new JObject { { "@vocab", "http://schema.nuget.org/schema#" } }); result.Add("indexName", searcherManager.IndexName); if (topDocs.TotalHits > 0) { ScoreDoc scoreDoc = topDocs.ScoreDocs[0]; JArray versions = searcherManager.GetVersionLists(searcher, scoreDoc.Doc); result.Add("totalHits", versions.Count()); result["data"] = versions; } else { result.Add("totalHits", 0); result["data"] = new JArray(); } return result; } finally { searcherManager.Release(searcher); } }
private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start, int pageSize) { var min = Math.Min(start, search.TotalHits); // we are paging, we need to check that we don't have duplicates in the previous page // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e if (parent.IsMapReduce == false && fieldsToFetch.IsProjection == false && start - pageSize >= 0 && start < search.TotalHits) { for (int i = start - pageSize; i < min; i++) { var document = indexSearcher.Doc(search.ScoreDocs[i].Doc); documentsAlreadySeenInPreviousPage.Add(document.Get(Constants.DocumentIdFieldName)); } } if (fieldsToFetch.IsDistinctQuery == false) return; // add results that were already there in previous pages for (int i = 0; i < min; i++) { Document document = indexSearcher.Doc(search.ScoreDocs[i].Doc); var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]); alreadyReturned.Add(indexQueryResult.Projection); } }
public static JToken AutoCompleteMakeResult(IndexSearcher searcher, TopDocs topDocs, int skip, int take, NuGetSearcherManager searcherManager, bool includeExplanation, Query query) { JArray array = new JArray(); for (int i = skip; i < Math.Min(skip + take, topDocs.ScoreDocs.Length); i++) { ScoreDoc scoreDoc = topDocs.ScoreDocs[i]; Document document = searcher.Doc(scoreDoc.Doc); string id = document.Get("Id"); array.Add(id); } JObject result = new JObject(); result.Add("@context", new JObject { { "@vocab", "http://schema.nuget.org/schema#" } }); result.Add("totalHits", topDocs.TotalHits); result.Add("indexName", searcherManager.IndexName); result.Add("data", array); if (includeExplanation) { JArray explanations = new JArray(); for (int i = skip; i < Math.Min(skip + take, topDocs.ScoreDocs.Length); i++) { ScoreDoc scoreDoc = topDocs.ScoreDocs[i]; Explanation explanation = searcher.Explain(query, scoreDoc.Doc); explanations.Add(explanation.ToString()); } result.Add("explanations", explanations); } return result; }
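The skip/take window computed twice above is the same small pattern; a hedged helper (the name and factoring are illustrative, not part of the original code) that both loops could share:

private static System.Collections.Generic.IEnumerable<ScoreDoc> Window(TopDocs topDocs, int skip, int take)
{
    // Clamp the window to the hits actually collected.
    int end = System.Math.Min(skip + take, topDocs.ScoreDocs.Length);
    for (int i = skip; i < end; i++)
    {
        yield return topDocs.ScoreDocs[i];
    }
}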
public virtual void TestMissingSecondPassScore() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); Document doc = new Document(); doc.Add(NewStringField("id", "0", Field.Store.YES)); doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO)); w.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("id", "1", Field.Store.YES)); // 1 extra token, but wizard and oz are close; doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO)); w.AddDocument(doc); IndexReader r = w.GetReader(); w.Dispose(); // Do ordinary BooleanQuery: BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); IndexSearcher searcher = GetSearcher(r); TopDocs hits = searcher.Search(bq, 10); Assert.AreEqual(2, hits.TotalHits); Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id")); // Now, resort using PhraseQuery, no slop: PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("field", "wizard")); pq.Add(new Term("field", "oz")); TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits2.TotalHits); Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id")); // Resort using SpanNearQuery: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard")); SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz")); SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true); TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10); // Resorting changed the order: Assert.AreEqual(2, hits3.TotalHits); Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id")); Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id")); r.Dispose(); dir.Dispose(); }
public static void Main(System.String[] args) { try { string idxPathDir = string.Empty; System.String usage = Assembly.GetEntryAssembly().GetName().Name + " <index_directory>"; if (args.Length < 1) { System.Console.Error.WriteLine("Usage: " + usage); System.Console.Out.WriteLine("Press any key..."); System.Console.ReadKey(); System.Environment.Exit(1); } else { idxPathDir = args[0]; } // Assembly.Location (not FullName) yields the on-disk path of the executable. string basePath = System.IO.Directory.GetParent(Assembly.GetEntryAssembly().Location).FullName; idxPathDir = Path.GetFullPath(Path.Combine(basePath, idxPathDir)); // Check whether the "index" directory exists; exit the program if not. if (System.IO.Directory.Exists(idxPathDir) == false) { System.Console.Out.WriteLine("Cannot load index from '" + idxPathDir + "' directory, please make sure it exists."); System.Console.WriteLine("Press any key..."); System.Console.ReadKey(); System.Environment.Exit(1); } Searcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(idxPathDir))); Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30); // Read queries from standard input, using whatever encoding the console provides. System.IO.StreamReader streamReader = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default); while (true) { System.Console.Out.Write("Query: "); string line = streamReader.ReadLine(); if (string.IsNullOrEmpty(line)) { break; } // Parse against the "contents" field -- the second constructor argument is the default field name, not the query text. QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "contents", analyzer); Query query = parser.Parse(line); System.Console.Out.WriteLine("Searching for: " + query.ToString("contents")); // http://stackoverflow.com/questions/14966208/hits-object-deprecated-in-lucene-net-3-03-how-do-i-replace-it Lucene.Net.Search.TopDocs results = searcher.Search(query, 100); System.Console.Out.WriteLine(results.TotalHits + " total matching documents"); foreach (ScoreDoc scoreDoc in results.ScoreDocs) { // retrieve the stored document via the ScoreDoc's doc id Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc); string docPath = doc.Get("path"); System.Console.Out.WriteLine(docPath); } } searcher.Dispose(); } catch (System.Exception e) { System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }