/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.IEnumerator i = reader.GetFieldNames(IndexReader.FieldOption.ALL).GetEnumerator();
    while (i.MoveNext())
    {
        System.Collections.DictionaryEntry fi = (System.Collections.DictionaryEntry) i.Current;

        // update fieldToReader map
        System.String field = fi.Key.ToString();
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    readers.Add(reader);
}
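The snippet above does not name its enclosing class; in Lucene.NET this overload matches ParallelReader.Add. Assuming that class and 2.x-era APIs, a minimal usage sketch might look like the following (the directory names and their contents are made up for illustration; both sub-indexes must hold the same documents in the same order):

using Lucene.Net.Index;
using Lucene.Net.Store;

// Minimal sketch, assuming ParallelReader and two pre-built, doc-aligned indexes.
public static ParallelReader OpenParallel(Directory coreDir, Directory bulkDir)
{
    ParallelReader pr = new ParallelReader();
    pr.Add(IndexReader.Open(coreDir));        // stored fields will be returned from this reader
    pr.Add(IndexReader.Open(bulkDir), true);  // ignoreStoredFields: its stored fields are skipped
    return pr;                                // Document(n) now only pays the cost of coreDir's stored fields
}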
public void TestDeletesNumDocs() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED); doc.Add(id); id.SetValue("0"); w.AddDocument(doc); id.SetValue("1"); w.AddDocument(doc); IndexReader r = w.GetReader(); Assert.AreEqual(2, r.NumDocs()); r.Close(); w.DeleteDocuments(new Term("id", "0")); r = w.GetReader(); Assert.AreEqual(1, r.NumDocs()); r.Close(); w.DeleteDocuments(new Term("id", "1")); r = w.GetReader(); Assert.AreEqual(0, r.NumDocs()); r.Close(); w.Close(); dir.Close(); }
public virtual void TestDocCount()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    try
    {
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

        // add 100 documents
        for (i = 0; i < 100; i++)
        {
            AddDoc(writer);
        }
        Assert.AreEqual(100, writer.DocCount());
        writer.Close();

        // delete 40 documents
        reader = IndexReader.Open(dir);
        for (i = 0; i < 40; i++)
        {
            reader.Delete(i);
        }
        reader.Close();

        // test doc count before segments are merged/index is optimized
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        Assert.AreEqual(100, writer.DocCount());
        writer.Close();

        reader = IndexReader.Open(dir);
        Assert.AreEqual(100, reader.MaxDoc());
        Assert.AreEqual(60, reader.NumDocs());
        reader.Close();

        // optimize the index and check that the new doc count is correct
        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        writer.Optimize();
        Assert.AreEqual(60, writer.DocCount());
        writer.Close();

        // check that the index reader gives the same numbers
        reader = IndexReader.Open(dir);
        Assert.AreEqual(60, reader.MaxDoc());
        Assert.AreEqual(60, reader.NumDocs());
        reader.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
    }
}
public virtual void TestDeleteAll()
{
    for (int pass = 0; pass < 2; pass++)
    {
        bool autoCommit = (0 == pass);
        Directory dir = new MockRAMDirectory();
        IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
        modifier.SetMaxBufferedDocs(2);
        modifier.SetMaxBufferedDeleteTerms(2);

        int id = 0;
        int value_Renamed = 100;

        for (int i = 0; i < 7; i++)
        {
            AddDoc(modifier, ++id, value_Renamed);
        }
        modifier.Commit();

        IndexReader reader = IndexReader.Open(dir);
        Assert.AreEqual(7, reader.NumDocs());
        reader.Close();

        // Add 1 doc (so we will have something buffered)
        AddDoc(modifier, 99, value_Renamed);

        // Delete all
        modifier.DeleteAll();

        // The delete-all shouldn't be on disk yet
        reader = IndexReader.Open(dir);
        Assert.AreEqual(7, reader.NumDocs());
        reader.Close();

        // Add a doc and update a doc (after the deleteAll, before the commit)
        AddDoc(modifier, 101, value_Renamed);
        UpdateDoc(modifier, 102, value_Renamed);

        // commit the delete all
        modifier.Commit();

        // Validate that only the two docs added after the deleteAll remain
        reader = IndexReader.Open(dir);
        Assert.AreEqual(2, reader.NumDocs());
        reader.Close();

        modifier.Close();
        dir.Close();
    }
}
public virtual void TestMoreMerges()
{
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directories
    Directory aux = new RAMDirectory();
    Directory aux2 = new RAMDirectory();

    SetUpDirs(dir, aux);

    IndexWriter writer = NewWriter(aux2, true);
    writer.SetMaxBufferedDocs(100);
    writer.SetMergeFactor(10);
    writer.AddIndexesNoOptimize(new Directory[] { aux });
    Assert.AreEqual(30, writer.DocCount());
    Assert.AreEqual(3, writer.GetSegmentCount());
    writer.Close();

    IndexReader reader = IndexReader.Open(aux);
    for (int i = 0; i < 27; i++)
    {
        reader.DeleteDocument(i);
    }
    Assert.AreEqual(3, reader.NumDocs());
    reader.Close();

    reader = IndexReader.Open(aux2);
    for (int i = 0; i < 8; i++)
    {
        reader.DeleteDocument(i);
    }
    Assert.AreEqual(22, reader.NumDocs());
    reader.Close();

    writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(6);
    writer.SetMergeFactor(4);
    writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
    Assert.AreEqual(1025, writer.DocCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1025);
}
public virtual void TestBatchDeletes() { for (int pass = 0; pass < 2; pass++) { bool autoCommit = (0 == pass); Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); modifier.SetMaxBufferedDocs(2); modifier.SetMaxBufferedDeleteTerms(2); int id = 0; int value_Renamed = 100; for (int i = 0; i < 7; i++) { AddDoc(modifier, ++id, value_Renamed); } modifier.Commit(); IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(7, reader.NumDocs()); reader.Close(); id = 0; modifier.DeleteDocuments(new Term("id", System.Convert.ToString(++id))); modifier.DeleteDocuments(new Term("id", System.Convert.ToString(++id))); modifier.Commit(); reader = IndexReader.Open(dir); Assert.AreEqual(5, reader.NumDocs()); reader.Close(); Term[] terms = new Term[3]; for (int i = 0; i < terms.Length; i++) { terms[i] = new Term("id", System.Convert.ToString(++id)); } modifier.DeleteDocuments(terms); modifier.Commit(); reader = IndexReader.Open(dir); Assert.AreEqual(2, reader.NumDocs()); reader.Close(); modifier.Close(); dir.Close(); } }
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    IEnumerator<string> i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        // update fieldToReader map
        string field = i.Current;
        //if (fieldToReader[field] == null)
        if (!fieldToReader.ContainsKey(field))
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
public virtual void TestMergeAfterCopy()
{
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    SetUpDirs(dir, aux);

    IndexReader reader = IndexReader.Open(aux);
    for (int i = 0; i < 20; i++)
    {
        reader.DeleteDocument(i);
    }
    Assert.AreEqual(10, reader.NumDocs());
    reader.Close();

    IndexWriter writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(4);
    writer.SetMergeFactor(4);
    writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
    Assert.AreEqual(1020, writer.DocCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1020);
}
public override void DoWork() { IndexReader r = IndexReader.Open(directory, true); Assert.AreEqual(100, r.NumDocs()); r.Close(); }
public virtual void TestFieldCacheRangeFilterDoubles()
{
    IndexReader reader = IndexReader.Open((Directory)signedIndex.index, true, null);
    IndexSearcher Search = new IndexSearcher(reader);

    int numDocs = reader.NumDocs();
    System.Double minIdO = (double)(minId + .5);
    System.Double medIdO = (double)((float)minIdO + ((double)(maxId - minId)) / 2.0);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));

    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", minIdO, medIdO, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs / 2, result.Length, "find all");

    int count = 0;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, medIdO, F, T), numDocs, null).ScoreDocs;
    count += result.Length;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", medIdO, null, F, F), numDocs, null).ScoreDocs;
    count += result.Length;
    Assert.AreEqual(numDocs, count, "sum of two concatenated ranges");

    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, null, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");

    System.Double tempAux = (double)System.Double.PositiveInfinity;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", tempAux, null, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");

    System.Double tempAux2 = (double)System.Double.NegativeInfinity;
    result = Search.Search(q, FieldCacheRangeFilter.NewDoubleRange("id", null, tempAux2, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "infinity special case");
}
public virtual void TestVerifyIndex() { IndexReader reader = IndexReader.Open(mDirectory, true, null); Assert.AreEqual(8, reader.NumDocs()); reader.Close(); }
public virtual void TestAddIndexesAndDoDeletesThreads()
{
    int numIter = 5;
    int numDirs = 3;

    Directory mainDir = new MockRAMDirectory();
    IndexWriter mainWriter = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    mainWriter.SetInfoStream(infoStream);

    AddDirectoriesThreads addDirThreads = new AddDirectoriesThreads(this, numIter, mainWriter);
    addDirThreads.LaunchThreads(numDirs);
    addDirThreads.JoinThreads();

    //Assert.AreEqual(100 + numDirs * (3 * numIter / 4) * addDirThreads.NUM_THREADS
    //    * addDirThreads.NUM_INIT_DOCS, addDirThreads.mainWriter.numDocs());
    Assert.AreEqual(addDirThreads.count.IntValue(), addDirThreads.mainWriter.NumDocs());

    addDirThreads.Close(true);
    Assert.IsTrue(addDirThreads.failures.Count == 0);

    _TestUtil.CheckIndex(mainDir);

    IndexReader reader = IndexReader.Open(mainDir);
    Assert.AreEqual(addDirThreads.count.IntValue(), reader.NumDocs());
    //Assert.AreEqual(100 + numDirs * (3 * numIter / 4) * addDirThreads.NUM_THREADS
    //    * addDirThreads.NUM_INIT_DOCS, reader.numDocs());
    reader.Close();

    addDirThreads.CloseDir();
    mainDir.Close();
}
/// <summary> Extracts all term texts of a given Query into an array of WeightedTerms
/// </summary>
/// <param name="query">Query to extract term texts from</param>
/// <param name="reader">used to compute IDF, which can be used to a) score selected fragments better,
/// b) use graded highlights, e.g. changing the intensity of the font color</param>
/// <param name="fieldName">the field on which Inverse Document Frequency (IDF) calculations are based</param>
/// <returns> an array of the terms used in a query, plus their weights.</returns>
public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, string fieldName)
{
    WeightedTerm[] terms = GetTerms(query, false, fieldName);
    int totalNumDocs = reader.NumDocs();
    foreach (WeightedTerm t in terms)
    {
        try
        {
            int docFreq = reader.DocFreq(new Term(fieldName, t.Term));
            // docFreq counts deletes
            if (totalNumDocs < docFreq)
            {
                docFreq = totalNumDocs;
            }
            // IDF algorithm taken from DefaultSimilarity class
            var idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
            t.Weight *= idf;
        }
        catch (IOException)
        {
            // ignore
        }
    }
    return terms;
}
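The weighting follows the DefaultSimilarity IDF formula: with totalNumDocs = 1000 and docFreq = 9, idf = ln(1000 / 10) + 1 ≈ 5.6, so rare terms get the largest boosts. In Lucene's Highlighter contrib this method lives on QueryTermExtractor; assuming that class and the property-style WeightedTerm API used above, a minimal usage sketch might be:

// Minimal sketch; the namespace and the field name "body" are assumptions.
using Lucene.Net.Highlight;   // may be Lucene.Net.Search.Highlight in later versions
using Lucene.Net.Index;
using Lucene.Net.Search;

public static void PrintWeightedTerms(Query query, IndexReader reader)
{
    WeightedTerm[] terms = QueryTermExtractor.GetIdfWeightedTerms(query, reader, "body");
    foreach (WeightedTerm t in terms)
    {
        // Rare terms (low DocFreq) end up with the highest weights.
        System.Console.WriteLine(t.Term + " -> " + t.Weight);
    }
}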
public virtual void TestFieldCacheRangeFilterRand()
{
    IndexReader reader = IndexReader.Open((Directory)signedIndex.index, true, null);
    IndexSearcher Search = new IndexSearcher(reader);

    System.String minRP = Pad(signedIndex.minR);
    System.String maxRP = Pad(signedIndex.maxR);

    int numDocs = reader.NumDocs();
    Assert.AreEqual(numDocs, 1 + maxId - minId, "num of docs");

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));

    // test extremes, bounded on both ends
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "find all");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, T, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but biggest");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "all but smallest");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, maxRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 2, result.Length, "all but extremes");

    // unbounded
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, T, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "smallest and up");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs, result.Length, "biggest and down");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, null, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not smallest, but up");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, maxRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(numDocs - 1, result.Length, "not biggest, but down");

    // very small sets
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "min,min,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, F, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, result.Length, "max,max,F,F");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", minRP, minRP, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "min,min,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", null, minRP, F, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "nul,min,F,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, maxRP, T, T), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,max,T,T");
    result = Search.Search(q, FieldCacheRangeFilter.NewStringRange("rand", maxRP, null, T, F), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, result.Length, "max,nul,T,T");
}
public virtual void TestAfterClose()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);

    // create the index
    CreateIndexNoClose(false, "test", writer);

    IndexReader r = writer.GetReader();
    writer.Close();

    _TestUtil.CheckIndex(dir1);

    // reader should remain usable even after IndexWriter is closed:
    Assert.AreEqual(100, r.NumDocs());
    Query q = new TermQuery(new Term("indexname", "test"));
    Assert.AreEqual(100, new IndexSearcher(r).Search(q, 10).totalHits);

    try
    {
        r.Reopen();
        Assert.Fail("failed to hit AlreadyClosedException");
    }
    catch (AlreadyClosedException)
    {
        // expected
    }

    r.Close();
    dir1.Close();
}
public virtual void TestRAMDirectory_Renamed()
{
    Directory dir = FSDirectory.Open(indexDir);
    MockRAMDirectory ramDir = new MockRAMDirectory(dir);

    // close the underlying directory
    dir.Close();

    // check size
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

    // open a reader to test the document count
    IndexReader reader = IndexReader.Open((Directory)ramDir, true, null);
    Assert.AreEqual(docsToAdd, reader.NumDocs());

    // open a searcher to check that all docs are there
    IndexSearcher searcher = new IndexSearcher(reader);

    // search for all documents
    for (int i = 0; i < docsToAdd; i++)
    {
        Document doc = searcher.Doc(i, null);
        Assert.IsTrue(doc.GetField("content") != null);
    }

    // cleanup
    reader.Close();
    searcher.Close();
}
private void TestTermVectors()
{
    int numDocs = reader.NumDocs();
    long start = 0L;
    for (int docId = 0; docId < numDocs; docId++)
    {
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        ITermFreqVector[] vectors = reader.GetTermFreqVectors(docId, null);
        timeElapsed += (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - start;

        // verify vectors result
        VerifyVectors(vectors, docId);

        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        ITermFreqVector vector = reader.GetTermFreqVector(docId, "field", null);
        timeElapsed += (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - start;

        vectors = new ITermFreqVector[1];
        vectors[0] = vector;
        VerifyVectors(vectors, docId);
    }
}
public virtual void TestExpungeDeletes() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null); Document doc = new Document(); doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED); doc.Add(id); id.SetValue("0"); w.AddDocument(doc, null); id.SetValue("1"); w.AddDocument(doc, null); w.DeleteDocuments(null, new Term("id", "0")); IndexReader r = w.GetReader(null); w.ExpungeDeletes(null); w.Close(); r.Close(); r = IndexReader.Open(dir, true, null); Assert.AreEqual(1, r.NumDocs()); Assert.IsFalse(r.HasDeletions); r.Close(); dir.Close(); }
public void TestRollbackIntegrityWithBufferFlush()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 5; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        w.AddDocument(doc);
    }
    w.Close();

    // If buffer size is small enough to cause a flush, errors ensue...
    w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    w.SetMaxBufferedDocs(2);

    Term pkTerm = new Term("pk", "");
    for (int i = 0; i < 3; i++)
    {
        Document doc = new Document();
        String value = i.ToString();
        doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        w.UpdateDocument(pkTerm.CreateTerm(value), doc);
    }
    w.Rollback();

    IndexReader r = IndexReader.Open(dir, true);
    Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
    r.Close();
    dir.Close();
}
public virtual void TestBatchDeletes() { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null); modifier.SetMaxBufferedDocs(2); modifier.SetMaxBufferedDeleteTerms(2); int id = 0; int value_Renamed = 100; for (int i = 0; i < 7; i++) { AddDoc(modifier, ++id, value_Renamed); } modifier.Commit(null); IndexReader reader = IndexReader.Open(dir, true, null); Assert.AreEqual(7, reader.NumDocs()); reader.Close(); id = 0; modifier.DeleteDocuments(null, new Term("id", System.Convert.ToString(++id))); modifier.DeleteDocuments(null, new Term("id", System.Convert.ToString(++id))); modifier.Commit(null); reader = IndexReader.Open(dir, true, null); Assert.AreEqual(5, reader.NumDocs()); reader.Close(); Term[] terms = new Term[3]; for (int i = 0; i < terms.Length; i++) { terms[i] = new Term("id", System.Convert.ToString(++id)); } modifier.DeleteDocuments(null, terms); modifier.Commit(null); reader = IndexReader.Open(dir, true, null); Assert.AreEqual(2, reader.NumDocs()); reader.Close(); modifier.Close(); dir.Close(); }
/*
 * Run one indexer and 2 searchers against single index as
 * stress test.
 */
public virtual void RunTest(Directory directory)
{
    TimedThread[] threads = new TimedThread[4];

    IndexWriter writer = new MockIndexWriter(this, directory, true, ANALYZER, true);
    writer.SetMaxBufferedDocs(7);
    writer.SetMergeFactor(3);

    // Establish a base index of 100 docs:
    for (int i = 0; i < 100; i++)
    {
        Document d = new Document();
        d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
        d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(d);
    }
    writer.Commit();

    IndexReader r = IndexReader.Open(directory);
    Assert.AreEqual(100, r.NumDocs());
    r.Close();

    IndexerThread indexerThread = new IndexerThread(writer, threads);
    threads[0] = indexerThread;
    indexerThread.Start();

    IndexerThread indexerThread2 = new IndexerThread(writer, threads);
    threads[1] = indexerThread2;
    indexerThread2.Start();

    SearcherThread searcherThread1 = new SearcherThread(directory, threads);
    threads[2] = searcherThread1;
    searcherThread1.Start();

    SearcherThread searcherThread2 = new SearcherThread(directory, threads);
    threads[3] = searcherThread2;
    searcherThread2.Start();

    indexerThread.Join();
    indexerThread2.Join();
    searcherThread1.Join();
    searcherThread2.Join();

    writer.Close();

    Assert.IsTrue(!indexerThread.failed, "hit unexpected exception in indexer");
    Assert.IsTrue(!indexerThread2.failed, "hit unexpected exception in indexer2");
    Assert.IsTrue(!searcherThread1.failed, "hit unexpected exception in search1");
    Assert.IsTrue(!searcherThread2.failed, "hit unexpected exception in search2");

    //System.out.println("    Writer: " + indexerThread.count + " iterations");
    //System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
    //System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
}
private void VerifyNumDocs(Directory dir, int numDocs) { IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(numDocs, reader.MaxDoc()); Assert.AreEqual(numDocs, reader.NumDocs()); reader.Close(); }
public virtual void TestDeleteOldIndex()
{
    foreach (string name in OldNames)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: oldName=" + name);
        }

        // Try one delete:
        Directory dir = NewDirectory(OldIndexDirs[name]);

        IndexReader ir = DirectoryReader.Open(dir);
        Assert.AreEqual(35, ir.NumDocs());
        ir.Dispose();

        IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
        iw.DeleteDocuments(new Term("id", "3"));
        iw.Dispose();

        ir = DirectoryReader.Open(dir);
        Assert.AreEqual(34, ir.NumDocs());
        ir.Dispose();

        // Delete all but 1 document:
        iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
        for (int i = 0; i < 35; i++)
        {
            iw.DeleteDocuments(new Term("id", "" + i));
        }

        // Verify NRT reader takes:
        ir = DirectoryReader.Open(iw, true);
        iw.Dispose();
        Assert.AreEqual(1, ir.NumDocs(), "index " + name);
        ir.Dispose();

        // Verify non-NRT reader takes:
        ir = DirectoryReader.Open(dir);
        Assert.AreEqual(1, ir.NumDocs(), "index " + name);
        ir.Dispose();

        dir.Dispose();
    }
}
private void MergeTermInfos()
{
    int base_Renamed = 0;
    for (int i = 0; i < readers.Count; i++)
    {
        IndexReader reader = (IndexReader)readers[i];
        TermEnum termEnum = reader.Terms();
        SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
        base_Renamed += reader.NumDocs();
        if (smi.Next())
        {
            queue.Put(smi); // initialize queue
        }
        else
        {
            smi.Close();
        }
    }

    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

    while (queue.Size() > 0)
    {
        int matchSize = 0;

        // pop matching terms
        match[matchSize++] = (SegmentMergeInfo)queue.Pop();
        Term term = match[0].term;
        SegmentMergeInfo top = (SegmentMergeInfo)queue.Top();
        while (top != null && term.CompareTo(top.term) == 0)
        {
            match[matchSize++] = (SegmentMergeInfo)queue.Pop();
            top = (SegmentMergeInfo)queue.Top();
        }

        int df = MergeTermInfo(match, matchSize); // add new TermInfo

        if (checkAbort != null)
        {
            checkAbort.Work(df / 3.0);
        }

        while (matchSize > 0)
        {
            SegmentMergeInfo smi = match[--matchSize];
            if (smi.Next())
            {
                queue.Put(smi); // restore queue
            }
            else
            {
                smi.Close(); // done with a segment
            }
        }
    }
}
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc" /> </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc;
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions;
    }

    if (reader.MaxDoc != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    foreach (var field in fields)
    {
        // update fieldToReader map;
        // do a ContainsKey first to mimic the Java behavior
        if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
public virtual void TestFlushExceptions()
{
    MockRAMDirectory directory = new MockRAMDirectory();
    FailOnlyOnFlush failure = new FailOnlyOnFlush();
    directory.FailOn(failure);

    IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    writer.SetMergeScheduler(cms);
    writer.SetMaxBufferedDocs(2);

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);

    int extraCount = 0;
    for (int i = 0; i < 10; i++)
    {
        for (int j = 0; j < 20; j++)
        {
            idField.SetValue(System.Convert.ToString(i * 20 + j));
            writer.AddDocument(doc);
        }

        while (true)
        {
            // must cycle here because sometimes the merge flushes
            // the doc we just added and so there's nothing to
            // flush, and we don't hit the exception
            writer.AddDocument(doc);
            failure.SetDoFail();
            try
            {
                writer.Flush(true, false, true);
                if (failure.hitExc)
                {
                    Assert.Fail("failed to hit IOException");
                }
                extraCount++;
            }
            catch (System.IO.IOException)
            {
                failure.ClearDoFail();
                break;
            }
        }
    }

    writer.Close();

    IndexReader reader = IndexReader.Open(directory, true);
    Assert.AreEqual(200 + extraCount, reader.NumDocs());
    reader.Close();
    directory.Close();
}
public virtual void TestDeleteAllNRT()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);
    modifier.SetMaxBufferedDocs(2);
    modifier.SetMaxBufferedDeleteTerms(2);

    int id = 0;
    int value_Renamed = 100;

    for (int i = 0; i < 7; i++)
    {
        AddDoc(modifier, ++id, value_Renamed);
    }
    modifier.Commit(null);

    IndexReader reader = modifier.GetReader(null);
    Assert.AreEqual(7, reader.NumDocs());
    reader.Close();

    AddDoc(modifier, ++id, value_Renamed);
    AddDoc(modifier, ++id, value_Renamed);

    // Delete all
    modifier.DeleteAll(null);

    reader = modifier.GetReader(null);
    Assert.AreEqual(0, reader.NumDocs());
    reader.Close();

    // Roll it back
    modifier.Rollback(null);
    modifier.Close();

    // Validate that the docs are still there
    reader = IndexReader.Open(dir, true, null);
    Assert.AreEqual(7, reader.NumDocs());
    reader.Close();

    dir.Close();
}
public virtual void TestCrashWhileIndexing() { IndexWriter writer = InitIndex(); MockRAMDirectory dir = (MockRAMDirectory)writer.Directory; Crash(writer); IndexReader reader = IndexReader.Open((Directory)dir, true, null); Assert.IsTrue(reader.NumDocs() < 157); }
public virtual void TestNoWaitClose()
{
    RAMDirectory directory = new MockRAMDirectory();

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);

    for (int pass = 0; pass < 2; pass++)
    {
        bool autoCommit = pass == 0;
        IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true);

        for (int iter = 0; iter < 10; iter++)
        {
            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
            writer.SetMergeScheduler(cms);
            writer.SetMaxBufferedDocs(2);
            writer.SetMergeFactor(100);

            for (int j = 0; j < 201; j++)
            {
                idField.SetValue(System.Convert.ToString(iter * 201 + j));
                writer.AddDocument(doc);
            }

            int delID = iter * 201;
            for (int j = 0; j < 20; j++)
            {
                writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID)));
                delID += 5;
            }

            // Force a bunch of merge threads to kick off so we
            // stress out aborting them on close:
            writer.SetMergeFactor(3);
            writer.AddDocument(doc);
            writer.Flush();

            writer.Close(false);

            IndexReader reader = IndexReader.Open(directory);
            Assert.AreEqual((1 + iter) * 182, reader.NumDocs());
            reader.Close();

            // Reopen
            writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
        }
        writer.Close();
    }

    directory.Close();
}
public virtual void TestRAMDeletes() { for (int pass = 0; pass < 2; pass++) { for (int t = 0; t < 2; t++) { bool autoCommit = (0 == pass); Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); modifier.SetMaxBufferedDocs(4); modifier.SetMaxBufferedDeleteTerms(4); int id = 0; int value_Renamed = 100; AddDoc(modifier, ++id, value_Renamed); if (0 == t) { modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed))); } else { modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed)))); } AddDoc(modifier, ++id, value_Renamed); if (0 == t) { modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed))); Assert.AreEqual(2, modifier.GetNumBufferedDeleteTerms()); Assert.AreEqual(1, modifier.GetBufferedDeleteTermsSize()); } else { modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed)))); } AddDoc(modifier, ++id, value_Renamed); Assert.AreEqual(0, modifier.GetSegmentCount()); modifier.Flush(); modifier.Commit(); IndexReader reader = IndexReader.Open(dir); Assert.AreEqual(1, reader.NumDocs()); int hitCount = GetHitCount(dir, new Term("id", System.Convert.ToString(id))); Assert.AreEqual(1, hitCount); reader.Close(); modifier.Close(); dir.Close(); } } }
public virtual void TestRollbackDeletionPolicy()
{
    for (int i = 0; i < 2; i++)
    {
        // Unless you specify a prior commit point, rollback
        // should not work:
        new IndexWriter(dir, new WhitespaceAnalyzer(), new DeleteLastCommitPolicy(this), MaxFieldLength.UNLIMITED).Close();
        IndexReader r = IndexReader.Open(dir);
        Assert.AreEqual(100, r.NumDocs());
        r.Close();
    }
}
public virtual void TestReopenWriteableToReadOnly() { Directory dir1 = new MockRAMDirectory(); TestIndexReaderReopen.CreateIndex(dir1, true); IndexReader reader = IndexReader.Open(dir1, false); int docCount = reader.NumDocs(); Assert.IsTrue(DeleteWorked(1, reader)); Assert.AreEqual(docCount - 1, reader.NumDocs()); IndexReader readOnlyReader = reader.Reopen(true); Assert.IsTrue(IsReadOnly(readOnlyReader), "reader isn't read only"); Assert.IsFalse(DeleteWorked(1, readOnlyReader)); Assert.AreEqual(docCount - 1, readOnlyReader.NumDocs()); reader.Close(); readOnlyReader.Close(); dir1.Close(); }
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    // Size by MaxDoc, not NumDocs: once the index contains deletions,
    // document ids range up to MaxDoc - 1, which can exceed NumDocs.
    OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc);
    TermDocs termDocs = reader.TermDocs(new Term("TenantId", _tenantId));
    while (termDocs.Next())
    {
        if (termDocs.Freq > 0)
        {
            bitSet.Set(termDocs.Doc);
        }
    }
    return bitSet;
}
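A Filter like this is normally passed to IndexSearcher.Search alongside the main query so that every result is scoped to one tenant. A minimal sketch follows; "TenantFilter" is a hypothetical name for the class containing the GetDocIdSet override above, since the snippet doesn't show it:

using Lucene.Net.Search;

// Hypothetical caller; TenantFilter wraps the GetDocIdSet override above.
public static TopDocs SearchForTenant(IndexSearcher searcher, Query query, string tenantId)
{
    Filter tenantFilter = new TenantFilter(tenantId);   // assumed constructor
    // The filter restricts the query to documents whose TenantId term matches.
    return searcher.Search(query, tenantFilter, 100);
}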
public override IEnumerable<Row> Execute(IEnumerable<Row> rows) { if (_indexDirectory == null) yield break; try { _reader = IndexReader.Open(_indexDirectory, true); } catch (Exception) { Warn("Failed to open lucene index in {0}.", _indexDirectory.Directory.FullName); yield break; } var docCount = _reader.NumDocs(); Info("Found {0} documents in lucene index.", docCount); for (var i = 0; i < docCount; i++) { if (_reader.IsDeleted(i)) continue; var doc = _reader.Document(i); var row = new Row(); foreach (var field in doc.GetFields().Where(field => field.IsStored)) { switch (field.Name) { case "dropped": row[field.Name] = Convert.ToBoolean(field.StringValue); break; default: row[field.Name] = field.StringValue; break; } } yield return row; } }
// Returns true if there are docs to search and creates the readers and searchers
// in that case.  Otherwise, returns false.
private bool BuildSearchers (out IndexReader primary_reader,
                             out LNS.IndexSearcher primary_searcher,
                             out IndexReader secondary_reader,
                             out LNS.IndexSearcher secondary_searcher)
{
    primary_searcher = null;
    secondary_reader = null;
    secondary_searcher = null;

    primary_reader = LuceneCommon.GetReader (PrimaryStore);
    if (primary_reader.NumDocs () == 0) {
        ReleaseReader (primary_reader);
        primary_reader = null;
        return false;
    }

    primary_searcher = new LNS.IndexSearcher (primary_reader);

    if (SecondaryStore != null) {
        secondary_reader = LuceneCommon.GetReader (SecondaryStore);
        if (secondary_reader.NumDocs () == 0) {
            ReleaseReader (secondary_reader);
            secondary_reader = null;
        }
    }

    if (secondary_reader != null)
        secondary_searcher = new LNS.IndexSearcher (secondary_reader);

    return true;
}
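A caller of this method has to consume all four out parameters and release the readers it received; a sketch of that pattern, assuming it runs inside the same class (so BuildSearchers and ReleaseReader are in scope) and that LNS aliases Lucene.Net.Search:

// Hypothetical caller fragment, following the conventions used above.
IndexReader primary_reader, secondary_reader;
LNS.IndexSearcher primary_searcher, secondary_searcher;

if (BuildSearchers (out primary_reader, out primary_searcher,
                    out secondary_reader, out secondary_searcher)) {
    try {
        // ... run queries against primary_searcher, and against
        // secondary_searcher when it is non-null ...
    } finally {
        ReleaseReader (primary_reader);
        if (secondary_reader != null)
            ReleaseReader (secondary_reader);
    }
}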
/* * Automatically adds stop words for the given field with terms exceeding the maxPercentDocs * * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that * exceed the required document frequency * @param fieldName The field for which stopwords will be added * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which * contain a term, after which the word is considered to be a stop word. * @return The number of stop words identified. * @throws IOException */ public int AddStopWords(IndexReader reader, String fieldName, float maxPercentDocs) { return AddStopWords(reader, fieldName, (int) (reader.NumDocs() * maxPercentDocs)); }
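The cutoff is simply NumDocs() * maxPercentDocs: with a 10,000-doc index and maxPercentDocs = 0.3, any term appearing in more than 3,000 documents is treated as a stop word. In Lucene's contrib this overload lives on QueryAutoStopWordAnalyzer; assuming that class and a 2.9-era constructor (the namespace and Version constant vary by release), a usage sketch might be:

// Minimal sketch; namespace, Version constant, and the field name
// "description" are assumptions for illustration.
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;

public static Analyzer BuildAnalyzer(IndexReader reader)
{
    var version = Lucene.Net.Util.Version.LUCENE_29;
    var analyzer = new QueryAutoStopWordAnalyzer(version, new StandardAnalyzer(version));
    // With 10,000 docs, terms in more than 10,000 * 0.3 = 3,000 docs become stop words.
    int numStopWords = analyzer.AddStopWords(reader, "description", 0.3f);
    System.Console.WriteLine(numStopWords + " terms treated as stop words");
    return analyzer;
}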
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value of {@link IndexReader#MaxDoc()} </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    System.Collections.IEnumerator i = fields.GetEnumerator();
    while (i.MoveNext())
    {
        // update fieldToReader map
        System.String field = (System.String) i.Current;
        if (fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
protected void SetItemCount (IndexReader reader) { last_item_count = reader.NumDocs (); }
protected override IEnumerable<LucObject> DoExecute(Query query, bool allVersions, IndexReader idxReader, out int totalCount)
{
    Searcher searcher = null;
    IEnumerable<LucObject> result;
    totalCount = 0;

    BeginFullExecutingTime();
    searcher = new IndexSearcher(idxReader);

    var numDocs = idxReader.NumDocs();
    var start = this.LucQuery.Skip;
    var maxtop = numDocs - start;
    if (maxtop < 1)
        return EmptyResult;

    int top = this.LucQuery.Top != 0 ? this.LucQuery.Top : this.LucQuery.PageSize;
    if (top == 0)
        top = 100000;

    var howMany = (top < int.MaxValue / 2) ? top * 2 : int.MaxValue; // numDocs; // * 4; // * 2;
    bool noMorePage = false;
    if ((long)howMany > maxtop)
    {
        howMany = maxtop - start;
        noMorePage = true;
    }

    var numHits = howMany + start;
    if (numHits > numDocs)
        numHits = numDocs;

    try
    {
        //====================================================
        var collector = CreateCollector(numHits);

        BeginKernelTime();
        searcher.Search(query, collector);
        FinishKernelTime();

        BeginCollectingTime();
        //var topDocs = collector.TopDocs(start, howMany);
        var topDocs = (this.LucQuery.SortFields.Length > 0)
            ? ((TopFieldCollector)collector).TopDocs(start, howMany)
            : ((TopScoreDocCollector)collector).TopDocs(start, howMany);
        totalCount = topDocs.TotalHits;
        var hits = topDocs.ScoreDocs;
        FinishCollectingTime();
        //====================================================

        BeginPagingTime();
        bool noMoreHits;
        result = GetResultPage(hits, searcher, top, allVersions, out noMoreHits);
        FinishPagingTime();

        if (result.Count() < top && !noMorePage /*&& !noMoreHits*/)
        {
            // re-search
            numHits = numDocs - start;
            collector = CreateCollector(numHits);
            searcher.Search(query, collector);
            //topDocs = collector.TopDocs(start);
            topDocs = (this.LucQuery.SortFields.Length > 0)
                ? ((TopFieldCollector)collector).TopDocs(start, howMany)
                : ((TopScoreDocCollector)collector).TopDocs(start, howMany);
            hits = topDocs.ScoreDocs;
            result = GetResultPage(hits, searcher, top, allVersions, out noMoreHits);
        }

        return result;
    }
    catch
    {
        FinishKernelTime();
        FinishCollectingTime();
        FinishPagingTime();
        throw;
    }
    finally
    {
        if (searcher != null)
            searcher.Close();
        searcher = null;
        FinishFullExecutingTime();
    }
}
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
{
    Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
    bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

    int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping

    TermDocs termDocs1 = r1.TermDocs();
    TermDocs termDocs2 = r2.TermDocs();

    // create mapping from id2 space to id1 based on idField
    idField = StringHelper.Intern(idField);
    TermEnum termEnum = r1.Terms(new Term(idField, ""));
    do
    {
        Term term = termEnum.Term();
        if (term == null || (System.Object) term.Field() != (System.Object) idField)
            break;

        termDocs1.Seek(termEnum);
        if (!termDocs1.Next())
        {
            // This doc is deleted and wasn't replaced
            termDocs2.Seek(termEnum);
            Assert.IsFalse(termDocs2.Next());
            continue;
        }

        int id1 = termDocs1.Doc();
        Assert.IsFalse(termDocs1.Next());

        termDocs2.Seek(termEnum);
        Assert.IsTrue(termDocs2.Next());
        int id2 = termDocs2.Doc();
        Assert.IsFalse(termDocs2.Next());

        r2r1[id2] = id1;

        // verify stored fields are equivalent
        try
        {
            VerifyEquals(r1.Document(id1), r2.Document(id2));
        }
        catch (System.Exception t)
        {
            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
            System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
            System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
            throw t;
        }

        try
        {
            // verify term vectors are equivalent
            VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
        }
        catch (System.Exception e)
        {
            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
            TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
            System.Console.Out.WriteLine("  d1=" + tv1);
            if (tv1 != null)
                for (int i = 0; i < tv1.Length; i++)
                {
                    System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                }
            TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
            System.Console.Out.WriteLine("  d2=" + tv2);
            if (tv2 != null)
                for (int i = 0; i < tv2.Length; i++)
                {
                    System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                }
            throw e;
        }
    }
    while (termEnum.Next());

    termEnum.Close();

    // Verify postings
    TermEnum termEnum1 = r1.Terms(new Term("", ""));
    TermEnum termEnum2 = r2.Terms(new Term("", ""));

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.NumDocs()];
    long[] info2 = new long[r2.NumDocs()];

    for (; ; )
    {
        Term term1, term2;

        // iterate until we get some docs
        int len1;
        for (; ; )
        {
            len1 = 0;
            term1 = termEnum1.Term();
            if (term1 == null)
                break;
            termDocs1.Seek(termEnum1);
            while (termDocs1.Next())
            {
                int d1 = termDocs1.Doc();
                int f1 = termDocs1.Freq();
                info1[len1] = (((long) d1) << 32) | f1;
                len1++;
            }
            if (len1 > 0)
                break;
            if (!termEnum1.Next())
                break;
        }

        // iterate until we get some docs
        int len2;
        for (; ; )
        {
            len2 = 0;
            term2 = termEnum2.Term();
            if (term2 == null)
                break;
            termDocs2.Seek(termEnum2);
            while (termDocs2.Next())
            {
                int d2 = termDocs2.Doc();
                int f2 = termDocs2.Freq();
                info2[len2] = (((long) r2r1[d2]) << 32) | f2;
                len2++;
            }
            if (len2 > 0)
                break;
            if (!termEnum2.Next())
                break;
        }

        if (!hasDeletes)
            Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());

        Assert.AreEqual(len1, len2);
        if (len1 == 0)
            break; // no more terms

        Assert.AreEqual(term1, term2);

        // sort info2 to get it into ascending docid
        System.Array.Sort(info2, 0, len2 - 0);

        // now compare
        for (int i = 0; i < len1; i++)
        {
            Assert.AreEqual(info1[i], info2[i]);
        }

        termEnum1.Next();
        termEnum2.Next();
    }
}
protected override SearchResult DoExecute(Query query, bool allVersions, IndexReader idxReader, Stopwatch timer)
{
    var numDocs = idxReader.NumDocs();
    var start = this.LucQuery.Skip;
    var maxtop = numDocs - start;
    if (maxtop < 1)
        return SearchResult.Empty;

    var user = this.LucQuery.User;
    var currentUser = AccessProvider.Current.GetCurrentUser();
    if (user == null)
        user = currentUser;
    var isCurrentUser = user.Id == currentUser.Id;

    int top = this.LucQuery.Top != 0 ? this.LucQuery.Top : this.LucQuery.PageSize;
    if (top == 0)
        top = int.MaxValue;

    var searcher = new IndexSearcher(idxReader);
    var p = new SearchParams
    {
        query = query,
        allVersions = allVersions,
        searcher = searcher,
        user = user,
        isCurrentUser = isCurrentUser,
        skip = start,
        timer = timer,
        top = top
    };

    SearchResult r = null;
    SearchResult r1 = null;
    try
    {
        var defaultTops = SenseNet.ContentRepository.Storage.StorageContext.Search.DefaultTopAndGrowth;
        var howManyList = new List<int>(defaultTops);
        if (howManyList[howManyList.Count - 1] == 0)
            howManyList[howManyList.Count - 1] = int.MaxValue;

        if (top < int.MaxValue)
        {
            var howMany = (top < int.MaxValue / 2) ? top * 2 : int.MaxValue; // numDocs; // * 4; // * 2;
            if ((long)howMany > maxtop)
                howMany = maxtop - start;
            while (howManyList.Count > 0)
            {
                if (howMany < howManyList[0])
                    break;
                howManyList.RemoveAt(0);
            }
            howManyList.Insert(0, howMany);
        }

        for (var i = 0; i < howManyList.Count; i++)
        {
            var defaultTop = howManyList[i];
            if (defaultTop == 0)
                defaultTop = numDocs;

            p.howMany = defaultTop;
            p.useHowMany = i < howManyList.Count - 1;
            var maxSize = i == 0 ? numDocs : r.totalCount;
            p.collectorSize = Math.Min(defaultTop, maxSize - p.skip) + p.skip;

            r1 = Search(p);

            if (i == 0)
                r = r1;
            else
                r.Add(r1);

            p.skip += r.nextIndex;
            p.top = top - r.result.Count;

            if (r.result.Count >= top || r.result.Count >= r.totalCount)
                break;
        }

        p.timer.Stop();
        return r;
    }
    finally
    {
        if (searcher != null)
        {
            searcher.Close();
            searcher = null;
        }
    }
}
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc" /> </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        this.maxDoc = reader.MaxDoc;
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions;
    }

    if (reader.MaxDoc != maxDoc) // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
    if (reader.NumDocs() != numDocs)
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());

    ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    foreach (var field in fields)
    {
        // update fieldToReader map;
        // do a ContainsKey first to mimic the Java behavior
        if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
            fieldToReader[field] = reader;
    }

    if (!ignoreStoredFields)
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
public static void AssertIndexEquals(IndexReader index1, IndexReader index2)
{
    Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
    Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
    Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
    Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");

    // check field names
    System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
    System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);

    System.Collections.Generic.ICollection<IFieldable> fields1 = null;
    System.Collections.Generic.ICollection<IFieldable> fields2 = null;

    Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
    System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
    System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
    while (it1.MoveNext() && it2.MoveNext())
    {
        Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
    }

    // check norms
    it1 = fieldsNames1.GetEnumerator();
    while (it1.MoveNext())
    {
        System.String curField = (System.String) it1.Current;
        byte[] norms1 = index1.Norms(curField);
        byte[] norms2 = index2.Norms(curField);
        if (norms1 != null && norms2 != null)
        {
            Assert.AreEqual(norms1.Length, norms2.Length);
            for (int i = 0; i < norms1.Length; i++)
            {
                Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
            }
        }
        else
        {
            Assert.AreSame(norms1, norms2);
        }
    }

    // check deletions
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
    }

    // check stored fields
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        if (!index1.IsDeleted(i))
        {
            Document doc1 = index1.Document(i);
            Document doc2 = index2.Document(i);
            fields1 = doc1.GetFields();
            fields2 = doc2.GetFields();
            Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
            it1 = fields1.GetEnumerator();
            it2 = fields2.GetEnumerator();
            while (it1.MoveNext() && it2.MoveNext())
            {
                Field curField1 = (Field) it1.Current;
                Field curField2 = (Field) it2.Current;
                Assert.AreEqual(curField1.Name, curField2.Name, "Different fields names for doc " + i + ".");
                Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
            }
        }
    }

    // check dictionary and posting lists
    TermEnum enum1 = index1.Terms();
    TermEnum enum2 = index2.Terms();
    TermPositions tp1 = index1.TermPositions();
    TermPositions tp2 = index2.TermPositions();
    while (enum1.Next())
    {
        Assert.IsTrue(enum2.Next());
        Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
        tp1.Seek(enum1.Term);
        tp2.Seek(enum1.Term);
        while (tp1.Next())
        {
            Assert.IsTrue(tp2.Next());
            Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in postinglist of term " + enum1.Term + ".");
            Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequency in postinglist of term " + enum1.Term + ".");
            for (int i = 0; i < tp1.Freq; i++)
            {
                Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term + ".");
            }
        }
    }
}