public virtual void TestMergeAfterCopy() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); SetUpDirs(dir, aux); IndexReader reader = IndexReader.Open(aux, false); for (int i = 0; i < 20; i++) { reader.DeleteDocument(i); } Assert.AreEqual(10, reader.NumDocs()); reader.Close(); IndexWriter writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(4); writer.MergeFactor = 4; writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); Assert.AreEqual(1020, writer.MaxDoc()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1020); }
public override void SetUp() { base.SetUp(); System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) { throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); } indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex")); Directory dir = FSDirectory.Open(indexDir); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); // add some documents Document doc = null; for (int i = 0; i < docsToAdd; i++) { doc = new Document(); doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc, null); } Assert.AreEqual(docsToAdd, writer.MaxDoc()); writer.Close(); dir.Close(); }
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs) { RAMDirectory d = new RAMDirectory(); IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null); for (int i = 0; i < numDeletedDocs; i++) { w.AddDocument(new Document(), null); } w.Commit(null); w.DeleteDocuments(null, new MatchAllDocsQuery()); w.Commit(null); if (0 < numDeletedDocs) { Assert.IsTrue(w.HasDeletions(null), "writer has no deletions"); } Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs"); Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs"); w.Close(); IndexReader r = IndexReader.Open((Directory)d, true, null); Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs"); r.Close(); return(d); }
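The examples on this page repeatedly contrast MaxDoc() with NumDocs(): MaxDoc() keeps counting documents that are only marked as deleted, while NumDocs() excludes them, and the two converge again once the deletions are merged away. Below is a minimal sketch of that behaviour, assuming the Lucene.NET 3.0.3-style API (RAMDirectory, WhitespaceAnalyzer, method-style MaxDoc()/NumDocs()) used by several snippets here; it is an illustration, not part of the original code.

using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class MaxDocVsNumDocsSketch
{
    public static void Run()
    {
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        for (int i = 0; i < 10; i++)
        {
            Document doc = new Document();
            doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }
        writer.Commit();

        writer.DeleteDocuments(new Term("id", "7"));
        writer.Commit(); // buffered deletes only affect NumDocs() once they are flushed

        // MaxDoc() still reports 10 (the deleted slot is counted); NumDocs() reports 9.
        System.Console.WriteLine("maxDoc=" + writer.MaxDoc() + " numDocs=" + writer.NumDocs());

        writer.Optimize(); // merging reclaims the deleted slot
        // now writer.MaxDoc() == writer.NumDocs() == 9
        System.Console.WriteLine("maxDoc=" + writer.MaxDoc() + " numDocs=" + writer.NumDocs());

        writer.Dispose();
        dir.Dispose();
    }
}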
public virtual void TestAddSelf() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); IndexWriter writer = null; writer = NewWriter(dir, true); // add 100 documents AddDocs(writer, 100); Assert.AreEqual(100, writer.MaxDoc()); writer.Close(); writer = NewWriter(aux, true); writer.UseCompoundFile = false; // use one without a compound file writer.SetMaxBufferedDocs(1000); // add 140 documents in separate files AddDocs(writer, 40); writer.Close(); writer = NewWriter(aux, true); writer.UseCompoundFile = false; // use one without a compound file writer.SetMaxBufferedDocs(1000); AddDocs(writer, 100); writer.Close(); writer = NewWriter(dir, false); try { // cannot add self writer.AddIndexesNoOptimize(new Directory[] { aux, dir }); Assert.IsTrue(false); } catch (System.ArgumentException) { Assert.AreEqual(100, writer.MaxDoc()); } writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 100); }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); for (int i = 0; i < docFields.Length; i++) { Document document = new Document(); document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document, null); } writer.Close(); searcher = new IndexSearcher(directory, true, null); // Make big index dir2 = new MockRAMDirectory(directory); // First multiply small test index: mulFactor = 1; int docCount = 0; do { Directory copy = new RAMDirectory(dir2, null); IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null); indexWriter.AddIndexesNoOptimize(null, new[] { copy }); docCount = indexWriter.MaxDoc(); indexWriter.Close(); mulFactor *= 2; } while (docCount < 3000); IndexWriter w = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null); Document doc = new Document(); doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { w.AddDocument(doc, null); } doc = new Document(); doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { w.AddDocument(doc, null); } // optimize to 1 segment w.Optimize(null); reader = w.GetReader(null); w.Close(); bigSearcher = new IndexSearcher(reader); }
public virtual void TestStallControl() { int[] numThreads = new int[] { 4 + Random().Next(8), 1 }; int numDocumentsToIndex = 50 + Random().Next(50); for (int i = 0; i < numThreads.Length; i++) { AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); MockDirectoryWrapper dir = NewMockDirectory(); // mock a very slow hard disk sometimes here so that flushing is very slow dir.Throttling = MockDirectoryWrapper.Throttling_e.SOMETIMES; IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy(); iwc.SetFlushPolicy(flushPolicy); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numThreads[i] == 1 ? 1 : 2); iwc.SetIndexerThreadPool(threadPool); // with such a small ram buffer we should be stalled quite quickly iwc.SetRAMBufferSizeMB(0.25); IndexWriter writer = new IndexWriter(dir, iwc); IndexThread[] threads = new IndexThread[numThreads[i]]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due"); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc()); if (numThreads[i] == 1) { Assert.IsFalse(docsWriter.FlushControl.StallControl.HasBlocked(), "single thread must not block numThreads: " + numThreads[i]); } if (docsWriter.FlushControl.PeakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d)) { Assert.IsTrue(docsWriter.FlushControl.StallControl.WasStalled()); } AssertActiveBytesAfter(flushControl); writer.Dispose(true); dir.Dispose(); } }
public virtual void TestFlushDocCount() { int[] numThreads = new int[] { 2 + AtLeast(1), 1 }; for (int i = 0; i < numThreads.Length; i++) { int numDocumentsToIndex = 50 + AtLeast(30); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = NewDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetFlushPolicy(flushPolicy); int numDWPT = 1 + AtLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); iwc.SetMaxBufferedDocs(2 + AtLeast(10)); iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; Assert.IsTrue(flushPolicy.FlushOnDocCount()); Assert.IsFalse(flushPolicy.FlushOnDeleteTerms()); Assert.IsFalse(flushPolicy.FlushOnRAM()); DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads[i]]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads[i], writer, LineDocFile, false); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads[i]); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc()); Assert.IsTrue(flushPolicy.PeakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak doc count without flush exceeded watermark"); AssertActiveBytesAfter(flushControl); writer.Dispose(); Assert.AreEqual(0, flushControl.ActiveBytes()); dir.Dispose(); } }
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged) { // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName)); TestUtil.Rm(indexDir); Directory dir = NewFSDirectory(indexDir); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; mp.MaxCFSSegmentSizeMB = double.PositiveInfinity; // TODO: remove randomness IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp); IndexWriter writer = new IndexWriter(dir, conf); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count"); if (fullyMerged) { writer.ForceMerge(1); } writer.Dispose(); if (!fullyMerged) { // open fresh writer so we get no prx file in the added segment mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; // TODO: remove randomness conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp); writer = new IndexWriter(dir, conf); AddNoProxDoc(writer); writer.Dispose(); conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES); writer = new IndexWriter(dir, conf); Term searchTerm = new Term("id", "7"); writer.DeleteDocuments(searchTerm); writer.Dispose(); } dir.Dispose(); return(indexDir); }
public virtual void TestMergeDocCount0() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null); writer.SetMergePolicy(new LogDocMergePolicy(writer)); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 100; for (int i = 0; i < 250; i++) { AddDoc(writer); CheckInvariants(writer); } writer.Close(); IndexReader reader = IndexReader.Open(dir, false, null); reader.DeleteDocuments(new Term("content", "aaa"), null); reader.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED, null); writer.SetMergePolicy(new LogDocMergePolicy(writer)); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 5; // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { AddDoc(writer); } writer.Commit(null); ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync(); writer.Commit(null); CheckInvariants(writer); Assert.AreEqual(10, writer.MaxDoc()); writer.Close(); }
public virtual void TestNoMergeAfterCopy() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); SetUpDirs(dir, aux); IndexWriter writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 4; writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); Assert.AreEqual(1060, writer.MaxDoc()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1060); }
public virtual void TestCrashAfterReopen() { IndexWriter writer = InitIndex(); MockRAMDirectory dir = (MockRAMDirectory)writer.Directory; writer.Close(); writer = InitIndex(dir); Assert.AreEqual(314, writer.MaxDoc()); Crash(writer); /* * System.out.println("\n\nTEST: open reader"); * String[] l = dir.list(); * Arrays.sort(l); * for(int i=0;i<l.length;i++) * System.out.println("file " + i + " = " + l[i] + " " + * dir.fileLength(l[i]) + " bytes"); */ IndexReader reader = IndexReader.Open((Directory)dir, false, null); Assert.IsTrue(reader.NumDocs() >= 157); }
public virtual void CreateIndex(System.String dirName, bool doCFS) { RmDir(dirName); dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.UseCompoundFile = doCFS; writer.SetMaxBufferedDocs(10); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count"); writer.Close(); // open fresh writer so we get no prx file in the added segment writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.UseCompoundFile = doCFS; writer.SetMaxBufferedDocs(10); AddNoProxDoc(writer); writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); }
public virtual void TestNoCopySegments() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); SetUpDirs(dir, aux); IndexWriter writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(9); writer.MergeFactor = 4; AddDocs(writer, 2); writer.AddIndexesNoOptimize(null, new Directory[] { aux }); Assert.AreEqual(1032, writer.MaxDoc()); Assert.AreEqual(2, writer.GetSegmentCount()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1032); }
public void OpenWriterWithCommit() { SegmentsGenCommit sgCommit = new SegmentsGenCommit(this.directory); IndexWriter writer = new IndexWriter(this.directory, new WhitespaceAnalyzer(), null, IndexWriter.MaxFieldLength.UNLIMITED, sgCommit); Assert.AreEqual(10, writer.MaxDoc()); IndexReader reader = writer.GetReader(); IndexCommit commit = reader.GetIndexCommit(); Assert.AreEqual(commit.GetGeneration(), sgCommit.GetGeneration()); Assert.AreEqual(commit.GetSegmentsFileName(), sgCommit.GetSegmentsFileName()); }
private void GenerateRandomDocs(int numCats, int numDocs) { using (var dir = new IsolatedStorageDirectory(IndexDir)) { Random rgen = new Random(); string[] categories = Enumerable.Range(0, numCats).Select(x => RandomString(4, rgen)).ToArray(); IEnumerable<Document> docs = Enumerable.Range(0, numDocs).Select(x => RandomDocument(categories[rgen.Next(0, numCats)], rgen)); using (IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true)) { System.Threading.Tasks.Parallel.ForEach(docs, d => { writer.AddDocument(d); //multi-threaded access to writer }); Assert.AreEqual(docs.Count(), writer.MaxDoc(), "Unexpected error in \"writer.AddDocument\""); } } }
protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled) { int numDocumentsToIndex = 10 + AtLeast(30); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = NewDirectory(); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy); int numDWPT = 1 + AtLeast(2); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); iwc.SetRAMBufferSizeMB(maxRamMB); iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; Assert.IsFalse(flushPolicy.FlushOnDocCount()); Assert.IsFalse(flushPolicy.FlushOnDeleteTerms()); Assert.IsTrue(flushPolicy.FlushOnRAM()); DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due numThreads=" + numThreads); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc()); Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); AssertActiveBytesAfter(flushControl); if (flushPolicy.HasMarkedPending) { Assert.IsTrue(maxRAMBytes < flushControl.PeakActiveBytes); } if (ensureNotStalled) { Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled()); } writer.Dispose(); Assert.AreEqual(0, flushControl.ActiveBytes()); dir.Dispose(); }
internal DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) { InitBlock(); this.directory = directory; this.writer = writer; this.similarity = writer.GetSimilarity(); flushedDocCount = writer.MaxDoc(); consumer = indexingChain.GetChain(this); if (consumer is DocFieldProcessor) { docFieldProcessor = (DocFieldProcessor) consumer; } }
public virtual void TestSimpleCase() { // main directory Directory dir = new RAMDirectory(); // two auxiliary directories Directory aux = new RAMDirectory(); Directory aux2 = new RAMDirectory(); IndexWriter writer = null; writer = NewWriter(dir, true); // add 100 documents AddDocs(writer, 100); Assert.AreEqual(100, writer.MaxDoc()); writer.Close(); writer = NewWriter(aux, true); writer.UseCompoundFile = false; // use one without a compound file // add 40 documents in separate files AddDocs(writer, 40); Assert.AreEqual(40, writer.MaxDoc()); writer.Close(); writer = NewWriter(aux2, true); // add 50 documents in compound files AddDocs2(writer, 50); Assert.AreEqual(50, writer.MaxDoc()); writer.Close(); // test doc count before segments are merged writer = NewWriter(dir, false); Assert.AreEqual(100, writer.MaxDoc()); writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 }); Assert.AreEqual(190, writer.MaxDoc()); writer.Close(); // make sure the old index is correct VerifyNumDocs(aux, 40); // make sure the new index is correct VerifyNumDocs(dir, 190); // now add another set in. Directory aux3 = new RAMDirectory(); writer = NewWriter(aux3, true); // add 40 documents AddDocs(writer, 40); Assert.AreEqual(40, writer.MaxDoc()); writer.Close(); // test doc count before segments are merged/index is optimized writer = NewWriter(dir, false); Assert.AreEqual(190, writer.MaxDoc()); writer.AddIndexesNoOptimize(new Directory[] { aux3 }); Assert.AreEqual(230, writer.MaxDoc()); writer.Close(); // make sure the new index is correct VerifyNumDocs(dir, 230); VerifyTermDocs(dir, new Term("content", "aaa"), 180); VerifyTermDocs(dir, new Term("content", "bbb"), 50); // now optimize it. writer = NewWriter(dir, false); writer.Optimize(); writer.Close(); // make sure the new index is correct VerifyNumDocs(dir, 230); VerifyTermDocs(dir, new Term("content", "aaa"), 180); VerifyTermDocs(dir, new Term("content", "bbb"), 50); // now add a single document Directory aux4 = new RAMDirectory(); writer = NewWriter(aux4, true); AddDocs2(writer, 1); writer.Close(); writer = NewWriter(dir, false); Assert.AreEqual(230, writer.MaxDoc()); writer.AddIndexesNoOptimize(new Directory[] { aux4 }); Assert.AreEqual(231, writer.MaxDoc()); writer.Close(); VerifyNumDocs(dir, 231); VerifyTermDocs(dir, new Term("content", "bbb"), 51); }
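TestMergeAfterCopy and TestNoMergeAfterCopy above show the two possible outcomes when AddIndexesNoOptimize pulls in an auxiliary index that carries deletions: if the copied segments get merged, the deleted slots disappear from MaxDoc(); if they are copied as-is, MaxDoc() keeps counting them until a later merge. The following is a condensed, hedged sketch of that effect (same 3.0.3-style API and usings as the first sketch above; the exact MaxDoc() value after the add depends on the merge policy in effect).

Directory main = new RAMDirectory();
Directory aux = new RAMDirectory();

// Build an auxiliary index with 30 documents and delete 10 of them.
IndexWriter auxWriter = new IndexWriter(aux, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < 30; i++)
{
    Document doc = new Document();
    doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    auxWriter.AddDocument(doc);
}
auxWriter.Commit();
for (int i = 0; i < 10; i++)
{
    auxWriter.DeleteDocuments(new Term("id", i.ToString()));
}
auxWriter.Dispose(); // disposing commits the buffered deletes

// Copy the auxiliary index into the main index.
IndexWriter mainWriter = new IndexWriter(main, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
mainWriter.AddIndexesNoOptimize(new Directory[] { aux });
// NumDocs() is 20 either way; MaxDoc() is 20 if the add merged the segments,
// and up to 30 if they were copied verbatim together with their .del files.
System.Console.WriteLine("numDocs=" + mainWriter.NumDocs() + " maxDoc=" + mainWriter.MaxDoc());
mainWriter.Optimize(); // after a full merge MaxDoc() == NumDocs() == 20
mainWriter.Dispose();
aux.Dispose();
main.Dispose();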
public virtual void TestMergeDocCount0() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetMergePolicy(new LogDocMergePolicy(writer)); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 100; for (int i = 0; i < 250; i++) { AddDoc(writer); CheckInvariants(writer); } writer.Close(); IndexReader reader = IndexReader.Open(dir, false); reader.DeleteDocuments(new Term("content", "aaa")); reader.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetMergePolicy(new LogDocMergePolicy(writer)); writer.SetMaxBufferedDocs(10); writer.MergeFactor = 5; // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { AddDoc(writer); } writer.Commit(); ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync(); writer.Commit(); CheckInvariants(writer); Assert.AreEqual(10, writer.MaxDoc()); writer.Close(); }
public virtual void TestMergeAfterCopy() { // main directory Directory dir = NewDirectory(); // auxiliary directory Directory aux = NewDirectory(); SetUpDirs(dir, aux, true); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); IndexWriter writer = new IndexWriter(aux, dontMergeConfig); for (int i = 0; i < 20; i++) { writer.DeleteDocuments(new Term("id", "" + i)); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(aux); Assert.AreEqual(10, reader.NumDocs()); reader.Dispose(); writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(4).SetMergePolicy(NewLogMergePolicy(4))); if (VERBOSE) { Console.WriteLine("\nTEST: now addIndexes"); } writer.AddIndexes(aux, new MockDirectoryWrapper(Random(), new RAMDirectory(aux, NewIOContext(Random())))); Assert.AreEqual(1020, writer.MaxDoc()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Dispose(); dir.Dispose(); aux.Dispose(); }
public int GetIndexSize() { return(writer.MaxDoc()); }
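Note that GetIndexSize above returns MaxDoc(), which still counts documents that have merely been marked as deleted. If the intended meaning is the number of searchable documents, NumDocs() is the safer value to report; a hedged variant follows (GetLiveDocumentCount is an illustrative name, not part of the original code).

public int GetLiveDocumentCount()
{
    // Unlike MaxDoc(), NumDocs() excludes documents whose deletions
    // have been flushed but not yet merged away.
    return (writer.NumDocs());
}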
/// <summary> /// Update Index by Language /// </summary> /// <param name="indexDir"></param> /// <param name="lng"></param> public void UpdateIndex(Lucene.Net.Store.Directory indexDir, String lng) { #if DEBUG T.TraceMessage("Begin full indexing, language {0}, time: {1}", lng, DateTime.Now.ToLongTimeString()); #endif IndexWriter writer = null; int step = 1000; int totalDoc = 0; try { DateTime beginTime = DateTime.Now; int num = 0, numPar = 0; num = new IssuesDao().GetTotalPublicationByLanguage(lng); if (IndexWriter.IsLocked(indexDir)) IndexWriter.Unlock(indexDir); writer = new IndexWriter(indexDir, this.Analizer, true, new IndexWriter.MaxFieldLength(2500000)); // change number of documents to store in memory before writing them to the disk //writer.SetMergeFactor(); if (num != 0) { if (step >= num) { long ini = DateTime.Now.Millisecond; totalDoc = Index(writer, 1, num, lng); long fin = DateTime.Now.Millisecond; } else { int numStep = (num / step); int cont = 1; int start = 0; int end = 0; for (int i = 0; i < numStep; i++) { start = (cont + (i * step)); end = start + step; DateTime ini = DateTime.Now; numPar = Index(writer, start, end, lng); DateTime fin = DateTime.Now; TimeSpan ts = fin.Subtract(ini); #if DEBUG T.TraceMessage("Indexing from {0} to {1} took {2} seconds", start, end, (ts.Minutes * 60 + ts.Seconds)); #endif } if (num != end) { start = end; end = num; numPar = Index(writer, start, end + 1, lng); } } } DateTime endTime = DateTime.Now; TimeSpan tsFull = endTime.Subtract(beginTime); // Commit the writer and capture the document count before disposing it writer.Commit(); int indexedDocs = writer.MaxDoc(); writer.Dispose(); #if DEBUG T.TraceMessage("Full indexing took {0} seconds, indexed {1} documents", ((tsFull.Hours * 3600) + (tsFull.Minutes * 60) + tsFull.Seconds), indexedDocs); #endif } catch (Exception ex) { if (writer != null) { writer.Optimize(); writer.Commit(); writer.Dispose(); } T.TraceError("Error Full Index, class {0}, language {1} ", CLASS_NAME, lng); T.TraceError(ex); throw; } }
public int IndexPublication(Lucene.Net.Store.Directory indexDir, IssueDocumentDto bean) { Document doc = null; IndexWriter idxModif = null; int numIndexed = 0; try { if (IndexWriter.IsLocked(indexDir)) IndexWriter.Unlock(indexDir); idxModif = new IndexWriter(indexDir, this.Analizer, false, new IndexWriter.MaxFieldLength(2500000)); doc = new Document(); doc.Add(new Field("issue_id", bean.IssueId.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.CompanyId != -1) doc.Add(new Field("company_id", bean.CompanyId.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.IssueIdPerCompany != -1) doc.Add(new Field("issue_per_company", bean.IssueIdPerCompany.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.PriorityId != -1) doc.Add(new Field("priority_id", bean.PriorityId.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.StatusId != -1) doc.Add(new Field("status_id", bean.StatusId.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (!string.IsNullOrEmpty(bean.CurrentOwner)) doc.Add(new Field("curr_owner", bean.CurrentOwner, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (!string.IsNullOrEmpty(bean.LastOwner)) doc.Add(new Field("last_owner", bean.LastOwner, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (!string.IsNullOrEmpty(bean.ProjectName)) doc.Add(new Field("project_name", bean.ProjectName, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.AssignedCUDate != DateTime.MinValue) doc.Add(new Field("assigned_cu_date", DateTools.DateToString(bean.AssignedCUDate, DateTools.Resolution.DAY), Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED)); if (bean.DestinationDate != DateTime.MinValue) doc.Add(new Field("destination_date", DateTools.DateToString(bean.DestinationDate, DateTools.Resolution.DAY), Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED)); if (!string.IsNullOrEmpty(bean.DescLast)) doc.Add(new Field("desc_last", bean.DescLast, Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED)); if (bean.ReadUsersBitIds1 != -1) doc.Add(new Field("read_usr_bit1", bean.ReadUsersBitIds1.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.ReadUsersBitIds2 != -1) doc.Add(new Field("read_usr_bit2", bean.ReadUsersBitIds2.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.LastUpdate != DateTime.MinValue) doc.Add(new Field("last_update", DateTools.DateToString(bean.LastUpdate, DateTools.Resolution.DAY), Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED)); if (bean.StartDate != DateTime.MinValue) doc.Add(new Field("start_date", DateTools.DateToString(bean.StartDate, DateTools.Resolution.DAY), Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED)); if (bean.IsAllDay) doc.Add(new Field("is_all_day", "1", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); else doc.Add(new Field("is_all_day", "0", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); if (bean.DestReminderDate != DateTime.MinValue) doc.Add(new Field("dest_rem_date", DateTools.DateToString(bean.DestReminderDate, DateTools.Resolution.DAY), Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED)); if (bean.ReccurenceId != -1) doc.Add(new Field("reccurence_id", bean.ReccurenceId.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); idxModif.AddDocument(doc); idxModif.Optimize(); idxModif.Commit(); numIndexed = idxModif.MaxDoc(); idxModif.Dispose(); } catch { if (idxModif != null) { idxModif.Optimize(); idxModif.Commit(); idxModif.Dispose(); } throw; } return numIndexed; }
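One caveat about IndexPublication above: numIndexed is set to idxModif.MaxDoc(), i.e. the total number of documents in the index after the add (including any not-yet-merged deletions), not the number of documents added by this call. If the caller actually needs the per-call count, measuring the delta is more direct; a small hedged sketch against the same idxModif writer:

int before = idxModif.MaxDoc();
idxModif.AddDocument(doc);
idxModif.Commit();
int addedByThisCall = idxModif.MaxDoc() - before; // 1 for a single AddDocument
int totalInIndex = idxModif.MaxDoc();             // what the method currently returns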
public virtual void TestRandom() { int numThreads = 1 + Random().Next(8); int numDocumentsToIndex = 50 + AtLeast(70); AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); iwc.SetFlushPolicy(flushPolicy); int numDWPT = 1 + Random().Next(8); DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT); iwc.SetIndexerThreadPool(threadPool); IndexWriter writer = new IndexWriter(dir, iwc); flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy; DocumentsWriter docsWriter = writer.DocsWriter; Assert.IsNotNull(docsWriter); DocumentsWriterFlushControl flushControl = docsWriter.FlushControl; Assert.AreEqual(0, flushControl.FlushBytes(), " bytes must be 0 after init"); IndexThread[] threads = new IndexThread[numThreads]; for (int x = 0; x < threads.Length; x++) { threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, true); threads[x].Start(); } for (int x = 0; x < threads.Length; x++) { threads[x].Join(); } Assert.AreEqual(0, flushControl.FlushBytes(), " all flushes must be due"); Assert.AreEqual(numDocumentsToIndex, writer.NumDocs()); Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc()); if (flushPolicy.FlushOnRAM() && !flushPolicy.FlushOnDocCount() && !flushPolicy.FlushOnDeleteTerms()) { long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0); Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark"); if (flushPolicy.HasMarkedPending) { Assert.IsTrue(maxRAMBytes <= flushControl.PeakActiveBytes, "max: " + maxRAMBytes + " " + flushControl.PeakActiveBytes); } } AssertActiveBytesAfter(flushControl); writer.Commit(); Assert.AreEqual(0, flushControl.ActiveBytes()); IndexReader r = DirectoryReader.Open(dir); Assert.AreEqual(numDocumentsToIndex, r.NumDocs()); Assert.AreEqual(numDocumentsToIndex, r.MaxDoc()); if (!flushPolicy.FlushOnRAM()) { Assert.IsFalse(docsWriter.FlushControl.StallControl.WasStalled(), "never stall if we don't flush on RAM"); Assert.IsFalse(docsWriter.FlushControl.StallControl.HasBlocked(), "never block if we don't flush on RAM"); } r.Dispose(); writer.Dispose(); dir.Dispose(); }
public virtual void TestExpungeDeletes() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(2); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); Document document = new Document(); document = new Document(); Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO); document.Add(storedField); Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(termVectorField); for (int i = 0; i < 10; i++) writer.AddDocument(document); writer.Close(); IndexReader ir = IndexReader.Open(dir); Assert.AreEqual(10, ir.MaxDoc()); Assert.AreEqual(10, ir.NumDocs()); ir.DeleteDocument(0); ir.DeleteDocument(7); Assert.AreEqual(8, ir.NumDocs()); ir.Close(); writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Assert.AreEqual(8, writer.NumDocs()); Assert.AreEqual(10, writer.MaxDoc()); writer.ExpungeDeletes(); Assert.AreEqual(8, writer.NumDocs()); writer.Close(); ir = IndexReader.Open(dir); Assert.AreEqual(8, ir.MaxDoc()); Assert.AreEqual(8, ir.NumDocs()); ir.Close(); dir.Close(); }
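TestExpungeDeletes condenses to the following sequence: deletions widen the gap between MaxDoc() and NumDocs(), and ExpungeDeletes() (renamed ForceMergeDeletes() in the 4.x snippets further down) closes it again without a full Optimize(). The essential assertions, assuming the same 2.9/3.x-style API as the test above:

// After deleting 2 of the 10 documents through an IndexReader and reopening the writer:
Assert.AreEqual(8, writer.NumDocs());  // live documents
Assert.AreEqual(10, writer.MaxDoc());  // deleted slots are still counted
writer.ExpungeDeletes();               // merges only the segments that carry deletions
Assert.AreEqual(8, writer.NumDocs());
Assert.AreEqual(8, writer.MaxDoc());   // the deleted slots have been reclaimed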
internal DocumentsWriter(Directory directory, IndexWriter writer) { this.directory = directory; this.writer = writer; this.similarity = writer.GetSimilarity(); flushedDocCount = writer.MaxDoc(); byteBlockAllocator = new ByteBlockAllocator(this); waitQueue = new WaitQueue(this); /* This is the current indexing chain: DocConsumer / DocConsumerPerThread --> code: DocFieldProcessor / DocFieldProcessorPerThread --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField --> code: DocInverter / DocInverterPerThread / DocInverterPerField --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField --> code: TermsHash / TermsHashPerThread / TermsHashPerField --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField */ // TODO FI: this should be something the user can pass in // Build up indexing chain: TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this); TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter, new TermsHash(this, false, termVectorsWriter, null)); NormsWriter normsWriter = new NormsWriter(); DocInverter docInverter = new DocInverter(termsHash, normsWriter); StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this); DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter); consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers); }
public virtual void TestDocCount() { Directory dir = new RAMDirectory(); IndexWriter writer = null; IndexReader reader = null; int i; IndexWriter.SetDefaultWriteLockTimeout(2000); Assert.AreEqual(2000, IndexWriter.GetDefaultWriteLockTimeout()); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); IndexWriter.SetDefaultWriteLockTimeout(1000); // add 100 documents for (i = 0; i < 100; i++) { AddDoc(writer); } Assert.AreEqual(100, writer.DocCount()); writer.Close(); // delete 40 documents reader = IndexReader.Open(dir); for (i = 0; i < 40; i++) { reader.DeleteDocument(i); } reader.Close(); // test doc count before segments are merged/index is optimized writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Assert.AreEqual(100, writer.DocCount()); writer.Close(); reader = IndexReader.Open(dir); Assert.AreEqual(100, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // optimize the index and check that the new doc count is correct writer = new IndexWriter(dir, true, new WhitespaceAnalyzer()); Assert.AreEqual(100, writer.MaxDoc()); Assert.AreEqual(60, writer.NumDocs()); writer.Optimize(); Assert.AreEqual(60, writer.MaxDoc()); Assert.AreEqual(60, writer.NumDocs()); writer.Close(); // check that the index reader gives the same numbers. reader = IndexReader.Open(dir); Assert.AreEqual(60, reader.MaxDoc()); Assert.AreEqual(60, reader.NumDocs()); reader.Close(); // make sure opening a new index for create over // this existing one works correctly: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Assert.AreEqual(0, writer.MaxDoc()); Assert.AreEqual(0, writer.NumDocs()); writer.Close(); }
public virtual void TestMoreMerges() { // main directory Directory dir = NewDirectory(); // auxiliary directory Directory aux = NewDirectory(); Directory aux2 = NewDirectory(); SetUpDirs(dir, aux, true); IndexWriter writer = NewWriter(aux2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(10))); writer.AddIndexes(aux); Assert.AreEqual(30, writer.MaxDoc()); Assert.AreEqual(3, writer.SegmentCount); writer.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(aux, dontMergeConfig); for (int i = 0; i < 27; i++) { writer.DeleteDocuments(new Term("id", "" + i)); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(aux); Assert.AreEqual(3, reader.NumDocs()); reader.Dispose(); dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(aux2, dontMergeConfig); for (int i = 0; i < 8; i++) { writer.DeleteDocuments(new Term("id", "" + i)); } writer.Dispose(); reader = DirectoryReader.Open(aux2); Assert.AreEqual(22, reader.NumDocs()); reader.Dispose(); writer = NewWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetMaxBufferedDocs(6).SetMergePolicy(NewLogMergePolicy(4))); writer.AddIndexes(aux, aux2); Assert.AreEqual(1040, writer.MaxDoc()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Dispose(); dir.Dispose(); aux.Dispose(); aux2.Dispose(); }
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged) { // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/4x/", dirName)); TestUtil.Rm(indexDir); Directory dir = NewFSDirectory(indexDir); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; mp.MaxCFSSegmentSizeMB = double.PositiveInfinity; // TODO: remove randomness IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS); IndexWriter writer = new IndexWriter(dir, conf); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count"); if (fullyMerged) { writer.ForceMerge(1); } writer.Dispose(); if (!fullyMerged) { // open fresh writer so we get no prx file in the added segment mp = new LogByteSizeMergePolicy(); mp.NoCFSRatio = doCFS ? 1.0 : 0.0; // TODO: remove randomness conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS); writer = new IndexWriter(dir, conf); AddNoProxDoc(writer); writer.Dispose(); writer = new IndexWriter(dir, conf.SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES)); Term searchTerm = new Term("id", "7"); writer.DeleteDocuments(searchTerm); writer.Dispose(); } dir.Dispose(); return indexDir; }
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */ public virtual void ChangeIndexWithAdds(System.String dirName) { System.String origDirName = dirName; dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); // open writer IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); // add 10 docs for (int i = 0; i < 10; i++) { AddDoc(writer, 35 + i); } // make sure writer sees right total -- writer seems not to know about deletes in .del? int expected; if (Compare(origDirName, "24") < 0) { expected = 45; } else { expected = 46; } Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count"); writer.Close(); // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Document d = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d.Get("id"), "wrong first document"); TestHits(hits, 44, searcher.IndexReader); searcher.Close(); // make sure we can do delete & setNorm against this // pre-lockless segment: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "6"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "wrong delete count"); reader.SetNorm(22, "content", (float)2.0); reader.Close(); // make sure they "took": searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 43, searcher.IndexReader); searcher.Close(); // optimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); TestHits(hits, 43, searcher.IndexReader); Assert.AreEqual("22", d.Get("id"), "wrong first document"); searcher.Close(); dir.Close(); }
public virtual void TestForceMergeDeletes() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)); Document document = new Document(); FieldType customType = new FieldType(); customType.Stored = true; FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED); customType1.Tokenized = false; customType1.StoreTermVectors = true; customType1.StoreTermVectorPositions = true; customType1.StoreTermVectorOffsets = true; Field idField = NewStringField("id", "", Field.Store.NO); document.Add(idField); Field storedField = NewField("stored", "stored", customType); document.Add(storedField); Field termVectorField = NewField("termVector", "termVector", customType1); document.Add(termVectorField); for (int i = 0; i < 10; i++) { idField.StringValue = "" + i; writer.AddDocument(document); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); Assert.AreEqual(10, ir.MaxDoc()); Assert.AreEqual(10, ir.NumDocs()); ir.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, dontMergeConfig); writer.DeleteDocuments(new Term("id", "0")); writer.DeleteDocuments(new Term("id", "7")); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(8, ir.NumDocs()); ir.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); Assert.AreEqual(8, writer.NumDocs()); Assert.AreEqual(10, writer.MaxDoc()); writer.ForceMergeDeletes(); Assert.AreEqual(8, writer.NumDocs()); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(8, ir.MaxDoc()); Assert.AreEqual(8, ir.NumDocs()); ir.Dispose(); dir.Dispose(); }
public virtual void TestExactFileNames() { System.String outputDir = "lucene.backwardscompat0.index"; RmDir(outputDir); try { Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir))); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetRAMBufferSizeMB(16.0); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count"); writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm"); int contentFieldIndex = -1; for (int i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.name_ForNUnit.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment"); // Now verify file names: System.String[] expected; expected = new System.String[] { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" }; System.String[] actual = dir.ListAll(); System.Array.Sort(expected); System.Array.Sort(actual); if (!CollectionsHelper.Equals(expected, actual)) { Assert.Fail("incorrect filenames in index: expected:\n " + AsString(expected) + "\n actual:\n " + AsString(actual)); } dir.Close(); } finally { RmDir(outputDir); } }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < docFields.Length; i++) { Document document = new Document(); document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document); } writer.Close(); searcher = new IndexSearcher(directory, true); // Make big index dir2 = new MockRAMDirectory(directory); // First multiply small test index: mulFactor = 1; int docCount = 0; do { Directory copy = new RAMDirectory(dir2); IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); indexWriter.AddIndexesNoOptimize(new[] {copy}); docCount = indexWriter.MaxDoc(); indexWriter.Close(); mulFactor *= 2; } while (docCount < 3000); IndexWriter w = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { w.AddDocument(doc); } doc = new Document(); doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { w.AddDocument(doc); } // optimize to 1 segment w.Optimize(); reader = w.GetReader(); w.Close(); bigSearcher = new IndexSearcher(reader); }
public virtual void runTest(Directory directory, MergeScheduler merger) { IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED, null); writer.SetMaxBufferedDocs(2); if (merger != null) { writer.SetMergeScheduler(merger, null); } for (int iter = 0; iter < NUM_ITER; iter++) { int iterFinal = iter; writer.MergeFactor = 1000; for (int i = 0; i < 200; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d, null); } writer.MergeFactor = 4; //writer.setInfoStream(System.out); ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { int iFinal = i; IndexWriter writerFinal = writer; threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); } Assert.IsTrue(!failed); int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS))); // System.out.println("TEST: now index=" + writer.segString()); Assert.AreEqual(expectedDocCount, writer.MaxDoc()); writer.Close(); writer = new IndexWriter(directory, ANALYZER, false, IndexWriter.MaxFieldLength.UNLIMITED, null); writer.SetMaxBufferedDocs(2); IndexReader reader = IndexReader.Open(directory, true, null); Assert.IsTrue(reader.IsOptimized()); Assert.AreEqual(expectedDocCount, reader.NumDocs()); reader.Close(); } writer.Close(); }
public virtual void runTest(Directory directory, MergeScheduler merger) { IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetMaxBufferedDocs(2); if (merger != null) writer.SetMergeScheduler(merger); for (int iter = 0; iter < NUM_ITER; iter++) { int iterFinal = iter; writer.MergeFactor = 1000; for (int i = 0; i < 200; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.MergeFactor = 4; //writer.setInfoStream(System.out); ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { int iFinal = i; IndexWriter writerFinal = writer; threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this); } for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); for (int i = 0; i < NUM_THREADS; i++) threads[i].Join(); Assert.IsTrue(!failed); int expectedDocCount = (int) ((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS))); // System.out.println("TEST: now index=" + writer.segString()); Assert.AreEqual(expectedDocCount, writer.MaxDoc()); writer.Close(); writer = new IndexWriter(directory, ANALYZER, false, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetMaxBufferedDocs(2); IndexReader reader = IndexReader.Open(directory, true); Assert.IsTrue(reader.IsOptimized()); Assert.AreEqual(expectedDocCount, reader.NumDocs()); reader.Close(); } writer.Close(); }