private IndexWriterConfig NewWriterConfig()
{
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    // prevent any merges by default.
    conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
    return conf;
}
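// Illustrative sketch only (not one of the original tests): how NewWriterConfig() might be
// used. With auto-flush and merging disabled, segments are created only by explicit
// Commit()/Flush() calls. The test name and assertions below are assumptions.
[Test]
public virtual void TestNoAutoFlushSketch()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewWriterConfig());
    Document doc = new Document();
    doc.Add(NewTextField("field", "some text", Field.Store.NO));
    writer.AddDocument(doc);
    Assert.AreEqual(1, writer.MaxDoc); // MaxDoc also counts docs still buffered in RAM
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount); // a single segment after the first commit, since merging is off
    writer.Dispose();
    dir.Dispose();
}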
[Slow] // LUCENENET: occasionally
public virtual void TestStallControl()
{
    // LUCENENET specific - disable the test if asserts are not enabled
    AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled);

    int[] numThreads = new int[] { 4 + Random.Next(8), 1 };
    int numDocumentsToIndex = 50 + Random.Next(50);
    for (int i = 0; i < numThreads.Length; i++)
    {
        AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex);
        MockDirectoryWrapper dir = NewMockDirectory();
        // mock a very slow hard disk sometimes here so that flushing is very slow
        dir.Throttling = Throttling.SOMETIMES;
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
        iwc.SetFlushPolicy(flushPolicy);

        DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(numThreads[i] == 1 ? 1 : 2);
        iwc.SetIndexerThreadPool(threadPool);
        // with such a small ram buffer we should be stalled quite quickly
        iwc.SetRAMBufferSizeMB(0.25);
        IndexWriter writer = new IndexWriter(dir, iwc);
        IndexThread[] threads = new IndexThread[numThreads[i]];
        for (int x = 0; x < threads.Length; x++)
        {
            threads[x] = new IndexThread(numDocs, writer, lineDocFile, false);
            threads[x].Start();
        }

        for (int x = 0; x < threads.Length; x++) { threads[x].Join(); }

        DocumentsWriter docsWriter = writer.DocsWriter;
        Assert.IsNotNull(docsWriter);
        DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
        Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due");

        Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
        Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
        if (numThreads[i] == 1)
        {
            assertFalse("single thread must not block numThreads: " + numThreads[i], docsWriter.flushControl.stallControl.HasBlocked);
        }
        if (docsWriter.flushControl.peakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d))
        {
            Assert.IsTrue(docsWriter.flushControl.stallControl.WasStalled);
        }
        AssertActiveBytesAfter(flushControl);
        writer.Dispose(true);
        dir.Dispose();
    }
}
public virtual void TestFlushDocCount()
{
    // LUCENENET specific - disable the test if asserts are not enabled
    AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled);

    int[] numThreads = new int[] { 2 + AtLeast(1), 1 };
    for (int i = 0; i < numThreads.Length; i++)
    {
        int numDocumentsToIndex = 50 + AtLeast(30);
        AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex);
        Directory dir = NewDirectory();
        MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetFlushPolicy(flushPolicy);

        int numDWPT = 1 + AtLeast(2);
        DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(numDWPT);
        iwc.SetIndexerThreadPool(threadPool);
        iwc.SetMaxBufferedDocs(2 + AtLeast(10));
        iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        IndexWriter writer = new IndexWriter(dir, iwc);
        flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
        Assert.IsTrue(flushPolicy.FlushOnDocCount);
        Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
        Assert.IsFalse(flushPolicy.FlushOnRAM);
        DocumentsWriter docsWriter = writer.DocsWriter;
        Assert.IsNotNull(docsWriter);
        DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
        Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

        IndexThread[] threads = new IndexThread[numThreads[i]];
        for (int x = 0; x < threads.Length; x++)
        {
            threads[x] = new IndexThread(numDocs, writer, lineDocFile, false);
            threads[x].Start();
        }

        for (int x = 0; x < threads.Length; x++) { threads[x].Join(); }

        Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads[i]);
        Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
        Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
        Assert.IsTrue(flushPolicy.peakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak doc count without flush exceeded watermark");
        AssertActiveBytesAfter(flushControl);
        writer.Dispose();
        Assert.AreEqual(0, flushControl.ActiveBytes);
        dir.Dispose();
    }
}
public virtual void TestInvalidValues()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // Test IndexDeletionPolicy
    Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
    conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
    Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType());
    try { conf.SetIndexDeletionPolicy(null); Assert.Fail(); }
    catch (System.ArgumentException e) { /* ok */ }

    // Test MergeScheduler
    Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
    conf.SetMergeScheduler(new SerialMergeScheduler());
    Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
    try { conf.SetMergeScheduler(null); Assert.Fail(); }
    catch (System.ArgumentException e) { /* ok */ }

    // Test Similarity:
    // we shouldn't assert what the default is, just that it's not null.
    Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
    conf.SetSimilarity(new MySimilarity());
    Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
    try { conf.SetSimilarity(null); Assert.Fail(); }
    catch (System.ArgumentException e) { /* ok */ }

    // Test IndexingChain
    Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
    conf.SetIndexingChain(new MyIndexingChain());
    Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
    try { conf.SetIndexingChain(null); Assert.Fail(); }
    catch (System.ArgumentException e) { /* ok */ }

    try { conf.SetMaxBufferedDeleteTerms(0); Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    try { conf.SetMaxBufferedDocs(1); Assert.Fail("should not have succeeded to set maxBufferedDocs to 1"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    try
    {
        // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
        conf.SetMaxBufferedDocs(4);
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
    }
    catch (System.ArgumentException e) { /* this is expected */ }

    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
    try { conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    // Test setReaderTermsIndexDivisor
    try { conf.SetReaderTermsIndexDivisor(0); Assert.Fail("should not have succeeded to set termsIndexDivisor to 0"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    // Setting to -1 is ok
    conf.SetReaderTermsIndexDivisor(-1);
    try { conf.SetReaderTermsIndexDivisor(-2); Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    try { conf.SetRAMPerThreadHardLimitMB(2048); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    try { conf.SetRAMPerThreadHardLimitMB(0); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); }
    catch (System.ArgumentException e) { /* this is expected */ }

    // Test MergePolicy
    Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
    conf.SetMergePolicy(new LogDocMergePolicy());
    Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
    try { conf.SetMergePolicy(null); Assert.Fail(); }
    catch (System.ArgumentException e) { /* ok */ }
}
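// The try/Assert.Fail()/catch(ArgumentException) pattern above repeats for every setter.
// A small helper along these lines could express it more compactly; the helper name and
// the example call are assumptions, not part of the original test class.
private static void AssertThrowsArgumentException(System.Action action, string message)
{
    try
    {
        action();
        Assert.Fail(message);
    }
    catch (System.ArgumentException)
    {
        // expected
    }
}

// Hypothetical usage:
// AssertThrowsArgumentException(() => conf.SetMaxBufferedDocs(1),
//     "should not have succeeded to set maxBufferedDocs to 1");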
public virtual void TestTonsOfUpdates()
{
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = NewDirectory();
    Random random = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
    IndexWriter writer = new IndexWriter(dir, conf);

    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    int numDocs = AtLeast(20000);
    int numBinaryFields = AtLeast(5);
    int numTerms = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
    ISet<string> updateTerms = new JCG.HashSet<string>();
    while (updateTerms.Count < numTerms)
    {
        updateTerms.Add(TestUtil.RandomSimpleString(random));
    }

    // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++)
        {
            doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
        }
        for (int j = 0; j < numBinaryFields; j++)
        {
            long val = random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
            doc.Add(new NumericDocValuesField("cf" + j, val * 2));
        }
        writer.AddDocument(doc);
    }

    writer.Commit(); // commit so there's something to apply to

    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
    int numUpdates = AtLeast(100);
    // System.out.println("numUpdates=" + numUpdates);
    for (int i = 0; i < numUpdates; i++)
    {
        int field = random.Next(numBinaryFields);
        Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
        long value = random.Next();
        writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
    }

    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        for (int i = 0; i < numBinaryFields; i++)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues f = r.GetBinaryDocValues("f" + i);
            NumericDocValues cf = r.GetNumericDocValues("cf" + i);
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
            }
        }
    }
    reader.Dispose();

    dir.Dispose();
}
public virtual void TestDeletes1()
{
    //IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new J2N.Randomizer(Random.NextInt64()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergeScheduler(new SerialMergeScheduler());
    iwc.SetMaxBufferedDocs(5000);
    iwc.SetRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
    iwc.SetMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int x = 0; x < 5; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit1");
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);
    for (int x = 5; x < 10; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit2");
    writer.Commit();
    Assert.AreEqual(2, writer.SegmentCount);

    for (int x = 10; x < 15; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }

    writer.DeleteDocuments(new Term("id", "1"));

    writer.DeleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.Flush(false, false);
    Assert.IsTrue(writer.bufferedUpdatesStream.Any());

    // get reader flushes pending deletes
    // so there should not be anymore
    IndexReader r1 = writer.GetReader();
    Assert.IsFalse(writer.bufferedUpdatesStream.Any());
    r1.Dispose();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.DeleteDocuments(new Term("id", "2"));
    writer.Flush(false, false);
    fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    writer.MaybeMerge();

    Assert.AreEqual(2, writer.SegmentCount);

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.GetReader();
    int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
    Assert.IsTrue(id2docs is null);
    r2.Dispose();

    /*
     * /// // added docs are in the ram buffer
     * /// for (int x = 15; x < 20; x++) {
     * ///     writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
     * ///     System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
     * /// }
     * /// Assert.IsTrue(writer.numRamDocs() > 0);
     * /// // delete from the ram buffer
     * /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
     * ///
     * /// Term id3 = new Term("id", Integer.toString(3));
     * ///
     * /// // delete from the 1st segment
     * /// writer.DeleteDocuments(id3);
     * ///
     * /// Assert.IsTrue(writer.numRamDocs() > 0);
     * ///
     * /// //System.out
     * /// // .println("segdels1:" + writer.docWriter.deletesToString());
     * ///
     * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
     * ///
     * /// // we cause a merge to happen
     * /// fsmp.doMerge = true;
     * /// fsmp.start = 0;
     * /// fsmp.length = 2;
     * /// System.out.println("maybeMerge "+writer.SegmentInfos);
     * ///
     * /// SegmentInfo info0 = writer.SegmentInfos[0];
     * /// SegmentInfo info1 = writer.SegmentInfos[1];
     * ///
     * /// writer.MaybeMerge();
     * /// System.out.println("maybeMerge after "+writer.SegmentInfos);
     * /// // there should be docs in RAM
     * /// Assert.IsTrue(writer.numRamDocs() > 0);
     * ///
     * /// // assert we've merged the 1 and 2 segments
     * /// // and still have a segment leftover == 2
     * /// Assert.AreEqual(2, writer.SegmentInfos.Size());
     * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
     * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
     * ///
     * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
     * ///
     * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
     * ///
     * /// IndexReader r = writer.GetReader();
     * /// IndexReader r1 = r.getSequentialSubReaders()[0];
     * /// printDelDocs(r1.GetLiveDocs());
     * /// int[] docs = toDocsArray(id3, null, r);
     * /// System.out.println("id3 docs:"+Arrays.toString(docs));
     * /// // there shouldn't be any docs for id:3
     * /// Assert.IsTrue(docs is null);
     * /// r.Dispose();
     * ///
     * /// part2(writer, fsmp);
     * ///
     */
    // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
    //System.out.println("close");
    writer.Dispose();
    dir.Dispose();
}
protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
{
    int numDocumentsToIndex = 10 + AtLeast(30);
    AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex);
    Directory dir = NewDirectory();
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
    int numDWPT = 1 + AtLeast(2);
    DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.SetIndexerThreadPool(threadPool);
    iwc.SetRAMBufferSizeMB(maxRamMB);
    iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, iwc);
    flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
    Assert.IsFalse(flushPolicy.FlushOnDocCount);
    Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
    Assert.IsTrue(flushPolicy.FlushOnRAM);
    DocumentsWriter docsWriter = writer.DocsWriter;
    Assert.IsNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
    Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

    IndexThread[] threads = new IndexThread[numThreads];
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
        threads[x].Start();
    }

    for (int x = 0; x < threads.Length; x++) { threads[x].Join(); }

    long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
    Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads);
    Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
    Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
    Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
    AssertActiveBytesAfter(flushControl);
    if (flushPolicy.HasMarkedPending)
    {
        Assert.IsTrue(maxRAMBytes < flushControl.peakActiveBytes);
    }
    if (ensureNotStalled)
    {
        Assert.IsFalse(docsWriter.flushControl.stallControl.WasStalled);
    }
    writer.Dispose();
    Assert.AreEqual(0, flushControl.ActiveBytes);
    dir.Dispose();
}
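// Hedged sketch of possible call sites for RunFlushByRam; the parameter values and test
// names below are assumptions, not the original tests. The idea is to exercise RAM-based
// flushing both single- and multi-threaded with small RAM buffers so flush-by-RAM triggers often.
[Test]
public virtual void TestFlushByRamSingleThreadSketch()
{
    // one indexing thread, 0.5 MB buffer, and require that indexing never stalled
    RunFlushByRam(1, 0.5, true);
}

[Test]
public virtual void TestFlushByRamMultiThreadSketch()
{
    // several threads and a larger buffer; stalling is tolerated here
    RunFlushByRam(4, 2.0, false);
}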
public virtual void DoTestLongPostingsNoPositions(IndexOptions options)
{
    // Don't use TestUtil.getTempDir so that we own the
    // randomness (ie same seed will point to same dir):
    Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

    int NUM_DOCS = AtLeast(2000);

    if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS); }

    string s1 = GetRandomTerm(null);
    string s2 = GetRandomTerm(s1);

    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
        /*
         * for(int idx=0;idx<s1.Length();idx++) {
         *   System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
         * }
         * for(int idx=0;idx<s2.Length();idx++) {
         *   System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
         * }
         */
    }

    FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        if (Random.NextBoolean()) { isS1.Set(idx); }
    }

    IndexReader r;
    if (true)
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
        iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
        iwc.SetMaxBufferedDocs(-1);
        RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.IndexOptions = options;
        for (int idx = 0; idx < NUM_DOCS; idx++)
        {
            Document doc = new Document();
            string s = isS1.Get(idx) ? s1 : s2;
            Field f = NewField("field", s, ft);
            int count = TestUtil.NextInt32(Random, 1, 4);
            for (int ct = 0; ct < count; ct++) { doc.Add(f); }
            riw.AddDocument(doc);
        }

        r = riw.GetReader();
        riw.Dispose();
    }
    else
    {
#pragma warning disable 162
        r = DirectoryReader.Open(dir);
#pragma warning restore 162
    }

    /*
     * if (VERBOSE) {
     *   System.out.println("TEST: terms");
     *   TermEnum termEnum = r.Terms();
     *   while(termEnum.Next()) {
     *     System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
     *     Assert.IsTrue(termEnum.DocFreq() > 0);
     *     System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
     *     System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
     *     final String s = termEnum.Term().Text();
     *     for(int idx=0;idx<s.Length();idx++) {
     *       System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
     *     }
     *   }
     * }
     */

    Assert.AreEqual(NUM_DOCS, r.NumDocs);
    Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
    Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

    int num = AtLeast(1000);
    for (int iter = 0; iter < num; iter++)
    {
        string term;
        bool doS1;
        if (Random.NextBoolean()) { term = s1; doS1 = true; }
        else { term = s2; doS1 = false; }

        if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term); }

        DocsEnum docs;
        DocsEnum postings;

        if (options == IndexOptions.DOCS_ONLY)
        {
            docs = TestUtil.Docs(Random, r, "field", new BytesRef(term), null, null, DocsFlags.NONE);
            postings = null;
        }
        else
        {
            docs = postings = TestUtil.Docs(Random, r, "field", new BytesRef(term), null, null, DocsFlags.FREQS);
            Debug.Assert(postings != null);
        }
        Debug.Assert(docs != null);

        int docID = -1;
        while (docID < DocIdSetIterator.NO_MORE_DOCS)
        {
            int what = Random.Next(3);
            if (what == 0)
            {
                if (VERBOSE) { Console.WriteLine("TEST: docID=" + docID + "; do next()"); }
                // nextDoc
                int expected = docID + 1;
                while (true)
                {
                    if (expected == NUM_DOCS) { expected = int.MaxValue; break; }
                    else if (isS1.Get(expected) == doS1) { break; }
                    else { expected++; }
                }
                docID = docs.NextDoc();
                if (VERBOSE) { Console.WriteLine("  got docID=" + docID); }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; }

                if (Random.Next(6) == 3 && postings != null)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                }
            }
            else
            {
                // advance
                int targetDocID;
                if (docID == -1) { targetDocID = Random.Next(NUM_DOCS + 1); }
                else { targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID); }
                if (VERBOSE) { Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")"); }
                int expected = targetDocID;
                while (true)
                {
                    if (expected == NUM_DOCS) { expected = int.MaxValue; break; }
                    else if (isS1.Get(expected) == doS1) { break; }
                    else { expected++; }
                }

                docID = docs.Advance(targetDocID);
                if (VERBOSE) { Console.WriteLine("  got docID=" + docID); }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; }

                if (Random.Next(6) == 3 && postings != null)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
                }
            }
        }
    }
    r.Dispose();
    dir.Dispose();
}
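// Hedged sketch of how the parameterized helper above might be invoked; the test names
// are assumptions, not the original call sites. Running it once without and once with
// term frequencies covers both DocsEnum paths in the loop above.
[Test]
public virtual void TestLongPostingsDocsOnlySketch()
{
    DoTestLongPostingsNoPositions(IndexOptions.DOCS_ONLY);
}

[Test]
public virtual void TestLongPostingsDocsAndFreqsSketch()
{
    DoTestLongPostingsNoPositions(IndexOptions.DOCS_AND_FREQS);
}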
public virtual void TestInvalidValues()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // Test IndexDeletionPolicy
    Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType());
    conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
    Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.IndexDeletionPolicy.GetType());
    try { conf.SetIndexDeletionPolicy(null); Assert.Fail(); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* ok */ }

    // Test MergeScheduler
#if !FEATURE_CONCURRENTMERGESCHEDULER
    Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType());
#else
    Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
#endif
    conf.SetMergeScheduler(new SerialMergeScheduler());
    Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
    try { conf.SetMergeScheduler(null); Assert.Fail(); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* ok */ }

    // Test Similarity:
    // we shouldn't assert what the default is, just that it's not null.
    Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
    conf.SetSimilarity(new MySimilarity());
    Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
    try { conf.SetSimilarity(null); Assert.Fail(); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* ok */ }

    // Test IndexingChain
    Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
    conf.SetIndexingChain(new MyIndexingChain());
    Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
    try { conf.SetIndexingChain(null); Assert.Fail(); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* ok */ }

    try { conf.SetMaxBufferedDeleteTerms(0); Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    try { conf.SetMaxBufferedDocs(1); Assert.Fail("should not have succeeded to set maxBufferedDocs to 1"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    try
    {
        // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
        conf.SetMaxBufferedDocs(4);
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
    try { conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    // Test setReaderTermsIndexDivisor
    try { conf.SetReaderTermsIndexDivisor(0); Assert.Fail("should not have succeeded to set termsIndexDivisor to 0"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    // Setting to -1 is ok
    conf.SetReaderTermsIndexDivisor(-1);
    try { conf.SetReaderTermsIndexDivisor(-2); Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    try { conf.SetRAMPerThreadHardLimitMB(2048); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    try { conf.SetRAMPerThreadHardLimitMB(0); Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* this is expected */ }

    // Test MergePolicy
    Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
    conf.SetMergePolicy(new LogDocMergePolicy());
    Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
    try { conf.SetMergePolicy(null); Assert.Fail(); }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    { /* ok */ }
}
public virtual void TestLongPostings_Mem()
{
    // Don't use TestUtil.getTempDir so that we own the
    // randomness (ie same seed will point to same dir):
    Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

    int NUM_DOCS = AtLeast(2000);

    if (Verbose) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS); }

    string s1 = GetRandomTerm(null);
    string s2 = GetRandomTerm(s1);

    if (Verbose)
    {
        Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
        /*
         * for(int idx=0;idx<s1.length();idx++) {
         *   System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
         * }
         * for(int idx=0;idx<s2.length();idx++) {
         *   System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
         * }
         */
    }

    FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        if (Random.NextBoolean()) { isS1.Set(idx); }
    }

    IndexReader r;
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
    iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
    iwc.SetMaxBufferedDocs(-1);
    RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        Document doc = new Document();
        string s = isS1.Get(idx) ? s1 : s2;
        Field f = NewTextField("field", s, Field.Store.NO);
        int count = TestUtil.NextInt32(Random, 1, 4);
        for (int ct = 0; ct < count; ct++) { doc.Add(f); }
        riw.AddDocument(doc);
    }

    r = riw.GetReader();
    riw.Dispose();

    /*
     * if (VERBOSE) {
     *   System.out.println("TEST: terms");
     *   TermEnum termEnum = r.Terms();
     *   while(termEnum.Next()) {
     *     System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().length());
     *     Assert.IsTrue(termEnum.DocFreq() > 0);
     *     System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.length());
     *     System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.length());
     *     final String s = termEnum.Term().Text();
     *     for(int idx=0;idx<s.length();idx++) {
     *       System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
     *     }
     *   }
     * }
     */

    Assert.AreEqual(NUM_DOCS, r.NumDocs);
    Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
    Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

    int num = AtLeast(1000);
    for (int iter = 0; iter < num; iter++)
    {
        string term;
        bool doS1;
        if (Random.NextBoolean()) { term = s1; doS1 = true; }
        else { term = s2; doS1 = false; }

        if (Verbose) { Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1); }

        DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));

        int docID = -1;
        while (docID < DocIdSetIterator.NO_MORE_DOCS)
        {
            int what = Random.Next(3);
            if (what == 0)
            {
                if (Verbose) { Console.WriteLine("TEST: docID=" + docID + "; do next()"); }
                // nextDoc
                int expected = docID + 1;
                while (true)
                {
                    if (expected == NUM_DOCS) { expected = int.MaxValue; break; }
                    else if (isS1.Get(expected) == doS1) { break; }
                    else { expected++; }
                }
                docID = postings.NextDoc();
                if (Verbose) { Console.WriteLine("  got docID=" + docID); }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; }

                if (Random.Next(6) == 3)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                    for (int pos = 0; pos < freq; pos++)
                    {
                        Assert.AreEqual(pos, postings.NextPosition());
                        if (Random.NextBoolean())
                        {
                            var dummy = postings.GetPayload();
                            if (Random.NextBoolean())
                            {
                                dummy = postings.GetPayload(); // get it again
                            }
                        }
                    }
                }
            }
            else
            {
                // advance
                int targetDocID;
                if (docID == -1) { targetDocID = Random.Next(NUM_DOCS + 1); }
                else { targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID); }
                if (Verbose) { Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")"); }
                int expected = targetDocID;
                while (true)
                {
                    if (expected == NUM_DOCS) { expected = int.MaxValue; break; }
                    else if (isS1.Get(expected) == doS1) { break; }
                    else { expected++; }
                }

                docID = postings.Advance(targetDocID);
                if (Verbose) { Console.WriteLine("  got docID=" + docID); }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; }

                if (Random.Next(6) == 3)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                    for (int pos = 0; pos < freq; pos++)
                    {
                        Assert.AreEqual(pos, postings.NextPosition());
                        if (Random.NextBoolean())
                        {
                            var dummy = postings.GetPayload();
                            if (Random.NextBoolean())
                            {
                                dummy = postings.GetPayload(); // get it again
                            }
                        }
                    }
                }
            }
        }
    }
    r.Dispose();
    dir.Dispose();
}
public virtual void TestDeletes1()
{
    //IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergeScheduler(new SerialMergeScheduler());
    iwc.SetMaxBufferedDocs(5000);
    iwc.SetRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
    iwc.SetMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int x = 0; x < 5; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit1");
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);
    for (int x = 5; x < 10; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit2");
    writer.Commit();
    Assert.AreEqual(2, writer.SegmentCount);

    for (int x = 10; x < 15; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }

    writer.DeleteDocuments(new Term("id", "1"));

    writer.DeleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.Flush(false, false);
    Assert.IsTrue(writer.BufferedUpdatesStreamAny);

    // get reader flushes pending deletes
    // so there should not be anymore
    IndexReader r1 = writer.Reader;
    Assert.IsFalse(writer.BufferedUpdatesStreamAny);
    r1.Dispose();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.DeleteDocuments(new Term("id", "2"));
    writer.Flush(false, false);
    fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
    fsmp.DoMerge = true;
    fsmp.Start = 0;
    fsmp.Length = 2;
    writer.MaybeMerge();

    Assert.AreEqual(2, writer.SegmentCount);

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.Reader;
    int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
    Assert.IsTrue(id2docs == null);
    r2.Dispose();

    /*
     /// // added docs are in the ram buffer
     /// for (int x = 15; x < 20; x++) {
     ///     writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
     ///     System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
     /// }
     /// Assert.IsTrue(writer.numRamDocs() > 0);
     /// // delete from the ram buffer
     /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
     ///
     /// Term id3 = new Term("id", Integer.toString(3));
     ///
     /// // delete from the 1st segment
     /// writer.DeleteDocuments(id3);
     ///
     /// Assert.IsTrue(writer.numRamDocs() > 0);
     ///
     /// //System.out
     /// // .println("segdels1:" + writer.docWriter.deletesToString());
     ///
     /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
     ///
     /// // we cause a merge to happen
     /// fsmp.doMerge = true;
     /// fsmp.start = 0;
     /// fsmp.Length = 2;
     /// System.out.println("maybeMerge "+writer.SegmentInfos);
     ///
     /// SegmentInfo info0 = writer.SegmentInfos.Info(0);
     /// SegmentInfo info1 = writer.SegmentInfos.Info(1);
     ///
     /// writer.MaybeMerge();
     /// System.out.println("maybeMerge after "+writer.SegmentInfos);
     /// // there should be docs in RAM
     /// Assert.IsTrue(writer.numRamDocs() > 0);
     ///
     /// // assert we've merged the 1 and 2 segments
     /// // and still have a segment leftover == 2
     /// Assert.AreEqual(2, writer.SegmentInfos.Size());
     /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
     /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
     ///
     /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
     ///
     /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
     ///
     /// IndexReader r = writer.GetReader();
     /// IndexReader r1 = r.getSequentialSubReaders()[0];
     /// printDelDocs(r1.GetLiveDocs());
     /// int[] docs = toDocsArray(id3, null, r);
     /// System.out.println("id3 docs:"+Arrays.toString(docs));
     /// // there shouldn't be any docs for id:3
     /// Assert.IsTrue(docs == null);
     /// r.Dispose();
     ///
     /// part2(writer, fsmp);
     ///
     */
    // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
    //System.out.println("close");
    writer.Dispose();
    dir.Dispose();
}