public virtual void TestTotalBytesSize()
{
    // Indexes 1000 tiny docs with random deletions and verifies the tracking
    // merge scheduler observed a non-zero number of merged bytes.
    Directory dir = NewDirectory();
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMaxBufferedDocs(5);
    iwc.SetMergeScheduler(new TrackingCMS());
    if (TestUtil.GetPostingsFormat("id").Equals("SimpleText"))
    {
        // no SimpleText postings here — pin a real postings format instead
        iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
    }
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    for (int docUpto = 0; docUpto < 1000; docUpto++)
    {
        Document doc = new Document();
        doc.Add(new StringField("id", "" + docUpto, Field.Store.NO));
        writer.AddDocument(doc);
        // Randomly delete one of the docs indexed so far so merges carry deletes.
        if (Random().NextBoolean())
        {
            writer.DeleteDocuments(new Term("id", "" + Random().Next(docUpto + 1)));
        }
    }
    // At least one merge must have been tracked.
    Assert.IsTrue(((TrackingCMS)writer.w.Config.MergeScheduler).TotMergedBytes != 0);
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestIndexWriterDirtSimple()
{
    // Sanity check: with no deletes, MaxDoc and NumDocs both equal the
    // number of documents added.
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    TieredMergePolicy tmp = NewTieredMergePolicy();
    iwc.SetMergePolicy(tmp);
    iwc.SetMaxBufferedDocs(2);
    tmp.MaxMergeAtOnce = 100;
    tmp.SegmentsPerTier = 100;
    tmp.ForceMergeDeletesPctAllowed = 30.0;
    IndexWriter w = new IndexWriter(dir, iwc);
    int numDocs = 2;
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO));
        w.AddDocument(doc);
    }
    Assert.AreEqual(numDocs, w.MaxDoc);
    Assert.AreEqual(numDocs, w.NumDocs);
    // Fix: the original version leaked both the writer and the directory;
    // dispose them so the test releases its resources.
    w.Dispose();
    dir.Dispose();
}
// [Test, Timeout(300000)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestEmptyDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);

    // Documents with no fields at all must be indexable and retrievable.
    Document emptyDoc = new Document();
    int numDocs = Random().NextBoolean() ? 1 : AtLeast(1000);
    for (int i = 0; i < numDocs; ++i)
    {
        writer.AddDocument(emptyDoc);
    }
    writer.Commit();

    // Every stored document must come back present but field-less.
    DirectoryReader reader = DirectoryReader.Open(dir);
    for (int i = 0; i < numDocs; ++i)
    {
        Document doc = reader.Document(i);
        Assert.IsNotNull(doc);
        Assert.IsTrue(doc.Fields.Count <= 0);
    }
    reader.Dispose();

    writer.Dispose();
    dir.Dispose();
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestReadSkip()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);

    FieldType ft = new FieldType();
    ft.IsStored = true;
    ft.Freeze();

    // One random value of each stored-field flavor.
    string text = TestUtil.RandomSimpleString(Random(), 50);
    var utf8 = text.GetBytes(Encoding.UTF8);
    long longVal = Random().NextBoolean() ? Random().Next(42) : Random().NextLong();
    int intVal = Random().NextBoolean() ? Random().Next(42) : Random().Next();
    float floatVal = Random().NextFloat();
    double doubleVal = Random().NextDouble();
    IList<Field> fields = Arrays.AsList(
        new Field("bytes", utf8, ft),
        new Field("string", text, ft),
        new Int64Field("long", longVal, Field.Store.YES),
        new Int32Field("int", intVal, Field.Store.YES),
        new SingleField("float", floatVal, Field.Store.YES),
        new DoubleField("double", doubleVal, Field.Store.YES));

    for (int round = 0; round < 100; ++round)
    {
        Document doc = new Document();
        foreach (Field fld in fields)
        {
            doc.Add(fld);
        }
        writer.w.AddDocument(doc);
    }
    writer.Commit();

    DirectoryReader reader = DirectoryReader.Open(dir);
    int docID = Random().Next(100);
    foreach (Field fld in fields)
    {
        string fldName = fld.Name;
        // Load only this one stored field; the others are skipped over.
        Document sDoc = reader.Document(docID, Collections.Singleton(fldName));
        IIndexableField sField = sDoc.GetField(fldName);
        if (typeof(Field) == fld.GetType())
        {
            Assert.AreEqual(fld.GetBinaryValue(), sField.GetBinaryValue());
            Assert.AreEqual(fld.GetStringValue(), sField.GetStringValue());
        }
        else
        {
#pragma warning disable 612, 618
            Assert.AreEqual(fld.GetNumericValue(), sField.GetNumericValue());
#pragma warning restore 612, 618
        }
    }
    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
private IndexWriterConfig NewWriterConfig()
{
    // Tests drive flushing explicitly: disable doc-count flushing and keep
    // the default RAM buffer size.
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    config.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    // prevent any merges by default.
    config.SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
    return config;
}
[Slow] // LUCENENET: occasionally
public virtual void TestStallControl()
{
    // LUCENENET specific - disable the test if asserts are not enabled
    AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled);

    int[] threadCounts = new int[] { 4 + Random.Next(8), 1 };
    int numDocumentsToIndex = 50 + Random.Next(50);
    for (int i = 0; i < threadCounts.Length; i++)
    {
        int threadCount = threadCounts[i];
        AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex);
        MockDirectoryWrapper dir = NewMockDirectory();
        // mock a very slow harddisk sometimes here so that flushing is very slow
        dir.Throttling = Throttling.SOMETIMES;
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
        iwc.SetFlushPolicy(flushPolicy);
        DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(threadCount == 1 ? 1 : 2);
        iwc.SetIndexerThreadPool(threadPool);
        // with such a small ram buffer we should be stalled quiet quickly
        iwc.SetRAMBufferSizeMB(0.25);
        IndexWriter writer = new IndexWriter(dir, iwc);

        IndexThread[] threads = new IndexThread[threadCount];
        for (int x = 0; x < threads.Length; x++)
        {
            threads[x] = new IndexThread(numDocs, writer, lineDocFile, false);
            threads[x].Start();
        }
        foreach (IndexThread t in threads)
        {
            t.Join();
        }

        DocumentsWriter docsWriter = writer.DocsWriter;
        Assert.IsNotNull(docsWriter);
        DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
        Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due");
        Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
        Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
        if (threadCount == 1)
        {
            // A single indexing thread can never stall itself.
            assertFalse("single thread must not block numThreads: " + threadCount, docsWriter.flushControl.stallControl.HasBlocked);
        }
        if (docsWriter.flushControl.peakNetBytes > (2d * iwc.RAMBufferSizeMB * 1024d * 1024d))
        {
            Assert.IsTrue(docsWriter.flushControl.stallControl.WasStalled);
        }
        AssertActiveBytesAfter(flushControl);
        writer.Dispose(true);
        dir.Dispose();
    }
}
public virtual void TestFlushDocCount()
{
    // LUCENENET specific - disable the test if asserts are not enabled
    AssumeTrue("This test requires asserts to be enabled.", Debugging.AssertsEnabled);

    int[] threadCounts = new int[] { 2 + AtLeast(1), 1 };
    for (int i = 0; i < threadCounts.Length; i++)
    {
        int numDocumentsToIndex = 50 + AtLeast(30);
        AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex);
        Directory dir = NewDirectory();
        MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetFlushPolicy(flushPolicy);
        int numDWPT = 1 + AtLeast(2);
        DocumentsWriterPerThreadPool threadPool = new DocumentsWriterPerThreadPool(numDWPT);
        iwc.SetIndexerThreadPool(threadPool);
        iwc.SetMaxBufferedDocs(2 + AtLeast(10));
        iwc.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        IndexWriter writer = new IndexWriter(dir, iwc);
        flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
        // With this configuration only doc-count based flushing is active.
        Assert.IsTrue(flushPolicy.FlushOnDocCount);
        Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
        Assert.IsFalse(flushPolicy.FlushOnRAM);
        DocumentsWriter docsWriter = writer.DocsWriter;
        Assert.IsNotNull(docsWriter);
        DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
        Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");

        IndexThread[] threads = new IndexThread[threadCounts[i]];
        for (int x = 0; x < threads.Length; x++)
        {
            threads[x] = new IndexThread(numDocs, writer, lineDocFile, false);
            threads[x].Start();
        }
        foreach (IndexThread t in threads)
        {
            t.Join();
        }

        Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + threadCounts[i]);
        Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
        Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
        Assert.IsTrue(flushPolicy.peakDocCountWithoutFlush <= iwc.MaxBufferedDocs, "peak bytes without flush exceeded watermark");
        AssertActiveBytesAfter(flushControl);
        writer.Dispose();
        Assert.AreEqual(0, flushControl.ActiveBytes);
        dir.Dispose();
    }
}
// [Test, Timeout(300000)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestConcurrentReads()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);

    // make sure the readers are properly cloned
    Document doc = new Document();
    Field field = new StringField("fld", "", Field.Store.YES);
    doc.Add(field);
    int numDocs = AtLeast(1000);
    for (int i = 0; i < numDocs; ++i)
    {
        field.SetStringValue("" + i);
        writer.AddDocument(doc);
    }
    writer.Commit();

    DirectoryReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    int concurrentReads = AtLeast(5);
    int readsPerThread = AtLeast(50);
    IList<ThreadClass> readThreads = new List<ThreadClass>();
    AtomicObject<Exception> ex = new AtomicObject<Exception>();
    for (int i = 0; i < concurrentReads; ++i)
    {
        readThreads.Add(new ThreadAnonymousInnerClassHelper(numDocs, reader, searcher, readsPerThread, ex, i));
    }
    foreach (ThreadClass thread in readThreads)
    {
        thread.Start();
    }
    foreach (ThreadClass thread in readThreads)
    {
        thread.Join();
    }
    reader.Dispose();
    if (ex.Value != null)
    {
        // Surface the first failure observed by any reader thread.
        throw ex.Value;
    }
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestConcurrentReads()
{
    using Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));
    using RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

    // make sure the readers are properly cloned
    Document doc = new Document();
    Field field = new StringField("fld", "", Field.Store.YES);
    doc.Add(field);
    int numDocs = AtLeast(1000);
    for (int i = 0; i < numDocs; ++i)
    {
        field.SetStringValue("" + i);
        writer.AddDocument(doc);
    }
    writer.Commit();

    AtomicReference<Exception> ex = new AtomicReference<Exception>();
    using (DirectoryReader reader = DirectoryReader.Open(dir))
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        int concurrentReads = AtLeast(5);
        int readsPerThread = AtLeast(50);
        IList<ThreadJob> readThreads = new JCG.List<ThreadJob>();
        for (int i = 0; i < concurrentReads; ++i)
        {
            readThreads.Add(new ThreadAnonymousClass(numDocs, reader, searcher, readsPerThread, ex));
        }
        foreach (ThreadJob thread in readThreads)
        {
            thread.Start();
        }
        foreach (ThreadJob thread in readThreads)
        {
            thread.Join();
        }
    }
    if (ex.Value != null)
    {
        ExceptionDispatchInfo.Capture(ex.Value).Throw(); // LUCENENET: Rethrow to preserve stack details from the other thread
    }
}
public virtual void TestPartialMerge()
{
    int iters = AtLeast(10);
    for (int iter = 0; iter < iters; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + iter);
        }
        Directory dir = NewDirectory();
        IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        conf.SetMergeScheduler(new SerialMergeScheduler());
        TieredMergePolicy tmp = NewTieredMergePolicy();
        conf.SetMergePolicy(tmp);
        conf.SetMaxBufferedDocs(2);
        tmp.MaxMergeAtOnce = 3;
        tmp.SegmentsPerTier = 6;

        IndexWriter writer = new IndexWriter(dir, conf);
        int maxCount = 0;
        int numDocs = TestUtil.NextInt32(Random, 20, 100);
        for (int i = 0; i < numDocs; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
            writer.AddDocument(doc);
            int count = writer.SegmentCount;
            maxCount = Math.Max(count, maxCount);
            // Segment count may wobble while merging but must never drop far
            // below the peak seen so far.
            Assert.IsTrue(count >= maxCount - 3, "count=" + count + " maxCount=" + maxCount);
        }

        writer.Flush(true, true);

        int segmentCount = writer.SegmentCount;
        int targetCount = TestUtil.NextInt32(Random, 1, segmentCount);
        if (VERBOSE)
        {
            Console.WriteLine("TEST: merge to " + targetCount + " segs (current count=" + segmentCount + ")");
        }
        // A partial ForceMerge must hit exactly the requested segment count.
        writer.ForceMerge(targetCount);
        Assert.AreEqual(targetCount, writer.SegmentCount);

        writer.Dispose();
        dir.Dispose();
    }
}
public virtual void TestUpdateDifferentDocsInDifferentGens()
{
    // Update random documents' doc-values across several generations; the
    // invariant "cf == 2 * f" must hold on every reopened reader.
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    conf.SetMaxBufferedDocs(4);
    IndexWriter writer = new IndexWriter(dir, conf);
    int numDocs = AtLeast(10);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("id", "doc" + i, Store.NO));
        long value = Random.Next();
        doc.Add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.ToBytes(value)));
        doc.Add(new NumericDocValuesField("cf", value * 2));
        writer.AddDocument(doc);
    }

    int numGens = AtLeast(5);
    BytesRef scratch = new BytesRef();
    for (int gen = 0; gen < numGens; gen++)
    {
        int docId = Random.Next(numDocs);
        Term t = new Term("id", "doc" + docId);
        long value = Random.NextInt64();
        writer.UpdateBinaryDocValue(t, "f", TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(t, "cf", value * 2);

        DirectoryReader reader = DirectoryReader.Open(writer, true);
        foreach (AtomicReaderContext context in reader.Leaves)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues fbdv = r.GetBinaryDocValues("f");
            NumericDocValues cfndv = r.GetNumericDocValues("cf");
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cfndv.Get(j), TestBinaryDocValuesUpdates.GetValue(fbdv, j, scratch) * 2);
            }
        }
        reader.Dispose();
    }
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestForceMergeDeletes()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    TieredMergePolicy tmp = NewTieredMergePolicy();
    conf.SetMergePolicy(tmp);
    conf.SetMaxBufferedDocs(4);
    tmp.MaxMergeAtOnce = 100;
    tmp.SegmentsPerTier = 100;
    tmp.ForceMergeDeletesPctAllowed = 30.0;
    IndexWriter writer = new IndexWriter(dir, conf);

    // 80 docs; the 20 with content "aaa 0" are deleted below.
    for (int i = 0; i < 80; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
        writer.AddDocument(doc);
    }
    Assert.AreEqual(80, writer.MaxDoc);
    Assert.AreEqual(80, writer.NumDocs);

    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: delete docs");
    }
    writer.DeleteDocuments(new Term("content", "0"));
    writer.ForceMergeDeletes();

    // 25% deletions is under the 30% threshold, so deletes are not reclaimed.
    Assert.AreEqual(80, writer.MaxDoc);
    Assert.AreEqual(60, writer.NumDocs);

    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: forceMergeDeletes2");
    }
    // Lowering the threshold below 25% forces the deletes to be merged away.
    ((TieredMergePolicy)writer.Config.MergePolicy).ForceMergeDeletesPctAllowed = 10.0;
    writer.ForceMergeDeletes();
    Assert.AreEqual(60, writer.NumDocs);
    Assert.AreEqual(60, writer.MaxDoc);

    writer.Dispose();
    dir.Dispose();
}
public virtual void TestMaxMergeCount()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    int maxMergeCount = TestUtil.NextInt(Random(), 1, 5);
    int maxMergeThreads = TestUtil.NextInt(Random(), 1, maxMergeCount);
    CountdownEvent enoughMergesWaiting = new CountdownEvent(maxMergeCount);
    AtomicInteger runningMergeCount = new AtomicInteger(0);
    AtomicBoolean failed = new AtomicBoolean();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: maxMergeCount=" + maxMergeCount + " maxMergeThreads=" + maxMergeThreads);
    }

    ConcurrentMergeScheduler cms = new ConcurrentMergeSchedulerAnonymousInnerClassHelper(this, maxMergeCount, enoughMergesWaiting, runningMergeCount, failed);
    cms.SetMaxMergesAndThreads(maxMergeCount, maxMergeThreads);
    iwc.SetMergeScheduler(cms);
    iwc.SetMaxBufferedDocs(2);

    TieredMergePolicy tmp = new TieredMergePolicy();
    iwc.SetMergePolicy(tmp);
    tmp.MaxMergeAtOnce = 2;
    tmp.SegmentsPerTier = 2;

    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.Add(NewField("field", "field", TextField.TYPE_NOT_STORED));
    // Keep adding docs until enough merges are pending (or the scheduler failed).
    while (enoughMergesWaiting.CurrentCount != 0 && !failed.Get())
    {
        for (int i = 0; i < 10; i++)
        {
            writer.AddDocument(doc);
        }
    }
    writer.Dispose(false);
    dir.Dispose();
}
public virtual void TestTonsOfUpdates()
{
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = NewDirectory();
    Random rnd = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(rnd));
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
    IndexWriter writer = new IndexWriter(dir, conf);

    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    int numDocs = AtLeast(20000);
    int numBinaryFields = AtLeast(5);
    int numTerms = TestUtil.NextInt32(rnd, 10, 100); // terms should affect many docs
    ISet<string> updateTerms = new JCG.HashSet<string>();
    while (updateTerms.Count < numTerms)
    {
        updateTerms.Add(TestUtil.RandomSimpleString(rnd));
    }

    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.NextInt32(rnd, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++)
        {
            doc.Add(new StringField("upd", RandomPicks.RandomFrom(rnd, updateTerms), Store.NO));
        }
        for (int j = 0; j < numBinaryFields; j++)
        {
            long val = rnd.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
            doc.Add(new NumericDocValuesField("cf" + j, val * 2));
        }
        writer.AddDocument(doc);
    }

    writer.Commit(); // commit so there's something to apply to

    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
    int numUpdates = AtLeast(100);
    for (int i = 0; i < numUpdates; i++)
    {
        int field = rnd.Next(numBinaryFields);
        Term updateTerm = new Term("upd", RandomPicks.RandomFrom(rnd, updateTerms));
        long value = rnd.Next();
        writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
    }

    writer.Dispose();

    // verify: every "cf" value must be exactly twice the matching "f" value
    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        for (int i = 0; i < numBinaryFields; i++)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues f = r.GetBinaryDocValues("f" + i);
            NumericDocValues cf = r.GetNumericDocValues("cf" + i);
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
            }
        }
    }
    reader.Dispose();
    dir.Dispose();
}
// Verifies IndexWriterConfig's defaults and that each setter rejects invalid
// arguments (null components, out-of-range numeric values) with an
// ArgumentException, leaving the configuration usable afterwards.
public virtual void TestInvalidValues()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    // Test IndexDeletionPolicy
    Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.IndexDeletionPolicy.GetType());
    conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
    Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.IndexDeletionPolicy.GetType());
    try
    {
        conf.SetIndexDeletionPolicy(null);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // ok
    }

    // Test MergeScheduler
#if !FEATURE_CONCURRENTMERGESCHEDULER
    Assert.AreEqual(typeof(TaskMergeScheduler), conf.MergeScheduler.GetType());
#else
    Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
#endif
    conf.SetMergeScheduler(new SerialMergeScheduler());
    Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
    try
    {
        conf.SetMergeScheduler(null);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // ok
    }

    // Test Similarity:
    // we shouldn't assert what the default is, just that it's not null.
    Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
    conf.SetSimilarity(new MySimilarity());
    Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
    try
    {
        conf.SetSimilarity(null);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // ok
    }

    // Test IndexingChain
    Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
    conf.SetIndexingChain(new MyIndexingChain());
    Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
    try
    {
        conf.SetIndexingChain(null);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // ok
    }

    try
    {
        conf.SetMaxBufferedDeleteTerms(0);
        Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    try
    {
        conf.SetMaxBufferedDocs(1);
        Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    try
    {
        // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
        conf.SetMaxBufferedDocs(4);
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    // Restore defaults, then check the mirror case (disabling RAM-based flush
    // while doc-count flushing is already disabled).
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
    try
    {
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    // Test setReaderTermsIndexDivisor
    try
    {
        conf.SetReaderTermsIndexDivisor(0);
        Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    // Setting to -1 is ok
    conf.SetReaderTermsIndexDivisor(-1);
    try
    {
        conf.SetReaderTermsIndexDivisor(-2);
        Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    try
    {
        conf.SetRAMPerThreadHardLimitMB(2048);
        Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    try
    {
        conf.SetRAMPerThreadHardLimitMB(0);
        Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // this is expected
    }

    // Test MergePolicy
    Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
    conf.SetMergePolicy(new LogDocMergePolicy());
    Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
    try
    {
        conf.SetMergePolicy(null);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (System.ArgumentException e)
#pragma warning restore 168
    {
        // ok
    }
}
public virtual void TestDeletes1()
{
    //IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new J2N.Randomizer(Random.NextInt64()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergeScheduler(new SerialMergeScheduler());
    iwc.SetMaxBufferedDocs(5000);
    iwc.SetRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
    iwc.SetMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);

    // First segment: ids 0-4, value "1".
    for (int x = 0; x < 5; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
    }
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);

    // Second segment: ids 5-9, value "2".
    for (int x = 5; x < 10; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
    }
    writer.Commit();
    Assert.AreEqual(2, writer.SegmentCount);

    // RAM-buffered docs: ids 10-14, value "3".
    for (int x = 10; x < 15; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
    }

    writer.DeleteDocuments(new Term("id", "1"));
    writer.DeleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.Flush(false, false);
    Assert.IsTrue(writer.bufferedUpdatesStream.Any());

    // get reader flushes pending deletes
    // so there should not be anymore
    IndexReader r1 = writer.GetReader();
    Assert.IsFalse(writer.bufferedUpdatesStream.Any());
    r1.Dispose();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.DeleteDocuments(new Term("id", "2"));
    writer.Flush(false, false);
    fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    writer.MaybeMerge();

    Assert.AreEqual(2, writer.SegmentCount);

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.GetReader();
    int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
    Assert.IsTrue(id2docs is null);
    r2.Dispose();

    // NOTE: a long block of commented-out exploratory code ("part2" of this
    // test, covering deletes in the RAM buffer plus merging) was removed here
    // as dead code.

    writer.Dispose();
    dir.Dispose();
}
public virtual void TestDeletes1()
{
    //IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergeScheduler(new SerialMergeScheduler());
    iwc.SetMaxBufferedDocs(5000);
    iwc.SetRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
    iwc.SetMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);

    // First segment: ids 0-4, value "1".
    for (int x = 0; x < 5; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
    }
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);

    // Second segment: ids 5-9, value "2".
    for (int x = 5; x < 10; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
    }
    writer.Commit();
    Assert.AreEqual(2, writer.SegmentCount);

    // RAM-buffered docs: ids 10-14, value "3".
    for (int x = 10; x < 15; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
    }

    writer.DeleteDocuments(new Term("id", "1"));
    writer.DeleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.Flush(false, false);
    Assert.IsTrue(writer.BufferedUpdatesStreamAny);

    // get reader flushes pending deletes
    // so there should not be anymore
    IndexReader r1 = writer.Reader;
    Assert.IsFalse(writer.BufferedUpdatesStreamAny);
    r1.Dispose();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.DeleteDocuments(new Term("id", "2"));
    writer.Flush(false, false);
    fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
    fsmp.DoMerge = true;
    fsmp.Start = 0;
    fsmp.Length = 2;
    writer.MaybeMerge();

    Assert.AreEqual(2, writer.SegmentCount);

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.Reader;
    int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
    Assert.IsTrue(id2docs == null);
    r2.Dispose();

    // NOTE: a long block of commented-out exploratory code ("part2" of this
    // test, covering deletes in the RAM buffer plus merging) was removed here
    // as dead code.

    writer.Dispose();
    dir.Dispose();
}
/// <summary>
/// Create a new <see cref="IndexWriterConfig"/> with random defaults using the specified random.
/// Every configuration knob (merge scheduler, buffered docs, term index interval, thread pool,
/// merge policy, compound files, reader pooling, etc.) is randomized so tests cover many
/// combinations across runs. NOTE: the order of random draws is part of the reproducibility
/// contract for a given seed — do not reorder the statements below.
/// </summary>
public static IndexWriterConfig NewIndexWriterConfig(Random r, LuceneVersion v, Analyzer a)
{
    IndexWriterConfig c = new IndexWriterConfig(v, a);
    c.SetSimilarity(ClassEnvRule.Similarity);
    if (VERBOSE)
    {
        // Even though TestRuleSetupAndRestoreClassEnv calls
        // InfoStream.setDefault, we do it again here so that
        // the PrintStreamInfoStream.messageID increments so
        // that when there are separate instances of
        // IndexWriter created we see "IW 0", "IW 1", "IW 2",
        // ... instead of just always "IW 0":
        c.InfoStream = new TestRuleSetupAndRestoreClassEnv.ThreadNameFixingPrintStreamInfoStream(Console.Out);
    }
    // Randomly pick a merge scheduler: serial, (rarely) a concurrent scheduler with
    // random thread/merge limits, or leave the config's default.
    if (r.NextBoolean())
    {
        c.SetMergeScheduler(new SerialMergeScheduler());
    }
    else if (Rarely(r))
    {
        int maxThreadCount = TestUtil.NextInt(Random(), 1, 4);
        int maxMergeCount = TestUtil.NextInt(Random(), maxThreadCount, maxThreadCount + 4);
        ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
        cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
        c.SetMergeScheduler(cms);
    }
    if (r.NextBoolean())
    {
        if (Rarely(r))
        {
            // crazy value
            c.SetMaxBufferedDocs(TestUtil.NextInt(r, 2, 15));
        }
        else
        {
            // reasonable value
            c.SetMaxBufferedDocs(TestUtil.NextInt(r, 16, 1000));
        }
    }
    if (r.NextBoolean())
    {
        if (Rarely(r))
        {
            // crazy value
            c.SetTermIndexInterval(r.NextBoolean() ? TestUtil.NextInt(r, 1, 31) : TestUtil.NextInt(r, 129, 1000));
        }
        else
        {
            // reasonable value
            c.SetTermIndexInterval(TestUtil.NextInt(r, 32, 128));
        }
    }
    if (r.NextBoolean())
    {
        int maxNumThreadStates = Rarely(r) ? TestUtil.NextInt(r, 5, 20) : TestUtil.NextInt(r, 1, 4); // reasonable value - crazy value
        if (Rarely(r))
        {
            // Retrieve the package-private setIndexerThreadPool
            // method via reflection, since it is not publicly exposed:
            MethodInfo setIndexerThreadPoolMethod = typeof(IndexWriterConfig).GetMethod("SetIndexerThreadPool", new Type[] { typeof(DocumentsWriterPerThreadPool) });
            //setIndexerThreadPoolMethod.setAccessible(true);
            Type clazz = typeof(RandomDocumentsWriterPerThreadPool);
            ConstructorInfo ctor = clazz.GetConstructor(new[] { typeof(int), typeof(Random) });
            //ctor.Accessible = true;
            // random thread pool
            setIndexerThreadPoolMethod.Invoke(c, new[] { ctor.Invoke(new object[] { maxNumThreadStates, r }) });
        }
        else
        {
            // random thread pool
            c.SetMaxThreadStates(maxNumThreadStates);
        }
    }
    c.SetMergePolicy(NewMergePolicy(r));
    if (Rarely(r))
    {
        c.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.InfoStream));
    }
    c.SetUseCompoundFile(r.NextBoolean());
    c.SetReaderPooling(r.NextBoolean());
    c.SetReaderTermsIndexDivisor(TestUtil.NextInt(r, 1, 4));
    c.SetCheckIntegrityAtMerge(r.NextBoolean());
    return c;
}
/// <summary>
/// Indexes documents from <c>numThreads</c> concurrent threads with only the RAM-based
/// flush trigger enabled (doc-count and delete-term triggers disabled), then asserts that
/// the <see cref="MockDefaultFlushPolicy"/> flushed by RAM and that peak buffered bytes
/// never exceeded the configured watermark.
/// </summary>
/// <param name="numThreads">number of concurrent indexing threads to start</param>
/// <param name="maxRamMB">RAM buffer size (MB) that should trigger flushes</param>
/// <param name="ensureNotStalled">if true, also assert the flush stall control never engaged</param>
protected internal virtual void RunFlushByRam(int numThreads, double maxRamMB, bool ensureNotStalled)
{
    int numDocumentsToIndex = 10 + AtLeast(30);
    // Shared countdown of documents remaining; each IndexThread decrements it.
    AtomicInt32 numDocs = new AtomicInt32(numDocumentsToIndex);
    Directory dir = NewDirectory();
    MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetFlushPolicy(flushPolicy);
    int numDWPT = 1 + AtLeast(2);
    DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(numDWPT);
    iwc.SetIndexerThreadPool(threadPool);
    iwc.SetRAMBufferSizeMB(maxRamMB);
    // Disable the other two flush triggers so only RAM usage can cause a flush.
    iwc.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, iwc);
    // The writer may clone the config; re-fetch the policy instance it actually uses.
    flushPolicy = (MockDefaultFlushPolicy)writer.Config.FlushPolicy;
    Assert.IsFalse(flushPolicy.FlushOnDocCount);
    Assert.IsFalse(flushPolicy.FlushOnDeleteTerms);
    Assert.IsTrue(flushPolicy.FlushOnRAM);
    DocumentsWriter docsWriter = writer.DocsWriter;
    Assert.IsNotNull(docsWriter);
    DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
    Assert.AreEqual(0, flushControl.FlushBytes, " bytes must be 0 after init");
    IndexThread[] threads = new IndexThread[numThreads];
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x] = new IndexThread(this, numDocs, numThreads, writer, LineDocFile, false);
        threads[x].Start();
    }
    for (int x = 0; x < threads.Length; x++)
    {
        threads[x].Join();
    }
    long maxRAMBytes = (long)(iwc.RAMBufferSizeMB * 1024.0 * 1024.0);
    // After all threads joined, no flush may be pending.
    Assert.AreEqual(0, flushControl.FlushBytes, " all flushes must be due numThreads=" + numThreads);
    Assert.AreEqual(numDocumentsToIndex, writer.NumDocs);
    Assert.AreEqual(numDocumentsToIndex, writer.MaxDoc);
    Assert.IsTrue(flushPolicy.PeakBytesWithoutFlush <= maxRAMBytes, "peak bytes without flush exceeded watermark");
    AssertActiveBytesAfter(flushControl);
    if (flushPolicy.HasMarkedPending)
    {
        // A pending flush was marked, so active bytes must have crossed the RAM limit.
        Assert.IsTrue(maxRAMBytes < flushControl.peakActiveBytes);
    }
    if (ensureNotStalled)
    {
        Assert.IsFalse(docsWriter.flushControl.stallControl.WasStalled);
    }
    writer.Dispose();
    Assert.AreEqual(0, flushControl.ActiveBytes);
    dir.Dispose();
}
/// <summary>
/// Verifies that CheckIndex reports "has deletions" for a segment containing deleted
/// documents, and stops reporting it once a ForceMerge(1) has expunged the deletions.
/// </summary>
public virtual void TestDeletesCheckIndexOutput()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMaxBufferedDocs(2);

    // Index two docs into a single segment, then delete one of them.
    IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)config.Clone());
    Document document = new Document();
    document.Add(NewField("field", "0", StringField.TYPE_NOT_STORED));
    writer.AddDocument(document);
    document = new Document();
    document.Add(NewField("field", "1", StringField.TYPE_NOT_STORED));
    writer.AddDocument(document);
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);
    writer.DeleteDocuments(new Term("field", "0"));
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);
    writer.Dispose();

    // Run CheckIndex, capturing its InfoStream output into a byte buffer.
    ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(1024);
    //MemoryStream infoBytes = new MemoryStream(1024);
    CheckIndex checkIndex = new CheckIndex(directory);
    checkIndex.InfoStream = new StreamWriter(infoBytes, Encoding.UTF8);
    CheckIndex.Status status = checkIndex.DoCheckIndex(null);
    Assert.IsTrue(status.Clean);
    checkIndex.FlushInfoStream();
    string report = infoBytes.ToString();
    // Segment should have deletions:
    Assert.IsTrue(report.Contains("has deletions"), "string was: " + report);

    // Force-merge away the deletions, then re-check: the report must be silent about them.
    writer = new IndexWriter(directory, (IndexWriterConfig)config.Clone());
    writer.ForceMerge(1);
    writer.Dispose();

    infoBytes = new ByteArrayOutputStream(1024);
    checkIndex.InfoStream = new StreamWriter(infoBytes, Encoding.UTF8);
    status = checkIndex.DoCheckIndex(null);
    Assert.IsTrue(status.Clean);
    checkIndex.FlushInfoStream();
    report = infoBytes.ToString();
    Assert.IsFalse(report.Contains("has deletions"));
    directory.Dispose();
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
/// <summary>
/// Indexes documents with random binary stored fields while periodically re-opening the
/// writer with a different codec, so that subsequent merges must read segments written
/// by one codec and write them with another. Then deletes random id ranges, force-merges,
/// and verifies every surviving document's stored bytes round-tripped intact.
/// </summary>
public virtual void TestWriteReadMerge()
{
    // get another codec, other than the default: so we are merging segments across different codecs
    Codec otherCodec;
    if ("SimpleText".Equals(Codec.Default.Name, StringComparison.Ordinal))
    {
        otherCodec = new Lucene46Codec();
    }
    else
    {
        otherCodec = new SimpleTextCodec();
    }
    Directory dir = NewDirectory();
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());
    int docCount = AtLeast(200);
    // data[i][j] holds the expected raw bytes of field "bytes<j>" of document i.
    var data = new byte[docCount][][];
    for (int i = 0; i < docCount; ++i)
    {
        int fieldCount = Rarely() ? RandomInts.NextIntBetween(Random(), 1, 500) : RandomInts.NextIntBetween(Random(), 1, 5);
        data[i] = new byte[fieldCount][];
        for (int j = 0; j < fieldCount; ++j)
        {
            int length = Rarely() ? Random().Next(1000) : Random().Next(10);
            int max = Rarely() ? 256 : 2; // occasionally use the full byte range
            data[i][j] = RandomByteArray(length, max);
        }
    }
    FieldType type = new FieldType(StringField.TYPE_STORED);
    type.IsIndexed = false;
    type.Freeze();
    // Single reused id field; its value is updated per document below.
    Int32Field id = new Int32Field("id", 0, Field.Store.YES);
    for (int i = 0; i < data.Length; ++i)
    {
        Document doc = new Document();
        doc.Add(id);
        id.SetInt32Value(i);
        for (int j = 0; j < data[i].Length; ++j)
        {
            Field f = new Field("bytes" + j, data[i][j], type);
            doc.Add(f);
        }
        iw.w.AddDocument(doc);
        // Occasionally close the writer and re-open it with the other codec, so the
        // index ends up containing segments written by different codecs.
        if (Random().NextBoolean() && (i % (data.Length / 10) == 0))
        {
            iw.w.Dispose();
            // test merging against a non-compressing codec
            if (iwConf.Codec == otherCodec)
            {
                iwConf.SetCodec(Codec.Default);
            }
            else
            {
                iwConf.SetCodec(otherCodec);
            }
            iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());
        }
    }
    // Delete some random id ranges so the merge below must handle deletions.
    for (int i = 0; i < 10; ++i)
    {
        int min = Random().Next(data.Length);
        int max = min + Random().Next(20);
        iw.DeleteDocuments(NumericRangeQuery.NewInt32Range("id", min, max, true, false));
    }
    iw.ForceMerge(2); // force merges with deletions
    iw.Commit();
    DirectoryReader ir = DirectoryReader.Open(dir);
    Assert.IsTrue(ir.NumDocs > 0);
    int numDocs = 0;
    for (int i = 0; i < ir.MaxDoc; ++i)
    {
        Document doc = ir.Document(i);
        if (doc == null)
        {
            continue;
        }
        ++numDocs;
        int docId = (int)doc.GetField("id").GetNumericValue();
        // +1 accounts for the "id" field itself.
        Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count);
        for (int j = 0; j < data[docId].Length; ++j)
        {
            var arr = data[docId][j];
            BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j);
            var arr2 = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length);
            Assert.AreEqual(arr, arr2);
        }
    }
    Assert.IsTrue(ir.NumDocs <= numDocs);
    ir.Dispose();
    iw.DeleteAll();
    iw.Commit();
    iw.ForceMerge(1);
    iw.Dispose();
    dir.Dispose();
}
// [Test, LongRunningTest, Timeout(int.MaxValue)] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
/// <summary>
/// Stress-tests stored fields with documents much bigger than the stored-fields chunk
/// size: one document with ~0.5-1M tiny fields and one with a single 1-5MB field, mixed
/// with empty documents. Commits at random points, force-merges, then verifies each
/// document's stored "fld" values survived indexing and merging.
/// </summary>
public virtual void TestBigDocuments()
{
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);
    if (dir is MockDirectoryWrapper)
    {
        // Disable throttling; this test writes a lot of bytes.
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }
    Document emptyDoc = new Document(); // emptyDoc
    Document bigDoc1 = new Document(); // lot of small fields
    Document bigDoc2 = new Document(); // 1 very big field
    // The id field instance is shared by all three documents; its value is set per-add below.
    Field idField = new StringField("id", "", Field.Store.NO);
    emptyDoc.Add(idField);
    bigDoc1.Add(idField);
    bigDoc2.Add(idField);
    FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.IsIndexed = false;
    Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
    int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i)
    {
        bigDoc1.Add(smallField);
    }
    Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);
    bigDoc2.Add(bigField);
    int numDocs = AtLeast(5);
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i)
    {
        // Each slot is one of the three prototype documents, chosen at random.
        docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i)
    {
        idField.SetStringValue("" + i);
        iw.AddDocument(docs[i]);
        if (Random().Next(numDocs) == 0)
        {
            iw.Commit();
        }
    }
    iw.Commit();
    iw.ForceMerge(1); // look at what happens when big docs are merged
    DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i)
    {
        Query query = new TermQuery(new Term("id", "" + i));
        TopDocs topDocs = searcher.Search(query, 1);
        Assert.AreEqual(1, topDocs.TotalHits, "" + i);
        Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
        Assert.IsNotNull(doc);
        IIndexableField[] fieldValues = doc.GetFields("fld");
        Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
        if (fieldValues.Length > 0)
        {
            // All "fld" values within one prototype doc are identical, so checking
            // the first one against the original is sufficient.
            Assert.AreEqual(docs[i].GetFields("fld")[0].GetBinaryValue(), fieldValues[0].GetBinaryValue());
        }
    }
    rd.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds an index where two random terms (s1, s2) partition a large doc set, each
/// occurring 1-4 times per document, then walks the positions enum for each term with a
/// random mix of NextDoc() and Advance() calls, verifying each returned docID against an
/// expected value computed independently from the <c>isS1</c> bitset.
/// </summary>
public virtual void TestLongPostings_Mem()
{
    // Don't use TestUtil.getTempDir so that we own the
    // randomness (ie same seed will point to same dir):
    Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));
    int NUM_DOCS = AtLeast(2000);
    if (Verbose)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
    }
    string s1 = GetRandomTerm(null);
    string s2 = GetRandomTerm(s1); // second term, guaranteed distinct from s1 by GetRandomTerm's contract — TODO confirm
    if (Verbose)
    {
        Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
        /*
        for(int idx=0;idx<s1.length();idx++) {
          System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
        }
        for(int idx=0;idx<s2.length();idx++) {
          System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
        }
        */
    }
    // isS1.Get(idx) == true means document idx contains term s1; otherwise it contains s2.
    FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        if (Random.NextBoolean())
        {
            isS1.Set(idx);
        }
    }
    IndexReader r;
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
    iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
    iwc.SetMaxBufferedDocs(-1);
    RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        Document doc = new Document();
        string s = isS1.Get(idx) ? s1 : s2;
        Field f = NewTextField("field", s, Field.Store.NO);
        // Add the same field 1-4 times so the term occurs at positions 0..count-1.
        int count = TestUtil.NextInt32(Random, 1, 4);
        for (int ct = 0; ct < count; ct++)
        {
            doc.Add(f);
        }
        riw.AddDocument(doc);
    }
    r = riw.GetReader();
    riw.Dispose();
    /*
    if (VERBOSE) {
      System.out.println("TEST: terms");
      TermEnum termEnum = r.Terms();
      while(termEnum.Next()) {
        System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().length());
        Assert.IsTrue(termEnum.DocFreq() > 0);
        System.out.println("  s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.length());
        System.out.println("  s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.length());
        final String s = termEnum.Term().Text();
        for(int idx=0;idx<s.length();idx++) {
          System.out.println("    ch=0x" + Integer.toHexString(s.charAt(idx)));
        }
      }
    }
    */
    Assert.AreEqual(NUM_DOCS, r.NumDocs);
    Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
    Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);
    int num = AtLeast(1000);
    for (int iter = 0; iter < num; iter++)
    {
        string term;
        bool doS1;
        if (Random.NextBoolean())
        {
            term = s1;
            doS1 = true;
        }
        else
        {
            term = s2;
            doS1 = false;
        }
        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
        }
        DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));
        int docID = -1;
        while (docID < DocIdSetIterator.NO_MORE_DOCS)
        {
            // Randomly choose between NextDoc() (what == 0) and Advance() (what == 1 or 2).
            int what = Random.Next(3);
            if (what == 0)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do next()");
                }
                // nextDoc: compute the next docID that should match this term,
                // by scanning the bitset forward from docID + 1.
                int expected = docID + 1;
                while (true)
                {
                    if (expected == NUM_DOCS)
                    {
                        expected = int.MaxValue; // == DocIdSetIterator.NO_MORE_DOCS
                        break;
                    }
                    else if (isS1.Get(expected) == doS1)
                    {
                        break;
                    }
                    else
                    {
                        expected++;
                    }
                }
                docID = postings.NextDoc();
                if (Verbose)
                {
                    Console.WriteLine(" got docID=" + docID);
                }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                if (Random.Next(6) == 3)
                {
                    // Occasionally verify positions 0..freq-1 and exercise payload access.
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                    for (int pos = 0; pos < freq; pos++)
                    {
                        Assert.AreEqual(pos, postings.NextPosition());
                        if (Random.NextBoolean())
                        {
                            var dummy = postings.GetPayload();
                            if (Random.NextBoolean())
                            {
                                dummy = postings.GetPayload(); // get it again
                            }
                        }
                    }
                }
            }
            else
            {
                // advance
                int targetDocID;
                if (docID == -1)
                {
                    targetDocID = Random.Next(NUM_DOCS + 1);
                }
                else
                {
                    targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                }
                if (Verbose)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                }
                // Advance must land on the first matching doc at or after targetDocID.
                int expected = targetDocID;
                while (true)
                {
                    if (expected == NUM_DOCS)
                    {
                        expected = int.MaxValue;
                        break;
                    }
                    else if (isS1.Get(expected) == doS1)
                    {
                        break;
                    }
                    else
                    {
                        expected++;
                    }
                }
                docID = postings.Advance(targetDocID);
                if (Verbose)
                {
                    Console.WriteLine(" got docID=" + docID);
                }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                if (Random.Next(6) == 3)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                    for (int pos = 0; pos < freq; pos++)
                    {
                        Assert.AreEqual(pos, postings.NextPosition());
                        if (Random.NextBoolean())
                        {
                            var dummy = postings.GetPayload();
                            if (Random.NextBoolean())
                            {
                                dummy = postings.GetPayload(); // get it again
                            }
                        }
                    }
                }
            }
        }
    }
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that IndexWriterConfig setters accept valid values and throw
/// ArgumentException for invalid ones (null policies/schedulers/similarity/chain,
/// out-of-range buffer sizes, terms-index divisor, and per-thread RAM hard limit).
/// </summary>
public virtual void TestInvalidValues()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    // Test IndexDeletionPolicy
    Assert.AreEqual(typeof(KeepOnlyLastCommitDeletionPolicy), conf.DelPolicy.GetType());
    conf.SetIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
    Assert.AreEqual(typeof(SnapshotDeletionPolicy), conf.DelPolicy.GetType());
    try
    {
        conf.SetIndexDeletionPolicy(null);
        Assert.Fail();
    }
    catch (System.ArgumentException) // LUCENENET: exception variable removed; it was unused (CS0168)
    {
        // ok
    }
    // Test MergeScheduler
    Assert.AreEqual(typeof(ConcurrentMergeScheduler), conf.MergeScheduler.GetType());
    conf.SetMergeScheduler(new SerialMergeScheduler());
    Assert.AreEqual(typeof(SerialMergeScheduler), conf.MergeScheduler.GetType());
    try
    {
        conf.SetMergeScheduler(null);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // ok
    }
    // Test Similarity:
    // we shouldnt assert what the default is, just that its not null.
    Assert.IsTrue(IndexSearcher.DefaultSimilarity == conf.Similarity);
    conf.SetSimilarity(new MySimilarity());
    Assert.AreEqual(typeof(MySimilarity), conf.Similarity.GetType());
    try
    {
        conf.SetSimilarity(null);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // ok
    }
    // Test IndexingChain
    Assert.IsTrue(DocumentsWriterPerThread.DefaultIndexingChain == conf.IndexingChain);
    conf.SetIndexingChain(new MyIndexingChain());
    Assert.AreEqual(typeof(MyIndexingChain), conf.IndexingChain.GetType());
    try
    {
        conf.SetIndexingChain(null);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // ok
    }
    try
    {
        conf.SetMaxBufferedDeleteTerms(0);
        Assert.Fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    try
    {
        conf.SetMaxBufferedDocs(1);
        Assert.Fail("should not have succeeded to set maxBufferedDocs to 1");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    try
    {
        // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
        conf.SetMaxBufferedDocs(4);
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    // Restore valid settings so the mirror-image check below starts from a clean state.
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
    try
    {
        conf.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        Assert.Fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    // Test setReaderTermsIndexDivisor
    try
    {
        conf.SetReaderTermsIndexDivisor(0);
        Assert.Fail("should not have succeeded to set termsIndexDivisor to 0");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    // Setting to -1 is ok
    conf.SetReaderTermsIndexDivisor(-1);
    try
    {
        conf.SetReaderTermsIndexDivisor(-2);
        Assert.Fail("should not have succeeded to set termsIndexDivisor to < -1");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    try
    {
        conf.SetRAMPerThreadHardLimitMB(2048);
        Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    try
    {
        conf.SetRAMPerThreadHardLimitMB(0);
        Assert.Fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
    }
    catch (System.ArgumentException)
    {
        // this is expected
    }
    // Test MergePolicy
    Assert.AreEqual(typeof(TieredMergePolicy), conf.MergePolicy.GetType());
    conf.SetMergePolicy(new LogDocMergePolicy());
    Assert.AreEqual(typeof(LogDocMergePolicy), conf.MergePolicy.GetType());
    try
    {
        conf.SetMergePolicy(null);
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // ok
    }
}
/// <summary>
/// Same structure as the positions-based long-postings test, but indexes with the given
/// <paramref name="options"/> (e.g. DOCS_ONLY or DOCS_AND_FREQS) and walks a DocsEnum:
/// two random terms partition a large doc set, each occurring 1-4 times per document,
/// and a random mix of NextDoc()/Advance() calls is verified against expected docIDs
/// computed independently from the <c>isS1</c> bitset. Freq is only checked when
/// frequencies were indexed (postings != null).
/// </summary>
/// <param name="options">index options controlling whether frequencies are stored</param>
public virtual void DoTestLongPostingsNoPositions(IndexOptions options)
{
    // Don't use TestUtil.getTempDir so that we own the
    // randomness (ie same seed will point to same dir):
    Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));
    int NUM_DOCS = AtLeast(2000);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
    }
    string s1 = GetRandomTerm(null);
    string s2 = GetRandomTerm(s1);
    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
        /*
        for(int idx=0;idx<s1.Length();idx++) {
          System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
        }
        for(int idx=0;idx<s2.Length();idx++) {
          System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
        }
        */
    }
    // isS1.Get(idx) == true means document idx contains term s1; otherwise s2.
    FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        if (Random.NextBoolean())
        {
            isS1.Set(idx);
        }
    }
    IndexReader r;
    // NOTE: the dead 'else' below (guarded by #pragma) is kept intentionally; flipping
    // 'if (true)' allows re-running the verification loop against an existing index.
    if (true)
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
        iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
        iwc.SetMaxBufferedDocs(-1);
        RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.IndexOptions = options;
        for (int idx = 0; idx < NUM_DOCS; idx++)
        {
            Document doc = new Document();
            string s = isS1.Get(idx) ? s1 : s2;
            Field f = NewField("field", s, ft);
            // Add the same field 1-4 times so the term frequency is 1..4.
            int count = TestUtil.NextInt32(Random, 1, 4);
            for (int ct = 0; ct < count; ct++)
            {
                doc.Add(f);
            }
            riw.AddDocument(doc);
        }
        r = riw.GetReader();
        riw.Dispose();
    }
    else
    {
#pragma warning disable 162
        r = DirectoryReader.Open(dir);
#pragma warning restore 162
    }
    /*
    if (VERBOSE) {
      System.out.println("TEST: terms");
      TermEnum termEnum = r.Terms();
      while(termEnum.Next()) {
        System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
        Assert.IsTrue(termEnum.DocFreq() > 0);
        System.out.println("  s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
        System.out.println("  s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
        final String s = termEnum.Term().Text();
        for(int idx=0;idx<s.Length();idx++) {
          System.out.println("    ch=0x" + Integer.toHexString(s.charAt(idx)));
        }
      }
    }
    */
    Assert.AreEqual(NUM_DOCS, r.NumDocs);
    Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
    Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);
    int num = AtLeast(1000);
    for (int iter = 0; iter < num; iter++)
    {
        string term;
        bool doS1;
        if (Random.NextBoolean())
        {
            term = s1;
            doS1 = true;
        }
        else
        {
            term = s2;
            doS1 = false;
        }
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
        }
        DocsEnum docs;
        DocsEnum postings;
        if (options == IndexOptions.DOCS_ONLY)
        {
            // No freqs indexed: postings stays null and Freq checks below are skipped.
            docs = TestUtil.Docs(Random, r, "field", new BytesRef(term), null, null, DocsFlags.NONE);
            postings = null;
        }
        else
        {
            docs = postings = TestUtil.Docs(Random, r, "field", new BytesRef(term), null, null, DocsFlags.FREQS);
            Debug.Assert(postings != null);
        }
        Debug.Assert(docs != null);
        int docID = -1;
        while (docID < DocIdSetIterator.NO_MORE_DOCS)
        {
            // Randomly choose between NextDoc() (what == 0) and Advance() (what == 1 or 2).
            int what = Random.Next(3);
            if (what == 0)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do next()");
                }
                // nextDoc: compute the next docID that should match this term,
                // scanning the bitset forward from docID + 1.
                int expected = docID + 1;
                while (true)
                {
                    if (expected == NUM_DOCS)
                    {
                        expected = int.MaxValue; // == DocIdSetIterator.NO_MORE_DOCS
                        break;
                    }
                    else if (isS1.Get(expected) == doS1)
                    {
                        break;
                    }
                    else
                    {
                        expected++;
                    }
                }
                docID = docs.NextDoc();
                if (VERBOSE)
                {
                    Console.WriteLine(" got docID=" + docID);
                }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                if (Random.Next(6) == 3 && postings != null)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                }
            }
            else
            {
                // advance
                int targetDocID;
                if (docID == -1)
                {
                    targetDocID = Random.Next(NUM_DOCS + 1);
                }
                else
                {
                    targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                }
                // Advance must land on the first matching doc at or after targetDocID.
                int expected = targetDocID;
                while (true)
                {
                    if (expected == NUM_DOCS)
                    {
                        expected = int.MaxValue;
                        break;
                    }
                    else if (isS1.Get(expected) == doS1)
                    {
                        break;
                    }
                    else
                    {
                        expected++;
                    }
                }
                docID = docs.Advance(targetDocID);
                if (VERBOSE)
                {
                    Console.WriteLine(" got docID=" + docID);
                }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                if (Random.Next(6) == 3 && postings != null)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
                }
            }
        }
    }
    r.Dispose();
    dir.Dispose();
}