public virtual void TestUpdateDifferentDocsInDifferentGens()
{
    // update same document multiple times across generations
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    conf.SetMaxBufferedDocs(4);
    IndexWriter writer = new IndexWriter(dir, conf);
    int numDocs = AtLeast(10);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("id", "doc" + i, Store.NO));
        long value = Random.Next();
        doc.Add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.ToBytes(value)));
        doc.Add(new NumericDocValuesField("cf", value * 2));
        writer.AddDocument(doc);
    }

    int numGens = AtLeast(5);
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < numGens; i++)
    {
        int doc = Random.Next(numDocs);
        Term t = new Term("id", "doc" + doc);
        long value = Random.NextInt64();
        writer.UpdateBinaryDocValue(t, "f", TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(t, "cf", value * 2);
        DirectoryReader reader = DirectoryReader.Open(writer, true);
        foreach (AtomicReaderContext context in reader.Leaves)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues fbdv = r.GetBinaryDocValues("f");
            NumericDocValues cfndv = r.GetNumericDocValues("cf");
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cfndv.Get(j), TestBinaryDocValuesUpdates.GetValue(fbdv, j, scratch) * 2);
            }
        }
        reader.Dispose();
    }
    writer.Dispose();
    dir.Dispose();
}
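// For reference, a minimal sketch of what the TestBinaryDocValuesUpdates.ToBytes/GetValue
// helpers used throughout these tests plausibly do. The real helpers live in
// TestBinaryDocValuesUpdates and may differ in detail; the names below (ToBytesSketch,
// GetValueSketch) are hypothetical and used here only to illustrate the assumed contract:
// a long round-trips through a BytesRef via a variable-length (vLong-style) encoding.
internal static BytesRef ToBytesSketch(long value)
{
    BytesRef bytes = new BytesRef(10); // a negative long may need up to 10 vLong bytes
    while ((value & ~0x7FL) != 0L)
    {
        bytes.Bytes[bytes.Length++] = (byte)((value & 0x7FL) | 0x80L);
        value = (long)((ulong)value >> 7); // unsigned shift, like Java's >>>
    }
    bytes.Bytes[bytes.Length++] = (byte)value;
    return bytes;
}

internal static long GetValueSketch(BinaryDocValues bdv, int docID, BytesRef scratch)
{
    bdv.Get(docID, scratch); // fills scratch with the doc's binary value
    int pos = scratch.Offset;
    byte b = scratch.Bytes[pos++];
    long value = b & 0x7FL;
    for (int shift = 7; (b & 0x80) != 0; shift += 7)
    {
        b = scratch.Bytes[pos++];
        value |= (b & 0x7FL) << shift;
    }
    return value;
}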
public virtual void TestManyReopensAndFields()
{
    Directory dir = NewDirectory();
    Random random = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    LogMergePolicy lmp = NewLogMergePolicy();
    lmp.MergeFactor = 3; // merge often
    conf.SetMergePolicy(lmp);
    IndexWriter writer = new IndexWriter(dir, conf);

    bool isNRT = random.NextBoolean();
    DirectoryReader reader;
    if (isNRT)
    {
        reader = DirectoryReader.Open(writer, true);
    }
    else
    {
        writer.Commit();
        reader = DirectoryReader.Open(dir);
    }

    int numFields = random.Next(4) + 3; // 3-6
    int numNDVFields = random.Next(numFields / 2) + 1; // 1-3
    long[] fieldValues = new long[numFields];
    bool[] fieldHasValue = new bool[numFields];
    Arrays.Fill(fieldHasValue, true);
    for (int i = 0; i < fieldValues.Length; i++)
    {
        fieldValues[i] = 1;
    }

    int numRounds = AtLeast(15);
    int docID = 0;
    for (int i = 0; i < numRounds; i++)
    {
        int numDocs = AtLeast(5);
        // System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
        for (int j = 0; j < numDocs; j++)
        {
            Document doc = new Document();
            doc.Add(new StringField("id", "doc-" + docID, Store.NO));
            doc.Add(new StringField("key", "all", Store.NO)); // update key
            // add all fields with their current value
            for (int f = 0; f < fieldValues.Length; f++)
            {
                if (f < numNDVFields)
                {
                    doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                }
                else
                {
                    doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                }
            }
            writer.AddDocument(doc);
            ++docID;
        }

        // if a field's value was unset before, unset it from all newly added documents too
        for (int field = 0; field < fieldHasValue.Length; field++)
        {
            if (!fieldHasValue[field])
            {
                if (field < numNDVFields)
                {
                    writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                }
                else
                {
                    writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                }
            }
        }

        int fieldIdx = random.Next(fieldValues.Length);
        string updateField = "f" + fieldIdx;
        if (random.NextBoolean())
        {
            // System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
            fieldHasValue[fieldIdx] = false;
            if (fieldIdx < numNDVFields)
            {
                writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
            }
            else
            {
                writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
            }
        }
        else
        {
            fieldHasValue[fieldIdx] = true;
            if (fieldIdx < numNDVFields)
            {
                writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
            }
            else
            {
                writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
            }
            // System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
        }

        if (random.NextDouble() < 0.2)
        {
            // might also delete an already deleted document, ok!
            int deleteDoc = random.Next(docID);
            writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
            // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
        }

        // verify reader
        if (!isNRT)
        {
            writer.Commit();
        }

        // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
        DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
        Assert.IsNotNull(newReader);
        reader.Dispose();
        reader = newReader;
        // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
        Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
        BytesRef scratch = new BytesRef();
        foreach (AtomicReaderContext context in reader.Leaves)
        {
            AtomicReader r = context.AtomicReader;
            // System.out.println(((SegmentReader) r).getSegmentName());
            IBits liveDocs = r.LiveDocs;
            for (int field = 0; field < fieldValues.Length; field++)
            {
                string f = "f" + field;
                BinaryDocValues bdv = r.GetBinaryDocValues(f);
                NumericDocValues ndv = r.GetNumericDocValues(f);
                IBits docsWithField = r.GetDocsWithField(f);
                if (field < numNDVFields)
                {
                    Assert.IsNotNull(ndv);
                    Assert.IsNull(bdv);
                }
                else
                {
                    Assert.IsNull(ndv);
                    Assert.IsNotNull(bdv);
                }

                int maxDoc = r.MaxDoc;
                for (int doc = 0; doc < maxDoc; doc++)
                {
                    if (liveDocs is null || liveDocs.Get(doc))
                    {
                        // System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch));
                        if (fieldHasValue[field])
                        {
                            Assert.IsTrue(docsWithField.Get(doc));
                            if (field < numNDVFields)
                            {
                                Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                            }
                            else
                            {
                                Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                            }
                        }
                        else
                        {
                            Assert.IsFalse(docsWithField.Get(doc));
                        }
                    }
                }
            }
        }
        // System.out.println();
    }

    IOUtils.Dispose(writer, reader, dir);
}
public virtual void TestTonsOfUpdates()
{
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = NewDirectory();
    Random random = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
    IndexWriter writer = new IndexWriter(dir, conf);

    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    int numDocs = AtLeast(20000);
    int numBinaryFields = AtLeast(5);
    int numTerms = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
    ISet<string> updateTerms = new JCG.HashSet<string>();
    while (updateTerms.Count < numTerms)
    {
        updateTerms.Add(TestUtil.RandomSimpleString(random));
    }

    // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++)
        {
            doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
        }
        for (int j = 0; j < numBinaryFields; j++)
        {
            long val = random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
            doc.Add(new NumericDocValuesField("cf" + j, val * 2));
        }
        writer.AddDocument(doc);
    }

    writer.Commit(); // commit so there's something to apply to

    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
    int numUpdates = AtLeast(100);
    // System.out.println("numUpdates=" + numUpdates);
    for (int i = 0; i < numUpdates; i++)
    {
        int field = random.Next(numBinaryFields);
        Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
        long value = random.Next();
        writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
    }

    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        for (int i = 0; i < numBinaryFields; i++)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues f = r.GetBinaryDocValues("f" + i);
            NumericDocValues cf = r.GetNumericDocValues("cf" + i);
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
            }
        }
    }
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestStressMultiThreading()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    IndexWriter writer = new IndexWriter(dir, conf);

    // create index
    int numThreads = TestUtil.NextInt32(Random, 3, 6);
    int numDocs = AtLeast(2000);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("id", "doc" + i, Store.NO));
        double group = Random.NextDouble();
        string g;
        if (group < 0.1)
        {
            g = "g0";
        }
        else if (group < 0.5)
        {
            g = "g1";
        }
        else if (group < 0.8)
        {
            g = "g2";
        }
        else
        {
            g = "g3";
        }
        doc.Add(new StringField("updKey", g, Store.NO));
        for (int j = 0; j < numThreads; j++)
        {
            long value = Random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
            doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
        }
        writer.AddDocument(doc);
    }

    CountdownEvent done = new CountdownEvent(numThreads);
    AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100));

    // same thread updates a field as well as reopens
    ThreadJob[] threads = new ThreadJob[numThreads];
    for (int i = 0; i < threads.Length; i++)
    {
        string f = "f" + i;
        string cf = "cf" + i;
        threads[i] = new ThreadAnonymousClass(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
    }

    foreach (ThreadJob t in threads)
    {
        t.Start();
    }
    done.Wait();
    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        for (int i = 0; i < numThreads; i++)
        {
            BinaryDocValues bdv = r.GetBinaryDocValues("f" + i);
            NumericDocValues control = r.GetNumericDocValues("cf" + i);
            IBits docsWithBdv = r.GetDocsWithField("f" + i);
            IBits docsWithControl = r.GetDocsWithField("cf" + i);
            IBits liveDocs = r.LiveDocs;
            for (int j = 0; j < r.MaxDoc; j++)
            {
                if (liveDocs is null || liveDocs.Get(j))
                {
                    Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                    if (docsWithBdv.Get(j))
                    {
                        long ctrlValue = control.Get(j);
                        long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                        // if (ctrlValue != bdvValue) {
                        //     System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                        // }
                        Assert.AreEqual(ctrlValue, bdvValue);
                    }
                }
            }
        }
    }
    reader.Dispose();
    dir.Dispose();
}
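// The ThreadAnonymousClass used by TestStressMultiThreading is not part of this excerpt.
// Below is a minimal, hypothetical sketch of such a worker, inferred from the assertions
// above (the real class takes the outer test instance and may differ in commit/reopen
// cadence and error handling): each thread owns one f/cf field pair and always applies the
// binary and numeric updates together, so cf == decoded(f) * 2 holds for every live doc.
private sealed class UpdateThreadSketch : ThreadJob
{
    private readonly IndexWriter writer;
    private readonly CountdownEvent done;
    private readonly AtomicInt32 numUpdates;
    private readonly string f, cf;

    public UpdateThreadSketch(string name, IndexWriter writer, CountdownEvent done, AtomicInt32 numUpdates, string f, string cf)
        : base(name)
    {
        this.writer = writer;
        this.done = done;
        this.numUpdates = numUpdates;
        this.f = f;
        this.cf = cf;
    }

    public override void Run()
    {
        DirectoryReader reader = null;
        try
        {
            while (numUpdates.GetAndDecrement() > 0)
            {
                // pick a random update group; one term drives both updates so f and cf stay in sync
                double group = Random.NextDouble();
                Term t;
                if (group < 0.1) t = new Term("updKey", "g0");
                else if (group < 0.5) t = new Term("updKey", "g1");
                else if (group < 0.8) t = new Term("updKey", "g2");
                else t = new Term("updKey", "g3");

                if (Random.NextBoolean())
                {
                    // sometimes unset both fields together, so docsWithField stays consistent
                    writer.UpdateBinaryDocValue(t, f, null);
                    writer.UpdateNumericDocValue(t, cf, null);
                }
                else
                {
                    long updValue = Random.Next();
                    writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue));
                    writer.UpdateNumericDocValue(t, cf, updValue * 2);
                }

                if (Random.NextDouble() < 0.1)
                {
                    // occasionally reopen an NRT reader to force buffered updates to be applied
                    DirectoryReader r2 = reader is null
                        ? DirectoryReader.Open(writer, true)
                        : DirectoryReader.OpenIfChanged(reader, writer, true);
                    if (r2 != null)
                    {
                        reader?.Dispose();
                        reader = r2;
                    }
                }
            }
        }
        finally
        {
            reader?.Dispose();
            done.Signal(); // CountdownEvent counterpart of Java's CountDownLatch.countDown()
        }
    }
}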