public virtual void TestSortedTermsEnum() { Directory directory = NewDirectory(); Analyzer analyzer = new MockAnalyzer(Random); IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwconfig.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig); Document doc = new Document(); doc.Add(new StringField("field", "hello", Field.Store.NO)); iwriter.AddDocument(doc); doc = new Document(); doc.Add(new StringField("field", "world", Field.Store.NO)); iwriter.AddDocument(doc); doc = new Document(); doc.Add(new StringField("field", "beer", Field.Store.NO)); iwriter.AddDocument(doc); iwriter.ForceMerge(1); DirectoryReader ireader = iwriter.GetReader(); iwriter.Dispose(); AtomicReader ar = GetOnlySegmentReader(ireader); SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field"); Assert.AreEqual(3, dv.ValueCount); TermsEnum termsEnum = dv.GetTermsEnum(); // next() Assert.AreEqual("beer", termsEnum.Next().Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.AreEqual("hello", termsEnum.Next().Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.AreEqual("world", termsEnum.Next().Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); // seekCeil() Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!"))); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer"))); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz"))); // seekExact() Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer"))); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello"))); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world"))); Assert.AreEqual("world", termsEnum.Term.Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus"))); // seek(ord) termsEnum.SeekExact(0); Assert.AreEqual("beer", termsEnum.Term.Utf8ToString()); Assert.AreEqual(0, termsEnum.Ord); termsEnum.SeekExact(1); Assert.AreEqual("hello", termsEnum.Term.Utf8ToString()); Assert.AreEqual(1, termsEnum.Ord); termsEnum.SeekExact(2); Assert.AreEqual("world", termsEnum.Term.Utf8ToString()); Assert.AreEqual(2, termsEnum.Ord); ireader.Dispose(); directory.Dispose(); }
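// A minimal sketch of the three TermsEnum positioning modes the test above exercises over
// SortedSetDocValues; it assumes the same usings/fixture and an AtomicReader `ar` over a
// single segment whose "field" StringField holds the terms.
SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");
TermsEnum te = dv.GetTermsEnum();
// 1) Sequential scan: Next() returns terms in sorted order, Ord counts up from 0.
BytesRef first = te.Next();
// 2) SeekCeil(): lands on the smallest term >= the target (NOT_FOUND), on the exact
//    term (FOUND), or reports END when the target sorts past the last term.
SeekStatus status = te.SeekCeil(new BytesRef("ha!"));
// 3) Exact positioning: by term bytes (returns false when absent) or directly by ordinal.
bool present = te.SeekExact(new BytesRef("beer"));
te.SeekExact(0); // te.Term / te.Ord now reflect the first (lowest) term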
public virtual void TestNumericField() { using Directory dir = NewDirectory(); DirectoryReader r = null; try { var numDocs = AtLeast(500); var answers = new Number[numDocs]; using (var w = new RandomIndexWriter(Random, dir)) { NumericType[] typeAnswers = new NumericType[numDocs]; for (int id = 0; id < numDocs; id++) { Document doc = new Document(); Field nf; Field sf; Number answer; NumericType typeAnswer; if (Random.NextBoolean()) { // float/double if (Random.NextBoolean()) { float f = Random.NextSingle(); answer = Single.GetInstance(f); nf = new SingleField("nf", f, Field.Store.NO); sf = new StoredField("nf", f); typeAnswer = NumericType.SINGLE; } else { double d = Random.NextDouble(); answer = Double.GetInstance(d); nf = new DoubleField("nf", d, Field.Store.NO); sf = new StoredField("nf", d); typeAnswer = NumericType.DOUBLE; } } else { // int/long if (Random.NextBoolean()) { int i = Random.Next(); answer = Int32.GetInstance(i); nf = new Int32Field("nf", i, Field.Store.NO); sf = new StoredField("nf", i); typeAnswer = NumericType.INT32; } else { long l = Random.NextInt64(); answer = Int64.GetInstance(l); nf = new Int64Field("nf", l, Field.Store.NO); sf = new StoredField("nf", l); typeAnswer = NumericType.INT64; } } doc.Add(nf); doc.Add(sf); answers[id] = answer; typeAnswers[id] = typeAnswer; FieldType ft = new FieldType(Int32Field.TYPE_STORED); ft.NumericPrecisionStep = int.MaxValue; doc.Add(new Int32Field("id", id, ft)); w.AddDocument(doc); } r = w.GetReader(); } // w.Dispose(); Assert.AreEqual(numDocs, r.NumDocs); foreach (AtomicReaderContext ctx in r.Leaves) { AtomicReader sub = ctx.AtomicReader; FieldCache.Int32s ids = FieldCache.DEFAULT.GetInt32s(sub, "id", false); for (int docID = 0; docID < sub.NumDocs; docID++) { Document doc = sub.Document(docID); Field f = doc.GetField <Field>("nf"); Assert.IsTrue(f is StoredField, "got f=" + f); #pragma warning disable 612, 618 Assert.AreEqual(answers[ids.Get(docID)], f.GetNumericValue()); #pragma warning restore 612, 618 } } } finally { r?.Dispose(); } }
public virtual void TestOpenPriorSnapshot() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10))); KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.DelPolicy; for (int i = 0; i < 10; i++) { AddDoc(writer); if ((1 + i) % 2 == 0) { writer.Commit(); } } writer.Dispose(); ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); Assert.AreEqual(5, commits.Count); IndexCommit lastCommit = null; foreach (IndexCommit commit in commits) { if (lastCommit == null || commit.Generation > lastCommit.Generation) { lastCommit = commit; } } Assert.IsTrue(lastCommit != null); // Now add 1 doc and merge writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy)); AddDoc(writer); Assert.AreEqual(11, writer.NumDocs()); writer.ForceMerge(1); writer.Dispose(); Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count); // Now open writer on the commit just before merge: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit)); Assert.AreEqual(10, writer.NumDocs()); // Should undo our rollback: writer.Rollback(); DirectoryReader r = DirectoryReader.Open(dir); // Still merged, still 11 docs Assert.AreEqual(1, r.Leaves.Count); Assert.AreEqual(11, r.NumDocs); r.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit)); Assert.AreEqual(10, writer.NumDocs()); // Commits the rollback: writer.Dispose(); // Now 7 because we made another commit Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count); r = DirectoryReader.Open(dir); // Not fully merged because we rolled it back, and now only // 10 docs Assert.IsTrue(r.Leaves.Count > 1); Assert.AreEqual(10, r.NumDocs); r.Dispose(); // Re-merge writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy)); writer.ForceMerge(1); writer.Dispose(); r = DirectoryReader.Open(dir); Assert.AreEqual(1, r.Leaves.Count); Assert.AreEqual(10, r.NumDocs); r.Dispose(); // Now open writer on the commit just before merging, // but this time keeping only the last commit: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexCommit(lastCommit)); Assert.AreEqual(10, writer.NumDocs()); // Reader still sees fully merged index, because writer // opened on the prior commit has not yet committed: r = DirectoryReader.Open(dir); Assert.AreEqual(1, r.Leaves.Count); Assert.AreEqual(10, r.NumDocs); r.Dispose(); writer.Dispose(); // Now reader sees not-fully-merged index: r = DirectoryReader.Open(dir); Assert.IsTrue(r.Leaves.Count > 1); Assert.AreEqual(10, r.NumDocs); r.Dispose(); dir.Dispose(); }
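// Sketch of the "open a writer on an older commit" pattern the test above exercises. It
// assumes the same fixture: a deletion policy that kept the old commit alive (the test's
// KeepAllDeletionPolicy) and an IndexCommit chosen from DirectoryReader.ListCommits(dir).
IndexCommit lastCommit = null; foreach (IndexCommit c in DirectoryReader.ListCommits(dir)) { if (lastCommit == null || c.Generation > lastCommit.Generation) { lastCommit = c; } }
IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(policy).SetIndexCommit(lastCommit));
// Nothing changes on disk until this writer commits: Rollback() abandons the switch to
// lastCommit (readers keep seeing the newest commit), while Dispose()/Commit() publishes
// lastCommit's state as the new head of the index.
w.Dispose();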
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.Add(field); const int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) // trick blockPF's little optimization { tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.AddDocument(doc); } w.ForceMerge(1); w.Dispose(); DirectoryReader oneThousand = DirectoryReader.Open(dir); IndexReader[] subReaders = new IndexReader[1000]; Arrays.Fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2")); if (dir2 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.AddIndexes(mr); w2.ForceMerge(1); w2.Dispose(); oneThousand.Dispose(); DirectoryReader oneMillion = DirectoryReader.Open(dir2); subReaders = new IndexReader[2000]; Arrays.Fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3")); if (dir3 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.AddIndexes(mr); w3.ForceMerge(1); w3.Dispose(); oneMillion.Dispose(); dir.Dispose(); dir2.Dispose(); dir3.Dispose(); }
public virtual void TestWriteReadMerge() { // get another codec, other than the default: so we are merging segments across different codecs Codec otherCodec; /*if ("SimpleText".Equals(Codec.Default.Name)) * {*/ otherCodec = new Lucene46Codec(); /*} * else * { * otherCodec = new SimpleTextCodec(); * }*/ Directory dir = NewDirectory(); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone()); int docCount = AtLeast(200); var data = new byte[docCount][][]; for (int i = 0; i < docCount; ++i) { int fieldCount = Rarely() ? RandomInts.NextIntBetween(Random(), 1, 500) : RandomInts.NextIntBetween(Random(), 1, 5); data[i] = new byte[fieldCount][]; for (int j = 0; j < fieldCount; ++j) { int length = Rarely() ? Random().Next(1000) : Random().Next(10); int max = Rarely() ? 256 : 2; data[i][j] = RandomByteArray(length, max); } } FieldType type = new FieldType(StringField.TYPE_STORED); type.Indexed = false; type.Freeze(); IntField id = new IntField("id", 0, Field.Store.YES); for (int i = 0; i < data.Length; ++i) { Document doc = new Document(); doc.Add(id); id.IntValue = i; for (int j = 0; j < data[i].Length; ++j) { Field f = new Field("bytes" + j, data[i][j], type); doc.Add(f); } iw.w.AddDocument(doc); if (Random().NextBoolean() && (i % (data.Length / 10) == 0)) { iw.w.Dispose(); // test merging against a non-compressing codec if (iwConf.Codec == otherCodec) { iwConf.SetCodec(Codec.Default); } else { iwConf.SetCodec(otherCodec); } iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone()); } } for (int i = 0; i < 10; ++i) { int min = Random().Next(data.Length); int max = min + Random().Next(20); iw.DeleteDocuments(NumericRangeQuery.NewIntRange("id", min, max, true, false)); } iw.ForceMerge(2); // force merges with deletions iw.Commit(); DirectoryReader ir = DirectoryReader.Open(dir); Assert.IsTrue(ir.NumDocs > 0); int numDocs = 0; for (int i = 0; i < ir.MaxDoc; ++i) { Document doc = ir.Document(i); if (doc == null) { continue; } ++numDocs; int docId = (int)doc.GetField("id").NumericValue; Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count); for (int j = 0; j < data[docId].Length; ++j) { var arr = data[docId][j]; BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j); var arr2 = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length); Assert.AreEqual(arr, arr2); } } Assert.IsTrue(ir.NumDocs <= numDocs); ir.Dispose(); iw.DeleteAll(); iw.Commit(); iw.ForceMerge(1); iw.Dispose(); dir.Dispose(); }
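// Sketch of the codec-swap trick used above: alternate the config's codec between the
// default and another codec, then reopen the writer with a clone of the config, so that
// later merges combine segments written by different stored-fields formats. `iwConf`,
// `dir` and `otherCodec` are assumed set up as in the test.
iwConf.SetCodec(iwConf.Codec == otherCodec ? Codec.Default : otherCodec);
RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());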
public virtual void TestStressAdvance_Mem() { for (int iter = 0; iter < 3; iter++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); HashSet <int> aDocs = new HashSet <int>(); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); int num = AtLeast(4097); if (VERBOSE) { Console.WriteLine("\nTEST: numDocs=" + num); } for (int id = 0; id < num; id++) { if (Random().Next(4) == 3) { f.SetStringValue("a"); aDocs.Add(id); } else { f.SetStringValue("b"); } idField.SetStringValue("" + id); w.AddDocument(doc); if (VERBOSE) { Console.WriteLine("\nTEST: doc upto " + id); } } w.ForceMerge(1); IList <int> aDocIDs = new List <int>(); IList <int> bDocIDs = new List <int>(); DirectoryReader r = w.Reader; int[] idToDocID = new int[r.MaxDoc]; for (int docID = 0; docID < idToDocID.Length; docID++) { int id = Convert.ToInt32(r.Document(docID).Get("id")); if (aDocs.Contains(id)) { aDocIDs.Add(docID); } else { bDocIDs.Add(docID); } } TermsEnum te = GetOnlySegmentReader(r).Fields.GetTerms("field").GetIterator(null); DocsEnum de = null; for (int iter2 = 0; iter2 < 10; iter2++) { if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " iter2=" + iter2); } Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("a"))); de = TestUtil.Docs(Random(), te, null, de, DocsFlags.NONE); TestOne(de, aDocIDs); Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(new BytesRef("b"))); de = TestUtil.Docs(Random(), te, null, de, DocsFlags.NONE); TestOne(de, bDocIDs); } w.Dispose(); r.Dispose(); dir.Dispose(); } }
public virtual void TestCommitOnCloseForceMerge() { Directory dir = NewDirectory(); // Must disable throwing exc on double-write: this // test uses IW.rollback which easily results in // writing to same file more than once if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10).SetMergePolicy(NewLogMergePolicy(10))); for (int j = 0; j < 17; j++) { AddDocWithIndex(writer, j); } writer.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND)); writer.ForceMerge(1); // Open a reader before closing (committing) the writer: DirectoryReader reader = DirectoryReader.Open(dir); // Reader should see index as multi-seg at this // point: Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment"); reader.Dispose(); // Abort the writer: writer.Rollback(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge"); // Open a reader after aborting writer: reader = DirectoryReader.Open(dir); // Reader should still see index as multi-segment Assert.IsTrue(reader.Leaves.Count > 1, "Reader incorrectly sees one segment"); reader.Dispose(); if (Verbose) { Console.WriteLine("TEST: do real full merge"); } writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND)); writer.ForceMerge(1); writer.Dispose(); if (Verbose) { Console.WriteLine("TEST: writer closed"); } TestIndexWriter.AssertNoUnreferencedFiles(dir, "aborted writer after forceMerge"); // Open a reader after closing the writer: reader = DirectoryReader.Open(dir); // Reader should see index as one segment Assert.AreEqual(1, reader.Leaves.Count, "Reader incorrectly sees more than one segment"); reader.Dispose(); dir.Dispose(); }
public virtual void TestTonsOfUpdates() { // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM Directory dir = NewDirectory(); Random random = Random(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc IndexWriter writer = new IndexWriter(dir, conf); // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds) int numDocs = AtLeast(20000); int numBinaryFields = AtLeast(5); int numTerms = TestUtil.NextInt(random, 10, 100); // terms should affect many docs HashSet <string> updateTerms = new HashSet <string>(); while (updateTerms.Count < numTerms) { updateTerms.Add(TestUtil.RandomSimpleString(random)); } // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms); // build a large index with many BDV fields and update terms for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numUpdateTerms = TestUtil.NextInt(random, 1, numTerms / 10); for (int j = 0; j < numUpdateTerms; j++) { doc.Add(new StringField("upd", RandomInts.RandomFrom(random, updateTerms), Store.NO)); } for (int j = 0; j < numBinaryFields; j++) { long val = random.Next(); doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val))); doc.Add(new NumericDocValuesField("cf" + j, val * 2)); } writer.AddDocument(doc); } writer.Commit(); // commit so there's something to apply to // set to flush every 2048 bytes (approximately every 12 updates), so we get // many flushes during binary updates writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024); int numUpdates = AtLeast(100); // System.out.println("numUpdates=" + numUpdates); for (int i = 0; i < numUpdates; i++) { int field = random.Next(numBinaryFields); Term updateTerm = new Term("upd", RandomInts.RandomFrom(random, updateTerms)); long value = random.Next(); writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value)); writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2); } writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); BytesRef scratch = new BytesRef(); foreach (AtomicReaderContext context in reader.Leaves) { for (int i = 0; i < numBinaryFields; i++) { AtomicReader r = context.AtomicReader; BinaryDocValues f = r.GetBinaryDocValues("f" + i); NumericDocValues cf = r.GetNumericDocValues("cf" + i); for (int j = 0; j < r.MaxDoc; j++) { Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j); } } } reader.Dispose(); dir.Dispose(); }
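// Sketch of the per-term DocValues update calls used above; `writer` is assumed set up as
// in the test, the term text is a hypothetical placeholder, and "f0"/"cf0" are the first
// of the test's paired binary/"control" numeric fields.
Term updateTerm = new Term("upd", "some-term"); // hypothetical term; every matching doc is updated
long value = 42L;
writer.UpdateBinaryDocValue(updateTerm, "f0", TestBinaryDocValuesUpdates.ToBytes(value));
writer.UpdateNumericDocValue(updateTerm, "cf0", value * 2); // control field kept at exactly 2x
// Keeping the RAM buffer tiny (as the test does via Config.SetRAMBufferSizeMB) forces
// frequent flushes, so the update-apply path runs many times before the final check.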
public virtual void TestFixedBinary([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedBinary")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); Document doc = new Document(); var bytes = new byte[4]; BytesRef data = new BytesRef(bytes); BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { bytes[0] = (byte)(i >> 24); bytes[1] = (byte)(i >> 16); bytes[2] = (byte)(i >> 8); bytes[3] = (byte)i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.GetBinaryDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { bytes[0] = (byte)(expectedValue >> 24); bytes[1] = (byte)(expectedValue >> 16); bytes[2] = (byte)(expectedValue >> 8); bytes[3] = (byte)expectedValue; dv.Get(i, scratch); Assert.AreEqual(data, scratch); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields"))); dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER; var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(); ft.IsIndexed = false; ft.IsStored = true; ft.Freeze(); int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20); var value = new byte[valueLength]; for (int i = 0; i < valueLength; ++i) { // random so that even compressing codecs can't compress it value[i] = (byte)Random().Next(256); } Field f = new Field("fld", value, ft); doc.Add(f); int numDocs = (int)((1L << 32) / valueLength + 100); for (int i = 0; i < numDocs; ++i) { w.AddDocument(doc); if (VERBOSE && i % (numDocs / 10) == 0) { Console.WriteLine(i + " of " + numDocs + "..."); } } w.ForceMerge(1); w.Dispose(); if (VERBOSE) { bool found = false; foreach (string file in dir.ListAll()) { if (file.EndsWith(".fdt", StringComparison.Ordinal)) { long fileLength = dir.FileLength(file); if (fileLength >= 1L << 32) { found = true; } Console.WriteLine("File length of " + file + " : " + fileLength); } } if (!found) { Console.WriteLine("No .fdt file larger than 4GB, test bug?"); } } DirectoryReader rd = DirectoryReader.Open(dir); Document sd = rd.Document(numDocs - 1); Assert.IsNotNull(sd); Assert.AreEqual(1, sd.Fields.Count); BytesRef valueRef = sd.GetBinaryValue("fld"); Assert.IsNotNull(valueRef); Assert.AreEqual(new BytesRef(value), valueRef); rd.Dispose(); dir.Dispose(); }
public virtual void TestVariableBinary([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BVariableBinary")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); Document doc = new Document(); var bytes = new byte[4]; ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes); BytesRef data = new BytesRef(bytes); BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { encoder.Reset(bytes); encoder.WriteVInt32(i % 65535); // 1, 2, or 3 bytes data.Length = encoder.Position; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; ByteArrayDataInput input = new ByteArrayDataInput(); foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(bytes); BinaryDocValues dv = reader.GetBinaryDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { dv.Get(i, scratch); input.Reset(scratch.Bytes, scratch.Offset, scratch.Length); Assert.AreEqual(expectedValue % 65535, input.ReadVInt32()); Assert.IsTrue(input.Eof); expectedValue++; } } r.Dispose(); dir.Dispose(); }
public virtual void Test() { MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors")); dir.PreventDoubleWrite = false; double rate = Random.NextDouble() * 0.01; //System.out.println("rate=" + rate); dir.RandomIOExceptionRateOnOpen = rate; int iters = AtLeast(20); LineFileDocs docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues); IndexReader r = null; DirectoryReader r2 = null; bool any = false; MockDirectoryWrapper dirCopy = null; int lastNumDocs = 0; for (int iter = 0; iter < iters; iter++) { IndexWriter w = null; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } try { MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); if (VERBOSE) { // Do this ourselves instead of relying on LTC so // we see incrementing messageID: iwc.SetInfoStream(new TextWriterInfoStream(Console.Out)); } var ms = iwc.MergeScheduler; if (ms is IConcurrentMergeScheduler) { ((IConcurrentMergeScheduler)ms).SetSuppressExceptions(); } w = new IndexWriter(dir, iwc); if (r != null && Random.Next(5) == 3) { if (Random.NextBoolean()) { if (VERBOSE) { Console.WriteLine("TEST: addIndexes IR[]"); } w.AddIndexes(new IndexReader[] { r }); } else { if (VERBOSE) { Console.WriteLine("TEST: addIndexes Directory[]"); } w.AddIndexes(new Directory[] { dirCopy }); } } else { if (VERBOSE) { Console.WriteLine("TEST: addDocument"); } w.AddDocument(docs.NextDoc()); } dir.RandomIOExceptionRateOnOpen = 0.0; w.Dispose(); w = null; // NOTE: this is O(N^2)! Only enable for temporary debugging: //dir.setRandomIOExceptionRateOnOpen(0.0); //TestUtil.CheckIndex(dir); //dir.setRandomIOExceptionRateOnOpen(rate); // Verify numDocs only increases, to catch IndexWriter // accidentally deleting the index: dir.RandomIOExceptionRateOnOpen = 0.0; Assert.IsTrue(DirectoryReader.IndexExists(dir)); if (r2 == null) { r2 = DirectoryReader.Open(dir); } else { DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2); if (r3 != null) { r2.Dispose(); r2 = r3; } } Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs); lastNumDocs = r2.NumDocs; //System.out.println("numDocs=" + lastNumDocs); dir.RandomIOExceptionRateOnOpen = rate; any = true; if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": success"); } } catch (IOException ioe) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter + ": exception"); Console.WriteLine(ioe.ToString()); Console.Write(ioe.StackTrace); } if (w != null) { // NOTE: leave random IO exceptions enabled here, // to verify that rollback does not try to write // anything: w.Rollback(); } } if (any && r == null && Random.NextBoolean()) { // Make a copy of a non-empty index so we can use // it to addIndexes later: dir.RandomIOExceptionRateOnOpen = 0.0; r = DirectoryReader.Open(dir); dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy")); ISet <string> files = new JCG.HashSet <string>(); foreach (string file in dir.ListAll()) { dir.Copy(dirCopy, file, file, IOContext.DEFAULT); files.Add(file); } dirCopy.Sync(files); // Have IW kiss the dir so we remove any leftover // files ... 
we can easily have leftover files at // the time we take a copy because we are holding // open a reader: (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose(); dirCopy.RandomIOExceptionRate = rate; dir.RandomIOExceptionRateOnOpen = rate; } } if (r2 != null) { r2.Dispose(); } if (r != null) { r.Dispose(); dirCopy.Dispose(); } dir.Dispose(); }
public void Test() { DirectoryInfo dir = CreateTempDir(GetType().Name); DirectoryInfo destDir = CreateTempDir(GetType().Name); Store.Directory fsDir = NewFSDirectory(dir); // IndexSplitter.split makes its own commit directly with SIPC/SegmentInfos, // so the unreferenced files are expected. if (fsDir is MockDirectoryWrapper) { ((MockDirectoryWrapper)fsDir).AssertNoUnreferencedFilesOnClose = (false); } MergePolicy mergePolicy = new LogByteSizeMergePolicy(); mergePolicy.NoCFSRatio = 1.0; mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity; IndexWriter iw = new IndexWriter( fsDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)). SetOpenMode(OpenMode.CREATE). SetMergePolicy(mergePolicy) ); for (int x = 0; x < 100; x++) { Document doc = DocHelper.CreateDocument(x, "index", 5); iw.AddDocument(doc); } iw.Commit(); for (int x = 100; x < 150; x++) { Document doc = DocHelper.CreateDocument(x, "index2", 5); iw.AddDocument(doc); } iw.Commit(); for (int x = 150; x < 200; x++) { Document doc = DocHelper.CreateDocument(x, "index3", 5); iw.AddDocument(doc); } iw.Commit(); DirectoryReader iwReader = iw.GetReader(); assertEquals(3, iwReader.Leaves.Count); iwReader.Dispose(); iw.Dispose(); // we should have 3 segments now IndexSplitter @is = new IndexSplitter(dir); string splitSegName = @is.Infos.Info(1).Info.Name; @is.Split(destDir, new string[] { splitSegName }); Store.Directory fsDirDest = NewFSDirectory(destDir); DirectoryReader r = DirectoryReader.Open(fsDirDest); assertEquals(50, r.MaxDoc); r.Dispose(); fsDirDest.Dispose(); // now test cmdline DirectoryInfo destDir2 = CreateTempDir(GetType().Name); IndexSplitter.Main(new String[] { dir.FullName, destDir2.FullName, splitSegName }); assertEquals(5, destDir2.GetFiles().Length); Store.Directory fsDirDest2 = NewFSDirectory(destDir2); r = DirectoryReader.Open(fsDirDest2); assertEquals(50, r.MaxDoc); r.Dispose(); fsDirDest2.Dispose(); // now remove the copied segment from src IndexSplitter.Main(new String[] { dir.FullName, "-d", splitSegName }); r = DirectoryReader.Open(fsDir); assertEquals(2, r.Leaves.size()); r.Dispose(); fsDir.Dispose(); }
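// Sketch of the two IndexSplitter entry points driven above; `dir` and `destDir` are the
// test's DirectoryInfo paths and the segment name comes from the splitter's own SegmentInfos.
IndexSplitter splitter = new IndexSplitter(dir);
string segName = splitter.Infos.Info(1).Info.Name;   // pick a segment to copy
splitter.Split(destDir, new string[] { segName });   // copy that segment into a new index
// Command-line form: copy a named segment into a destination index ...
IndexSplitter.Main(new String[] { dir.FullName, destDir.FullName, segName });
// ... or delete ("-d") that segment from the source index in place.
IndexSplitter.Main(new String[] { dir.FullName, "-d", segName });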
public virtual void Test2() { Random random = Random; int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif random, dir); bool allowDups = random.NextBoolean(); ISet <string> seen = new JCG.HashSet <string>(); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; IList <BytesRef> docValues = new List <BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random); } else { s = TestUtil.RandomUnicodeString(random); } BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (VERBOSE) { Console.WriteLine(" " + numDocs + ": s=" + s); } Document doc = new Document(); doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); docValues.Add(br); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.GetReader().Dispose(); } } writer.ForceMerge(1); DirectoryReader r = writer.GetReader(); writer.Dispose(); AtomicReader sr = GetOnlySegmentReader(r); long END_TIME = Environment.TickCount + (TEST_NIGHTLY ? 30 : 1); int NUM_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 1, 10); ThreadJob[] threads = new ThreadJob[NUM_THREADS]; for (int thread = 0; thread < NUM_THREADS; thread++) { threads[thread] = new ThreadAnonymousInnerClassHelper2(random, docValues, sr, END_TIME); threads[thread].Start(); } foreach (ThreadJob thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
public virtual void TestPrepareCommitRollback() { Directory dir = NewDirectory(); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(5))); writer.Commit(); for (int i = 0; i < 23; i++) { AddDoc(writer); } DirectoryReader reader = DirectoryReader.Open(dir); Assert.AreEqual(0, reader.NumDocs); writer.PrepareCommit(); IndexReader reader2 = DirectoryReader.Open(dir); Assert.AreEqual(0, reader2.NumDocs); writer.Rollback(); IndexReader reader3 = DirectoryReader.OpenIfChanged(reader); Assert.IsNull(reader3); Assert.AreEqual(0, reader.NumDocs); Assert.AreEqual(0, reader2.NumDocs); reader.Dispose(); reader2.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); for (int i = 0; i < 17; i++) { AddDoc(writer); } reader = DirectoryReader.Open(dir); Assert.AreEqual(0, reader.NumDocs); reader.Dispose(); writer.PrepareCommit(); reader = DirectoryReader.Open(dir); Assert.AreEqual(0, reader.NumDocs); reader.Dispose(); writer.Commit(); reader = DirectoryReader.Open(dir); Assert.AreEqual(17, reader.NumDocs); reader.Dispose(); writer.Dispose(); dir.Dispose(); }
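// Sketch of the two-phase commit behaviour the test above checks: PrepareCommit() writes a
// pending commit that newly opened readers still cannot see; only Commit() publishes it,
// and Rollback() discards it. `writer` and `dir` are assumed set up as in the test.
writer.PrepareCommit();
DirectoryReader pending = DirectoryReader.Open(dir);   // still sees only the previous commit
pending.Dispose();
writer.Commit();                                       // or writer.Rollback() to discard the pending docs
DirectoryReader committed = DirectoryReader.Open(dir); // now sees the newly committed documents
committed.Dispose();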
public override void Run() { DirectoryReader reader = null; bool success = false; try { Random random = Random(); while (NumUpdates.GetAndDecrement() > 0) { double group = random.NextDouble(); Term t; if (group < 0.1) { t = new Term("updKey", "g0"); } else if (group < 0.5) { t = new Term("updKey", "g1"); } else if (group < 0.8) { t = new Term("updKey", "g2"); } else { t = new Term("updKey", "g3"); } // System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t); if (random.NextBoolean()) // sometimes unset a value { // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=UNSET"); Writer.UpdateBinaryDocValue(t, f, null); Writer.UpdateNumericDocValue(t, Cf, null); } else { long updValue = random.Next(); // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue); Writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue)); Writer.UpdateNumericDocValue(t, Cf, updValue * 2); } if (random.NextDouble() < 0.2) { // delete a random document int doc = random.Next(NumDocs); // System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc); Writer.DeleteDocuments(new Term("id", "doc" + doc)); } if (random.NextDouble() < 0.05) // commit every 20 updates on average { // System.out.println("[" + Thread.currentThread().getName() + "] commit"); Writer.Commit(); } if (random.NextDouble() < 0.1) // reopen NRT reader (apply updates), on average once every 10 updates { if (reader == null) { // System.out.println("[" + Thread.currentThread().getName() + "] open NRT"); reader = DirectoryReader.Open(Writer, true); } else { // System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT"); DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader, Writer, true); if (r2 != null) { reader.Dispose(); reader = r2; } } } } // System.out.println("[" + Thread.currentThread().getName() + "] DONE"); success = true; } catch (IOException e) { throw new Exception(e.Message, e); } finally { if (reader != null) { try { reader.Dispose(); } catch (IOException e) { if (success) // suppress this exception only if there was another exception { throw new Exception(e.Message, e); } } } Done.Signal(); } }
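// Sketch of the near-real-time reader pattern used in the updater thread above: open once
// against the shared `Writer`, then cheaply reopen to pick up whatever updates and deletes
// have been applied (applyAllDeletes = true, as in the test).
DirectoryReader nrt = DirectoryReader.Open(Writer, true);
// ... after more updates/deletes ...
DirectoryReader newer = DirectoryReader.OpenIfChanged(nrt, Writer, true);
if (newer != null) // null means nothing has changed since the last (re)open
{
    nrt.Dispose();
    nrt = newer;
}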
private void DoTestReopenWithCommit(Random random, Directory dir, bool withReopen) { IndexWriter iwriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.CREATE).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy())); iwriter.Commit(); DirectoryReader reader = DirectoryReader.Open(dir); try { int M = 3; FieldType customType = new FieldType(TextField.TYPE_STORED); customType.IsTokenized = false; FieldType customType2 = new FieldType(TextField.TYPE_STORED); customType2.IsTokenized = false; customType2.OmitNorms = true; FieldType customType3 = new FieldType(); customType3.IsStored = true; for (int i = 0; i < 4; i++) { for (int j = 0; j < M; j++) { Document doc = new Document(); doc.Add(NewField("id", i + "_" + j, customType)); doc.Add(NewField("id2", i + "_" + j, customType2)); doc.Add(NewField("id3", i + "_" + j, customType3)); iwriter.AddDocument(doc); if (i > 0) { int k = i - 1; int n = j + k * M; Document prevIterationDoc = reader.Document(n); Assert.IsNotNull(prevIterationDoc); string id = prevIterationDoc.Get("id"); Assert.AreEqual(k + "_" + j, id); } } iwriter.Commit(); if (withReopen) { // reopen DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader); if (r2 != null) { reader.Dispose(); reader = r2; } } else { // recreate reader.Dispose(); reader = DirectoryReader.Open(dir); } } } finally { iwriter.Dispose(); reader.Dispose(); } }
public void TestNumericField() { Directory dir = NewDirectory(); var w = new RandomIndexWriter(Random(), dir); var numDocs = AtLeast(500); var answers = new object[numDocs]; FieldType.NumericType[] typeAnswers = new FieldType.NumericType[numDocs]; for (int id = 0; id < numDocs; id++) { Document doc = new Document(); Field nf; Field sf; object answer; FieldType.NumericType typeAnswer; if (Random().NextBoolean()) { // float/double if (Random().NextBoolean()) { float f = Random().NextFloat(); answer = Convert.ToSingle(f); nf = new FloatField("nf", f, Field.Store.NO); sf = new StoredField("nf", f); typeAnswer = FieldType.NumericType.FLOAT; } else { double d = Random().NextDouble(); answer = Convert.ToDouble(d); nf = new DoubleField("nf", d, Field.Store.NO); sf = new StoredField("nf", d); typeAnswer = FieldType.NumericType.DOUBLE; } } else { // int/long if (Random().NextBoolean()) { int i = Random().Next(); answer = Convert.ToInt32(i); nf = new IntField("nf", i, Field.Store.NO); sf = new StoredField("nf", i); typeAnswer = FieldType.NumericType.INT; } else { long l = Random().NextLong(); answer = Convert.ToInt64(l); nf = new LongField("nf", l, Field.Store.NO); sf = new StoredField("nf", l); typeAnswer = FieldType.NumericType.LONG; } } doc.Add(nf); doc.Add(sf); answers[id] = answer; typeAnswers[id] = typeAnswer; FieldType ft = new FieldType(IntField.TYPE_STORED); ft.NumericPrecisionStep = int.MaxValue; doc.Add(new IntField("id", id, ft)); w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); Assert.AreEqual(numDocs, r.NumDocs); foreach (AtomicReaderContext ctx in r.Leaves) { AtomicReader sub = (AtomicReader)ctx.Reader; FieldCache.Ints ids = FieldCache.DEFAULT.GetInts(sub, "id", false); for (int docID = 0; docID < sub.NumDocs; docID++) { Document doc = sub.Document(docID); Field f = (Field)doc.GetField("nf"); Assert.IsTrue(f is StoredField, "got f=" + f); Assert.AreEqual(answers[ids.Get(docID)], f.NumericValue); } } r.Dispose(); dir.Dispose(); }
public virtual void TestThreadSafety() { Directory dir = NewDirectory(); // NOTE: this also controls the number of threads! int n = TestUtil.NextInt(Random(), 20, 40); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int i = 0; i < n; i++) { writer.AddDocument(CreateDocument(i, 3)); } writer.ForceMerge(1); writer.Dispose(); TestReopen test = new TestReopenAnonymousInnerClassHelper3(this, dir, n); IList <ReaderCouple> readers = new SynchronizedList <ReaderCouple>(); DirectoryReader firstReader = DirectoryReader.Open(dir); DirectoryReader reader = firstReader; ReaderThread[] threads = new ReaderThread[n]; ISet <DirectoryReader> readersToClose = new ConcurrentHashSet <DirectoryReader>(new HashSet <DirectoryReader>()); for (int i = 0; i < n; i++) { if (i % 2 == 0) { DirectoryReader refreshed = DirectoryReader.OpenIfChanged(reader); if (refreshed != null) { readersToClose.Add(reader); reader = refreshed; } } DirectoryReader r = reader; int index = i; ReaderThreadTask task; if (i < 4 || (i >= 10 && i < 14) || i > 18) { task = new ReaderThreadTaskAnonymousInnerClassHelper(this, test, readers, readersToClose, r, index); } else { task = new ReaderThreadTaskAnonymousInnerClassHelper2(this, readers); } threads[i] = new ReaderThread(task); threads[i].Start(); } lock (this) { Monitor.Wait(this, TimeSpan.FromMilliseconds(1000)); } for (int i = 0; i < n; i++) { if (threads[i] != null) { threads[i].StopThread(); } } for (int i = 0; i < n; i++) { if (threads[i] != null) { threads[i].Join(); if (threads[i].Error != null) { string msg = "Error occurred in thread " + threads[i].Name + ":\n" + threads[i].Error.Message; Assert.Fail(msg); } } } foreach (DirectoryReader readerToClose in readersToClose) { readerToClose.Dispose(); } firstReader.Dispose(); reader.Dispose(); foreach (DirectoryReader readerToClose in readersToClose) { AssertReaderClosed(readerToClose, true); } AssertReaderClosed(reader, true); AssertReaderClosed(firstReader, true); dir.Dispose(); }
public void TestBigDocuments() { // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments"))); IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } Document emptyDoc = new Document(); // emptyDoc Document bigDoc1 = new Document(); // lot of small fields Document bigDoc2 = new Document(); // 1 very big field Field idField = new StringField("id", "", Field.Store.NO); emptyDoc.Add(idField); bigDoc1.Add(idField); bigDoc2.Add(idField); FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.Indexed = false; Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored); int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.Add(smallField); } Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored); bigDoc2.Add(bigField); int numDocs = AtLeast(5); Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.StringValue = "" + i; iw.AddDocument(docs[i]); if (Random().Next(numDocs) == 0) { iw.Commit(); } } iw.Commit(); iw.ForceMerge(1); // look at what happens when big docs are merged DirectoryReader rd = DirectoryReader.Open(dir); IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { Query query = new TermQuery(new Term("id", "" + i)); TopDocs topDocs = searcher.Search(query, 1); Assert.AreEqual(1, topDocs.TotalHits, "" + i); Document doc = rd.Document(topDocs.ScoreDocs[0].Doc); Assert.IsNotNull(doc); IndexableField[] fieldValues = doc.GetFields("fld"); Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length); if (fieldValues.Length > 0) { Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue()); } } rd.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestReopenOnCommit() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(new KeepAllCommits()).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10))); for (int i = 0; i < 4; i++) { Document doc = new Document(); doc.Add(NewStringField("id", "" + i, Field.Store.NO)); writer.AddDocument(doc); IDictionary <string, string> data = new Dictionary <string, string>(); data["index"] = i + ""; writer.SetCommitData(data); writer.Commit(); } for (int i = 0; i < 4; i++) { writer.DeleteDocuments(new Term("id", "" + i)); IDictionary <string, string> data = new Dictionary <string, string>(); data["index"] = (4 + i) + ""; writer.SetCommitData(data); writer.Commit(); } writer.Dispose(); DirectoryReader r = DirectoryReader.Open(dir); Assert.AreEqual(0, r.NumDocs); ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); foreach (IndexCommit commit in commits) { DirectoryReader r2 = DirectoryReader.OpenIfChanged(r, commit); Assert.IsNotNull(r2); Assert.IsTrue(r2 != r); IDictionary <string, string> s = commit.UserData; int v; if (s.Count == 0) { // First commit created by IW v = -1; } else { v = Convert.ToInt32(s["index"]); } if (v < 4) { Assert.AreEqual(1 + v, r2.NumDocs); } else { Assert.AreEqual(7 - v, r2.NumDocs); } r.Dispose(); r = r2; } r.Dispose(); dir.Dispose(); }
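// Sketch of walking historical commits as the test above does: each commit keeps the user
// data attached via writer.SetCommitData, and a reader can be re-opened directly on a
// specific commit with the commit-taking OpenIfChanged overload. `dir` is assumed to be an
// index whose deletion policy kept all commits, as in the test.
DirectoryReader r = DirectoryReader.Open(dir);
foreach (IndexCommit commit in DirectoryReader.ListCommits(dir))
{
    DirectoryReader r2 = DirectoryReader.OpenIfChanged(r, commit);
    if (r2 != null) // a different commit than the one `r` currently sees
    {
        Console.WriteLine("gen=" + commit.Generation + " index=" + (commit.UserData.Count == 0 ? "-" : commit.UserData["index"]));
        r.Dispose();
        r = r2;
    }
}
r.Dispose();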
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { Console.WriteLine("TEST: cycle pass=" + pass); } /* ... (setup of dir, conf, policy, needsMerging and the initial documents omitted here) ... */ if (needsMerging) { if (VERBOSE) { Console.WriteLine("TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy)writer.Config.DelPolicy; writer.ForceMerge(1); writer.Dispose(); } Assert.AreEqual(needsMerging ? 2 : 1, policy.NumOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.NumOnCommit); // Test listCommits ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); // 2 from closing writer Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count); // Make sure we can open a reader on each commit: foreach (IndexCommit commit in commits) { IndexReader r = DirectoryReader.Open(commit); r.Dispose(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.ListAll().Length; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND).SetIndexDeletionPolicy(policy)); writer.Dispose(); int postCount = dir.ListAll().Length; Assert.IsTrue(postCount < preCount); } } dir.Dispose(); } }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field")); IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random())); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (VERBOSE) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList = CollectionsHelper.Shuffle(postingsList); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir("bagofpostings")); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); int threadCount = TestUtil.NextInt(Random(), 1, 5); if (VERBOSE) { Console.WriteLine("config: " + iw.w.Config); Console.WriteLine("threadCount=" + threadCount); } ThreadClass[] threads = new ThreadClass[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadClass t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.Reader; Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.Terms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount); if (iwc.Codec is Lucene3xCodec == false) { Assert.AreEqual(numTerms - 1, terms.Size()); } TermsEnum termsEnum = terms.Iterator(null); BytesRef term_; while ((term_ = termsEnum.Next()) != null) { int value = Convert.ToInt32(term_.Utf8ToString()); Assert.AreEqual(value, termsEnum.DocFreq()); // don't really need to check more than this, as CheckIndex // will verify that docFreq == actual number of documents seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestRandom() { // token -> docID -> tokens IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >(); Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(20); //final int numDocs = AtLeast(5); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // TODO: randomize what IndexOptions we use; also test // changing this up in one IW buffered segment...: ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random.NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random.NextBoolean(); ft.StoreTermVectorPositions = Random.NextBoolean(); } for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = new Document(); doc.Add(new Int32Field("id", docCount, Field.Store.NO)); IList <Token> tokens = new List <Token>(); int numTokens = AtLeast(100); //final int numTokens = AtLeast(20); int pos = -1; int offset = 0; //System.out.println("doc id=" + docCount); for (int tokenCount = 0; tokenCount < numTokens; tokenCount++) { string text; if (Random.NextBoolean()) { text = "a"; } else if (Random.NextBoolean()) { text = "b"; } else if (Random.NextBoolean()) { text = "c"; } else { text = "d"; } int posIncr = Random.NextBoolean() ? 1 : Random.Next(5); if (tokenCount == 0 && posIncr == 0) { posIncr = 1; } int offIncr = Random.NextBoolean() ? 0 : Random.Next(5); int tokenOffset = Random.Next(5); Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset); if (!actualTokens.TryGetValue(text, out IDictionary <int?, IList <Token> > postingsByDoc)) { actualTokens[text] = postingsByDoc = new Dictionary <int?, IList <Token> >(); } if (!postingsByDoc.TryGetValue(docCount, out IList <Token> postings)) { postingsByDoc[docCount] = postings = new List <Token>(); } postings.Add(token); tokens.Add(token); pos += posIncr; // stuff abs position into type: token.Type = "" + pos; offset += offIncr + tokenOffset; //System.out.println(" " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")"); } doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft)); w.AddDocument(doc); } DirectoryReader r = w.GetReader(); w.Dispose(); string[] terms = new string[] { "a", "b", "c", "d" }; foreach (AtomicReaderContext ctx in r.Leaves) { // TODO: improve this AtomicReader sub = (AtomicReader)ctx.Reader; //System.out.println("\nsub=" + sub); TermsEnum termsEnum = sub.Fields.GetTerms("content").GetIterator(null); DocsEnum docs = null; DocsAndPositionsEnum docsAndPositions = null; DocsAndPositionsEnum docsAndPositionsAndOffsets = null; FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(sub, "id", false); foreach (string term in terms) { //System.out.println(" term=" + term); if (termsEnum.SeekExact(new BytesRef(term))) { docs = termsEnum.Docs(null, docs); Assert.IsNotNull(docs); int doc; //System.out.println(" doc/freq"); while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docs.Freq); } // explicitly exclude offsets here docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, 
DocsAndPositionsFlags.PAYLOADS); Assert.IsNotNull(docsAndPositions); //System.out.println(" doc/freq/pos"); while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docsAndPositions.Freq); foreach (Token token in expected) { int pos = Convert.ToInt32(token.Type); //System.out.println(" pos=" + pos); Assert.AreEqual(pos, docsAndPositions.NextPosition()); } } docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions); Assert.IsNotNull(docsAndPositionsAndOffsets); //System.out.println(" doc/freq/pos/offs"); while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq); foreach (Token token in expected) { int pos = Convert.ToInt32(token.Type); //System.out.println(" pos=" + pos); Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition()); Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset); Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset); } } } } // TODO: test advance: } r.Dispose(); dir.Dispose(); }
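// Sketch of reading positions and character offsets back out of the postings the test
// above builds; `termsEnum` is assumed positioned on a term of the "content" field, as in
// the verification loop (the field was indexed with ..._AND_OFFSETS, so offsets are stored).
DocsAndPositionsEnum dpe = termsEnum.DocsAndPositions(null, null);
int docId;
while ((docId = dpe.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
{
    for (int i = 0; i < dpe.Freq; i++)
    {
        // NextPosition() must be called before StartOffset/EndOffset are valid for that occurrence
        Console.WriteLine(docId + ": pos=" + dpe.NextPosition() + " [" + dpe.StartOffset + "-" + dpe.EndOffset + "]");
    }
}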
public virtual void Test() { IList<string> postingsList = new List<string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1)) { // Otherwise test can take way too long (> 2 hours) //numTerms /= 2; // LUCENENET specific - To keep this under the 1 hour free limit // of Azure DevOps, this was reduced from /2 to /6. numTerms /= 6; } if (Verbose) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i, CultureInfo.InvariantCulture); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList.Shuffle(Random); ConcurrentQueue<string> postings = new ConcurrentQueue<string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir("bagofpostings")); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int threadCount = TestUtil.NextInt32(Random, 1, 5); if (Verbose) { Console.WriteLine("config: " + iw.IndexWriter.Config); Console.WriteLine("threadCount=" + threadCount); } ThreadJob[] threads = new ThreadJob[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { threads[threadID] = new ThreadAnonymousClass(this, maxTermsPerDoc, postings, iw, startingGun); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadJob t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.GetReader(); Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: #pragma warning disable 612, 618 Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount); if (iwc.Codec is Lucene3xCodec == false) #pragma warning restore 612, 618 { Assert.AreEqual(numTerms - 1, terms.Count); } TermsEnum termsEnum = terms.GetEnumerator(); while (termsEnum.MoveNext()) { int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture); Assert.AreEqual(value, termsEnum.DocFreq); // don't really need to check more than this, as CheckIndex // will verify that docFreq == actual number of documents seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestRandomWithPrefix() { Directory dir = NewDirectory(); ISet <string> prefixes = new JCG.HashSet <string>(); int numPrefix = TestUtil.NextInt32(Random, 2, 7); if (Verbose) { Console.WriteLine("TEST: use " + numPrefix + " prefixes"); } while (prefixes.Count < numPrefix) { prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random)); //prefixes.Add(TestUtil.RandomSimpleString(random)); } string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/); int NUM_TERMS = AtLeast(20); ISet <BytesRef> terms = new JCG.HashSet <BytesRef>(); while (terms.Count < NUM_TERMS) { string s = prefixesArray[Random.Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random); //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random); if (s.Length > 0) { terms.Add(new BytesRef(s)); } } BytesRef[] termsArray = terms.ToArray(); Array.Sort(termsArray); int NUM_DOCS = AtLeast(100); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); // Sometimes swap in codec that impls ord(): if (Random.Next(10) == 7) { Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds")); conf.SetCodec(codec); } RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf); int[][] idToOrds = new int[NUM_DOCS][]; ISet <int?> ordsForDocSet = new JCG.HashSet <int?>(); for (int id = 0; id < NUM_DOCS; id++) { Document doc = new Document(); doc.Add(new Int32Field("id", id, Field.Store.NO)); int termCount = TestUtil.NextInt32(Random, 0, 20 * RandomMultiplier); while (ordsForDocSet.Count < termCount) { ordsForDocSet.Add(Random.Next(termsArray.Length)); } int[] ordsForDoc = new int[termCount]; int upto = 0; if (Verbose) { Console.WriteLine("TEST: doc id=" + id); } foreach (int ord in ordsForDocSet) { ordsForDoc[upto++] = ord; Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO); if (Verbose) { Console.WriteLine(" f=" + termsArray[ord].Utf8ToString()); } doc.Add(field); } ordsForDocSet.Clear(); Array.Sort(ordsForDoc); idToOrds[id] = ordsForDoc; w.AddDocument(doc); } DirectoryReader r = w.GetReader(); w.Dispose(); if (Verbose) { Console.WriteLine("TEST: reader=" + r); } AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r); foreach (string prefix in prefixesArray) { BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix); int[][] idToOrdsPrefix = new int[NUM_DOCS][]; for (int id = 0; id < NUM_DOCS; id++) { int[] docOrds = idToOrds[id]; IList <int?> newOrds = new List <int?>(); foreach (int ord in idToOrds[id]) { if (StringHelper.StartsWith(termsArray[ord], prefixRef)) { newOrds.Add(ord); } } int[] newOrdsArray = new int[newOrds.Count]; int upto = 0; foreach (int ord in newOrds) { newOrdsArray[upto++] = ord; } idToOrdsPrefix[id] = newOrdsArray; } foreach (AtomicReaderContext ctx in r.Leaves) { if (Verbose) { Console.WriteLine("\nTEST: sub=" + ctx.Reader); } Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef); } // Also test top-level reader: its enum does not support // ord, so this forces the OrdWrapper to run: if (Verbose) { Console.WriteLine("TEST: top reader"); } Verify(slowR, idToOrdsPrefix, termsArray, prefixRef); } FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey); r.Dispose(); dir.Dispose(); }
public virtual void TestIndexing() { DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete"); BaseDirectoryWrapper d = NewFSDirectory(tmpDir); // We want to "see" files removed if Lucene removed them. This is still worth running on Windows, since the IndexReader opens and closes some files. if (d is MockDirectoryWrapper) { ((MockDirectoryWrapper)d).NoDeleteOpenFile = false; } RandomIndexWriter w = new RandomIndexWriter(Random, d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); w.IndexWriter.Config.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 5, 30)); w.Commit(); ThreadClass[] indexThreads = new ThreadClass[Random.Next(4)]; long stopTime = Environment.TickCount + AtLeast(1000); for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x] = new ThreadAnonymousInnerClassHelper(w, stopTime); indexThreads[x].Name = "Thread " + x; indexThreads[x].Start(); } HashSet <string> allFiles = new HashSet <string>(); DirectoryReader r = DirectoryReader.Open(d); while (Environment.TickCount < stopTime) { IndexCommit ic = r.IndexCommit; if (Verbose) { Console.WriteLine("TEST: check files: " + ic.FileNames); } allFiles.UnionWith(ic.FileNames); // Make sure no old files were removed foreach (string fileName in allFiles) { Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist"); } DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } Thread.Sleep(1); } r.Dispose(); foreach (ThreadClass t in indexThreads) { t.Join(); } w.Dispose(); d.Dispose(); System.IO.Directory.Delete(tmpDir.FullName, true); }
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TestNightly || RandomMultiplier > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (Verbose) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i, CultureInfo.InvariantCulture); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList.Shuffle(Random); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName())); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int threadCount = TestUtil.NextInt32(Random, 1, 5); if (Verbose) { Console.WriteLine("config: " + iw.IndexWriter.Config); Console.WriteLine("threadCount=" + threadCount); } Field prototype = NewTextField("field", "", Field.Store.NO); FieldType fieldType = new FieldType((FieldType)prototype.FieldType); if (Random.NextBoolean()) { fieldType.OmitNorms = true; } int options = Random.Next(3); if (options == 0) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS; // we don't actually need positions fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING } else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field"))) { fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } // else just positions ThreadClass[] threads = new ThreadClass[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { Random threadRandom = new Random(Random.Next()); Document document = new Document(); Field field = new Field("field", "", fieldType); document.Add(field); threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadClass t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.GetReader(); Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: Assert.AreEqual(numTerms - 1, terms.Count); TermsEnum termsEnum = terms.GetEnumerator(); while (termsEnum.MoveNext()) { int value = Convert.ToInt32(termsEnum.Term.Utf8ToString(), CultureInfo.InvariantCulture); Assert.AreEqual(value, termsEnum.TotalTermFreq); // don't really need to check more than this, as CheckIndex // will verify that totalTermFreq == total number of positions seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
public virtual void TestStressMultiThreading() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); IndexWriter writer = new IndexWriter(dir, conf); // create index int numThreads = TestUtil.NextInt32(Random, 3, 6); int numDocs = AtLeast(2000); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new StringField("id", "doc" + i, Field.Store.NO)); double group = Random.NextDouble(); string g; if (group < 0.1) { g = "g0"; } else if (group < 0.5) { g = "g1"; } else if (group < 0.8) { g = "g2"; } else { g = "g3"; } doc.Add(new StringField("updKey", g, Field.Store.NO)); for (int j = 0; j < numThreads; j++) { long value = Random.Next(); doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value))); doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2 } writer.AddDocument(doc); } CountdownEvent done = new CountdownEvent(numThreads); AtomicInteger numUpdates = new AtomicInteger(AtLeast(100)); // same thread updates a field as well as reopens ThreadClass[] threads = new ThreadClass[numThreads]; for (int i = 0; i < threads.Length; i++) { string f = "f" + i; string cf = "cf" + i; threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf); } foreach (ThreadClass t in threads) { t.Start(); } done.Wait(); writer.Dispose(); DirectoryReader reader = DirectoryReader.Open(dir); BytesRef scratch = new BytesRef(); foreach (AtomicReaderContext context in reader.Leaves) { AtomicReader r = context.AtomicReader; for (int i = 0; i < numThreads; i++) { BinaryDocValues bdv = r.GetBinaryDocValues("f" + i); NumericDocValues control = r.GetNumericDocValues("cf" + i); IBits docsWithBdv = r.GetDocsWithField("f" + i); IBits docsWithControl = r.GetDocsWithField("cf" + i); IBits liveDocs = r.LiveDocs; for (int j = 0; j < r.MaxDoc; j++) { if (liveDocs == null || liveDocs.Get(j)) { Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j)); if (docsWithBdv.Get(j)) { long ctrlValue = control.Get(j); long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2; // if (ctrlValue != bdvValue) { // System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch); // } Assert.AreEqual(ctrlValue, bdvValue); } } } } } reader.Dispose(); dir.Dispose(); }
public virtual void Test() { // update variables int commitPercent = Random.Next(20); int softCommitPercent = Random.Next(100); // what percent of the commits are soft int deletePercent = Random.Next(50); int deleteByQueryPercent = Random.Next(25); int ndocs = AtLeast(50); int nWriteThreads = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); int maxConcurrentCommits = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); // number of committers at a time... needed if we want to avoid commit errors due to exceeding the max bool tombstones = Random.NextBoolean(); // query variables AtomicLong operations = new AtomicLong(AtLeast(10000)); // number of query operations to perform in total int nReadThreads = TestUtil.NextInt32(Random, 1, TestNightly ? 10 : 5); InitModel(ndocs); FieldType storedOnlyType = new FieldType(); storedOnlyType.Stored = true; if (Verbose) { Console.WriteLine("\n"); Console.WriteLine("TEST: commitPercent=" + commitPercent); Console.WriteLine("TEST: softCommitPercent=" + softCommitPercent); Console.WriteLine("TEST: deletePercent=" + deletePercent); Console.WriteLine("TEST: deleteByQueryPercent=" + deleteByQueryPercent); Console.WriteLine("TEST: ndocs=" + ndocs); Console.WriteLine("TEST: nWriteThreads=" + nWriteThreads); Console.WriteLine("TEST: nReadThreads=" + nReadThreads); Console.WriteLine("TEST: maxConcurrentCommits=" + maxConcurrentCommits); Console.WriteLine("TEST: tombstones=" + tombstones); Console.WriteLine("TEST: operations=" + operations); Console.WriteLine("\n"); } AtomicInteger numCommitting = new AtomicInteger(); IList<ThreadClass> threads = new List<ThreadClass>(); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); writer.DoRandomForceMergeAssert = false; writer.Commit(); Reader = DirectoryReader.Open(dir); for (int i = 0; i < nWriteThreads; i++) { ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, "WRITER" + i, commitPercent, softCommitPercent, deletePercent, deleteByQueryPercent, ndocs, maxConcurrentCommits, tombstones, operations, storedOnlyType, numCommitting, writer); threads.Add(thread); } for (int i = 0; i < nReadThreads; i++) { ThreadClass thread = new ThreadAnonymousInnerClassHelper2(this, "READER" + i, ndocs, tombstones, operations); threads.Add(thread); } foreach (ThreadClass thread in threads) { thread.Start(); } foreach (ThreadClass thread in threads) { thread.Join(); } writer.Dispose(); if (Verbose) { Console.WriteLine("TEST: close reader=" + Reader); } Reader.Dispose(); dir.Dispose(); }
public virtual void TestDocsWithField() { AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField); Directory dir = NewDirectory(); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); if (Random.Next(4) >= 0) { doc.Add(new NumericDocValuesField("numbers", Random.NextInt64())); } doc.Add(new NumericDocValuesField("numbersAlways", Random.NextInt64())); iw.AddDocument(doc); if (Random.Next(17) == 0) { iw.Commit(); } } DirectoryReader ir = iw.GetReader(); iw.ForceMerge(1); DirectoryReader ir2 = iw.GetReader(); AtomicReader merged = GetOnlySegmentReader(ir2); iw.Dispose(); IBits multi = MultiDocValues.GetDocsWithField(ir, "numbers"); IBits single = merged.GetDocsWithField("numbers"); if (multi == null) { Assert.IsNull(single); } else { Assert.AreEqual(single.Length, multi.Length); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } } multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways"); single = merged.GetDocsWithField("numbersAlways"); Assert.AreEqual(single.Length, multi.Length); for (int i = 0; i < numDocs; i++) { Assert.AreEqual(single.Get(i), multi.Get(i)); } ir.Dispose(); ir2.Dispose(); dir.Dispose(); }