// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestWriteReadMerge()
{
    // get another codec, other than the default: so we are merging segments across different codecs
    Codec otherCodec;
    /*if ("SimpleText".Equals(Codec.Default.Name))
    {*/
    otherCodec = new Lucene46Codec();
    /*}
    else
    {
        otherCodec = new SimpleTextCodec();
    }*/
    Directory dir = NewDirectory();
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());

    int docCount = AtLeast(200);
    var data = new byte[docCount][][];
    for (int i = 0; i < docCount; ++i)
    {
        int fieldCount = Rarely() ? RandomInts.NextIntBetween(Random(), 1, 500) : RandomInts.NextIntBetween(Random(), 1, 5);
        data[i] = new byte[fieldCount][];
        for (int j = 0; j < fieldCount; ++j)
        {
            int length = Rarely() ? Random().Next(1000) : Random().Next(10);
            int max = Rarely() ? 256 : 2;
            data[i][j] = RandomByteArray(length, max);
        }
    }

    FieldType type = new FieldType(StringField.TYPE_STORED);
    type.Indexed = false;
    type.Freeze();
    IntField id = new IntField("id", 0, Field.Store.YES);
    for (int i = 0; i < data.Length; ++i)
    {
        Document doc = new Document();
        doc.Add(id);
        id.IntValue = i;
        for (int j = 0; j < data[i].Length; ++j)
        {
            Field f = new Field("bytes" + j, data[i][j], type);
            doc.Add(f);
        }
        iw.w.AddDocument(doc);
        if (Random().NextBoolean() && (i % (data.Length / 10) == 0))
        {
            iw.w.Dispose();
            // test merging against a non-compressing codec
            if (iwConf.Codec == otherCodec)
            {
                iwConf.SetCodec(Codec.Default);
            }
            else
            {
                iwConf.SetCodec(otherCodec);
            }
            iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwConf.Clone());
        }
    }

    for (int i = 0; i < 10; ++i)
    {
        int min = Random().Next(data.Length);
        int max = min + Random().Next(20);
        iw.DeleteDocuments(NumericRangeQuery.NewIntRange("id", min, max, true, false));
    }

    iw.ForceMerge(2); // force merges with deletions
    iw.Commit();

    DirectoryReader ir = DirectoryReader.Open(dir);
    Assert.IsTrue(ir.NumDocs > 0);
    int numDocs = 0;
    for (int i = 0; i < ir.MaxDoc; ++i)
    {
        Document doc = ir.Document(i);
        if (doc == null)
        {
            continue;
        }
        ++numDocs;
        int docId = (int)doc.GetField("id").NumericValue;
        Assert.AreEqual(data[docId].Length + 1, doc.Fields.Count);
        for (int j = 0; j < data[docId].Length; ++j)
        {
            var arr = data[docId][j];
            BytesRef arr2Ref = doc.GetBinaryValue("bytes" + j);
            var arr2 = Arrays.CopyOfRange(arr2Ref.Bytes, arr2Ref.Offset, arr2Ref.Offset + arr2Ref.Length);
            Assert.AreEqual(arr, arr2);
        }
    }
    Assert.IsTrue(ir.NumDocs <= numDocs);
    ir.Dispose();

    iw.DeleteAll();
    iw.Commit();
    iw.ForceMerge(1);

    iw.Dispose();
    dir.Dispose();
}
public void TestBigDocuments()
{
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwConf.SetMaxBufferedDocs(RandomInts.NextIntBetween(Random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConf);

    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    Document emptyDoc = new Document(); // emptyDoc
    Document bigDoc1 = new Document(); // lot of small fields
    Document bigDoc2 = new Document(); // 1 very big field

    Field idField = new StringField("id", "", Field.Store.NO);
    emptyDoc.Add(idField);
    bigDoc1.Add(idField);
    bigDoc2.Add(idField);

    FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.Indexed = false;

    Field smallField = new Field("fld", RandomByteArray(Random().Next(10), 256), onlyStored);
    int numFields = RandomInts.NextIntBetween(Random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i)
    {
        bigDoc1.Add(smallField);
    }

    Field bigField = new Field("fld", RandomByteArray(RandomInts.NextIntBetween(Random(), 1000000, 5000000), 2), onlyStored);
    bigDoc2.Add(bigField);

    int numDocs = AtLeast(5);
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i)
    {
        docs[i] = RandomInts.RandomFrom(Random(), Arrays.AsList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i)
    {
        idField.StringValue = "" + i;
        iw.AddDocument(docs[i]);
        if (Random().Next(numDocs) == 0)
        {
            iw.Commit();
        }
    }
    iw.Commit();
    iw.ForceMerge(1); // look at what happens when big docs are merged

    DirectoryReader rd = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i)
    {
        Query query = new TermQuery(new Term("id", "" + i));
        TopDocs topDocs = searcher.Search(query, 1);
        Assert.AreEqual(1, topDocs.TotalHits, "" + i);
        Document doc = rd.Document(topDocs.ScoreDocs[0].Doc);
        Assert.IsNotNull(doc);
        IndexableField[] fieldValues = doc.GetFields("fld");
        Assert.AreEqual(docs[i].GetFields("fld").Length, fieldValues.Length);
        if (fieldValues.Length > 0)
        {
            Assert.AreEqual(docs[i].GetFields("fld")[0].BinaryValue(), fieldValues[0].BinaryValue());
        }
    }
    rd.Dispose();
    iw.Dispose();
    dir.Dispose();
}
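// Both tests above call a RandomByteArray(length, max) helper that is not shown in this
// section. A minimal sketch of what it likely looks like, assuming it fills a fresh array
// with random byte values strictly below max (a reconstruction, not the verified source):
private byte[] RandomByteArray(int length, int max)
{
    var result = new byte[length];
    for (int i = 0; i < length; ++i)
    {
        result[i] = (byte)Random().Next(max);
    }
    return result;
}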
public virtual void Test()
{
    MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields")));
    dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;

    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(new ConcurrentMergeScheduler())
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

    MergePolicy mp = w.Config.MergePolicy;
    if (mp is LogByteSizeMergePolicy)
    {
        // 1 petabyte:
        ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
    }

    Document doc = new Document();
    FieldType ft = new FieldType();
    ft.Indexed = false;
    ft.Stored = true;
    ft.Freeze();
    int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20);
    sbyte[] value = new sbyte[valueLength];
    for (int i = 0; i < valueLength; ++i)
    {
        // random so that even compressing codecs can't compress it
        value[i] = (sbyte)Random().Next(256);
    }
    Field f = new Field("fld", value, ft);
    doc.Add(f);

    // each doc stores valueLength incompressible bytes, so ~2^32 / valueLength docs
    // (plus some slack) push the stored-fields (.fdt) file past 4 GB
    int numDocs = (int)((1L << 32) / valueLength + 100);
    for (int i = 0; i < numDocs; ++i)
    {
        w.AddDocument(doc);
        if (VERBOSE && i % (numDocs / 10) == 0)
        {
            Console.WriteLine(i + " of " + numDocs + "...");
        }
    }
    w.ForceMerge(1);
    w.Dispose();

    if (VERBOSE)
    {
        bool found = false;
        foreach (string file in dir.ListAll())
        {
            if (file.EndsWith(".fdt"))
            {
                long fileLength = dir.FileLength(file);
                if (fileLength >= 1L << 32)
                {
                    found = true;
                }
                Console.WriteLine("File length of " + file + " : " + fileLength);
            }
        }
        if (!found)
        {
            Console.WriteLine("No .fdt file larger than 4GB, test bug?");
        }
    }

    DirectoryReader rd = DirectoryReader.Open(dir);
    Document sd = rd.Document(numDocs - 1);
    Assert.IsNotNull(sd);
    Assert.AreEqual(1, sd.Fields.Count);
    BytesRef valueRef = sd.GetBinaryValue("fld");
    Assert.IsNotNull(valueRef);
    Assert.AreEqual(new BytesRef(value), valueRef);
    rd.Dispose();
    dir.Dispose();
}
public virtual void TestEncodeDecode()
{
    int iterations = RandomInts.NextIntBetween(Random(), 1, 1000);
    float acceptableOverheadRatio = (float)Random().NextDouble();
    int[] values = new int[(iterations - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
    for (int i = 0; i < iterations; ++i)
    {
        int bpv = Random().Next(32);
        if (bpv == 0)
        {
            // an all-equal block, which can be encoded with 0 bits per value
            int value = RandomInts.NextIntBetween(Random(), 0, int.MaxValue);
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
            {
                values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = value;
            }
        }
        else
        {
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
            {
                values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = RandomInts.NextIntBetween(Random(), 0, (int)PackedInt32s.MaxValue(bpv));
            }
        }
    }

    Directory d = new RAMDirectory();
    long endPointer;

    {
        // encode
        IndexOutput @out = d.CreateOutput("test.bin", IOContext.DEFAULT);
        ForUtil forUtil = new ForUtil(acceptableOverheadRatio, @out);
        for (int i = 0; i < iterations; ++i)
        {
            forUtil.WriteBlock(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, values.Length), new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
        }
        endPointer = @out.GetFilePointer();
        @out.Dispose();
    }

    {
        // decode
        IndexInput @in = d.OpenInput("test.bin", IOContext.READ_ONCE);
        ForUtil forUtil = new ForUtil(@in);
        for (int i = 0; i < iterations; ++i)
        {
            if (Random().NextBoolean())
            {
                forUtil.SkipBlock(@in);
                continue;
            }
            int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
            forUtil.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
            Assert.AreEqual(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, (i + 1) * Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
        }
        Assert.AreEqual(endPointer, @in.GetFilePointer());
        @in.Dispose();
    }
}
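// For reference: ForUtil operates on fixed-size blocks of Lucene41PostingsFormat.BLOCK_SIZE
// (128) integers, packed at the smallest acceptable bit width. A minimal single-block round
// trip using the same API as the test above; the method name and the "roundtrip.bin" file
// name are illustrative only, not part of the test suite:
private static void SingleBlockRoundTripSketch()
{
    int[] block = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
    {
        block[j] = j & 0x0F; // 4 bits per value suffice for this block
    }
    using (Directory d = new RAMDirectory())
    {
        IndexOutput @out = d.CreateOutput("roundtrip.bin", IOContext.DEFAULT);
        ForUtil writer = new ForUtil(0f, @out); // writes a small header describing the encoding
        writer.WriteBlock(block, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
        @out.Dispose();

        IndexInput @in = d.OpenInput("roundtrip.bin", IOContext.READ_ONCE);
        ForUtil reader = new ForUtil(@in); // reads back the header written above
        int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
        reader.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
        @in.Dispose();

        Assert.AreEqual(Arrays.CopyOf(block, Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
    }
}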
/// <summary>
/// Creates a random <see cref="CompressingCodec"/>, optionally using a segment suffix.
/// </summary>
public static CompressingCodec RandomInstance(Random random, bool withSegmentSuffix)
{
    return RandomInstance(random, RandomInts.NextIntBetween(random, 1, 500), withSegmentSuffix); // random chunk size in [1, 500]
}
/// <summary>
/// Creates a random <see cref="CompressingCodec"/> that is using an empty segment suffix.
/// </summary>
public static CompressingCodec RandomInstance(Random random)
{
    return RandomInstance(random, RandomInts.NextIntBetween(random, 1, 500), false); // random chunk size in [1, 500]
}
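// Both overloads above delegate to a three-argument overload that picks one of the concrete
// compressing codecs at random. A sketch of its likely shape, based on the CompressingCodec
// subclasses in Lucene's test framework; treat the exact set of case arms as an assumption:
public static CompressingCodec RandomInstance(Random random, int chunkSize, bool withSegmentSuffix)
{
    switch (random.Next(4))
    {
        case 0:
            return new FastCompressingCodec(chunkSize, withSegmentSuffix);
        case 1:
            return new FastDecompressionCompressingCodec(chunkSize, withSegmentSuffix);
        case 2:
            return new HighCompressionCompressingCodec(chunkSize, withSegmentSuffix);
        case 3:
            return new DummyCompressingCodec(chunkSize, withSegmentSuffix);
        default:
            throw new InvalidOperationException();
    }
}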