/// <summary>
/// Indexes documents that carry no fields and checks that each of them can be
/// loaded back as a non-null <see cref="Document"/> without any fields.
/// </summary>
public virtual void TestEmptyDocs()
{
    using (Directory directory = NewDirectory())
    {
        IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        int maxBuffered = RandomInts.RandomInt32Between(Random, 2, 30);
        config.SetMaxBufferedDocs(maxBuffered);

        using (RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config))
        {
            // make sure that the fact that documents might be empty is not a problem
            Document blank = new Document();

            // either a single document or a large batch
            int docCount;
            if (Random.NextBoolean())
            {
                docCount = 1;
            }
            else
            {
                docCount = AtLeast(1000);
            }

            for (int remaining = docCount; remaining > 0; remaining--)
            {
                writer.AddDocument(blank);
            }
            writer.Commit();

            using (DirectoryReader reader = DirectoryReader.Open(directory))
            {
                int docId = 0;
                while (docId < docCount)
                {
                    Document loaded = reader.Document(docId);
                    Assert.IsNotNull(loaded);
                    Assert.IsFalse(loaded.Fields.Count > 0);
                    docId++;
                }
            } // reader.Dispose();
        } // writer.Dispose();
    } // directory.Dispose();
}
/// <summary>
/// Stores six fields (binary, string, long, int, float, double) in 100 documents,
/// then loads one randomly chosen document one field at a time and checks that the
/// loaded value of each field equals the value that was indexed.
/// </summary>
public virtual void TestReadSkip()
{
    using (Directory directory = NewDirectory())
    {
        IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        config.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));

        using (RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config))
        {
            FieldType storedType = new FieldType { IsStored = true };
            storedType.Freeze();

            // The random values are drawn in a fixed order: string, long, int, float, double.
            string text = TestUtil.RandomSimpleString(Random, 50);
            var utf8 = text.GetBytes(Encoding.UTF8);

            long longValue;
            if (Random.NextBoolean())
            {
                longValue = Random.Next(42);
            }
            else
            {
                longValue = Random.NextInt64();
            }

            int intValue;
            if (Random.NextBoolean())
            {
                intValue = Random.Next(42);
            }
            else
            {
                intValue = Random.Next();
            }

            float floatValue = Random.NextSingle();
            double doubleValue = Random.NextDouble();

            IList<Field> allFields = Arrays.AsList(
                new Field("bytes", utf8, storedType),
                new Field("string", text, storedType),
                new Int64Field("long", longValue, Field.Store.YES),
                new Int32Field("int", intValue, Field.Store.YES),
                new SingleField("float", floatValue, Field.Store.YES),
                new DoubleField("double", doubleValue, Field.Store.YES));

            // every document reuses the same field instances
            int added = 0;
            while (added < 100)
            {
                Document toIndex = new Document();
                foreach (Field each in allFields)
                {
                    toIndex.Add(each);
                }
                writer.IndexWriter.AddDocument(toIndex);
                ++added;
            }
            writer.Commit();

            using (DirectoryReader reader = DirectoryReader.Open(directory))
            {
                int target = Random.Next(100);
                foreach (Field expected in allFields)
                {
                    string name = expected.Name;

                    // load only the field under test
                    Document partial = reader.Document(target, Collections.Singleton(name));
                    IIndexableField actual = partial.GetField(name);

                    bool isPlainField = expected.GetType() == typeof(Field);
                    if (isPlainField)
                    {
                        Assert.AreEqual(expected.GetBinaryValue(), actual.GetBinaryValue());
                        Assert.AreEqual(expected.GetStringValue(), actual.GetStringValue());
                    }
                    else
                    {
#pragma warning disable 612, 618
                        Assert.AreEqual(expected.GetNumericValue(), actual.GetNumericValue());
#pragma warning restore 612, 618
                    }
                }
            } // reader.Dispose();
        } // writer.Dispose();
    } // directory.Dispose();
}
/// <summary>
/// Builds a byte array of the requested length whose elements are each produced by
/// <c>RandomInts.RandomInt32Between(Random, 0, max)</c> and then cast to <see cref="byte"/>.
/// </summary>
/// <param name="length">Number of bytes in the returned array.</param>
/// <param name="max">Upper bound handed to <c>RandomInt32Between</c> for every element.</param>
/// <returns>The newly allocated, randomly filled array.</returns>
internal static byte[] RandomArray(int length, int max)
{
    byte[] result = new byte[length];
    int index = 0;
    while (index < result.Length)
    {
        int picked = RandomInts.RandomInt32Between(Random, 0, max);
        result[index] = (byte)picked;
        index++;
    }
    return result;
}
/// <summary>
/// Runs <c>Test</c> on a buffer of 20 to 256 bytes in which every byte equals its own index.
/// </summary>
public virtual void TestIncompressible()
{
    int size = RandomInts.RandomInt32Between(Random, 20, 256);
    byte[] data = new byte[size];

    int position = 0;
    while (position < data.Length)
    {
        data[position] = (byte)position;
        position++;
    }

    Test(data);
}
/// <summary>
/// Runs <c>Test</c> on random data in which a short run taken from near the start of the
/// buffer is copied to a position near its end.
/// </summary>
public virtual void TestLongLiterals()
{
    // long literals (length >= 16) which are not the last literals
    int totalLength = RandomInts.RandomInt32Between(Random, 400, 1024);
    var data = RandomArray(totalLength, 256);

    // source of the repeated run: somewhere in the first 30 bytes
    int sourceIndex = Random.Next(30);

    // destination of the repeated run: between 40 and 20 bytes before the end
    int lowestTarget = data.Length - 40;
    int highestTarget = data.Length - 20;
    int targetIndex = RandomInts.RandomInt32Between(Random, lowestTarget, highestTarget);

    int runLength = RandomInts.RandomInt32Between(Random, 4, 10);

    Array.Copy(data, sourceIndex, data, targetIndex, runLength);
    Test(data);
}
/// <summary>
/// Runs <c>Test</c> on a buffer of 300 to 1024 bytes in which every byte is its own
/// index truncated to a byte, so the content repeats once the index passes 255.
/// </summary>
public virtual void TestLongMatchs()
{
    // match length >= 20
    int size = RandomInts.RandomInt32Between(Random, 300, 1024);
    byte[] data = new byte[size];

    int position = 0;
    while (position < data.Length)
    {
        data[position] = (byte)position;
        position++;
    }

    Test(data);
}
/// <summary>
/// Builds an automaton from a sorted list of up to 1000 random unicode terms with
/// <c>BasicAutomata.MakeStringUnion</c> and checks that it is deterministic and accepts
/// the same language as the automaton produced by <c>NaiveUnion</c>.
/// </summary>
public virtual void TestStringUnion()
{
    List<BytesRef> terms = new List<BytesRef>();

    int remaining = RandomInts.RandomInt32Between(Random, 0, 1000);
    while (--remaining >= 0)
    {
        string randomTerm = TestUtil.RandomUnicodeString(Random);
        terms.Add(new BytesRef(randomTerm));
    }

    // the list is sorted before it is handed to MakeStringUnion
    terms.Sort();

    Automaton union = BasicAutomata.MakeStringUnion(terms);
    Assert.IsTrue(union.IsDeterministic);

    Automaton naive = NaiveUnion(terms);
    Assert.IsTrue(BasicOperations.SameLanguage(union, naive));
}
/// <summary>
/// Commits one valid document, then tries to add a document that is expected to fail with an
/// <see cref="ArgumentException"/>, and finally checks that the directory holds exactly
/// two stored-fields files (names ending in ".fdt" or ".fdx").
/// </summary>
public virtual void TestDeletePartiallyWrittenFilesIfAbort()
{
    Directory directory = NewDirectory();

    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));
    config.SetCodec(CompressingCodec.RandomInstance(Random));
    // disable CFS because this test checks file names
    config.SetMergePolicy(NewLogMergePolicy(false));
    config.SetUseCompoundFile(false);

    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config);

    Document goodDoc = new Document();
    goodDoc.Add(new Int32Field("id", 0, Field.Store.YES));
    writer.AddDocument(goodDoc);
    writer.Commit();

    // make sure that #writeField will fail to trigger an abort
    Document badDoc = new Document();
    FieldType storedType = new FieldType { IsStored = true };
    badDoc.Add(new FieldAnonymousInnerClassHelper(this, storedType));

    try
    {
        Assert.Throws<ArgumentException>(() =>
        {
            writer.AddDocument(badDoc);
            writer.Commit();
        });
    }
    finally
    {
        int storedFieldsFiles = 0;
        foreach (string name in directory.ListAll())
        {
            bool isData = name.EndsWith(".fdt", StringComparison.Ordinal);
            bool isIndex = !isData && name.EndsWith(".fdx", StringComparison.Ordinal);
            if (isData || isIndex)
            {
                storedFieldsFiles += 1;
            }
        }

        // Only one .fdt and one .fdx files must have been found
        Assert.AreEqual(2, storedFieldsFiles);

        writer.Dispose();
        directory.Dispose();
    }
}
/// <summary>
/// Indexes at least 1000 documents with a single stored field, then starts several reader
/// threads that share one <see cref="DirectoryReader"/> and one <see cref="IndexSearcher"/>.
/// After all threads have been joined, any exception found in the shared holder is rethrown
/// so that the test fails.
/// </summary>
public virtual void TestConcurrentReads()
{
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwConf.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConf))
        {
            // make sure the readers are properly cloned
            Document doc = new Document();
            Field field = new StringField("fld", "", Field.Store.YES);
            doc.Add(field);

            // the same Document/Field pair is reused; only the field value changes per document
            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; ++i)
            {
                field.SetStringValue("" + i);
                iw.AddDocument(doc);
            }
            iw.Commit();

            // Holder passed to every reader thread and inspected once all of them have finished.
            // NOTE(review): presumably a failing thread stores its exception here - the thread
            // class is not part of this method, confirm against ThreadAnonymousInnerClassHelper.
            AtomicObject<Exception> ex = new AtomicObject<Exception>();

            using (DirectoryReader rd = DirectoryReader.Open(dir))
            {
                IndexSearcher searcher = new IndexSearcher(rd);
                int concurrentReads = AtLeast(5);
                int readsPerThread = AtLeast(50);

                IList<ThreadClass> readThreads = new List<ThreadClass>();
                for (int i = 0; i < concurrentReads; ++i)
                {
                    readThreads.Add(new ThreadAnonymousInnerClassHelper(numDocs, rd, searcher, readsPerThread, ex, i));
                }

                foreach (ThreadClass thread in readThreads)
                {
                    thread.Start();
                }

                // the reader must stay open until every thread is done with it
                foreach (ThreadClass thread in readThreads)
                {
                    thread.Join();
                }
            } // rd.Dispose();

            Exception failure = ex.Value;
            if (failure != null)
            {
                // Rethrow via ExceptionDispatchInfo: a plain "throw failure;" would replace the
                // stack trace recorded where the exception was first thrown with this line,
                // hiding the real failure location.
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(failure).Throw();
            }
        } // iw.Dispose();
    } // dir.Dispose();
}
/// <summary>
/// Creates a random <see cref="CompressingCodec"/>; <paramref name="withSegmentSuffix"/>
/// is forwarded unchanged to the three-argument overload.
/// </summary>
/// <param name="random">Source of randomness, also passed on to the three-argument overload.</param>
/// <param name="withSegmentSuffix">Forwarded as the last argument of the three-argument overload.</param>
/// <returns>The codec returned by the three-argument overload.</returns>
public static CompressingCodec RandomInstance(Random random, bool withSegmentSuffix)
{
    // Integer in the range handed to RandomInt32Between(1, 500); its meaning is defined by
    // the three-argument overload, which is not shown here.
    int pickedValue = RandomInts.RandomInt32Between(random, 1, 500);
    return RandomInstance(random, pickedValue, withSegmentSuffix);
}
/// <summary>
/// Creates a random <see cref="CompressingCodec"/> that is using an empty segment
/// suffix.
/// </summary>
/// <param name="random">Source of randomness, also passed on to the three-argument overload.</param>
/// <returns>The codec returned by the three-argument overload when called with <c>false</c>.</returns>
public static CompressingCodec RandomInstance(Random random)
{
    // Integer in the range handed to RandomInt32Between(1, 500); its meaning is defined by
    // the three-argument overload, which is not shown here.
    int pickedValue = RandomInts.RandomInt32Between(random, 1, 500);
    const bool withSegmentSuffix = false;
    return RandomInstance(random, pickedValue, withSegmentSuffix);
}
/// <summary>
/// Writes enough copies of one document with a single random binary stored field that the
/// total stored payload exceeds 2^32 bytes, force-merges to one segment, and checks that the
/// last document can still be read back with its original value.
/// </summary>
/// <param name="newScheduler">Factory invoked once to obtain the merge scheduler for the writer.</param>
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func<IConcurrentMergeScheduler> newScheduler)
{
    MockDirectoryWrapper directory = new MockDirectoryWrapper(Random, new MMapDirectory(CreateTempDir("4GBStoredFields")));
    directory.Throttling = Throttling.NEVER;

    var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(newScheduler())
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    LogByteSizeMergePolicy byteSizePolicy = writer.Config.MergePolicy as LogByteSizeMergePolicy;
    if (byteSizePolicy != null)
    {
        // 1 petabyte:
        byteSizePolicy.MaxMergeMB = 1024 * 1024 * 1024;
    }

    Document template = new Document();
    FieldType storedOnly = new FieldType { IsIndexed = false, IsStored = true };
    storedOnly.Freeze();

    int payloadLength = RandomInts.RandomInt32Between(Random, 1 << 13, 1 << 20);
    var payload = new byte[payloadLength];
    int filled = 0;
    while (filled < payloadLength)
    {
        // random so that even compressing codecs can't compress it
        payload[filled] = (byte)Random.Next(256);
        filled++;
    }
    Field storedField = new Field("fld", payload, storedOnly);
    template.Add(storedField);

    // enough documents for the payloads to add up to more than 2^32 bytes
    int docCount = (int)((1L << 32) / payloadLength + 100);
    for (int written = 0; written < docCount; ++written)
    {
        writer.AddDocument(template);
        if (VERBOSE && written % (docCount / 10) == 0)
        {
            Console.WriteLine(written + " of " + docCount + "...");
        }
    }
    writer.ForceMerge(1);
    writer.Dispose();

    if (VERBOSE)
    {
        bool sawBigFile = false;
        foreach (string name in directory.ListAll())
        {
            if (!name.EndsWith(".fdt", StringComparison.Ordinal))
            {
                continue;
            }

            long size = directory.FileLength(name);
            if (size >= 1L << 32)
            {
                sawBigFile = true;
            }
            Console.WriteLine("File length of " + name + " : " + size);
        }
        if (!sawBigFile)
        {
            Console.WriteLine("No .fdt file larger than 4GB, test bug?");
        }
    }

    DirectoryReader reader = DirectoryReader.Open(directory);
    Document lastDoc = reader.Document(docCount - 1);
    Assert.IsNotNull(lastDoc);
    Assert.AreEqual(1, lastDoc.Fields.Count);
    BytesRef loaded = lastDoc.GetBinaryValue("fld");
    Assert.IsNotNull(loaded);
    Assert.AreEqual(new BytesRef(payload), loaded);
    reader.Dispose();

    directory.Dispose();
}
/// <summary>
/// Writes a random number of integer blocks with <c>ForUtil.WriteBlock</c>, then reads the
/// file back, randomly skipping or decoding each block, and checks that decoded blocks match
/// the input and that the reader ends at the position where the writer stopped.
/// </summary>
public virtual void TestEncodeDecode()
{
    int blockCount = RandomInts.RandomInt32Between(Random, 1, 1000);
    float overheadRatio = (float)Random.NextDouble();

    int[] input = new int[(blockCount - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
    for (int block = 0; block < blockCount; ++block)
    {
        int blockStart = block * Lucene41PostingsFormat.BLOCK_SIZE;
        int bpv = Random.Next(32);
        if (bpv != 0)
        {
            // every value of the block is drawn independently
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
            {
                input[blockStart + j] = RandomInts.RandomInt32Between(Random, 0, (int)PackedInt32s.MaxValue(bpv));
            }
        }
        else
        {
            // the whole block holds one repeated value
            int repeated = RandomInts.RandomInt32Between(Random, 0, int.MaxValue);
            for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
            {
                input[blockStart + j] = repeated;
            }
        }
    }

    Directory directory = new RAMDirectory();

    // encode
    IndexOutput output = directory.CreateOutput("test.bin", IOContext.DEFAULT);
    ForUtil encoder = new ForUtil(overheadRatio, output);
    for (int block = 0; block < blockCount; ++block)
    {
        int from = block * Lucene41PostingsFormat.BLOCK_SIZE;
        int[] toWrite = Arrays.CopyOfRange(input, from, input.Length);
        encoder.WriteBlock(toWrite, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], output);
    }
    long endPointer = output.GetFilePointer();
    output.Dispose();

    // decode
    IndexInput source = directory.OpenInput("test.bin", IOContext.READ_ONCE);
    ForUtil decoder = new ForUtil(source);
    for (int block = 0; block < blockCount; ++block)
    {
        if (Random.NextBoolean())
        {
            decoder.SkipBlock(source);
        }
        else
        {
            int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
            decoder.ReadBlock(source, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);

            int from = block * Lucene41PostingsFormat.BLOCK_SIZE;
            int to = (block + 1) * Lucene41PostingsFormat.BLOCK_SIZE;
            Assert.AreEqual(Arrays.CopyOfRange(input, from, to), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
        }
    }
    assertEquals(endPointer, source.GetFilePointer());
    source.Dispose();
}
/// <summary>
/// Indexes at least 50 documents with an "id" doc-values field and a "dv" doc-values field
/// of the requested type, then checks for every document of every leaf that the matching
/// value source returns the value that was indexed for that id.
/// </summary>
/// <param name="type">Doc-values type to exercise: BINARY, SORTED or NUMERIC.</param>
/// <exception cref="InvalidOperationException">Thrown for any other doc-values type.</exception>
private void DoTest(DocValuesType type)
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    int docCount = AtLeast(50);

    bool isBinary = type == DocValuesType.BINARY;
    bool isSorted = type == DocValuesType.SORTED;
    bool isNumeric = type == DocValuesType.NUMERIC;

    Field idField = new NumericDocValuesField("id", 0);
    Field valueField;
    if (isBinary)
    {
        valueField = new BinaryDocValuesField("dv", new BytesRef());
    }
    else if (isSorted)
    {
        valueField = new SortedDocValuesField("dv", new BytesRef());
    }
    else if (isNumeric)
    {
        valueField = new NumericDocValuesField("dv", 0);
    }
    else
    {
        throw new InvalidOperationException();
    }

    // one document instance is reused; only the field values change between adds
    Document template = new Document();
    template.Add(idField);
    template.Add(valueField);

    object[] expectedById = new object[docCount];
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config);
    for (int docId = 0; docId < docCount; ++docId)
    {
        idField.SetInt64Value(docId);

        if (isSorted || isBinary)
        {
            // retry until the random string is non-empty
            string candidate;
            do
            {
                candidate = TestUtil.RandomSimpleString(Random, 20);
                expectedById[docId] = candidate;
            } while (candidate.Length == 0);
            valueField.SetBytesValue(new BytesRef(candidate));
        }
        else if (isNumeric)
        {
            int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int
            expectedById[docId] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
            valueField.SetInt64Value((long)expectedById[docId]);
        }

        writer.AddDocument(template);
        if (Random.NextBoolean() && docId % 10 == 9)
        {
            writer.Commit();
        }
    }
    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(directory);
    foreach (AtomicReaderContext leaf in reader.Leaves)
    {
        FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leaf);

        ValueSource source;
        if (isBinary || isSorted)
        {
            source = new BytesRefFieldSource("dv");
        }
        else if (isNumeric)
        {
            source = new Int64FieldSource("dv");
        }
        else
        {
            throw new InvalidOperationException();
        }

        FunctionValues values = source.GetValues(null, leaf);
        BytesRef scratch = new BytesRef();
        for (int doc = 0; doc < leaf.AtomicReader.MaxDoc; ++doc)
        {
            assertTrue(values.Exists(doc));

            if (source is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(doc) is string);
            }
            else if (source is Int64FieldSource)
            {
                assertTrue(values.ObjectVal(doc) is long?);
                assertTrue(values.BytesVal(doc, scratch));
            }
            else
            {
                throw new InvalidOperationException();
            }

            object expected = expectedById[ids.Int32Val(doc)];

            if (isSorted)
            {
                values.OrdVal(doc); // no exception
                assertTrue(values.NumOrd >= 1);
            }

            if (isSorted || isBinary)
            {
                // each accessor is deliberately called twice
                assertEquals(expected, values.ObjectVal(doc));
                assertEquals(expected, values.StrVal(doc));
                assertEquals(expected, values.ObjectVal(doc));
                assertEquals(expected, values.StrVal(doc));
                assertTrue(values.BytesVal(doc, scratch));
                assertEquals(new BytesRef((string)expected), scratch);
            }
            else if (isNumeric)
            {
                assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(doc));
            }
        }
    }
    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes a random mix of three documents - one without stored fields, one with a very
/// large number of small stored fields and one with a single very large stored field -
/// force-merges, and checks that every document can be found by id and that its stored
/// "fld" values match what was indexed.
/// </summary>
public virtual void TestBigDocuments()
{
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    using (Directory directory = new MockDirectoryWrapper(Random, new MMapDirectory(CreateTempDir("testBigDocuments"))))
    {
        IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        config.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));

        using (RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config))
        {
            MockDirectoryWrapper mockDirectory = directory as MockDirectoryWrapper;
            if (mockDirectory != null)
            {
                mockDirectory.Throttling = Throttling.NEVER;
            }

            Document noFieldsDoc = new Document();   // emptyDoc
            Document manyFieldsDoc = new Document(); // lot of small fields
            Document hugeFieldDoc = new Document();  // 1 very big field

            // the id field instance is shared by all three documents
            Field idField = new StringField("id", "", Field.Store.NO);
            noFieldsDoc.Add(idField);
            manyFieldsDoc.Add(idField);
            hugeFieldDoc.Add(idField);

            FieldType storedOnly = new FieldType(StringField.TYPE_STORED) { IsIndexed = false };

            Field smallField = new Field("fld", RandomByteArray(Random.Next(10), 256), storedOnly);
            int smallFieldCount = RandomInts.RandomInt32Between(Random, 500000, 1000000);
            for (int added = 0; added < smallFieldCount; ++added)
            {
                manyFieldsDoc.Add(smallField);
            }

            Field bigField = new Field("fld", RandomByteArray(RandomInts.RandomInt32Between(Random, 1000000, 5000000), 2), storedOnly);
            hugeFieldDoc.Add(bigField);

            int docCount = AtLeast(5);
            Document[] chosen = new Document[docCount];
            for (int slot = 0; slot < docCount; ++slot)
            {
                chosen[slot] = RandomPicks.RandomFrom(Random, Arrays.AsList(noFieldsDoc, manyFieldsDoc, hugeFieldDoc));
            }

            for (int slot = 0; slot < docCount; ++slot)
            {
                idField.SetStringValue("" + slot);
                writer.AddDocument(chosen[slot]);
                if (Random.Next(docCount) == 0)
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.ForceMerge(1); // look at what happens when big docs are merged

            using (DirectoryReader reader = DirectoryReader.Open(directory))
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                for (int slot = 0; slot < docCount; ++slot)
                {
                    Query byId = new TermQuery(new Term("id", "" + slot));
                    TopDocs hits = searcher.Search(byId, 1);
                    Assert.AreEqual(1, hits.TotalHits, "" + slot);

                    Document loaded = reader.Document(hits.ScoreDocs[0].Doc);
                    Assert.IsNotNull(loaded);

                    IIndexableField[] loadedFields = loaded.GetFields("fld");
                    Assert.AreEqual(chosen[slot].GetFields("fld").Length, loadedFields.Length);
                    if (loadedFields.Length > 0)
                    {
                        // only the first stored value is compared
                        Assert.AreEqual(chosen[slot].GetFields("fld")[0].GetBinaryValue(), loadedFields[0].GetBinaryValue());
                    }
                }
            } // reader.Dispose();
        } // writer.Dispose();
    } // directory.Dispose();
}
/// <summary>
/// Indexes documents with random binary stored fields while randomly reopening the writer
/// with a different codec, deletes some id ranges, force-merges, and checks that every
/// document returned by the reader carries exactly the stored values that were indexed
/// for its id.
/// </summary>
public virtual void TestWriteReadMerge()
{
    // get another codec, other than the default: so we are merging segments across different codecs
    bool defaultIsSimpleText = "SimpleText".Equals(Codec.Default.Name, StringComparison.Ordinal);
    Codec otherCodec = defaultIsSimpleText ? (Codec)new Lucene46Codec() : new SimpleTextCodec();

    using (Directory directory = NewDirectory())
    {
        IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        config.SetMaxBufferedDocs(RandomInts.RandomInt32Between(Random, 2, 30));

        // each writer gets its own clone of the config
        RandomIndexWriter writer = new RandomIndexWriter(Random, directory, (IndexWriterConfig)config.Clone());
        try
        {
            int docCount = AtLeast(200);
            var payloads = new byte[docCount][][];
            for (int docIndex = 0; docIndex < docCount; ++docIndex)
            {
                int fieldCount;
                if (Rarely())
                {
                    fieldCount = RandomInts.RandomInt32Between(Random, 1, 500);
                }
                else
                {
                    fieldCount = RandomInts.RandomInt32Between(Random, 1, 5);
                }

                payloads[docIndex] = new byte[fieldCount][];
                for (int fieldIndex = 0; fieldIndex < fieldCount; ++fieldIndex)
                {
                    int length;
                    if (Rarely())
                    {
                        length = Random.Next(1000);
                    }
                    else
                    {
                        length = Random.Next(10);
                    }

                    int valueBound;
                    if (Rarely())
                    {
                        valueBound = 256;
                    }
                    else
                    {
                        valueBound = 2;
                    }

                    payloads[docIndex][fieldIndex] = RandomByteArray(length, valueBound);
                }
            }

            FieldType storedOnly = new FieldType(StringField.TYPE_STORED) { IsIndexed = false };
            storedOnly.Freeze();

            Int32Field idField = new Int32Field("id", 0, Field.Store.YES);
            for (int docIndex = 0; docIndex < payloads.Length; ++docIndex)
            {
                Document toIndex = new Document();
                toIndex.Add(idField);
                idField.SetInt32Value(docIndex);

                byte[][] docPayloads = payloads[docIndex];
                for (int fieldIndex = 0; fieldIndex < docPayloads.Length; ++fieldIndex)
                {
                    toIndex.Add(new Field("bytes" + fieldIndex, docPayloads[fieldIndex], storedOnly));
                }
                writer.IndexWriter.AddDocument(toIndex);

                if (Random.NextBoolean() && (docIndex % (payloads.Length / 10) == 0))
                {
                    writer.IndexWriter.Dispose();

                    // test merging against a non-compressing codec
                    config.SetCodec(config.Codec == otherCodec ? Codec.Default : otherCodec);

                    writer = new RandomIndexWriter(Random, directory, (IndexWriterConfig)config.Clone());
                }
            }

            for (int round = 0; round < 10; ++round)
            {
                int lowerId = Random.Next(payloads.Length);
                int upperId = lowerId + Random.Next(20);
                writer.DeleteDocuments(NumericRangeQuery.NewInt32Range("id", lowerId, upperId, true, false));
            }

            writer.ForceMerge(2); // force merges with deletions
            writer.Commit();

            using (DirectoryReader reader = DirectoryReader.Open(directory))
            {
                Assert.IsTrue(reader.NumDocs > 0);

                int seen = 0;
                for (int doc = 0; doc < reader.MaxDoc; ++doc)
                {
                    Document loaded = reader.Document(doc);
                    if (loaded != null)
                    {
                        ++seen;
                        int id = (int)loaded.GetField("id").GetInt32Value();
                        byte[][] expected = payloads[id];

                        // one stored field per payload plus the id field
                        Assert.AreEqual(expected.Length + 1, loaded.Fields.Count);
                        for (int fieldIndex = 0; fieldIndex < expected.Length; ++fieldIndex)
                        {
                            var wanted = expected[fieldIndex];
                            BytesRef storedRef = loaded.GetBinaryValue("bytes" + fieldIndex);
                            var stored = Arrays.CopyOfRange(storedRef.Bytes, storedRef.Offset, storedRef.Offset + storedRef.Length);
                            Assert.AreEqual(wanted, stored);
                        }
                    }
                }
                Assert.IsTrue(reader.NumDocs <= seen);
            } // reader.Dispose();

            writer.DeleteAll();
            writer.Commit();
            writer.ForceMerge(1);
        }
        finally
        {
            writer.Dispose();
        }
    } // directory.Dispose();
}