/// <summary>
/// Builds a new array holding the valid bytes of <paramref name="a"/> followed by the
/// valid bytes of <paramref name="b"/>.
/// </summary>
/// <param name="a">supplies the leading bytes</param>
/// <param name="b">supplies the trailing bytes</param>
/// <returns>a freshly allocated array of length a.GetLength() + b.GetLength()</returns>
private static byte[] Pair(BytesWritable a, BytesWritable b)
{
    int headLength = a.GetLength();
    int tailLength = b.GetLength();
    byte[] joined = new byte[headLength + tailLength];
    System.Array.Copy(a.GetBytes(), 0, joined, 0, headLength);
    System.Array.Copy(b.GetBytes(), 0, joined, headLength, tailLength);
    return joined;
}
/// <summary>
/// Exercises BytesWritable.CompareTo over a handful of sample byte strings.
/// </summary>
public virtual void TestCompare()
{
    byte[][] samples =
    {
        GetBytesForString("abc"),
        GetBytesForString("ad"),
        GetBytesForString("abcd"),
        GetBytesForString(string.Empty),
        GetBytesForString("b")
    };
    BytesWritable[] wrapped = new BytesWritable[samples.Length];
    for (int k = 0; k < samples.Length; ++k)
    {
        wrapped[k] = new BytesWritable(samples[k]);
    }

    // Swapping the operands must negate the result, and only an element
    // compared with itself may compare as equal.
    for (int left = 0; left < samples.Length; ++left)
    {
        for (int right = 0; right < samples.Length; ++right)
        {
            int forward = wrapped[left].CompareTo(wrapped[right]);
            int backward = wrapped[right].CompareTo(wrapped[left]);
            Assert.True(forward == -backward);
            Assert.True((left == right) == (forward == 0));
        }
    }

    // Spot-check the expected ordering of neighbouring samples.
    Assert.True(wrapped[0].CompareTo(wrapped[1]) < 0);
    Assert.True(wrapped[1].CompareTo(wrapped[2]) > 0);
    Assert.True(wrapped[2].CompareTo(wrapped[3]) > 0);
    Assert.True(wrapped[3].CompareTo(wrapped[4]) < 0);
}
/// <summary>
/// Copies the valid bytes of <paramref name="t"/> into a BytesWritable and returns that
/// object's string form.
/// </summary>
/// <param name="t">the text whose bytes are rendered</param>
/// <returns>the result of BytesWritable.ToString() for those bytes</returns>
private static string TextifyBytes(Text t)
{
    BytesWritable wrapper = new BytesWritable();
    wrapper.Set(t.GetBytes(), 0, t.GetLength());
    return wrapper.ToString();
}
/// <summary>
/// Grows and then shrinks a BytesWritable, checking length, capacity and content at each step.
/// </summary>
public virtual void TestSizeChange()
{
    var seed = GetBytesForString("hadoop");
    BytesWritable writable = new BytesWritable(seed);
    int initialLength = writable.Length;
    int initialCapacity = writable.Capacity;

    // Double the logical size, then duplicate the original content into the new half.
    writable.Size = (initialLength * 2);
    int grownCapacity = writable.Capacity;
    Array.Copy(writable.Bytes, 0, writable.Bytes, initialLength, initialLength);
    grownCapacity.ShouldBeGreaterThanOrEqualTo(initialLength * 2);
    (initialLength * 2).ShouldEqual(writable.Length);
    Assert.True(grownCapacity != initialCapacity);

    // Grow again; the content written so far must still be there.
    writable.Size = (initialLength * 4);
    Assert.True(grownCapacity != writable.Capacity);
    for (int pos = 0; pos < initialLength * 2; ++pos)
    {
        Assert.Equal(seed[pos % initialLength], writable.Bytes[pos]);
    }

    // CopyBytes must return exactly the logical length.
    Assert.Equal(initialLength * 4, writable.CopyBytes().Length);

    // Shrinking the capacity must also cut the length down...
    writable.Capacity = (1);
    Assert.Equal(1, writable.Length);

    // ...while keeping the data that still fits.
    Assert.Equal(seed[0], writable.Bytes[0]);
}
/// <summary>Serializes one object onto the stream as a vint length followed by its bytes.</summary>
/// <remarks>
/// Text and BytesWritable instances contribute their own bytes directly. Any other
/// Writable is first written into the scratch buffer, and the buffered bytes are
/// then emitted with their length.
/// </remarks>
/// <param name="obj">the object to write</param>
/// <exception cref="System.IO.IOException"/>
private void WriteObject(Writable obj)
{
    // Text and BytesWritable are encoded directly, so that they end up
    // in C++ as the natural translations.
    if (obj is Text)
    {
        Text text = (Text)obj;
        int textLength = text.GetLength();
        WritableUtils.WriteVInt(stream, textLength);
        stream.Write(text.GetBytes(), 0, textLength);
        return;
    }

    if (obj is BytesWritable)
    {
        BytesWritable bytes = (BytesWritable)obj;
        int byteLength = bytes.GetLength();
        WritableUtils.WriteVInt(stream, byteLength);
        stream.Write(bytes.GetBytes(), 0, byteLength);
        return;
    }

    // Everything else is staged in the reusable buffer first.
    buffer.Reset();
    obj.Write(buffer);
    int stagedLength = buffer.GetLength();
    WritableUtils.WriteVInt(stream, stagedLength);
    stream.Write(buffer.GetData(), 0, stagedLength);
}
/// <summary>
/// Compares a BytesWritable built with the (array, length) constructor against one built
/// with the (array) constructor, before and after the former's buffer is expanded.
/// </summary>
public virtual void TestZeroCopy()
{
    byte[] source = GetBytesForString("brock");

    // The (array, length) constructor is the one under test; the (array) one is the baseline.
    BytesWritable adopted = new BytesWritable(source, source.Length);
    BytesWritable baseline = new BytesWritable(source);

    // The (array, length) constructor must hand back the very same array.
    Assert.True(source == adopted.Bytes, "copy took place, backing array != array passed to constructor");
    Assert.True(adopted.Length == source.Length, "length of BW should backing byte array");

    Assert.Equal(adopted, baseline, "objects with same backing array should be equal");
    Assert.Equal(adopted.ToString(), baseline.ToString(), "string repr of objects with same backing array should be equal");
    Assert.True(adopted.CompareTo(baseline) == 0, "compare order objects with same backing array should be equal");
    Assert.True(adopted.GetHashCode() == baseline.GetHashCode(), "hash of objects with same backing array should be equal");

    // Force the internal buffer to grow, then restore the original contents;
    // the object must still match the baseline afterwards.
    byte[] oversized = new byte[source.Length * 5];
    adopted.Set(oversized, 0, oversized.Length);
    adopted.Set(source, 0, source.Length);
    Assert.Equal(adopted, baseline, "buffer created with (array, len) has bad contents");
    Assert.True(adopted.Length == baseline.Length, "buffer created with (array, len) has bad length");
}
/// <summary>
/// Scans the TFile at <c>path</c> in ten byte-range splits, checking that every split
/// yields at least one entry and that the per-split counts add up to the reader's
/// entry count.
/// </summary>
/// <remarks>
/// The scanner of each split and the reader itself are closed in <c>finally</c> blocks,
/// so a failed read or assertion no longer leaves them open.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
internal virtual void ReadFile()
{
    long fileLength = fs.GetFileStatus(path).GetLen();
    int numSplit = 10;
    // The +1 makes the ten ranges cover the whole file despite integer division.
    long splitSize = fileLength / numSplit + 1;
    TFile.Reader reader = new TFile.Reader(fs.Open(path), fileLength, conf);
    try
    {
        long offset = 0;
        long rowCount = 0;
        for (int i = 0; i < numSplit; ++i, offset += splitSize)
        {
            int count = 0;
            TFile.Reader.Scanner scanner = reader.CreateScannerByByteRange(offset, splitSize);
            try
            {
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                while (!scanner.AtEnd())
                {
                    scanner.Entry().Get(key, value);
                    ++count;
                    scanner.Advance();
                }
            }
            finally
            {
                scanner.Close();
            }
            Assert.True(count > 0);
            rowCount += count;
        }
        Assert.Equal(rowCount, reader.GetEntryCount());
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Writes <paramref name="numFiles"/> sequence files named <c>test_N.seq</c> under the work
/// directory. File N receives one record per integer key in the N-th range returned by
/// CreateRanges, each with a random value of fewer than ten bytes.
/// </summary>
/// <param name="length">passed through to CreateRanges</param>
/// <param name="numFiles">number of files to create</param>
/// <param name="random">source of value sizes and value bytes</param>
/// <param name="job">supplies the configuration used to create the writers</param>
/// <exception cref="System.IO.IOException"/>
private static void CreateFiles(int length, int numFiles, Random random, Job job)
{
    TestCombineSequenceFileInputFormat.Range[] ranges = CreateRanges(length, numFiles, random);
    for (int fileIndex = 0; fileIndex < numFiles; fileIndex++)
    {
        Path target = new Path(workDir, "test_" + fileIndex + ".seq");
        SequenceFile.Writer writer = SequenceFile.CreateWriter(
            localFs, job.GetConfiguration(), target, typeof(IntWritable), typeof(BytesWritable));
        TestCombineSequenceFileInputFormat.Range span = ranges[fileIndex];
        try
        {
            for (int recordKey = span.start; recordKey < span.end; recordKey++)
            {
                IntWritable key = new IntWritable(recordKey);
                byte[] payload = new byte[random.Next(10)];
                random.NextBytes(payload);
                writer.Append(key, new BytesWritable(payload));
            }
        }
        finally
        {
            writer.Close();
        }
    }
}
/// <summary>
/// Splits the job input with a SequenceFileInputFilter and counts the records that the
/// record readers return across all splits.
/// </summary>
/// <param name="numSplits">requested number of splits; 0 selects a random count</param>
/// <returns>the total number of records read</returns>
/// <exception cref="System.IO.IOException"/>
private int CountRecords(int numSplits)
{
    InputFormat<Text, BytesWritable> format = new SequenceFileInputFilter<Text, BytesWritable>();
    Text key = new Text();
    BytesWritable value = new BytesWritable();

    if (numSplits == 0)
    {
        numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
    }

    InputSplit[] splits = format.GetSplits(job, numSplits);
    int accepted = 0;
    Log.Info("Generated " + splits.Length + " splits.");

    // Drain each split in turn, always releasing its reader.
    foreach (InputSplit split in splits)
    {
        RecordReader<Text, BytesWritable> reader = format.GetRecordReader(split, job, reporter);
        try
        {
            while (reader.Next(key, value))
            {
                Log.Info("Accept record " + key.ToString());
                accepted++;
            }
        }
        finally
        {
            reader.Close();
        }
    }

    return accepted;
}
/// <summary>
/// Fills <paramref name="o"/> with a new key: the first MinKeyLen bytes come from
/// <c>prefix</c>, the remainder from randomly chosen dictionary words. The result is
/// also stored as <c>lastKey</c>.
/// </summary>
/// <param name="o">the key object to overwrite</param>
private void FillKey(BytesWritable o)
{
    // Keys are never shorter than the prefix.
    int keyLength = Math.Max(keyLenRNG.NextInt(), MinKeyLen);
    o.SetSize(keyLength);

    // Pack dictionary words after the prefix area, truncating the last one to fit.
    for (int filled = MinKeyLen; filled < keyLength;)
    {
        byte[] word = dict[random.Next(dict.Length)];
        int chunk = Math.Min(word.Length, keyLength - filled);
        System.Array.Copy(word, 0, o.Get(), filled, chunk);
        filled += chunk;
    }

    // In sorted mode, bump the prefix whenever the previous key's suffix compares
    // greater than the new suffix.
    bool suffixWentBackwards = sorted
        && WritableComparator.CompareBytes(
               lastKey.Get(), MinKeyLen, lastKey.GetSize() - MinKeyLen,
               o.Get(), MinKeyLen, o.GetSize() - MinKeyLen) > 0;
    if (suffixWentBackwards)
    {
        IncrementPrefix();
    }

    System.Array.Copy(prefix, 0, o.Get(), 0, MinKeyLen);
    lastKey.Set(o);
}
/// <summary>
/// Opens a SequenceFile reader on <paramref name="path"/> and allocates the key and value
/// holders.
/// </summary>
/// <param name="fs">file system that holds the file</param>
/// <param name="path">file to read</param>
/// <param name="osBufferSize">value stored under <c>io.file.buffer.size</c></param>
/// <exception cref="System.IO.IOException"/>
public SeqFileReadable(FileSystem fs, Path path, int osBufferSize)
{
    Configuration readerConf = new Configuration();
    readerConf.SetInt("io.file.buffer.size", osBufferSize);

    reader = new SequenceFile.Reader(fs, path, readerConf);
    key = new BytesWritable();
    value = new BytesWritable();
}
/// <summary>
/// Writes ten sequence files, then three times over requests a random number of splits
/// from CombineSequenceFileInputFormat and checks that exactly one CombineFileSplit comes
/// back and that reading it yields every key exactly once.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    JobConf jobConf = new JobConf(conf);
    Reporter reporter = Reporter.Null;

    // Log the seed so a run can be identified from the log.
    Random rng = new Random();
    long seed = rng.NextLong();
    Log.Info("seed = " + seed);
    rng.SetSeed(seed);

    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(jobConf, workDir);

    int recordCount = 10000;
    int fileCount = 10;

    // Populate the work directory with files of various lengths.
    CreateFiles(recordCount, fileCount, rng);

    InputFormat<IntWritable, BytesWritable> format =
        new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();

    for (int round = 0; round < 3; round++)
    {
        int requested = rng.Next(recordCount / (SequenceFile.SyncInterval / 20)) + 1;
        Log.Info("splitting: requesting = " + requested);
        InputSplit[] splits = format.GetSplits(jobConf, requested);
        Log.Info("splitting: got = " + splits.Length);

        // The data is comfortably smaller than the block size, so a single
        // combined split is expected.
        NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length);
        InputSplit onlySplit = splits[0];
        NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit), onlySplit.GetType());

        // Track which keys were seen while reading the split.
        BitSet seenKeys = new BitSet(recordCount);
        RecordReader<IntWritable, BytesWritable> reader = format.GetRecordReader(onlySplit, jobConf, reporter);
        try
        {
            while (reader.Next(key, value))
            {
                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seenKeys.Get(key.Get()));
                seenKeys.Set(key.Get());
            }
        }
        finally
        {
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual("Some keys in no partition.", recordCount, seenKeys.Cardinality());
    }
}
/// <summary>
/// Checks that the hash of a BytesWritable is unaffected by a capacity change and that an
/// emptied buffer hashes to 1.
/// </summary>
public virtual void TestHash()
{
    const int expectedOwenHash = 4347922;
    byte[] owen = GetBytesForString("owen");
    BytesWritable writable = new BytesWritable(owen);
    Assert.Equal(expectedOwenHash, writable.GetHashCode());

    // Changing only the capacity must leave the hash alone.
    writable.Capacity = (10000);
    Assert.Equal(expectedOwenHash, writable.GetHashCode());

    // With no valid bytes left the hash is 1.
    writable.Size = (0);
    Assert.Equal(1, writable.GetHashCode());
}
/// <summary>
/// Produces the next sample key: random bytes whose first four bytes are overwritten with
/// a random integer drawn from [min, max), stored most significant byte first.
/// </summary>
/// <param name="key">the key object to overwrite</param>
public virtual void Next(BytesWritable key)
{
    int keyLength = Math.Max(MinKeyLen, keyLenRNG.NextInt());
    key.SetSize(keyLength);
    random.NextBytes(key.Get());

    int sample = random.Next(max - min) + min;
    byte[] raw = key.Get();
    for (int idx = 0, shift = 24; idx < 4; ++idx, shift -= 8)
    {
        raw[idx] = unchecked((byte)(sample >> shift));
    }
}
/// <summary>Generates the next key/value pair.</summary>
/// <param name="key">receives a fresh key, or a copy of the previous key when <paramref name="dupKey"/> is set</param>
/// <param name="value">receives a fresh value</param>
/// <param name="dupKey">true to repeat the last key instead of generating a new one</param>
public virtual void Next(BytesWritable key, BytesWritable value, bool dupKey)
{
    if (!dupKey)
    {
        FillKey(key);
    }
    else
    {
        key.Set(lastKey);
    }

    FillValue(value);
}
/// <summary>
/// Packs randomly chosen dictionary words into <paramref name="tmp"/> until
/// <paramref name="len"/> bytes are filled, then stores those bytes in <paramref name="bw"/>.
/// </summary>
/// <param name="rng">picks the dictionary words</param>
/// <param name="bw">receives the first <paramref name="len"/> bytes of <paramref name="tmp"/></param>
/// <param name="tmp">scratch array; must hold at least <paramref name="len"/> bytes</param>
/// <param name="len">number of bytes to produce</param>
private void FillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len)
{
    for (int filled = 0; filled < len;)
    {
        byte[] word = dictionary[rng.Next(dictionary.Length)];
        // The final word is truncated so the total never exceeds len.
        int chunk = Math.Min(word.Length, len - filled);
        System.Array.Copy(word, 0, tmp, filled, chunk);
        filled += chunk;
    }

    bw.Set(tmp, 0, len);
}
/// <summary>Creates a writer on a fresh output stream and writes the magic header.</summary>
/// <param name="fout">FS output stream; must be positioned at offset zero.</param>
/// <param name="compressionName">
/// Name of the compression algorithm, which will be used for all
/// data blocks.
/// </param>
/// <param name="conf">Configuration retained by the writer.</param>
/// <exception cref="System.IO.IOException"/>
/// <seealso cref="Compression.GetSupportedAlgorithms()"/>
public Writer(FSDataOutputStream fout, string compressionName, Configuration conf)
{
    bool atStart = fout.GetPos() == 0;
    if (!atStart)
    {
        throw new IOException("Output file not at zero offset.");
    }

    this.@out = fout;
    this.conf = conf;
    dataIndex = new BCFile.DataIndex(compressionName);
    metaIndex = new BCFile.MetaIndex();
    fsOutputBuffer = new BytesWritable();

    // The magic bytes go out before any block.
    BCFile.Magic.Write(fout);
}
/// <summary>
/// With the right offset set to 4, two five-byte keys that differ only in their last
/// byte must land in different partitions.
/// </summary>
public virtual void TestUpperBound()
{
    Configuration partitionerConf = new Configuration();
    BinaryPartitioner.SetRightOffset(partitionerConf, 4);
    BinaryPartitioner<object> partitioner = ReflectionUtils.NewInstance<BinaryPartitioner>(partitionerConf);

    BinaryComparable endsInFive = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable endsInSix = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });

    int firstPartition = partitioner.GetPartition(endsInFive, null, 10);
    int secondPartition = partitioner.GetPartition(endsInSix, null, 10);

    NUnit.Framework.Assert.IsTrue(firstPartition != secondPartition);
}
/// <summary>
/// Writes generated key/value pairs into a TFile at <c>path</c> until the file reaches
/// <c>options.fileSize</c>, then prints the elapsed time and throughput figures.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void CreateTFile()
{
    long totalBytes = 0;
    FSDataOutputStream fout = CreateFSOutput(path, fs);
    try
    {
        TFile.Writer writer = new TFile.Writer(fout, options.minBlockSize, options.compress, "memcmp", conf);
        try
        {
            BytesWritable key = new BytesWritable();
            BytesWritable val = new BytesWritable();
            timer.Start();
            long row = 0;
            while (true)
            {
                // The file size is only polled once every 1000 rows.
                if (row % 1000 == 0 && fs.GetFileStatus(path).GetLen() >= options.fileSize)
                {
                    break;
                }

                kvGen.Next(key, val, false);
                writer.Append(key.Get(), 0, key.GetSize(), val.Get(), 0, val.GetSize());
                totalBytes += key.GetSize();
                totalBytes += val.GetSize();
                ++row;
            }
            timer.Stop();
        }
        finally
        {
            writer.Close();
        }
    }
    finally
    {
        fout.Close();
    }

    // in us.
    double duration = (double)timer.Read() / 1000;
    long fsize = fs.GetFileStatus(path).GetLen();
    double rawMegabytes = (double)totalBytes / 1024 / 1024;
    double fileMegabytes = (double)fsize / 1024 / 1024;

    System.Console.Out.Printf("time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
        timer.ToString(), rawMegabytes, totalBytes / duration);
    System.Console.Out.Printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
        timer.ToString(), fileMegabytes, fsize / duration);
}
/// <summary>
/// Resizes <paramref name="o"/> to a length drawn from <c>valLenRNG</c> and fills it with
/// randomly chosen dictionary words.
/// </summary>
/// <param name="o">the value object to overwrite</param>
private void FillValue(BytesWritable o)
{
    int valueLength = valLenRNG.NextInt();
    o.SetSize(valueLength);

    for (int filled = 0; filled < valueLength;)
    {
        byte[] word = dict[random.Next(dict.Length)];
        // The final word is truncated so the value ends exactly at valueLength.
        int chunk = Math.Min(word.Length, valueLength - filled);
        System.Array.Copy(word, 0, o.Get(), filled, chunk);
        filled += chunk;
    }
}
/// <summary>
/// Checks that the SortedMapWritable copy constructor reproduces a flat map and a map
/// whose values are themselves SortedMapWritables.
/// </summary>
public virtual void TestSortedMapWritable()
{
    Text[] keys = new Text[]
    {
        new Text("key1"),
        new Text("key2"),
        new Text("key3")
    };
    BytesWritable[] values = new BytesWritable[]
    {
        new BytesWritable(Runtime.GetBytesForString("value1")),
        new BytesWritable(Runtime.GetBytesForString("value2")),
        new BytesWritable(Runtime.GetBytesForString("value3"))
    };

    SortedMapWritable original = new SortedMapWritable();
    for (int idx = 0; idx < keys.Length; idx++)
    {
        original[keys[idx]] = values[idx];
    }

    Assert.Equal(0, original.FirstKey().CompareTo(keys[0]));
    Assert.Equal(0, original.LastKey().CompareTo(keys[2]));

    // A copy must hold the same keys with equal values.
    SortedMapWritable duplicate = new SortedMapWritable(original);
    Assert.Equal(original.Count, duplicate.Count);
    foreach (KeyValuePair<WritableComparable, Writable> entry in original)
    {
        Assert.True(duplicate.Contains(entry.Key));
        Assert.Equal(0, ((WritableComparable)duplicate[entry.Key]).CompareTo(entry.Value));
    }

    // Now nest the two maps inside a third one and copy that.
    Text[] mapNames = new Text[] { new Text("map1"), new Text("map2") };
    SortedMapWritable nested = new SortedMapWritable();
    nested[mapNames[0]] = original;
    nested[mapNames[1]] = duplicate;

    SortedMapWritable nestedCopy = new SortedMapWritable(nested);
    for (int m = 0; m < mapNames.Length; m++)
    {
        Assert.True(nestedCopy.Contains(mapNames[m]));
        SortedMapWritable expectedInner = (SortedMapWritable)nested[mapNames[m]];
        SortedMapWritable actualInner = (SortedMapWritable)nestedCopy[mapNames[m]];
        Assert.Equal(expectedInner.Count, actualInner.Count);
        foreach (Writable innerKey in expectedInner.Keys)
        {
            Assert.True(actualInner.Contains(innerKey));

            // The casts are safe because this test put only comparable values into the maps.
            WritableComparable expectedValue = (WritableComparable)expectedInner[innerKey];
            WritableComparable actualValue = (WritableComparable)actualInner[innerKey];
            Assert.Equal(0, expectedValue.CompareTo(actualValue));
        }
    }
}
/// <summary>
/// Creates <c>part-0</c> under <paramref name="dir"/> and fills it with random key/value
/// records until roughly <paramref name="fileSizeInMB"/> megabytes of key and value bytes
/// have been appended.
/// </summary>
/// <remarks>
/// Key and value sizes are drawn from ranges read from the configuration (MinKey/MaxKey and
/// MinValue/MaxValue). The writer is closed in a <c>finally</c> block so it is released even
/// when appending fails.
/// </remarks>
/// <param name="conf">configuration supplying the size ranges and used to create the writer</param>
/// <param name="fs">file system to write to</param>
/// <param name="dir">target directory; must be absent or empty</param>
/// <param name="fileSizeInMB">amount of key+value data to write, in megabytes</param>
/// <exception cref="System.IO.IOException">if <paramref name="dir"/> already contains files</exception>
private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path dir, long fileSizeInMB)
{
    // Check if the input path exists and is non-empty
    if (fs.Exists(dir))
    {
        FileStatus[] list = fs.ListStatus(dir);
        if (list.Length > 0)
        {
            throw new IOException("Input path: " + dir + " already exists... ");
        }
    }

    Path file = new Path(dir, "part-0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(BytesWritable),
        typeof(BytesWritable), SequenceFile.CompressionType.None);
    long start;
    try
    {
        long numBytesToWrite = fileSizeInMB * 1024 * 1024;
        int minKeySize = conf.GetInt(MinKey, 10);
        int keySizeRange = conf.GetInt(MaxKey, 1000) - minKeySize;
        int minValueSize = conf.GetInt(MinValue, 0);
        int valueSizeRange = conf.GetInt(MaxValue, 20000) - minValueSize;
        BytesWritable randomKey = new BytesWritable();
        BytesWritable randomValue = new BytesWritable();
        Log.Info("Writing " + numBytesToWrite + " bytes to " + file + " with " + "minKeySize: "
            + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize
            + " valueSizeRange: " + valueSizeRange);
        start = Runtime.CurrentTimeMillis();
        while (numBytesToWrite > 0)
        {
            // A zero-width range means a fixed size; skip the random draw in that case.
            int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
            randomKey.SetSize(keyLength);
            RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());

            int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange) : 0);
            randomValue.SetSize(valueLength);
            RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());

            writer.Append(randomKey, randomValue);
            numBytesToWrite -= keyLength + valueLength;
        }
    }
    finally
    {
        // Release the writer on both the success and the failure path.
        writer.Close();
    }

    long end = Runtime.CurrentTimeMillis();
    Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + (end - start) / 1000 + "secs");
}
/// <summary>
/// Checks Equals and GetHashCode of SortedMapWritable for empty maps, maps with different
/// entries, maps with the same entries inserted in different order, and maps with the same
/// keys but swapped values.
/// </summary>
public virtual void TestEqualsAndHashCode()
{
    SortedMapWritable mapA = new SortedMapWritable();
    SortedMapWritable mapB = new SortedMapWritable();

    // Sanity checks
    string notInitialized = "SortedMapWritable couldn't be initialized. Got null reference";
    NUnit.Framework.Assert.IsNotNull(notInitialized, mapA);
    NUnit.Framework.Assert.IsNotNull(notInitialized, mapB);

    // A map is never equal to null.
    NUnit.Framework.Assert.IsFalse("equals method returns true when passed null", mapA.Equals(null));

    // Two maps without entries are equal.
    Assert.True("Two empty SortedMapWritables are no longer equal", mapA.Equals(mapB));

    // Setup
    Text[] keys = new Text[] { new Text("key1"), new Text("key2") };
    BytesWritable[] values = new BytesWritable[]
    {
        new BytesWritable(Runtime.GetBytesForString("value1")),
        new BytesWritable(Runtime.GetBytesForString("value2"))
    };
    mapA[keys[0]] = values[0];
    mapB[keys[1]] = values[1];

    // Each map holds a different single entry.
    string differentData = "Two SortedMapWritables with different data are now equal";
    Assert.True(differentData, mapA.GetHashCode() != mapB.GetHashCode());
    Assert.True(differentData, !mapA.Equals(mapB));
    Assert.True(differentData, !mapB.Equals(mapA));

    mapA[keys[1]] = values[1];
    mapB[keys[0]] = values[0];

    // Both maps now hold the same two entries, inserted in opposite order.
    string sameEntries = "Two SortedMapWritables with same entry sets formed in different order are now different";
    Assert.Equal(sameEntries, mapA.GetHashCode(), mapB.GetHashCode());
    Assert.True(sameEntries, mapA.Equals(mapB));
    Assert.True(sameEntries, mapB.Equals(mapA));

    // Same keys, but mapA's values are swapped.
    mapA[keys[0]] = values[1];
    mapA[keys[1]] = values[0];
    string differentContent = "Two SortedMapWritables with different content are now equal";
    Assert.True(differentContent, mapA.GetHashCode() != mapB.GetHashCode());
    Assert.True(differentContent, !mapA.Equals(mapB));
    Assert.True(differentContent, !mapB.Equals(mapA));
}
/// <summary>
/// Builds a key/value generator backed by a dictionary of random words and primes
/// <c>lastKey</c> with a first generated key.
/// </summary>
/// <param name="random">source of randomness for words and word selection</param>
/// <param name="sorted">stored as the generator's sorted flag</param>
/// <param name="keyLenRNG">distribution of key lengths</param>
/// <param name="valLenRNG">distribution of value lengths</param>
/// <param name="wordLenRNG">distribution of dictionary word lengths</param>
/// <param name="dictSize">number of words in the dictionary</param>
public KVGenerator(Random random, bool sorted, RandomDistribution.DiscreteRNG keyLenRNG,
    RandomDistribution.DiscreteRNG valLenRNG, RandomDistribution.DiscreteRNG wordLenRNG, int dictSize)
{
    this.random = random;
    this.sorted = sorted;
    this.keyLenRNG = keyLenRNG;
    this.valLenRNG = valLenRNG;

    // Each dictionary word gets a length from wordLenRNG and random content.
    dict = new byte[dictSize][];
    for (int wordIndex = 0; wordIndex < dictSize; ++wordIndex)
    {
        byte[] word = new byte[wordLenRNG.NextInt()];
        dict[wordIndex] = word;
        random.NextBytes(word);
    }

    lastKey = new BytesWritable();
    FillKey(lastKey);
}
/// <summary>
/// Writes <paramref name="val"/> as a vint, reads it back, and checks that the encoded
/// length agrees with <paramref name="vintlen"/> in every way WritableUtils reports it.
/// </summary>
/// <param name="val">the value to round-trip</param>
/// <param name="vintlen">the expected encoded length in bytes</param>
/// <exception cref="System.IO.IOException"/>
public static void TestValue(int val, int vintlen)
{
    DataOutputBuffer encoded = new DataOutputBuffer();
    DataInputBuffer decoder = new DataInputBuffer();
    WritableUtils.WriteVInt(encoded, val);

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Value = " + val);
        // BytesWritable is used here only to render the encoded bytes for the log.
        BytesWritable printer = new BytesWritable();
        printer.Set(encoded.GetData(), 0, encoded.GetLength());
        Log.Debug("Buffer = " + printer);
    }

    decoder.Reset(encoded.GetData(), 0, encoded.GetLength());
    Assert.Equal(val, WritableUtils.ReadVInt(decoder));

    // The length must match what was written, what GetVIntSize predicts,
    // and what the first encoded byte announces.
    Assert.Equal(vintlen, encoded.GetLength());
    Assert.Equal(vintlen, WritableUtils.GetVIntSize(val));
    Assert.Equal(vintlen, WritableUtils.DecodeVIntSize(encoded.GetData()[0]));
}
/// <summary>
/// Issues <c>count</c> calls through <c>client</c>, each carrying a random-length prefix of
/// <c>Bytes</c>, pausing for a random time of under 20 ms after each call. Any exception
/// is logged and recorded in <c>failed</c>; the loop then continues.
/// </summary>
public override void Run()
{
    for (int call = 0; call < count; call++)
    {
        try
        {
            int payloadSize = Random.Next(ByteCount);
            byte[] payload = new byte[payloadSize];
            System.Array.Copy(Bytes, 0, payload, 0, payloadSize);

            Writable param = new BytesWritable(payload);
            client.Call(param, address);

            Thread.Sleep(Random.Next(20));
        }
        catch (Exception e)
        {
            Log.Fatal("Caught Exception", e);
            failed = true;
        }
    }
}
/// <summary>
/// Runs <c>options.seekCount</c> random seeks against the TFile at <c>path</c> and prints
/// the elapsed time, average seek time, hit/miss counts and average bytes read per hit.
/// </summary>
/// <remarks>
/// The input stream, reader and scanner are released in <c>finally</c> blocks; previously
/// none of them was ever closed.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void SeekTFile()
{
    int miss = 0;
    long totalBytes = 0;
    FSDataInputStream fsdis = fs.Open(path);
    try
    {
        TFile.Reader reader = new TFile.Reader(fsdis, fs.GetFileStatus(path).GetLen(), conf);
        try
        {
            KeySampler kSampler = new KeySampler(rng, reader.GetFirstKey(), reader.GetLastKey(), keyLenGen);
            TFile.Reader.Scanner scanner = reader.CreateScanner();
            try
            {
                BytesWritable key = new BytesWritable();
                BytesWritable val = new BytesWritable();
                timer.Reset();
                timer.Start();
                for (int i = 0; i < options.seekCount; ++i)
                {
                    kSampler.Next(key);
                    scanner.LowerBound(key.Get(), 0, key.GetSize());
                    if (!scanner.AtEnd())
                    {
                        // Hit: read the entry and account for its size.
                        scanner.Entry().Get(key, val);
                        totalBytes += key.GetSize();
                        totalBytes += val.GetSize();
                    }
                    else
                    {
                        ++miss;
                    }
                }
                timer.Stop();
            }
            finally
            {
                scanner.Close();
            }
        }
        finally
        {
            reader.Close();
        }
    }
    finally
    {
        // NOTE(review): the stream is closed explicitly on the assumption that closing
        // the reader does not close the stream passed to it - confirm.
        fsdis.Close();
    }

    System.Console.Out.Printf("time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
        timer.ToString(), NanoTimer.NanoTimeToString(timer.Read() / options.seekCount),
        options.seekCount - miss, miss, (double)totalBytes / 1024 / (options.seekCount - miss));
}
/// <summary>
/// Writes <paramref name="numRecords"/> records to <c>inFile</c>. Keys are the decimal
/// strings "1" through numRecords; each value is a random byte array of fewer than ten bytes.
/// </summary>
/// <param name="numRecords">number of records to append</param>
/// <exception cref="System.Exception"/>
private static void CreateSequenceFile(int numRecords)
{
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text), typeof(BytesWritable));
    try
    {
        int recordNumber = 1;
        while (recordNumber <= numRecords)
        {
            Text key = new Text(Sharpen.Extensions.ToString(recordNumber));
            byte[] payload = new byte[random.Next(10)];
            random.NextBytes(payload);
            writer.Append(key, new BytesWritable(payload));
            recordNumber++;
        }
    }
    finally
    {
        writer.Close();
    }
}
/// <summary>
/// Appends randomly generated key/value pairs to <paramref name="appendable"/> until the
/// file at <paramref name="path"/> reaches <paramref name="fileSize"/>, timing the write
/// and reporting statistics afterwards.
/// </summary>
/// <remarks>
/// <paramref name="appendable"/> is closed in a <c>finally</c> block, so it is released
/// even when an append or a file-status lookup fails.
/// </remarks>
/// <param name="path">file being written; its size is polled to decide when to stop</param>
/// <param name="appendable">sink that receives the key/value pairs</param>
/// <param name="baseKlen">minimum key length; keys are in [baseKlen, 2*baseKlen)</param>
/// <param name="baseVlen">minimum value length; values are in [baseVlen, 2*baseVlen)</param>
/// <param name="fileSize">target file size in bytes</param>
/// <exception cref="System.IO.IOException"/>
private void TimeWrite(Path path, TestTFileSeqFileComparison.KVAppendable appendable, int baseKlen,
    int baseVlen, long fileSize)
{
    int maxKlen = baseKlen * 2;
    int maxVlen = baseVlen * 2;
    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();
    byte[] keyBuffer = new byte[maxKlen];
    byte[] valueBuffer = new byte[maxVlen];
    Random rng = new Random(options.seed);
    long totalBytes = 0;
    PrintlnWithTimestamp("Start writing: " + path.GetName() + "...");
    StartTime();
    try
    {
        for (long i = 0; true; ++i)
        {
            if (i % 1000 == 0)
            {
                // test the size for every 1000 rows.
                if (fs.GetFileStatus(path).GetLen() >= fileSize)
                {
                    break;
                }
            }
            int klen = rng.Next(baseKlen) + baseKlen;
            int vlen = rng.Next(baseVlen) + baseVlen;
            FillBuffer(rng, key, keyBuffer, klen);
            FillBuffer(rng, value, valueBuffer, vlen);
            key.Set(keyBuffer, 0, klen);
            value.Set(valueBuffer, 0, vlen);
            appendable.Append(key, value);
            totalBytes += klen;
            totalBytes += vlen;
        }
        StopTime();
    }
    finally
    {
        // Release the sink on both the success and the failure path.
        appendable.Close();
    }
    ReportStats(path, totalBytes);
}
/* Similar to readFile(), tests the scanner created
 * by record numbers rather than the offsets.
 */
/// <summary>
/// Divides the file's records into <paramref name="numSplits"/> contiguous record-number
/// ranges, scans each range, and checks the record numbers reported by the scanner as
/// well as the number of entries read per range.
/// </summary>
/// <remarks>
/// Every scanner and the reader are closed in <c>finally</c> blocks; previously the reader
/// and the final scanner were never closed.
/// </remarks>
/// <param name="numSplits">number of record ranges to scan</param>
/// <exception cref="System.IO.IOException"/>
internal virtual void ReadRowSplits(int numSplits)
{
    TFile.Reader reader = new TFile.Reader(fs.Open(path), fs.GetFileStatus(path).GetLen(), conf);
    try
    {
        long totalRecords = reader.GetEntryCount();
        for (int i = 0; i < numSplits; i++)
        {
            long startRec = i * totalRecords / numSplits;
            long endRec = (i + 1) * totalRecords / numSplits;
            if (i == numSplits - 1)
            {
                endRec = totalRecords;
            }
            int count = 0;
            TFile.Reader.Scanner scanner = reader.CreateScannerByRecordNum(startRec, endRec);
            try
            {
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                long x = startRec;
                while (!scanner.AtEnd())
                {
                    // The record number is checked both before and after reading the entry.
                    Assert.Equal("Incorrect RecNum returned by scanner", scanner.GetRecordNum(), x);
                    scanner.Entry().Get(key, value);
                    ++count;
                    Assert.Equal("Incorrect RecNum returned by scanner", scanner.GetRecordNum(), x);
                    scanner.Advance();
                    ++x;
                }
            }
            finally
            {
                scanner.Close();
            }
            Assert.True(count == (endRec - startRec));
        }

        // make sure specifying range at the end gives zero records.
        TFile.Reader.Scanner scanner_1 = reader.CreateScannerByRecordNum(totalRecords, -1);
        try
        {
            Assert.True(scanner_1.AtEnd());
        }
        finally
        {
            scanner_1.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}