/// <summary>
/// Checks that a BytesWritable built with the (array, length) constructor keeps
/// using the caller's array, compares equal to one built with the single-argument
/// constructor, and still holds the right contents after its buffer was grown.
/// </summary>
public virtual void TestZeroCopy()
{
    byte[] payload = GetBytesForString("brock");
    // new
    BytesWritable zeroBuf = new BytesWritable(payload, payload.Length);
    // old
    BytesWritable copyBuf = new BytesWritable(payload);

    // using zero copy constructor shouldn't result in a copy
    bool sharesArray = payload == zeroBuf.Bytes;
    Assert.True(sharesArray, "copy took place, backing array != array passed to constructor");
    bool sameLength = zeroBuf.Length == payload.Length;
    Assert.True(sameLength, "length of BW should backing byte array");

    // Both instances must agree under every notion of equality the type exposes.
    Assert.Equal(zeroBuf, copyBuf, "objects with same backing array should be equal");
    Assert.Equal(zeroBuf.ToString(), copyBuf.ToString(), "string repr of objects with same backing array should be equal");
    bool sameOrder = zeroBuf.CompareTo(copyBuf) == 0;
    Assert.True(sameOrder, "compare order objects with same backing array should be equal");
    bool sameHash = zeroBuf.GetHashCode() == copyBuf.GetHashCode();
    Assert.True(sameHash, "hash of objects with same backing array should be equal");

    // ensure expanding buffer is handled correctly
    // for buffers created with zero copy api
    byte[] larger = new byte[payload.Length * 5];
    // expand internal buffer
    zeroBuf.Set(larger, 0, larger.Length);
    // set back to normal contents
    zeroBuf.Set(payload, 0, payload.Length);

    Assert.Equal(zeroBuf, copyBuf, "buffer created with (array, len) has bad contents");
    bool lengthsMatch = zeroBuf.Length == copyBuf.Length;
    Assert.True(lengthsMatch, "buffer created with (array, len) has bad length");
}
/// <summary>
/// Generates the next key into <paramref name="o"/>: a MinKeyLen-byte prefix
/// followed by randomly chosen dictionary words, and records it as lastKey.
/// </summary>
/// <param name="o">Writable that receives the generated key.</param>
private void FillKey(BytesWritable o)
{
    // Draw a length, but never go below the space reserved for the prefix.
    int keyLen = Math.Max(keyLenRNG.NextInt(), MinKeyLen);
    o.SetSize(keyLen);

    // Fill everything after the prefix with dictionary words; the final word
    // is truncated so that exactly keyLen bytes are used.
    for (int pos = MinKeyLen; pos < keyLen; )
    {
        byte[] word = dict[random.Next(dict.Length)];
        int chunk = Math.Min(word.Length, keyLen - pos);
        System.Array.Copy(word, 0, o.Get(), pos, chunk);
        pos += chunk;
    }

    // In sorted mode, bump the prefix whenever the previous key's suffix
    // compares greater than the new suffix (presumably to keep keys ascending).
    if (sorted)
    {
        int order = WritableComparator.CompareBytes(
            lastKey.Get(), MinKeyLen, lastKey.GetSize() - MinKeyLen,
            o.Get(), MinKeyLen, o.GetSize() - MinKeyLen);
        if (order > 0)
        {
            IncrementPrefix();
        }
    }

    // Stamp the (possibly updated) prefix and remember this key for next time.
    System.Array.Copy(prefix, 0, o.Get(), 0, MinKeyLen);
    lastKey.Set(o);
}
/// <summary>
/// Renders the first GetLength() bytes of <paramref name="t"/>'s buffer using
/// BytesWritable's string representation.
/// </summary>
/// <param name="t">Text whose bytes are rendered.</param>
/// <returns>The ToString() of a BytesWritable set to those bytes.</returns>
private static string TextifyBytes(Text t)
{
    BytesWritable wrapper = new BytesWritable();
    var data = t.GetBytes();
    var length = t.GetLength();
    wrapper.Set(data, 0, length);
    return wrapper.ToString();
}
/// <summary>
/// Produces the next key/value pair into the supplied writables.
/// </summary>
/// <param name="key">Receives the key.</param>
/// <param name="value">Receives the value, always filled via FillValue.</param>
/// <param name="dupKey">
/// When true the key is set from lastKey instead of being generated with FillKey.
/// </param>
public virtual void Next(BytesWritable key, BytesWritable value, bool dupKey)
{
    if (!dupKey)
    {
        FillKey(key);
    }
    else
    {
        key.Set(lastKey);
    }

    FillValue(value);
}
/// <summary>
/// Fills the first <paramref name="len"/> bytes of <paramref name="tmp"/> with
/// randomly selected dictionary words and points <paramref name="bw"/> at them.
/// </summary>
/// <param name="rng">Source of the random dictionary indices.</param>
/// <param name="bw">Writable that is set to tmp[0..len) once filling is done.</param>
/// <param name="tmp">Scratch buffer; the caller must size it to at least len bytes.</param>
/// <param name="len">Number of bytes to produce.</param>
private void FillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len)
{
    for (int filled = 0; filled < len; )
    {
        byte[] word = dictionary[rng.Next(dictionary.Length)];
        // Truncate the last word so we stop at exactly len bytes.
        int chunk = Math.Min(word.Length, len - filled);
        System.Array.Copy(word, 0, tmp, filled, chunk);
        filled += chunk;
    }

    bw.Set(tmp, 0, len);
}
/// <summary>
/// Writes <paramref name="val"/> with WritableUtils.WriteVInt, reads it back, and
/// checks that the encoded size matches <paramref name="vintlen"/> via the buffer
/// length, GetVIntSize, and DecodeVIntSize on the first encoded byte.
/// </summary>
/// <param name="val">Value to round-trip.</param>
/// <param name="vintlen">Expected encoded length in bytes.</param>
/// <exception cref="System.IO.IOException"/>
public static void TestValue(int val, int vintlen)
{
    DataOutputBuffer encoded = new DataOutputBuffer();
    DataInputBuffer decoder = new DataInputBuffer();
    WritableUtils.WriteVInt(encoded, val);

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Value = " + val);
        // BytesWritable is used here only for its string rendering of the bytes.
        BytesWritable dump = new BytesWritable();
        dump.Set(encoded.GetData(), 0, encoded.GetLength());
        Log.Debug("Buffer = " + dump);
    }

    decoder.Reset(encoded.GetData(), 0, encoded.GetLength());

    Assert.Equal(val, WritableUtils.ReadVInt(decoder));
    Assert.Equal(vintlen, encoded.GetLength());
    Assert.Equal(vintlen, WritableUtils.GetVIntSize(val));
    Assert.Equal(vintlen, WritableUtils.DecodeVIntSize(encoded.GetData()[0]));
}
/// <summary>Advance to the next key/value pair.</summary>
/// <returns>
/// false (with key and value cleared) when no input remains; true after the
/// current pair has been deserialized and the look-ahead state updated.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public override bool NextKeyValue()
{
    // Input exhausted: drop the current pair and tell the caller to stop.
    if (!hasMore)
    {
        key = null;
        value = null;
        return false;
    }

    // This is the first value of a group unless the look-ahead said "same key".
    firstValue = !nextKeyIsSame;

    // Snapshot the raw key bytes, then deserialize the key from that snapshot.
    DataInputBuffer rawKey = input.GetKey();
    currentRawKey.Set(
        rawKey.GetData(),
        rawKey.GetPosition(),
        rawKey.GetLength() - rawKey.GetPosition());
    buffer.Reset(currentRawKey.GetBytes(), 0, currentRawKey.GetLength());
    key = keyDeserializer.Deserialize(key);

    // The value is deserialized straight from the input's buffer.
    DataInputBuffer rawValue = input.GetValue();
    buffer.Reset(
        rawValue.GetData(),
        rawValue.GetPosition(),
        rawValue.GetLength() - rawValue.GetPosition());
    value = valueDeserializer.Deserialize(value);

    currentKeyLength = rawKey.GetLength() - rawKey.GetPosition();
    currentValueLength = rawValue.GetLength() - rawValue.GetPosition();

    if (isMarked)
    {
        backupStore.Write(rawKey, rawValue);
    }

    // Advance the input and peek at the following key to see whether it
    // compares equal to the one just consumed.
    hasMore = input.Next();
    if (!hasMore)
    {
        nextKeyIsSame = false;
    }
    else
    {
        DataInputBuffer upcomingKey = input.GetKey();
        int order = comparator.Compare(
            currentRawKey.GetBytes(), 0, currentRawKey.GetLength(),
            upcomingKey.GetData(),
            upcomingKey.GetPosition(),
            upcomingKey.GetLength() - upcomingKey.GetPosition());
        nextKeyIsSame = order == 0;
    }

    inputValueCounter.Increment(1);
    return true;
}
/// <summary>
/// Appends randomly generated key/value pairs to <paramref name="appendable"/>
/// until the file at <paramref name="path"/> reaches <paramref name="fileSize"/>
/// bytes, timing the loop and reporting the total payload written.
/// </summary>
/// <param name="path">File being written; its length is polled to decide when to stop.</param>
/// <param name="appendable">Sink for the generated pairs; always closed before returning or throwing.</param>
/// <param name="baseKlen">Each key length is <c>rng.Next(baseKlen) + baseKlen</c>.</param>
/// <param name="baseVlen">Each value length is <c>rng.Next(baseVlen) + baseVlen</c>.</param>
/// <param name="fileSize">Target file length in bytes.</param>
/// <exception cref="System.IO.IOException"/>
private void TimeWrite(Path path, TestTFileSeqFileComparison.KVAppendable appendable, int baseKlen, int baseVlen, long fileSize)
{
    int maxKlen = baseKlen * 2;
    int maxVlen = baseVlen * 2;
    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();
    // Scratch buffers sized for the largest key/value that can be drawn.
    byte[] keyBuffer = new byte[maxKlen];
    byte[] valueBuffer = new byte[maxVlen];
    Random rng = new Random(options.seed);
    long totalBytes = 0;

    PrintlnWithTimestamp("Start writing: " + path.GetName() + "...");
    StartTime();
    try
    {
        for (long i = 0; ; ++i)
        {
            // test the size for every 1000 rows.
            if (i % 1000 == 0 && fs.GetFileStatus(path).GetLen() >= fileSize)
            {
                break;
            }

            int klen = rng.Next(baseKlen) + baseKlen;
            int vlen = rng.Next(baseVlen) + baseVlen;
            FillBuffer(rng, key, keyBuffer, klen);
            FillBuffer(rng, value, valueBuffer, vlen);
            key.Set(keyBuffer, 0, klen);
            value.Set(valueBuffer, 0, vlen);
            appendable.Append(key, value);
            totalBytes += klen;
            totalBytes += vlen;
        }

        // Stop the clock before closing, exactly as on the original success path.
        StopTime();
    }
    finally
    {
        // Release the appendable even when the loop fails part-way, so a
        // failed run does not leak the underlying writer.
        appendable.Close();
    }

    ReportStats(path, totalBytes);
}
/// <summary>
/// Writes Records random (int, double) pairs as raw bytes through
/// SequenceFileAsBinaryOutputFormat, then reads them back with
/// SequenceFileInputFormat and checks every pair and the total count.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Configuration conf = new Configuration();
    Job job = Job.GetInstance(conf);
    Path outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");

    // Keep the seed so the read phase can replay the same random sequence.
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);

    FileOutputFormat.SetOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.GetOutputCommitter(context);
    committer.SetupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            // Serialize each writable into outbuf and hand the raw bytes to the writer.
            iwritable = new IntWritable(r.Next());
            iwritable.Write(outbuf);
            bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            dwritable = new DoubleWritable(r.NextDouble());
            dwritable.Write(outbuf);
            bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            writer.Write(bkey, bval);
        }
    }
    finally
    {
        writer.Close(context);
    }
    committer.CommitTask(context);
    committer.CommitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // Rewind the generator so expected values match what was written.
    r.SetSeed(seed);
    SequenceFileInputFormat.SetInputPaths(job, outdir);
    Log.Info("Reading data by SequenceFileInputFormat");
    foreach (InputSplit split in iformat.GetSplits(job))
    {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.CreateRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                int sourceInt = r.Next();
                double sourceDouble = r.NextDouble();
                iwritable = reader.GetCurrentKey();
                dwritable = reader.GetCurrentValue();
                // NUnit takes (expected, actual, message) / (condition, message);
                // the message must come last, not first as in JUnit.
                NUnit.Framework.Assert.AreEqual(sourceInt, iwritable.Get(),
                    "Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*");
                NUnit.Framework.Assert.IsTrue(dwritable.Get().CompareTo(sourceDouble) == 0,
                    "Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*");
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual(Records, count, "Some records not found");
}
// A random task attempt id for testing.

/// <summary>
/// Writes Records random (int, double) pairs as raw bytes to "testbinary.seq"
/// through SequenceFileAsBinaryOutputFormat, then reads the file back with
/// SequenceFileInputFormat and checks every pair and the total count.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestBinary()
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(
        new Path(new Path(Runtime.GetProperty("test.build.data", ".")), FileOutputCommitter.TempDirName),
        "_" + attempt);
    Path file = new Path(dir, "testbinary.seq");

    // Keep the seed so the read phase can replay the same random sequence.
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);

    // Start from a clean working directory.
    fs.Delete(dir, true);
    if (!fs.Mkdirs(dir))
    {
        Fail("Failed to create output directory");
    }

    job.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, dir);
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    RecordWriter<BytesWritable, BytesWritable> writer =
        new SequenceFileAsBinaryOutputFormat().GetRecordWriter(fs, job, file.ToString(), Reporter.Null);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            // Serialize each writable into outbuf and hand the raw bytes to the writer.
            iwritable = new IntWritable(r.Next());
            iwritable.Write(outbuf);
            bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            dwritable = new DoubleWritable(r.NextDouble());
            dwritable.Write(outbuf);
            bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            writer.Write(bkey, bval);
        }
    }
    finally
    {
        writer.Close(Reporter.Null);
    }

    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // Rewind the generator so expected values match what was written.
    r.SetSeed(seed);
    int numSplits = 3;
    SequenceFileInputFormat.AddInputPath(job, file);
    Log.Info("Reading data by SequenceFileInputFormat");
    foreach (InputSplit split in iformat.GetSplits(job, numSplits))
    {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.GetRecordReader(split, job, Reporter.Null);
        try
        {
            while (reader.Next(iwritable, dwritable))
            {
                int sourceInt = r.Next();
                double sourceDouble = r.NextDouble();
                // NUnit takes (expected, actual, message) / (condition, message);
                // the message must come last, not first as in JUnit.
                NUnit.Framework.Assert.AreEqual(sourceInt, iwritable.Get(),
                    "Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*");
                NUnit.Framework.Assert.IsTrue(dwritable.Get().CompareTo(sourceDouble) == 0,
                    "Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*");
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual(Records, count, "Some records not found");
}