/// <summary>Concatenates the bytes of two writables into one new array.</summary>
/// <param name="a">supplies the leading bytes</param>
/// <param name="b">supplies the trailing bytes</param>
/// <returns>
/// a new array holding the first <c>a.GetLength()</c> bytes of <c>a</c>
/// followed by the first <c>b.GetLength()</c> bytes of <c>b</c>
/// </returns>
private static byte[] Pair(BytesWritable a, BytesWritable b)
{
    int headLength = a.GetLength();
    int tailLength = b.GetLength();
    byte[] joined = new byte[headLength + tailLength];

    // Only GetLength() bytes are taken from each backing array.
    System.Array.Copy(a.GetBytes(), 0, joined, 0, headLength);
    System.Array.Copy(b.GetBytes(), 0, joined, headLength, tailLength);
    return joined;
}
/// <summary>Write the given object to the stream.</summary>
/// <remarks>
/// A Text or BytesWritable is emitted directly: its length (via
/// <c>WritableUtils.WriteVInt</c>) followed by that many of its bytes.
/// Anything else is first serialized into the shared buffer, and the
/// buffer's length and contents are emitted the same way.
/// </remarks>
/// <param name="obj">the object to write</param>
/// <exception cref="System.IO.IOException"/>
private void WriteObject(Writable obj)
{
    // Text and BytesWritable are encoded directly, so that they end up
    // in C++ as the natural translations.
    if (obj is Text)
    {
        Text text = (Text)obj;
        int textLength = text.GetLength();
        WritableUtils.WriteVInt(stream, textLength);
        stream.Write(text.GetBytes(), 0, textLength);
        return;
    }

    if (obj is BytesWritable)
    {
        BytesWritable bytes = (BytesWritable)obj;
        int byteCount = bytes.GetLength();
        WritableUtils.WriteVInt(stream, byteCount);
        stream.Write(bytes.GetBytes(), 0, byteCount);
        return;
    }

    // Generic path: serialize into the reusable buffer, then frame it.
    buffer.Reset();
    obj.Write(buffer);
    int serializedLength = buffer.GetLength();
    WritableUtils.WriteVInt(stream, serializedLength);
    stream.Write(buffer.GetData(), 0, serializedLength);
}
/// <summary>Advance to the next key/value pair.</summary>
/// <returns>
/// <c>true</c> when a pair was loaded into the current key and value;
/// <c>false</c> (with both cleared to null) when no input remains
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public override bool NextKeyValue()
{
    if (!hasMore)
    {
        // Input exhausted: clear the current pair.
        key = null;
        value = null;
        return false;
    }

    // This is the first value of a key unless the previous call saw the same key coming.
    firstValue = !nextKeyIsSame;

    // Copy the unread part of the raw key, then aim the shared buffer at
    // that copy before deserializing (the deserializers presumably read
    // from this buffer -- confirm against their setup).
    DataInputBuffer rawKey = input.GetKey();
    currentRawKey.Set(rawKey.GetData(), rawKey.GetPosition(), rawKey.GetLength() - rawKey.GetPosition());
    buffer.Reset(currentRawKey.GetBytes(), 0, currentRawKey.GetLength());
    key = keyDeserializer.Deserialize(key);

    DataInputBuffer rawValue = input.GetValue();
    buffer.Reset(rawValue.GetData(), rawValue.GetPosition(), rawValue.GetLength() - rawValue.GetPosition());
    value = valueDeserializer.Deserialize(value);

    currentKeyLength = rawKey.GetLength() - rawKey.GetPosition();
    currentValueLength = rawValue.GetLength() - rawValue.GetPosition();

    if (isMarked)
    {
        backupStore.Write(rawKey, rawValue);
    }

    // Look ahead one record to learn whether the following key compares equal.
    hasMore = input.Next();
    bool sameKeyFollows = false;
    if (hasMore)
    {
        DataInputBuffer followingKey = input.GetKey();
        int comparison = comparator.Compare(
            currentRawKey.GetBytes(), 0, currentRawKey.GetLength(),
            followingKey.GetData(), followingKey.GetPosition(),
            followingKey.GetLength() - followingKey.GetPosition());
        sameKeyFollows = comparison == 0;
    }
    nextKeyIsSame = sameKeyFollows;

    inputValueCounter.Increment(1);
    return true;
}
/// <summary>
/// Creates a single sequence file <c>part-0</c> under <paramref name="dir"/>
/// and fills it with randomly sized, randomly filled BytesWritable pairs
/// until at least <paramref name="fileSizeInMB"/> megabytes of key and
/// value bytes have been appended.
/// </summary>
/// <param name="conf">
/// configuration supplying the key/value size bounds (MinKey, MaxKey,
/// MinValue, MaxValue) and used to create the writer
/// </param>
/// <param name="fs">file system on which the file is created</param>
/// <param name="dir">target directory; must not already contain entries</param>
/// <param name="fileSizeInMB">amount of key plus value data to write, in megabytes</param>
/// <exception cref="System.IO.IOException">
/// if <paramref name="dir"/> exists and is non-empty, or on a write failure
/// </exception>
private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path dir, long fileSizeInMB)
{
    // Check if the input path exists and is non-empty
    if (fs.Exists(dir))
    {
        FileStatus[] list = fs.ListStatus(dir);
        if (list.Length > 0)
        {
            throw new IOException("Input path: " + dir + " already exists... ");
        }
    }

    Path file = new Path(dir, "part-0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(BytesWritable),
        typeof(BytesWritable), SequenceFile.CompressionType.None);
    long start;
    try
    {
        long numBytesToWrite = fileSizeInMB * 1024 * 1024;
        int minKeySize = conf.GetInt(MinKey, 10);
        int keySizeRange = conf.GetInt(MaxKey, 1000) - minKeySize;
        int minValueSize = conf.GetInt(MinValue, 0);
        int valueSizeRange = conf.GetInt(MaxValue, 20000) - minValueSize;
        BytesWritable randomKey = new BytesWritable();
        BytesWritable randomValue = new BytesWritable();
        Log.Info("Writing " + numBytesToWrite + " bytes to " + file + " with " + "minKeySize: "
            + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize
            + " valueSizeRange: " + valueSizeRange);
        start = Runtime.CurrentTimeMillis();
        while (numBytesToWrite > 0)
        {
            // A zero range means a fixed size; skip the random draw in that case.
            int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
            randomKey.SetSize(keyLength);
            RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());
            int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange) : 0);
            randomValue.SetSize(valueLength);
            RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());
            writer.Append(randomKey, randomValue);
            numBytesToWrite -= keyLength + valueLength;
        }
    }
    finally
    {
        // Release the writer even when generating or appending a record fails.
        writer.Close();
    }

    long end = Runtime.CurrentTimeMillis();
    Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + (end - start) / 1000 + "secs");
}
/// <summary>Writes the given object to the given stream and flushes it.</summary>
/// <remarks>
/// A Text or BytesWritable is emitted directly: its length (via
/// <c>WritableUtils.WriteVLong</c>) followed by that many of its bytes.
/// Any other writable is serialized into a temporary buffer whose length
/// (via <c>WritableUtils.WriteVInt</c>) and contents are then emitted.
/// </remarks>
/// <param name="obj">the object to write</param>
/// <param name="stream">the destination stream</param>
/// <exception cref="System.IO.IOException"/>
protected internal virtual void WriteObject(Writable obj, DataOutputStream stream)
{
    // For Text and BytesWritable, encode them directly, so that they end up
    // in C++ as the natural translations.
    if (obj is Text)
    {
        Text t = (Text)obj;
        int len = t.GetLength();
        WritableUtils.WriteVLong(stream, len);
        stream.Flush();
        stream.Write(t.GetBytes(), 0, len);
        stream.Flush();
    }
    else if (obj is BytesWritable)
    {
        BytesWritable b = (BytesWritable)obj;
        int len = b.GetLength();
        WritableUtils.WriteVLong(stream, len);
        stream.Write(b.GetBytes(), 0, len);
    }
    else
    {
        // The scratch buffer is only needed on this path, so it is created
        // here rather than on every call.
        DataOutputBuffer buffer = new DataOutputBuffer();
        obj.Write(buffer);
        int length = buffer.GetLength();
        WritableUtils.WriteVInt(stream, length);
        stream.Write(buffer.GetData(), 0, length);
    }

    stream.Flush();
}
/// <summary>Reads every record of one split and returns them as strings.</summary>
/// <param name="format">input format used to obtain the record reader</param>
/// <param name="split">the split to read</param>
/// <param name="job">job configuration passed to the record reader</param>
/// <returns>the records of the split, in read order, each converted from its bytes to a string</returns>
/// <exception cref="System.IO.IOException"/>
private static IList<string> ReadSplit(FixedLengthInputFormat format, InputSplit split, JobConf job)
{
    IList<string> records = new AList<string>();
    RecordReader<LongWritable, BytesWritable> recordReader =
        format.GetRecordReader(split, job, voidReporter);
    LongWritable recordKey = recordReader.CreateKey();
    BytesWritable recordBytes = recordReader.CreateValue();
    try
    {
        for (bool more = recordReader.Next(recordKey, recordBytes);
             more;
             more = recordReader.Next(recordKey, recordBytes))
        {
            // Convert only the GetLength() bytes reported for this record.
            string text = Sharpen.Runtime.GetStringForBytes(
                recordBytes.GetBytes(), 0, recordBytes.GetLength());
            records.AddItem(text);
        }
    }
    finally
    {
        recordReader.Close();
    }

    return records;
}
/// <summary>
/// Runs 20 randomized rounds: each round creates a test file of
/// fixed-length records through <c>CreateFile</c>, asks
/// FixedLengthInputFormat for a varying number of splits, and checks that
/// the records read back match the generated ones in key, length, content
/// and count.
/// </summary>
/// <remarks>
/// Round 8 uses an empty file, round 10 forces a record length of 1, and
/// the last round requests splits smaller than one record. The random
/// seed is logged so a failing run can be reproduced.
/// </remarks>
/// <param name="codec">compression codec for the test file, or null for an uncompressed file</param>
/// <exception cref="System.IO.IOException"/>
private void RunRandomTests(CompressionCodec codec)
{
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null)
    {
        fileName.Append(".gz");
    }
    localFs.Delete(workDir, true);
    Path file = new Path(workDir, fileName.ToString());
    int seed = new Random().Next();
    Log.Info("Seed = " + seed);
    Random random = new Random(seed);
    int MaxTests = 20;
    LongWritable key = new LongWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < MaxTests; i++)
    {
        Log.Info("----------------------------------------------------------");
        // Maximum total records of 999
        int totalRecords = random.Next(999) + 1;
        // Test an empty file
        if (i == 8)
        {
            totalRecords = 0;
        }
        // Maximum bytes in a record of 100K
        int recordLength = random.Next(1024 * 100) + 1;
        // For the 11th test, force a record length of 1
        if (i == 10)
        {
            recordLength = 1;
        }
        // The total bytes in the test file
        int fileSize = (totalRecords * recordLength);
        Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
        // Create the job
        JobConf job = new JobConf(defaultConf);
        if (codec != null)
        {
            ReflectionUtils.SetConf(codec, job);
        }
        // Create the test file
        AList<string> recordList = CreateFile(file, codec, recordLength, totalRecords);
        NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
        //set the fixed length record length config property for the job
        FixedLengthInputFormat.SetRecordLength(job, recordLength);
        int numSplits = 1;
        // Arbitrarily set number of splits.
        if (i > 0)
        {
            if (i == (MaxTests - 1))
            {
                // Test a split size that is less than record len.
                // recordLength / 2 is already an integer division; the divisor
                // is clamped to 1 so a record length of 1 cannot divide by zero.
                numSplits = fileSize / Math.Max(1, recordLength / 2);
            }
            else if (MaxTests % i == 0)
            {
                // Let us create a split size that is forced to be
                // smaller than the end file itself, (ensures 1+ splits)
                numSplits = fileSize / Math.Max(1, fileSize - random.Next(fileSize));
            }
            else
            {
                // Just pick a random split size with no upper bound.
                // random.Next may return 0, so clamp the divisor.
                numSplits = Math.Max(1, fileSize / Math.Max(1, random.Next(int.MaxValue)));
            }
            Log.Info("Number of splits set to: " + numSplits);
        }
        // Setup the input path
        FileInputFormat.SetInputPaths(job, workDir);
        // Try splitting the file in a variety of sizes
        FixedLengthInputFormat format = new FixedLengthInputFormat();
        format.Configure(job);
        InputSplit[] splits = format.GetSplits(job, numSplits);
        Log.Info("Actual number of splits = " + splits.Length);
        // Test combined split lengths = total file size
        long recordOffset = 0;
        int recordNumber = 0;
        foreach (InputSplit split in splits)
        {
            RecordReader<LongWritable, BytesWritable> reader = format.GetRecordReader(split, job, voidReporter);
            try
            {
                Type clazz = reader.GetType();
                NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:",
                    typeof(FixedLengthRecordReader), clazz);
                // Plow through the records in this split
                while (reader.Next(key, value))
                {
                    // Widen before multiplying so the expected offset is computed in 64 bits.
                    NUnit.Framework.Assert.AreEqual("Checking key", (long)recordNumber * recordLength,
                        key.Get());
                    string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0,
                        value.GetLength());
                    NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength,
                        value.GetLength());
                    NUnit.Framework.Assert.IsTrue("Checking for more records than expected:",
                        recordNumber < totalRecords);
                    string origRecord = recordList[recordNumber];
                    NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString);
                    recordNumber++;
                }
            }
            finally
            {
                // Close the reader even when an assertion above fails.
                reader.Close();
            }
        }
        NUnit.Framework.Assert.AreEqual("Total original records should be total read records:",
            recordList.Count, recordNumber);
    }
}
/// <summary>
/// Writes <c>Records</c> random Text key/value pairs to a sequence file,
/// reads them back through SequenceFileAsBinaryInputFormat, and checks
/// that each raw key and value decodes to the text regenerated from the
/// same random seed.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Job job = Job.GetInstance();
    FileSystem fs = FileSystem.GetLocal(job.GetConfiguration());
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "testbinary.seq");
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);
    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);
    Text tkey = new Text();
    Text tval = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.GetConfiguration(), file,
        typeof(Text), typeof(Text));
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
            // System.Convert.ToString(long, int) rejects base 36, so format it here.
            tval.Set(ToBase36(r.NextLong()));
            writer.Append(tkey, tval);
        }
    }
    finally
    {
        writer.Close();
    }

    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    InputFormat<BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat();
    int count = 0;
    // Rewind the generator so the expected values can be regenerated in order.
    r.SetSeed(seed);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    Text cmpkey = new Text();
    Text cmpval = new Text();
    DataInputBuffer buf = new DataInputBuffer();
    FileInputFormat.SetInputPaths(job, file);
    foreach (InputSplit split in bformat.GetSplits(job))
    {
        RecordReader<BytesWritable, BytesWritable> reader = bformat.CreateRecordReader(split, context);
        MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<BytesWritable, BytesWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                bkey = reader.GetCurrentKey();
                bval = reader.GetCurrentValue();
                tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                tval.Set(ToBase36(r.NextLong()));
                // Decode the raw bytes back into Text for comparison.
                buf.Reset(bkey.GetBytes(), bkey.GetLength());
                cmpkey.ReadFields(buf);
                buf.Reset(bval.GetBytes(), bval.GetLength());
                cmpval.ReadFields(buf);
                NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                    + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                    + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }

    NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
}

/// <summary>Formats a 64-bit integer in base 36 using lowercase digits.</summary>
/// <param name="value">the number to format; negative values get a leading '-'</param>
/// <returns>the base-36 text of <paramref name="value"/></returns>
private static string ToBase36(long value)
{
    const string Digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    if (value == 0)
    {
        return "0";
    }

    // 13 base-36 digits cover any 64-bit magnitude; one more slot holds the sign.
    char[] chars = new char[14];
    int pos = chars.Length;
    long remaining = value;
    while (remaining != 0)
    {
        // The remainder carries the sign of the dividend, so take its
        // magnitude; this also handles long.MinValue without negating it.
        int digit = (int)(remaining % 36);
        chars[--pos] = Digits[digit < 0 ? -digit : digit];
        remaining /= 36;
    }

    if (value < 0)
    {
        chars[--pos] = '-';
    }

    return new string(chars, pos, chars.Length - pos);
}
/// <summary>
/// Hashes a key by delegating to the byte-range <c>MD5Hashcode</c>
/// overload over the key's first <c>GetLength()</c> bytes.
/// </summary>
/// <param name="key">the key whose bytes are hashed</param>
/// <returns>the value returned by the byte-range overload</returns>
/// <exception cref="Sharpen.DigestException"/>
private long MD5Hashcode(BytesWritable key)
{
    byte[] keyBytes = key.GetBytes();
    int keyLength = key.GetLength();
    long hash = MD5Hashcode(keyBytes, 0, keyLength);
    return hash;
}
/// <summary>
/// Writes <c>Nentry</c> entries keyed by LongWritable to a TFile (created
/// with the "gz" compression name and the LongWritable comparator), then
/// scans the file and checks that the values come back as
/// <c>BuildValue(0)</c>, <c>BuildValue(1)</c>, ... in that order.
/// </summary>
public virtual void TestSortedLongWritable()
{
    Configuration conf = new Configuration();
    Path path = new Path(Root, name);
    FileSystem fs = path.GetFileSystem(conf);

    // Write phase.
    FSDataOutputStream output = fs.Create(path);
    try
    {
        TFile.Writer tfileWriter = new TFile.Writer(output, BlockSize, "gz",
            jClassLongWritableComparator, conf);
        try
        {
            LongWritable sortKey = new LongWritable(0);
            long entry = 0;
            while (entry < Nentry)
            {
                sortKey.Set(Cube(entry - Nentry / 2));

                DataOutputStream keyStream = tfileWriter.PrepareAppendKey(-1);
                try
                {
                    sortKey.Write(keyStream);
                }
                finally
                {
                    keyStream.Close();
                }

                DataOutputStream valueStream = tfileWriter.PrepareAppendValue(-1);
                try
                {
                    valueStream.Write(Runtime.GetBytesForString(BuildValue(entry)));
                }
                finally
                {
                    valueStream.Close();
                }

                ++entry;
            }
        }
        finally
        {
            tfileWriter.Close();
        }
    }
    finally
    {
        output.Close();
    }

    // Read phase.
    FSDataInputStream input = fs.Open(path);
    try
    {
        TFile.Reader tfileReader = new TFile.Reader(input, fs.GetFileStatus(path).GetLen(), conf);
        try
        {
            TFile.Reader.Scanner scanner = tfileReader.CreateScanner();
            long expectedIndex = 0;
            BytesWritable valueBytes = new BytesWritable();
            while (!scanner.AtEnd())
            {
                scanner.Entry().GetValue(valueBytes);
                string actual = Runtime.GetStringForBytes(valueBytes.GetBytes(), 0,
                    valueBytes.GetLength());
                Assert.Equal(BuildValue(expectedIndex), actual);
                ++expectedIndex;
                scanner.Advance();
            }
        }
        finally
        {
            tfileReader.Close();
        }
    }
    finally
    {
        input.Close();
    }
}
/// <summary>
/// Writes <c>Records</c> random Text key/value pairs to a sequence file,
/// reads them back through SequenceFileAsBinaryInputFormat using three
/// requested splits, and checks that each raw key and value decodes to the
/// text regenerated from the same random seed.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestBinary()
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "testbinary.seq");
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);
    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);
    Text tkey = new Text();
    Text tval = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, file, typeof(Text), typeof(Text));
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
            // System.Convert.ToString(long, int) rejects base 36, so format it here.
            tval.Set(ToBase36(r.NextLong()));
            writer.Append(tkey, tval);
        }
    }
    finally
    {
        writer.Close();
    }

    InputFormat<BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat();
    int count = 0;
    // Rewind the generator so the expected values can be regenerated in order.
    r.SetSeed(seed);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    Text cmpkey = new Text();
    Text cmpval = new Text();
    DataInputBuffer buf = new DataInputBuffer();
    int NumSplits = 3;
    FileInputFormat.SetInputPaths(job, file);
    foreach (InputSplit split in bformat.GetSplits(job, NumSplits))
    {
        RecordReader<BytesWritable, BytesWritable> reader = bformat.GetRecordReader(split, job,
            Reporter.Null);
        try
        {
            while (reader.Next(bkey, bval))
            {
                tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                tval.Set(ToBase36(r.NextLong()));
                // Decode the raw bytes back into Text for comparison.
                buf.Reset(bkey.GetBytes(), bkey.GetLength());
                cmpkey.ReadFields(buf);
                buf.Reset(bval.GetBytes(), bval.GetLength());
                cmpval.ReadFields(buf);
                NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":"
                    + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString()));
                NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":"
                    + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString()));
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }

    NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
}

/// <summary>Formats a 64-bit integer in base 36 using lowercase digits.</summary>
/// <param name="value">the number to format; negative values get a leading '-'</param>
/// <returns>the base-36 text of <paramref name="value"/></returns>
private static string ToBase36(long value)
{
    const string Digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    if (value == 0)
    {
        return "0";
    }

    // 13 base-36 digits cover any 64-bit magnitude; one more slot holds the sign.
    char[] chars = new char[14];
    int pos = chars.Length;
    long remaining = value;
    while (remaining != 0)
    {
        // The remainder carries the sign of the dividend, so take its
        // magnitude; this also handles long.MinValue without negating it.
        int digit = (int)(remaining % 36);
        chars[--pos] = Digits[digit < 0 ? -digit : digit];
        remaining /= 36;
    }

    if (value < 0)
    {
        chars[--pos] = '-';
    }

    return new string(chars, pos, chars.Length - pos);
}
/// <summary>
/// Writes the first <c>GetLength()</c> bytes of the held value to the
/// given stream as they are.
/// </summary>
/// <param name="outStream">the stream that receives the bytes</param>
/// <exception cref="System.IO.IOException"/>
public virtual void WriteUncompressedBytes(DataOutputStream outStream)
{
    byte[] payload = value.GetBytes();
    int payloadLength = value.GetLength();
    outStream.Write(payload, 0, payloadLength);
}