/// <summary>
/// Reads every record of <paramref name="split"/> with the record reader created by
/// <paramref name="format"/> and returns each record value decoded to a string.
/// </summary>
/// <param name="format">Input format used to create the record reader.</param>
/// <param name="split">The split to read.</param>
/// <param name="job">Job whose configuration backs the dummy task and map contexts.</param>
/// <returns>One string per record, in the order the reader produced them.</returns>
/// <exception cref="System.Exception"/>
private static IList<string> ReadSplit(FixedLengthInputFormat format, InputSplit split, Job job)
{
    IList<string> records = new AList<string>();
    TaskAttemptContext taskContext =
        MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    RecordReader<LongWritable, BytesWritable> recordReader =
        format.CreateRecordReader(split, taskContext);
    MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mapContext =
        new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
            job.GetConfiguration(), taskContext.GetTaskAttemptID(), recordReader, null, null,
            MapReduceTestUtil.CreateDummyReporter(), split);
    try
    {
        recordReader.Initialize(split, mapContext);
        while (recordReader.NextKeyValue())
        {
            // The key is fetched but not used; only the value bytes are decoded.
            LongWritable recordKey = recordReader.GetCurrentKey();
            BytesWritable recordValue = recordReader.GetCurrentValue();
            string text = Sharpen.Runtime.GetStringForBytes(
                recordValue.GetBytes(), 0, recordValue.GetLength());
            records.AddItem(text);
        }
    }
    finally
    {
        // Close the reader whether or not initialization/reading succeeded.
        recordReader.Close();
    }
    return records;
}
/// <summary>
/// Reads every split produced by a <c>CompositeInputFormat</c> for a job built from
/// <paramref name="conf"/>, validates each key/value pair with <c>ValidateKeyValue</c>,
/// and returns how many pairs were read.
/// </summary>
/// <param name="conf">Configuration used to create the job and the dummy contexts.</param>
/// <param name="tupleSize">Forwarded to <c>ValidateKeyValue</c>.</param>
/// <param name="firstTuple">Forwarded to <c>ValidateKeyValue</c>.</param>
/// <param name="secondTuple">Forwarded to <c>ValidateKeyValue</c>.</param>
/// <param name="ttype">Forwarded to <c>ValidateKeyValue</c>.</param>
/// <returns>The total number of key/value pairs read across all splits.</returns>
/// <exception cref="System.Exception"/>
public virtual int TestFormat(Configuration conf, int tupleSize, bool firstTuple, bool secondTuple, TestJoinProperties.TestType ttype)
{
    Job job = Job.GetInstance(conf);
    CompositeInputFormat format = new CompositeInputFormat();
    int count = 0;
    foreach (InputSplit split in (IList<InputSplit>)format.GetSplits(job))
    {
        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf);
        RecordReader reader = format.CreateRecordReader(split, context);
        MapContext mcontext = new MapContextImpl(conf, context.GetTaskAttemptID(), reader,
            null, null, MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                WritableComparable key = (WritableComparable)reader.GetCurrentKey();
                Writable value = (Writable)reader.GetCurrentValue();
                ValidateKeyValue(key, value, tupleSize, firstTuple, secondTuple, ttype);
                count++;
            }
        }
        finally
        {
            // Release the reader even when validation fails part-way through a split.
            reader.Close();
        }
    }
    return count;
}
/// <summary>
/// Splits <paramref name="job"/> with an <c>NLineInputFormat</c> and asserts, for every
/// split, that it reports no locations, that its reader is a <c>LineRecordReader</c>, and
/// that it yields the expected number of records.
/// </summary>
/// <param name="job">Job to split and read.</param>
/// <param name="expectedN">Record count expected for every split except the last one.</param>
/// <param name="lastN">Record count expected for the last split.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal virtual void CheckFormat(Job job, int expectedN, int lastN)
{
    NLineInputFormat format = new NLineInputFormat();
    IList<InputSplit> splits = format.GetSplits(job);
    for (int i = 0; i < splits.Count; i++)
    {
        InputSplit current = splits[i];
        NUnit.Framework.Assert.AreEqual("There are no split locations", 0,
            current.GetLocations().Length);

        TaskAttemptContext taskContext =
            MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
        RecordReader<LongWritable, Text> lineReader =
            format.CreateRecordReader(current, taskContext);
        NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.",
            typeof(LineRecordReader), lineReader.GetType());

        MapContext<LongWritable, Text, LongWritable, Text> mapContext =
            new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                job.GetConfiguration(), taskContext.GetTaskAttemptID(), lineReader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), current);
        lineReader.Initialize(current, mapContext);

        int linesRead = 0;
        try
        {
            while (lineReader.NextKeyValue())
            {
                linesRead++;
            }
        }
        finally
        {
            lineReader.Close();
        }

        // Only the final split is compared against lastN.
        int wanted = (i == splits.Count - 1) ? lastN : expectedN;
        NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong",
            wanted, linesRead);
    }
}
/// <summary>
/// Reads every record of <paramref name="split"/> with the record reader created by
/// <paramref name="format"/> and returns a copy of each record value.
/// </summary>
/// <param name="format">Input format used to create the record reader.</param>
/// <param name="split">The split to read.</param>
/// <param name="job">Job whose configuration backs the dummy task and map contexts.</param>
/// <returns>A new <c>Text</c> per record, in the order the reader produced them.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private static IList<Text> ReadSplit(InputFormat<LongWritable, Text> format, InputSplit split, Job job)
{
    IList<Text> result = new AList<Text>();
    Configuration conf = job.GetConfiguration();
    // One dummy context serves both the reader and the map context.
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf);
    RecordReader<LongWritable, Text> reader = format.CreateRecordReader(split, context);
    MapContext<LongWritable, Text, LongWritable, Text> mcontext =
        new MapContextImpl<LongWritable, Text, LongWritable, Text>(
            conf, context.GetTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.CreateDummyReporter(), split);
    reader.Initialize(split, mcontext);
    try
    {
        while (reader.NextKeyValue())
        {
            // Copy the value into a fresh Text before storing it.
            result.AddItem(new Text(reader.GetCurrentValue()));
        }
    }
    finally
    {
        // The reader was previously left open; close it on every exit path.
        reader.Close();
    }
    return result;
}
/// <summary>
/// Counts the records returned by a <c>SequenceFileInputFilter</c> over the splits of the
/// test job, after setting the maximum split size to the input file length divided by the
/// number of splits.
/// </summary>
/// <param name="numSplits">
/// Number of splits to divide the file length by; when 0, a random positive value is drawn.
/// </param>
/// <returns>The number of records read across all splits.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private int CountRecords(int numSplits)
{
    InputFormat<Text, BytesWritable> filterFormat =
        new SequenceFileInputFilter<Text, BytesWritable>();

    // A request for 0 splits means "pick a random split count".
    int splitCount = numSplits == 0
        ? random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1
        : numSplits;
    FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(inFile).GetLen() / splitCount);

    TaskAttemptContext taskContext =
        MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());

    // check each split
    int accepted = 0;
    foreach (InputSplit current in filterFormat.GetSplits(job))
    {
        RecordReader<Text, BytesWritable> recordReader =
            filterFormat.CreateRecordReader(current, taskContext);
        MapContext<Text, BytesWritable, Text, BytesWritable> mapContext =
            new MapContextImpl<Text, BytesWritable, Text, BytesWritable>(
                job.GetConfiguration(), taskContext.GetTaskAttemptID(), recordReader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), current);
        recordReader.Initialize(current, mapContext);
        try
        {
            while (recordReader.NextKeyValue())
            {
                Log.Info("Accept record " + recordReader.GetCurrentKey().ToString());
                accepted++;
            }
        }
        finally
        {
            recordReader.Close();
        }
    }
    return accepted;
}
/// <summary>Test with record length set to a negative value</summary>
/// <remarks>
/// Passes when creating or initializing a record reader throws an <c>IOException</c> for
/// at least one split after the record length has been configured as -10.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNegativeRecordLength()
{
    localFs.Delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    CreateFile(file, null, 10, 10);

    // Set the fixed length record length config property
    Job job = Job.GetInstance(defaultConf);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    FixedLengthInputFormat.SetRecordLength(job.GetConfiguration(), -10);
    FileInputFormat.SetInputPaths(job, workDir);
    IList<InputSplit> splits = format.GetSplits(job);

    bool exceptionThrown = false;
    foreach (InputSplit split in splits)
    {
        RecordReader<LongWritable, BytesWritable> reader = null;
        try
        {
            TaskAttemptContext context =
                MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
            reader = format.CreateRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
                new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
                    job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                    MapReduceTestUtil.CreateDummyReporter(), split);
            reader.Initialize(split, mcontext);
        }
        catch (IOException ioe)
        {
            exceptionThrown = true;
            Log.Info("Exception message:" + ioe.Message);
        }
        finally
        {
            // Close outside the catch so a failure while closing is not mistaken for
            // the expected "negative record length" exception.
            if (reader != null)
            {
                reader.Close();
            }
        }
    }
    NUnit.Framework.Assert.IsTrue("Exception for negative record length:", exceptionThrown);
}
/// <summary>
/// Creates several files under the work directory, asks <c>CombineTextInputFormat</c> for
/// splits three times, and checks each time that a single <c>CombineFileSplit</c> is
/// returned whose records cover every value exactly once.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    Job job = Job.GetInstance(new Configuration(defaultConf));

    // Draw a seed, log it, then re-seed the same generator with it.
    Random random = new Random();
    long seed = random.NextLong();
    Log.Info("seed = " + seed);
    random.SetSeed(seed);

    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);

    int length = 10000;
    int numFiles = 10;

    // create files with various lengths
    CreateFiles(length, numFiles, random);

    // create a combined split for the files
    CombineTextInputFormat combineFormat = new CombineTextInputFormat();
    for (int round = 0; round < 3; round++)
    {
        int requested = random.Next(length / 20) + 1;
        Log.Info("splitting: requesting = " + requested);
        IList<InputSplit> allSplits = combineFormat.GetSplits(job);
        Log.Info("splitting: got = " + allSplits.Count);

        // we should have a single split as the length is comfortably smaller than
        // the block size
        NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, allSplits.Count);
        InputSplit onlySplit = allSplits[0];
        NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit",
            typeof(CombineFileSplit), onlySplit.GetType());

        // check the split
        BitSet seen = new BitSet(length);
        Log.Debug("split= " + onlySplit);
        TaskAttemptContext taskContext =
            MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
        RecordReader<LongWritable, Text> recordReader =
            combineFormat.CreateRecordReader(onlySplit, taskContext);
        NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.",
            typeof(CombineFileRecordReader), recordReader.GetType());
        MapContext<LongWritable, Text, LongWritable, Text> mapContext =
            new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                job.GetConfiguration(), taskContext.GetTaskAttemptID(), recordReader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), onlySplit);
        recordReader.Initialize(onlySplit, mapContext);
        try
        {
            int recordsRead = 0;
            while (recordReader.NextKeyValue())
            {
                LongWritable currentKey = recordReader.GetCurrentKey();
                NUnit.Framework.Assert.IsNotNull("Key should not be null.", currentKey);
                Text currentValue = recordReader.GetCurrentValue();
                int parsed = System.Convert.ToInt32(currentValue.ToString());
                Log.Debug("read " + parsed);
                // Each parsed value may be seen only once.
                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seen.Get(parsed));
                seen.Set(parsed);
                recordsRead++;
            }
            Log.Debug("split=" + onlySplit + " count=" + recordsRead);
        }
        finally
        {
            recordReader.Close();
        }
        NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length,
            seen.Cardinality());
    }
}
/// <summary>
/// Writes <c>Records</c> random int/double pairs as raw bytes through
/// <c>SequenceFileAsBinaryOutputFormat</c>, then reads them back with
/// <c>SequenceFileInputFormat</c> and checks that they match the values regenerated from
/// the same random seed.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Configuration conf = new Configuration();
    Job job = Job.GetInstance(conf);
    Path outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.NextLong();
    // Log the seed so a failing run can be reproduced.
    Log.Info("seed = " + seed);
    r.SetSeed(seed);

    FileOutputFormat.SetOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job,
        SequenceFile.CompressionType.Block);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context =
        MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.GetOutputCommitter(context);
    committer.SetupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            // Serialize the key into the scratch buffer and copy its bytes out.
            iwritable = new IntWritable(r.Next());
            iwritable.Write(outbuf);
            bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            // Same for the value.
            dwritable = new DoubleWritable(r.NextDouble());
            dwritable.Write(outbuf);
            bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            writer.Write(bkey, bval);
        }
    }
    finally
    {
        writer.Close(context);
    }
    committer.CommitTask(context);
    committer.CommitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // Restart the generator so the read side replays the written sequence.
    r.SetSeed(seed);
    SequenceFileInputFormat.SetInputPaths(job, outdir);
    Log.Info("Reading data by SequenceFileInputFormat");
    foreach (InputSplit split in iformat.GetSplits(job))
    {
        RecordReader<IntWritable, DoubleWritable> reader =
            iformat.CreateRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                int sourceInt = r.Next();
                double sourceDouble = r.NextDouble();
                iwritable = reader.GetCurrentKey();
                dwritable = reader.GetCurrentValue();
                NUnit.Framework.Assert.AreEqual(
                    "Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*",
                    sourceInt, iwritable.Get());
                // System.Double has no static Compare; CompareTo gives the ordering check.
                NUnit.Framework.Assert.IsTrue(
                    "Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*",
                    dwritable.Get().CompareTo(sourceDouble) == 0);
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
}
/// <summary>
/// Runs 20 randomized rounds: each round writes a file of fixed-length records, splits it
/// with <c>FixedLengthInputFormat</c> under a varying maximum split size, and checks that
/// the records read back match the written ones in key, length, content and count.
/// </summary>
/// <param name="codec">
/// Compression codec passed to <c>CreateFile</c>; when non-null the file name gets a
/// ".gz" suffix and the codec is configured from the job configuration.
/// </param>
/// <exception cref="System.Exception"/>
private void RunRandomTests(CompressionCodec codec)
{
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null)
    {
        fileName.Append(".gz");
    }
    localFs.Delete(workDir, true);
    Path file = new Path(workDir, fileName.ToString());
    int seed = new Random().Next();
    Log.Info("Seed = " + seed);
    Random random = new Random(seed);
    int MaxTests = 20;
    LongWritable key;
    BytesWritable value;
    for (int i = 0; i < MaxTests; i++)
    {
        Log.Info("----------------------------------------------------------");
        // Maximum total records of 999
        int totalRecords = random.Next(999) + 1;
        // Test an empty file
        if (i == 8)
        {
            totalRecords = 0;
        }
        // Maximum bytes in a record of 100K
        int recordLength = random.Next(1024 * 100) + 1;
        // For the 11th test, force a record length of 1
        if (i == 10)
        {
            recordLength = 1;
        }
        // The total bytes in the test file
        int fileSize = totalRecords * recordLength;
        Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);

        // Create the job
        Job job = Job.GetInstance(defaultConf);
        if (codec != null)
        {
            ReflectionUtils.SetConf(codec, job.GetConfiguration());
        }

        // Create the test file
        AList<string> recordList = CreateFile(file, codec, recordLength, totalRecords);
        NUnit.Framework.Assert.IsTrue(localFs.Exists(file));

        // set the fixed length record length config property for the job
        FixedLengthInputFormat.SetRecordLength(job.GetConfiguration(), recordLength);

        int numSplits = 1;
        // Arbitrarily set number of splits.
        if (i > 0)
        {
            if (i == (MaxTests - 1))
            {
                // Test a split size that is less than record len.
                // Half the record length is clamped to 1 so a one-byte record
                // cannot produce a zero divisor.
                numSplits = fileSize / Math.Max(1, recordLength / 2);
            }
            else if (MaxTests % i == 0)
            {
                // Let us create a split size that is forced to be
                // smaller than the end file itself, (ensures 1+ splits)
                numSplits = fileSize / (fileSize - random.Next(fileSize));
            }
            else
            {
                // Just pick a random split size with no upper bound.
                // The random divisor is clamped to 1 because the draw may be 0.
                numSplits = Math.Max(1, fileSize / Math.Max(1, random.Next(int.MaxValue)));
            }
            Log.Info("Number of splits set to: " + numSplits);
        }
        job.GetConfiguration().SetLong("mapreduce.input.fileinputformat.split.maxsize",
            (long)(fileSize / numSplits));

        // setup the input path
        FileInputFormat.SetInputPaths(job, workDir);

        // Try splitting the file in a variety of sizes
        FixedLengthInputFormat format = new FixedLengthInputFormat();
        IList<InputSplit> splits = format.GetSplits(job);
        Log.Info("Actual number of splits = " + splits.Count);

        // Records are numbered across all splits of this round.
        int recordNumber = 0;
        foreach (InputSplit split in splits)
        {
            TaskAttemptContext context =
                MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
            RecordReader<LongWritable, BytesWritable> reader =
                format.CreateRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
                new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
                    job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                    MapReduceTestUtil.CreateDummyReporter(), split);
            reader.Initialize(split, mcontext);
            try
            {
                Type clazz = reader.GetType();
                NUnit.Framework.Assert.AreEqual(
                    "RecordReader class should be FixedLengthRecordReader:",
                    typeof(FixedLengthRecordReader), clazz);

                // Plow through the records in this split
                while (reader.NextKeyValue())
                {
                    key = reader.GetCurrentKey();
                    value = reader.GetCurrentValue();
                    NUnit.Framework.Assert.AreEqual("Checking key",
                        (long)(recordNumber * recordLength), key.Get());
                    string valueString = Sharpen.Runtime.GetStringForBytes(
                        value.GetBytes(), 0, value.GetLength());
                    NUnit.Framework.Assert.AreEqual("Checking record length:",
                        recordLength, value.GetLength());
                    NUnit.Framework.Assert.IsTrue("Checking for more records than expected:",
                        recordNumber < totalRecords);
                    string origRecord = recordList[recordNumber];
                    NUnit.Framework.Assert.AreEqual("Checking record content:",
                        origRecord, valueString);
                    recordNumber++;
                }
            }
            finally
            {
                // Close the reader even when an assertion above fails.
                reader.Close();
            }
        }
        NUnit.Framework.Assert.AreEqual(
            "Total original records should be total read records:",
            recordList.Count, recordNumber);
    }
}
/// <summary>
/// Writes <c>Records</c> random Text key/value pairs to a sequence file, reads them back
/// as raw bytes through <c>SequenceFileAsBinaryInputFormat</c>, deserializes them, and
/// checks them against the values regenerated from the same random seed.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Job job = Job.GetInstance();
    FileSystem fs = FileSystem.GetLocal(job.GetConfiguration());
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "testbinary.seq");
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);

    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);

    Text tkey = new Text();
    Text tval = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.GetConfiguration(), file,
        typeof(Text), typeof(Text));
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
            // System.Convert.ToString(long, int) rejects base 36, so encode by hand.
            tval.Set(ToBase36(r.NextLong()));
            writer.Append(tkey, tval);
        }
    }
    finally
    {
        writer.Close();
    }

    TaskAttemptContext context =
        MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    InputFormat<BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat();
    int count = 0;
    // Restart the generator so the read side replays the written sequence.
    r.SetSeed(seed);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    Text cmpkey = new Text();
    Text cmpval = new Text();
    DataInputBuffer buf = new DataInputBuffer();
    FileInputFormat.SetInputPaths(job, file);
    foreach (InputSplit split in bformat.GetSplits(job))
    {
        RecordReader<BytesWritable, BytesWritable> reader =
            bformat.CreateRecordReader(split, context);
        MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<BytesWritable, BytesWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                bkey = reader.GetCurrentKey();
                bval = reader.GetCurrentValue();
                // Regenerate the expected pair in the same order it was written.
                tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36));
                tval.Set(ToBase36(r.NextLong()));
                // Deserialize the raw key and value bytes back into Text.
                buf.Reset(bkey.GetBytes(), bkey.GetLength());
                cmpkey.ReadFields(buf);
                buf.Reset(bval.GetBytes(), bval.GetLength());
                cmpval.ReadFields(buf);
                NUnit.Framework.Assert.IsTrue(
                    "Keys don't match: " + "*" + cmpkey.ToString() + ":" + tkey.ToString() + "*",
                    cmpkey.ToString().Equals(tkey.ToString()));
                NUnit.Framework.Assert.IsTrue(
                    "Vals don't match: " + "*" + cmpval.ToString() + ":" + tval.ToString() + "*",
                    cmpval.ToString().Equals(tval.ToString()));
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
}

/// <summary>
/// Formats <paramref name="value"/> in base 36 using digits 0-9 then lowercase a-z, with a
/// leading '-' for negative values.
/// </summary>
private static string ToBase36(long value)
{
    const string digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    // 13 digits are enough for long.MinValue in base 36, plus one slot for the sign.
    char[] buffer = new char[14];
    int pos = buffer.Length;
    bool negative = value < 0;
    // Work on the non-positive form so long.MinValue never has to be negated.
    long remaining = negative ? value : -value;
    do
    {
        buffer[--pos] = digits[(int)(-(remaining % 36))];
        remaining /= 36;
    }
    while (remaining != 0);
    if (negative)
    {
        buffer[--pos] = '-';
    }
    return new string(buffer, pos, buffer.Length - pos);
}
/// <summary>
/// For a series of randomly growing entry counts, writes a tab-separated file of
/// "2*i&lt;TAB&gt;i" lines, splits it three times with <c>KeyValueTextInputFormat</c>, and
/// checks that every record is well-formed and appears in exactly one split.
/// </summary>
public virtual void TestFormat()
{
    Job job = Job.GetInstance(new Configuration(defaultConf));
    Path file = new Path(workDir, "test.txt");
    int seed = new Random().Next();
    Log.Info("seed = " + seed);
    Random random = new Random(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);

    int maxEntries = 10000;
    // for a variety of lengths
    for (int length = 0; length < maxEntries; length += random.Next(maxEntries / 10) + 1)
    {
        Log.Debug("creating; entries = " + length);

        // create a file with length entries
        TextWriter output = new OutputStreamWriter(localFs.Create(file));
        try
        {
            for (int entry = 0; entry < length; entry++)
            {
                output.Write(Sharpen.Extensions.ToString(entry * 2));
                output.Write("\t");
                output.Write(Sharpen.Extensions.ToString(entry));
                output.Write("\n");
            }
        }
        finally
        {
            output.Close();
        }

        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat kvFormat = new KeyValueTextInputFormat();
        for (int round = 0; round < 3; round++)
        {
            int requested = random.Next(maxEntries / 20) + 1;
            Log.Debug("splitting: requesting = " + requested);
            IList<InputSplit> splits = kvFormat.GetSplits(job);
            Log.Debug("splitting: got = " + splits.Count);

            // check each split
            BitSet seen = new BitSet(length);
            for (int j = 0; j < splits.Count; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                TaskAttemptContext taskContext =
                    MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader<Text, Text> recordReader =
                    kvFormat.CreateRecordReader(splits[j], taskContext);
                NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.",
                    typeof(KeyValueLineRecordReader), recordReader.GetType());
                MapContext<Text, Text, Text, Text> mapContext =
                    new MapContextImpl<Text, Text, Text, Text>(
                        job.GetConfiguration(), taskContext.GetTaskAttemptID(), recordReader,
                        null, null, MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                recordReader.Initialize(splits[j], mapContext);
                try
                {
                    int recordsRead = 0;
                    while (recordReader.NextKeyValue())
                    {
                        Text currentKey = recordReader.GetCurrentKey();
                        NUnit.Framework.Assert.AreEqual("Key class is Text.",
                            typeof(Text), currentKey.GetType());
                        Text currentValue = recordReader.GetCurrentValue();
                        NUnit.Framework.Assert.AreEqual("Value class is Text.",
                            typeof(Text), currentValue.GetType());

                        int k = System.Convert.ToInt32(currentKey.ToString());
                        int v = System.Convert.ToInt32(currentValue.ToString());
                        // Keys were written as twice the value.
                        NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                        NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                        Log.Debug("read " + v);
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seen.Get(v));
                        seen.Set(v);
                        recordsRead++;
                    }
                    Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + recordsRead);
                }
                finally
                {
                    recordReader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length,
                seen.Cardinality());
        }
    }
}
/// <summary>
/// For a series of randomly growing entry counts, writes a BZip2-compressed tab-separated
/// file of "2*i&lt;TAB&gt;i" lines, asserts <c>KeyValueTextInputFormat</c> reports it as
/// splitable, splits it three times, and checks that every record is well-formed and
/// appears in exactly one split.
/// </summary>
/// <exception cref="System.IO.IOException">
/// Thrown when the BZip2 codec class cannot be loaded.
/// </exception>
public virtual void TestSplitableCodecs()
{
    Job job = Job.GetInstance(defaultConf);
    Configuration conf = job.GetConfiguration();

    // Create the codec
    CompressionCodec codec = null;
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(
            conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    }
    catch (TypeLoadException e)
    {
        // Keep the original failure as the inner exception for diagnosis.
        throw new IOException("Illegal codec!", e);
    }

    Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
    int seed = new Random().Next();
    Log.Info("seed = " + seed);
    Random random = new Random(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);

    int MaxLength = 500000;
    FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20);
    // for a variety of lengths
    for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
    {
        Log.Info("creating; entries = " + length);

        // create a file with length entries
        TextWriter writer = new OutputStreamWriter(
            codec.CreateOutputStream(localFs.Create(file)));
        try
        {
            for (int i = 0; i < length; i++)
            {
                writer.Write(Sharpen.Extensions.ToString(i * 2));
                writer.Write("\t");
                writer.Write(Sharpen.Extensions.ToString(i));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }

        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable",
            format.IsSplitable(job, file));
        for (int i_1 = 0; i_1 < 3; i_1++)
        {
            int numSplits = random.Next(MaxLength / 2000) + 1;
            Log.Info("splitting: requesting = " + numSplits);
            IList<InputSplit> splits = format.GetSplits(job);
            Log.Info("splitting: got = " + splits.Count);

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.Count; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                TaskAttemptContext context =
                    MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader<Text, Text> reader = format.CreateRecordReader(splits[j], context);
                MapContext<Text, Text, Text, Text> mcontext =
                    new MapContextImpl<Text, Text, Text, Text>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                reader.Initialize(splits[j], mcontext);
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        Text key = reader.GetCurrentKey();
                        Text value = reader.GetCurrentValue();
                        int k = System.Convert.ToInt32(key.ToString());
                        int v = System.Convert.ToInt32(value.ToString());
                        // Keys were written as twice the value.
                        NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                        NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                        Log.Debug("read " + k + "," + v);
                        NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.",
                            bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    // Non-empty splits are logged at info level, empty ones at debug level.
                    if (count > 0)
                    {
                        Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count);
                    }
                    else
                    {
                        Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length,
                bits.Cardinality());
        }
    }
}
/// <summary>
/// For a series of randomly growing entry counts, writes a sequence file of
/// (i, 10*i) records, splits it three times under a random maximum split size with
/// <c>SequenceFileAsTextInputFormat</c>, and checks that every key appears in exactly
/// one split.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    Job job = Job.GetInstance(conf);
    FileSystem fs = FileSystem.GetLocal(conf);
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    int seed = new Random().Next();
    Random random = new Random(seed);
    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1)
    {
        // create a file with length entries
        SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(fs, conf, file,
            typeof(IntWritable), typeof(LongWritable));
        try
        {
            for (int entry = 0; entry < length; entry++)
            {
                IntWritable entryKey = new IntWritable(entry);
                LongWritable entryValue = new LongWritable(10 * entry);
                seqWriter.Append(entryKey, entryValue);
            }
        }
        finally
        {
            seqWriter.Close();
        }

        TaskAttemptContext taskContext =
            MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());

        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> textFormat = new SequenceFileAsTextInputFormat();
        for (int round = 0; round < 3; round++)
        {
            // check each split
            BitSet seen = new BitSet(length);
            int requested = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
            FileInputFormat.SetMaxInputSplitSize(job,
                fs.GetFileStatus(file).GetLen() / requested);
            foreach (InputSplit current in textFormat.GetSplits(job))
            {
                RecordReader<Text, Text> recordReader =
                    textFormat.CreateRecordReader(current, taskContext);
                MapContext<Text, Text, Text, Text> mapContext =
                    new MapContextImpl<Text, Text, Text, Text>(
                        job.GetConfiguration(), taskContext.GetTaskAttemptID(), recordReader,
                        null, null, MapReduceTestUtil.CreateDummyReporter(), current);
                recordReader.Initialize(current, mapContext);
                NUnit.Framework.Assert.AreEqual("reader class is SequenceFileAsTextRecordReader.",
                    typeof(SequenceFileAsTextRecordReader), recordReader.GetType());
                try
                {
                    int recordsRead = 0;
                    while (recordReader.NextKeyValue())
                    {
                        Text currentKey = recordReader.GetCurrentKey();
                        int keyInt = System.Convert.ToInt32(currentKey.ToString());
                        // Each key may be seen only once across all splits.
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.",
                            seen.Get(keyInt));
                        seen.Set(keyInt);
                        recordsRead++;
                    }
                }
                finally
                {
                    recordReader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length,
                seen.Cardinality());
        }
    }
}