/// <summary>
/// Reads every record in the given split through a fresh record reader and
/// returns the values, in order, as new <c>Text</c> instances.
/// </summary>
/// <param name="format">the input format used to create the record reader</param>
/// <param name="split">the split to read to exhaustion</param>
/// <param name="job">the job whose configuration drives the reader</param>
/// <returns>a list of copies of every value found in the split</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private static IList<Text> ReadSplit(KeyValueTextInputFormat format, InputSplit split, Job job)
{
    IList<Text> result = new AList<Text>();
    Configuration conf = job.GetConfiguration();
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf);
    // Reuse the dummy context created above instead of building a second,
    // identical one just for the reader (the original created two).
    RecordReader<Text, Text> reader = format.CreateRecordReader(split, context);
    MapContext<Text, Text, Text, Text> mcontext = new MapContextImpl<Text, Text, Text, Text>(
        conf, context.GetTaskAttemptID(), reader, null, null,
        MapReduceTestUtil.CreateDummyReporter(), split);
    reader.Initialize(split, mcontext);
    while (reader.NextKeyValue())
    {
        // Copy the value: record readers may reuse the same Text object
        // between calls to NextKeyValue().
        result.AddItem(new Text(reader.GetCurrentValue()));
    }
    reader.Close();
    return result;
}
/// <summary>
/// Exercises KeyValueTextInputFormat over files of many different lengths,
/// splitting each file several times and verifying that every key/value
/// pair lands in exactly one partition with the expected key/value shape.
/// </summary>
public virtual void TestFormat()
{
    Job job = Job.GetInstance(new Configuration(defaultConf));
    Path file = new Path(workDir, "test.txt");
    int seed = new Random().Next();
    Log.Info("seed = " + seed);
    Random rnd = new Random(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int MaxLength = 10000;
    // for a variety of lengths
    for (int length = 0; length < MaxLength; length += rnd.Next(MaxLength / 10) + 1)
    {
        Log.Debug("creating; entries = " + length);
        // create a file with length entries
        TextWriter writer = new OutputStreamWriter(localFs.Create(file));
        try
        {
            // Each line is "<2*row>\t<row>" so the key is always even and
            // value == key / 2 — a property the read loop below asserts.
            for (int row = 0; row < length; row++)
            {
                writer.Write(Sharpen.Extensions.ToString(row * 2));
                writer.Write("\t");
                writer.Write(Sharpen.Extensions.ToString(row));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }
        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        for (int attempt = 0; attempt < 3; attempt++)
        {
            int numSplits = rnd.Next(MaxLength / 20) + 1;
            Log.Debug("splitting: requesting = " + numSplits);
            IList<InputSplit> splits = format.GetSplits(job);
            Log.Debug("splitting: got = " + splits.Count);
            // one bit per expected record; set as each value is seen so we
            // can detect both duplicates and omissions across the splits
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.Count; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                TaskAttemptContext context =
                    MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader<Text, Text> reader = format.CreateRecordReader(splits[j], context);
                Type clazz = reader.GetType();
                NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.",
                    typeof(KeyValueLineRecordReader), clazz);
                MapContext<Text, Text, Text, Text> mcontext =
                    new MapContextImpl<Text, Text, Text, Text>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader,
                        null, null, MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                reader.Initialize(splits[j], mcontext);
                Text key = null;
                Text value = null;
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        key = reader.GetCurrentKey();
                        clazz = key.GetType();
                        NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), clazz);
                        value = reader.GetCurrentValue();
                        clazz = value.GetType();
                        NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), clazz);
                        int k = System.Convert.ToInt32(key.ToString());
                        int v = System.Convert.ToInt32(value.ToString());
                        NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                        NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                        Log.Debug("read " + v);
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.",
                length, bits.Cardinality());
        }
    }
}
/// <summary>
/// Verifies that KeyValueTextInputFormat reports a BZip2-compressed input
/// as splittable and that reading every resulting split yields each
/// key/value pair exactly once, across files of varying lengths.
/// </summary>
public virtual void TestSplitableCodecs()
{
    Job job = Job.GetInstance(defaultConf);
    Configuration conf = job.GetConfiguration();
    // Create the codec
    CompressionCodec codec = null;
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(
            conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    }
    catch (TypeLoadException)
    {
        throw new IOException("Illegal codec!");
    }
    Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
    int seed = new Random().Next();
    Log.Info("seed = " + seed);
    Random random = new Random(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int MaxLength = 500000;
    // Force many splits so that split boundaries fall inside compressed blocks.
    FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20);
    // for a variety of lengths
    for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
    {
        Log.Info("creating; entries = " + length);
        // create a file with length entries; each line is "<2*i>\t<i>" so
        // the key is always even and value == key / 2 (asserted below)
        TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create(file)));
        try
        {
            for (int i = 0; i < length; i++)
            {
                writer.Write(Sharpen.Extensions.ToString(i * 2));
                writer.Write("\t");
                writer.Write(Sharpen.Extensions.ToString(i));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }
        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable",
            format.IsSplitable(job, file));
        for (int i_1 = 0; i_1 < 3; i_1++)
        {
            int numSplits = random.Next(MaxLength / 2000) + 1;
            Log.Info("splitting: requesting = " + numSplits);
            IList<InputSplit> splits = format.GetSplits(job);
            Log.Info("splitting: got = " + splits.Count);
            // one bit per expected record: detects both records read twice
            // (duplicate across splits) and records never read (omission)
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.Count; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                TaskAttemptContext context =
                    MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader<Text, Text> reader = format.CreateRecordReader(splits[j], context);
                // (removed an unused `Type clazz = reader.GetType();` — the
                // reader-class assertion exists only in TestFormat)
                MapContext<Text, Text, Text, Text> mcontext =
                    new MapContextImpl<Text, Text, Text, Text>(
                        job.GetConfiguration(), context.GetTaskAttemptID(), reader,
                        null, null, MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                reader.Initialize(splits[j], mcontext);
                Text key = null;
                Text value = null;
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        key = reader.GetCurrentKey();
                        value = reader.GetCurrentValue();
                        int k = System.Convert.ToInt32(key.ToString());
                        int v = System.Convert.ToInt32(value.ToString());
                        NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                        NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                        Log.Debug("read " + k + "," + v);
                        NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.",
                            bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    if (count > 0)
                    {
                        Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count);
                    }
                    else
                    {
                        Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.",
                length, bits.Cardinality());
        }
    }
}