/// <summary>
/// Creates a new TaggedInputSplit that records the given split, the split's
/// runtime type, the configuration, and the InputFormat and Mapper types.
/// </summary>
/// <param name="inputSplit">The InputSplit to be tagged</param>
/// <param name="conf">The configuration to use</param>
/// <param name="inputFormatClass">The InputFormat class to use for this job</param>
/// <param name="mapperClass">The Mapper class to use for this job</param>
public TaggedInputSplit(InputSplit inputSplit, Configuration conf, Type inputFormatClass, Type mapperClass)
{
    // Resolve the concrete split type up front; this dereferences inputSplit
    // before any field is written.
    Type concreteSplitType = inputSplit.GetType();
    inputSplitClass = concreteSplitType;

    this.inputSplit = inputSplit;
    this.inputFormatClass = inputFormatClass;
    this.mapperClass = mapperClass;
    this.conf = conf;
}
/// <summary>
/// Builds the composite record reader for this node: instantiates the reader
/// constructor registered under <c>ident</c> and adds one child reader per
/// entry in <c>kids</c>, giving the i-th child the i-th sub-split.
/// </summary>
/// <param name="split">The split to read; must be a CompositeInputSplit.</param>
/// <param name="taskContext">Supplies the configuration and is forwarded to every child.</param>
/// <returns>The composite reader with all child readers added.</returns>
/// <exception cref="System.IO.IOException">
/// <paramref name="split"/> is null or not a CompositeInputSplit, no reader is
/// registered for <c>ident</c>, or constructing the reader fails.
/// </exception>
/// <exception cref="System.Exception"/>
public override RecordReader CreateRecordReader(InputSplit split, TaskAttemptContext taskContext)
{
    if (!(split is CompositeInputSplit))
    {
        // "is" is false for null as well, so guard the GetType() call: a null
        // split must surface as the documented IOException, not a
        // NullReferenceException raised while building the message.
        string splitTypeName = split == null ? "null" : split.GetType().FullName;
        throw new IOException("Invalid split type:" + splitTypeName);
    }
    CompositeInputSplit spl = (CompositeInputSplit)split;
    int capacity = kids.Count;
    CompositeRecordReader ret = null;
    try
    {
        if (!rrCstrMap.Contains(ident))
        {
            throw new IOException("No RecordReader for " + ident);
        }
        ret = (CompositeRecordReader)rrCstrMap[ident].NewInstance(id, taskContext.GetConfiguration(), capacity, cmpcl);
    }
    catch (MemberAccessException e)
    {
        // System.IO.IOException has no constructor taking only an exception;
        // pass the message and keep the original as InnerException.
        throw new IOException(e.Message, e);
    }
    catch (InstantiationException e)
    {
        throw new IOException(e.Message, e);
    }
    catch (TargetInvocationException e)
    {
        throw new IOException(e.Message, e);
    }
    for (int i = 0; i < capacity; ++i)
    {
        // child types unknowable, hence the cast on each child reader
        ret.Add((ComposableRecordReader)kids[i].CreateRecordReader(spl.Get(i), taskContext));
    }
    return (ComposableRecordReader)ret;
}
/// <summary>
/// Builds the composite record reader for this node: instantiates the reader
/// constructor registered under <c>ident</c> and adds one child reader per
/// entry in <c>kids</c>, giving the i-th child the i-th sub-split.
/// </summary>
/// <param name="split">The split to read; must be a CompositeInputSplit.</param>
/// <param name="job">Job configuration passed to the reader constructor and to every child.</param>
/// <param name="reporter">Reporter forwarded to every child.</param>
/// <returns>The composite reader with all child readers added.</returns>
/// <exception cref="System.IO.IOException">
/// <paramref name="split"/> is null or not a CompositeInputSplit, no reader is
/// registered for <c>ident</c>, or constructing the reader fails.
/// </exception>
public override ComposableRecordReader GetRecordReader(InputSplit split, JobConf job, Reporter reporter)
{
    // child types unknowable
    if (!(split is CompositeInputSplit))
    {
        // "is" is false for null as well, so guard the GetType() call: a null
        // split must surface as the documented IOException, not a
        // NullReferenceException raised while building the message.
        string splitTypeName = split == null ? "null" : split.GetType().FullName;
        throw new IOException("Invalid split type:" + splitTypeName);
    }
    CompositeInputSplit spl = (CompositeInputSplit)split;
    int capacity = kids.Count;
    CompositeRecordReader ret = null;
    try
    {
        if (!rrCstrMap.Contains(ident))
        {
            throw new IOException("No RecordReader for " + ident);
        }
        ret = (CompositeRecordReader)rrCstrMap[ident].NewInstance(id, job, capacity, cmpcl);
    }
    catch (MemberAccessException e)
    {
        // Re-raise construction failures as IOException with the original attached as cause.
        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
    }
    catch (InstantiationException e)
    {
        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
    }
    catch (TargetInvocationException e)
    {
        throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
    }
    for (int i = 0; i < capacity; ++i)
    {
        ret.Add(kids[i].GetRecordReader(spl.Get(i), job, reporter));
    }
    return (ComposableRecordReader)ret;
}
/// <summary>
/// Creates input files via <c>CreateFiles</c>, asks CombineTextInputFormat for
/// splits three times, and on each round checks that exactly one
/// CombineFileSplit is returned, that it is read by a CombineFileRecordReader,
/// and that the parsed record values are all distinct and number
/// <c>length</c> in total.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    Job job = Job.GetInstance(new Configuration(defaultConf));
    Random random = new Random();
    long seed = random.NextLong();
    // The seed is logged and then applied to the generator.
    Log.Info("seed = " + seed);
    random.SetSeed(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int length = 10000;
    int numFiles = 10;
    // create files with various lengths
    CreateFiles(length, numFiles, random);
    // create a combined split for the files
    CombineTextInputFormat format = new CombineTextInputFormat();
    for (int i = 0; i < 3; i++)
    {
        // numSplits is only logged here: GetSplits(job) takes no split-count argument.
        // The random.Next call is kept so the generator state advances as before.
        int numSplits = random.Next(length / 20) + 1;
        Log.Info("splitting: requesting = " + numSplits);
        IList<InputSplit> splits = format.GetSplits(job);
        Log.Info("splitting: got = " + splits.Count);
        // we should have a single split as the length is comfortably smaller than
        // the block size
        // NUnit argument order is (expected, actual, message).
        NUnit.Framework.Assert.AreEqual(1, splits.Count, "We got more than one splits!");
        InputSplit split = splits[0];
        NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(), "It should be CombineFileSplit");
        // check the split
        BitSet bits = new BitSet(length);
        Log.Debug("split= " + split);
        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
        RecordReader<LongWritable, Text> reader = format.CreateRecordReader(split, context);
        NUnit.Framework.Assert.AreEqual(typeof(CombineFileRecordReader), reader.GetType(), "reader class is CombineFileRecordReader.");
        MapContext<LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl<LongWritable, Text, LongWritable, Text>(
            job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            int count = 0;
            while (reader.NextKeyValue())
            {
                LongWritable key = reader.GetCurrentKey();
                // Object under test first, message second; the reversed order
                // would check the string literal instead of the key.
                NUnit.Framework.Assert.IsNotNull(key, "Key should not be null.");
                Text value = reader.GetCurrentValue();
                int v = System.Convert.ToInt32(value.ToString());
                Log.Debug("read " + v);
                // Each parsed value may be seen only once.
                NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                bits.Set(v);
                count++;
            }
            Log.Debug("split=" + split + " count=" + count);
        }
        finally
        {
            reader.Close();
        }
        // Every one of the `length` values must have been seen.
        NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
    }
}
/// <summary>
/// Creates input files via <c>CreateFiles</c>, asks
/// CombineSequenceFileInputFormat for splits three times, and on each round
/// checks that exactly one CombineFileSplit is returned and that the keys read
/// from it are all distinct and number <c>length</c> in total.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    JobConf job = new JobConf(conf);
    Reporter reporter = Reporter.Null;
    Random random = new Random();
    long seed = random.NextLong();
    // The seed is logged and then applied to the generator.
    Log.Info("seed = " + seed);
    random.SetSeed(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int length = 10000;
    int numFiles = 10;
    // create files with various lengths
    CreateFiles(length, numFiles, random);
    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    // key and value are reused across every reader.Next call below.
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < 3; i++)
    {
        int numSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1;
        Log.Info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.GetSplits(job, numSplits);
        Log.Info("splitting: got = " + splits.Length);
        // we should have a single split as the length is comfortably smaller than
        // the block size
        // NUnit argument order is (expected, actual, message).
        NUnit.Framework.Assert.AreEqual(1, splits.Length, "We got more than one splits!");
        InputSplit split = splits[0];
        NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(), "It should be CombineFileSplit");
        // check each split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.GetRecordReader(split, job, reporter);
        try
        {
            while (reader.Next(key, value))
            {
                // Each key may be seen only once; condition first, message second.
                NUnit.Framework.Assert.IsFalse(bits.Get(key.Get()), "Key in multiple partitions.");
                bits.Set(key.Get());
            }
        }
        finally
        {
            reader.Close();
        }
        // Every one of the `length` keys must have been seen.
        NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
    }
}
/// <summary>
/// Creates input files via <c>CreateFiles</c>, asks CombineTextInputFormat for
/// splits three times, and on each round checks that exactly one
/// CombineFileSplit is returned and that the parsed record values are all
/// distinct and number <c>length</c> in total.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    JobConf job = new JobConf(defaultConf);
    Random random = new Random();
    long seed = random.NextLong();
    // The seed is logged and then applied to the generator.
    Log.Info("seed = " + seed);
    random.SetSeed(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int length = 10000;
    int numFiles = 10;
    CreateFiles(length, numFiles, random);
    // create a combined split for the files
    CombineTextInputFormat format = new CombineTextInputFormat();
    // key and value are reused across every reader.Next call below.
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 0; i < 3; i++)
    {
        int numSplits = random.Next(length / 20) + 1;
        Log.Info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.GetSplits(job, numSplits);
        Log.Info("splitting: got = " + splits.Length);
        // we should have a single split as the length is comfortably smaller than
        // the block size
        // NUnit argument order is (expected, actual, message).
        NUnit.Framework.Assert.AreEqual(1, splits.Length, "We got more than one splits!");
        InputSplit split = splits[0];
        NUnit.Framework.Assert.AreEqual(typeof(CombineFileSplit), split.GetType(), "It should be CombineFileSplit");
        // check the split
        BitSet bits = new BitSet(length);
        Log.Debug("split= " + split);
        // voidReporter is declared outside this method; the comment that
        // accompanied it describes it as a reporter that does nothing.
        RecordReader<LongWritable, Text> reader = format.GetRecordReader(split, job, voidReporter);
        try
        {
            int count = 0;
            while (reader.Next(key, value))
            {
                int v = System.Convert.ToInt32(value.ToString());
                Log.Debug("read " + v);
                if (bits.Get(v))
                {
                    // Log where the duplicate was found before the assertion below fails.
                    Log.Warn("conflict with " + v + " at position " + reader.GetPos());
                }
                // Each parsed value may be seen only once; condition first, message second.
                NUnit.Framework.Assert.IsFalse(bits.Get(v), "Key in multiple partitions.");
                bits.Set(v);
                count++;
            }
            Log.Info("splits=" + split + " count=" + count);
        }
        finally
        {
            reader.Close();
        }
        // Every one of the `length` values must have been seen.
        NUnit.Framework.Assert.AreEqual(length, bits.Cardinality(), "Some keys in no partition.");
    }
}