// By being synchronized, MultipleOutputTask can be used with a
// MultithreadedMapper.
/// <summary>
/// Returns the record writer cached under <paramref name="baseFileName"/>,
/// creating and caching a new one on first use.
/// </summary>
/// <param name="taskContext">
/// Task attempt context; supplies the output format class and configuration
/// used to build a new writer.
/// </param>
/// <param name="baseFileName">
/// Output name set on the context before the writer is created; also the cache key.
/// </param>
/// <returns>The cached or newly created writer (counter-wrapped when counters are enabled).</returns>
/// <exception cref="System.IO.IOException">The output format type could not be loaded.</exception>
/// <exception cref="System.Exception"/>
private RecordWriter GetRecordWriter(TaskAttemptContext taskContext, string baseFileName)
{
    // NOTE(review): lock (this) is kept because a dedicated lock object would
    // need a new field outside this method.
    lock (this)
    {
        // Look for the record writer in the cache. TryGetValue is used because
        // a .NET dictionary indexer throws KeyNotFoundException on a miss
        // instead of returning null.
        RecordWriter writer;
        if (!recordWriters.TryGetValue(baseFileName, out writer) || writer == null)
        {
            // Not cached yet: get the record writer from the context's output format.
            FileOutputFormat.SetOutputName(taskContext, baseFileName);
            try
            {
                OutputFormat outputFormat = (OutputFormat)ReflectionUtils.NewInstance(
                    taskContext.GetOutputFormatClass(), taskContext.GetConfiguration());
                writer = outputFormat.GetRecordWriter(taskContext);
            }
            catch (TypeLoadException e)
            {
                // Keep the original failure as the inner exception.
                throw new IOException(e.Message, e);
            }

            // If counters are enabled, wrap the writer so that writes
            // increment counters.
            if (countersEnabled)
            {
                writer = new MultipleOutputs.RecordWriterWithCounter(writer, baseFileName, context);
            }

            // Add the record writer to the cache.
            recordWriters[baseFileName] = writer;
        }
        return writer;
    }
}
/// <summary>
/// Runs a setup / write / commit cycle using MapFileOutputFormat with the given
/// committer algorithm version, then passes the output directory to
/// ValidateMapFileOutputContent and deletes it.
/// </summary>
/// <param name="version">Value stored under the committer algorithm version key.</param>
/// <exception cref="System.Exception"/>
private void TestMapFileOutputCommitterInternal(int version)
{
    Job mapFileJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(mapFileJob, outDir);

    Configuration jobConf = mapFileJob.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
    jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
    FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

    // Setup phase.
    outputCommitter.SetupJob(jobCtx);
    outputCommitter.SetupTask(taskCtx);

    // Write phase.
    MapFileOutputFormat mapFileFormat = new MapFileOutputFormat();
    RecordWriter mapFileWriter = mapFileFormat.GetRecordWriter(taskCtx);
    WriteMapFileOutput(mapFileWriter, taskCtx);

    // Commit phase.
    outputCommitter.CommitTask(taskCtx);
    outputCommitter.CommitJob(jobCtx);

    // Validation, followed by cleanup of the output directory.
    FileSystem outputFs = FileSystem.Get(mapFileJob.GetConfiguration());
    ValidateMapFileOutputContent(outputFs, outDir);

    FilePath outputRoot = new FilePath(outDir.ToString());
    FileUtil.FullyDelete(outputRoot);
}
/// <summary>
/// Checks that CheckOutputSpecs throws FileAlreadyExistsException when the
/// job's output directory has already been created.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCheckOutputSpecsException()
{
    Job job = Job.GetInstance();
    Path existingDir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "output");
    FileSystem dirFs = existingDir.GetFileSystem(new Configuration());

    // Create the output dir up front so it already exists, then set it for the job.
    dirFs.Mkdirs(existingDir);
    FileOutputFormat.SetOutputPath(job, existingDir);

    // A "full" implementation of FileOutputFormat is not needed for this test.
    FileOutputFormat format = new _FileOutputFormat_54();
    try
    {
        // This should throw a FileAlreadyExistsException because the output
        // directory already exists.
        format.CheckOutputSpecs(job);
        Fail("Should have thrown a FileAlreadyExistsException");
    }
    catch (FileAlreadyExistsException)
    {
        // Correct behavior.
    }
    finally
    {
        // Cleanup.
        bool stillPresent = dirFs.Exists(existingDir);
        if (stillPresent)
        {
            dirFs.Delete(existingDir, true);
        }
    }
}
/// <summary>
/// Writes a marker line to standard error, then creates and immediately closes
/// a file named <c>CustomCleanupFileName</c> in the job's output path.
/// </summary>
/// <param name="context">Job context providing the output path and configuration.</param>
/// <exception cref="System.IO.IOException"/>
public override void CleanupJob(JobContext context)
{
    System.Console.Error.WriteLine("---- HERE ----");

    Path jobOutputDir = FileOutputFormat.GetOutputPath(context);
    FileSystem outputFs = jobOutputDir.GetFileSystem(context.GetConfiguration());

    // Nothing is written to the file; only its creation matters here.
    Path markerFile = new Path(jobOutputDir, CustomCleanupFileName);
    outputFs.Create(markerFile).Close();
}
/// <summary>
/// Builds the default work file path as
/// <c>&lt;committer work path&gt;/&lt;SubDir&gt;/&lt;unique file name&gt;</c>.
/// </summary>
/// <param name="context">Task attempt context used to obtain the committer and the output name.</param>
/// <param name="extension">Extension passed through to FileOutputFormat.GetUniqueFile.</param>
/// <returns>The path of the work file inside the sub-directory.</returns>
/// <exception cref="System.IO.IOException"/>
public override Path GetDefaultWorkFile(TaskAttemptContext context, string extension)
{
    FileOutputCommitter fileCommitter =
        (FileOutputCommitter)this._enclosing._enclosing.GetOutputCommitter(context);

    // Parent directory: the committer's work path extended by the test sub-directory.
    Path subDirPath = new Path(fileCommitter.GetWorkPath(), TestFileOutputCommitter.SubDir);

    // File name: the unique per-task name derived from the configured output name.
    string uniqueName = FileOutputFormat.GetUniqueFile(
        context, FileOutputFormat.GetOutputName(context), extension);

    return new Path(subDirPath, uniqueName);
}
/// <summary>
/// Creates and immediately closes a marker file in the job's output path.
/// The file is named <c>AbortFailedFileName</c> when <paramref name="state"/>
/// equals <c>JobStatus.State.Failed</c>, and <c>AbortKilledFileName</c> otherwise.
/// </summary>
/// <param name="context">Job context providing the output path and configuration.</param>
/// <param name="state">Job state that selects the marker file name.</param>
/// <exception cref="System.IO.IOException"/>
public override void AbortJob(JobContext context, JobStatus.State state)
{
    Path jobOutputDir = FileOutputFormat.GetOutputPath(context);
    FileSystem outputFs = jobOutputDir.GetFileSystem(context.GetConfiguration());

    string markerName;
    if (state.Equals(JobStatus.State.Failed))
    {
        markerName = AbortFailedFileName;
    }
    else
    {
        markerName = AbortKilledFileName;
    }

    Path markerFile = new Path(jobOutputDir, markerName);
    outputFs.Create(markerFile).Close();
}
/// <summary>
/// Submits two task attempts to a two-thread pool, commits the job, and checks
/// that the output does not contain a nested <c>SubDir</c> inside <c>OutSubDir</c>.
/// </summary>
/// <param name="version">Value stored under the committer algorithm version key.</param>
/// <exception cref="System.Exception"/>
private void TestConcurrentCommitTaskWithSubDir(int version)
{
    Job job = Job.GetInstance();
    FileOutputFormat.SetOutputPath(job, outDir);
    Configuration conf = job.GetConfiguration();
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    // Route the "file" scheme to the test's RLFS implementation.
    conf.SetClass("fs.file.impl", typeof(TestFileOutputCommitter.RLFS), typeof(FileSystem));
    // NOTE(review): presumably drops cached FileSystem instances so the
    // mapping above takes effect - confirm against FileSystem.CloseAll.
    FileSystem.CloseAll();

    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);
    amCommitter.SetupJob(jContext);

    TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
    taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
    taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);

    TextOutputFormat[] tof = new TextOutputFormat[2];
    for (int i = 0; i < tof.Length; i++)
    {
        tof[i] = new _TextOutputFormat_508(this);
    }

    ExecutorService executor = Executors.NewFixedThreadPool(2);
    try
    {
        for (int i = 0; i < taCtx.Length; i++)
        {
            // Copy the loop index so each callable is given its own value.
            int taskIdx = i;
            executor.Submit(new _Callable_524(this, tof, taskIdx, taCtx));
        }
    }
    finally
    {
        // Always shut the pool down and wait for both tasks before committing.
        executor.Shutdown();
        while (!executor.AwaitTermination(1, TimeUnit.Seconds))
        {
            Log.Info("Awaiting thread termination!");
        }
    }

    amCommitter.CommitJob(jContext);

    RawLocalFileSystem lfs = new RawLocalFileSystem();
    lfs.SetConf(conf);
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsFalse(
        lfs.Exists(new Path(OutSubDir, SubDir)),
        "Must not end up with sub_dir/sub_dir");

    // Validate output.
    ValidateContent(OutSubDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Checks that FileOutputFormat.SetOutputPath throws a RuntimeException whose
/// inner exception is an IOException when given the path <c>foo:///bar</c>.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestSetOutputPathException()
{
    Job job = Job.GetInstance();
    try
    {
        // An invalid filesystem is used so that the call throws.
        Path invalidFsPath = new Path("foo:///bar");
        FileOutputFormat.SetOutputPath(job, invalidFsPath);
        Fail("Should have thrown a RuntimeException with an IOException inside");
    }
    catch (RuntimeException wrapped)
    {
        bool causedByIo = wrapped.InnerException is IOException;
        NUnit.Framework.Assert.IsTrue(causedByIo);
    }
}
/// <summary>
/// Checks that SequenceFileAsBinaryOutputFormat.CheckOutputSpecs accepts block
/// compression and rejects record compression with InvalidJobConfException.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestcheckOutputSpecsForbidRecordCompression()
{
    Job job = Job.GetInstance();
    FileSystem fs = FileSystem.GetLocal(job.GetConfiguration());
    Path outputdir = new Path(Runtime.GetProperty("test.build.data", "/tmp") + "/output");
    fs.Delete(outputdir, true);

    // Without an output path, FileOutputFormat.CheckOutputSpecs will throw
    // InvalidJobConfException.
    FileOutputFormat.SetOutputPath(job, outputdir);

    // SequenceFileAsBinaryOutputFormat doesn't support record compression.
    // It should throw an exception when checked by CheckOutputSpecs.
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(
        job, SequenceFile.CompressionType.Block);
    try
    {
        new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(job);
    }
    catch (Exception e)
    {
        Fail("Block compression should be allowed for "
            + "SequenceFileAsBinaryOutputFormat:Caught " + e.GetType().FullName);
    }

    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(
        job, SequenceFile.CompressionType.Record);
    bool rejected = false;
    try
    {
        new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(job);
    }
    catch (InvalidJobConfException)
    {
        // Expected.
        rejected = true;
    }
    catch (Exception e)
    {
        // A space was added before "but caught" so the type name and the
        // following text no longer run together.
        Fail("Expected " + typeof(InvalidJobConfException).FullName
            + " but caught " + e.GetType().FullName);
    }

    // Reported outside the try block: if Fail signals failure by throwing an
    // Exception-derived type, the catch-all above would otherwise intercept it
    // and replace this message with a misleading one.
    if (!rejected)
    {
        Fail("Record compression should not be allowed for "
            + "SequenceFileAsBinaryOutputFormat");
    }
}
/// <summary>
/// Checks that constructing a FileOutputCommitter throws IOException when the
/// committer algorithm version in the configuration is set to 3.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestInvalidVersionNumber()
{
    const int invalidAlgorithmVersion = 3;

    Job job = Job.GetInstance();
    FileOutputFormat.SetOutputPath(job, outDir);

    Configuration jobConf = job.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
    jobConf.SetInt(
        FileOutputCommitter.FileoutputcommitterAlgorithmVersion, invalidAlgorithmVersion);

    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
    try
    {
        new FileOutputCommitter(outDir, taskCtx);
        Fail("should've thrown an exception!");
    }
    catch (IOException)
    {
        // Expected: the constructor rejected the version.
    }
}
/// <summary>
/// Runs a setup / commit cycle through FileOutputCommitter without writing any
/// output, then deletes the output directory.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestEmptyOutput()
{
    Job emptyJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(emptyJob, outDir);

    Configuration jobConf = emptyJob.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);

    JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
    FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

    // Setup phase.
    outputCommitter.SetupJob(jobCtx);
    outputCommitter.SetupTask(taskCtx);

    // Deliberately no write phase: no output is produced.

    // Commit phase.
    outputCommitter.CommitTask(taskCtx);
    outputCommitter.CommitJob(jobCtx);

    FilePath outputRoot = new FilePath(outDir.ToString());
    FileUtil.FullyDelete(outputRoot);
}
/// <summary>
/// Runs a setup / write / commit cycle through TextOutputFormat and
/// FileOutputCommitter, then compares the committed part file against the
/// expected text.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCommitter()
{
    Job job = Job.GetInstance();
    FileOutputFormat.SetOutputPath(job, outDir);
    Configuration conf = job.GetConfiguration();
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

    // Setup.
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // Write output.
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(tContext);
    WriteOutput(theRecordWriter, tContext);

    // Do commit.
    committer.CommitTask(tContext);
    committer.CommitJob(jContext);

    // Validate output: build the expected file content line by line.
    FilePath expectedFile = new FilePath(new Path(outDir, partFile).ToString());
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append('\t').Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append('\t').Append(val2).Append("\n");
    string output = UtilsForTests.Slurp(expectedFile);

    // NUnit's AreEqual takes (expected, actual); the original passed them
    // swapped, which inverts the "Expected / But was" parts of a failure report.
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
//test passed
/// <summary>
/// Writes task output, aborts the task and then the job, and checks that the
/// task work file, the pending directory and any other output are gone.
/// </summary>
/// <param name="version">Value stored under the committer algorithm version key.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestAbortInternal(int version)
{
    Job job = Job.GetInstance();
    FileOutputFormat.SetOutputPath(job, outDir);
    Configuration conf = job.GetConfiguration();
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

    // Do setup.
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // Write output.
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(tContext);
    WriteOutput(theRecordWriter, tContext);

    // Do abort. NUnit asserts below take the checked value(s) first and the
    // failure message last.
    committer.AbortTask(tContext);
    FilePath expectedFile = new FilePath(
        new Path(committer.GetWorkPath(), partFile).ToString());
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "task temp dir still exists");

    committer.AbortJob(jContext, JobStatus.State.Failed);
    expectedFile = new FilePath(
        new Path(outDir, FileOutputCommitter.PendingDirName).ToString());
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "job temp dir still exists");
    NUnit.Framework.Assert.AreEqual(
        0,
        new FilePath(outDir.ToString()).ListFiles().Length,
        "Output directory not empty");

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Runs task and job abort against the <c>faildel</c> filesystem scheme (mapped
/// to FakeFileSystem) and checks that both aborts raise an IOException
/// containing "fake delete failed" and leave the temporary files in place.
/// </summary>
/// <param name="version">Value stored under the committer algorithm version key.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestFailAbortInternal(int version)
{
    Job job = Job.GetInstance();
    Configuration conf = job.GetConfiguration();
    conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
    conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem),
        typeof(FileSystem));
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    conf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
    FileOutputFormat.SetOutputPath(job, outDir);
    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

    // Do setup.
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // Write output.
    TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat();
    RecordWriter<object, object> theRecordWriter = theOutputFormat.GetRecordWriter(tContext);
    WriteOutput(theRecordWriter, tContext);

    // Do abort: the task abort is expected to fail.
    Exception th = null;
    try
    {
        committer.AbortTask(tContext);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));

    Path jtd = committer.GetJobAttemptPath(jContext);
    FilePath jobTmpDir = new FilePath(jtd.ToUri().GetPath());
    Path ttd = committer.GetTaskAttemptPath(tContext);
    FilePath taskTmpDir = new FilePath(ttd.ToUri().GetPath());
    FilePath expectedFile = new FilePath(taskTmpDir, partFile);
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");

    // The job abort is expected to fail the same way.
    th = null;
    try
    {
        committer.AbortJob(jContext, JobStatus.State.Failed);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    // The literal is written on one line; in the source it was split by a raw
    // line break, which is not valid inside a regular string literal.
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Runs a MultipleOutputs job configured with JavaSerialization and checks the
/// produced part files, the content of <c>text-r-00000</c> and, optionally,
/// the MultipleOutputs counters.
/// </summary>
/// <param name="withCounters">
/// Passed to MultipleOutputs.SetCountersEnabled; when true the counter group is
/// also asserted.
/// </param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
{
    string input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = CreateJobConf();
    conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
        + "org.apache.hadoop.io.serializer.WritableSerialization");
    Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);
    job.SetJobName("mo");
    MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(long),
        typeof(string));
    MultipleOutputs.SetCountersEnabled(job, withCounters);
    job.SetSortComparatorClass(typeof(JavaSerializationComparator));
    job.SetMapOutputKeyClass(typeof(long));
    job.SetMapOutputValueClass(typeof(string));
    job.SetOutputKeyClass(typeof(long));
    job.SetOutputValueClass(typeof(string));
    job.SetMapperClass(typeof(TestMRMultipleOutputs.MOJavaSerDeMap));
    job.SetReducerClass(typeof(TestMRMultipleOutputs.MOJavaSerDeReduce));
    job.WaitForCompletion(true);

    // Assert number of named output part files.
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OutDir.GetFileSystem(conf);
    FileStatus[] statuses = fs.ListStatus(OutDir);
    foreach (FileStatus status in statuses)
    {
        string fileName = status.GetPath().GetName();
        if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001")
            || fileName.Equals("text-r-00000"))
        {
            namedOutputCount++;
        }
        else
        {
            if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000")
                || fileName.Equals("c-r-00000") || fileName.Equals("d-r-00000")
                || fileName.Equals("e-r-00000"))
            {
                valueBasedOutputCount++;
            }
        }
    }
    NUnit.Framework.Assert.AreEqual(3, namedOutputCount);
    NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);

    // Assert TextOutputFormat files correctness.
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        fs.Open(new Path(FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
    int count = 0;
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
            line = reader.ReadLine();
            count++;
        }
    }
    finally
    {
        // Close the reader even when an assertion inside the loop fails.
        reader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    if (withCounters)
    {
        CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
        NUnit.Framework.Assert.AreEqual(6, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
    }
}
/// <summary>
/// Runs a MultipleOutputs job with a text and a sequence-file named output and
/// checks the produced part files, the content of <c>text-r-00000</c> and
/// <c>sequence_B-r-00000</c> and, optionally, the MultipleOutputs counters.
/// </summary>
/// <param name="withCounters">
/// Passed to MultipleOutputs.SetCountersEnabled; when true the counter group is
/// also asserted.
/// </param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMultipleOutputs(bool withCounters)
{
    string input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);
    job.SetJobName("mo");
    MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat),
        typeof(LongWritable), typeof(Text));
    MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat),
        typeof(IntWritable), typeof(Text));
    MultipleOutputs.SetCountersEnabled(job, withCounters);
    job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
    job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
    job.WaitForCompletion(true);

    // Assert number of named output part files.
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OutDir.GetFileSystem(conf);
    FileStatus[] statuses = fs.ListStatus(OutDir);
    foreach (FileStatus status in statuses)
    {
        string fileName = status.GetPath().GetName();
        if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001")
            || fileName.Equals("text-r-00000") || fileName.Equals("sequence_A-m-00000")
            || fileName.Equals("sequence_A-m-00001") || fileName.Equals("sequence_B-m-00000")
            || fileName.Equals("sequence_B-m-00001") || fileName.Equals("sequence_B-r-00000")
            || fileName.Equals("sequence_C-r-00000"))
        {
            namedOutputCount++;
        }
        else
        {
            if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000")
                || fileName.Equals("c-r-00000") || fileName.Equals("d-r-00000")
                || fileName.Equals("e-r-00000"))
            {
                valueBasedOutputCount++;
            }
        }
    }
    NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
    NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);

    // Assert TextOutputFormat files correctness.
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        fs.Open(new Path(FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
    int count = 0;
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
            line = reader.ReadLine();
            count++;
        }
    }
    finally
    {
        // Close the reader even when an assertion inside the loop fails.
        reader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    // Assert SequenceOutputFormat files correctness.
    SequenceFile.Reader seqReader = new SequenceFile.Reader(
        fs, new Path(FileOutputFormat.GetOutputPath(job), "sequence_B-r-00000"), conf);
    try
    {
        NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
        NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
        count = 0;
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (seqReader.Next(key, value))
        {
            NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
            count++;
        }
    }
    finally
    {
        // Close the sequence-file reader even when an assertion above fails.
        seqReader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    if (withCounters)
    {
        CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
        NUnit.Framework.Assert.AreEqual(9, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
    }
}
/// <summary>
/// Writes <c>Records</c> random int/double pairs through
/// SequenceFileAsBinaryOutputFormat, reads them back with
/// SequenceFileInputFormat, and checks each pair against the same
/// re-seeded random sequence.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Configuration conf = new Configuration();
    Job job = Job.GetInstance(conf);
    Path outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");

    // Remember the seed so the same sequence can be replayed when reading back.
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);

    FileOutputFormat.SetOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job,
        typeof(DoubleWritable));
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job,
        SequenceFile.CompressionType.Block);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context =
        MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat =
        new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.GetOutputCommitter(context);
    committer.SetupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(context);

    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            // Serialize the key into the scratch buffer and copy it into bkey.
            iwritable = new IntWritable(r.Next());
            iwritable.Write(outbuf);
            bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            // Same for the value.
            dwritable = new DoubleWritable(r.NextDouble());
            dwritable.Write(outbuf);
            bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();

            writer.Write(bkey, bval);
        }
    }
    finally
    {
        writer.Close(context);
    }
    committer.CommitTask(context);
    committer.CommitJob(job);

    InputFormat<IntWritable, DoubleWritable> iformat =
        new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // Replay the random sequence from the start for comparison.
    r.SetSeed(seed);
    SequenceFileInputFormat.SetInputPaths(job, outdir);
    Log.Info("Reading data by SequenceFileInputFormat");
    foreach (InputSplit split in iformat.GetSplits(job))
    {
        RecordReader<IntWritable, DoubleWritable> reader =
            iformat.CreateRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            int sourceInt;
            double sourceDouble;
            while (reader.NextKeyValue())
            {
                sourceInt = r.Next();
                sourceDouble = r.NextDouble();
                iwritable = reader.GetCurrentKey();
                dwritable = reader.GetCurrentValue();

                // NUnit takes the checked value(s) first and the message last.
                NUnit.Framework.Assert.AreEqual(
                    sourceInt,
                    iwritable.Get(),
                    "Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*");

                // System.Double has no static Compare; CompareTo is used instead.
                // NOTE(review): assumes DoubleWritable.Get() returns double - confirm.
                NUnit.Framework.Assert.IsTrue(
                    dwritable.Get().CompareTo(sourceDouble) == 0,
                    "Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*");
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual(Records, count, "Some records not found");
}
/// <summary>
/// Commits task output under application attempt 1 using
/// <paramref name="commitVersion"/>, then recovers it under application
/// attempt 2 using <paramref name="recoveryVersion"/>, commits the job and
/// validates the output directory.
/// </summary>
/// <param name="commitVersion">Committer algorithm version used for the first attempt.</param>
/// <param name="recoveryVersion">Committer algorithm version used for the recovering attempt.</param>
/// <exception cref="System.Exception"/>
private void TestRecoveryInternal(int commitVersion, int recoveryVersion)
{
    Job job = Job.GetInstance();
    FileOutputFormat.SetOutputPath(job, outDir);
    Configuration conf = job.GetConfiguration();
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    conf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, commitVersion);
    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

    // Setup.
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // Write output.
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(tContext);
    WriteOutput(theRecordWriter, tContext);

    // Do commit. NUnit asserts below take the condition first and the failure
    // message second.
    committer.CommitTask(tContext);
    Path jobTempDir1 = committer.GetCommittedTaskPath(tContext);
    FilePath jtd = new FilePath(jobTempDir1.ToUri().GetPath());
    if (commitVersion == 1)
    {
        NUnit.Framework.Assert.IsTrue(
            jtd.Exists(), "Version 1 commits to temporary dir " + jtd);
        ValidateContent(jtd);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse(
            jtd.Exists(), "Version 2 commits to output dir " + jtd);
    }

    // Now, while running the second app attempt,
    // recover the task output from the first attempt.
    Configuration conf2 = job.GetConfiguration();
    conf2.Set(MRJobConfig.TaskAttemptId, attempt);
    conf2.SetInt(MRJobConfig.ApplicationAttemptId, 2);
    conf2.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, recoveryVersion);
    JobContext jContext2 = new JobContextImpl(conf2, taskID.GetJobID());
    TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID);
    FileOutputCommitter committer2 = new FileOutputCommitter(outDir, tContext2);
    committer2.SetupJob(tContext2);
    Path jobTempDir2 = committer2.GetCommittedTaskPath(tContext2);
    FilePath jtd2 = new FilePath(jobTempDir2.ToUri().GetPath());

    committer2.RecoverTask(tContext2);
    if (recoveryVersion == 1)
    {
        NUnit.Framework.Assert.IsTrue(jtd2.Exists(), "Version 1 recovers to " + jtd2);
        ValidateContent(jtd2);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse(
            jtd2.Exists(), "Version 2 commits to output dir " + jtd2);
        if (commitVersion == 1)
        {
            NUnit.Framework.Assert.IsTrue(
                jtd.List().Length == 0,
                "Version 2 recovery moves to output dir from " + jtd);
        }
    }

    committer2.CommitJob(jContext2);
    ValidateContent(outDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}