/// <summary>
/// Drives a single task attempt through setup, write, task commit and job
/// commit using <c>MapFileOutputFormat</c>, validates the committed map-file
/// output under <c>outDir</c>, and finally deletes <c>outDir</c>.
/// </summary>
/// <param name="version">
/// value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
/// </param>
/// <exception cref="System.Exception"/>
private void TestMapFileOutputCommitterInternal(int version)
{
    Job mapFileJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(mapFileJob, outDir);

    Configuration jobConf = mapFileJob.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
    jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
    FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

    // Job-level setup first, then task-level setup.
    outputCommitter.SetupJob(jobCtx);
    outputCommitter.SetupTask(taskCtx);

    // Write the task output through a map-file record writer.
    RecordWriter mapFileWriter = new MapFileOutputFormat().GetRecordWriter(taskCtx);
    WriteMapFileOutput(mapFileWriter, taskCtx);

    // Commit the task, then the job.
    outputCommitter.CommitTask(taskCtx);
    outputCommitter.CommitJob(jobCtx);

    // Check what landed in the output directory, then clean it up.
    ValidateMapFileOutputContent(FileSystem.Get(mapFileJob.GetConfiguration()), outDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Builds the default work-file path for this output format: a unique file
/// name placed under the work path of the task's <c>FileOutputCommitter</c>.
/// </summary>
/// <param name="context">the task context</param>
/// <param name="extension">an extension to add to the filename</param>
/// <returns>a full path $output/_temporary/$taskid/part-[mr]-$id</returns>
/// <exception cref="System.IO.IOException"/>
public virtual Path GetDefaultWorkFile(TaskAttemptContext context, string extension)
{
    FileOutputCommitter workCommitter = (FileOutputCommitter)GetOutputCommitter(context);

    // Resolve the directory before the file name, in the same order as the
    // single-expression form this replaces.
    Path workDir = workCommitter.GetWorkPath();
    string uniqueName = GetUniqueFile(context, GetOutputName(context), extension);

    return new Path(workDir, uniqueName);
}
/// <summary>
/// Variant of the default work-file lookup that places the unique file name
/// inside <c>TestFileOutputCommitter.SubDir</c> beneath the committer's work
/// path, using the output committer of the outer enclosing instance.
/// </summary>
/// <param name="context">the task context</param>
/// <param name="extension">an extension to add to the filename</param>
/// <returns>work path / <c>SubDir</c> / unique file name</returns>
/// <exception cref="System.IO.IOException"/>
public override Path GetDefaultWorkFile(TaskAttemptContext context, string extension)
{
    FileOutputCommitter enclosingCommitter =
        (FileOutputCommitter)this._enclosing._enclosing.GetOutputCommitter(context);

    // Directory first, file name second: same evaluation order as the
    // nested-constructor form.
    Path subDirUnderWork = new Path(enclosingCommitter.GetWorkPath(), TestFileOutputCommitter.SubDir);
    string uniqueName = FileOutputFormat.GetUniqueFile(
        context, FileOutputFormat.GetOutputName(context), extension);

    return new Path(subDirUnderWork, uniqueName);
}
/// <summary>
/// Returns the committer held in the <c>committer</c> field, creating a
/// <c>FileOutputCommitter</c> for the context's output path on first use.
/// The check and the assignment both happen while holding <c>lock (this)</c>.
/// </summary>
/// <param name="context">the task context used to resolve the output path</param>
/// <returns>the cached (or newly created) output committer</returns>
/// <exception cref="System.IO.IOException"/>
public override OutputCommitter GetOutputCommitter(TaskAttemptContext context)
{
    lock (this)
    {
        if (committer != null)
        {
            return committer;
        }

        committer = new FileOutputCommitter(GetOutputPath(context), context);
        return committer;
    }
}
/// <summary>
/// Submits two task attempts to a two-thread pool, each paired with its own
/// <c>_TextOutputFormat_508</c> instance, waits for the pool to terminate,
/// commits the job and then asserts that no <c>SubDir</c> directory exists
/// nested inside <c>OutSubDir</c> before validating the content of
/// <c>OutSubDir</c>.
/// </summary>
/// <param name="version">
/// value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
/// </param>
/// <exception cref="System.Exception"/>
private void TestConcurrentCommitTaskWithSubDir(int version)
{
    Job concurrentJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(concurrentJob, outDir);

    Configuration jobConf = concurrentJob.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
    jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    // Route the "file" scheme to the test's RLFS implementation.
    // NOTE(review): CloseAll() presumably discards cached FileSystem instances
    // so the new implementation is picked up — confirm.
    jobConf.SetClass("fs.file.impl", typeof(TestFileOutputCommitter.RLFS), typeof(FileSystem));
    FileSystem.CloseAll();

    JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
    FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jobCtx);
    amCommitter.SetupJob(jobCtx);

    // One context per task attempt (taskID and taskID1).
    TaskAttemptContext[] attemptContexts = new TaskAttemptContextImpl[]
    {
        new TaskAttemptContextImpl(jobConf, taskID),
        new TaskAttemptContextImpl(jobConf, taskID1)
    };

    // One output format instance per attempt.
    TextOutputFormat[] formats = new TextOutputFormat[]
    {
        new _TextOutputFormat_508(this),
        new _TextOutputFormat_508(this)
    };

    ExecutorService pool = Executors.NewFixedThreadPool(2);
    try
    {
        for (int idx = 0; idx < attemptContexts.Length; idx++)
        {
            pool.Submit(new _Callable_524(this, formats, idx, attemptContexts));
        }
    }
    finally
    {
        // Stop accepting work, then poll once a second until every task ends.
        pool.Shutdown();
        for (;;)
        {
            if (pool.AwaitTermination(1, TimeUnit.Seconds))
            {
                break;
            }
            Log.Info("Awaiting thread termination!");
        }
    }

    amCommitter.CommitJob(jobCtx);

    RawLocalFileSystem rawLocalFs = new RawLocalFileSystem();
    rawLocalFs.SetConf(jobConf);
    Path nestedSubDir = new Path(OutSubDir, SubDir);
    NUnit.Framework.Assert.IsFalse("Must not end up with sub_dir/sub_dir", rawLocalFs.Exists(nestedSubDir));

    // validate output
    ValidateContent(OutSubDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Runs setup, task commit and job commit for a task attempt that writes no
/// output at all, then deletes <c>outDir</c>. The test contains no explicit
/// assertions; it passes when none of the committer calls throw.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestEmptyOutput()
{
    Job emptyJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(emptyJob, outDir);

    Configuration jobConf = emptyJob.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);

    JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
    FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

    // Job-level setup, then task-level setup.
    outputCommitter.SetupJob(jobCtx);
    outputCommitter.SetupTask(taskCtx);

    // Deliberately no writes here: commit straight away.
    outputCommitter.CommitTask(taskCtx);
    outputCommitter.CommitJob(jobCtx);

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Runs a full setup / write / commit cycle with <c>TextOutputFormat</c> and
/// compares the committed part file under <c>outDir</c> with the text the
/// test expects, line by line. Deletes <c>outDir</c> afterwards.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCommitter()
{
    Job job = Job.GetInstance();
    FileOutputFormat.SetOutputPath(job, outDir);
    Configuration conf = job.GetConfiguration();
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

    // setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(tContext);
    WriteOutput(theRecordWriter, tContext);

    // do commit
    committer.CommitTask(tContext);
    committer.CommitJob(jContext);

    // validate output: the part file is read back from the final output dir
    FilePath expectedFile = new FilePath(new Path(outDir, partFile).ToString());
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append('\t').Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append('\t').Append(val2).Append("\n");
    string output = UtilsForTests.Slurp(expectedFile);

    // Expected value goes first so a failure report labels the two strings
    // correctly (the arguments used to be passed the other way round).
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
//test passed
/// <summary>
/// Writes task output, aborts the task and then the job, and asserts after
/// each step that the corresponding temporary location is gone: first the
/// part file under the committer's work path, then the pending directory
/// under <c>outDir</c>. Finally asserts that <c>outDir</c> lists no files.
/// </summary>
/// <param name="version">
/// value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
/// </param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestAbortInternal(int version)
{
    Job abortJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(abortJob, outDir);

    Configuration jobConf = abortJob.GetConfiguration();
    jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
    jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
    FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

    // Job-level setup, then task-level setup.
    outputCommitter.SetupJob(jobCtx);
    outputCommitter.SetupTask(taskCtx);

    // Write some output so there is something for the abort to remove.
    TextOutputFormat textFormat = new TextOutputFormat();
    RecordWriter textWriter = textFormat.GetRecordWriter(taskCtx);
    WriteOutput(textWriter, taskCtx);

    // Abort the task: the part file in the work path must no longer exist.
    outputCommitter.AbortTask(taskCtx);
    Path workPartPath = new Path(outputCommitter.GetWorkPath(), partFile);
    FilePath workPartFile = new FilePath(workPartPath.ToString());
    NUnit.Framework.Assert.IsFalse("task temp dir still exists", workPartFile.Exists());

    // Abort the job: the pending directory under outDir must no longer exist.
    outputCommitter.AbortJob(jobCtx, JobStatus.State.Failed);
    Path pendingPath = new Path(outDir, FileOutputCommitter.PendingDirName);
    FilePath pendingDir = new FilePath(pendingPath.ToString());
    NUnit.Framework.Assert.IsFalse("job temp dir still exists", pendingDir.Exists());

    FilePath outputDir = new FilePath(outDir.ToString());
    NUnit.Framework.Assert.AreEqual("Output directory not empty", 0, outputDir.ListFiles().Length);

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Get the <see cref="Org.Apache.Hadoop.FS.Path"/> to the task's temporary
/// output directory for the map-reduce job.
/// <para><b>Tasks' Side-Effect Files</b></para>
/// <para>
/// Some applications need to create/write-to side-files, which differ from
/// the actual job-outputs.
/// </para>
/// <para>
/// In such cases there could be issues with 2 instances of the same TIP
/// (running simultaneously e.g. speculative tasks) trying to open/write-to
/// the same file (path) on HDFS. Hence the application-writer will have to
/// pick unique names per task-attempt (e.g. using the attemptid, say
/// <c>attempt_200709221812_0001_m_000000_0</c>), not just per TIP.
/// </para>
/// <para>
/// To get around this the Map-Reduce framework helps the application-writer
/// out by maintaining a special
/// <c>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</c>
/// sub-directory for each task-attempt on HDFS where the output of the
/// task-attempt goes. On successful completion of the task-attempt the files
/// in the
/// <c>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</c>
/// (only) are <i>promoted</i> to
/// <c>${mapreduce.output.fileoutputformat.outputdir}</c>. Of course, the
/// framework discards the sub-directory of unsuccessful task-attempts. This
/// is completely transparent to the application.
/// </para>
/// <para>
/// The application-writer can take advantage of this by creating any
/// side-files required in a work directory during execution of the task,
/// i.e. via
/// <see cref="FileOutputFormat{K, V}.GetWorkOutputPath(Org.Apache.Hadoop.Mapreduce.TaskInputOutputContext{KEYIN, VALUEIN, KEYOUT, VALUEOUT})"/>,
/// and the framework will move them out similarly - thus there is no need to
/// pick unique paths per task-attempt.
/// </para>
/// <para>
/// The entire discussion holds true for maps of jobs with reducer=NONE
/// (i.e. 0 reduces) since output of the map, in that case, goes directly to
/// HDFS.
/// </para>
/// </summary>
/// <param name="context">
/// the task context whose output committer is cast to
/// <c>FileOutputCommitter</c> and asked for its work path
/// </param>
/// <returns>
/// the <see cref="Org.Apache.Hadoop.FS.Path"/> to the task's temporary
/// output directory for the map-reduce job.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public static Path GetWorkOutputPath<_T0>(TaskInputOutputContext<_T0> context)
{
    return ((FileOutputCommitter)context.GetOutputCommitter()).GetWorkPath();
}
/// <summary>
/// Exercises task and job abort against the <c>faildel:///</c> file system,
/// which is mapped to <c>TestFileOutputCommitter.FakeFileSystem</c>. Both
/// <c>AbortTask</c> and <c>AbortJob</c> are expected to throw an
/// <c>IOException</c> whose message contains "fake delete failed", and the
/// task part file and job temp dir are expected to still exist afterwards.
/// </summary>
/// <param name="version">
/// value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
/// </param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestFailAbortInternal(int version)
{
    Job job = Job.GetInstance();
    Configuration conf = job.GetConfiguration();
    conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
    conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem), typeof(FileSystem));
    conf.Set(MRJobConfig.TaskAttemptId, attempt);
    conf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
    FileOutputFormat.SetOutputPath(job, outDir);
    JobContext jContext = new JobContextImpl(conf, taskID.GetJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // write output
    TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat();
    RecordWriter<object, object> theRecordWriter = theOutputFormat.GetRecordWriter(tContext);
    WriteOutput(theRecordWriter, tContext);

    // do abort: the task abort is expected to fail
    Exception th = null;
    try
    {
        committer.AbortTask(tContext);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));

    // After the failed task abort the part file must still be on disk.
    Path jtd = committer.GetJobAttemptPath(jContext);
    FilePath jobTmpDir = new FilePath(jtd.ToUri().GetPath());
    Path ttd = committer.GetTaskAttemptPath(tContext);
    FilePath taskTmpDir = new FilePath(ttd.ToUri().GetPath());
    FilePath expectedFile = new FilePath(taskTmpDir, partFile);
    NUnit.Framework.Assert.IsTrue(expectedFile + " does not exists", expectedFile.Exists());

    // The job abort is expected to fail the same way.
    th = null;
    try
    {
        committer.AbortJob(jContext, JobStatus.State.Failed);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    // The literal must stay on one line: a regular string literal cannot
    // contain a line break, and the text has to match the first assertion.
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    NUnit.Framework.Assert.IsTrue("job temp dir does not exists", jobTmpDir.Exists());

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Commits a task under application attempt 1 with <paramref name="commitVersion"/>,
/// then sets up a second committer under application attempt 2 with
/// <paramref name="recoveryVersion"/>, recovers the task, commits the job and
/// validates the content of <c>outDir</c>. Along the way it asserts whether
/// the committed-task path exists, depending on which version was used.
/// </summary>
/// <param name="commitVersion">algorithm version configured for the first attempt</param>
/// <param name="recoveryVersion">algorithm version configured for the second attempt</param>
/// <exception cref="System.Exception"/>
private void TestRecoveryInternal(int commitVersion, int recoveryVersion)
{
    Job recoveryJob = Job.GetInstance();
    FileOutputFormat.SetOutputPath(recoveryJob, outDir);

    // ---- first application attempt ----
    Configuration firstConf = recoveryJob.GetConfiguration();
    firstConf.Set(MRJobConfig.TaskAttemptId, attempt);
    firstConf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
    firstConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, commitVersion);

    JobContext firstJobCtx = new JobContextImpl(firstConf, taskID.GetJobID());
    TaskAttemptContext firstTaskCtx = new TaskAttemptContextImpl(firstConf, taskID);
    FileOutputCommitter firstCommitter = new FileOutputCommitter(outDir, firstTaskCtx);

    // Job-level setup, then task-level setup.
    firstCommitter.SetupJob(firstJobCtx);
    firstCommitter.SetupTask(firstTaskCtx);

    // Write the task output.
    TextOutputFormat textFormat = new TextOutputFormat();
    RecordWriter textWriter = textFormat.GetRecordWriter(firstTaskCtx);
    WriteOutput(textWriter, firstTaskCtx);

    // Commit the task only; the job is committed by the second attempt.
    firstCommitter.CommitTask(firstTaskCtx);
    Path firstCommittedPath = firstCommitter.GetCommittedTaskPath(firstTaskCtx);
    FilePath firstCommittedDir = new FilePath(firstCommittedPath.ToUri().GetPath());
    if (commitVersion == 1)
    {
        NUnit.Framework.Assert.IsTrue("Version 1 commits to temporary dir " + firstCommittedDir,
            firstCommittedDir.Exists());
        ValidateContent(firstCommittedDir);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse("Version 2 commits to output dir " + firstCommittedDir,
            firstCommittedDir.Exists());
    }

    // ---- second application attempt: recover the first attempt's task output ----
    // NOTE(review): this configuration is fetched from the same job as the one
    // above; whether it is the very same instance is not visible here — confirm.
    Configuration secondConf = recoveryJob.GetConfiguration();
    secondConf.Set(MRJobConfig.TaskAttemptId, attempt);
    secondConf.SetInt(MRJobConfig.ApplicationAttemptId, 2);
    secondConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, recoveryVersion);

    JobContext secondJobCtx = new JobContextImpl(secondConf, taskID.GetJobID());
    TaskAttemptContext secondTaskCtx = new TaskAttemptContextImpl(secondConf, taskID);
    FileOutputCommitter secondCommitter = new FileOutputCommitter(outDir, secondTaskCtx);

    // The task attempt context is what gets handed to SetupJob here.
    secondCommitter.SetupJob(secondTaskCtx);
    Path secondCommittedPath = secondCommitter.GetCommittedTaskPath(secondTaskCtx);
    FilePath secondCommittedDir = new FilePath(secondCommittedPath.ToUri().GetPath());

    secondCommitter.RecoverTask(secondTaskCtx);
    if (recoveryVersion == 1)
    {
        NUnit.Framework.Assert.IsTrue("Version 1 recovers to " + secondCommittedDir,
            secondCommittedDir.Exists());
        ValidateContent(secondCommittedDir);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse("Version 2 commits to output dir " + secondCommittedDir,
            secondCommittedDir.Exists());
        if (commitVersion == 1)
        {
            // The first attempt's committed dir is expected to be empty now.
            NUnit.Framework.Assert.IsTrue(
                "Version 2 recovery moves to output dir from " + firstCommittedDir,
                firstCommittedDir.List().Length == 0);
        }
    }

    secondCommitter.CommitJob(secondJobCtx);
    ValidateContent(outDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}