/// <summary>
/// Checks that aborting a task and then the job surfaces an
/// <c>IOException</c> containing "fake delete failed" when the default file
/// system is the "faildel" <c>FakeFileSystem</c>, and that the temporary
/// files are still on disk afterwards.
/// </summary>
/// <param name="version">
/// Value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>.
/// </param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestFailAbortInternal(int version)
{
    JobConf conf = new JobConf();
    conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
    conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem), typeof(FileSystem));
    conf.Set(JobContext.TaskAttemptId, attempt);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
    conf.SetInt(MRConstants.ApplicationAttemptId, 1);
    FileOutputFormat.SetOutputPath(conf, outDir);
    JobContext jContext = new JobContextImpl(conf, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // write output
    // The job temp dir is built as <outDir>/<TempDirName>/<app attempt id>/<TempDirName>.
    string jobTmpRelative = FileOutputCommitter.TempDirName + Path.Separator
        + conf.GetInt(MRConstants.ApplicationAttemptId, 0) + Path.Separator
        + FileOutputCommitter.TempDirName;
    FilePath jobTmpDir = new FilePath(new Path(outDir, jobTmpRelative).ToString());
    FilePath taskTmpDir = new FilePath(jobTmpDir, "_" + taskID);
    FilePath expectedFile = new FilePath(taskTmpDir, partFile);
    TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat();
    RecordWriter<object, object> theRecordWriter = theOutputFormat.GetRecordWriter(
        null, conf, expectedFile.GetAbsolutePath(), null);
    WriteOutput(theRecordWriter, tContext);

    // do abort
    Exception th = null;
    try
    {
        committer.AbortTask(tContext);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");

    th = null;
    try
    {
        committer.AbortJob(jContext, JobStatus.State.Failed);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Runs one task through setup, write, task commit (only when the committer
/// reports it is needed) and job commit, then validates the content of the
/// output directory and deletes it.
/// </summary>
/// <param name="version">
/// Value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>.
/// </param>
/// <exception cref="System.Exception"/>
private void TestCommitterInternal(int version)
{
    JobConf jobConf = new JobConf();
    FileOutputFormat.SetOutputPath(jobConf, outDir);
    jobConf.Set(JobContext.TaskAttemptId, attempt);
    jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    JobContext jobContext = new JobContextImpl(jobConf, ((JobID)taskID.GetJobID()));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskID);
    FileOutputCommitter outputCommitter = new FileOutputCommitter();

    // Prepare the job and the task attempt.
    outputCommitter.SetupJob(jobContext);
    outputCommitter.SetupTask(taskContext);

    // Produce the task output through a text record writer.
    TextOutputFormat format = new TextOutputFormat();
    RecordWriter writer = format.GetRecordWriter(null, jobConf, partFile, null);
    WriteOutput(writer, taskContext);

    // Commit the task if required, then the job.
    bool taskCommitRequired = outputCommitter.NeedsTaskCommit(taskContext);
    if (taskCommitRequired)
    {
        outputCommitter.CommitTask(taskContext);
    }
    outputCommitter.CommitJob(jobContext);

    // Check the committed output, then clean up.
    ValidateContent(outDir);
    FilePath outputDirectory = new FilePath(outDir.ToString());
    FileUtil.FullyDelete(outputDirectory);
}
/// <summary>
/// Writes a fixed mix of key/value pairs (including nulls and
/// <c>NullWritable</c>) through a <c>TextOutputFormat</c> record writer
/// configured with the separator "\u0001", and compares the resulting file
/// with the expected text.
/// </summary>
public virtual void TestFormatWithCustomSeparator()
{
    string separator = "\u0001";
    JobConf job = new JobConf();
    job.Set("mapreduce.output.textoutputformat.separator", separator);
    job.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, workDir);

    FileSystem workFs = workDir.GetFileSystem(job);
    bool created = workFs.Mkdirs(workDir);
    if (!created)
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }

    string file = "test_custom.txt";
    // A reporter that does nothing
    Reporter nullReporter = Reporter.Null;
    TextOutputFormat<object, object> format = new TextOutputFormat<object, object>();
    RecordWriter<object, object> writer = format.GetRecordWriter(localFs, job, file, nullReporter);

    Org.Apache.Hadoop.IO.Text firstKey = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text secondKey = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text firstValue = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text secondValue = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nothing = NullWritable.Get();
    try
    {
        writer.Write(firstKey, firstValue);
        writer.Write(null, nothing);
        writer.Write(null, firstValue);
        writer.Write(nothing, secondValue);
        writer.Write(secondKey, nothing);
        writer.Write(firstKey, null);
        writer.Write(null, null);
        writer.Write(secondKey, secondValue);
    }
    finally
    {
        writer.Close(nullReporter);
    }

    // Expected text: six lines for the eight writes above.
    StringBuilder expected = new StringBuilder();
    expected.Append(firstKey).Append(separator).Append(firstValue).Append("\n");
    expected.Append(firstValue).Append("\n");
    expected.Append(secondValue).Append("\n");
    expected.Append(secondKey).Append("\n");
    expected.Append(firstKey).Append("\n");
    expected.Append(secondKey).Append(separator).Append(secondValue).Append("\n");

    Path writtenPath = new Path(workDir, file);
    FilePath writtenFile = new FilePath(writtenPath.ToString());
    string actual = UtilsForTests.Slurp(writtenFile);
    NUnit.Framework.Assert.AreEqual(expected.ToString(), actual);
}
/// <summary>
/// A test that mimics a failed task to ensure that it does not get into the
/// COMMIT_PENDING state, by using a fake UmbilicalProtocol implementation
/// that fails if the commit protocol is played.
/// </summary>
/// <remarks>
/// The test mocks the various steps in a failed task's life-cycle using a
/// special OutputCommitter and UmbilicalProtocol implementation.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestTaskCleanupDoesNotCommit()
{
    // Mimic a job with a special committer that does not cleanup
    // files when a task fails.
    JobConf job = new JobConf();
    job.SetOutputCommitter(typeof(TestTaskCommit.CommitterWithoutCleanup));
    Path outDir = new Path(rootDir, "output");
    FileOutputFormat.SetOutputPath(job, outDir);

    // Mimic job setup
    string dummyAttemptID = "attempt_200707121733_0001_m_000000_0";
    TaskAttemptID attemptID = ((TaskAttemptID)TaskAttemptID.ForName(dummyAttemptID));
    OutputCommitter committer = new TestTaskCommit.CommitterWithoutCleanup();
    JobContext jContext = new JobContextImpl(job, ((JobID)attemptID.GetJobID()));
    committer.SetupJob(jContext);

    // Mimic a map task
    dummyAttemptID = "attempt_200707121733_0001_m_000001_0";
    attemptID = ((TaskAttemptID)TaskAttemptID.ForName(dummyAttemptID));
    Task task = new MapTask(null, attemptID, 0, null, 1);
    task.SetConf(job);
    task.LocalizeConfiguration(job);
    task.Initialize(job, ((JobID)attemptID.GetJobID()), Reporter.Null, false);

    // Mimic the map task writing some output.
    string file = "test.txt";
    FileSystem localFs = FileSystem.GetLocal(job);
    TextOutputFormat<Text, Text> theOutputFormat = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> theRecordWriter = theOutputFormat.GetRecordWriter(
        localFs, job, file, Reporter.Null);
    try
    {
        theRecordWriter.Write(new Text("key"), new Text("value"));
    }
    finally
    {
        // Close the writer even when the write fails.
        theRecordWriter.Close(Reporter.Null);
    }

    // Mimic a task failure; setting up the task for cleanup simulates
    // the abort protocol to be played.
    // Without checks in the framework, this will fail
    // as the committer will cause a COMMIT to happen for
    // the cleanup task.
    task.SetTaskCleanupTask();
    TestTaskCommit.MyUmbilical umbilical = new TestTaskCommit.MyUmbilical(this);
    task.Run(job, umbilical);

    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(umbilical.taskDone, "Task did not succeed");
}
/// <summary>
/// Sets up a job and task with <c>FileOutputCommitter</c>, writes output to
/// "test.txt" on the local file system, commits the task and the job, and
/// compares the committed file under <c>outDir</c> with the expected text.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCommitter()
{
    JobConf job = new JobConf();
    SetConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);
    string file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;

    // write output
    FileSystem localFs = FileSystem.GetLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file, reporter);
    WriteOutput(theRecordWriter, reporter);

    // do commit
    committer.CommitTask(tContext);
    committer.CommitJob(jContext);

    // validate output
    FilePath expectedFile = new FilePath(new Path(outDir, file).ToString());
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append('\t').Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append('\t').Append(val2).Append("\n");
    string output = UtilsForTests.Slurp(expectedFile);
    // Expected value goes first so a failure reports expected/actual correctly.
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Writes task output, aborts the task and then the job, and asserts that the
/// task file, the job temporary directory and any other content under
/// <c>outDir</c> are gone.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestAbort()
{
    JobConf job = new JobConf();
    SetConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);
    string file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;

    // write output
    FileSystem localFs = FileSystem.GetLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file, reporter);
    WriteOutput(theRecordWriter, reporter);

    // do abort
    committer.AbortTask(tContext);
    FilePath expectedFile = new FilePath(
        new Path(committer.GetTaskAttemptPath(tContext), file).ToString());
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "task temp dir still exists");

    committer.AbortJob(jContext, JobStatus.State.Failed);
    expectedFile = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName).ToString());
    NUnit.Framework.Assert.IsFalse(
        expectedFile.Exists(), "job temp dir " + expectedFile + " still exists");
    // NUnit order is (expected, actual, message).
    NUnit.Framework.Assert.AreEqual(
        0, new FilePath(outDir.ToString()).ListFiles().Length, "Output directory not empty");

    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Writes task output, aborts the task and then the job, and asserts that the
/// part file in the task work path, the job temporary directory and any other
/// content of the output directory are gone.
/// </summary>
/// <param name="version">
/// Value stored under <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>.
/// </param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestAbortInternal(int version)
{
    JobConf conf = new JobConf();
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.Set(JobContext.TaskAttemptId, attempt);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
    JobContext jContext = new JobContextImpl(conf, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(null, conf, partFile, null);
    WriteOutput(theRecordWriter, tContext);

    // do abort
    committer.AbortTask(tContext);
    FilePath @out = new FilePath(outDir.ToUri().GetPath());
    Path workPath = committer.GetWorkPath(tContext, outDir);
    FilePath wp = new FilePath(workPath.ToUri().GetPath());
    FilePath expectedFile = new FilePath(wp, partFile);
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "task temp dir still exists");

    committer.AbortJob(jContext, JobStatus.State.Failed);
    expectedFile = new FilePath(@out, FileOutputCommitter.TempDirName);
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "job temp dir still exists");
    // NUnit order is (expected, actual, message).
    NUnit.Framework.Assert.AreEqual(0, @out.ListFiles().Length, "Output directory not empty");

    FileUtil.FullyDelete(@out);
}
/// <summary>
/// Runs a local job with a combiner and a combiner key grouping comparator
/// over five input lines, then checks the combine counters and the two lines
/// written to "part-00000".
/// </summary>
/// <remarks>
/// Fails with <c>Assert.Fail("Job failed")</c> when the submitted job does not
/// report success.
/// </remarks>
public virtual void TestCombiner()
{
    if (!new FilePath(TestRootDir).Mkdirs())
    {
        throw new RuntimeException("Could not create test dir: " + TestRootDir);
    }
    FilePath @in = new FilePath(TestRootDir, "input");
    // The original text here was mangled by an e-mail obfuscation filter;
    // it is the Mkdirs() check on the input directory.
    if (!@in.Mkdirs())
    {
        throw new RuntimeException("Could not create test dir: " + @in);
    }
    FilePath @out = new FilePath(TestRootDir, "output");

    PrintWriter pw = new PrintWriter(new FileWriter(new FilePath(@in, "data.txt")));
    try
    {
        pw.WriteLine("A|a,1");
        pw.WriteLine("A|b,2");
        pw.WriteLine("B|a,3");
        pw.WriteLine("B|b,4");
        pw.WriteLine("B|c,5");
    }
    finally
    {
        // Release the file handle even when a write fails.
        pw.Close();
    }

    JobConf job = new JobConf();
    job.Set("mapreduce.framework.name", "local");
    TextInputFormat.SetInputPaths(job, new Path(@in.GetPath()));
    TextOutputFormat.SetOutputPath(job, new Path(@out.GetPath()));
    job.SetMapperClass(typeof(TestOldCombinerGrouping.Map));
    job.SetReducerClass(typeof(TestOldCombinerGrouping.Reduce));
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapOutputKeyClass(typeof(Text));
    job.SetMapOutputValueClass(typeof(LongWritable));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetOutputValueGroupingComparator(typeof(TestOldCombinerGrouping.GroupComparator));
    job.SetCombinerClass(typeof(TestOldCombinerGrouping.Combiner));
    job.SetCombinerKeyGroupingComparator(typeof(TestOldCombinerGrouping.GroupComparator));
    job.SetInt("min.num.spills.for.combine", 0);

    JobClient client = new JobClient(job);
    RunningJob runningJob = client.SubmitJob(job);
    runningJob.WaitForCompletion();
    if (runningJob.IsSuccessful())
    {
        Counters counters = runningJob.GetCounters();
        long combinerInputRecords = counters
            .GetGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .GetCounter("COMBINE_INPUT_RECORDS");
        long combinerOutputRecords = counters
            .GetGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .GetCounter("COMBINE_OUTPUT_RECORDS");
        NUnit.Framework.Assert.IsTrue(combinerInputRecords > 0);
        NUnit.Framework.Assert.IsTrue(combinerInputRecords > combinerOutputRecords);

        ICollection<string> output = new HashSet<string>();
        BufferedReader br = new BufferedReader(new FileReader(new FilePath(@out, "part-00000")));
        try
        {
            // Exactly two output lines are expected; each contributes the
            // characters at index 0 and index 4 of the line.
            string line = br.ReadLine();
            NUnit.Framework.Assert.IsNotNull(line);
            output.AddItem(Sharpen.Runtime.Substring(line, 0, 1) + Sharpen.Runtime.Substring(line, 4, 5));
            line = br.ReadLine();
            NUnit.Framework.Assert.IsNotNull(line);
            output.AddItem(Sharpen.Runtime.Substring(line, 0, 1) + Sharpen.Runtime.Substring(line, 4, 5));
            line = br.ReadLine();
            NUnit.Framework.Assert.IsNull(line);
        }
        finally
        {
            // Close the reader even when an assertion above fails.
            br.Close();
        }

        ICollection<string> expected = new HashSet<string>();
        expected.AddItem("A2");
        expected.AddItem("B5");
        NUnit.Framework.Assert.AreEqual(expected, output);
    }
    else
    {
        NUnit.Framework.Assert.Fail("Job failed");
    }
}
/// <summary>
/// Writes a fixed mix of key/value pairs through a <c>TextOutputFormat</c>
/// record writer with <c>FileOutputFormat.Compress</c> set to "true", reads
/// the file back through a <c>DefaultCodec</c> input stream, and compares the
/// decoded lines with the expected text.
/// </summary>
public virtual void TestCompress()
{
    JobConf job = new JobConf();
    job.Set(JobContext.TaskAttemptId, attempt);
    job.Set(FileOutputFormat.Compress, "true");
    FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, workDir);
    FileSystem fs = workDir.GetFileSystem(job);
    if (!fs.Mkdirs(workDir))
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }
    string file = "test_compress.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;
    TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat<object, object>();
    RecordWriter<object, object> theRecordWriter = theOutputFormat.GetRecordWriter(
        localFs, job, file, reporter);
    Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nullWritable = NullWritable.Get();
    try
    {
        theRecordWriter.Write(key1, val1);
        theRecordWriter.Write(null, nullWritable);
        theRecordWriter.Write(null, val1);
        theRecordWriter.Write(nullWritable, val2);
        theRecordWriter.Write(key2, nullWritable);
        theRecordWriter.Write(key1, null);
        theRecordWriter.Write(null, null);
        theRecordWriter.Write(key2, val2);
    }
    finally
    {
        theRecordWriter.Close(reporter);
    }

    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append("\t").Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append("\t").Append(val2).Append("\n");

    DefaultCodec codec = new DefaultCodec();
    codec.SetConf(job);
    // The written file name is the requested name plus the codec's default extension.
    Path expectedFile = new Path(workDir, file + codec.GetDefaultExtension());
    FileInputStream istream = new FileInputStream(expectedFile.ToString());
    CompressionInputStream cistream = codec.CreateInputStream(istream);
    LineReader reader = new LineReader(cistream);

    // Accumulate decoded lines in a builder rather than concatenating strings.
    StringBuilder output = new StringBuilder();
    try
    {
        Org.Apache.Hadoop.IO.Text @out = new Org.Apache.Hadoop.IO.Text();
        while (reader.ReadLine(@out) > 0)
        {
            output.Append(@out);
            output.Append("\n");
        }
    }
    finally
    {
        // Close the reader even when reading fails part-way.
        reader.Close();
    }
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output.ToString());
}
/// <summary>
/// Checks that aborting a task and then the job surfaces an
/// <c>IOException</c> containing "fake delete failed" when the default file
/// system is the "faildel" <c>FakeFileSystem</c>, and that the task file and
/// the job temporary directory are still on disk afterwards.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestFailAbort()
{
    JobConf job = new JobConf();
    job.Set(FileSystem.FsDefaultNameKey, "faildel:///");
    job.SetClass("fs.faildel.impl", typeof(TestMRCJCFileOutputCommitter.FakeFileSystem), typeof(FileSystem));
    SetConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);
    string file = "test.txt";
    FilePath jobTmpDir = new FilePath(committer.GetJobAttemptPath(jContext).ToUri().GetPath());
    FilePath taskTmpDir = new FilePath(committer.GetTaskAttemptPath(tContext).ToUri().GetPath());
    FilePath expectedFile = new FilePath(taskTmpDir, file);

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;

    // write output
    FileSystem localFs = new TestMRCJCFileOutputCommitter.FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(
        localFs, job, expectedFile.GetAbsolutePath(), reporter);
    WriteOutput(theRecordWriter, reporter);

    // do abort
    Exception th = null;
    try
    {
        committer.AbortTask(tContext);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");

    th = null;
    try
    {
        committer.AbortJob(jContext, JobStatus.State.Failed);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");
}
/// <summary>
/// Commits a task under application attempt 1, then recovers it with a second
/// committer under application attempt 2, asserting where the committed
/// output lives after each step and validating the final job output.
/// </summary>
/// <param name="commitVersion">
/// Algorithm version set on the first attempt's configuration.
/// </param>
/// <param name="recoveryVersion">
/// Algorithm version set on the second (recovering) attempt's configuration.
/// </param>
/// <exception cref="System.Exception"/>
private void TestRecoveryInternal(int commitVersion, int recoveryVersion)
{
    JobConf conf = new JobConf();
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.Set(JobContext.TaskAttemptId, attempt);
    conf.SetInt(MRConstants.ApplicationAttemptId, 1);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, commitVersion);
    JobContext jContext = new JobContextImpl(conf, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(null, conf, partFile, null);
    WriteOutput(theRecordWriter, tContext);

    // do commit
    if (committer.NeedsTaskCommit(tContext))
    {
        committer.CommitTask(tContext);
    }
    Path jobTempDir1 = committer.GetCommittedTaskPath(tContext);
    FilePath jtd1 = new FilePath(jobTempDir1.ToUri().GetPath());
    // NUnit takes the condition first and the failure message second.
    if (commitVersion == 1)
    {
        NUnit.Framework.Assert.IsTrue(jtd1.Exists(), "Version 1 commits to temporary dir " + jtd1);
        ValidateContent(jobTempDir1);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse(jtd1.Exists(), "Version 2 commits to output dir " + jtd1);
    }

    // now while running the second app attempt,
    // recover the task output from first attempt
    JobConf conf2 = new JobConf(conf);
    conf2.Set(JobContext.TaskAttemptId, attempt);
    conf2.SetInt(MRConstants.ApplicationAttemptId, 2);
    conf2.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, recoveryVersion);
    JobContext jContext2 = new JobContextImpl(conf2, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID);
    FileOutputCommitter committer2 = new FileOutputCommitter();
    committer2.SetupJob(jContext2);
    committer2.RecoverTask(tContext2);

    Path jobTempDir2 = committer2.GetCommittedTaskPath(tContext2);
    FilePath jtd2 = new FilePath(jobTempDir2.ToUri().GetPath());
    if (recoveryVersion == 1)
    {
        NUnit.Framework.Assert.IsTrue(jtd2.Exists(), "Version 1 recovers to " + jtd2);
        ValidateContent(jobTempDir2);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse(jtd2.Exists(), "Version 2 commits to output dir " + jtd2);
        if (commitVersion == 1)
        {
            // The first attempt's committed directory must have been emptied.
            NUnit.Framework.Assert.IsTrue(
                jtd1.List().Length == 0,
                "Version 2 recovery moves to output dir from " + jtd1);
        }
    }

    committer2.CommitJob(jContext2);
    ValidateContent(outDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}