/// <summary>
/// Writes a fixed sequence of key/value records (including null and
/// NullWritable keys and values) through a TextOutputFormat record writer
/// configured with the separator "\u0001", then compares the produced file
/// with the expected text.
/// </summary>
public virtual void TestFormatWithCustomSeparator()
{
    string separator = "\u0001";
    JobConf conf = new JobConf();
    conf.Set("mapreduce.output.textoutputformat.separator", separator);
    conf.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(conf, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(conf, workDir);

    FileSystem workFs = workDir.GetFileSystem(conf);
    bool dirReady = workFs.Mkdirs(workDir);
    if (!dirReady)
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }

    string fileName = "test_custom.txt";
    // A reporter that does nothing
    Reporter nullReporter = Reporter.Null;
    TextOutputFormat<object, object> format = new TextOutputFormat<object, object>();
    RecordWriter<object, object> writer = format.GetRecordWriter(localFs, conf, fileName, nullReporter);

    Org.Apache.Hadoop.IO.Text firstKey = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text secondKey = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text firstVal = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text secondVal = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nothing = NullWritable.Get();

    // Records are written in exactly this order; each row is { key, value }.
    object[][] records = new object[][]
    {
        new object[] { firstKey, firstVal },
        new object[] { null, nothing },
        new object[] { null, firstVal },
        new object[] { nothing, secondVal },
        new object[] { secondKey, nothing },
        new object[] { firstKey, null },
        new object[] { null, null },
        new object[] { secondKey, secondVal }
    };
    try
    {
        foreach (object[] record in records)
        {
            writer.Write(record[0], record[1]);
        }
    }
    finally
    {
        writer.Close(nullReporter);
    }

    // The expected file has six lines, each terminated by "\n".
    string[] expectedLines = new string[]
    {
        firstKey.ToString() + separator + firstVal.ToString(),
        firstVal.ToString(),
        secondVal.ToString(),
        secondKey.ToString(),
        firstKey.ToString(),
        secondKey.ToString() + separator + secondVal.ToString()
    };
    StringBuilder expected = new StringBuilder();
    foreach (string line in expectedLines)
    {
        expected.Append(line).Append("\n");
    }

    FilePath writtenFile = new FilePath(new Path(workDir, fileName).ToString());
    string actual = UtilsForTests.Slurp(writtenFile);
    NUnit.Framework.Assert.AreEqual(expected.ToString(), actual);
}
/// <summary>
/// Checks that SequenceFileAsBinaryOutputFormat.CheckOutputSpecs accepts a
/// job configured for block compression and throws InvalidJobConfException
/// for a job configured for record compression.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestcheckOutputSpecsForbidRecordCompression()
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    string dataRoot = Runtime.GetProperty("test.build.data", ".");
    Path dir = new Path(dataRoot + "/mapred");
    Path outputdir = new Path(dataRoot + "/output");
    fs.Delete(dir, true);
    fs.Delete(outputdir, true);
    if (!fs.Mkdirs(dir))
    {
        Fail("Failed to create output directory");
    }
    FileOutputFormat.SetWorkOutputPath(job, dir);
    // Without outputpath, FileOutputFormat.checkoutputspecs will throw
    // InvalidJobConfException
    FileOutputFormat.SetOutputPath(job, outputdir);

    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);
    try
    {
        new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(fs, job);
    }
    catch (Exception e)
    {
        Fail("Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat:" + "Caught " + e.GetType().FullName);
    }

    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Record);
    // The "not rejected" failure is raised after the try block. If Fail
    // throws an Exception-derived assertion (as NUnit's Assert.Fail does),
    // raising it inside the try would let the generic catch below swallow it
    // and report a misleading "Expected ... but caught ..." message.
    bool recordCompressionRejected = false;
    try
    {
        new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(fs, job);
    }
    catch (InvalidJobConfException)
    {
        // expected
        recordCompressionRejected = true;
    }
    catch (Exception e)
    {
        Fail("Expected " + typeof(InvalidJobConfException).FullName + " but caught " + e.GetType().FullName);
    }
    if (!recordCompressionRejected)
    {
        Fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    }
}
/// <summary>
/// Runs a full FileOutputCommitter cycle (setup job, setup task, write,
/// commit task, commit job) and checks that the committed file under
/// <c>outDir</c> holds the expected text. The output directory is deleted
/// once the comparison has passed.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCommitter()
{
    JobConf job = new JobConf();
    SetConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);
    string file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;

    // write output
    FileSystem localFs = FileSystem.GetLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file, reporter);
    WriteOutput(theRecordWriter, reporter);

    // do commit
    committer.CommitTask(tContext);
    committer.CommitJob(jContext);

    // validate output
    // NOTE(review): the expected lines presumably mirror the records that
    // WriteOutput emits; that helper is not visible here - confirm.
    FilePath expectedFile = new FilePath(new Path(outDir, file).ToString());
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append('\t').Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append('\t').Append(val2).Append("\n");
    string output = UtilsForTests.Slurp(expectedFile);
    // NUnit's AreEqual takes (expected, actual); keeping that order makes a
    // failure report label the two values correctly.
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Writes task output, then aborts the task and the job, checking after each
/// step that the corresponding temporary location no longer exists and that
/// the output directory ends up with no entries.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestAbort()
{
    JobConf job = new JobConf();
    SetConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);
    string file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;

    // write output
    FileSystem localFs = FileSystem.GetLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file, reporter);
    WriteOutput(theRecordWriter, reporter);

    // do abort
    committer.AbortTask(tContext);
    FilePath expectedFile = new FilePath(new Path(committer.GetTaskAttemptPath(tContext), file).ToString());
    // NUnit assertions take the failure message as the last argument.
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "task temp dir still exists");

    committer.AbortJob(jContext, JobStatus.State.Failed);
    expectedFile = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName).ToString());
    NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "job temp dir " + expectedFile + " still exists");
    NUnit.Framework.Assert.AreEqual(0, new FilePath(outDir.ToString()).ListFiles().Length, "Output directory not empty");
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Writes <c>Records</c> pseudo-random (int, double) pairs as raw bytes via
/// SequenceFileAsBinaryOutputFormat with block compression, then reads the
/// file back through SequenceFileInputFormat and checks that every key and
/// value matches the sequence regenerated from the same seed, and that the
/// number of records read equals <c>Records</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestBinary()
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(new Path(new Path(Runtime.GetProperty("test.build.data", ".")), FileOutputCommitter.TempDirName), "_" + attempt);
    Path file = new Path(dir, "testbinary.seq");
    // Pick a seed and re-seed with it so the same sequence can be replayed
    // during verification.
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);
    fs.Delete(dir, true);
    if (!fs.Mkdirs(dir))
    {
        Fail("Failed to create output directory");
    }

    // attempt: a random task attempt id for testing (declared outside this method).
    job.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, dir);
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);

    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    RecordWriter<BytesWritable, BytesWritable> writer = new SequenceFileAsBinaryOutputFormat().GetRecordWriter(fs, job, file.ToString(), Reporter.Null);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            // Serialize each writable into the scratch buffer, copy the bytes
            // into the BytesWritable, and reset the buffer for the next item.
            iwritable = new IntWritable(r.Next());
            iwritable.Write(outbuf);
            bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();
            dwritable = new DoubleWritable(r.NextDouble());
            dwritable.Write(outbuf);
            bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();
            writer.Write(bkey, bval);
        }
    }
    finally
    {
        writer.Close(Reporter.Null);
    }

    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // Replay the generator from the same seed to obtain the expected values.
    r.SetSeed(seed);
    DataInputBuffer buf = new DataInputBuffer();
    int numSplits = 3;
    SequenceFileInputFormat.AddInputPath(job, file);
    Log.Info("Reading data by SequenceFileInputFormat");
    foreach (InputSplit split in iformat.GetSplits(job, numSplits))
    {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.GetRecordReader(split, job, Reporter.Null);
        try
        {
            int sourceInt;
            double sourceDouble;
            while (reader.Next(iwritable, dwritable))
            {
                sourceInt = r.Next();
                sourceDouble = r.NextDouble();
                // NUnit assertions take the failure message as the last argument.
                NUnit.Framework.Assert.AreEqual(sourceInt, iwritable.Get(), "Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*");
                // C# has no static double.Compare; CompareTo gives the same
                // "equal when 0" result for the values produced here.
                NUnit.Framework.Assert.IsTrue(dwritable.Get().CompareTo(sourceDouble) == 0, "Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*");
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual(Records, count, "Some records not found");
}
/// <summary>
/// Runs TestMultipleTextOutputFormat.Test1 and Test2 against a job whose
/// work output path is <c>workDir</c>, then checks the content of the four
/// files "1-part-00000", "2-part-00000", "3-part-00000" and "2/3" under that
/// directory.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    JobConf job = new JobConf();
    job.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, workDir);
    FileSystem fs = workDir.GetFileSystem(job);
    if (!fs.Mkdirs(workDir))
    {
        Fail("Failed to create output directory");
    }

    TestMultipleTextOutputFormat.Test1(job);
    TestMultipleTextOutputFormat.Test2(job);

    AssertFileHoldsNumberPairs("1-part-00000", 10, 20);
    AssertFileHoldsNumberPairs("2-part-00000", 20, 30);
    AssertFileHoldsNumberPairs("3-part-00000", 30, 40);
    AssertFileHoldsNumberPairs("2/3", 10, 40);
}

/// <summary>
/// Asserts that the file <paramref name="fileName"/> under <c>workDir</c>
/// consists of one line "i\ti\n" for every i with
/// <paramref name="start"/> &lt;= i &lt; <paramref name="end"/>, in
/// ascending order.
/// </summary>
/// <param name="fileName">Path of the file relative to <c>workDir</c>.</param>
/// <param name="start">First number expected in the file (inclusive).</param>
/// <param name="end">Upper bound of the expected numbers (exclusive).</param>
private void AssertFileHoldsNumberPairs(string fileName, int start, int end)
{
    FilePath expectedFile = new FilePath(new Path(workDir, fileName).ToString());
    StringBuilder expectedOutput = new StringBuilder();
    for (int i = start; i < end; i++)
    {
        expectedOutput.Append(i).Append('\t').Append(i).Append("\n");
    }
    string output = UtilsForTests.Slurp(expectedFile);
    // NUnit's AreEqual takes (expected, actual); keeping that order makes a
    // failure report label the two values correctly.
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
}
/// <summary>
/// Writes a fixed sequence of key/value records (including null and
/// NullWritable keys and values) through a TextOutputFormat record writer
/// with FileOutputFormat.Compress set to "true", then reads the resulting
/// file back through a DefaultCodec input stream and compares it line by
/// line with the expected tab-separated text.
/// </summary>
public virtual void TestCompress()
{
    JobConf job = new JobConf();
    job.Set(JobContext.TaskAttemptId, attempt);
    job.Set(FileOutputFormat.Compress, "true");
    FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, workDir);
    FileSystem fs = workDir.GetFileSystem(job);
    if (!fs.Mkdirs(workDir))
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }
    string file = "test_compress.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;
    TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat<object, object>();
    RecordWriter<object, object> theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file, reporter);
    Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nullWritable = NullWritable.Get();
    try
    {
        theRecordWriter.Write(key1, val1);
        theRecordWriter.Write(null, nullWritable);
        theRecordWriter.Write(null, val1);
        theRecordWriter.Write(nullWritable, val2);
        theRecordWriter.Write(key2, nullWritable);
        theRecordWriter.Write(key1, null);
        theRecordWriter.Write(null, null);
        theRecordWriter.Write(key2, val2);
    }
    finally
    {
        theRecordWriter.Close(reporter);
    }

    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append("\t").Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append("\t").Append(val2).Append("\n");

    // The file that is read back is named after the requested file plus the
    // codec's default extension.
    DefaultCodec codec = new DefaultCodec();
    codec.SetConf(job);
    Path expectedFile = new Path(workDir, file + codec.GetDefaultExtension());
    FileInputStream istream = new FileInputStream(expectedFile.ToString());
    CompressionInputStream cistream = codec.CreateInputStream(istream);
    LineReader reader = new LineReader(cistream);
    StringBuilder output = new StringBuilder();
    try
    {
        Org.Apache.Hadoop.IO.Text line = new Org.Apache.Hadoop.IO.Text();
        while (reader.ReadLine(line) > 0)
        {
            output.Append(line.ToString()).Append("\n");
        }
    }
    finally
    {
        // Close the reader even when reading fails part-way through.
        reader.Close();
    }
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output.ToString());
}
/// <summary>
/// Configures the job to use the "faildel" scheme backed by
/// TestMRCJCFileOutputCommitter.FakeFileSystem, writes task output, and then
/// checks that both AbortTask and AbortJob throw an IOException whose message
/// contains "fake delete failed" while the task file and the job temporary
/// directory remain on disk.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestFailAbort()
{
    JobConf job = new JobConf();
    job.Set(FileSystem.FsDefaultNameKey, "faildel:///");
    job.SetClass("fs.faildel.impl", typeof(TestMRCJCFileOutputCommitter.FakeFileSystem), typeof(FileSystem));
    SetConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);
    string file = "test.txt";
    FilePath jobTmpDir = new FilePath(committer.GetJobAttemptPath(jContext).ToUri().GetPath());
    FilePath taskTmpDir = new FilePath(committer.GetTaskAttemptPath(tContext).ToUri().GetPath());
    FilePath expectedFile = new FilePath(taskTmpDir, file);

    // A reporter that does nothing
    Reporter reporter = Reporter.Null;

    // write output
    FileSystem localFs = new TestMRCJCFileOutputCommitter.FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, expectedFile.GetAbsolutePath(), reporter);
    WriteOutput(theRecordWriter, reporter);

    // do abort
    Exception th = null;
    try
    {
        committer.AbortTask(tContext);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    // NUnit assertions take the failure message as the last argument.
    NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");

    th = null;
    try
    {
        committer.AbortJob(jContext, JobStatus.State.Failed);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");
}