/// <summary>
/// Builds a fresh <c>MultipleOutputs</c> from the supplied job configuration
/// and stores it in the <c>mos</c> member.
/// </summary>
/// <param name="conf">Job configuration handed to the <c>MultipleOutputs</c> constructor.</param>
public virtual void Configure(JobConf conf)
{
    mos = new MultipleOutputs(conf);
}
/// <summary>
/// Submits a job that configures <c>JavaSerialization</c> ahead of
/// <c>WritableSerialization</c>, registers a single "text" named output with
/// <c>MultipleOutputs</c>, and then checks the files and counters the job produced.
/// </summary>
/// <remarks>
/// Verifies that exactly two of the files in the output directory are named
/// "text-m-00000" or "text-r-00000", that "text-r-00000" is non-empty and every
/// line in it ends with "text", and that the <c>MultipleOutputs</c> counter group
/// is empty when counters are disabled or holds one counter ("text" == 2) otherwise.
/// </remarks>
/// <param name="withCounters">Value passed to <c>MultipleOutputs.SetCountersEnabled</c>; also selects which counter assertions run.</param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
{
    Path inDir = GetDir(InDir);
    Path outDir = GetDir(OutDir);
    JobConf conf = CreateJobConf();
    FileSystem fs = FileSystem.Get(conf);

    // NOTE(review): "part-0" is written and then inDir is deleted right after,
    // so presumably only "part-1" reaches the job. The expected values below
    // (2 named files, "text" counter == 2) look like they depend on that --
    // confirm before reordering or removing this step.
    DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
    try
    {
        file.WriteBytes("a\nb\n\nc\nd\ne");
    }
    finally
    {
        // Close even when the write throws so the stream is not leaked.
        file.Close();
    }
    fs.Delete(inDir, true);
    fs.Delete(outDir, true);

    file = fs.Create(new Path(inDir, "part-1"));
    try
    {
        file.WriteBytes("a\nb\n\nc\nd\ne");
    }
    finally
    {
        file.Close();
    }

    conf.SetJobName("mo");
    conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(long));
    conf.SetMapOutputValueClass(typeof(string));
    conf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));
    conf.SetOutputKeyClass(typeof(long));
    conf.SetOutputValueClass(typeof(string));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(long), typeof(string));
    MultipleOutputs.SetCountersEnabled(conf, withCounters);
    conf.SetMapperClass(typeof(TestMultipleOutputs.MOJavaSerDeMap));
    conf.SetReducerClass(typeof(TestMultipleOutputs.MOJavaSerDeReduce));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);

    JobClient jc = new JobClient(conf);
    RunningJob job = jc.SubmitJob(conf);
    // Poll until the submitted job reports completion.
    while (!job.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }

    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.ListStatus(outDir);
    foreach (FileStatus status in statuses)
    {
        // Resolve the file name once instead of once per comparison.
        string name = status.GetPath().GetName();
        if (name.Equals("text-m-00000") || name.Equals("text-r-00000"))
        {
            namedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(2, namedOutputCount);

    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000"))));
    int count = 0;
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            // Ordinal comparison: the suffix is a fixed token, not linguistic text,
            // so the check must not vary with the current culture.
            NUnit.Framework.Assert.IsTrue(line.EndsWith("text", System.StringComparison.Ordinal));
            line = reader.ReadLine();
            count++;
        }
    }
    finally
    {
        // A failing assertion above must not leave the reader open.
        reader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
    if (!withCounters)
    {
        NUnit.Framework.Assert.AreEqual(0, counters.Size());
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(1, counters.Size());
        NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("text"));
    }
}
/// <summary>
/// Submits a job that registers a "text" named output and a "sequence"
/// multi-named output with <c>MultipleOutputs</c>, and then checks the files and
/// counters the job produced.
/// </summary>
/// <remarks>
/// Verifies that exactly nine files in the output directory carry one of the
/// expected named-output file names, that "text-r-00000" is non-empty and every
/// line in it ends with "text", that "sequence_B-r-00000" is a non-empty sequence
/// file with <c>LongWritable</c> keys and <c>Text</c> values all equal to
/// "sequence", and that the <c>MultipleOutputs</c> counter group is empty when
/// counters are disabled or holds the four expected counters otherwise.
/// </remarks>
/// <param name="withCounters">Value passed to <c>MultipleOutputs.SetCountersEnabled</c>; also selects which counter assertions run.</param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMultipleOutputs(bool withCounters)
{
    Path inDir = GetDir(InDir);
    Path outDir = GetDir(OutDir);
    JobConf conf = CreateJobConf();
    FileSystem fs = FileSystem.Get(conf);

    DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
    try
    {
        file.WriteBytes("a\nb\n\nc\nd\ne");
    }
    finally
    {
        // Close even when the write throws so the stream is not leaked.
        file.Close();
    }

    file = fs.Create(new Path(inDir, "part-1"));
    try
    {
        file.WriteBytes("a\nb\n\nc\nd\ne");
    }
    finally
    {
        file.Close();
    }

    conf.SetJobName("mo");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(LongWritable), typeof(Text));
    MultipleOutputs.AddMultiNamedOutput(conf, "sequence", typeof(SequenceFileOutputFormat), typeof(LongWritable), typeof(Text));
    MultipleOutputs.SetCountersEnabled(conf, withCounters);
    conf.SetMapperClass(typeof(TestMultipleOutputs.MOMap));
    conf.SetReducerClass(typeof(TestMultipleOutputs.MOReduce));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);

    JobClient jc = new JobClient(conf);
    RunningJob job = jc.SubmitJob(conf);
    // Poll until the submitted job reports completion.
    while (!job.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }

    // assert number of named output part files
    string[] expectedNames = new string[]
    {
        "text-m-00000",
        "text-m-00001",
        "text-r-00000",
        "sequence_A-m-00000",
        "sequence_A-m-00001",
        "sequence_B-m-00000",
        "sequence_B-m-00001",
        "sequence_B-r-00000",
        "sequence_C-r-00000"
    };
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.ListStatus(outDir);
    foreach (FileStatus status in statuses)
    {
        // Resolve the file name once and look it up in the expected set.
        string name = status.GetPath().GetName();
        if (System.Array.IndexOf(expectedNames, name) >= 0)
        {
            namedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(9, namedOutputCount);

    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000"))));
    int count = 0;
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            // Ordinal comparison: the suffix is a fixed token, not linguistic text,
            // so the check must not vary with the current culture.
            NUnit.Framework.Assert.IsTrue(line.EndsWith("text", System.StringComparison.Ordinal));
            line = reader.ReadLine();
            count++;
        }
    }
    finally
    {
        // A failing assertion above must not leave the reader open.
        reader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.GetOutputPath(conf), "sequence_B-r-00000"), conf);
    count = 0;
    try
    {
        NUnit.Framework.Assert.AreEqual(typeof(LongWritable), seqReader.GetKeyClass());
        NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
        LongWritable key = new LongWritable();
        Text value = new Text();
        while (seqReader.Next(key, value))
        {
            NUnit.Framework.Assert.AreEqual("sequence", value.ToString());
            count++;
        }
    }
    finally
    {
        // Same reasoning as for the text reader: release it on failure too.
        seqReader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
    if (!withCounters)
    {
        NUnit.Framework.Assert.AreEqual(0, counters.Size());
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(4, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("text"));
        NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_A"));
        NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("sequence_B"));
        NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_C"));
    }
}