/// <summary>
/// Runs a field-selection job (FieldSelectionMapper / FieldSelectionReducer,
/// one reduce task) and checks that the job output equals the expected
/// output.
/// </summary>
/// <remarks>
/// The input text and the expected output are both filled in by
/// <c>ConstructInputOutputData</c>. The output directory is deleted only
/// after both assertions have passed.
/// </remarks>
/// <exception cref="System.Exception"/>
public static void Launch()
{
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 10;
    Path outDir = new Path(testDir, "output_for_field_selection_test");
    Path inDir = new Path(testDir, "input_for_field_selection_test");

    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);

    // Field separator and the key/value selection specs for both phases.
    conf.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    conf.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    conf.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");

    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, inputData.ToString());
    job.SetMapperClass(typeof(FieldSelectionMapper));
    job.SetReducerClass(typeof(FieldSelectionReducer));
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetNumReduceTasks(1);
    job.WaitForCompletion(true);

    // NUnit expects the condition first and the failure message last.
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job Failed!");

    // Compare what the job wrote with the expected output built above.
    // The message must be the last argument: with three strings NUnit would
    // otherwise compare the message against the expected output.
    string outdata = MapReduceTestUtil.ReadOutput(outDir, conf);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata, "Outputs doesnt match.");

    fs.Delete(outDir, true);
}
/// <summary>
/// Runs the "mt" job through <c>MultithreadedMapper</c> with two threads and
/// asserts that the job succeeds exactly when no failure flag was requested.
/// </summary>
/// <param name="ioEx">
/// When true, sets <c>multithreaded.ioException</c> in the job configuration.
/// </param>
/// <param name="rtEx">
/// When true, sets <c>multithreaded.runtimeException</c> in the job configuration.
/// </param>
/// <exception cref="System.Exception"/>
private void Run(bool ioEx, bool rtEx)
{
    string dataRoot = Runtime.GetProperty("test.build.data", "/tmp");
    Path inputPath = new Path(dataRoot, "testing/mt/input");
    Path outputPath = new Path(dataRoot, "testing/mt/output");

    Configuration jobConf = CreateJobConf();
    if (ioEx)
    {
        jobConf.SetBoolean("multithreaded.ioException", true);
    }
    if (rtEx)
    {
        jobConf.SetBoolean("multithreaded.runtimeException", true);
    }

    Job mtJob = MapReduceTestUtil.CreateJob(jobConf, inputPath, outputPath, 1, 1);
    mtJob.SetJobName("mt");
    mtJob.SetMapperClass(typeof(MultithreadedMapper));
    MultithreadedMapper.SetMapperClass(mtJob, typeof(TestMultithreadedMapper.IDMap));
    MultithreadedMapper.SetNumberOfThreads(mtJob, 2);
    mtJob.SetReducerClass(typeof(Reducer));
    mtJob.WaitForCompletion(true);

    // A failure is expected if, and only if, one of the flags was set.
    bool failureRequested = ioEx || rtEx;
    if (!mtJob.IsSuccessful())
    {
        NUnit.Framework.Assert.IsTrue(failureRequested);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse(failureRequested);
    }
}
/// <summary>
/// Runs a job sorted by <c>KeyFieldBasedComparator</c> with the given key
/// spec and checks the order of the first two lines of the first output file.
/// </summary>
/// <param name="keySpec">
/// Value stored under <c>mapreduce.partition.keycomparator.options</c>.
/// </param>
/// <param name="expect">
/// 1 if <c>line1</c> must come first and <c>line2</c> second; 2 for the
/// reverse order. Any other value skips the order checks.
/// </param>
/// <exception cref="System.Exception"/>
private void TestComparator(string keySpec, int expect)
{
    string root = Runtime.GetProperty("test.build.data", "/tmp");
    Path inDir = new Path(root, "test_cmp/in");
    Path outDir = new Path(root, "test_cmp/out");

    conf.Set("mapreduce.partition.keycomparator.options", keySpec);
    conf.Set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
    conf.Set(MRJobConfig.MapOutputKeyFieldSeperator, " ");

    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, line1 + "\n" + line2 + "\n");
    job.SetMapperClass(typeof(InverseMapper));
    job.SetReducerClass(typeof(Reducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(LongWritable));
    job.SetSortComparatorClass(typeof(KeyFieldBasedComparator));
    job.SetPartitionerClass(typeof(KeyFieldBasedPartitioner));
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful());

    // validate output
    Path[] outputFiles = FileUtil.Stat2Paths(
        GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));

    // NOTE(review): when no output file is listed the order checks are
    // skipped and the test passes; kept as in the original.
    if (outputFiles.Length > 0)
    {
        InputStream @is = GetFileSystem().Open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
        try
        {
            // Make sure we get what we expect as the first line, and also
            // that we have two lines (both lines must end up in the same
            // reducer since the partitioner takes the same key spec for all
            // lines).
            string line = reader.ReadLine();
            if (expect == 1)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line1, System.StringComparison.Ordinal));
            }
            else if (expect == 2)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line2, System.StringComparison.Ordinal));
            }

            line = reader.ReadLine();
            if (expect == 1)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line2, System.StringComparison.Ordinal));
            }
            else if (expect == 2)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line1, System.StringComparison.Ordinal));
            }
        }
        finally
        {
            // Close the reader even when an assertion above fails.
            reader.Close();
        }
    }
}
/// <summary>Tests Reducer throwing exception.</summary>
/// <remarks>
/// The chain uses <c>TestChainErrors.FailReduce</c> as its reducer; the test
/// passes only if the job is reported as not successful.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestReducerFail()
{
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");

    ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text),
        typeof(LongWritable), typeof(Text), null);
    ChainReducer.SetReducer(job, typeof(TestChainErrors.FailReduce), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainReducer.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text),
        typeof(LongWritable), typeof(Text), null);

    job.WaitForCompletion(true);

    // NUnit expects the condition first and the failure message last.
    NUnit.Framework.Assert.IsFalse(job.IsSuccessful(), "Job Not failed");
}
/// <summary>Tests one of the maps consuming output.</summary>
/// <remarks>
/// The first mapper in the chain is <c>TestChainErrors.ConsumeMap</c>; the
/// job must succeed and its output must be the empty string.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestChainMapNoOuptut()
{
    Configuration conf = CreateJobConf();
    string expectedOutput = string.Empty;

    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 0, input);
    job.SetJobName("chain");

    ChainMapper.AddMapper(job, typeof(TestChainErrors.ConsumeMap), typeof(IntWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text),
        typeof(LongWritable), typeof(Text), null);

    job.WaitForCompletion(true);

    // NUnit expects the failure message as the last argument. With the
    // message first, AreEqual would compare the message against the
    // expected output.
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");
    NUnit.Framework.Assert.AreEqual(expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf),
        "Outputs doesn't match");
}
/// <summary>
/// Tests chain mapper and reducer by adding a single mapper and a single
/// reducer to the chain.
/// </summary>
/// <remarks>
/// Uses <c>TokenCounterMapper</c> and <c>IntSumReducer</c> over the input
/// "a\nb\na\n" and expects the output "a\t2\nb\t1\n".
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNoChain()
{
    Path inDir = new Path(localPathRoot, "testing/chain/input");
    Path outDir = new Path(localPathRoot, "testing/chain/output");
    string input = "a\nb\na\n";
    string expectedOutput = "a\t2\nb\t1\n";

    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");

    ChainMapper.AddMapper(job, typeof(TokenCounterMapper), typeof(object), typeof(Text),
        typeof(Text), typeof(IntWritable), null);
    ChainReducer.SetReducer(job, typeof(IntSumReducer), typeof(Text), typeof(IntWritable),
        typeof(Text), typeof(IntWritable), null);

    job.WaitForCompletion(true);

    // NUnit expects the failure message as the last argument. With the
    // message first, AreEqual would compare the message against the
    // expected output.
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");
    NUnit.Framework.Assert.AreEqual(expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf),
        "Outputs doesn't match");
}
/// <summary>
/// Runs a job to completion with the given output format and checks which
/// files exist in its output directory afterwards.
/// </summary>
/// <param name="filename">
/// Name of a file that must exist in the output directory after the job.
/// </param>
/// <param name="output">Output format class set on the job.</param>
/// <param name="exclude">
/// Names of files that must not exist in the output directory after the job.
/// </param>
/// <exception cref="System.Exception"/>
private void TestSuccessfulJob(string filename, Type output, string[] exclude)
{
    Path outDir = GetNewOutputDir();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 0);
    job.SetOutputFormatClass(output);

    // NUnit expects the condition first and the failure message last.
    NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(true), "Job failed!");

    Path testFile = new Path(outDir, filename);
    NUnit.Framework.Assert.IsTrue(fs.Exists(testFile),
        "Done file missing for job " + job.GetJobID());

    // check that none of the excluded files exists
    foreach (string ex in exclude)
    {
        Path file = new Path(outDir, ex);
        NUnit.Framework.Assert.IsFalse(fs.Exists(file),
            "File " + file + " should not be present for successful job " + job.GetJobID());
    }
}
/// <summary>Tests errors during submission.</summary>
/// <remarks>
/// Two chains with mismatched key classes are assembled; the test requires
/// an <c>ArgumentException</c> to be caught for each of them.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestChainSubmission()
{
    Configuration jobConf = CreateJobConf();
    Job chainJob = MapReduceTestUtil.CreateJob(jobConf, inDir, outDir, 0, 0, input);
    chainJob.SetJobName("chain");

    // Case 1: the first map emits IntWritable keys while the second map
    // is declared to consume LongWritable keys.
    Exception mapperMismatch = null;
    try
    {
        ChainMapper.AddMapper(chainJob, typeof(Mapper), typeof(LongWritable), typeof(Text),
            typeof(IntWritable), typeof(Text), null);
        ChainMapper.AddMapper(chainJob, typeof(Mapper), typeof(LongWritable), typeof(Text),
            typeof(LongWritable), typeof(Text), null);
    }
    catch (ArgumentException e)
    {
        mapperMismatch = e;
    }
    NUnit.Framework.Assert.IsTrue(mapperMismatch != null);

    // Case 2: the reducer emits IntWritable keys while the mapper added
    // after it is declared to consume LongWritable keys.
    Exception reducerMismatch = null;
    try
    {
        ChainReducer.SetReducer(chainJob, typeof(Reducer), typeof(LongWritable), typeof(Text),
            typeof(IntWritable), typeof(Text), null);
        ChainMapper.AddMapper(chainJob, typeof(Mapper), typeof(LongWritable), typeof(Text),
            typeof(LongWritable), typeof(Text), null);
    }
    catch (ArgumentException e)
    {
        reducerMismatch = e;
    }
    NUnit.Framework.Assert.IsTrue(reducerMismatch != null);
}
/// <summary>
/// Runs the "mo" multiple-outputs job with <c>JavaSerialization</c>
/// configured ahead of <c>WritableSerialization</c>, then checks the output
/// file names, the content of "text-r-00000" and, optionally, the counters.
/// </summary>
/// <param name="withCounters">
/// Passed to <c>MultipleOutputs.SetCountersEnabled</c>; when true the
/// <c>MultipleOutputs</c> counter group is verified as well.
/// </param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
{
    string input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = CreateJobConf();
    conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
        + "org.apache.hadoop.io.serializer.WritableSerialization");

    Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);
    job.SetJobName("mo");
    MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(long), typeof(string));
    MultipleOutputs.SetCountersEnabled(job, withCounters);
    job.SetSortComparatorClass(typeof(JavaSerializationComparator));
    job.SetMapOutputKeyClass(typeof(long));
    job.SetMapOutputValueClass(typeof(string));
    job.SetOutputKeyClass(typeof(long));
    job.SetOutputValueClass(typeof(string));
    job.SetMapperClass(typeof(TestMRMultipleOutputs.MOJavaSerDeMap));
    job.SetReducerClass(typeof(TestMRMultipleOutputs.MOJavaSerDeReduce));
    job.WaitForCompletion(true);

    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OutDir.GetFileSystem(conf);
    FileStatus[] statuses = fs.ListStatus(OutDir);
    foreach (FileStatus status in statuses)
    {
        string fileName = status.GetPath().GetName();
        if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001")
            || fileName.Equals("text-r-00000"))
        {
            namedOutputCount++;
        }
        else if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000")
            || fileName.Equals("c-r-00000") || fileName.Equals("d-r-00000")
            || fileName.Equals("e-r-00000"))
        {
            valueBasedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(3, namedOutputCount);
    NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);

    // assert TextOutputFormat files correctness
    int count = 0;
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        fs.Open(new Path(FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            NUnit.Framework.Assert.IsTrue(line.EndsWith(Text, System.StringComparison.Ordinal));
            line = reader.ReadLine();
            count++;
        }
    }
    finally
    {
        // Close the reader even when an assertion above fails.
        reader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    if (withCounters)
    {
        CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
        NUnit.Framework.Assert.AreEqual(6, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
    }
}
/// <summary>
/// Runs the "mo" multiple-outputs job with one text and one sequence-file
/// named output, then checks the output file names, the content of
/// "text-r-00000" and "sequence_B-r-00000" and, optionally, the counters.
/// </summary>
/// <param name="withCounters">
/// Passed to <c>MultipleOutputs.SetCountersEnabled</c>; when true the
/// <c>MultipleOutputs</c> counter group is verified as well.
/// </param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMultipleOutputs(bool withCounters)
{
    string input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = CreateJobConf();

    Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);
    job.SetJobName("mo");
    MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(LongWritable),
        typeof(Text));
    MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat),
        typeof(IntWritable), typeof(Text));
    MultipleOutputs.SetCountersEnabled(job, withCounters);
    job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
    job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
    job.WaitForCompletion(true);

    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OutDir.GetFileSystem(conf);
    FileStatus[] statuses = fs.ListStatus(OutDir);
    foreach (FileStatus status in statuses)
    {
        string fileName = status.GetPath().GetName();
        if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001")
            || fileName.Equals("text-r-00000") || fileName.Equals("sequence_A-m-00000")
            || fileName.Equals("sequence_A-m-00001") || fileName.Equals("sequence_B-m-00000")
            || fileName.Equals("sequence_B-m-00001") || fileName.Equals("sequence_B-r-00000")
            || fileName.Equals("sequence_C-r-00000"))
        {
            namedOutputCount++;
        }
        else if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000")
            || fileName.Equals("c-r-00000") || fileName.Equals("d-r-00000")
            || fileName.Equals("e-r-00000"))
        {
            valueBasedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
    NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);

    // assert TextOutputFormat files correctness
    int count = 0;
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        fs.Open(new Path(FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            NUnit.Framework.Assert.IsTrue(line.EndsWith(Text, System.StringComparison.Ordinal));
            line = reader.ReadLine();
            count++;
        }
    }
    finally
    {
        // Close the reader even when an assertion above fails.
        reader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
        new Path(FileOutputFormat.GetOutputPath(job), "sequence_B-r-00000"), conf);
    try
    {
        NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
        NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
        count = 0;
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (seqReader.Next(key, value))
        {
            NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
            count++;
        }
    }
    finally
    {
        // Close the sequence-file reader even when an assertion above fails.
        seqReader.Close();
    }
    NUnit.Framework.Assert.IsFalse(count == 0);

    if (withCounters)
    {
        CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
        NUnit.Framework.Assert.AreEqual(9, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
    }
}
/// <summary>
/// Runs a chain of three mappers (A, B, C), a reducer (R) and three further
/// mappers (D, E, F), then checks the flags read back through
/// <c>GetFlag</c> and the job output.
/// </summary>
/// <remarks>
/// The input is "1\n2\n" and the expected output is
/// "0\t1ABCRDEF\n2\t2ABCRDEF\n". Flags are checked in the same order as
/// they are listed below: setup, per-value, cleanup.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestChain()
{
    Path inDir = new Path(localPathRoot, "testing/chain/input");
    Path outDir = new Path(localPathRoot, "testing/chain/output");
    string input = "1\n2\n";
    string expectedOutput = "0\t1ABCRDEF\n2\t2ABCRDEF\n";

    Configuration conf = CreateJobConf();
    CleanFlags(conf);
    conf.Set("a", "X");

    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");

    // Map side of the chain: A (own configuration), B, C.
    Configuration mapAConf = new Configuration(false);
    mapAConf.Set("a", "A");
    ChainMapper.AddMapper(job, typeof(TestMapReduceChain.AMap), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), mapAConf);
    ChainMapper.AddMapper(job, typeof(TestMapReduceChain.BMap), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainMapper.AddMapper(job, typeof(TestMapReduceChain.CMap), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), null);

    // Reduce side of the chain: R (own configuration), D, E (own configuration), F.
    Configuration reduceConf = new Configuration(false);
    reduceConf.Set("a", "C");
    ChainReducer.SetReducer(job, typeof(TestMapReduceChain.RReduce), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), reduceConf);
    ChainReducer.AddMapper(job, typeof(TestMapReduceChain.DMap), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), null);
    Configuration mapEConf = new Configuration(false);
    mapEConf.Set("a", "E");
    ChainReducer.AddMapper(job, typeof(TestMapReduceChain.EMap), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), mapEConf);
    ChainReducer.AddMapper(job, typeof(TestMapReduceChain.FMap), typeof(LongWritable),
        typeof(Text), typeof(LongWritable), typeof(Text), null);

    job.WaitForCompletion(true);

    // NUnit expects the condition first and the failure message last.
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");

    // Setup and per-value flags, asserted with a failure message.
    string str = "flag not set";
    string[] flagsWithMessage =
    {
        "map.setup.A",
        "map.setup.B",
        "map.setup.C",
        "reduce.setup.R",
        "map.setup.D",
        "map.setup.E",
        "map.setup.F",
        "map.A.value.1",
        "map.A.value.2",
        "map.B.value.1A",
        "map.B.value.2A",
        "map.C.value.1AB",
        "map.C.value.2AB",
        "reduce.R.value.1ABC",
        "reduce.R.value.2ABC",
        "map.D.value.1ABCR",
        "map.D.value.2ABCR",
        "map.E.value.1ABCRD",
        "map.E.value.2ABCRD",
        "map.F.value.1ABCRDE",
        "map.F.value.2ABCRDE"
    };
    foreach (string flag in flagsWithMessage)
    {
        NUnit.Framework.Assert.IsTrue(GetFlag(conf, flag), str);
    }

    // Cleanup flags, asserted without a message as in the original.
    string[] cleanupFlags =
    {
        "map.cleanup.A",
        "map.cleanup.B",
        "map.cleanup.C",
        "reduce.cleanup.R",
        "map.cleanup.D",
        "map.cleanup.E",
        "map.cleanup.F"
    };
    foreach (string flag in cleanupFlags)
    {
        NUnit.Framework.Assert.IsTrue(GetFlag(conf, flag));
    }

    // The message goes last: with the message first, AreEqual would compare
    // the message against the expected output.
    NUnit.Framework.Assert.AreEqual(expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf),
        "Outputs doesn't match");
}