/// <exception cref="System.Exception"/>
public static void Launch()
{
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 10;
    Path outDir = new Path(testDir, "output_for_field_selection_test");
    Path inDir = new Path(testDir, "input_for_field_selection_test");
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);
    // Fields are separated by '-'. The map keeps fields 6, 5, 1-3 as the key
    // and fields 0-end as the value; the reduce emits an empty key and
    // fields 4, 3, 2, 1, 0, 0-end as the value.
    conf.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    conf.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    conf.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, inputData.ToString());
    job.SetMapperClass(typeof(FieldSelectionMapper));
    job.SetReducerClass(typeof(FieldSelectionReducer));
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetNumReduceTasks(1);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed!", job.IsSuccessful());
    // Finally, compare the actual output with the expected output.
    string outdata = MapReduceTestUtil.ReadOutput(outDir, conf);
    NUnit.Framework.Assert.AreEqual("Outputs don't match.", expectedOutput.ToString(), outdata);
    fs.Delete(outDir, true);
}
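// A minimal, Hadoop-free sketch of how a key/value spec such as "6,5,1-3:0-"
// selects fields, assuming FieldSelectionHelper's documented semantics: the
// list before ':' names the key fields, the list after ':' names the value
// fields, and "n-" means field n through the last field. SelectFields is a
// hypothetical helper for illustration only (requires System.Linq); it is
// not part of the Hadoop API.
private static string SelectFields(string line, char sep, params int[] fields)
{
    string[] f = line.Split(sep);
    return string.Join(sep.ToString(), fields.Select(i => f[i]));
}
// Example: with separator '-' and the map key spec "6,5,1-3", a record
// "f0-f1-f2-f3-f4-f5-f6" yields the key
// SelectFields(record, '-', 6, 5, 1, 2, 3) == "f6-f5-f1-f2-f3".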
/// <exception cref="System.Exception"/>
public static void Launch()
{
    JobConf conf = new JobConf(typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 10;
    Path OutputDir = new Path("build/test/output_for_field_selection_test");
    Path InputDir = new Path("build/test/input_for_field_selection_test");
    string inputFile = "input.txt";
    fs.Delete(InputDir, true);
    fs.Mkdirs(InputDir);
    fs.Delete(OutputDir, true);
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    TestMRFieldSelection.ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);
    FSDataOutputStream fileOut = fs.Create(new Path(InputDir, inputFile));
    fileOut.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
    fileOut.Close();
    System.Console.Out.WriteLine("inputData:");
    System.Console.Out.WriteLine(inputData.ToString());
    JobConf job = new JobConf(conf, typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileInputFormat.SetInputPaths(job, InputDir);
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapperClass(typeof(FieldSelectionMapReduce));
    job.SetReducerClass(typeof(FieldSelectionMapReduce));
    FileOutputFormat.SetOutputPath(job, OutputDir);
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetNumReduceTasks(1);
    // Same field selection specs as the new-API test above.
    job.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    job.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    job.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
    JobClient.RunJob(job);
    // Finally, compare the actual output with the expected output.
    Path outPath = new Path(OutputDir, "part-00000");
    string outdata = MapReduceTestUtil.ReadOutput(outPath, job);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata);
    fs.Delete(OutputDir, true);
    fs.Delete(InputDir, true);
}
/// <summary>Tests a chain in which the first mapper consumes its input without emitting output.</summary>
/// <exception cref="System.Exception"/>
public virtual void TestChainMapNoOutput()
{
    Configuration conf = CreateJobConf();
    string expectedOutput = string.Empty;
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 0, input);
    job.SetJobName("chain");
    ChainMapper.AddMapper(job, typeof(TestChainErrors.ConsumeMap), typeof(IntWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
    NUnit.Framework.Assert.AreEqual("Outputs don't match", expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf));
}
// Test the chain API by adding a single mapper and a single reducer to the chain.
/// <exception cref="System.Exception"/>
public virtual void TestNoChain()
{
    Path inDir = new Path(localPathRoot, "testing/chain/input");
    Path outDir = new Path(localPathRoot, "testing/chain/output");
    string input = "a\nb\na\n";
    string expectedOutput = "a\t2\nb\t1\n";
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");
    ChainMapper.AddMapper(job, typeof(TokenCounterMapper), typeof(object), typeof(Text), typeof(Text), typeof(IntWritable), null);
    ChainReducer.SetReducer(job, typeof(IntSumReducer), typeof(Text), typeof(IntWritable), typeof(Text), typeof(IntWritable), null);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
    NUnit.Framework.Assert.AreEqual("Outputs don't match", expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf));
}
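// Hedged sketch (plain C#, no Hadoop) of why expectedOutput above is
// "a\t2\nb\t1\n": TokenCounterMapper emits (token, 1) for each token and
// IntSumReducer sums the counts per key. The hypothetical helper below
// reproduces that arithmetic locally and is illustration only.
private static string LocalWordCount(string input)
{
    var counts = new SortedDictionary<string, int>(StringComparer.Ordinal);
    foreach (string token in input.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries))
    {
        counts[token] = counts.TryGetValue(token, out int n) ? n + 1 : 1;
    }
    var sb = new StringBuilder();
    foreach (var kv in counts)
    {
        sb.Append(kv.Key).Append('\t').Append(kv.Value).Append('\n');
    }
    return sb.ToString(); // LocalWordCount("a\nb\na\n") == "a\t2\nb\t1\n"
}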
/// <exception cref="System.IO.IOException"/>
public static TestJobSysDirWithDFS.TestResult LaunchWordCount(JobConf conf, Path inDir, Path outDir, string input, int numMaps, int numReduces, string sysDir)
{
    FileSystem inFs = inDir.GetFileSystem(conf);
    FileSystem outFs = outDir.GetFileSystem(conf);
    outFs.Delete(outDir, true);
    if (!inFs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        DataOutputStream file = inFs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));
    // The keys are words (strings).
    conf.SetOutputKeyClass(typeof(Text));
    // The values are counts (ints).
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetMapperClass(typeof(WordCount.MapClass));
    conf.SetCombinerClass(typeof(WordCount.Reduce));
    conf.SetReducerClass(typeof(WordCount.Reduce));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    conf.Set(JTConfig.JtSystemDir, "/tmp/subru/mapred/system");
    JobClient jobClient = new JobClient(conf);
    RunningJob job = JobClient.RunJob(conf);
    // Check that the system dir set by the job client is not used.
    NUnit.Framework.Assert.IsFalse(FileSystem.Get(conf).Exists(new Path(conf.Get(JTConfig.JtSystemDir))));
    // Check that the job tracker's own ("custom") system dir is the one
    // propagated to the client, not the client-set value above.
    NUnit.Framework.Assert.IsFalse(sysDir.Contains("/tmp/subru/mapred/system"));
    NUnit.Framework.Assert.IsTrue(sysDir.Contains("custom"));
    return new TestJobSysDirWithDFS.TestResult(job, MapReduceTestUtil.ReadOutput(outDir, conf));
}
/// <exception cref="System.IO.IOException"/>
private string LaunchWordCount(JobConf conf, string input, int numMaps, int numReduces)
{
    Path inDir = new Path("testing/wc/input");
    Path outDir = new Path("testing/wc/output");
    // Hack for the local FS, which has no concept of a 'mounting point'.
    if (IsLocalFS())
    {
        string localPathRoot = Runtime.GetProperty("test.build.data", "/tmp").ToString().Replace(' ', '+');
        inDir = new Path(localPathRoot, inDir);
        outDir = new Path(localPathRoot, outDir);
    }
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(outDir, true);
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));
    // The keys are words (strings).
    conf.SetOutputKeyClass(typeof(Text));
    // The values are counts (ints).
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetMapperClass(typeof(WordCount.MapClass));
    conf.SetCombinerClass(typeof(WordCount.Reduce));
    conf.SetReducerClass(typeof(WordCount.Reduce));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    JobClient.RunJob(conf);
    return MapReduceTestUtil.ReadOutput(outDir, conf);
}
/// <exception cref="System.IO.IOException"/>
internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults, JobConf conf)
{
    Path wordExec = new Path("testing/bin/application");
    JobConf job = null;
    if (conf == null)
    {
        job = mr.CreateJobConf();
    }
    else
    {
        job = new JobConf(conf);
    }
    job.SetNumMapTasks(numMaps);
    job.SetNumReduceTasks(numReduces);
    {
        FileSystem fs = dfs.GetFileSystem();
        fs.Delete(wordExec.GetParent(), true);
        fs.CopyFromLocalFile(program, wordExec);
        Submitter.SetExecutable(job, fs.MakeQualified(wordExec).ToString());
        Submitter.SetIsJavaRecordReader(job, true);
        Submitter.SetIsJavaRecordWriter(job, true);
        FileInputFormat.SetInputPaths(job, inputPath);
        FileOutputFormat.SetOutputPath(job, outputPath);
        RunningJob rJob = null;
        if (numReduces == 0)
        {
            // JobSubmit returns immediately, so poll for completion ourselves;
            // RunJob (in the else branch) blocks until the job finishes.
            rJob = Submitter.JobSubmit(job);
            while (!rJob.IsComplete())
            {
                try
                {
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception ie)
                {
                    throw new RuntimeException(ie);
                }
            }
        }
        else
        {
            rJob = Submitter.RunJob(job);
        }
        NUnit.Framework.Assert.IsTrue("pipes job failed", rJob.IsSuccessful());
        Counters counters = rJob.GetCounters();
        Counters.Group wordCountCounters = counters.GetGroup("WORDCOUNT");
        int numCounters = 0;
        foreach (Counters.Counter c in wordCountCounters)
        {
            System.Console.Out.WriteLine(c);
            ++numCounters;
        }
        NUnit.Framework.Assert.IsTrue("No counters found!", (numCounters > 0));
    }
    IList<string> results = new AList<string>();
    foreach (Path p in FileUtil.Stat2Paths(dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter())))
    {
        results.AddItem(MapReduceTestUtil.ReadOutput(p, job));
    }
    NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length, results.Count);
    for (int i = 0; i < results.Count; i++)
    {
        NUnit.Framework.Assert.AreEqual("pipes program " + program + " output " + i + " wrong", expectedResults[i], results[i]);
    }
}
/// <exception cref="System.Exception"/>
public static void Launch()
{
    JobConf conf = new JobConf(typeof(Org.Apache.Hadoop.Mapred.Lib.Aggregate.TestAggregates));
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 20;
    Path OutputDir = new Path("build/test/output_for_aggregates_test");
    Path InputDir = new Path("build/test/input_for_aggregates_test");
    string inputFile = "input.txt";
    fs.Delete(InputDir, true);
    fs.Mkdirs(InputDir);
    fs.Delete(OutputDir, true);
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append("max\t19\n");
    expectedOutput.Append("min\t1\n");
    FSDataOutputStream fileOut = fs.Create(new Path(InputDir, inputFile));
    for (int i = 1; i < numOfInputLines; i++)
    {
        expectedOutput.Append("count_").Append(idFormat.Format(i));
        expectedOutput.Append("\t").Append(i).Append("\n");
        inputData.Append(idFormat.Format(i));
        for (int j = 1; j < i; j++)
        {
            inputData.Append(" ").Append(idFormat.Format(i));
        }
        inputData.Append("\n");
    }
    expectedOutput.Append("value_as_string_max\t9\n");
    expectedOutput.Append("value_as_string_min\t1\n");
    expectedOutput.Append("uniq_count\t15\n");
    fileOut.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
    fileOut.Close();
    System.Console.Out.WriteLine("inputData:");
    System.Console.Out.WriteLine(inputData.ToString());
    JobConf job = new JobConf(conf, typeof(Org.Apache.Hadoop.Mapred.Lib.Aggregate.TestAggregates));
    FileInputFormat.SetInputPaths(job, InputDir);
    job.SetInputFormat(typeof(TextInputFormat));
    FileOutputFormat.SetOutputPath(job, OutputDir);
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetMapOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetMapOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetNumReduceTasks(1);
    job.SetMapperClass(typeof(ValueAggregatorMapper));
    job.SetReducerClass(typeof(ValueAggregatorReducer));
    job.SetCombinerClass(typeof(ValueAggregatorCombiner));
    job.SetInt("aggregator.descriptor.num", 1);
    job.Set("aggregator.descriptor.0", "UserDefined,org.apache.hadoop.mapred.lib.aggregate.AggregatorTests");
    job.SetLong("aggregate.max.num.unique.values", 14);
    JobClient.RunJob(job);
    // Finally, compare the actual output with the expected output. The job may
    // emit more records than we verify, so compare only the expected prefix.
    Path outPath = new Path(OutputDir, "part-00000");
    string outdata = MapReduceTestUtil.ReadOutput(outPath, job);
    System.Console.Out.WriteLine("full out data:");
    System.Console.Out.WriteLine(outdata);
    outdata = Sharpen.Runtime.Substring(outdata, 0, expectedOutput.ToString().Length);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata);
    //fs.Delete(OutputDir, true);
    fs.Delete(InputDir, true);
}
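// Hedged sketch: the loop above writes line i as idFormat.Format(i) repeated
// i times, so the aggregate framework's count for that id is exactly i --
// which is where each "count_<id>\t<i>" line of expectedOutput comes from.
// BuildAggregateInput is a hypothetical stand-alone rewrite of that loop for
// illustration; idFormat is assumed to zero-pad to two digits.
private static string BuildAggregateInput(int numOfInputLines)
{
    var sb = new StringBuilder();
    for (int i = 1; i < numOfInputLines; i++)
    {
        string id = i.ToString("00"); // stand-in for idFormat.Format(i)
        sb.Append(id);
        for (int j = 1; j < i; j++)
        {
            sb.Append(' ').Append(id);
        }
        sb.Append('\n'); // line i now holds id(i) exactly i times
    }
    return sb.ToString(); // "01\n02 02\n03 03 03\n..."
}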
/// <exception cref="System.Exception"/>
public virtual void TestChain()
{
    Path inDir = new Path(localPathRoot, "testing/chain/input");
    Path outDir = new Path(localPathRoot, "testing/chain/output");
    string input = "1\n2\n";
    string expectedOutput = "0\t1ABCRDEF\n2\t2ABCRDEF\n";
    Configuration conf = CreateJobConf();
    CleanFlags(conf);
    conf.Set("a", "X");
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");
    Configuration mapAConf = new Configuration(false);
    mapAConf.Set("a", "A");
    ChainMapper.AddMapper(job, typeof(TestMapReduceChain.AMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), mapAConf);
    ChainMapper.AddMapper(job, typeof(TestMapReduceChain.BMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainMapper.AddMapper(job, typeof(TestMapReduceChain.CMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    Configuration reduceConf = new Configuration(false);
    reduceConf.Set("a", "C");
    ChainReducer.SetReducer(job, typeof(TestMapReduceChain.RReduce), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), reduceConf);
    ChainReducer.AddMapper(job, typeof(TestMapReduceChain.DMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    Configuration mapEConf = new Configuration(false);
    mapEConf.Set("a", "E");
    ChainReducer.AddMapper(job, typeof(TestMapReduceChain.EMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), mapEConf);
    ChainReducer.AddMapper(job, typeof(TestMapReduceChain.FMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
    string str = "flag not set";
    // Every stage's setup hook must have run.
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.setup.A"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.setup.B"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.setup.C"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "reduce.setup.R"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.setup.D"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.setup.E"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.setup.F"));
    // Every stage must have seen the value produced by the previous stage.
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.A.value.1"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.A.value.2"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.B.value.1A"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.B.value.2A"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.C.value.1AB"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.C.value.2AB"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "reduce.R.value.1ABC"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "reduce.R.value.2ABC"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.D.value.1ABCR"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.D.value.2ABCR"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.E.value.1ABCRD"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.E.value.2ABCRD"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.F.value.1ABCRDE"));
    NUnit.Framework.Assert.IsTrue(str, GetFlag(conf, "map.F.value.2ABCRDE"));
    // Every stage's cleanup hook must have run.
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "map.cleanup.A"));
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "map.cleanup.B"));
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "map.cleanup.C"));
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "reduce.cleanup.R"));
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "map.cleanup.D"));
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "map.cleanup.E"));
    NUnit.Framework.Assert.IsTrue(GetFlag(conf, "map.cleanup.F"));
    NUnit.Framework.Assert.AreEqual("Outputs don't match", expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf));
}
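// Hedged sketch of the chain's data flow, assuming each of AMap..FMap
// appends its letter to the value and RReduce appends "R" (a reading of the
// helper classes inferred from expectedOutput and the flag names above, not
// from their actual source): a value "1" passes through the mapper chain
// A->B->C, the reducer R, then the post-reducer mappers D->E->F.
private static string TraceChainValue(string value)
{
    foreach (char stage in "ABC") value += stage; // ChainMapper.AddMapper stages
    value += 'R';                                 // ChainReducer.SetReducer
    foreach (char stage in "DEF") value += stage; // ChainReducer.AddMapper stages
    return value; // TraceChainValue("1") == "1ABCRDEF", as asserted above
}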
/// <exception cref="System.Exception"/>
public static void Launch()
{
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 20;
    Path OutputDir = new Path("build/test/output_for_aggregates_test");
    Path InputDir = new Path("build/test/input_for_aggregates_test");
    string inputFile = "input.txt";
    fs.Delete(InputDir, true);
    fs.Mkdirs(InputDir);
    fs.Delete(OutputDir, true);
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append("max\t19\n");
    expectedOutput.Append("min\t1\n");
    FSDataOutputStream fileOut = fs.Create(new Path(InputDir, inputFile));
    for (int i = 1; i < numOfInputLines; i++)
    {
        expectedOutput.Append("count_").Append(idFormat.Format(i));
        expectedOutput.Append("\t").Append(i).Append("\n");
        inputData.Append(idFormat.Format(i));
        for (int j = 1; j < i; j++)
        {
            inputData.Append(" ").Append(idFormat.Format(i));
        }
        inputData.Append("\n");
    }
    expectedOutput.Append("value_as_string_max\t9\n");
    expectedOutput.Append("value_as_string_min\t1\n");
    expectedOutput.Append("uniq_count\t15\n");
    fileOut.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
    fileOut.Close();
    System.Console.Out.WriteLine("inputData:");
    System.Console.Out.WriteLine(inputData.ToString());
    conf.SetInt(ValueAggregatorJobBase.DescriptorNum, 1);
    conf.Set(ValueAggregatorJobBase.Descriptor + ".0", "UserDefined,org.apache.hadoop.mapreduce.lib.aggregate.AggregatorTests");
    conf.SetLong(UniqValueCount.MaxNumUniqueValues, 14);
    Job job = Job.GetInstance(conf);
    FileInputFormat.SetInputPaths(job, InputDir);
    job.SetInputFormatClass(typeof(TextInputFormat));
    FileOutputFormat.SetOutputPath(job, OutputDir);
    job.SetOutputFormatClass(typeof(TextOutputFormat));
    job.SetMapOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetMapOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetNumReduceTasks(1);
    job.SetMapperClass(typeof(ValueAggregatorMapper));
    job.SetReducerClass(typeof(ValueAggregatorReducer));
    job.SetCombinerClass(typeof(ValueAggregatorCombiner));
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful());
    // Finally, compare the actual output with the expected output. The job may
    // emit more records than we verify, so compare only the expected prefix.
    string outdata = MapReduceTestUtil.ReadOutput(OutputDir, conf);
    System.Console.Out.WriteLine("full out data:");
    System.Console.Out.WriteLine(outdata);
    outdata = Sharpen.Runtime.Substring(outdata, 0, expectedOutput.ToString().Length);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata);
    fs.Delete(OutputDir, true);
    fs.Delete(InputDir, true);
}
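// Hedged sketch of what a "UserDefined" descriptor such as AggregatorTests
// conceptually does: it turns each input record into (aggregation-id, value)
// pairs, and the framework routes each pair to the aggregator named by the
// id (max, min, count_<id>, uniq_count, ...). DescribeRecord is a
// hypothetical illustration inferred from expectedOutput above, not the real
// org.apache.hadoop AggregatorTests implementation.
private static IEnumerable<KeyValuePair<string, string>> DescribeRecord(string id)
{
    yield return new KeyValuePair<string, string>("max", id);          // numeric max over all ids
    yield return new KeyValuePair<string, string>("min", id);          // numeric min over all ids
    yield return new KeyValuePair<string, string>("count_" + id, "1"); // summed into a per-id count
    yield return new KeyValuePair<string, string>("uniq_count", id);   // distinct values, capped by
                                                                       // aggregate.max.num.unique.values
}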