/// <summary>
/// Run a map/reduce word count that does all of the map input and reduce
/// output directly rather than sending it back up to Java.
/// </summary>
/// <remarks>
/// Copies <paramref name="program"/> into the DFS as "testing/bin/application",
/// writes two small input files and a serialized job configuration to the
/// local file system, then launches <c>Submitter</c> through
/// <c>ToolRunner</c> with two reduces and asserts that it returns 0.
/// The output directory is stored in <c>nonPipedOutDir</c>.
/// </remarks>
/// <param name="mr">The mini mr cluster</param>
/// <param name="dfs">the dfs cluster</param>
/// <param name="program">the program to run</param>
/// <param name="conf">
/// configuration to copy for the job; when null a new one is obtained from
/// <paramref name="mr"/>
/// </param>
/// <exception cref="System.IO.IOException"/>
internal static void RunNonPipedProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, JobConf conf)
{
    JobConf job;
    if (conf == null)
    {
        job = mr.CreateJobConf();
    }
    else
    {
        job = new JobConf(conf);
    }
    job.SetInputFormat(typeof(WordCountInputFormat));

    FileSystem local = FileSystem.GetLocal(job);
    Path testDir = new Path("file:" + Runtime.GetProperty("test.build.data"), "pipes");
    Path inDir = new Path(testDir, "input");
    nonPipedOutDir = new Path(testDir, "output");
    Path wordExec = new Path("testing/bin/application");
    Path jobXml = new Path(testDir, "job.xml");

    // Stage the executable in the DFS, replacing any previous copy.
    {
        FileSystem fs = dfs.GetFileSystem();
        fs.Delete(wordExec.GetParent(), true);
        fs.CopyFromLocalFile(program, wordExec);
    }

    // Each stream is closed in a finally block so that a failed write does
    // not leave the underlying file handle open.
    DataOutputStream @out = local.Create(new Path(inDir, "part0"));
    try
    {
        @out.WriteBytes("i am a silly test\n");
        @out.WriteBytes("you are silly\n");
        @out.WriteBytes("i am a cat test\n");
        @out.WriteBytes("you is silly\n");
        @out.WriteBytes("i am a billy test\n");
        @out.WriteBytes("hello are silly\n");
    }
    finally
    {
        @out.Close();
    }

    @out = local.Create(new Path(inDir, "part1"));
    try
    {
        @out.WriteBytes("mall world things drink java\n");
        @out.WriteBytes("hall silly cats drink java\n");
        @out.WriteBytes("all dogs bow wow\n");
        @out.WriteBytes("hello drink java\n");
    }
    finally
    {
        @out.Close();
    }

    // Start from an empty, world-accessible output directory.
    local.Delete(nonPipedOutDir, true);
    local.Mkdirs(nonPipedOutDir, new FsPermission(FsAction.All, FsAction.All, FsAction.All));

    // Persist the job configuration so it can be handed to Submitter via -conf.
    @out = local.Create(jobXml);
    try
    {
        job.WriteXml(@out);
    }
    finally
    {
        @out.Close();
    }

    System.Console.Error.WriteLine("About to run: Submitter -conf " + jobXml + " -input " + inDir + " -output " + nonPipedOutDir + " -program " + dfs.GetFileSystem().MakeQualified(wordExec));
    try
    {
        int ret = ToolRunner.Run(new Submitter(), new string[] { "-conf", jobXml.ToString(), "-input", inDir.ToString(), "-output", nonPipedOutDir.ToString(), "-program", dfs.GetFileSystem().MakeQualified(wordExec).ToString(), "-reduces", "2" });
        NUnit.Framework.Assert.AreEqual(0, ret);
    }
    catch (Exception e)
    {
        // Report any failure from the submission as a test failure with the full trace.
        NUnit.Framework.Assert.IsTrue("got exception: " + StringUtils.StringifyException(e), false);
    }
}
/// <summary>
/// Returns the job configuration created by the <c>mrCluster</c> member.
/// </summary>
/// <returns>the result of <c>mrCluster.CreateJobConf()</c></returns>
protected internal virtual JobConf GetJobConf()
{
    JobConf clusterConf = mrCluster.CreateJobConf();
    return clusterConf;
}
/// <summary>
/// Runs a pipes job with the given executable and checks its counters and
/// output files against the expected results.
/// </summary>
/// <param name="mr">cluster used to create a job configuration when <paramref name="conf"/> is null</param>
/// <param name="dfs">cluster whose file system receives the executable and is listed for output</param>
/// <param name="program">local path of the executable to copy into the DFS</param>
/// <param name="inputPath">job input path</param>
/// <param name="outputPath">job output path</param>
/// <param name="numMaps">number of map tasks to set on the job</param>
/// <param name="numReduces">number of reduce tasks; 0 submits the job and polls for completion</param>
/// <param name="expectedResults">expected content of each output file, in listing order</param>
/// <param name="conf">configuration to copy for the job, or null</param>
/// <exception cref="System.IO.IOException"/>
internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults, JobConf conf)
{
    Path executablePath = new Path("testing/bin/application");

    JobConf job;
    if (conf != null)
    {
        job = new JobConf(conf);
    }
    else
    {
        job = mr.CreateJobConf();
    }
    job.SetNumMapTasks(numMaps);
    job.SetNumReduceTasks(numReduces);

    // Stage the executable in the DFS and point the job at it.
    FileSystem clusterFs = dfs.GetFileSystem();
    fsCleanup:
    clusterFs.Delete(executablePath.GetParent(), true);
    clusterFs.CopyFromLocalFile(program, executablePath);
    string qualifiedExecutable = clusterFs.MakeQualified(executablePath).ToString();
    Submitter.SetExecutable(job, qualifiedExecutable);
    Submitter.SetIsJavaRecordReader(job, true);
    Submitter.SetIsJavaRecordWriter(job, true);
    FileInputFormat.SetInputPaths(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);

    RunningJob running;
    if (numReduces != 0)
    {
        running = Submitter.RunJob(job);
    }
    else
    {
        // Map-only job: submit it and poll once a second until it completes.
        running = Submitter.JobSubmit(job);
        while (!running.IsComplete())
        {
            try
            {
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception ie)
            {
                throw new RuntimeException(ie);
            }
        }
    }
    NUnit.Framework.Assert.IsTrue("pipes job failed", running.IsSuccessful());

    // Print every counter in the WORDCOUNT group and require at least one.
    Counters jobCounters = running.GetCounters();
    Counters.Group wordCountGroup = jobCounters.GetGroup("WORDCOUNT");
    int seenCounters = 0;
    foreach (Counters.Counter counter in wordCountGroup)
    {
        System.Console.Out.WriteLine(counter);
        seenCounters = seenCounters + 1;
    }
    NUnit.Framework.Assert.IsTrue("No counters found!", (seenCounters > 0));

    // Read back each output file and compare it with the expected content.
    Path[] outputFiles = FileUtil.Stat2Paths(dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()));
    IList<string> results = new AList<string>();
    foreach (Path outputFile in outputFiles)
    {
        string content = MapReduceTestUtil.ReadOutput(outputFile, job);
        results.AddItem(content);
    }
    NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length, results.Count);
    for (int idx = 0; idx < results.Count; idx++)
    {
        NUnit.Framework.Assert.AreEqual("pipes program " + program + " output " + idx + " wrong", expectedResults[idx], results[idx]);
    }
}
/// <summary>
/// Starts mini DFS and MR clusters, runs <c>RunTestLazyOutput</c> in three
/// configurations and checks the number of output files each one leaves.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestLazyOutput()
{
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    try
    {
        // Start the mini-MR and mini-DFS clusters
        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(NumHadoopSlaves).Build();
        FileSystem fileSys = dfs.GetFileSystem();
        mr = new MiniMRCluster(NumHadoopSlaves, fileSys.GetUri().ToString(), 1);

        int numReducers = 2;
        int numMappers = NumHadoopSlaves * NumMapsPerNode;
        CreateInput(fileSys, numMappers);

        // Test 1. Reducers present, flag turned on: expect one file fewer than reducers.
        Path output1 = new Path("/testlazy/output1");
        RunTestLazyOutput(mr.CreateJobConf(), output1, numReducers, true);
        Path[] test1Files = FileUtil.Stat2Paths(fileSys.ListStatus(output1, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int n = 0; n < test1Files.Length; ++n)
        {
            System.Console.Out.WriteLine("Test1 File list[" + n + "]" + ": " + test1Files[n]);
        }
        NUnit.Framework.Assert.IsTrue(test1Files.Length == (numReducers - 1));

        // Test 2. 0 Reducers, maps directly write to the output files
        Path output2 = new Path("/testlazy/output2");
        RunTestLazyOutput(mr.CreateJobConf(), output2, 0, true);
        Path[] test2Files = FileUtil.Stat2Paths(fileSys.ListStatus(output2, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int n = 0; n < test2Files.Length; ++n)
        {
            System.Console.Out.WriteLine("Test2 File list[" + n + "]" + ": " + test2Files[n]);
        }
        NUnit.Framework.Assert.IsTrue(test2Files.Length == numMappers - 1);

        // Test 3. 0 Reducers, but flag is turned off
        Path output3 = new Path("/testlazy/output3");
        RunTestLazyOutput(mr.CreateJobConf(), output3, 0, false);
        Path[] test3Files = FileUtil.Stat2Paths(fileSys.ListStatus(output3, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int n = 0; n < test3Files.Length; ++n)
        {
            System.Console.Out.WriteLine("Test3 File list[" + n + "]" + ": " + test3Files[n]);
        }
        NUnit.Framework.Assert.IsTrue(test3Files.Length == numMappers);
    }
    finally
    {
        // Tear down whichever clusters were started, DFS first.
        if (dfs != null)
        {
            dfs.Shutdown();
        }
        if (mr != null)
        {
            mr.Shutdown();
        }
    }
}