/// <summary>
/// Test fixture setup: registers two test users ("alice" and "bob", both in
/// the "users" group) with the security layer and brings up a single-tracker
/// MiniMR cluster backed by the local file system (no DFS).
/// </summary>
public virtual void Setup()
{
    // Two distinct users in the same group, for permission-related cases.
    string[] sharedGroup = new string[] { "users" };
    user1 = UserGroupInformation.CreateUserForTesting("alice", sharedGroup);
    user2 = UserGroupInformation.CreateUserForTesting("bob", new string[] { "users" });
    // 0 name-node port, 0 job-tracker port (auto-assign), 1 task tracker,
    // local file system URI, 1 directory per tracker.
    cluster = new MiniMRCluster(0, 0, 1, "file:///", 1);
}
/// <summary>
/// Brings up a MiniDFS + MiniMR cluster configured with simple authentication
/// and a proxy-user mapping for the current OS user, so impersonation (doAs)
/// test cases can run against it.
/// NOTE(review): in SOURCE the '///' comment and the method body shared one
/// physical line, which would have commented the code out; reformatted here.
/// </summary>
/// <exception cref="System.Exception"/>
protected override void SetUp()
{
    base.SetUp();
    // Hadoop daemons require a log directory; default it for bare test runs.
    if (Runtime.GetProperty("hadoop.log.dir") == null)
    {
        Runtime.SetProperty("hadoop.log.dir", "/tmp");
    }
    int taskTrackers = 2;
    int dataNodes = 2;
    // The current OS user becomes the proxy ("super") user.
    string proxyUser = Runtime.GetProperty("user.name");
    string proxyGroup = "g";
    // Allow proxying from loopback plus every address the local host name
    // resolves to, so the test passes regardless of interface configuration.
    StringBuilder sb = new StringBuilder();
    sb.Append("127.0.0.1,localhost");
    foreach (IPAddress i in IPAddress.GetAllByName(Sharpen.Runtime.GetLocalHost().GetHostName()))
    {
        sb.Append(",").Append(i.ToString());
    }
    JobConf conf = new JobConf();
    conf.Set("dfs.block.access.token.enable", "false");
    conf.Set("dfs.permissions", "true");
    conf.Set("hadoop.security.authentication", "simple");
    // proxyUser may impersonate members of proxyGroup when connecting from
    // the hosts collected above (hadoop.proxyuser.* impersonation config).
    conf.Set("hadoop.proxyuser." + proxyUser + ".hosts", sb.ToString());
    conf.Set("hadoop.proxyuser." + proxyUser + ".groups", proxyGroup);
    string[] userGroups = new string[] { proxyGroup };
    // "u1" shares the proxy group (impersonation should be allowed);
    // "u2" is in a different group "gg" (impersonation should be refused).
    UserGroupInformation.CreateUserForTesting(proxyUser, userGroups);
    UserGroupInformation.CreateUserForTesting("u1", userGroups);
    UserGroupInformation.CreateUserForTesting("u2", new string[] { "gg" });
    dfsCluster = new MiniDFSCluster.Builder(conf).NumDataNodes(dataNodes).Build();
    FileSystem fileSystem = dfsCluster.GetFileSystem();
    // World-writable scratch/user dirs; the MR system dir stays owner-only.
    fileSystem.Mkdirs(new Path("/tmp"));
    fileSystem.Mkdirs(new Path("/user"));
    fileSystem.Mkdirs(new Path("/hadoop/mapred/system"));
    fileSystem.SetPermission(new Path("/tmp"), FsPermission.ValueOf("-rwxrwxrwx"));
    fileSystem.SetPermission(new Path("/user"), FsPermission.ValueOf("-rwxrwxrwx"));
    fileSystem.SetPermission(new Path("/hadoop/mapred/system"), FsPermission.ValueOf("-rwx------"));
    string nnURI = fileSystem.GetUri().ToString();
    int numDirs = 1;
    string[] racks = null;
    string[] hosts = null;
    mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf);
    // Activate the proxy-user settings in the live configuration.
    ProxyUsers.RefreshSuperUserGroupsConfiguration(conf);
}
/// <summary>
/// End-to-end pipes test: starts MiniDFS + MiniMR clusters, then runs the
/// C++ word-count programs through several configurations (2 reduces,
/// 0 reduces/no sort, fixed partition, and the non-piped variant) and
/// verifies their output. Skipped entirely when "compile.c++" is unset,
/// since the native executables would not have been built.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestPipes()
{
    if (Runtime.GetProperty("compile.c++") == null)
    {
        Log.Info("compile.c++ is not defined, so skipping TestPipes");
        return;
    }
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    Path inputPath = new Path("testing/in");
    Path outputPath = new Path("testing/out");
    try
    {
        int numSlaves = 2;
        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(numSlaves).Build();
        mr = new MiniMRCluster(numSlaves, dfs.GetFileSystem().GetUri().ToString(), 1);
        WriteInputFile(dfs.GetFileSystem(), inputPath);
        // 3 maps / 2 reduces: standard two-split output.
        RunProgram(mr, dfs, wordCountSimple, inputPath, outputPath, 3, 2, twoSplitOutput, null);
        Cleanup(dfs.GetFileSystem(), outputPath);
        // 0 reduces: map output goes out unsorted.
        RunProgram(mr, dfs, wordCountSimple, inputPath, outputPath, 3, 0, noSortOutput, null);
        Cleanup(dfs.GetFileSystem(), outputPath);
        // Partitioner variant with a fixed partition assignment.
        RunProgram(mr, dfs, wordCountPart, inputPath, outputPath, 3, 2, fixedPartitionOutput, null);
        RunNonPipedProgram(mr, dfs, wordCountNoPipes, null);
        mr.WaitUntilIdle();
    }
    finally
    {
        // FIX: guard shutdowns with null checks. If cluster construction threw
        // above, mr (and possibly dfs) is still null; an unguarded Shutdown()
        // raised a NullReferenceException here that masked the real failure.
        // This also matches the shutdown pattern used by TestLazyOutput.
        if (mr != null)
        {
            mr.Shutdown();
        }
        if (dfs != null)
        {
            dfs.Shutdown();
        }
    }
}
/// <summary>
/// Run a map/reduce word count that does all of the map input and reduce
/// output directly rather than sending it back up to Java.
/// Writes two small local input files, copies the native executable into
/// DFS, serializes the job config to job.xml, and drives the job through
/// the pipes Submitter command line with 2 reduces, asserting exit code 0.
/// </summary>
/// <param name="mr">The mini mr cluster</param>
/// <param name="dfs">the dfs cluster</param>
/// <param name="program">the program to run</param>
/// <param name="conf">optional base job configuration; when null a fresh one is created from the cluster</param>
/// <exception cref="System.IO.IOException"/>
internal static void RunNonPipedProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, JobConf conf)
{
    JobConf job;
    if (conf == null)
    {
        job = mr.CreateJobConf();
    }
    else
    {
        job = new JobConf(conf);
    }
    job.SetInputFormat(typeof(WordCountInputFormat));
    FileSystem local = FileSystem.GetLocal(job);
    // All scratch paths live under test.build.data on the local FS.
    Path testDir = new Path("file:" + Runtime.GetProperty("test.build.data"), "pipes");
    Path inDir = new Path(testDir, "input");
    nonPipedOutDir = new Path(testDir, "output");
    Path wordExec = new Path("testing/bin/application");
    Path jobXml = new Path(testDir, "job.xml");
    {
        // Stage the native executable in DFS, replacing any previous copy.
        FileSystem fs = dfs.GetFileSystem();
        fs.Delete(wordExec.GetParent(), true);
        fs.CopyFromLocalFile(program, wordExec);
    }
    // First input split.
    DataOutputStream @out = local.Create(new Path(inDir, "part0"));
    @out.WriteBytes("i am a silly test\n");
    @out.WriteBytes("you are silly\n");
    @out.WriteBytes("i am a cat test\n");
    @out.WriteBytes("you is silly\n");
    @out.WriteBytes("i am a billy test\n");
    @out.WriteBytes("hello are silly\n");
    @out.Close();
    // Second input split.
    @out = local.Create(new Path(inDir, "part1"));
    @out.WriteBytes("mall world things drink java\n");
    @out.WriteBytes("hall silly cats drink java\n");
    @out.WriteBytes("all dogs bow wow\n");
    @out.WriteBytes("hello drink java\n");
    @out.Close();
    // Fresh, world-writable output dir (reduce tasks write directly to it).
    local.Delete(nonPipedOutDir, true);
    local.Mkdirs(nonPipedOutDir, new FsPermission(FsAction.All, FsAction.All, FsAction.All));
    // Serialize the configured job so Submitter can pick it up via -conf.
    @out = local.Create(jobXml);
    job.WriteXml(@out);
    @out.Close();
    System.Console.Error.WriteLine("About to run: Submitter -conf " + jobXml + " -input " + inDir + " -output " + nonPipedOutDir + " -program " + dfs.GetFileSystem().MakeQualified(wordExec));
    try
    {
        int ret = ToolRunner.Run(new Submitter(), new string[] { "-conf", jobXml.ToString(), "-input", inDir.ToString(), "-output", nonPipedOutDir.ToString(), "-program", dfs.GetFileSystem().MakeQualified(wordExec).ToString(), "-reduces", "2" });
        NUnit.Framework.Assert.AreEqual(0, ret);
    }
    catch (Exception e)
    {
        // Convert any exception into an assertion failure with its stack trace.
        NUnit.Framework.Assert.IsTrue("got exception: " + StringUtils.StringifyException(e), false);
    }
}
/// <summary>
/// Submits a pipes job for the given native program and verifies it: stages
/// the executable in DFS, runs the job (polling for completion when there
/// are no reduces, since Submitter.JobSubmit does not block), asserts
/// success, checks that the program emitted WORDCOUNT counters, and compares
/// each output file against the corresponding entry in expectedResults.
/// </summary>
/// <exception cref="System.IO.IOException"/>
internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults, JobConf conf)
{
    Path wordExec = new Path("testing/bin/application");
    JobConf job = null;
    if (conf == null)
    {
        job = mr.CreateJobConf();
    }
    else
    {
        job = new JobConf(conf);
    }
    job.SetNumMapTasks(numMaps);
    job.SetNumReduceTasks(numReduces);
    {
        // Stage the native executable and configure the pipes job to use it
        // with Java-side record reader/writer.
        FileSystem fs = dfs.GetFileSystem();
        fs.Delete(wordExec.GetParent(), true);
        fs.CopyFromLocalFile(program, wordExec);
        Submitter.SetExecutable(job, fs.MakeQualified(wordExec).ToString());
        Submitter.SetIsJavaRecordReader(job, true);
        Submitter.SetIsJavaRecordWriter(job, true);
        FileInputFormat.SetInputPaths(job, inputPath);
        FileOutputFormat.SetOutputPath(job, outputPath);
        RunningJob rJob = null;
        if (numReduces == 0)
        {
            // Map-only: submit asynchronously and poll until complete.
            rJob = Submitter.JobSubmit(job);
            while (!rJob.IsComplete())
            {
                try
                {
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception ie)
                {
                    throw new RuntimeException(ie);
                }
            }
        }
        else
        {
            // With reduces: RunJob blocks until the job finishes.
            rJob = Submitter.RunJob(job);
        }
        NUnit.Framework.Assert.IsTrue("pipes job failed", rJob.IsSuccessful());
        // The native program must have reported at least one WORDCOUNT counter.
        Counters counters = rJob.GetCounters();
        Counters.Group wordCountCounters = counters.GetGroup("WORDCOUNT");
        int numCounters = 0;
        foreach (Counters.Counter c in wordCountCounters)
        {
            System.Console.Out.WriteLine(c);
            ++numCounters;
        }
        NUnit.Framework.Assert.IsTrue("No counters found!", (numCounters > 0));
    }
    // Read every output file and compare, in order, against expectedResults
    // (one expected string per output file).
    IList<string> results = new AList<string>();
    foreach (Path p in FileUtil.Stat2Paths(dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter())))
    {
        results.AddItem(MapReduceTestUtil.ReadOutput(p, job));
    }
    NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length, results.Count);
    for (int i = 0; i < results.Count; i++)
    {
        NUnit.Framework.Assert.AreEqual("pipes program " + program + " output " + i + " wrong", expectedResults[i], results[i]);
    }
}
/// <summary>
/// Verifies lazy output: when the lazy-output flag is on, tasks that emit no
/// records create no output file, so the file count drops below the task
/// count; with the flag off every task produces a file.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestLazyOutput()
{
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    try
    {
        Configuration conf = new Configuration();
        // Start the mini-MR and mini-DFS clusters
        dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(NumHadoopSlaves).Build();
        fileSys = dfs.GetFileSystem();
        mr = new MiniMRCluster(NumHadoopSlaves, fileSys.GetUri().ToString(), 1);
        int numReducers = 2;
        int numMappers = NumHadoopSlaves * NumMapsPerNode;
        CreateInput(fileSys, numMappers);
        Path output1 = new Path("/testlazy/output1");
        // Test 1.
        RunTestLazyOutput(mr.CreateJobConf(), output1, numReducers, true);
        Path[] fileList = FileUtil.Stat2Paths(fileSys.ListStatus(output1, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i = 0; i < fileList.Length; ++i)
        {
            System.Console.Out.WriteLine("Test1 File list[" + i + "]" + ": " + fileList[i]);
        }
        // One reducer produced nothing, so with lazy output one fewer file.
        NUnit.Framework.Assert.IsTrue(fileList.Length == (numReducers - 1));
        // Test 2. 0 Reducers, maps directly write to the output files
        Path output2 = new Path("/testlazy/output2");
        RunTestLazyOutput(mr.CreateJobConf(), output2, 0, true);
        fileList = FileUtil.Stat2Paths(fileSys.ListStatus(output2, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i_1 = 0; i_1 < fileList.Length; ++i_1)
        {
            System.Console.Out.WriteLine("Test2 File list[" + i_1 + "]" + ": " + fileList[i_1]);
        }
        // One map emitted no records -> one fewer map output file.
        NUnit.Framework.Assert.IsTrue(fileList.Length == numMappers - 1);
        // Test 3. 0 Reducers, but flag is turned off
        Path output3 = new Path("/testlazy/output3");
        RunTestLazyOutput(mr.CreateJobConf(), output3, 0, false);
        fileList = FileUtil.Stat2Paths(fileSys.ListStatus(output3, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i_2 = 0; i_2 < fileList.Length; ++i_2)
        {
            System.Console.Out.WriteLine("Test3 File list[" + i_2 + "]" + ": " + fileList[i_2]);
        }
        // Lazy output disabled: every map task creates a file, empty or not.
        NUnit.Framework.Assert.IsTrue(fileList.Length == numMappers);
    }
    finally
    {
        if (dfs != null)
        {
            dfs.Shutdown();
        }
        if (mr != null)
        {
            mr.Shutdown();
        }
    }
}