public virtual void TestStatusLimit()
{
    Path test = new Path(testRootTempDir, "testStatusLimit");
    Configuration conf = new Configuration();
    Path inDir = new Path(test, "in");
    Path outDir = new Path(test, "out");
    FileSystem fs = FileSystem.Get(conf);
    if (fs.Exists(inDir))
    {
        fs.Delete(inDir, true);
    }
    fs.Mkdirs(inDir);
    DataOutputStream file = fs.Create(new Path(inDir, "part-" + 0));
    file.WriteBytes("testStatusLimit");
    file.Close();
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }
    Job job = Job.GetInstance(conf, "testStatusLimit");
    job.SetMapperClass(typeof(TestReporter.StatusLimitMapper));
    job.SetNumReduceTasks(0);
    FileInputFormat.AddInputPath(job, inDir);
    FileOutputFormat.SetOutputPath(job, outDir);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
}
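// A minimal sketch of the mapper referenced above, assuming the usual shape of
// TestReporter.StatusLimitMapper (the real class is defined elsewhere in this
// test and may differ): it reports a task status string longer than the
// framework's status limit, so a successful run shows that over-long statuses
// are truncated rather than failing the job. The class name, the string
// length, and the override signature here are illustrative assumptions.
internal class StatusLimitMapperSketch : Mapper<LongWritable, Text, LongWritable, Text>
{
    protected override void Map(LongWritable key, Text value, Mapper.Context context)
    {
        StringBuilder sb = new StringBuilder(1024);
        for (int i = 0; i < 1024; i++)
        {
            sb.Append('a'); // build a deliberately over-long status string
        }
        context.SetStatus(sb.ToString());
    }
}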
/// <exception cref="System.IO.IOException"/>
internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces,
    Path sortInput, Path sortOutput)
{
    JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));
    jobConf.SetJobName("sortvalidate-record-checker");
    jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
    jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    jobConf.SetOutputKeyClass(typeof(BytesWritable));
    jobConf.SetOutputValueClass(typeof(IntWritable));
    jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
    jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.GetClusterStatus();
    if (noMaps == -1)
    {
        noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
    }
    if (noReduces == -1)
    {
        noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
        string sortReduces = jobConf.Get(ReducesPerHost);
        if (sortReduces != null)
        {
            noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
        }
    }
    jobConf.SetNumMapTasks(noMaps);
    jobConf.SetNumReduceTasks(noReduces);
    FileInputFormat.SetInputPaths(jobConf, sortInput);
    FileInputFormat.AddInputPath(jobConf, sortOutput);
    Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
    FileSystem fs = FileSystem.Get(defaults);
    if (fs.Exists(outputPath))
    {
        fs.Delete(outputPath, true);
    }
    FileOutputFormat.SetOutputPath(jobConf, outputPath);
    // Uncomment to run locally in a single process
    //jobConf.Set(JTConfig.JT, "local");
    Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
    System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on "
        + cluster.GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0]
        + ", " + inputPaths[1] + " into " + FileOutputFormat.GetOutputPath(jobConf)
        + " with " + noReduces + " reduces.");
    DateTime startTime = new DateTime();
    System.Console.Out.WriteLine("Job started: " + startTime);
    JobClient.RunJob(jobConf);
    DateTime endTime = new DateTime();
    System.Console.Out.WriteLine("Job ended: " + endTime);
    System.Console.Out.WriteLine("The job took "
        + (endTime.GetTime() - startTime.GetTime()) / 1000 + " seconds.");
}
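// A worked sketch of the reduce-count defaulting above (a hypothetical helper,
// not part of SortValidator): given the tracker count and max reduce-task
// capacity from ClusterStatus, it mirrors the rule used in CheckRecords. For
// example, 10 trackers with a capacity of 20 yields (int)(20 * 0.9) = 18
// reduces, unless a reduces-per-host override such as "2" gives 10 * 2 = 20.
internal static int SketchDefaultReduces(int taskTrackers, int maxReduceTasks, string reducesPerHost)
{
    int noReduces = (int)(maxReduceTasks * 0.9);
    if (reducesPerHost != null)
    {
        noReduces = taskTrackers * System.Convert.ToInt32(reducesPerHost);
    }
    return noReduces;
}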
/// <summary>Creates and runs an MR job.</summary>
/// <param name="conf"/>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void CreateAndRunJob(Configuration conf)
{
    JobConf job = new JobConf(conf);
    job.SetJarByClass(typeof(TestLineRecordReaderJobs));
    job.SetMapperClass(typeof(IdentityMapper));
    job.SetReducerClass(typeof(IdentityReducer));
    FileInputFormat.AddInputPath(job, inputDir);
    FileOutputFormat.SetOutputPath(job, outputDir);
    JobClient.RunJob(job);
}
public virtual void TestGetRunningJobFromJobClient()
{
    JobConf conf = new JobConf();
    conf.Set("mapreduce.framework.name", "local");
    FileInputFormat.AddInputPath(conf, CreateTempFile("in", "hello"));
    Path outputDir = new Path(TestRootDir, GetType().Name);
    outputDir.GetFileSystem(conf).Delete(outputDir, true);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    JobClient jc = new JobClient(conf);
    RunningJob runningJob = jc.SubmitJob(conf);
    NUnit.Framework.Assert.IsNotNull("Running job", runningJob);
    // Check that the running job can be retrieved by its ID
    RunningJob newRunningJob = jc.GetJob(runningJob.GetID());
    NUnit.Framework.Assert.IsNotNull("New running job", newRunningJob);
}
/// <summary>Create the job configuration.</summary>
private JobConf SetupJob(int numMaps, int numReduces, string jarFile)
{
    JobConf jobConf = new JobConf(GetConf());
    jobConf.SetJarByClass(typeof(MRBench));
    FileInputFormat.AddInputPath(jobConf, InputDir);
    jobConf.SetInputFormat(typeof(TextInputFormat));
    jobConf.SetOutputFormat(typeof(TextOutputFormat));
    jobConf.SetOutputValueClass(typeof(UTF8));
    jobConf.SetMapOutputKeyClass(typeof(UTF8));
    jobConf.SetMapOutputValueClass(typeof(UTF8));
    if (null != jarFile)
    {
        jobConf.SetJar(jarFile);
    }
    jobConf.SetMapperClass(typeof(MRBench.Map));
    jobConf.SetReducerClass(typeof(MRBench.Reduce));
    jobConf.SetNumMapTasks(numMaps);
    jobConf.SetNumReduceTasks(numReduces);
    jobConf.SetBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    return jobConf;
}
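// A hypothetical usage sketch, not part of MRBench itself: it assumes a driver
// that wants to time one iteration of the benchmark job produced by SetupJob.
// The helper name RunOneIteration is an invention for illustration; the timing
// shim (Runtime.CurrentTimeMillis) matches the style used elsewhere in this
// code base.
private long RunOneIteration(int numMaps, int numReduces, string jarFile)
{
    JobConf jobConf = SetupJob(numMaps, numReduces, jarFile);
    long start = Runtime.CurrentTimeMillis();
    JobClient.RunJob(jobConf); // blocks until the job completes
    return Runtime.CurrentTimeMillis() - start;
}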
public virtual void TestNewApis()
{
    Random r = new Random(Runtime.CurrentTimeMillis());
    Path tmpBaseDir = new Path("/tmp/wc-" + r.Next());
    Path inDir = new Path(tmpBaseDir, "input");
    Path outDir = new Path(tmpBaseDir, "output");
    string input = "The quick brown fox\nhas many silly\nred fox sox\n";
    FileSystem inFs = inDir.GetFileSystem(conf);
    FileSystem outFs = outDir.GetFileSystem(conf);
    outFs.Delete(outDir, true);
    if (!inFs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        DataOutputStream file = inFs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    Job job = Job.GetInstance(conf, "word count");
    job.SetJarByClass(typeof(TestLocalModeWithNewApis));
    job.SetMapperClass(typeof(TestLocalModeWithNewApis.TokenizerMapper));
    job.SetCombinerClass(typeof(TestLocalModeWithNewApis.IntSumReducer));
    job.SetReducerClass(typeof(TestLocalModeWithNewApis.IntSumReducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(IntWritable));
    FileInputFormat.AddInputPath(job, inDir);
    FileOutputFormat.SetOutputPath(job, outDir);
    NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(true));
    string output = ReadOutput(outDir, conf);
    NUnit.Framework.Assert.AreEqual("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n"
        + "quick\t1\nred\t1\nsilly\t1\nsox\t1\n", output);
    outFs.Delete(tmpBaseDir, true);
}
// generates the input for the benchmark
/// <summary>This is the main routine for launching the benchmark.</summary>
/// <remarks>
/// This is the main routine for launching the benchmark. It generates random,
/// non-splittable input data and uses sort for benchmarking, reporting the
/// effect of multiple sort-and-spill cycles versus a single sort and spill.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    Log.Info("Starting the benchmark for threaded spills");
    string version = "ThreadedMapBenchmark.0.0.1";
    System.Console.Out.WriteLine(version);
    string usage = "Usage: threadedmapbenchmark "
        + "[-dataSizePerMap <data size (in mb) per map, default is 128 mb>] "
        + "[-numSpillsPerMap <number of spills per map, default is 2>] "
        + "[-numMapsPerHost <number of maps per host, default is 1>]";
    int dataSizePerMap = 128; // in mb
    int numSpillsPerMap = 2;
    int numMapsPerHost = 1;
    JobConf masterConf = new JobConf(GetConf());
    // parse the command line
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].Equals("-dataSizePerMap"))
        {
            dataSizePerMap = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-numSpillsPerMap"))
        {
            numSpillsPerMap = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-numMapsPerHost"))
        {
            numMapsPerHost = System.Convert.ToInt32(args[++i]);
        }
        else
        {
            System.Console.Error.WriteLine(usage);
            System.Environment.Exit(-1);
        }
    }
    // verify the arguments
    if (dataSizePerMap < 1 || numSpillsPerMap < 1 || numMapsPerHost < 1)
    {
        System.Console.Error.WriteLine(usage);
        System.Environment.Exit(-1);
    }
    FileSystem fs = null;
    try
    {
        // use random-writer to generate the input data
        GenerateInputData(dataSizePerMap, numSpillsPerMap, numMapsPerHost, masterConf);
        // configure the job for sorting
        JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));
        job.SetJobName("threaded-map-benchmark-unspilled");
        job.SetJarByClass(typeof(ThreadedMapBenchmark));
        job.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat));
        job.SetOutputFormat(typeof(SequenceFileOutputFormat));
        job.SetOutputKeyClass(typeof(BytesWritable));
        job.SetOutputValueClass(typeof(BytesWritable));
        job.SetMapperClass(typeof(IdentityMapper));
        job.SetReducerClass(typeof(IdentityReducer));
        FileInputFormat.AddInputPath(job, InputDir);
        FileOutputFormat.SetOutputPath(job, OutputDir);
        JobClient client = new JobClient(job);
        ClusterStatus cluster = client.GetClusterStatus();
        job.SetNumMapTasks(numMapsPerHost * cluster.GetTaskTrackers());
        job.SetNumReduceTasks(1);
        // set mapreduce.task.io.sort.mb high enough to avoid spilling
        int ioSortMb = (int)Math.Ceil(Factor * dataSizePerMap);
        job.Set(JobContext.IoSortMb, ioSortMb.ToString());
        fs = FileSystem.Get(job);
        Log.Info("Running sort with 1 spill per map");
        long startTime = Runtime.CurrentTimeMillis();
        JobClient.RunJob(job);
        long endTime = Runtime.CurrentTimeMillis();
        Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
        fs.Delete(OutputDir, true);
        // set mapreduce.task.io.sort.mb low enough to force multiple spills
        JobConf spilledJob = new JobConf(job, typeof(ThreadedMapBenchmark));
        ioSortMb = (int)Math.Ceil(Factor * Math.Ceil((double)dataSizePerMap / numSpillsPerMap));
        spilledJob.Set(JobContext.IoSortMb, ioSortMb.ToString());
        spilledJob.SetJobName("threaded-map-benchmark-spilled");
        spilledJob.SetJarByClass(typeof(ThreadedMapBenchmark));
        Log.Info("Running sort with " + numSpillsPerMap + " spills per map");
        startTime = Runtime.CurrentTimeMillis();
        JobClient.RunJob(spilledJob);
        endTime = Runtime.CurrentTimeMillis();
        Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
    }
    finally
    {
        if (fs != null)
        {
            fs.Delete(BaseDir, true);
        }
    }
    return 0;
}
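// A worked sketch of the spill arithmetic used in Run above (a hypothetical
// helper, not part of the benchmark): with the defaults dataSizePerMap = 128
// and numSpillsPerMap = 2, the unspilled run sets io.sort.mb to
// ceil(Factor * 128) so each map's output fits in one sort buffer, while the
// spilled run sets it to ceil(Factor * ceil(128 / 2)), forcing roughly
// numSpillsPerMap spills per map. Factor is the benchmark's safety multiplier
// (assumed > 1 to leave headroom for sort-buffer bookkeeping).
internal static int SketchIoSortMb(int dataSizePerMap, int numSpillsPerMap, double factor)
{
    double perSpillMb = Math.Ceil((double)dataSizePerMap / numSpillsPerMap);
    return (int)Math.Ceil(factor * perSpillMb);
}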
/// <exception cref="System.IO.IOException"/>
internal static void CheckRecords(Configuration defaults, Path sortInput, Path sortOutput)
{
    FileSystem inputfs = sortInput.GetFileSystem(defaults);
    FileSystem outputfs = sortOutput.GetFileSystem(defaults);
    FileSystem defaultfs = FileSystem.Get(defaults);
    JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordStatsChecker));
    jobConf.SetJobName("sortvalidate-recordstats-checker");
    int noSortReduceTasks = outputfs.ListStatus(sortOutput, sortPathsFilter).Length;
    jobConf.SetInt(SortReduces, noSortReduceTasks);
    int noSortInputpaths = inputfs.ListStatus(sortInput).Length;
    jobConf.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat));
    jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    jobConf.SetOutputKeyClass(typeof(IntWritable));
    jobConf.SetOutputValueClass(typeof(SortValidator.RecordStatsChecker.RecordStatsWritable));
    jobConf.SetMapperClass(typeof(SortValidator.RecordStatsChecker.Map));
    jobConf.SetCombinerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
    jobConf.SetReducerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
    jobConf.SetNumMapTasks(noSortReduceTasks);
    jobConf.SetNumReduceTasks(1);
    FileInputFormat.SetInputPaths(jobConf, sortInput);
    FileInputFormat.AddInputPath(jobConf, sortOutput);
    Path outputPath = new Path(new Path("/tmp", "sortvalidate"), UUID.RandomUUID().ToString());
    if (defaultfs.Exists(outputPath))
    {
        defaultfs.Delete(outputPath, true);
    }
    FileOutputFormat.SetOutputPath(jobConf, outputPath);
    // Uncomment to run locally in a single process
    //jobConf.Set(JTConfig.JT, "local");
    Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
    System.Console.Out.WriteLine("\nSortValidator.RecordStatsChecker: Validate sort "
        + "from " + inputPaths[0] + " (" + noSortInputpaths + " files), " + inputPaths[1]
        + " (" + noSortReduceTasks + " files) into " + FileOutputFormat.GetOutputPath(jobConf)
        + " with 1 reducer.");
    DateTime startTime = new DateTime();
    System.Console.Out.WriteLine("Job started: " + startTime);
    JobClient.RunJob(jobConf);
    try
    {
        DateTime endTime = new DateTime();
        System.Console.Out.WriteLine("Job ended: " + endTime);
        System.Console.Out.WriteLine("The job took "
            + (endTime.GetTime() - startTime.GetTime()) / 1000 + " seconds.");
        // Check that the statistics of the framework's
        // sort-input and sort-output match.
        SequenceFile.Reader stats = new SequenceFile.Reader(defaultfs,
            new Path(outputPath, "part-00000"), defaults);
        try
        {
            IntWritable k1 = new IntWritable();
            IntWritable k2 = new IntWritable();
            SortValidator.RecordStatsChecker.RecordStatsWritable v1 =
                new SortValidator.RecordStatsChecker.RecordStatsWritable();
            SortValidator.RecordStatsChecker.RecordStatsWritable v2 =
                new SortValidator.RecordStatsChecker.RecordStatsWritable();
            if (!stats.Next(k1, v1))
            {
                throw new IOException("Failed to read record #1 from reduce's output");
            }
            if (!stats.Next(k2, v2))
            {
                throw new IOException("Failed to read record #2 from reduce's output");
            }
            if ((v1.GetBytes() != v2.GetBytes()) || (v1.GetRecords() != v2.GetRecords())
                || v1.GetChecksum() != v2.GetChecksum())
            {
                throw new IOException("(" + v1.GetBytes() + ", " + v1.GetRecords() + ", "
                    + v1.GetChecksum() + ") v/s (" + v2.GetBytes() + ", " + v2.GetRecords()
                    + ", " + v2.GetChecksum() + ")");
            }
        }
        finally
        {
            stats.Close();
        }
    }
    finally
    {
        defaultfs.Delete(outputPath, true);
    }
}
/// <exception cref="System.Exception"/>
public virtual void TestInputPath()
{
    JobConf jobConf = new JobConf();
    Path workingDir = jobConf.GetWorkingDirectory();
    // A single path containing braces and a comma is kept intact.
    Path path = new Path(workingDir, "xx{y" + StringUtils.CommaStr + "z}");
    FileInputFormat.SetInputPaths(jobConf, path);
    Path[] paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(1, paths.Length);
    NUnit.Framework.Assert.AreEqual(path.ToString(), paths[0].ToString());
    // Escaped characters survive a set/get round trip.
    StringBuilder pathStr = new StringBuilder();
    pathStr.Append(StringUtils.EscapeChar);
    pathStr.Append(StringUtils.EscapeChar);
    pathStr.Append(StringUtils.Comma);
    pathStr.Append(StringUtils.Comma);
    pathStr.Append('a');
    path = new Path(workingDir, pathStr.ToString());
    FileInputFormat.SetInputPaths(jobConf, path);
    paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(1, paths.Length);
    NUnit.Framework.Assert.AreEqual(path.ToString(), paths[0].ToString());
    // AddInputPath appends a second path after SetInputPaths.
    pathStr.Length = 0;
    pathStr.Append(StringUtils.EscapeChar);
    pathStr.Append("xx");
    pathStr.Append(StringUtils.EscapeChar);
    path = new Path(workingDir, pathStr.ToString());
    Path path1 = new Path(workingDir, "yy" + StringUtils.CommaStr + "zz");
    FileInputFormat.SetInputPaths(jobConf, path);
    FileInputFormat.AddInputPath(jobConf, path1);
    paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(2, paths.Length);
    NUnit.Framework.Assert.AreEqual(path.ToString(), paths[0].ToString());
    NUnit.Framework.Assert.AreEqual(path1.ToString(), paths[1].ToString());
    FileInputFormat.SetInputPaths(jobConf, path, path1);
    paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(2, paths.Length);
    NUnit.Framework.Assert.AreEqual(path.ToString(), paths[0].ToString());
    NUnit.Framework.Assert.AreEqual(path1.ToString(), paths[1].ToString());
    Path[] input = new Path[] { path, path1 };
    FileInputFormat.SetInputPaths(jobConf, input);
    paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(2, paths.Length);
    NUnit.Framework.Assert.AreEqual(path.ToString(), paths[0].ToString());
    NUnit.Framework.Assert.AreEqual(path1.ToString(), paths[1].ToString());
    // A comma-separated string of globs is split into individual paths.
    pathStr.Length = 0;
    string str1 = "{a{b,c},de}";
    string str2 = "xyz";
    string str3 = "x{y,z}";
    pathStr.Append(str1);
    pathStr.Append(StringUtils.Comma);
    pathStr.Append(str2);
    pathStr.Append(StringUtils.Comma);
    pathStr.Append(str3);
    FileInputFormat.SetInputPaths(jobConf, pathStr.ToString());
    paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(3, paths.Length);
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str1).ToString(), paths[0].ToString());
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str2).ToString(), paths[1].ToString());
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str3).ToString(), paths[2].ToString());
    // AddInputPaths appends to the existing list instead of replacing it.
    pathStr.Length = 0;
    string str4 = "abc";
    string str5 = "pq{r,s}";
    pathStr.Append(str4);
    pathStr.Append(StringUtils.Comma);
    pathStr.Append(str5);
    FileInputFormat.AddInputPaths(jobConf, pathStr.ToString());
    paths = FileInputFormat.GetInputPaths(jobConf);
    NUnit.Framework.Assert.AreEqual(5, paths.Length);
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str1).ToString(), paths[0].ToString());
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str2).ToString(), paths[1].ToString());
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str3).ToString(), paths[2].ToString());
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str4).ToString(), paths[3].ToString());
    NUnit.Framework.Assert.AreEqual(new Path(workingDir, str5).ToString(), paths[4].ToString());
}
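// A minimal usage sketch building on the escaping behavior exercised above.
// It assumes the StringUtils.EscapeString(string, char, char) helper from the
// same utility class used by the test; the helper name AddLiteralPath is an
// invention for illustration. A caller with a literal path containing a comma
// escapes it first, so AddInputPaths treats it as one path instead of two.
internal static void AddLiteralPath(JobConf jobConf, string pathWithComma)
{
    // Escape commas so the comma-splitting in AddInputPaths leaves them alone.
    string escaped = StringUtils.EscapeString(pathWithComma, StringUtils.EscapeChar, StringUtils.Comma);
    FileInputFormat.AddInputPaths(jobConf, escaped);
}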