/// <summary>Generate input data for the benchmark.</summary>
/// <exception cref="System.Exception"/>
public static void GenerateInputData(int dataSizePerMap, int numSpillsPerMap, int numMapsPerHost, JobConf masterConf)
{
    JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));
    job.SetJobName("threaded-map-benchmark-random-writer");
    job.SetJarByClass(typeof(ThreadedMapBenchmark));
    job.SetInputFormat(typeof(UtilsForTests.RandomInputFormat));
    job.SetOutputFormat(typeof(SequenceFileOutputFormat));
    job.SetMapperClass(typeof(ThreadedMapBenchmark.Map));
    job.SetReducerClass(typeof(IdentityReducer));
    job.SetOutputKeyClass(typeof(BytesWritable));
    job.SetOutputValueClass(typeof(BytesWritable));
    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.GetClusterStatus();
    long totalDataSize = dataSizePerMap * numMapsPerHost * cluster.GetTaskTrackers();
    job.Set("test.tmb.bytes_per_map", (dataSizePerMap * 1024 * 1024).ToString());
    job.SetNumReduceTasks(0); // map-only job, no reduces
    job.SetNumMapTasks(numMapsPerHost * cluster.GetTaskTrackers());
    FileOutputFormat.SetOutputPath(job, InputDir);
    FileSystem fs = FileSystem.Get(job);
    fs.Delete(BaseDir, true);
    Log.Info("Generating random input for the benchmark");
    Log.Info("Total data : " + totalDataSize + " mb");
    Log.Info("Data per map: " + dataSizePerMap + " mb");
    Log.Info("Number of spills : " + numSpillsPerMap);
    Log.Info("Number of maps per host : " + numMapsPerHost);
    Log.Info("Number of hosts : " + cluster.GetTaskTrackers());
    JobClient.RunJob(job);
}
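// A minimal usage sketch for the generator above, assuming the default
// benchmark arguments; the helper method name is hypothetical and not part
// of the benchmark itself.
private static void RunDefaultGeneration()
{
    // write ~128 MB per map, plan for 2 spills per map, and run one map
    // per tracker host with a default configuration
    JobConf conf = new JobConf();
    GenerateInputData(128, 2, 1, conf);
}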
/// <exception cref="System.IO.IOException"/> internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces , Path sortInput, Path sortOutput) { JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker)); jobConf.SetJobName("sortvalidate-record-checker"); jobConf.SetInputFormat(typeof(SequenceFileInputFormat)); jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat)); jobConf.SetOutputKeyClass(typeof(BytesWritable)); jobConf.SetOutputValueClass(typeof(IntWritable)); jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map)); jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce)); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.GetClusterStatus(); if (noMaps == -1) { noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10); } if (noReduces == -1) { noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9); string sortReduces = jobConf.Get(ReducesPerHost); if (sortReduces != null) { noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces); } } jobConf.SetNumMapTasks(noMaps); jobConf.SetNumReduceTasks(noReduces); FileInputFormat.SetInputPaths(jobConf, sortInput); FileInputFormat.AddInputPath(jobConf, sortOutput); Path outputPath = new Path("/tmp/sortvalidate/recordchecker"); FileSystem fs = FileSystem.Get(defaults); if (fs.Exists(outputPath)) { fs.Delete(outputPath, true); } FileOutputFormat.SetOutputPath(jobConf, outputPath); // Uncomment to run locally in a single process //job_conf.set(JTConfig.JT, "local"); Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf); System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces + " reduces."); DateTime startTime = new DateTime(); System.Console.Out.WriteLine("Job started: " + startTime); JobClient.RunJob(jobConf); DateTime end_time = new DateTime(); System.Console.Out.WriteLine("Job ended: " + end_time); System.Console.Out.WriteLine("The job took " + (end_time.GetTime() - startTime.GetTime ()) / 1000 + " seconds."); }
/// <summary>This is the main routine for launching the benchmark.</summary>
/// <remarks>
/// This is the main routine for launching the benchmark. It generates random
/// input data. The input is non-splittable. Sort is used for benchmarking.
/// This benchmark reports the effect of having multiple sort-and-spill
/// cycles over a single sort and spill.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    Log.Info("Starting the benchmark for threaded spills");
    string version = "ThreadedMapBenchmark.0.0.1";
    System.Console.Out.WriteLine(version);
    string usage = "Usage: threadedmapbenchmark "
        + "[-dataSizePerMap <data size (in mb) per map, default is 128 mb>] "
        + "[-numSpillsPerMap <number of spills per map, default is 2>] "
        + "[-numMapsPerHost <number of maps per host, default is 1>]";
    int dataSizePerMap = 128; // in mb
    int numSpillsPerMap = 2;
    int numMapsPerHost = 1;
    JobConf masterConf = new JobConf(GetConf());
    // parse the command line
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].Equals("-dataSizePerMap"))
        {
            dataSizePerMap = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-numSpillsPerMap"))
        {
            numSpillsPerMap = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-numMapsPerHost"))
        {
            numMapsPerHost = System.Convert.ToInt32(args[++i]);
        }
        else
        {
            System.Console.Error.WriteLine(usage);
            System.Environment.Exit(-1);
        }
    }
    // verify the arguments
    if (dataSizePerMap < 1 || numSpillsPerMap < 1 || numMapsPerHost < 1)
    {
        System.Console.Error.WriteLine(usage);
        System.Environment.Exit(-1);
    }
    FileSystem fs = null;
    try
    {
        // use the random writer to generate the input data
        GenerateInputData(dataSizePerMap, numSpillsPerMap, numMapsPerHost, masterConf);
        // configure the job for sorting
        JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));
        job.SetJobName("threaded-map-benchmark-unspilled");
        job.SetJarByClass(typeof(ThreadedMapBenchmark));
        job.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat));
        job.SetOutputFormat(typeof(SequenceFileOutputFormat));
        job.SetOutputKeyClass(typeof(BytesWritable));
        job.SetOutputValueClass(typeof(BytesWritable));
        job.SetMapperClass(typeof(IdentityMapper));
        job.SetReducerClass(typeof(IdentityReducer));
        FileInputFormat.AddInputPath(job, InputDir);
        FileOutputFormat.SetOutputPath(job, OutputDir);
        JobClient client = new JobClient(job);
        ClusterStatus cluster = client.GetClusterStatus();
        job.SetNumMapTasks(numMapsPerHost * cluster.GetTaskTrackers());
        job.SetNumReduceTasks(1);
        // set mapreduce.task.io.sort.mb large enough to avoid intermediate spills
        int ioSortMb = (int)Math.Ceiling(Factor * dataSizePerMap);
        job.Set(JobContext.IoSortMb, ioSortMb.ToString());
        fs = FileSystem.Get(job);
        Log.Info("Running sort with 1 spill per map");
        long startTime = Runtime.CurrentTimeMillis();
        JobClient.RunJob(job);
        long endTime = Runtime.CurrentTimeMillis();
        Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
        fs.Delete(OutputDir, true);
        // set mapreduce.task.io.sort.mb small enough to force multiple spills
        JobConf spilledJob = new JobConf(job, typeof(ThreadedMapBenchmark));
        ioSortMb = (int)Math.Ceiling(Factor * Math.Ceiling((double)dataSizePerMap / numSpillsPerMap));
        spilledJob.Set(JobContext.IoSortMb, ioSortMb.ToString());
        spilledJob.SetJobName("threaded-map-benchmark-spilled");
        spilledJob.SetJarByClass(typeof(ThreadedMapBenchmark));
        Log.Info("Running sort with " + numSpillsPerMap + " spills per map");
        startTime = Runtime.CurrentTimeMillis();
        JobClient.RunJob(spilledJob);
        endTime = Runtime.CurrentTimeMillis();
        Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
    }
    finally
    {
        if (fs != null)
        {
            fs.Delete(BaseDir, true);
        }
    }
    return 0;
}
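// A worked example of the io.sort.mb sizing used in Run() above. The value
// 2.3 for Factor is an assumption for illustration (Factor is a constant of
// the benchmark class and may differ); the helper method is hypothetical.
private static void IllustrateIoSortMbSizing()
{
    double factor = 2.3;          // assumed value of Factor
    int dataSizePerMap = 128;     // mb, the default
    int numSpillsPerMap = 2;      // the default

    // single-spill run: buffer comfortably larger than the map output,
    // so each map sorts and spills exactly once at the end
    int unspilledMb = (int)Math.Ceiling(factor * dataSizePerMap); // 295

    // multi-spill run: buffer sized for 1/numSpillsPerMap of the output,
    // forcing roughly numSpillsPerMap sort-and-spill cycles per map
    int spilledMb = (int)Math.Ceiling(factor * Math.Ceiling((double)dataSizePerMap / numSpillsPerMap)); // 148

    System.Console.Out.WriteLine("unspilled io.sort.mb = " + unspilledMb
        + ", spilled io.sort.mb = " + spilledMb);
}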
/// <summary>
/// Periodically polls the job's progress and, each time it crosses the current
/// threshold, kills (or, on every second iteration, fails) roughly half of the
/// running task attempts, then scales the threshold up for the next iteration.
/// </summary>
private void KillBasedOnProgress(bool considerMaps)
{
    bool fail = false;
    if (considerMaps)
    {
        ReliabilityTest.Log.Info("Will kill tasks based on Maps' progress");
    }
    else
    {
        ReliabilityTest.Log.Info("Will kill tasks based on Reduces' progress");
    }
    ReliabilityTest.Log.Info("Initial progress threshold: " + this.threshold
        + ". Threshold Multiplier: " + this.thresholdMultiplier
        + ". Number of iterations: " + this.numIterations);
    float thresholdVal = this.threshold;
    int numIterationsDone = 0;
    while (!this.killed)
    {
        try
        {
            float progress;
            if (this.jc.GetJob(this.rJob.GetID()).IsComplete() || numIterationsDone == this.numIterations)
            {
                break;
            }
            if (considerMaps)
            {
                progress = this.jc.GetJob(this.rJob.GetID()).MapProgress();
            }
            else
            {
                progress = this.jc.GetJob(this.rJob.GetID()).ReduceProgress();
            }
            if (progress >= thresholdVal)
            {
                numIterationsDone++;
                if (numIterationsDone > 0 && numIterationsDone % 2 == 0)
                {
                    // fail tasks instead of killing them on every second iteration
                    fail = true;
                }
                ClusterStatus c = this.jc.GetClusterStatus();
                ReliabilityTest.Log.Info(new DateTime() + " Killing a few tasks");
                ICollection<TaskAttemptID> runningTasks = new AList<TaskAttemptID>();
                TaskReport[] mapReports = this.jc.GetMapTaskReports(this.rJob.GetID());
                foreach (TaskReport mapReport in mapReports)
                {
                    if (mapReport.GetCurrentStatus() == TIPStatus.Running)
                    {
                        Sharpen.Collections.AddAll(runningTasks, mapReport.GetRunningTaskAttempts());
                    }
                }
                if (runningTasks.Count > c.GetTaskTrackers() / 2)
                {
                    int count = 0;
                    foreach (TaskAttemptID t in runningTasks)
                    {
                        ReliabilityTest.Log.Info(new DateTime() + " Killed task : " + t);
                        this.rJob.KillTask(t, fail);
                        if (count++ > runningTasks.Count / 2)
                        {
                            // kill only ~50% of the running attempts
                            break;
                        }
                    }
                }
                runningTasks.Clear();
                TaskReport[] reduceReports = this.jc.GetReduceTaskReports(this.rJob.GetID());
                foreach (TaskReport reduceReport in reduceReports)
                {
                    if (reduceReport.GetCurrentStatus() == TIPStatus.Running)
                    {
                        Sharpen.Collections.AddAll(runningTasks, reduceReport.GetRunningTaskAttempts());
                    }
                }
                if (runningTasks.Count > c.GetTaskTrackers() / 2)
                {
                    int count = 0;
                    foreach (TaskAttemptID t in runningTasks)
                    {
                        ReliabilityTest.Log.Info(new DateTime() + " Killed task : " + t);
                        this.rJob.KillTask(t, fail);
                        if (count++ > runningTasks.Count / 2)
                        {
                            // kill only ~50% of the running attempts
                            break;
                        }
                    }
                }
                thresholdVal = thresholdVal * this.thresholdMultiplier;
            }
            Sharpen.Thread.Sleep(5000);
        }
        catch (System.Threading.ThreadInterruptedException)
        {
            // interrupted: the test has been told to stop, so exit the polling loop
            this.killed = true;
        }
        catch (Exception e)
        {
            ReliabilityTest.Log.Fatal(StringUtils.StringifyException(e));
        }
    }
}
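// A sketch of the kill-threshold progression driven by the loop above,
// assuming threshold = 0.2f, thresholdMultiplier = 2.0f and numIterations = 3
// (all hypothetical values): tasks are culled when progress crosses 20%, then
// 40%, then 80%, with every second trigger failing tasks instead of killing.
private static void IllustrateKillThresholds()
{
    float thresholdVal = 0.2f;          // assumed initial threshold
    float thresholdMultiplier = 2.0f;   // assumed multiplier
    for (int iteration = 1; iteration <= 3; iteration++)
    {
        bool fail = iteration % 2 == 0; // mirrors numIterationsDone % 2 == 0 above
        System.Console.Out.WriteLine("trigger " + iteration + " at progress >= "
            + thresholdVal + ", fail=" + fail);
        thresholdVal *= thresholdMultiplier;
    }
}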
/// <summary>Test JobConf and the JobClient API against a networked job.</summary>
/// <exception cref="System.Exception"/>
public virtual void TestNetworkedJob()
{
    MiniMRClientCluster mr = null;
    FileSystem fileSys = null;
    try
    {
        // set up a mini cluster and submit a small map-only job to it
        mr = CreateMiniClusterWithCapacityScheduler();
        JobConf job = new JobConf(mr.GetConfig());
        fileSys = FileSystem.Get(job);
        fileSys.Delete(testDir, true);
        FSDataOutputStream @out = fileSys.Create(inFile, true);
        @out.WriteBytes("This is a test file");
        @out.Close();
        FileInputFormat.SetInputPaths(job, inFile);
        FileOutputFormat.SetOutputPath(job, outDir);
        job.SetInputFormat(typeof(TextInputFormat));
        job.SetOutputFormat(typeof(TextOutputFormat));
        job.SetMapperClass(typeof(IdentityMapper));
        job.SetReducerClass(typeof(IdentityReducer));
        job.SetNumReduceTasks(0);
        JobClient client = new JobClient(mr.GetConfig());
        RunningJob rj = client.SubmitJob(job);
        JobID jobId = rj.GetID();
        JobClient.NetworkedJob runningJob = (JobClient.NetworkedJob)client.GetJob(jobId);
        runningJob.SetJobPriority(JobPriority.High.ToString());
        // test the getters
        NUnit.Framework.Assert.IsTrue(runningJob.GetConfiguration().ToString().EndsWith("0001/job.xml"));
        NUnit.Framework.Assert.AreEqual(runningJob.GetID(), jobId);
        NUnit.Framework.Assert.AreEqual(runningJob.GetJobID(), jobId.ToString());
        NUnit.Framework.Assert.AreEqual(runningJob.GetJobName(), "N/A");
        NUnit.Framework.Assert.IsTrue(runningJob.GetJobFile().EndsWith(".staging/" + runningJob.GetJobID() + "/job.xml"));
        NUnit.Framework.Assert.IsTrue(runningJob.GetTrackingURL().Length > 0);
        NUnit.Framework.Assert.IsTrue(runningJob.MapProgress() == 0.0f);
        NUnit.Framework.Assert.IsTrue(runningJob.ReduceProgress() == 0.0f);
        NUnit.Framework.Assert.IsTrue(runningJob.CleanupProgress() == 0.0f);
        NUnit.Framework.Assert.IsTrue(runningJob.SetupProgress() == 0.0f);
        TaskCompletionEvent[] tce = runningJob.GetTaskCompletionEvents(0);
        NUnit.Framework.Assert.AreEqual(tce.Length, 0);
        NUnit.Framework.Assert.AreEqual(runningJob.GetHistoryUrl(), string.Empty);
        NUnit.Framework.Assert.IsFalse(runningJob.IsRetired());
        NUnit.Framework.Assert.AreEqual(runningJob.GetFailureInfo(), string.Empty);
        NUnit.Framework.Assert.AreEqual(runningJob.GetJobStatus().GetJobName(), "N/A");
        NUnit.Framework.Assert.AreEqual(client.GetMapTaskReports(jobId).Length, 0);
        try
        {
            client.GetSetupTaskReports(jobId);
        }
        catch (YarnRuntimeException e)
        {
            NUnit.Framework.Assert.AreEqual(e.Message, "Unrecognized task type: JOB_SETUP");
        }
        try
        {
            client.GetCleanupTaskReports(jobId);
        }
        catch (YarnRuntimeException e)
        {
            NUnit.Framework.Assert.AreEqual(e.Message, "Unrecognized task type: JOB_CLEANUP");
        }
        NUnit.Framework.Assert.AreEqual(client.GetReduceTaskReports(jobId).Length, 0);
        // test ClusterStatus
        ClusterStatus status = client.GetClusterStatus(true);
        NUnit.Framework.Assert.AreEqual(status.GetActiveTrackerNames().Count, 2);
        // the blacklist-related methods are not implemented and always return
        // an empty collection or zero
        NUnit.Framework.Assert.AreEqual(status.GetBlacklistedTrackers(), 0);
        NUnit.Framework.Assert.AreEqual(status.GetBlacklistedTrackerNames().Count, 0);
        NUnit.Framework.Assert.AreEqual(status.GetBlackListedTrackersInfo().Count, 0);
        NUnit.Framework.Assert.AreEqual(status.GetJobTrackerStatus(), Cluster.JobTrackerStatus.Running);
        NUnit.Framework.Assert.AreEqual(status.GetMapTasks(), 1);
        NUnit.Framework.Assert.AreEqual(status.GetMaxMapTasks(), 20);
        NUnit.Framework.Assert.AreEqual(status.GetMaxReduceTasks(), 4);
        NUnit.Framework.Assert.AreEqual(status.GetNumExcludedNodes(), 0);
        NUnit.Framework.Assert.AreEqual(status.GetReduceTasks(), 1);
        NUnit.Framework.Assert.AreEqual(status.GetTaskTrackers(), 2);
        NUnit.Framework.Assert.AreEqual(status.GetTTExpiryInterval(), 0);
        NUnit.Framework.Assert.AreEqual(status.GetJobTrackerStatus(), Cluster.JobTrackerStatus.Running);
        NUnit.Framework.Assert.AreEqual(status.GetGraylistedTrackers(), 0);
        // test that ClusterStatus survives a Writable write/read round trip
        ByteArrayOutputStream dataOut = new ByteArrayOutputStream();
        status.Write(new DataOutputStream(dataOut));
        ClusterStatus status2 = new ClusterStatus();
        status2.ReadFields(new DataInputStream(new ByteArrayInputStream(dataOut.ToByteArray())));
        NUnit.Framework.Assert.AreEqual(status.GetActiveTrackerNames(), status2.GetActiveTrackerNames());
        NUnit.Framework.Assert.AreEqual(status.GetBlackListedTrackersInfo(), status2.GetBlackListedTrackersInfo());
        NUnit.Framework.Assert.AreEqual(status.GetMapTasks(), status2.GetMapTasks());
        try
        {
            // the guarded call is missing here; as written this catch never fires
        }
        catch (RuntimeException e)
        {
            NUnit.Framework.Assert.IsTrue(e.Message.EndsWith("not found on CLASSPATH"));
        }
        // test the task status filter
        JobClient.SetTaskOutputFilter(job, JobClient.TaskStatusFilter.All);
        NUnit.Framework.Assert.AreEqual(JobClient.GetTaskOutputFilter(job), JobClient.TaskStatusFilter.All);
        // runningJob.setJobPriority(JobPriority.HIGH.name());
        // test the default map and reduce counts
        NUnit.Framework.Assert.AreEqual(client.GetDefaultMaps(), 20);
        NUnit.Framework.Assert.AreEqual(client.GetDefaultReduces(), 4);
        NUnit.Framework.Assert.AreEqual(client.GetSystemDir().GetName(), "jobSubmitDir");
        // test the queue information
        JobQueueInfo[] rootQueueInfo = client.GetRootQueues();
        NUnit.Framework.Assert.AreEqual(rootQueueInfo.Length, 1);
        NUnit.Framework.Assert.AreEqual(rootQueueInfo[0].GetQueueName(), "default");
        JobQueueInfo[] qinfo = client.GetQueues();
        NUnit.Framework.Assert.AreEqual(qinfo.Length, 1);
        NUnit.Framework.Assert.AreEqual(qinfo[0].GetQueueName(), "default");
        NUnit.Framework.Assert.AreEqual(client.GetChildQueues("default").Length, 0);
        NUnit.Framework.Assert.AreEqual(client.GetJobsFromQueue("default").Length, 1);
        NUnit.Framework.Assert.IsTrue(client.GetJobsFromQueue("default")[0].GetJobFile().EndsWith("/job.xml"));
        JobQueueInfo qi = client.GetQueueInfo("default");
        NUnit.Framework.Assert.AreEqual(qi.GetQueueName(), "default");
        NUnit.Framework.Assert.AreEqual(qi.GetQueueState(), "running");
        QueueAclsInfo[] aai = client.GetQueueAclsForCurrentUser();
        NUnit.Framework.Assert.AreEqual(aai.Length, 2);
        NUnit.Framework.Assert.AreEqual(aai[0].GetQueueName(), "root");
        NUnit.Framework.Assert.AreEqual(aai[1].GetQueueName(), "default");
        // test the delegation token
        Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token =
            client.GetDelegationToken(new Text(UserGroupInformation.GetCurrentUser().GetShortUserName()));
        NUnit.Framework.Assert.AreEqual(token.GetKind().ToString(), "RM_DELEGATION_TOKEN");
        // test JobClient: the following asserts read JobStatus twice and ensure
        // the returned JobStatus objects correspond to the same job
        NUnit.Framework.Assert.AreEqual("Expected matching JobIDs", jobId,
            ((JobID)client.GetJob(jobId).GetJobStatus().GetJobID()));
        NUnit.Framework.Assert.AreEqual("Expected matching startTimes",
            rj.GetJobStatus().GetStartTime(), client.GetJob(jobId).GetJobStatus().GetStartTime());
    }
    finally
    {
        if (fileSys != null)
        {
            fileSys.Delete(testDir, true);
        }
        if (mr != null)
        {
            mr.Stop();
        }
    }
}