예제 #1
0
        /// <summary>
        /// Generates the random input data for the benchmark by running a map-only
        /// job whose sequence-file output is written to <c>InputDir</c>.
        /// </summary>
        /// <remarks>
        /// The job uses <paramref name="numMapsPerHost"/> map tasks per task tracker
        /// reported by the cluster and no reduce tasks. <c>BaseDir</c> is deleted
        /// recursively before the job is submitted.
        /// </remarks>
        /// <param name="dataSizePerMap">Amount of data each map should write, in megabytes.</param>
        /// <param name="numSpillsPerMap">Number of spills per map; only logged by this method.</param>
        /// <param name="numMapsPerHost">Number of map tasks to schedule per task tracker.</param>
        /// <param name="masterConf">Configuration the generator job is derived from.</param>
        /// <exception cref="System.Exception"/>
        public static void GenerateInputData(int dataSizePerMap, int numSpillsPerMap, int
                                             numMapsPerHost, JobConf masterConf)
        {
            JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));

            job.SetJobName("threaded-map-benchmark-random-writer");
            job.SetJarByClass(typeof(ThreadedMapBenchmark));
            job.SetInputFormat(typeof(UtilsForTests.RandomInputFormat));
            job.SetOutputFormat(typeof(SequenceFileOutputFormat));
            job.SetMapperClass(typeof(ThreadedMapBenchmark.Map));
            job.SetReducerClass(typeof(IdentityReducer));
            job.SetOutputKeyClass(typeof(BytesWritable));
            job.SetOutputValueClass(typeof(BytesWritable));
            JobClient     client   = new JobClient(job);
            ClusterStatus cluster  = client.GetClusterStatus();
            int           numHosts = cluster.GetTaskTrackers();

            // Both sizes are computed in 64-bit arithmetic: the int products overflow
            // once dataSizePerMap reaches 2048 MB (byte count) or the cluster is large.
            long totalDataSize = (long)dataSizePerMap * numMapsPerHost * numHosts;
            long bytesPerMap   = dataSizePerMap * 1024L * 1024L;

            // NOTE(review): presumably the mapper reads this property as a long - confirm.
            job.Set("test.tmb.bytes_per_map", bytesPerMap.ToString());
            // Map-only job: no reduce phase.
            job.SetNumReduceTasks(0);
            job.SetNumMapTasks(numMapsPerHost * numHosts);
            FileOutputFormat.SetOutputPath(job, InputDir);
            FileSystem fs = FileSystem.Get(job);

            // Start from a clean slate; removes output left over from earlier runs.
            fs.Delete(BaseDir, true);
            Log.Info("Generating random input for the benchmark");
            Log.Info("Total data : " + totalDataSize + " mb");
            Log.Info("Data per map: " + dataSizePerMap + " mb");
            Log.Info("Number of spills : " + numSpillsPerMap);
            Log.Info("Number of maps per host : " + numMapsPerHost);
            Log.Info("Number of hosts : " + numHosts);
            JobClient.RunJob(job);
        }
예제 #2
0
            /// <summary>
            /// Configures and runs the "sortvalidate-record-checker" job over both the
            /// sort input and the sort output, printing the job's start time, end time
            /// and duration to standard output.
            /// </summary>
            /// <param name="defaults">Base configuration for the job; also used to obtain the file system.</param>
            /// <param name="noMaps">Number of map tasks, or -1 to derive it from the number of task trackers.</param>
            /// <param name="noReduces">Number of reduce tasks, or -1 to derive it from the cluster status.</param>
            /// <param name="sortInput">First input path of the checker job (the sort's input).</param>
            /// <param name="sortOutput">Second input path of the checker job (the sort's output).</param>
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces
                                              , Path sortInput, Path sortOutput)
            {
                JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));

                jobConf.SetJobName("sortvalidate-record-checker");
                jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(BytesWritable));
                jobConf.SetOutputValueClass(typeof(IntWritable));
                jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
                jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
                JobClient     client  = new JobClient(jobConf);
                ClusterStatus cluster = client.GetClusterStatus();

                // -1 means "not specified": fall back to a per-host setting times the cluster size.
                if (noMaps == -1)
                {
                    noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
                }
                if (noReduces == -1)
                {
                    // Default to 90% of the reduce capacity unless a per-host value is configured.
                    noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
                    string sortReduces = jobConf.Get(ReducesPerHost);
                    if (sortReduces != null)
                    {
                        noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
                    }
                }
                jobConf.SetNumMapTasks(noMaps);
                jobConf.SetNumReduceTasks(noReduces);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path       outputPath = new Path("/tmp/sortvalidate/recordchecker");
                FileSystem fs         = FileSystem.Get(defaults);

                // Remove the output of any previous run before pointing the job at the path.
                if (fs.Exists(outputPath))
                {
                    fs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //jobConf.Set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster
                                             .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths
                                             [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces
                                             + " reduces.");

                // DateTime.Now rather than "new DateTime()": the parameterless constructor
                // yields 0001-01-01, which made both timestamps and the duration meaningless.
                DateTime startTime = DateTime.Now;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                DateTime endTime = DateTime.Now;

                System.Console.Out.WriteLine("Job ended: " + endTime);
                // Whole seconds, truncated, matching the original millisecond / 1000 division.
                long elapsedSeconds = (long)(endTime - startTime).TotalSeconds;
                System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
            }
예제 #3
0
        // generates the input for the benchmark
        /// <summary>This is the main routine for launching the benchmark.</summary>
        /// <remarks>
        /// This is the main routine for launching the benchmark. It generates random
        /// input data. The input is non-splittable. Sort is used for benchmarking.
        /// This benchmark reports the effect of having multiple sort and spill
        /// cycles over a single sort and spill.
        /// <para>
        /// An unknown option, an option without a value, or a value below 1 prints
        /// the usage text to standard error and exits the process with code -1.
        /// </para>
        /// </remarks>
        /// <param name="args">
        /// Command line: any of <c>-dataSizePerMap</c>, <c>-numSpillsPerMap</c> and
        /// <c>-numMapsPerHost</c>, each followed by an integer value.
        /// </param>
        /// <returns>0 when both sort jobs have been run.</returns>
        /// <exception cref="System.IO.IOException"></exception>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Log.Info("Starting the benchmark for threaded spills");
            string version = "ThreadedMapBenchmark.0.0.1";

            System.Console.Out.WriteLine(version);
            string usage = "Usage: threadedmapbenchmark " + "[-dataSizePerMap <data size (in mb) per map, default is 128 mb>] "
                           + "[-numSpillsPerMap <number of spills per map, default is 2>] " + "[-numMapsPerHost <number of maps per host, default is 1>]";
            int dataSizePerMap = 128;
            // in mb
            int     numSpillsPerMap = 2;
            int     numMapsPerHost  = 1;
            JobConf masterConf      = new JobConf(GetConf());

            // parse command line
            for (int i = 0; i < args.Length; i++)
            {
                string option        = args[i];
                bool   isKnownOption = option.Equals("-dataSizePerMap")
                                       || option.Equals("-numSpillsPerMap")
                                       || option.Equals("-numMapsPerHost");

                // Reject unknown options, and options given as the last argument:
                // reading args[++i] for those would run past the end of the array.
                if (!isKnownOption || i + 1 >= args.Length)
                {
                    System.Console.Error.WriteLine(usage);
                    System.Environment.Exit(-1);
                    return(-1);
                }
                int value = System.Convert.ToInt32(args[++i]);
                if (option.Equals("-dataSizePerMap"))
                {
                    dataSizePerMap = value;
                }
                else if (option.Equals("-numSpillsPerMap"))
                {
                    numSpillsPerMap = value;
                }
                else
                {
                    numMapsPerHost = value;
                }
            }
            if (dataSizePerMap < 1 || numSpillsPerMap < 1 || numMapsPerHost < 1)
            {
                // verify arguments
                System.Console.Error.WriteLine(usage);
                System.Environment.Exit(-1);
                return(-1);
            }
            FileSystem fs = null;

            try
            {
                // using random-writer to generate the input data
                GenerateInputData(dataSizePerMap, numSpillsPerMap, numMapsPerHost, masterConf);
                // configure job for sorting
                JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));
                job.SetJobName("threaded-map-benchmark-unspilled");
                job.SetJarByClass(typeof(ThreadedMapBenchmark));
                job.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat
                                          ));
                job.SetOutputFormat(typeof(SequenceFileOutputFormat));
                job.SetOutputKeyClass(typeof(BytesWritable));
                job.SetOutputValueClass(typeof(BytesWritable));
                job.SetMapperClass(typeof(IdentityMapper));
                job.SetReducerClass(typeof(IdentityReducer));
                FileInputFormat.AddInputPath(job, InputDir);
                FileOutputFormat.SetOutputPath(job, OutputDir);
                JobClient     client  = new JobClient(job);
                ClusterStatus cluster = client.GetClusterStatus();
                job.SetNumMapTasks(numMapsPerHost * cluster.GetTaskTrackers());
                job.SetNumReduceTasks(1);
                // set mapreduce.task.io.sort.mb to avoid spill
                int ioSortMb = (int)Math.Ceil(Factor * dataSizePerMap);
                job.Set(JobContext.IoSortMb, ioSortMb.ToString());
                fs = FileSystem.Get(job);
                Log.Info("Running sort with 1 spill per map");
                long startTime = Runtime.CurrentTimeMillis();
                JobClient.RunJob(job);
                long endTime = Runtime.CurrentTimeMillis();
                Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
                // The second run writes to the same output directory, so clear it first.
                fs.Delete(OutputDir, true);
                // set mapreduce.task.io.sort.mb to have multiple spills
                JobConf spilledJob = new JobConf(job, typeof(ThreadedMapBenchmark));
                ioSortMb = (int)Math.Ceil(Factor * Math.Ceil((double)dataSizePerMap / numSpillsPerMap
                                                             ));
                spilledJob.Set(JobContext.IoSortMb, ioSortMb.ToString());
                spilledJob.SetJobName("threaded-map-benchmark-spilled");
                spilledJob.SetJarByClass(typeof(ThreadedMapBenchmark));
                Log.Info("Running sort with " + numSpillsPerMap + " spills per map");
                startTime = Runtime.CurrentTimeMillis();
                JobClient.RunJob(spilledJob);
                endTime = Runtime.CurrentTimeMillis();
                Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
            }
            finally
            {
                // Clean up everything under BaseDir once a file system handle exists,
                // whether or not the jobs succeeded.
                if (fs != null)
                {
                    fs.Delete(BaseDir, true);
                }
            }
            return(0);
        }
예제 #4
0
            /// <summary>
            /// Polls the job's progress every five seconds and, each time it reaches
            /// the current threshold, kills (or fails) a share of the running map and
            /// reduce task attempts, then scales the threshold by the multiplier.
            /// </summary>
            /// <remarks>
            /// The loop ends when the job completes, when the configured number of
            /// iterations has been performed, or when <c>killed</c> is set. From the
            /// second triggered iteration onwards tasks are failed instead of killed.
            /// </remarks>
            /// <param name="considerMaps">
            /// <c>true</c> to compare the map progress against the threshold,
            /// <c>false</c> to compare the reduce progress.
            /// </param>
            private void KillBasedOnProgress(bool considerMaps)
            {
                bool fail = false;

                if (considerMaps)
                {
                    ReliabilityTest.Log.Info("Will kill tasks based on Maps' progress");
                }
                else
                {
                    ReliabilityTest.Log.Info("Will kill tasks based on Reduces' progress");
                }
                ReliabilityTest.Log.Info("Initial progress threshold: " + this.threshold + ". Threshold Multiplier: "
                                         + this.thresholdMultiplier + ". Number of iterations: " + this.numIterations);
                float thresholdVal      = this.threshold;
                int   numIterationsDone = 0;

                while (!this.killed)
                {
                    try
                    {
                        float progress;
                        if (this.jc.GetJob(this.rJob.GetID()).IsComplete() || numIterationsDone == this.numIterations)
                        {
                            break;
                        }
                        if (considerMaps)
                        {
                            progress = this.jc.GetJob(this.rJob.GetID()).MapProgress();
                        }
                        else
                        {
                            progress = this.jc.GetJob(this.rJob.GetID()).ReduceProgress();
                        }
                        if (progress >= thresholdVal)
                        {
                            numIterationsDone++;
                            // From the second triggered iteration on, fail tasks instead of
                            // killing them; the flag is never reset once set.
                            if (numIterationsDone > 0 && numIterationsDone % 2 == 0)
                            {
                                fail = true;
                            }
                            ClusterStatus c = this.jc.GetClusterStatus();
                            ReliabilityTest.Log.Info(DateTime.Now + " Killing a few tasks");
                            KillRunningAttempts(this.jc.GetMapTaskReports(this.rJob.GetID()), c, fail);
                            KillRunningAttempts(this.jc.GetReduceTaskReports(this.rJob.GetID()), c, fail);
                            thresholdVal = thresholdVal * this.thresholdMultiplier;
                        }
                        Sharpen.Thread.Sleep(5000);
                    }
                    catch (System.Threading.ThreadInterruptedException)
                    {
                        // Interrupted while sleeping: stop the loop.
                        // NOTE(review): assumes Sharpen.Thread.Sleep surfaces an interrupt as
                        // ThreadInterruptedException - confirm against the Sharpen runtime.
                        this.killed = true;
                    }
                    catch (Exception e)
                    {
                        // Any other failure is logged and the loop keeps polling.
                        ReliabilityTest.Log.Fatal(StringUtils.StringifyException(e));
                    }
                }
            }

            /// <summary>
            /// Collects the running attempts of the given task reports and, if there
            /// are more of them than half the number of task trackers, kills roughly
            /// half of them.
            /// </summary>
            /// <param name="reports">Task reports to scan for running attempts.</param>
            /// <param name="cluster">Cluster status supplying the task tracker count.</param>
            /// <param name="fail">Passed to <c>KillTask</c>: fail the attempt instead of killing it.</param>
            private void KillRunningAttempts(TaskReport[] reports, ClusterStatus cluster, bool fail)
            {
                ICollection <TaskAttemptID> runningTasks = new AList <TaskAttemptID>();

                foreach (TaskReport report in reports)
                {
                    if (report.GetCurrentStatus() == TIPStatus.Running)
                    {
                        Sharpen.Collections.AddAll(runningTasks, report.GetRunningTaskAttempts());
                    }
                }
                // Only act when enough attempts are running relative to the cluster size.
                if (runningTasks.Count <= cluster.GetTaskTrackers() / 2)
                {
                    return;
                }
                int count = 0;
                foreach (TaskAttemptID t in runningTasks)
                {
                    ReliabilityTest.Log.Info(DateTime.Now + " Killed task : " + t);
                    this.rJob.KillTask(t, fail);
                    // kill ~50%: the check runs after the kill with a post-increment, so the
                    // loop stops after at most Count / 2 + 2 attempts (unchanged behaviour).
                    if (count++ > runningTasks.Count / 2)
                    {
                        break;
                    }
                }
            }
예제 #5
0
        /// <summary>
        /// Submits a small map-only job to a mini cluster and checks the values
        /// reported by <c>JobClient.NetworkedJob</c>, <c>ClusterStatus</c>, the queue
        /// APIs and the delegation token of <c>JobClient</c>.
        /// </summary>
        /// <remarks>
        /// The test directory and the mini cluster are cleaned up in the
        /// <c>finally</c> block regardless of the outcome.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestNetworkedJob()
        {
            // mock creation
            MiniMRClientCluster mr      = null;
            FileSystem          fileSys = null;

            try
            {
                mr = CreateMiniClusterWithCapacityScheduler();
                JobConf job = new JobConf(mr.GetConfig());
                fileSys = FileSystem.Get(job);
                fileSys.Delete(testDir, true);
                FSDataOutputStream @out = fileSys.Create(inFile, true);
                try
                {
                    @out.WriteBytes("This is a test file");
                }
                finally
                {
                    // Close the stream even if the write fails.
                    @out.Close();
                }
                FileInputFormat.SetInputPaths(job, inFile);
                FileOutputFormat.SetOutputPath(job, outDir);
                job.SetInputFormat(typeof(TextInputFormat));
                job.SetOutputFormat(typeof(TextOutputFormat));
                job.SetMapperClass(typeof(IdentityMapper));
                job.SetReducerClass(typeof(IdentityReducer));
                job.SetNumReduceTasks(0);
                JobClient              client     = new JobClient(mr.GetConfig());
                RunningJob             rj         = client.SubmitJob(job);
                JobID                  jobId      = rj.GetID();
                JobClient.NetworkedJob runningJob = (JobClient.NetworkedJob)client.GetJob(jobId);
                runningJob.SetJobPriority(JobPriority.High.ToString());
                // test getters
                NUnit.Framework.Assert.IsTrue(runningJob.GetConfiguration().ToString().EndsWith("0001/job.xml"));
                NUnit.Framework.Assert.AreEqual(jobId, runningJob.GetID());
                NUnit.Framework.Assert.AreEqual(jobId.ToString(), runningJob.GetJobID());
                NUnit.Framework.Assert.AreEqual("N/A", runningJob.GetJobName());
                NUnit.Framework.Assert.IsTrue(runningJob.GetJobFile().EndsWith(".staging/" + runningJob
                                                                               .GetJobID() + "/job.xml"));
                NUnit.Framework.Assert.IsTrue(runningJob.GetTrackingURL().Length > 0);
                NUnit.Framework.Assert.IsTrue(runningJob.MapProgress() == 0.0f);
                NUnit.Framework.Assert.IsTrue(runningJob.ReduceProgress() == 0.0f);
                NUnit.Framework.Assert.IsTrue(runningJob.CleanupProgress() == 0.0f);
                NUnit.Framework.Assert.IsTrue(runningJob.SetupProgress() == 0.0f);
                TaskCompletionEvent[] tce = runningJob.GetTaskCompletionEvents(0);
                NUnit.Framework.Assert.AreEqual(0, tce.Length);
                NUnit.Framework.Assert.AreEqual(string.Empty, runningJob.GetHistoryUrl());
                NUnit.Framework.Assert.IsFalse(runningJob.IsRetired());
                NUnit.Framework.Assert.AreEqual(string.Empty, runningJob.GetFailureInfo());
                NUnit.Framework.Assert.AreEqual("N/A", runningJob.GetJobStatus().GetJobName());
                NUnit.Framework.Assert.AreEqual(0, client.GetMapTaskReports(jobId).Length);
                // NOTE(review): the two blocks below only check the message when an exception
                // is thrown; they also pass when no exception occurs - confirm this is intended.
                try
                {
                    client.GetSetupTaskReports(jobId);
                }
                catch (YarnRuntimeException e)
                {
                    NUnit.Framework.Assert.AreEqual("Unrecognized task type: JOB_SETUP", e.Message);
                }
                try
                {
                    client.GetCleanupTaskReports(jobId);
                }
                catch (YarnRuntimeException e)
                {
                    NUnit.Framework.Assert.AreEqual("Unrecognized task type: JOB_CLEANUP", e.Message);
                }
                NUnit.Framework.Assert.AreEqual(0, client.GetReduceTaskReports(jobId).Length);
                // test ClusterStatus
                ClusterStatus status = client.GetClusterStatus(true);
                NUnit.Framework.Assert.AreEqual(2, status.GetActiveTrackerNames().Count);
                // The blacklist accessors are expected to report nothing here.
                NUnit.Framework.Assert.AreEqual(0, status.GetBlacklistedTrackers());
                NUnit.Framework.Assert.AreEqual(0, status.GetBlacklistedTrackerNames().Count);
                NUnit.Framework.Assert.AreEqual(0, status.GetBlackListedTrackersInfo().Count);
                NUnit.Framework.Assert.AreEqual(Cluster.JobTrackerStatus.Running, status.GetJobTrackerStatus());
                NUnit.Framework.Assert.AreEqual(1, status.GetMapTasks());
                NUnit.Framework.Assert.AreEqual(20, status.GetMaxMapTasks());
                NUnit.Framework.Assert.AreEqual(4, status.GetMaxReduceTasks());
                NUnit.Framework.Assert.AreEqual(0, status.GetNumExcludedNodes());
                NUnit.Framework.Assert.AreEqual(1, status.GetReduceTasks());
                NUnit.Framework.Assert.AreEqual(2, status.GetTaskTrackers());
                NUnit.Framework.Assert.AreEqual(0, status.GetTTExpiryInterval());
                NUnit.Framework.Assert.AreEqual(Cluster.JobTrackerStatus.Running, status.GetJobTrackerStatus());
                NUnit.Framework.Assert.AreEqual(0, status.GetGraylistedTrackers());
                // test read and write: a status serialized and read back must match the original
                ByteArrayOutputStream dataOut = new ByteArrayOutputStream();
                status.Write(new DataOutputStream(dataOut));
                ClusterStatus status2 = new ClusterStatus();
                status2.ReadFields(new DataInputStream(new ByteArrayInputStream(dataOut.ToByteArray())));
                NUnit.Framework.Assert.AreEqual(status.GetActiveTrackerNames(), status2.GetActiveTrackerNames());
                NUnit.Framework.Assert.AreEqual(status.GetBlackListedTrackersInfo(), status2.GetBlackListedTrackersInfo());
                NUnit.Framework.Assert.AreEqual(status.GetMapTasks(), status2.GetMapTasks());
                // test taskStatusfilter
                JobClient.SetTaskOutputFilter(job, JobClient.TaskStatusFilter.All);
                NUnit.Framework.Assert.AreEqual(JobClient.TaskStatusFilter.All, JobClient.GetTaskOutputFilter(job));
                // test default map
                NUnit.Framework.Assert.AreEqual(20, client.GetDefaultMaps());
                NUnit.Framework.Assert.AreEqual(4, client.GetDefaultReduces());
                NUnit.Framework.Assert.AreEqual("jobSubmitDir", client.GetSystemDir().GetName());
                // test queue information
                JobQueueInfo[] rootQueueInfo = client.GetRootQueues();
                NUnit.Framework.Assert.AreEqual(1, rootQueueInfo.Length);
                NUnit.Framework.Assert.AreEqual("default", rootQueueInfo[0].GetQueueName());
                JobQueueInfo[] qinfo = client.GetQueues();
                NUnit.Framework.Assert.AreEqual(1, qinfo.Length);
                NUnit.Framework.Assert.AreEqual("default", qinfo[0].GetQueueName());
                NUnit.Framework.Assert.AreEqual(0, client.GetChildQueues("default").Length);
                NUnit.Framework.Assert.AreEqual(1, client.GetJobsFromQueue("default").Length);
                NUnit.Framework.Assert.IsTrue(client.GetJobsFromQueue("default")[0].GetJobFile().
                                              EndsWith("/job.xml"));
                JobQueueInfo qi = client.GetQueueInfo("default");
                NUnit.Framework.Assert.AreEqual("default", qi.GetQueueName());
                NUnit.Framework.Assert.AreEqual("running", qi.GetQueueState());
                QueueAclsInfo[] aai = client.GetQueueAclsForCurrentUser();
                NUnit.Framework.Assert.AreEqual(2, aai.Length);
                NUnit.Framework.Assert.AreEqual("root", aai[0].GetQueueName());
                NUnit.Framework.Assert.AreEqual("default", aai[1].GetQueueName());
                // test token
                Org.Apache.Hadoop.Security.Token.Token <DelegationTokenIdentifier> token = client.
                                                                                           GetDelegationToken(new Text(UserGroupInformation.GetCurrentUser().GetShortUserName
                                                                                                                           ()));
                NUnit.Framework.Assert.AreEqual("RM_DELEGATION_TOKEN", token.GetKind().ToString());
                // test JobClient
                // The following asserts read JobStatus twice and ensure the returned
                // JobStatus objects correspond to the same Job.
                // NUnit takes the failure message as the last argument, not the first.
                NUnit.Framework.Assert.AreEqual(jobId, ((JobID)client
                                                        .GetJob(jobId).GetJobStatus().GetJobID()), "Expected matching JobIDs");
                NUnit.Framework.Assert.AreEqual(rj.GetJobStatus().GetStartTime(), client.GetJob(jobId)
                                                .GetJobStatus().GetStartTime(), "Expected matching startTimes");
            }
            finally
            {
                if (fileSys != null)
                {
                    fileSys.Delete(testDir, true);
                }
                if (mr != null)
                {
                    mr.Stop();
                }
            }
        }