예제 #1
0
        /// <summary>
        /// Runs a map-only job that uses <c>TestReporter.StatusLimitMapper</c> over a
        /// single small input file and asserts that the job reports success.
        /// </summary>
        public virtual void TestStatusLimit()
        {
            Path          test   = new Path(testRootTempDir, "testStatusLimit");
            Configuration conf   = new Configuration();
            Path          inDir  = new Path(test, "in");
            Path          outDir = new Path(test, "out");
            FileSystem    fs     = FileSystem.Get(conf);

            // Recreate the input directory so it holds only the file written below.
            if (fs.Exists(inDir))
            {
                fs.Delete(inDir, true);
            }
            fs.Mkdirs(inDir);
            DataOutputStream file = fs.Create(new Path(inDir, "part-" + 0));

            try
            {
                file.WriteBytes("testStatusLimit");
            }
            finally
            {
                // Close the stream even when the write throws.
                file.Close();
            }
            // Remove any output directory left behind by an earlier run.
            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            Job job = Job.GetInstance(conf, "testStatusLimit");

            job.SetMapperClass(typeof(TestReporter.StatusLimitMapper));
            job.SetNumReduceTasks(0);
            FileInputFormat.AddInputPath(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            job.WaitForCompletion(true);
            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");
        }
예제 #2
0
            /// <summary>
            /// Configures and runs the "sortvalidate-record-checker" job over both the
            /// sort input and the sort output, printing the job's start time, end time
            /// and duration to standard output.
            /// </summary>
            /// <param name="defaults">Configuration the job configuration is derived from.</param>
            /// <param name="noMaps">
            /// Number of map tasks; <c>-1</c> derives it from the number of task trackers
            /// multiplied by the <c>MapsPerHost</c> setting (default 10).
            /// </param>
            /// <param name="noReduces">
            /// Number of reduce tasks; <c>-1</c> derives it from 90% of the cluster's
            /// maximum reduce tasks, or from the number of task trackers multiplied by
            /// the <c>ReducesPerHost</c> setting when that setting is present.
            /// </param>
            /// <param name="sortInput">Path of the data that was fed to the sort.</param>
            /// <param name="sortOutput">Path of the data produced by the sort.</param>
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces
                                              , Path sortInput, Path sortOutput)
            {
                JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));

                jobConf.SetJobName("sortvalidate-record-checker");
                jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(BytesWritable));
                jobConf.SetOutputValueClass(typeof(IntWritable));
                jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
                jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
                JobClient     client  = new JobClient(jobConf);
                ClusterStatus cluster = client.GetClusterStatus();

                if (noMaps == -1)
                {
                    noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
                }
                if (noReduces == -1)
                {
                    noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
                    string sortReduces = jobConf.Get(ReducesPerHost);
                    if (sortReduces != null)
                    {
                        // An explicit per-host setting overrides the 90% default.
                        noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
                    }
                }
                jobConf.SetNumMapTasks(noMaps);
                jobConf.SetNumReduceTasks(noReduces);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path       outputPath = new Path("/tmp/sortvalidate/recordchecker");
                FileSystem fs         = FileSystem.Get(defaults);

                // Remove the output of any earlier run before setting the output path.
                if (fs.Exists(outputPath))
                {
                    fs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //jobConf.Set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster
                                             .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths
                                             [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces
                                             + " reduces.");
                // DateTime.Now captures the current time; "new DateTime()" would be
                // DateTime.MinValue and make every run appear to take 0 seconds.
                DateTime startTime = DateTime.Now;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                DateTime endTime = DateTime.Now;

                System.Console.Out.WriteLine("Job ended: " + endTime);
                // Report whole seconds, truncating any fractional part.
                long elapsedSeconds = (long)(endTime - startTime).TotalSeconds;
                System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
            }
        /// <summary>
        /// Builds a job from <paramref name="conf"/> that uses the identity mapper and
        /// identity reducer, reads from <c>inputDir</c>, writes to <c>outputDir</c>,
        /// and submits it through <c>JobClient.RunJob</c>.
        /// </summary>
        /// <param name="conf">Configuration the job configuration is created from.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public virtual void CreateAndRunJob(Configuration conf)
        {
            JobConf jobConf = new JobConf(conf);

            jobConf.SetJarByClass(typeof(TestLineRecordReaderJobs));

            // Where the job reads from and writes to.
            FileInputFormat.AddInputPath(jobConf, inputDir);
            FileOutputFormat.SetOutputPath(jobConf, outputDir);

            // Pass records through unchanged on both sides.
            jobConf.SetMapperClass(typeof(IdentityMapper));
            jobConf.SetReducerClass(typeof(IdentityReducer));

            JobClient.RunJob(jobConf);
        }
예제 #4
0
        /// <summary>
        /// Submits a job using the "local" framework and verifies that the returned
        /// running job, and the job looked up again by its ID, are both non-null.
        /// </summary>
        public virtual void TestGetRunningJobFromJobClient()
        {
            JobConf conf = new JobConf();

            conf.Set("mapreduce.framework.name", "local");
            FileInputFormat.AddInputPath(conf, CreateTempFile("in", "hello"));
            Path outputDir = new Path(TestRootDir, GetType().Name);

            // Clear any output left behind by an earlier run.
            outputDir.GetFileSystem(conf).Delete(outputDir, true);
            FileOutputFormat.SetOutputPath(conf, outputDir);
            JobClient  jc         = new JobClient(conf);
            RunningJob runningJob = jc.SubmitJob(conf);

            // NUnit takes the object under test first and the message second; with
            // the arguments reversed the message literal would be the tested object.
            NUnit.Framework.Assert.IsNotNull(runningJob, "Running job");
            // Check that the running job can be retrieved by ID
            RunningJob newRunningJob = jc.GetJob(runningJob.GetID());

            NUnit.Framework.Assert.IsNotNull(newRunningJob, "New running job");
        }
예제 #5
0
		/// <summary>
		/// Builds the job configuration for the benchmark from <c>GetConf()</c>.
		/// </summary>
		/// <param name="numMaps">Number of map tasks to request.</param>
		/// <param name="numReduces">Number of reduce tasks to request.</param>
		/// <param name="jarFile">
		/// Jar to set on the job; when <c>null</c> only the jar located through
		/// <c>SetJarByClass</c> is used.
		/// </param>
		/// <returns>The populated job configuration.</returns>
		private JobConf SetupJob(int numMaps, int numReduces, string jarFile)
		{
			JobConf conf = new JobConf(GetConf());

			// Jar selection: locate by class first, then apply an explicit jar if given.
			conf.SetJarByClass(typeof(MRBench));
			FileInputFormat.AddInputPath(conf, InputDir);

			// Input/output formats and key/value types.
			conf.SetInputFormat(typeof(TextInputFormat));
			conf.SetOutputFormat(typeof(TextOutputFormat));
			conf.SetOutputValueClass(typeof(UTF8));
			conf.SetMapOutputKeyClass(typeof(UTF8));
			conf.SetMapOutputValueClass(typeof(UTF8));

			if (jarFile != null)
			{
				conf.SetJar(jarFile);
			}

			// Task implementations and task counts.
			conf.SetMapperClass(typeof(MRBench.Map));
			conf.SetReducerClass(typeof(MRBench.Reduce));
			conf.SetNumMapTasks(numMaps);
			conf.SetNumReduceTasks(numReduces);

			conf.SetBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
			return conf;
		}
예제 #6
0
        /// <summary>
        /// Runs a word-count job over a small three-line input in a randomly named
        /// directory under <c>/tmp</c>, then compares the job output with the expected
        /// per-word counts and removes the temporary directory.
        /// </summary>
        public virtual void TestNewApis()
        {
            Random     r          = new Random(Runtime.CurrentTimeMillis());
            Path       tmpBaseDir = new Path("/tmp/wc-" + r.Next());
            Path       inDir      = new Path(tmpBaseDir, "input");
            Path       outDir     = new Path(tmpBaseDir, "output");
            string     input      = "The quick brown fox\nhas many silly\nred fox sox\n";
            FileSystem inFs       = inDir.GetFileSystem(conf);
            FileSystem outFs      = outDir.GetFileSystem(conf);

            outFs.Delete(outDir, true);
            if (!inFs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }
            DataOutputStream file = inFs.Create(new Path(inDir, "part-0"));

            try
            {
                file.WriteBytes(input);
            }
            finally
            {
                // Close the stream even when the write throws.
                file.Close();
            }
            Job job = Job.GetInstance(conf, "word count");

            job.SetJarByClass(typeof(TestLocalModeWithNewApis));
            job.SetMapperClass(typeof(TestLocalModeWithNewApis.TokenizerMapper));
            job.SetCombinerClass(typeof(TestLocalModeWithNewApis.IntSumReducer));
            job.SetReducerClass(typeof(TestLocalModeWithNewApis.IntSumReducer));
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(IntWritable));
            FileInputFormat.AddInputPath(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            // Expected value first, actual second, so a failure message reads correctly.
            NUnit.Framework.Assert.AreEqual(true, job.WaitForCompletion(true));
            string output = ReadOutput(outDir, conf);

            NUnit.Framework.Assert.AreEqual("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" + "quick\t1\nred\t1\nsilly\t1\nsox\t1\n"
                                            , output);
            outFs.Delete(tmpBaseDir, true);
        }
예제 #7
0
        /// <summary>This is the main routine for launching the benchmark.</summary>
        /// <remarks>
        /// This is the main routine for launching the benchmark. It generates random
        /// input data. The input is non-splittable. Sort is used for benchmarking.
        /// This benchmark reports the effect of having multiple sort and spill
        /// cycles over a single sort and spill.
        /// <para>
        /// Recognised options are <c>-dataSizePerMap</c>, <c>-numSpillsPerMap</c> and
        /// <c>-numMapsPerHost</c>; each takes one integer value of at least 1. An
        /// unknown option, a missing or non-integer value, or a value below 1 prints
        /// the usage text to standard error and exits the process with code -1.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments to parse.</param>
        /// <returns>0 after both benchmark jobs have been run.</returns>
        /// <exception cref="System.IO.IOException"></exception>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Log.Info("Starting the benchmark for threaded spills");
            string version = "ThreadedMapBenchmark.0.0.1";

            System.Console.Out.WriteLine(version);
            string usage = "Usage: threadedmapbenchmark " + "[-dataSizePerMap <data size (in mb) per map, default is 128 mb>] "
                           + "[-numSpillsPerMap <number of spills per map, default is 2>] " + "[-numMapsPerHost <number of maps per host, default is 1>]";
            int dataSizePerMap = 128;
            // in mb
            int     numSpillsPerMap = 2;
            int     numMapsPerHost  = 1;
            JobConf masterConf      = new JobConf(GetConf());

            // parse command line: every recognised option is followed by one integer
            for (int i = 0; i < args.Length; i++)
            {
                string option      = args[i];
                bool   knownOption = option.Equals("-dataSizePerMap") || option.Equals("-numSpillsPerMap") ||
                                     option.Equals("-numMapsPerHost");
                int value;

                // Reject unknown options, an option given as the last argument with
                // no value after it, and values that are not integers.
                if (!knownOption || i + 1 >= args.Length || !int.TryParse(args[++i], out value))
                {
                    System.Console.Error.WriteLine(usage);
                    System.Environment.Exit(-1);
                    // Not reached at run time; keeps 'value' definitely assigned below.
                    return(-1);
                }
                if (option.Equals("-dataSizePerMap"))
                {
                    dataSizePerMap = value;
                }
                else if (option.Equals("-numSpillsPerMap"))
                {
                    numSpillsPerMap = value;
                }
                else
                {
                    numMapsPerHost = value;
                }
            }
            if (dataSizePerMap < 1 || numSpillsPerMap < 1 || numMapsPerHost < 1)
            {
                // verify arguments
                System.Console.Error.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            FileSystem fs = null;

            try
            {
                // using random-writer to generate the input data
                GenerateInputData(dataSizePerMap, numSpillsPerMap, numMapsPerHost, masterConf);
                // configure job for sorting
                JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));
                job.SetJobName("threaded-map-benchmark-unspilled");
                job.SetJarByClass(typeof(ThreadedMapBenchmark));
                job.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat
                                          ));
                job.SetOutputFormat(typeof(SequenceFileOutputFormat));
                job.SetOutputKeyClass(typeof(BytesWritable));
                job.SetOutputValueClass(typeof(BytesWritable));
                job.SetMapperClass(typeof(IdentityMapper));
                job.SetReducerClass(typeof(IdentityReducer));
                FileInputFormat.AddInputPath(job, InputDir);
                FileOutputFormat.SetOutputPath(job, OutputDir);
                JobClient     client  = new JobClient(job);
                ClusterStatus cluster = client.GetClusterStatus();
                job.SetNumMapTasks(numMapsPerHost * cluster.GetTaskTrackers());
                job.SetNumReduceTasks(1);
                // set mapreduce.task.io.sort.mb to avoid spill
                int ioSortMb = (int)Math.Ceil(Factor * dataSizePerMap);
                job.Set(JobContext.IoSortMb, ioSortMb.ToString());
                fs = FileSystem.Get(job);
                Log.Info("Running sort with 1 spill per map");
                long startTime = Runtime.CurrentTimeMillis();
                JobClient.RunJob(job);
                long endTime = Runtime.CurrentTimeMillis();
                Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
                // The second run writes to the same output directory, so clear it first.
                fs.Delete(OutputDir, true);
                // set mapreduce.task.io.sort.mb to have multiple spills
                JobConf spilledJob = new JobConf(job, typeof(ThreadedMapBenchmark));
                ioSortMb = (int)Math.Ceil(Factor * Math.Ceil((double)dataSizePerMap / numSpillsPerMap
                                                             ));
                spilledJob.Set(JobContext.IoSortMb, ioSortMb.ToString());
                spilledJob.SetJobName("threaded-map-benchmark-spilled");
                spilledJob.SetJarByClass(typeof(ThreadedMapBenchmark));
                Log.Info("Running sort with " + numSpillsPerMap + " spills per map");
                startTime = Runtime.CurrentTimeMillis();
                JobClient.RunJob(spilledJob);
                endTime = Runtime.CurrentTimeMillis();
                Log.Info("Total time taken : " + (endTime - startTime).ToString() + " millisec");
            }
            finally
            {
                // fs stays null when a failure happens before the file system is obtained.
                if (fs != null)
                {
                    fs.Delete(BaseDir, true);
                }
            }
            return(0);
        }
예제 #8
0
            /// <summary>
            /// Runs the "sortvalidate-recordstats-checker" job over the sort input and
            /// sort output, then reads the first two records of the job's
            /// <c>part-00000</c> output and verifies that their byte counts, record
            /// counts and checksums match. The job's output directory is deleted
            /// once the job has run, whether or not the comparison succeeds.
            /// </summary>
            /// <param name="defaults">Configuration the job configuration is derived from.</param>
            /// <param name="sortInput">Path of the data that was fed to the sort.</param>
            /// <param name="sortOutput">Path of the data produced by the sort.</param>
            /// <exception cref="System.IO.IOException">
            /// Thrown when either statistics record cannot be read, or when the two
            /// records' statistics differ.
            /// </exception>
            internal static void CheckRecords(Configuration defaults, Path sortInput, Path sortOutput
                                              )
            {
                FileSystem inputfs   = sortInput.GetFileSystem(defaults);
                FileSystem outputfs  = sortOutput.GetFileSystem(defaults);
                FileSystem defaultfs = FileSystem.Get(defaults);
                JobConf    jobConf   = new JobConf(defaults, typeof(SortValidator.RecordStatsChecker));

                jobConf.SetJobName("sortvalidate-recordstats-checker");
                int noSortReduceTasks = outputfs.ListStatus(sortOutput, sortPathsFilter).Length;

                jobConf.SetInt(SortReduces, noSortReduceTasks);
                int noSortInputpaths = inputfs.ListStatus(sortInput).Length;

                jobConf.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat
                                              ));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(IntWritable));
                jobConf.SetOutputValueClass(typeof(SortValidator.RecordStatsChecker.RecordStatsWritable
                                                   ));
                jobConf.SetMapperClass(typeof(SortValidator.RecordStatsChecker.Map));
                jobConf.SetCombinerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetReducerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetNumMapTasks(noSortReduceTasks);
                jobConf.SetNumReduceTasks(1);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path outputPath = new Path(new Path("/tmp", "sortvalidate"), UUID.RandomUUID().ToString
                                               ());

                if (defaultfs.Exists(outputPath))
                {
                    defaultfs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //jobConf.Set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordStatsChecker: Validate sort "
                                             + "from " + inputPaths[0] + " (" + noSortInputpaths + " files), " + inputPaths[
                                                 1] + " (" + noSortReduceTasks + " files) into " + FileOutputFormat.GetOutputPath
                                                 (jobConf) + " with 1 reducer.");
                // DateTime.Now captures the current time; "new DateTime()" would be
                // DateTime.MinValue and make every run appear to take 0 seconds.
                DateTime startTime = DateTime.Now;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                try
                {
                    DateTime endTime = DateTime.Now;
                    System.Console.Out.WriteLine("Job ended: " + endTime);
                    // Report whole seconds, truncating any fractional part.
                    long elapsedSeconds = (long)(endTime - startTime).TotalSeconds;
                    System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
                    // Check to ensure that the statistics of the
                    // framework's sort-input and sort-output match
                    SequenceFile.Reader stats = new SequenceFile.Reader(defaultfs, new Path(outputPath
                                                                                            , "part-00000"), defaults);
                    try
                    {
                        IntWritable k1 = new IntWritable();
                        IntWritable k2 = new IntWritable();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v1 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v2 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        if (!stats.Next(k1, v1))
                        {
                            throw new IOException("Failed to read record #1 from reduce's output");
                        }
                        if (!stats.Next(k2, v2))
                        {
                            throw new IOException("Failed to read record #2 from reduce's output");
                        }
                        if ((v1.GetBytes() != v2.GetBytes()) || (v1.GetRecords() != v2.GetRecords()) || v1
                            .GetChecksum() != v2.GetChecksum())
                        {
                            throw new IOException("(" + v1.GetBytes() + ", " + v1.GetRecords() + ", " + v1.GetChecksum
                                                      () + ") v/s (" + v2.GetBytes() + ", " + v2.GetRecords() + ", " + v2.GetChecksum(
                                                      ) + ")");
                        }
                    }
                    finally
                    {
                        stats.Close();
                    }
                }
                finally
                {
                    // Always remove the temporary statistics output.
                    defaultfs.Delete(outputPath, true);
                }
            }
예제 #9
0
        /// <summary>
        /// Checks that paths stored through <c>FileInputFormat.SetInputPaths</c>,
        /// <c>AddInputPath</c> and <c>AddInputPaths</c> are returned unchanged by
        /// <c>GetInputPaths</c>, using paths that contain escape characters, commas
        /// and brace globs, and comma-separated path strings.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestInputPath()
        {
            JobConf jobConf    = new JobConf();
            Path    workingDir = jobConf.GetWorkingDirectory();
            Path[]  actual;

            // Case 1: a single path whose brace glob contains a comma.
            Path globPath = new Path(workingDir, "xx{y" + StringUtils.CommaStr + "z}");
            FileInputFormat.SetInputPaths(jobConf, globPath);
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(1, actual.Length);
            NUnit.Framework.Assert.AreEqual(globPath.ToString(), actual[0].ToString());

            // Case 2: a single path made of two escape characters, two commas and 'a'.
            StringBuilder escapedCommas = new StringBuilder();
            escapedCommas.Append(StringUtils.EscapeChar)
                         .Append(StringUtils.EscapeChar)
                         .Append(StringUtils.Comma)
                         .Append(StringUtils.Comma)
                         .Append('a');
            Path escapedCommaPath = new Path(workingDir, escapedCommas.ToString());
            FileInputFormat.SetInputPaths(jobConf, escapedCommaPath);
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(1, actual.Length);
            NUnit.Framework.Assert.AreEqual(escapedCommaPath.ToString(), actual[0].ToString());

            // Case 3: two paths, supplied in three different ways.
            StringBuilder wrapped = new StringBuilder();
            wrapped.Append(StringUtils.EscapeChar).Append("xx").Append(StringUtils.EscapeChar);
            Path   path  = new Path(workingDir, wrapped.ToString());
            Path   path1 = new Path(workingDir, "yy" + StringUtils.CommaStr + "zz");
            Path[] pair  = new Path[] { path, path1 };

            // 3a: set the first path, then add the second.
            FileInputFormat.SetInputPaths(jobConf, path);
            FileInputFormat.AddInputPath(jobConf, path1);
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(2, actual.Length);
            for (int i = 0; i < pair.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(pair[i].ToString(), actual[i].ToString());
            }

            // 3b: set both paths as separate arguments.
            FileInputFormat.SetInputPaths(jobConf, path, path1);
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(2, actual.Length);
            for (int i = 0; i < pair.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(pair[i].ToString(), actual[i].ToString());
            }

            // 3c: set both paths as one array.
            FileInputFormat.SetInputPaths(jobConf, pair);
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(2, actual.Length);
            for (int i = 0; i < pair.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(pair[i].ToString(), actual[i].ToString());
            }

            // Case 4: a comma-separated string of three entries, two containing brace globs.
            string str1 = "{a{b,c},de}";
            string str2 = "xyz";
            string str3 = "x{y,z}";
            StringBuilder firstList = new StringBuilder();
            firstList.Append(str1)
                     .Append(StringUtils.Comma)
                     .Append(str2)
                     .Append(StringUtils.Comma)
                     .Append(str3);
            FileInputFormat.SetInputPaths(jobConf, firstList.ToString());
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(3, actual.Length);
            string[] expectedAfterSet = new string[] { str1, str2, str3 };
            for (int i = 0; i < expectedAfterSet.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(new Path(workingDir, expectedAfterSet[i]).ToString(),
                                                actual[i].ToString());
            }

            // Case 5: adding a second comma-separated string appends to the existing entries.
            string str4 = "abc";
            string str5 = "pq{r,s}";
            StringBuilder secondList = new StringBuilder();
            secondList.Append(str4).Append(StringUtils.Comma).Append(str5);
            FileInputFormat.AddInputPaths(jobConf, secondList.ToString());
            actual = FileInputFormat.GetInputPaths(jobConf);
            NUnit.Framework.Assert.AreEqual(5, actual.Length);
            string[] expectedAfterAdd = new string[] { str1, str2, str3, str4, str5 };
            for (int i = 0; i < expectedAfterAdd.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(new Path(workingDir, expectedAfterAdd[i]).ToString(),
                                                actual[i].ToString());
            }
        }