Exemplo n.º 1
0
        /// <summary>
        /// Classifies the file currently being mapped as belonging to the sort input
        /// or to the sort output.
        /// </summary>
        /// <remarks>
        /// The parent directory of the path stored under <c>JobContext.MapInputFile</c>
        /// is compared with the first configured input path of the job.
        /// </remarks>
        /// <param name="job">Job configuration holding the input paths and the current map input file.</param>
        /// <returns>
        /// <c>sortInput</c> when the file's parent equals the first input path,
        /// otherwise <c>sortOutput</c>.
        /// </returns>
        private static IntWritable DeduceInputFile(JobConf job)
        {
            Path[] configuredInputs = FileInputFormat.GetInputPaths(job);
            Path   currentFile      = new Path(job.Get(JobContext.MapInputFile));
            Path   currentDir       = currentFile.GetParent();

            // The first configured input path is treated as the sort-input directory;
            // any other parent means the file comes from the sort output.
            if (currentDir.Equals(configuredInputs[0]))
            {
                return sortInput;
            }
            return sortOutput;
        }
Exemplo n.º 2
0
            /// <summary>
            /// Configures and runs the "sortvalidate-record-checker" job over both the
            /// sort input and the sort output, printing start/end times and duration.
            /// </summary>
            /// <param name="defaults">Base configuration for the job and for resolving the file system.</param>
            /// <param name="noMaps">
            /// Number of map tasks; <c>-1</c> derives it from the task-tracker count
            /// multiplied by the <c>MapsPerHost</c> setting (default 10).
            /// </param>
            /// <param name="noReduces">
            /// Number of reduce tasks; <c>-1</c> derives it as 90% of the cluster's maximum
            /// reduce tasks, or as task-tracker count times <c>ReducesPerHost</c> when that
            /// setting is present.
            /// </param>
            /// <param name="sortInput">Path of the data that was fed to the sort.</param>
            /// <param name="sortOutput">Path of the data produced by the sort.</param>
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces
                                              , Path sortInput, Path sortOutput)
            {
                JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));

                jobConf.SetJobName("sortvalidate-record-checker");
                jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(BytesWritable));
                jobConf.SetOutputValueClass(typeof(IntWritable));
                jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
                jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
                JobClient     client  = new JobClient(jobConf);
                ClusterStatus cluster = client.GetClusterStatus();

                // -1 means "not specified": size the job from the cluster.
                if (noMaps == -1)
                {
                    noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
                }
                if (noReduces == -1)
                {
                    noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
                    string sortReduces = jobConf.Get(ReducesPerHost);
                    if (sortReduces != null)
                    {
                        // An explicit per-host setting overrides the 90% heuristic.
                        noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
                    }
                }
                jobConf.SetNumMapTasks(noMaps);
                jobConf.SetNumReduceTasks(noReduces);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path       outputPath = new Path("/tmp/sortvalidate/recordchecker");
                FileSystem fs         = FileSystem.Get(defaults);

                // Remove output left behind by a previous run at this fixed location.
                if (fs.Exists(outputPath))
                {
                    fs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster
                                             .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths
                                             [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces
                                             + " reduces.");

                // DateTime.Now captures the current instant; the parameterless
                // DateTime constructor would yield 0001-01-01 for both timestamps
                // and make the reported duration always zero.
                DateTime startTime = DateTime.Now;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                DateTime endTime = DateTime.Now;

                System.Console.Out.WriteLine("Job ended: " + endTime);
                System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime
                                                                    ()) / 1000 + " seconds.");
            }
Exemplo n.º 3
0
		/// <summary>Executes the same MapReduce job repeatedly and times each run.</summary>
		/// <remarks>
		/// Every run gets its own copy of the master configuration and a randomly
		/// named output directory under <c>OutputDir</c>; the input is whatever the
		/// master configuration specifies.
		/// </remarks>
		/// <param name="masterJobConf">Template configuration copied for every run.</param>
		/// <param name="numRuns">How many times the job is executed.</param>
		/// <returns>Elapsed wall-clock time of each run in milliseconds, in run order.</returns>
		/// <exception cref="System.IO.IOException"/>
		private AList<long> RunJobInSequence(JobConf masterJobConf, int numRuns)
		{
			Random nameSource = new Random();
			AList<long> durations = new AList<long>();
			for (int run = 0; run < numRuns; run++)
			{
				// A fresh JobConf per run: reusing the same object does not work.
				JobConf runConf = new JobConf(masterJobConf);
				// The copy constructor does not carry the job jar over, so set it again.
				runConf.SetJar(masterJobConf.GetJar());
				// Each run writes to its own randomly named output directory.
				Path runOutput = new Path(OutputDir, "output_" + nameSource.Next());
				FileOutputFormat.SetOutputPath(runConf, runOutput);
				Log.Info("Running job " + run + ":" + " input=" + FileInputFormat.GetInputPaths(runConf
					)[0] + " output=" + FileOutputFormat.GetOutputPath(runConf));
				// Time the job itself.
				long startedAt = Runtime.CurrentTimeMillis();
				JobClient.RunJob(runConf);
				long elapsed = Runtime.CurrentTimeMillis() - startedAt;
				durations.AddItem(elapsed);
			}
			return durations;
		}
Exemplo n.º 4
0
            /// <summary>
            /// Runs the "sortvalidate-recordstats-checker" job over the sort input and
            /// sort output and verifies that the two resulting statistics records agree.
            /// </summary>
            /// <remarks>
            /// The job writes to a uniquely named directory under /tmp/sortvalidate, which
            /// is deleted once the statistics have been checked. The first two records of
            /// "part-00000" are compared on bytes, record count and checksum.
            /// </remarks>
            /// <param name="defaults">Base configuration for the job and for resolving file systems.</param>
            /// <param name="sortInput">Path of the data that was fed to the sort.</param>
            /// <param name="sortOutput">Path of the data produced by the sort.</param>
            /// <exception cref="System.IO.IOException">
            /// When a statistics record cannot be read, or when the two records differ.
            /// </exception>
            internal static void CheckRecords(Configuration defaults, Path sortInput, Path sortOutput
                                              )
            {
                FileSystem inputfs   = sortInput.GetFileSystem(defaults);
                FileSystem outputfs  = sortOutput.GetFileSystem(defaults);
                FileSystem defaultfs = FileSystem.Get(defaults);
                JobConf    jobConf   = new JobConf(defaults, typeof(SortValidator.RecordStatsChecker));

                jobConf.SetJobName("sortvalidate-recordstats-checker");
                // The number of filtered entries in the sort output is used both as the
                // SortReduces setting and as the map-task count of this job.
                int noSortReduceTasks = outputfs.ListStatus(sortOutput, sortPathsFilter).Length;

                jobConf.SetInt(SortReduces, noSortReduceTasks);
                int noSortInputpaths = inputfs.ListStatus(sortInput).Length;

                jobConf.SetInputFormat(typeof(SortValidator.RecordStatsChecker.NonSplitableSequenceFileInputFormat
                                              ));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(IntWritable));
                jobConf.SetOutputValueClass(typeof(SortValidator.RecordStatsChecker.RecordStatsWritable
                                                   ));
                jobConf.SetMapperClass(typeof(SortValidator.RecordStatsChecker.Map));
                jobConf.SetCombinerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetReducerClass(typeof(SortValidator.RecordStatsChecker.Reduce));
                jobConf.SetNumMapTasks(noSortReduceTasks);
                jobConf.SetNumReduceTasks(1);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path outputPath = new Path(new Path("/tmp", "sortvalidate"), UUID.RandomUUID().ToString
                                               ());

                if (defaultfs.Exists(outputPath))
                {
                    defaultfs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordStatsChecker: Validate sort "
                                             + "from " + inputPaths[0] + " (" + noSortInputpaths + " files), " + inputPaths[
                                                 1] + " (" + noSortReduceTasks + " files) into " + FileOutputFormat.GetOutputPath
                                                 (jobConf) + " with 1 reducer.");

                // DateTime.Now captures the current instant; the parameterless
                // DateTime constructor would yield 0001-01-01 for both timestamps
                // and make the reported duration always zero.
                DateTime startTime = DateTime.Now;

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                try
                {
                    DateTime endTime = DateTime.Now;
                    System.Console.Out.WriteLine("Job ended: " + endTime);
                    System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime
                                                                        ()) / 1000 + " seconds.");
                    // Check to ensure that the statistics of the
                    // framework's sort-input and sort-output match
                    SequenceFile.Reader stats = new SequenceFile.Reader(defaultfs, new Path(outputPath
                                                                                            , "part-00000"), defaults);
                    try
                    {
                        IntWritable k1 = new IntWritable();
                        IntWritable k2 = new IntWritable();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v1 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        SortValidator.RecordStatsChecker.RecordStatsWritable v2 = new SortValidator.RecordStatsChecker.RecordStatsWritable
                                                                                      ();
                        if (!stats.Next(k1, v1))
                        {
                            throw new IOException("Failed to read record #1 from reduce's output");
                        }
                        if (!stats.Next(k2, v2))
                        {
                            throw new IOException("Failed to read record #2 from reduce's output");
                        }
                        // Any difference in bytes, record count or checksum fails validation.
                        if ((v1.GetBytes() != v2.GetBytes()) || (v1.GetRecords() != v2.GetRecords()) || v1
                            .GetChecksum() != v2.GetChecksum())
                        {
                            throw new IOException("(" + v1.GetBytes() + ", " + v1.GetRecords() + ", " + v1.GetChecksum
                                                      () + ") v/s (" + v2.GetBytes() + ", " + v2.GetRecords() + ", " + v2.GetChecksum(
                                                      ) + ")");
                        }
                    }
                    finally
                    {
                        stats.Close();
                    }
                }
                finally
                {
                    // The temporary job output is removed whether or not validation passed.
                    defaultfs.Delete(outputPath, true);
                }
            }
Exemplo n.º 5
0
        /// <summary>
        /// Configures and runs the load-generator job described by the command-line arguments.
        /// </summary>
        /// <remarks>
        /// Without an output path the job uses <c>NullOutputFormat</c>. Without input paths
        /// <c>ConfRandom</c> is applied to the job. When an indirect input format is
        /// configured, the input directories are walked (skipping directories whose name
        /// starts with "_") and every file's length and URI are written to a sequence
        /// file under the user's ".staging" directory.
        /// </remarks>
        /// <param name="argv">Command-line arguments handed to <c>ParseArgs</c>.</param>
        /// <returns><c>-1</c> when argument parsing fails, otherwise <c>0</c>.</returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] argv)
        {
            JobConf job = new JobConf(GetConf());

            job.SetJarByClass(typeof(GenericMRLoadGenerator));
            job.SetMapperClass(typeof(GenericMRLoadGenerator.SampleMapper));
            job.SetReducerClass(typeof(GenericMRLoadGenerator.SampleReducer));
            if (!ParseArgs(argv, job))
            {
                return(-1);
            }
            if (null == FileOutputFormat.GetOutputPath(job))
            {
                // No output dir? No writes
                job.SetOutputFormat(typeof(NullOutputFormat));
            }
            if (0 == FileInputFormat.GetInputPaths(job).Length)
            {
                // No input dir? Generate random data
                System.Console.Error.WriteLine("No input path; ignoring InputFormat");
                ConfRandom(job);
            }
            else
            {
                if (null != job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, null))
                {
                    // specified IndirectInputFormat? Build src list
                    JobClient jClient        = new JobClient(job);
                    Path      tmpDir         = new Path(jClient.GetFs().GetHomeDirectory(), ".staging");
                    Random    r              = new Random();
                    Path      indirInputFile = new Path(tmpDir, Sharpen.Extensions.ToString(r.Next(int.MaxValue
                                                                                                   ), 36) + "_files");
                    job.Set(GenericMRLoadGenerator.IndirectInputFile, indirInputFile.ToString());
                    SequenceFile.Writer writer = SequenceFile.CreateWriter(tmpDir.GetFileSystem(job),
                                                                           job, indirInputFile, typeof(LongWritable), typeof(Text), SequenceFile.CompressionType
                                                                           .None);
                    try
                    {
                        foreach (Path p in FileInputFormat.GetInputPaths(job))
                        {
                            FileSystem   fs        = p.GetFileSystem(job);
                            // Explicit stack: iterative depth-first walk of the input tree.
                            Stack <Path> pathstack = new Stack <Path>();
                            pathstack.Push(p);
                            while (!pathstack.Empty())
                            {
                                foreach (FileStatus stat in fs.ListStatus(pathstack.Pop()))
                                {
                                    if (stat.IsDirectory())
                                    {
                                        // Directories whose name starts with "_" are not descended into.
                                        if (!stat.GetPath().GetName().StartsWith("_"))
                                        {
                                            pathstack.Push(stat.GetPath());
                                        }
                                    }
                                    else
                                    {
                                        writer.Sync();
                                        writer.Append(new LongWritable(stat.GetLen()), new Text(stat.GetPath().ToUri().ToString
                                                                                                    ()));
                                    }
                                }
                            }
                        }
                    }
                    finally
                    {
                        writer.Close();
                    }
                }
            }

            // DateTime.Now captures the current instant; the parameterless
            // DateTime constructor would yield 0001-01-01 for both timestamps
            // and make the reported duration always zero.
            DateTime startTime = DateTime.Now;

            System.Console.Out.WriteLine("Job started: " + startTime);
            JobClient.RunJob(job);
            DateTime endTime = DateTime.Now;

            System.Console.Out.WriteLine("Job ended: " + endTime);
            System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime
                                                                ()) / 1000 + " seconds.");
            return(0);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Verifies that input paths set or added on a <c>JobConf</c> through
        /// <c>FileInputFormat</c> are returned unchanged by <c>GetInputPaths</c>.
        /// </summary>
        /// <remarks>
        /// Covers paths containing commas, escape characters and glob braces, the
        /// single-path, multi-path and array overloads, and comma-separated strings
        /// passed to <c>SetInputPaths</c> / <c>AddInputPaths</c>.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestInputPath()
        {
            JobConf conf    = new JobConf();
            Path    baseDir = conf.GetWorkingDirectory();
            Path[]  actual;

            // A single path containing a comma inside glob braces.
            Path first = new Path(baseDir, "xx{y" + StringUtils.CommaStr + "z}");
            FileInputFormat.SetInputPaths(conf, first);
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(1, actual.Length);
            NUnit.Framework.Assert.AreEqual(first.ToString(), actual[0].ToString());

            // A single path made of escape characters and commas.
            StringBuilder escaped = new StringBuilder();
            escaped.Append(StringUtils.EscapeChar)
                   .Append(StringUtils.EscapeChar)
                   .Append(StringUtils.Comma)
                   .Append(StringUtils.Comma)
                   .Append('a');
            first = new Path(baseDir, escaped.ToString());
            FileInputFormat.SetInputPaths(conf, first);
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(1, actual.Length);
            NUnit.Framework.Assert.AreEqual(first.ToString(), actual[0].ToString());

            // Two paths: set one, then add the other.
            StringBuilder wrapped = new StringBuilder();
            wrapped.Append(StringUtils.EscapeChar)
                   .Append("xx")
                   .Append(StringUtils.EscapeChar);
            first = new Path(baseDir, wrapped.ToString());
            Path second = new Path(baseDir, "yy" + StringUtils.CommaStr + "zz");
            FileInputFormat.SetInputPaths(conf, first);
            FileInputFormat.AddInputPath(conf, second);
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(2, actual.Length);
            NUnit.Framework.Assert.AreEqual(first.ToString(), actual[0].ToString());
            NUnit.Framework.Assert.AreEqual(second.ToString(), actual[1].ToString());

            // The same two paths passed together as separate arguments.
            FileInputFormat.SetInputPaths(conf, first, second);
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(2, actual.Length);
            NUnit.Framework.Assert.AreEqual(first.ToString(), actual[0].ToString());
            NUnit.Framework.Assert.AreEqual(second.ToString(), actual[1].ToString());

            // The same two paths passed as an array.
            Path[] both = new Path[] { first, second };
            FileInputFormat.SetInputPaths(conf, both);
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(2, actual.Length);
            NUnit.Framework.Assert.AreEqual(first.ToString(), actual[0].ToString());
            NUnit.Framework.Assert.AreEqual(second.ToString(), actual[1].ToString());

            // A comma-separated string: commas inside braces must not split a path.
            string[] setNames = new string[] { "{a{b,c},de}", "xyz", "x{y,z}" };
            StringBuilder setList = new StringBuilder();
            setList.Append(setNames[0])
                   .Append(StringUtils.Comma)
                   .Append(setNames[1])
                   .Append(StringUtils.Comma)
                   .Append(setNames[2]);
            FileInputFormat.SetInputPaths(conf, setList.ToString());
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(3, actual.Length);
            for (int i = 0; i < setNames.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(new Path(baseDir, setNames[i]).ToString(), actual[i].ToString
                                                    ());
            }

            // Adding a comma-separated string appends to the paths already set.
            string[] allNames = new string[] { setNames[0], setNames[1], setNames[2], "abc", "pq{r,s}" };
            StringBuilder addList = new StringBuilder();
            addList.Append(allNames[3])
                   .Append(StringUtils.Comma)
                   .Append(allNames[4]);
            FileInputFormat.AddInputPaths(conf, addList.ToString());
            actual = FileInputFormat.GetInputPaths(conf);
            NUnit.Framework.Assert.AreEqual(5, actual.Length);
            for (int i = 0; i < allNames.Length; i++)
            {
                NUnit.Framework.Assert.AreEqual(new Path(baseDir, allNames[i]).ToString(), actual[i].ToString
                                                    ());
            }
        }