Exemple #1
0
        /// <summary>Create an Aggregate based map/reduce job.</summary>
        /// <remarks>
        /// After the generic options are stripped, the remaining arguments are
        /// positional:
        /// <c>inputDirs outDir [numOfReducer [textinputformat|seq [specfile [jobName]]]]</c>.
        /// When fewer than two arguments remain, the usage text is written to
        /// standard output and the process exits with code 1.
        /// The input format is <c>TextInputFormat</c> only when the fourth
        /// argument equals "textinputformat" (ignoring case); for any other
        /// value, or when the argument is omitted, <c>SequenceFileInputFormat</c>
        /// is used.
        /// </remarks>
        /// <param name="args">
        /// the arguments used for job creation. Generic hadoop
        /// arguments are accepted.
        /// </param>
        /// <param name="caller">
        /// the caller class, used to locate the job jar when the
        /// "user.jar.file" property is not set. May be null, in which case
        /// <c>ValueAggregatorJob</c> is used instead.
        /// </param>
        /// <returns>a JobConf object ready for submission.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <seealso cref="Org.Apache.Hadoop.Util.GenericOptionsParser"/>
        public static JobConf CreateValueAggregatorJob(string[] args, Type caller)
        {
            Configuration        conf          = new Configuration();
            GenericOptionsParser genericParser = new GenericOptionsParser(conf, args);

            // From here on, args holds only the job-specific positional arguments.
            args = genericParser.GetRemainingArgs();
            if (args.Length < 2)
            {
                System.Console.Out.WriteLine("usage: inputDirs outDir [numOfReducer [textinputformat|seq [specfile [jobName]]]]");
                GenericOptionsParser.PrintGenericCommandUsage(System.Console.Out);
                System.Environment.Exit(1);
            }

            string inputDir  = args[0];
            string outputDir = args[1];

            int numOfReducers = 1;
            if (args.Length > 2)
            {
                numOfReducers = System.Convert.ToInt32(args[2]);
            }

            // Text input must be requested explicitly; every other case
            // (including a missing argument) selects sequence-file input.
            bool textInputRequested = args.Length > 3 &&
                                      args[3].CompareToIgnoreCase("textinputformat") == 0;
            Type theInputFormat = textInputRequested
                ? typeof(TextInputFormat)
                : typeof(SequenceFileInputFormat);

            Path   specFile = args.Length > 4 ? new Path(args[4]) : null;
            string jobName  = args.Length > 5 ? args[5] : string.Empty;

            JobConf theJob = new JobConf(conf);
            if (specFile != null)
            {
                theJob.AddResource(specFile);
            }

            // An explicitly configured jar wins; otherwise the jar is located
            // through the caller class (or this job class when none is given).
            string userJarFile = theJob.Get("user.jar.file");
            if (userJarFile == null)
            {
                theJob.SetJarByClass(caller ?? typeof(ValueAggregatorJob));
            }
            else
            {
                theJob.SetJar(userJarFile);
            }

            theJob.SetJobName("ValueAggregatorJob: " + jobName);

            FileInputFormat.AddInputPaths(theJob, inputDir);
            theJob.SetInputFormat(theInputFormat);
            theJob.SetMapperClass(typeof(ValueAggregatorMapper));

            FileOutputFormat.SetOutputPath(theJob, new Path(outputDir));
            theJob.SetOutputFormat(typeof(TextOutputFormat));

            theJob.SetMapOutputKeyClass(typeof(Text));
            theJob.SetMapOutputValueClass(typeof(Text));
            theJob.SetOutputKeyClass(typeof(Text));
            theJob.SetOutputValueClass(typeof(Text));

            theJob.SetReducerClass(typeof(ValueAggregatorReducer));
            theJob.SetCombinerClass(typeof(ValueAggregatorCombiner));

            theJob.SetNumMapTasks(1);
            theJob.SetNumReduceTasks(numOfReducers);
            return theJob;
        }