/// <summary>Builds the JobConf for an Aggregate based map/reduce job.</summary>
/// <remarks>
/// The arguments are first run through a <c>GenericOptionsParser</c>; whatever
/// remains is interpreted positionally as
/// <c>inputDirs outDir [numOfReducer [textinputformat|seq [specfile [jobName]]]]</c>.
/// If fewer than two arguments remain, a usage message is written to standard
/// output and <c>System.Environment.Exit(1)</c> is called.
/// The text input format is chosen only when the fourth argument equals
/// "textinputformat" (ignoring case); in every other case, including when the
/// argument is absent, the sequence file input format is used.
/// </remarks>
/// <param name="args">
/// the arguments used for job creation. Generic hadoop
/// arguments are accepted.
/// </param>
/// <param name="caller">
/// the caller class, used to locate the job jar when the
/// "user.jar.file" property is not set; when null,
/// <c>ValueAggregatorJob</c> is used instead.
/// </param>
/// <returns>a JobConf object ready for submission.</returns>
/// <exception cref="System.IO.IOException"/>
/// <seealso cref="Org.Apache.Hadoop.Util.GenericOptionsParser"/>
public static JobConf CreateValueAggregatorJob(string[] args, Type caller)
{
    Configuration conf = new Configuration();

    // Let the generic parser apply its options to conf; keep only the leftovers.
    GenericOptionsParser optionsParser = new GenericOptionsParser(conf, args);
    args = optionsParser.GetRemainingArgs();

    if (args.Length < 2)
    {
        System.Console.Out.WriteLine("usage: inputDirs outDir " + "[numOfReducer [textinputformat|seq [specfile [jobName]]]]");
        GenericOptionsParser.PrintGenericCommandUsage(System.Console.Out);
        System.Environment.Exit(1);
    }

    // Positional arguments; the optional ones fall back to defaults.
    string inputPaths = args[0];
    string outputPath = args[1];
    int reducerCount = args.Length > 2 ? System.Convert.ToInt32(args[2]) : 1;

    // Anything other than an explicit "textinputformat" means sequence files.
    bool useTextInput = args.Length > 3 && args[3].CompareToIgnoreCase("textinputformat") == 0;
    Type inputFormat = useTextInput ? typeof(TextInputFormat) : typeof(SequenceFileInputFormat);

    Path aggregatorSpec = args.Length > 4 ? new Path(args[4]) : null;
    string jobLabel = args.Length > 5 ? args[5] : string.Empty;

    JobConf job = new JobConf(conf);

    // The spec file is added before "user.jar.file" is looked up below.
    if (aggregatorSpec != null)
    {
        job.AddResource(aggregatorSpec);
    }

    string configuredJar = job.Get("user.jar.file");
    if (configuredJar != null)
    {
        job.SetJar(configuredJar);
    }
    else
    {
        job.SetJarByClass(caller ?? typeof(ValueAggregatorJob));
    }

    job.SetJobName("ValueAggregatorJob: " + jobLabel);

    // Input side and mapper.
    FileInputFormat.AddInputPaths(job, inputPaths);
    job.SetInputFormat(inputFormat);
    job.SetMapperClass(typeof(ValueAggregatorMapper));

    // Output side: Text keys and values for both map output and final output.
    FileOutputFormat.SetOutputPath(job, new Path(outputPath));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetMapOutputKeyClass(typeof(Text));
    job.SetMapOutputValueClass(typeof(Text));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(Text));

    job.SetReducerClass(typeof(ValueAggregatorReducer));
    job.SetCombinerClass(typeof(ValueAggregatorCombiner));

    job.SetNumMapTasks(1);
    job.SetNumReduceTasks(reducerCount);

    return job;
}