/// <summary>
/// Configures the "TeraSort" job from the given input and output directories,
/// submits it, and waits for it to finish.
/// </summary>
/// <param name="args">
/// The input directory at index 0 and the output directory at index 1.
/// Any further entries are ignored.
/// </param>
/// <returns>
/// 2 when fewer than two arguments are supplied; -1 when writing the partition
/// file fails; otherwise 0 if <c>job.WaitForCompletion(true)</c> returns true
/// and 1 if it returns false.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    // Validate before indexing into args so a bad invocation yields an exit
    // code instead of an index-out-of-range exception. Only "too few" is
    // rejected: extra arguments were silently ignored before and still are.
    if (args == null || args.Length < 2)
    {
        int supplied = args == null ? 0 : args.Length;
        Log.Error("Expected <input dir> <output dir> but got " + supplied + " argument(s)");
        return 2;
    }

    Log.Info("starting");
    Job job = Job.GetInstance(GetConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    bool useSimplePartitioner = GetUseSimplePartitioner(job);

    TeraInputFormat.SetInputPaths(job, inputDir);
    FileOutputFormat.SetOutputPath(job, outputDir);
    job.SetJobName("TeraSort");
    job.SetJarByClass(typeof(TeraSort));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(Text));
    job.SetInputFormatClass(typeof(TeraInputFormat));
    job.SetOutputFormatClass(typeof(TeraOutputFormat));

    if (useSimplePartitioner)
    {
        job.SetPartitionerClass(typeof(TeraSort.SimplePartitioner));
    }
    else
    {
        long start = Runtime.CurrentTimeMillis();

        // The partition file is written under the output directory and then
        // registered as a cache file for the job.
        // NOTE(review): the "#name" fragment presumably sets the name under which
        // the cache file is exposed to tasks - confirm against Job.AddCacheFile.
        Path partitionFile = new Path(outputDir, TeraInputFormat.PartitionFilename);
        URI partitionUri = new URI(partitionFile.ToString() + "#" + TeraInputFormat.PartitionFilename);
        try
        {
            TeraInputFormat.WritePartitionFile(job, partitionFile);
        }
        catch (Exception e)
        {
            // Log the whole exception (type, message, stack trace), not just
            // e.Message, so the failure can be diagnosed from the log alone.
            Log.Error("Failed to write partition file " + partitionFile + ": " + e);
            return -1;
        }
        job.AddCacheFile(partitionUri);

        long end = Runtime.CurrentTimeMillis();
        System.Console.Out.WriteLine("Spent " + (end - start) + "ms computing partitions.");
        job.SetPartitionerClass(typeof(TeraSort.TotalOrderPartitioner));
    }

    job.GetConfiguration().SetInt("dfs.replication", GetOutputReplication(job));
    TeraOutputFormat.SetFinalSync(job, true);

    int ret = job.WaitForCompletion(true) ? 0 : 1;
    Log.Info("done");
    return ret;
}
/// <summary>
/// Configures the "TeraSum" job for the given input and output paths, runs it
/// with a single reduce task, and waits for it to finish.
/// </summary>
/// <param name="args">Exactly two entries: the input path, then the output path.</param>
/// <returns>
/// 2 (after calling <c>Usage()</c>) when <paramref name="args"/> does not hold
/// exactly two entries; otherwise 0 if <c>WaitForCompletion(true)</c> returns
/// true and 1 if it returns false.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    // The job instance is obtained before the argument count is checked.
    Job checksumJob = Job.GetInstance(GetConf());
    if (args.Length != 2)
    {
        Usage();
        return 2;
    }

    Path inputPath = new Path(args[0]);
    TeraInputFormat.SetInputPaths(checksumJob, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.SetOutputPath(checksumJob, outputPath);

    checksumJob.SetJobName("TeraSum");
    checksumJob.SetJarByClass(typeof(TeraChecksum));
    checksumJob.SetMapperClass(typeof(TeraChecksum.ChecksumMapper));
    checksumJob.SetReducerClass(typeof(TeraChecksum.ChecksumReducer));
    checksumJob.SetOutputKeyClass(typeof(NullWritable));
    checksumJob.SetOutputValueClass(typeof(Unsigned16));

    // Force a single reducer.
    checksumJob.SetNumReduceTasks(1);
    checksumJob.SetInputFormatClass(typeof(TeraInputFormat));

    bool succeeded = checksumJob.WaitForCompletion(true);
    if (succeeded)
    {
        return 0;
    }
    return 1;
}
/// <summary>
/// Configures the "TeraValidate" job for the given input and output paths,
/// runs it with a single reduce task, and waits for it to finish.
/// </summary>
/// <param name="args">Exactly two entries: the input path, then the output path.</param>
/// <returns>
/// 1 (after calling <c>Usage()</c>) when <paramref name="args"/> does not hold
/// exactly two entries; otherwise 0 if <c>WaitForCompletion(true)</c> returns
/// true and 1 if it returns false.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    // The job instance is obtained before the argument count is checked.
    Job validateJob = Job.GetInstance(GetConf());
    if (args.Length != 2)
    {
        Usage();
        return 1;
    }

    Path inputPath = new Path(args[0]);
    TeraInputFormat.SetInputPaths(validateJob, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.SetOutputPath(validateJob, outputPath);

    validateJob.SetJobName("TeraValidate");
    validateJob.SetJarByClass(typeof(TeraValidate));
    validateJob.SetMapperClass(typeof(TeraValidate.ValidateMapper));
    validateJob.SetReducerClass(typeof(TeraValidate.ValidateReducer));
    validateJob.SetOutputKeyClass(typeof(Text));
    validateJob.SetOutputValueClass(typeof(Text));

    // Force a single reducer.
    validateJob.SetNumReduceTasks(1);

    // Force a single split by making the minimum split size as large as possible.
    FileInputFormat.SetMinInputSplitSize(validateJob, long.MaxValue);
    validateJob.SetInputFormatClass(typeof(TeraInputFormat));

    bool succeeded = validateJob.WaitForCompletion(true);
    if (succeeded)
    {
        return 0;
    }
    return 1;
}