Exemplo n.º 1
0
        /// <summary>
        /// Configures the "TeraSort" job from the input and output directories
        /// given on the command line, submits it, and waits for it to finish.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the input directory and <c>args[1]</c> is the output
        /// directory. Any further elements are ignored.
        /// </param>
        /// <returns>
        /// 0 when <c>WaitForCompletion</c> reports success and 1 when it does not;
        /// 2 when fewer than two arguments are supplied; -1 when writing the
        /// partition file throws.
        /// </returns>
        /// <exception cref="System.Exception">
        /// Anything thrown outside the partition-file write is propagated.
        /// </exception>
        public virtual int Run(string[] args)
        {
            // Reject a missing or short argument list up front instead of failing
            // with an index error once the paths are built. Extra arguments are
            // still tolerated so existing invocations keep working.
            if (args == null || args.Length < 2)
            {
                Log.Error("TeraSort requires an input directory and an output directory");
                return(2);
            }

            Log.Info("starting");
            Job  job                  = Job.GetInstance(GetConf());
            Path inputDir             = new Path(args[0]);
            Path outputDir            = new Path(args[1]);
            bool useSimplePartitioner = GetUseSimplePartitioner(job);

            TeraInputFormat.SetInputPaths(job, inputDir);
            FileOutputFormat.SetOutputPath(job, outputDir);
            job.SetJobName("TeraSort");
            job.SetJarByClass(typeof(TeraSort));
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(Text));
            job.SetInputFormatClass(typeof(TeraInputFormat));
            job.SetOutputFormatClass(typeof(TeraOutputFormat));
            if (useSimplePartitioner)
            {
                job.SetPartitionerClass(typeof(TeraSort.SimplePartitioner));
            }
            else
            {
                // The partition file is written under the output directory and
                // registered as a cache file; the URI fragment names it
                // TeraInputFormat.PartitionFilename.
                long start         = Runtime.CurrentTimeMillis();
                Path partitionFile = new Path(outputDir, TeraInputFormat.PartitionFilename);
                URI  partitionUri  = new URI(partitionFile.ToString() + "#" + TeraInputFormat.PartitionFilename
                                             );
                try
                {
                    TeraInputFormat.WritePartitionFile(job, partitionFile);
                }
                catch (Exception e)
                {
                    // Log the whole exception (type, message and stack trace),
                    // not just the message, so the failure can be diagnosed.
                    Log.Error("Unable to write partition file " + partitionFile + ": " + e);
                    return(-1);
                }
                job.AddCacheFile(partitionUri);
                long end = Runtime.CurrentTimeMillis();
                System.Console.Out.WriteLine("Spent " + (end - start) + "ms computing partitions."
                                             );
                job.SetPartitionerClass(typeof(TeraSort.TotalOrderPartitioner));
            }
            // Override "dfs.replication" with the value GetOutputReplication returns for this job.
            job.GetConfiguration().SetInt("dfs.replication", GetOutputReplication(job));
            TeraOutputFormat.SetFinalSync(job, true);
            int ret = job.WaitForCompletion(true) ? 0 : 1;

            Log.Info("done");
            return(ret);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Sets up the "TeraSum" job with the checksum mapper and reducer, runs
        /// it, and waits for it to finish.
        /// </summary>
        /// <param name="args">
        /// Exactly two elements: the input directory followed by the output
        /// directory.
        /// </param>
        /// <returns>
        /// 2 (after calling <c>Usage</c>) when the argument count is not two;
        /// otherwise 0 when <c>WaitForCompletion</c> reports success and 1 when
        /// it does not.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Job checksumJob = Job.GetInstance(GetConf());

            bool hasBothPaths = args.Length == 2;
            if (!hasBothPaths)
            {
                Usage();
                return 2;
            }

            // Input and output locations.
            Path source = new Path(args[0]);
            TeraInputFormat.SetInputPaths(checksumJob, source);
            Path destination = new Path(args[1]);
            FileOutputFormat.SetOutputPath(checksumJob, destination);

            // Job identity and the classes that do the work.
            checksumJob.SetJobName("TeraSum");
            checksumJob.SetJarByClass(typeof(TeraChecksum));
            checksumJob.SetMapperClass(typeof(TeraChecksum.ChecksumMapper));
            checksumJob.SetReducerClass(typeof(TeraChecksum.ChecksumReducer));
            checksumJob.SetOutputKeyClass(typeof(NullWritable));
            checksumJob.SetOutputValueClass(typeof(Unsigned16));

            // Exactly one reduce task is requested.
            checksumJob.SetNumReduceTasks(1);
            checksumJob.SetInputFormatClass(typeof(TeraInputFormat));

            bool succeeded = checksumJob.WaitForCompletion(true);
            if (succeeded)
            {
                return 0;
            }
            return 1;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Sets up the "TeraValidate" job with the validation mapper and
        /// reducer, runs it, and waits for it to finish.
        /// </summary>
        /// <param name="args">
        /// Exactly two elements: the input directory followed by the output
        /// directory.
        /// </param>
        /// <returns>
        /// 1 (after calling <c>Usage</c>) when the argument count is not two;
        /// otherwise 0 when <c>WaitForCompletion</c> reports success and 1 when
        /// it does not.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Job validateJob = Job.GetInstance(GetConf());

            bool hasBothPaths = args.Length == 2;
            if (!hasBothPaths)
            {
                Usage();
                return 1;
            }

            // Input and output locations.
            Path source = new Path(args[0]);
            TeraInputFormat.SetInputPaths(validateJob, source);
            Path destination = new Path(args[1]);
            FileOutputFormat.SetOutputPath(validateJob, destination);

            // Job identity and the classes that do the work.
            validateJob.SetJobName("TeraValidate");
            validateJob.SetJarByClass(typeof(TeraValidate));
            validateJob.SetMapperClass(typeof(TeraValidate.ValidateMapper));
            validateJob.SetReducerClass(typeof(TeraValidate.ValidateReducer));
            validateJob.SetOutputKeyClass(typeof(Text));
            validateJob.SetOutputValueClass(typeof(Text));

            // Exactly one reduce task is requested.
            validateJob.SetNumReduceTasks(1);
            // A minimum split size of long.MaxValue is meant to force a single split.
            FileInputFormat.SetMinInputSplitSize(validateJob, long.MaxValue);
            validateJob.SetInputFormatClass(typeof(TeraInputFormat));

            bool succeeded = validateJob.WaitForCompletion(true);
            if (succeeded)
            {
                return 0;
            }
            return 1;
        }