/// <exception cref="System.IO.IOException"/>
        /// <summary>
        /// Writes the given split points to a <c>_partition.lst</c> sequence file and
        /// registers that file with <see cref="TotalOrderPartitioner"/>.
        /// </summary>
        /// <remarks>
        /// The file lives under the directory named by the <c>test.build.data</c> property
        /// (falling back to <c>/tmp</c>), in a sub-directory named after
        /// <paramref name="testname"/>, on the file system returned by
        /// <c>FileSystem.GetLocal</c>. As a side effect the reducer count stored in
        /// <paramref name="conf"/> is set to <c>splits.Length + 1</c>.
        /// </remarks>
        /// <typeparam name="T">Key type of the split points.</typeparam>
        /// <param name="testname">Name of the sub-directory that holds the partition file.</param>
        /// <param name="conf">Configuration that receives the partition-file path and reducer count.</param>
        /// <param name="splits">
        /// Split points, appended in array order. The runtime type of the first element is
        /// used as the key class, so the array must contain at least one element.
        /// </param>
        /// <returns>The path of the partition file that was written.</returns>
        private static Path WritePartitionFile <T>(string testname, Configuration conf, T[]
                                                   splits)
            where T : WritableComparable <object>
        {
            FileSystem localFs = FileSystem.GetLocal(conf);
            Path       baseDir = new Path(Runtime.GetProperty("test.build.data", "/tmp"));
            Path       testDir = baseDir.MakeQualified(localFs);
            Path       partitionFile = new Path(testDir, testname + "/_partition.lst");

            TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
            conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

            // The key class comes from the first split point; nothing has been opened yet,
            // so a failure here leaves no writer behind.
            System.Type keyClass = splits[0].GetType();

            SequenceFile.Writer writer = null;
            try
            {
                writer = SequenceFile.CreateWriter(localFs, conf, partitionFile, keyClass,
                                                   typeof(NullWritable), SequenceFile.CompressionType.None);
                foreach (T split in splits)
                {
                    writer.Append(split, NullWritable.Get());
                }
            }
            finally
            {
                // Close the writer even if an append failed part-way through.
                if (writer != null)
                {
                    writer.Close();
                }
            }
            return partitionFile;
        }
Example #2
0
        /// <summary>Driver for InputSampler from the command line.</summary>
        /// <remarks>
        /// Builds a <c>Job</c> from the current configuration, applies the command-line
        /// options to it and calls
        /// <see cref="InputSampler{K, V}.WritePartitionFile{K, V}(Org.Apache.Hadoop.Mapreduce.Job, Sampler{K, V})"/>.
        /// <para>
        /// Recognised options: <c>-r &lt;reduces&gt;</c>, <c>-inFormat &lt;class&gt;</c>,
        /// <c>-keyClass &lt;class&gt;</c>, <c>-splitSample &lt;numSamples&gt; &lt;maxSplits&gt;</c>,
        /// <c>-splitRandom &lt;pcnt&gt; &lt;numSamples&gt; &lt;maxSplits&gt;</c> and
        /// <c>-splitInterval &lt;pcnt&gt; &lt;maxSplits&gt;</c>. A non-positive
        /// <c>maxSplits</c> is replaced by <see cref="int.MaxValue"/>. Every other argument
        /// is collected as a path: the last one is the partition file, the rest are inputs.
        /// When no sampler option is given, a <c>RandomSampler(0.1, 10000, 10)</c> is used.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>
        /// <c>0</c> once the partition file has been written; otherwise the value returned by
        /// <c>PrintUsage()</c> (malformed or missing option value, fewer than two reducers,
        /// or fewer than two path arguments).
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Job            job       = Job.GetInstance(GetConf());
            AList <string> otherArgs = new AList <string>();

            InputSampler.Sampler <K, V> sampler = null;
            for (int i = 0; i < args.Length; ++i)
            {
                try
                {
                    if ("-r".Equals(args[i]))
                    {
                        job.SetNumReduceTasks(System.Convert.ToInt32(args[++i]));
                    }
                    else if ("-inFormat".Equals(args[i]))
                    {
                        job.SetInputFormatClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass <InputFormat>());
                    }
                    else if ("-keyClass".Equals(args[i]))
                    {
                        job.SetMapOutputKeyClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass <WritableComparable>());
                    }
                    else if ("-splitSample".Equals(args[i]))
                    {
                        int numSamples = System.Convert.ToInt32(args[++i]);
                        int maxSplits  = ParseMaxSplits(args[++i]);
                        sampler = new InputSampler.SplitSampler <K, V>(numSamples, maxSplits);
                    }
                    else if ("-splitRandom".Equals(args[i]))
                    {
                        double pcnt       = ParseFraction(args[++i]);
                        int    numSamples = System.Convert.ToInt32(args[++i]);
                        int    maxSplits  = ParseMaxSplits(args[++i]);
                        sampler = new InputSampler.RandomSampler <K, V>(pcnt, numSamples, maxSplits);
                    }
                    else if ("-splitInterval".Equals(args[i]))
                    {
                        double pcnt      = ParseFraction(args[++i]);
                        int    maxSplits = ParseMaxSplits(args[++i]);
                        sampler = new InputSampler.IntervalSampler <K, V>(pcnt, maxSplits);
                    }
                    else
                    {
                        // Anything that is not an option is treated as a path argument.
                        otherArgs.AddItem(args[i]);
                    }
                }
                catch (FormatException)
                {
                    // i already points at the value that failed to parse.
                    System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
                    return(PrintUsage());
                }
                catch (OverflowException)
                {
                    // Out-of-range numbers are reported like any other malformed number
                    // instead of escaping as an unhandled exception.
                    System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
                    return(PrintUsage());
                }
                catch (IndexOutOfRangeException)
                {
                    // args[++i] ran past the end, so i == args.Length and i - 1 is the last
                    // argument that was actually supplied.
                    System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i -
                                                                                                  1]);
                    return(PrintUsage());
                }
            }
            if (job.GetNumReduceTasks() <= 1)
            {
                System.Console.Error.WriteLine("Sampler requires more than one reducer");
                return(PrintUsage());
            }
            if (otherArgs.Count < 2)
            {
                System.Console.Out.WriteLine("ERROR: Wrong number of parameters: ");
                return(PrintUsage());
            }
            if (null == sampler)
            {
                sampler = new InputSampler.RandomSampler <K, V>(0.1, 10000, 10);
            }

            // The last path argument is the partition file; the remaining ones are inputs.
            Path outf = new Path(otherArgs.Remove(otherArgs.Count - 1));

            // NOTE(review): the partition file is registered on GetConf(), not on the job's
            // own configuration. Confirm that Job.GetInstance shares (rather than copies)
            // the configuration, otherwise WritePartitionFile may not see this path.
            TotalOrderPartitioner.SetPartitionFile(GetConf(), outf);
            foreach (string s in otherArgs)
            {
                FileInputFormat.AddInputPath(job, new Path(s));
            }
            InputSampler.WritePartitionFile <K, V>(job, sampler);
            return(0);
        }

        /// <summary>
        /// Parses a <c>maxSplits</c> option value; a non-positive value means "no limit"
        /// and is mapped to <see cref="int.MaxValue"/>.
        /// </summary>
        /// <param name="value">Raw command-line value.</param>
        /// <returns>The parsed value, or <see cref="int.MaxValue"/> when it is zero or negative.</returns>
        /// <exception cref="System.FormatException">The value is not a valid integer.</exception>
        /// <exception cref="System.OverflowException">The value does not fit in an <c>int</c>.</exception>
        private static int ParseMaxSplits(string value)
        {
            int maxSplits = System.Convert.ToInt32(value);
            return maxSplits > 0 ? maxSplits : int.MaxValue;
        }

        /// <summary>
        /// Parses a floating-point option value using the invariant culture, so the accepted
        /// syntax does not depend on the machine's regional settings.
        /// </summary>
        /// <param name="value">Raw command-line value.</param>
        /// <returns>The parsed number.</returns>
        /// <exception cref="System.FormatException">The value is not a valid number.</exception>
        private static double ParseFraction(string value)
        {
            return double.Parse(value, System.Globalization.NumberStyles.Float,
                                System.Globalization.CultureInfo.InvariantCulture);
        }