/// <summary>
/// Writes <paramref name="splits"/> as the keys of an uncompressed sequence file
/// on the local file system and registers that file with
/// <c>TotalOrderPartitioner</c> on <paramref name="conf"/>.
/// </summary>
/// <remarks>
/// The file is created at <c>&lt;root&gt;/&lt;testname&gt;/_partition.lst</c>, where the
/// root is the <c>test.build.data</c> property (default <c>/tmp</c>) qualified
/// against the local file system. The reducer count on <paramref name="conf"/>
/// is set to <c>splits.Length + 1</c>. The key class of the file is the runtime
/// type of <c>splits[0]</c>, so <paramref name="splits"/> must not be empty.
/// </remarks>
/// <param name="testname">Name of the subdirectory that receives the partition file.</param>
/// <param name="conf">Configuration that is read for the file system and updated with the partition settings.</param>
/// <param name="splits">Split keys, appended to the file in array order with <c>NullWritable</c> values.</param>
/// <returns>The path of the partition file.</returns>
/// <exception cref="System.IO.IOException"/>
private static Path WritePartitionFile<T>(string testname, Configuration conf, T[] splits)
    where T : WritableComparable<object>
{
    FileSystem localFs = FileSystem.GetLocal(conf);
    Path dataRoot = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(localFs);
    Path partitionFile = new Path(dataRoot, testname + "/_partition.lst");

    // Publish the partition file location and the matching reducer count
    // (one more reducer than there are split points).
    TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
    conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

    SequenceFile.Writer writer = null;
    try
    {
        writer = SequenceFile.CreateWriter(
            localFs,
            conf,
            partitionFile,
            splits[0].GetType(),
            typeof(NullWritable),
            SequenceFile.CompressionType.None);

        foreach (T splitKey in splits)
        {
            writer.Append(splitKey, NullWritable.Get());
        }
    }
    finally
    {
        // The writer stays null when CreateWriter (or splits[0]) throws.
        if (writer != null)
        {
            writer.Close();
        }
    }

    return partitionFile;
}
/// <summary>Driver for InputSampler from the command line.</summary>
/// <remarks>
/// Builds a <c>Job</c> from <c>GetConf()</c>, applies the options found in
/// <paramref name="args"/>, and calls <c>InputSampler.WritePartitionFile</c>.
/// Recognised options:
/// <list type="bullet">
/// <item><description><c>-r &lt;reduces&gt;</c>: number of reduce tasks.</description></item>
/// <item><description><c>-inFormat &lt;class&gt;</c>: input format class name.</description></item>
/// <item><description><c>-keyClass &lt;class&gt;</c>: map output key class name.</description></item>
/// <item><description><c>-splitSample &lt;numSamples&gt; &lt;maxSplits&gt;</c>: use a <c>SplitSampler</c>.</description></item>
/// <item><description><c>-splitRandom &lt;pcnt&gt; &lt;numSamples&gt; &lt;maxSplits&gt;</c>: use a <c>RandomSampler</c>.</description></item>
/// <item><description><c>-splitInterval &lt;pcnt&gt; &lt;maxSplits&gt;</c>: use an <c>IntervalSampler</c>.</description></item>
/// </list>
/// A <c>maxSplits</c> value that is zero or negative is replaced by
/// <c>int.MaxValue</c>. Every other argument is collected as a path; the last
/// one is the partition file and the rest are input paths. When no sampler
/// option is given, <c>RandomSampler(0.1, 10000, 10)</c> is used.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <returns>0 on success; otherwise the value returned by <c>PrintUsage()</c>.</returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    Job job = Job.GetInstance(GetConf());
    AList<string> otherArgs = new AList<string>();
    InputSampler.Sampler<K, V> sampler = null;

    for (int i = 0; i < args.Length; ++i)
    {
        try
        {
            if ("-r".Equals(args[i]))
            {
                job.SetNumReduceTasks(System.Convert.ToInt32(args[++i]));
            }
            else if ("-inFormat".Equals(args[i]))
            {
                job.SetInputFormatClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass<InputFormat>());
            }
            else if ("-keyClass".Equals(args[i]))
            {
                job.SetMapOutputKeyClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass<WritableComparable>());
            }
            else if ("-splitSample".Equals(args[i]))
            {
                int numSamples = System.Convert.ToInt32(args[++i]);
                int maxSplits = ParseMaxSplits(args[++i]);
                sampler = new InputSampler.SplitSampler<K, V>(numSamples, maxSplits);
            }
            else if ("-splitRandom".Equals(args[i]))
            {
                // Invariant culture keeps "0.1" meaning one tenth regardless of
                // the machine's regional settings.
                double pcnt = double.Parse(args[++i], System.Globalization.CultureInfo.InvariantCulture);
                int numSamples = System.Convert.ToInt32(args[++i]);
                int maxSplits = ParseMaxSplits(args[++i]);
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            }
            else if ("-splitInterval".Equals(args[i]))
            {
                double pcnt = double.Parse(args[++i], System.Globalization.CultureInfo.InvariantCulture);
                int maxSplits = ParseMaxSplits(args[++i]);
                sampler = new InputSampler.IntervalSampler<K, V>(pcnt, maxSplits);
            }
            else
            {
                otherArgs.AddItem(args[i]);
            }
        }
        catch (FormatException)
        {
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return PrintUsage();
        }
        catch (System.OverflowException)
        {
            // A numeral that is well-formed but out of range is reported the
            // same way as a malformed one instead of escaping as an unhandled
            // exception.
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return PrintUsage();
        }
        catch (IndexOutOfRangeException)
        {
            // i has already been advanced past the end of args, so args[i - 1]
            // is the last argument that was actually supplied.
            System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i - 1]);
            return PrintUsage();
        }
    }

    if (job.GetNumReduceTasks() <= 1)
    {
        System.Console.Error.WriteLine("Sampler requires more than one reducer");
        return PrintUsage();
    }

    // At least one input path plus the output (partition file) path are needed.
    if (otherArgs.Count < 2)
    {
        System.Console.Out.WriteLine("ERROR: Wrong number of parameters: ");
        return PrintUsage();
    }

    if (null == sampler)
    {
        sampler = new InputSampler.RandomSampler<K, V>(0.1, 10000, 10);
    }

    // The last positional argument is the partition file; it is removed from
    // the list so that only input paths remain.
    Path outf = new Path(otherArgs.Remove(otherArgs.Count - 1));

    // NOTE(review): the partition file is registered on GetConf(), not on the
    // job's own configuration. Confirm that Job.GetInstance shares rather than
    // copies the configuration; otherwise WritePartitionFile may not see it.
    TotalOrderPartitioner.SetPartitionFile(GetConf(), outf);

    foreach (string inputPath in otherArgs)
    {
        FileInputFormat.AddInputPath(job, new Path(inputPath));
    }

    InputSampler.WritePartitionFile<K, V>(job, sampler);
    return 0;
}

/// <summary>
/// Parses a <c>maxSplits</c> argument; a value of zero or less means "no limit"
/// and is returned as <c>int.MaxValue</c>.
/// </summary>
private static int ParseMaxSplits(string arg)
{
    int maxSplits = System.Convert.ToInt32(arg);
    return 0 >= maxSplits ? int.MaxValue : maxSplits;
}