/// <summary>
/// Creates a node that keeps a reference to the enclosing partitioner together with
/// the split points and the comparator handed to it.
/// </summary>
/// <param name="_enclosing">The partitioner instance this node belongs to.</param>
/// <param name="splitPoints">The split-point keys stored on this node.</param>
/// <param name="comparator">The comparator stored on this node.</param>
internal BinarySearchNode(
    TotalOrderPartitioner<K, V> _enclosing,
    K[] splitPoints,
    RawComparator<K> comparator)
{
    this._enclosing = _enclosing;
    this.splitPoints = splitPoints;
    this.comparator = comparator;
}
/// <summary>
/// Creates a trie node at the given level that stores a single fixed result value.
/// </summary>
/// <param name="_enclosing">The partitioner instance this node belongs to.</param>
/// <param name="level">The level forwarded to the base constructor.</param>
/// <param name="value">The value stored in <c>result</c>.</param>
internal UnsplitTrieNode(TotalOrderPartitioner<K, V> _enclosing, int level, int value)
    : base(level)
{
    this._enclosing = _enclosing;
    result = value;
}
/// <summary>
/// Writes the given split keys to a <c>_partition.lst</c> sequence file on the local
/// file system and registers that file on <paramref name="conf"/> as the partition file.
/// </summary>
/// <remarks>
/// The file is placed under the directory named by the <c>test.build.data</c> property
/// (default <c>/tmp</c>), in a sub-directory named after <paramref name="testname"/>.
/// The number of reduces on <paramref name="conf"/> is set to <c>splits.Length + 1</c>.
/// </remarks>
/// <param name="testname">Sub-directory name used for this test's partition file.</param>
/// <param name="conf">Configuration that is updated and used to create the writer.</param>
/// <param name="splits">Keys to append, in order; each is paired with a NullWritable value.</param>
/// <returns>The path of the partition file that was written.</returns>
/// <exception cref="System.IO.IOException"/>
private static Path WritePartitionFile<T>(string testname, Configuration conf, T[] splits)
    where T : WritableComparable<object>
{
    FileSystem localFs = FileSystem.GetLocal(conf);
    Path testRoot = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(localFs);
    Path partitionPath = new Path(testRoot, testname + "/_partition.lst");

    TotalOrderPartitioner.SetPartitionFile(conf, partitionPath);
    conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

    SequenceFile.Writer writer = null;
    try
    {
        // The key class is taken from the first split, so an empty array fails here.
        writer = SequenceFile.CreateWriter(
            localFs,
            conf,
            partitionPath,
            splits[0].GetType(),
            typeof(NullWritable),
            SequenceFile.CompressionType.None);

        foreach (T split in splits)
        {
            writer.Append(split, NullWritable.Get());
        }
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
    }

    return partitionPath;
}
/// <summary>
/// Checks partition assignment for Text keys with natural ordering disabled, using
/// the shared <c>splitStrings</c> / <c>testStrings</c> fixtures.
/// </summary>
/// <remarks>The partition file written for the test is deleted afterwards.</remarks>
/// <exception cref="System.Exception"/>
public virtual void TestTotalOrderBinarySearch()
{
    TotalOrderPartitioner<Text, NullWritable> partitioner =
        new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();
    Path partitionFile = TestTotalOrderPartitioner.WritePartitionFile<Text>(
        "totalorderbinarysearch", conf, splitStrings);

    conf.SetBoolean(TotalOrderPartitioner.NaturalOrder, false);
    conf.SetClass(MRJobConfig.MapOutputKeyClass, typeof(Text), typeof(object));

    try
    {
        partitioner.SetConf(conf);

        NullWritable nullValue = NullWritable.Get();
        int numPartitions = splitStrings.Length + 1;
        foreach (TestTotalOrderPartitioner.Check<Text> check in testStrings)
        {
            int actual = partitioner.GetPartition(check.data, nullValue, numPartitions);
            NUnit.Framework.Assert.AreEqual(check.data.ToString(), check.part, actual);
        }
    }
    finally
    {
        // Always remove the partition file, even when an assertion fails.
        partitionFile.GetFileSystem(conf).Delete(partitionFile, true);
    }
}
/// <summary>
/// Creates a trie node at the given level that remembers one split point, taken from
/// <paramref name="splitPoints"/> at index <paramref name="lower"/>.
/// </summary>
/// <param name="_enclosing">The partitioner instance this node belongs to.</param>
/// <param name="level">The level forwarded to the base constructor.</param>
/// <param name="splitPoints">Array from which the single split point is read.</param>
/// <param name="lower">Index of the split point; also stored on the node.</param>
internal SinglySplitTrieNode(
    TotalOrderPartitioner<K, V> _enclosing,
    int level,
    BinaryComparable[] splitPoints,
    int lower)
    : base(level)
{
    this._enclosing = _enclosing;
    this.lower = lower;
    mySplitPoint = splitPoints[lower];
}
/// <summary>
/// Creates a trie node at the given level that stores the split-point array along
/// with a lower and an upper index.
/// </summary>
/// <param name="_enclosing">The partitioner instance this node belongs to.</param>
/// <param name="level">The level forwarded to the base constructor.</param>
/// <param name="splitPoints">The split-point array stored on this node.</param>
/// <param name="lower">The lower index stored on this node.</param>
/// <param name="upper">The upper index stored on this node.</param>
internal LeafTrieNode(
    TotalOrderPartitioner<K, V> _enclosing,
    int level,
    BinaryComparable[] splitPoints,
    int lower,
    int upper)
    : base(level)
{
    this._enclosing = _enclosing;
    this.lower = lower;
    this.upper = upper;
    this.splitPoints = splitPoints;
}
/// <summary>
/// Checks partition assignment when the split points are sorted with, and the job is
/// configured to use, <c>ReverseStringComparator</c> as the key comparator.
/// </summary>
/// <remarks>The partition file written for the test is deleted afterwards.</remarks>
/// <exception cref="System.Exception"/>
public virtual void TestTotalOrderCustomComparator()
{
    TotalOrderPartitioner<Text, NullWritable> partitioner =
        new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();

    // Work on a copy so the shared splitStrings fixture keeps its original order.
    Text[] revSplitStrings = Arrays.CopyOf(splitStrings, splitStrings.Length);
    Arrays.Sort(revSplitStrings, new TestTotalOrderPartitioner.ReverseStringComparator());

    Path partitionFile = TestTotalOrderPartitioner.WritePartitionFile<Text>(
        "totalordercustomcomparator", conf, revSplitStrings);
    conf.SetBoolean(TotalOrderPartitioner.NaturalOrder, false);
    conf.SetClass(MRJobConfig.MapOutputKeyClass, typeof(Text), typeof(object));
    conf.SetClass(
        MRJobConfig.KeyComparator,
        typeof(TestTotalOrderPartitioner.ReverseStringComparator),
        typeof(RawComparator));

    // Keys and the partition each is expected to land in; the two arrays are parallel.
    string[] keys =
    {
        "aaaaa", "aaabb", "aabbb", "aaaaa", "babbb", "baabb",
        "yai", "yak", "z", "ddngo", "hi"
    };
    int[] expectedParts = { 9, 9, 9, 9, 8, 8, 1, 1, 0, 4, 3 };

    AList<TestTotalOrderPartitioner.Check<Text>> revCheck =
        new AList<TestTotalOrderPartitioner.Check<Text>>();
    for (int idx = 0; idx < keys.Length; ++idx)
    {
        revCheck.AddItem(
            new TestTotalOrderPartitioner.Check<Text>(new Text(keys[idx]), expectedParts[idx]));
    }

    try
    {
        partitioner.SetConf(conf);

        NullWritable nullValue = NullWritable.Get();
        int numPartitions = splitStrings.Length + 1;
        foreach (TestTotalOrderPartitioner.Check<Text> check in revCheck)
        {
            int actual = partitioner.GetPartition(check.data, nullValue, numPartitions);
            NUnit.Framework.Assert.AreEqual(check.data.ToString(), check.part, actual);
        }
    }
    finally
    {
        // Always remove the partition file, even when an assertion fails.
        partitionFile.GetFileSystem(conf).Delete(partitionFile, true);
    }
}
/// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
/// <remarks>
/// Queries the sampler for a sample keyset, sorts by the output key
/// comparator, selects the keys for each rank, and writes to the destination
/// returned from
/// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)"/>.
/// An existing file at the destination is deleted before writing. The writer is
/// closed even when selecting or appending a key fails.
/// </remarks>
/// <param name="job">Job supplying the configuration, input format, reducer count,
/// sort comparator and map output key class.</param>
/// <param name="sampler">Sampler queried for the sample keys.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
public static void WritePartitionFile<K, V>(Job job, InputSampler.Sampler<K, V> sampler)
{
    Configuration conf = job.GetConfiguration();
    InputFormat inf = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
    int numPartitions = job.GetNumReduceTasks();

    K[] samples = (K[])sampler.GetSample(inf, job);
    Log.Info("Using " + samples.Length + " samples");
    RawComparator<K> comparator = (RawComparator<K>)job.GetSortComparator();
    Arrays.Sort(samples, comparator);

    Path dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
    FileSystem fs = dst.GetFileSystem(conf);
    if (fs.Exists(dst))
    {
        fs.Delete(dst, false);
    }

    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, dst, job.GetMapOutputKeyClass(), typeof(NullWritable));
    try
    {
        NullWritable nullValue = NullWritable.Get();
        float stepSize = samples.Length / (float)numPartitions;
        int last = -1;
        for (int i = 1; i < numPartitions; ++i)
        {
            // Round half up. Math.Round returns a double and defaults to
            // round-half-to-even, so floor(x + 0.5) with an explicit cast is used.
            int k = (int)Math.Floor((double)(stepSize * i) + 0.5);

            // Skip forward past keys equal to the one written last so that the
            // same split key is not emitted twice.
            // NOTE(review): k is not bounds-checked here; with fewer distinct samples
            // than partitions this can index past the end of samples.
            while (last >= k && comparator.Compare(samples[last], samples[k]) == 0)
            {
                ++k;
            }

            writer.Append(samples[k], nullValue);
            last = k;
        }
    }
    finally
    {
        // Release the writer on both the success and the failure path.
        writer.Close();
    }
}
/// <summary>
/// Creates an empty reference holder: <c>content</c> starts out as <c>null</c>.
/// </summary>
/// <param name="_enclosing">The partitioner instance this holder belongs to.</param>
internal CarriedTrieNodeRef(TotalOrderPartitioner<K, V> _enclosing)
{
    this._enclosing = _enclosing;
    content = null;
}
/// <summary>
/// Creates a trie node at the given level that only records its enclosing partitioner.
/// </summary>
/// <param name="_enclosing">The partitioner instance this node belongs to.</param>
/// <param name="level">The level forwarded to the base constructor.</param>
internal InnerTrieNode(TotalOrderPartitioner<K, V> _enclosing, int level)
    : base(level)
{
    this._enclosing = _enclosing;
}
/// <summary>Driver for InputSampler from the command line.</summary>
/// <remarks>
/// Configures a Job instance from the arguments and calls
/// <see cref="InputSampler{K, V}.WritePartitionFile{K, V}(Org.Apache.Hadoop.Mapreduce.Job, Sampler{K, V})"/>.
/// Recognised options are <c>-r</c>, <c>-inFormat</c>, <c>-keyClass</c>,
/// <c>-splitSample</c>, <c>-splitRandom</c> and <c>-splitInterval</c>; every other
/// argument is collected as a path. The last collected path is the partition file
/// destination and the ones before it are added as input paths. When no sampler
/// option is given, a RandomSampler(0.1, 10000, 10) is used.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <returns>0 on success; otherwise the value returned by <c>PrintUsage()</c>.</returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    Job job = Job.GetInstance(GetConf());
    AList<string> otherArgs = new AList<string>();
    InputSampler.Sampler<K, V> sampler = null;

    for (int i = 0; i < args.Length; ++i)
    {
        try
        {
            if ("-r".Equals(args[i]))
            {
                job.SetNumReduceTasks(System.Convert.ToInt32(args[++i]));
            }
            else if ("-inFormat".Equals(args[i]))
            {
                job.SetInputFormatClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass<InputFormat>());
            }
            else if ("-keyClass".Equals(args[i]))
            {
                job.SetMapOutputKeyClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass<WritableComparable>());
            }
            else if ("-splitSample".Equals(args[i]))
            {
                int numSamples = System.Convert.ToInt32(args[++i]);
                int maxSplits = System.Convert.ToInt32(args[++i]);
                if (0 >= maxSplits)
                {
                    // A non-positive limit means "no limit".
                    maxSplits = int.MaxValue;
                }
                sampler = new InputSampler.SplitSampler<K, V>(numSamples, maxSplits);
            }
            else if ("-splitRandom".Equals(args[i]))
            {
                // Invariant culture: command-line numbers always use '.' as the decimal separator.
                double pcnt = double.Parse(args[++i], System.Globalization.CultureInfo.InvariantCulture);
                int numSamples = System.Convert.ToInt32(args[++i]);
                int maxSplits = System.Convert.ToInt32(args[++i]);
                if (0 >= maxSplits)
                {
                    maxSplits = int.MaxValue;
                }
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            }
            else if ("-splitInterval".Equals(args[i]))
            {
                double pcnt = double.Parse(args[++i], System.Globalization.CultureInfo.InvariantCulture);
                int maxSplits = System.Convert.ToInt32(args[++i]);
                if (0 >= maxSplits)
                {
                    maxSplits = int.MaxValue;
                }
                sampler = new InputSampler.IntervalSampler<K, V>(pcnt, maxSplits);
            }
            else
            {
                otherArgs.AddItem(args[i]);
            }
        }
        catch (FormatException)
        {
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return(PrintUsage());
        }
        catch (IndexOutOfRangeException)
        {
            // i was advanced past the end while reading an option value, so
            // args[i - 1] is the last argument that actually exists.
            System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i - 1]);
            return(PrintUsage());
        }
    }

    if (job.GetNumReduceTasks() <= 1)
    {
        System.Console.Error.WriteLine("Sampler requires more than one reducer");
        return(PrintUsage());
    }
    if (otherArgs.Count < 2)
    {
        System.Console.Out.WriteLine("ERROR: Wrong number of parameters: ");
        return(PrintUsage());
    }
    if (null == sampler)
    {
        sampler = new InputSampler.RandomSampler<K, V>(0.1, 10000, 10);
    }

    // The last positional argument is the output; the remaining ones are inputs.
    Path outf = new Path(otherArgs.Remove(otherArgs.Count - 1));
    TotalOrderPartitioner.SetPartitionFile(GetConf(), outf);
    foreach (string s in otherArgs)
    {
        FileInputFormat.AddInputPath(job, new Path(s));
    }

    InputSampler.WritePartitionFile<K, V>(job, sampler);
    return(0);
}