Example #1
0
 /// <summary>
 /// Creates a node that keeps the supplied split points and the comparator
 /// alongside a reference to the owning partitioner.
 /// </summary>
 /// <param name="_enclosing">The partitioner instance this node belongs to.</param>
 /// <param name="splitPoints">Split-point keys stored on the node.</param>
 /// <param name="comparator">Comparator stored on the node.</param>
 internal BinarySearchNode(
     TotalOrderPartitioner <K, V> _enclosing,
     K[] splitPoints,
     RawComparator <K> comparator)
 {
     this.comparator  = comparator;
     this.splitPoints = splitPoints;
     this._enclosing  = _enclosing;
 }
Example #2
0
 /// <summary>
 /// Creates a trie node at the given level that stores a single fixed result value.
 /// </summary>
 /// <param name="_enclosing">The partitioner instance this node belongs to.</param>
 /// <param name="level">Level forwarded to the base constructor.</param>
 /// <param name="value">Value stored as this node's result.</param>
 internal UnsplitTrieNode(
     TotalOrderPartitioner <K, V> _enclosing,
     int level,
     int value)
     : base(level)
 {
     this.result     = value;
     this._enclosing = _enclosing;
 }
        /// <summary>
        /// Writes <paramref name="splits"/> to a "_partition.lst" sequence file under the
        /// test data directory and registers that file and the reducer count on
        /// <paramref name="conf"/>.
        /// </summary>
        /// <param name="testname">Sub-directory name (below the test data directory) for the file.</param>
        /// <param name="conf">Configuration that receives the partition file path and the
        /// reducer count (<c>splits.Length + 1</c>).</param>
        /// <param name="splits">Keys to append, in order; the first element supplies the key class,
        /// so the array must not be empty.</param>
        /// <returns>The path of the partition file that was written.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static Path WritePartitionFile <T>(string testname, Configuration conf, T[] splits)
            where T : WritableComparable <object>
        {
            FileSystem localFs = FileSystem.GetLocal(conf);
            // Falls back to "/tmp" when the "test.build.data" property is not set.
            Path dataRoot      = new Path(Runtime.GetProperty("test.build.data", "/tmp"));
            Path qualifiedRoot = dataRoot.MakeQualified(localFs);
            Path partitionFile = new Path(qualifiedRoot, testname + "/_partition.lst");

            TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
            conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

            SequenceFile.Writer writer = null;
            try
            {
                writer = SequenceFile.CreateWriter(
                    localFs,
                    conf,
                    partitionFile,
                    splits[0].GetType(),
                    typeof(NullWritable),
                    SequenceFile.CompressionType.None);
                foreach (T split in splits)
                {
                    writer.Append(split, NullWritable.Get());
                }
            }
            finally
            {
                // The writer stays null when CreateWriter itself failed.
                if (writer != null)
                {
                    writer.Close();
                }
            }
            return partitionFile;
        }
        /// <summary>
        /// Checks that the partitioner, configured with natural ordering disabled, assigns
        /// every key in <c>testStrings</c> to its expected partition.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestTotalOrderBinarySearch()
        {
            TotalOrderPartitioner <Text, NullWritable> partitioner =
                new TotalOrderPartitioner <Text, NullWritable>();
            Configuration conf = new Configuration();
            Path          p    = TestTotalOrderPartitioner.WritePartitionFile <Text>(
                "totalorderbinarysearch", conf, splitStrings);

            conf.SetBoolean(TotalOrderPartitioner.NaturalOrder, false);
            conf.SetClass(MRJobConfig.MapOutputKeyClass, typeof(Text), typeof(object));
            try
            {
                partitioner.SetConf(conf);
                NullWritable nw            = NullWritable.Get();
                int          numPartitions = splitStrings.Length + 1;
                foreach (TestTotalOrderPartitioner.Check <Text> chk in testStrings)
                {
                    int actual = partitioner.GetPartition(chk.data, nw, numPartitions);
                    // NUnit takes (expected, actual, message); the key text is the failure message.
                    NUnit.Framework.Assert.AreEqual(chk.part, actual, chk.data.ToString());
                }
            }
            finally
            {
                // Always remove the partition file written for this test.
                p.GetFileSystem(conf).Delete(p, true);
            }
        }
Example #5
0
 /// <summary>
 /// Creates a trie node at the given level that holds exactly one split point,
 /// taken from <paramref name="splitPoints"/> at index <paramref name="lower"/>.
 /// </summary>
 /// <param name="_enclosing">The partitioner instance this node belongs to.</param>
 /// <param name="level">Level forwarded to the base constructor.</param>
 /// <param name="splitPoints">Array the node's split point is read from.</param>
 /// <param name="lower">Index of the split point; also stored on the node.</param>
 internal SinglySplitTrieNode(
     TotalOrderPartitioner <K, V> _enclosing,
     int level,
     BinaryComparable[] splitPoints,
     int lower)
     : base(level)
 {
     BinaryComparable onlySplit = splitPoints[lower];

     this._enclosing   = _enclosing;
     this.mySplitPoint = onlySplit;
     this.lower        = lower;
 }
Example #6
0
 /// <summary>
 /// Creates a leaf trie node at the given level that stores the split-point array
 /// together with a lower and an upper index.
 /// </summary>
 /// <param name="_enclosing">The partitioner instance this node belongs to.</param>
 /// <param name="level">Level forwarded to the base constructor.</param>
 /// <param name="splitPoints">Split-point array stored on the node.</param>
 /// <param name="lower">Lower index stored on the node.</param>
 /// <param name="upper">Upper index stored on the node.</param>
 internal LeafTrieNode(
     TotalOrderPartitioner <K, V> _enclosing,
     int level,
     BinaryComparable[] splitPoints,
     int lower,
     int upper)
     : base(level)
 {
     this.splitPoints = splitPoints;
     this.upper       = upper;
     this.lower       = lower;
     this._enclosing  = _enclosing;
 }
        /// <summary>
        /// Checks partition assignment when the split points are sorted with
        /// <c>ReverseStringComparator</c> and that comparator is configured as the key comparator.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestTotalOrderCustomComparator()
        {
            TotalOrderPartitioner <Text, NullWritable> partitioner =
                new TotalOrderPartitioner <Text, NullWritable>();
            Configuration conf = new Configuration();

            // Sort a copy so the shared splitStrings array is left untouched.
            Text[] revSplitStrings = Arrays.CopyOf(splitStrings, splitStrings.Length);
            Arrays.Sort(revSplitStrings, new TestTotalOrderPartitioner.ReverseStringComparator());
            Path p = TestTotalOrderPartitioner.WritePartitionFile <Text>(
                "totalordercustomcomparator", conf, revSplitStrings);

            conf.SetBoolean(TotalOrderPartitioner.NaturalOrder, false);
            conf.SetClass(MRJobConfig.MapOutputKeyClass, typeof(Text), typeof(object));
            conf.SetClass(MRJobConfig.KeyComparator,
                          typeof(TestTotalOrderPartitioner.ReverseStringComparator),
                          typeof(RawComparator));

            // Keys paired with the partition expected under the reversed ordering.
            AList <TestTotalOrderPartitioner.Check <Text> > revCheck =
                new AList <TestTotalOrderPartitioner.Check <Text> >();

            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("aaaaa"), 9));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("aaabb"), 9));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("aabbb"), 9));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("aaaaa"), 9));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("babbb"), 8));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("baabb"), 8));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("yai"), 1));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("yak"), 1));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("z"), 0));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("ddngo"), 4));
            revCheck.AddItem(new TestTotalOrderPartitioner.Check <Text>(new Text("hi"), 3));
            try
            {
                partitioner.SetConf(conf);
                NullWritable nw            = NullWritable.Get();
                int          numPartitions = splitStrings.Length + 1;
                foreach (TestTotalOrderPartitioner.Check <Text> chk in revCheck)
                {
                    int actual = partitioner.GetPartition(chk.data, nw, numPartitions);
                    // NUnit takes (expected, actual, message); the key text is the failure message.
                    NUnit.Framework.Assert.AreEqual(chk.part, actual, chk.data.ToString());
                }
            }
            finally
            {
                // Always remove the partition file written for this test.
                p.GetFileSystem(conf).Delete(p, true);
            }
        }
Example #8
0
        /// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
        /// <remarks>
        /// Queries the sampler for a sample keyset, sorts it with the job's sort
        /// comparator, selects one key per partition boundary
        /// (<c>numPartitions - 1</c> keys in total) and writes them to the destination returned from
        /// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)"/>.
        /// An existing file at that destination is deleted first.
        /// </remarks>
        /// <param name="job">Job supplying the configuration, input format, reducer count,
        /// sort comparator and map output key class.</param>
        /// <param name="sampler">Sampler queried for the sample keys.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        public static void WritePartitionFile <K, V>(Job job, InputSampler.Sampler <K, V> sampler
                                                     )
        {
            // getInputFormat, getOutputKeyComparator
            Configuration conf          = job.GetConfiguration();
            InputFormat   inf           = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
            int           numPartitions = job.GetNumReduceTasks();

            K[] samples = (K[])sampler.GetSample(inf, job);
            Log.Info("Using " + samples.Length + " samples");
            RawComparator <K> comparator = (RawComparator <K>)job.GetSortComparator();

            Arrays.Sort(samples, comparator);
            Path       dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
            FileSystem fs  = dst.GetFileSystem(conf);

            if (fs.Exists(dst))
            {
                fs.Delete(dst, false);
            }
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, dst, job.GetMapOutputKeyClass
                                                                       (), typeof(NullWritable));
            try
            {
                NullWritable nullValue = NullWritable.Get();
                float        stepSize  = samples.Length / (float)numPartitions;
                int          last      = -1;

                for (int i = 1; i < numPartitions; ++i)
                {
                    // Round half away from zero: for the non-negative values seen here this
                    // matches Java's Math.round, whereas the default Math.Round is half-to-even
                    // and returns a double.
                    int k = (int)Math.Round(stepSize * i, System.MidpointRounding.AwayFromZero);
                    // Advance while the candidate index has not moved past the previously
                    // written one and its key compares equal to that key.
                    while (last >= k && comparator.Compare(samples[last], samples[k]) == 0)
                    {
                        ++k;
                    }
                    writer.Append(samples[k], nullValue);
                    last = k;
                }
            }
            finally
            {
                // Close the writer even when Append or the comparator throws.
                writer.Close();
            }
        }
Example #9
0
 /// <summary>
 /// Creates a reference holder whose content starts out as <c>null</c>.
 /// </summary>
 /// <param name="_enclosing">The partitioner instance this holder belongs to.</param>
 internal CarriedTrieNodeRef(
     TotalOrderPartitioner <K, V> _enclosing)
 {
     this.content    = null;
     this._enclosing = _enclosing;
 }
Example #10
0
 /// <summary>
 /// Creates an inner trie node at the given level.
 /// </summary>
 /// <param name="_enclosing">The partitioner instance this node belongs to.</param>
 /// <param name="level">Level forwarded to the base constructor.</param>
 internal InnerTrieNode(
     TotalOrderPartitioner <K, V> _enclosing,
     int level)
     : base(level)
 {
     this._enclosing = _enclosing;
 }
Example #11
0
        /// <summary>Driver for InputSampler from the command line.</summary>
        /// <remarks>
        /// Parses the command-line options, configures a Job instance and calls
        /// <see cref="InputSampler{K, V}.WritePartitionFile{K, V}(Org.Apache.Hadoop.Mapreduce.Job, Sampler{K, V})"/>.
        /// Recognised options are <c>-r</c>, <c>-inFormat</c>, <c>-keyClass</c>,
        /// <c>-splitSample</c>, <c>-splitRandom</c> and <c>-splitInterval</c>; every other
        /// argument is treated as a path, the last of which is the partition file and the
        /// rest input paths. When no sampler option is given a
        /// <c>RandomSampler(0.1, 10000, 10)</c> is used.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>0 on success; otherwise the value returned by <c>PrintUsage()</c>.</returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Job            job       = Job.GetInstance(GetConf());
            AList <string> otherArgs = new AList <string>();

            InputSampler.Sampler <K, V> sampler = null;
            for (int i = 0; i < args.Length; ++i)
            {
                try
                {
                    if ("-r".Equals(args[i]))
                    {
                        job.SetNumReduceTasks(System.Convert.ToInt32(args[++i]));
                    }
                    else if ("-inFormat".Equals(args[i]))
                    {
                        job.SetInputFormatClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass <InputFormat
                                                                                               >());
                    }
                    else if ("-keyClass".Equals(args[i]))
                    {
                        job.SetMapOutputKeyClass(Sharpen.Runtime.GetType(args[++i]).AsSubclass <WritableComparable
                                                                                                >());
                    }
                    else if ("-splitSample".Equals(args[i]))
                    {
                        int numSamples = System.Convert.ToInt32(args[++i]);
                        int maxSplits  = System.Convert.ToInt32(args[++i]);
                        // A non-positive limit means "no limit".
                        if (0 >= maxSplits)
                        {
                            maxSplits = int.MaxValue;
                        }
                        sampler = new InputSampler.SplitSampler <K, V>(numSamples, maxSplits);
                    }
                    else if ("-splitRandom".Equals(args[i]))
                    {
                        // Invariant culture: the value must parse the same way on every host.
                        double pcnt = double.Parse(args[++i],
                                                   System.Globalization.CultureInfo.InvariantCulture);
                        int numSamples = System.Convert.ToInt32(args[++i]);
                        int maxSplits  = System.Convert.ToInt32(args[++i]);
                        if (0 >= maxSplits)
                        {
                            maxSplits = int.MaxValue;
                        }
                        sampler = new InputSampler.RandomSampler <K, V>(pcnt, numSamples, maxSplits);
                    }
                    else if ("-splitInterval".Equals(args[i]))
                    {
                        double pcnt = double.Parse(args[++i],
                                                   System.Globalization.CultureInfo.InvariantCulture);
                        int maxSplits = System.Convert.ToInt32(args[++i]);
                        if (0 >= maxSplits)
                        {
                            maxSplits = int.MaxValue;
                        }
                        sampler = new InputSampler.IntervalSampler <K, V>(pcnt, maxSplits);
                    }
                    else
                    {
                        otherArgs.AddItem(args[i]);
                    }
                }
                catch (FormatException)
                {
                    System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
                    return(PrintUsage());
                }
                catch (OverflowException)
                {
                    // Convert.ToInt32 / double.Parse report out-of-range numbers with
                    // OverflowException rather than FormatException; treat both as bad input.
                    System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
                    return(PrintUsage());
                }
                catch (IndexOutOfRangeException)
                {
                    // i was advanced past the end by ++i, so args[i - 1] is the last argument.
                    System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i -
                                                                                                  1]);
                    return(PrintUsage());
                }
            }
            if (job.GetNumReduceTasks() <= 1)
            {
                System.Console.Error.WriteLine("Sampler requires more than one reducer");
                return(PrintUsage());
            }
            if (otherArgs.Count < 2)
            {
                System.Console.Out.WriteLine("ERROR: Wrong number of parameters: ");
                return(PrintUsage());
            }
            if (null == sampler)
            {
                sampler = new InputSampler.RandomSampler <K, V>(0.1, 10000, 10);
            }
            // The last positional argument is the partition file; the rest are input paths.
            Path outf = new Path(otherArgs.Remove(otherArgs.Count - 1));

            // NOTE(review): the partition file is set on GetConf(), while
            // InputSampler.WritePartitionFile reads it from job.GetConfiguration().
            // Confirm that Job.GetInstance shares (rather than copies) the configuration.
            TotalOrderPartitioner.SetPartitionFile(GetConf(), outf);
            foreach (string s in otherArgs)
            {
                FileInputFormat.AddInputPath(job, new Path(s));
            }
            InputSampler.WritePartitionFile <K, V>(job, sampler);
            return(0);
        }