예제 #1
0
        public virtual void TestCombiner()
        {
            if (!new FilePath(TestRootDir).Mkdirs())
            {
                throw new RuntimeException("Could not create test dir: " + TestRootDir);
            }
            FilePath @in = new FilePath(TestRootDir, "input");

            if ([email protected]())
            {
                throw new RuntimeException("Could not create test dir: " + @in);
            }
            FilePath    @out = new FilePath(TestRootDir, "output");
            PrintWriter pw   = new PrintWriter(new FileWriter(new FilePath(@in, "data.txt")));

            pw.WriteLine("A|a,1");
            pw.WriteLine("A|b,2");
            pw.WriteLine("B|a,3");
            pw.WriteLine("B|b,4");
            pw.WriteLine("B|c,5");
            pw.Close();
            JobConf job = new JobConf();

            job.Set("mapreduce.framework.name", "local");
            TextInputFormat.SetInputPaths(job, new Path(@in.GetPath()));
            TextOutputFormat.SetOutputPath(job, new Path(@out.GetPath()));
            job.SetMapperClass(typeof(TestOldCombinerGrouping.Map));
            job.SetReducerClass(typeof(TestOldCombinerGrouping.Reduce));
            job.SetInputFormat(typeof(TextInputFormat));
            job.SetMapOutputKeyClass(typeof(Text));
            job.SetMapOutputValueClass(typeof(LongWritable));
            job.SetOutputFormat(typeof(TextOutputFormat));
            job.SetOutputValueGroupingComparator(typeof(TestOldCombinerGrouping.GroupComparator
                                                        ));
            job.SetCombinerClass(typeof(TestOldCombinerGrouping.Combiner));
            job.SetCombinerKeyGroupingComparator(typeof(TestOldCombinerGrouping.GroupComparator
                                                        ));
            job.SetInt("min.num.spills.for.combine", 0);
            JobClient  client     = new JobClient(job);
            RunningJob runningJob = client.SubmitJob(job);

            runningJob.WaitForCompletion();
            if (runningJob.IsSuccessful())
            {
                Counters counters             = runningJob.GetCounters();
                long     combinerInputRecords = counters.GetGroup("org.apache.hadoop.mapreduce.TaskCounter"
                                                                  ).GetCounter("COMBINE_INPUT_RECORDS");
                long combinerOutputRecords = counters.GetGroup("org.apache.hadoop.mapreduce.TaskCounter"
                                                               ).GetCounter("COMBINE_OUTPUT_RECORDS");
                NUnit.Framework.Assert.IsTrue(combinerInputRecords > 0);
                NUnit.Framework.Assert.IsTrue(combinerInputRecords > combinerOutputRecords);
                BufferedReader br = new BufferedReader(new FileReader(new FilePath(@out, "part-00000"
                                                                                   )));
                ICollection <string> output = new HashSet <string>();
                string line = br.ReadLine();
                NUnit.Framework.Assert.IsNotNull(line);
                output.AddItem(Sharpen.Runtime.Substring(line, 0, 1) + Sharpen.Runtime.Substring(
                                   line, 4, 5));
                line = br.ReadLine();
                NUnit.Framework.Assert.IsNotNull(line);
                output.AddItem(Sharpen.Runtime.Substring(line, 0, 1) + Sharpen.Runtime.Substring(
                                   line, 4, 5));
                line = br.ReadLine();
                NUnit.Framework.Assert.IsNull(line);
                br.Close();
                ICollection <string> expected = new HashSet <string>();
                expected.AddItem("A2");
                expected.AddItem("B5");
                NUnit.Framework.Assert.AreEqual(expected, output);
            }
            else
            {
                NUnit.Framework.Assert.Fail("Job failed");
            }
        }