Example #1
0
        /// <summary>
        /// Prepares the input, fills in <paramref name="conf"/> for a job named "mr" with one
        /// map task and one reduce task, submits it through <c>JobClient.RunJob</c>, and hands
        /// the finished job to <c>ValidateOutput</c>.
        /// </summary>
        /// <param name="conf">Job configuration that is populated and then submitted.</param>
        /// <param name="mapperBadRecords">
        /// Bad records for the map side; the list size is added to the map attempt limit.
        /// </param>
        /// <param name="redBadRecords">
        /// Bad records for the reduce side; the list size is added to the reduce attempt limit.
        /// </param>
        /// <exception cref="System.Exception"/>
        private void RunMapReduce(JobConf conf, IList<string> mapperBadRecords, IList<string> redBadRecords)
        {
            CreateInput();

            // Job identity, task counts and task timeout.
            int taskTimeout = 30 * 1000;
            conf.SetJobName("mr");
            conf.SetNumMapTasks(1);
            conf.SetNumReduceTasks(1);
            conf.SetInt(JobContext.TaskTimeout, taskTimeout);

            // Skip limits are set to the largest long value on both sides, and the
            // attempts-to-start-skipping setting is set to 0.
            long skipLimit = long.MaxValue;
            SkipBadRecords.SetMapperMaxSkipRecords(conf, skipLimit);
            SkipBadRecords.SetReducerMaxSkipGroups(conf, skipLimit);
            SkipBadRecords.SetAttemptsToStartSkipping(conf, 0);

            // The number of attempts needed to complete a task successfully depends on
            // the number of bad records, so each limit grows with its bad-record list.
            var mapAttemptLimit = SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 + mapperBadRecords.Count;
            conf.SetMaxMapAttempts(mapAttemptLimit);
            var reduceAttemptLimit = SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 + redBadRecords.Count;
            conf.SetMaxReduceAttempts(reduceAttemptLimit);

            // Input and output locations.
            FileInputFormat.SetInputPaths(conf, GetInputDir());
            FileOutputFormat.SetOutputPath(conf, GetOutputDir());

            // Text formats in and out; LongWritable keys and Text values for both the
            // map output and the final output.
            var keyClass = typeof(LongWritable);
            var valueClass = typeof(Text);
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetMapOutputKeyClass(keyClass);
            conf.SetMapOutputValueClass(valueClass);
            conf.SetOutputFormat(typeof(TextOutputFormat));
            conf.SetOutputKeyClass(keyClass);
            conf.SetOutputValueClass(valueClass);

            // Submit the job, then check its output against the expected bad records.
            RunningJob completedJob = JobClient.RunJob(conf);
            ValidateOutput(conf, completedJob, mapperBadRecords, redBadRecords);
        }
        /// <summary>
        /// Checks the values the <c>SkipBadRecords</c> getters report for a freshly created
        /// <c>Configuration</c>, then applies each setter and verifies that the matching
        /// getter reads the new value back.
        /// </summary>
        public virtual void TestSkipBadRecords()
        {
            var settings = new Configuration();

            // Expected values before anything has been set.
            NUnit.Framework.Assert.AreEqual(2, SkipBadRecords.GetAttemptsToStartSkipping(settings));
            NUnit.Framework.Assert.IsTrue(SkipBadRecords.GetAutoIncrMapperProcCount(settings));
            NUnit.Framework.Assert.IsTrue(SkipBadRecords.GetAutoIncrReducerProcCount(settings));
            NUnit.Framework.Assert.AreEqual(0, SkipBadRecords.GetMapperMaxSkipRecords(settings));
            NUnit.Framework.Assert.AreEqual(0, SkipBadRecords.GetReducerMaxSkipGroups(settings), 0);
            NUnit.Framework.Assert.IsNull(SkipBadRecords.GetSkipOutputPath(settings));

            // Apply every setter. The skip output path goes onto a separate JobConf
            // rather than onto the Configuration used for the other settings.
            SkipBadRecords.SetAttemptsToStartSkipping(settings, 5);
            SkipBadRecords.SetAutoIncrMapperProcCount(settings, false);
            SkipBadRecords.SetAutoIncrReducerProcCount(settings, false);
            SkipBadRecords.SetMapperMaxSkipRecords(settings, 6L);
            SkipBadRecords.SetReducerMaxSkipGroups(settings, 7L);
            var jobSettings = new JobConf();
            SkipBadRecords.SetSkipOutputPath(jobSettings, new Path("test"));

            // Each getter must now return what its setter stored.
            NUnit.Framework.Assert.AreEqual(5, SkipBadRecords.GetAttemptsToStartSkipping(settings));
            NUnit.Framework.Assert.IsFalse(SkipBadRecords.GetAutoIncrMapperProcCount(settings));
            NUnit.Framework.Assert.IsFalse(SkipBadRecords.GetAutoIncrReducerProcCount(settings));
            NUnit.Framework.Assert.AreEqual(6L, SkipBadRecords.GetMapperMaxSkipRecords(settings));
            NUnit.Framework.Assert.AreEqual(7L, SkipBadRecords.GetReducerMaxSkipGroups(settings), 0);
            var storedSkipPath = SkipBadRecords.GetSkipOutputPath(jobSettings);
            NUnit.Framework.Assert.AreEqual("test", storedSkipPath.ToString());
        }