/// <summary>
/// Exercises the <c>SkipBadRecords</c> accessors: first the values returned for a fresh
/// configuration, then a round trip through every setter and its matching getter.
/// </summary>
public virtual void TestSkipBadRecords()
{
    // Values written through the setters and expected back from the getters.
    const int attemptsToStartSkipping = 5;
    const long mapperMaxSkipRecords = 6L;
    const long reducerMaxSkipGroups = 7L;
    const string skipOutputDir = "test";

    // Defaults reported for an untouched configuration.
    Configuration configuration = new Configuration();
    NUnit.Framework.Assert.AreEqual(2, SkipBadRecords.GetAttemptsToStartSkipping(configuration));
    NUnit.Framework.Assert.IsTrue(SkipBadRecords.GetAutoIncrMapperProcCount(configuration));
    NUnit.Framework.Assert.IsTrue(SkipBadRecords.GetAutoIncrReducerProcCount(configuration));
    NUnit.Framework.Assert.AreEqual(0, SkipBadRecords.GetMapperMaxSkipRecords(configuration));
    NUnit.Framework.Assert.AreEqual(0, SkipBadRecords.GetReducerMaxSkipGroups(configuration), 0);
    NUnit.Framework.Assert.IsNull(SkipBadRecords.GetSkipOutputPath(configuration));

    // Overwrite every setting. The skip output path is stored on a separate JobConf.
    SkipBadRecords.SetAttemptsToStartSkipping(configuration, attemptsToStartSkipping);
    SkipBadRecords.SetAutoIncrMapperProcCount(configuration, false);
    SkipBadRecords.SetAutoIncrReducerProcCount(configuration, false);
    SkipBadRecords.SetMapperMaxSkipRecords(configuration, mapperMaxSkipRecords);
    SkipBadRecords.SetReducerMaxSkipGroups(configuration, reducerMaxSkipGroups);
    JobConf jobConf = new JobConf();
    SkipBadRecords.SetSkipOutputPath(jobConf, new Path(skipOutputDir));

    // Each getter must now report what was just written.
    NUnit.Framework.Assert.AreEqual(attemptsToStartSkipping, SkipBadRecords.GetAttemptsToStartSkipping(configuration));
    NUnit.Framework.Assert.IsFalse(SkipBadRecords.GetAutoIncrMapperProcCount(configuration));
    NUnit.Framework.Assert.IsFalse(SkipBadRecords.GetAutoIncrReducerProcCount(configuration));
    NUnit.Framework.Assert.AreEqual(mapperMaxSkipRecords, SkipBadRecords.GetMapperMaxSkipRecords(configuration));
    NUnit.Framework.Assert.AreEqual(reducerMaxSkipGroups, SkipBadRecords.GetReducerMaxSkipGroups(configuration), 0);
    NUnit.Framework.Assert.AreEqual(skipOutputDir, SkipBadRecords.GetSkipOutputPath(jobConf).ToString());
}
/// <summary>
/// Appends one key/value pair to the skip file. The writer is created on the first call,
/// targeting a file named after the enclosing task's ID inside the configured skip output
/// directory, and is reused on later calls.
/// </summary>
/// <param name="key">Key of the record being written to the skip file.</param>
/// <param name="value">Value of the record being written to the skip file.</param>
/// <exception cref="System.IO.IOException"/>
private void WriteSkippedRec(KEY key, VALUE value)
{
    if (this.skipWriter == null)
    {
        // One skip file per task: <skip output dir>/<task id>.
        Path outputDir = SkipBadRecords.GetSkipOutputPath(this._enclosing.conf);
        string taskName = this._enclosing.GetTaskID().ToString();
        Path target = new Path(outputDir, taskName);
        FileSystem targetFs = target.GetFileSystem(this._enclosing.conf);
        this.skipWriter = SequenceFile.CreateWriter(
            targetFs,
            this._enclosing.conf,
            target,
            this.keyClass,
            this.valClass,
            SequenceFile.CompressionType.Block,
            this.reporter);
    }

    this.skipWriter.Append(key, value);
}
/// <summary>
/// Creates the iterator: stores the collaborators, looks up the reduce skipped-groups and
/// skipped-records counters, decides whether skipped records are to be written out, obtains
/// the skip-range iterator from the enclosing task and finally calls <c>MayBeSkip()</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public SkippingReduceValuesIterator(
    ReduceTask _enclosing,
    RawKeyValueIterator @in,
    RawComparator<KEY> comparator,
    Type keyClass,
    Type valClass,
    Configuration conf,
    Task.TaskReporter reporter,
    TaskUmbilicalProtocol umbilical)
    : base(_enclosing)
{
    // Plain state captured from the arguments.
    this._enclosing = _enclosing;
    this.umbilical = umbilical;
    this.keyClass = keyClass;
    this.valClass = valClass;
    this.reporter = reporter;

    // Counter lookups, groups first and records second.
    this.skipGroupCounter = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedGroups);
    this.skipRecCounter = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedRecords);

    // Skipped records are written only when the task asks for it AND a skip output path is
    // configured; the path is not consulted unless the task asks for it.
    bool writeSkipped = false;
    if (this._enclosing.ToWriteSkipRecs())
    {
        writeSkipped = SkipBadRecords.GetSkipOutputPath(conf) != null;
    }
    this.toWriteSkipRecs = writeSkipped;

    this.skipIt = this._enclosing.GetSkipRanges().SkipRangeIterator();

    // Must run last: every field above is assigned before this call.
    this.MayBeSkip();
}
/// <summary>
/// Validates a finished job that ran with bad-record skipping enabled. Checks, in order:
/// the job succeeded; the map/reduce task counters match the record counts derived from the
/// test input and the lists of bad records; the skip output directory contains every expected
/// bad record; and the first job output file contains only expected reducer output.
/// </summary>
/// <param name="conf">Job configuration; the skip output path is read from it.</param>
/// <param name="runningJob">Handle of the completed job whose counters are inspected.</param>
/// <param name="mapperBadRecords">Records the mapper is expected to have skipped.</param>
/// <param name="redBadRecords">Records the reducer is expected to have skipped.</param>
/// <exception cref="System.Exception"/>
private void ValidateOutput(JobConf conf, RunningJob runningJob, IList<string> mapperBadRecords, IList<string> redBadRecords)
{
    Log.Info(runningJob.GetCounters().ToString());
    NUnit.Framework.Assert.IsTrue(runningJob.IsSuccessful());

    // validate counters
    // NUnit's AreEqual takes (expected, actual): the value derived from the test data goes
    // first so that a failure message reports the observed counter as the actual value.
    Counters counters = runningJob.GetCounters();
    NUnit.Framework.Assert.AreEqual(mapperBadRecords.Count, counters.FindCounter(TaskCounter.MapSkippedRecords).GetCounter());
    int mapRecs = input.Count - mapperBadRecords.Count;
    NUnit.Framework.Assert.AreEqual(mapRecs, counters.FindCounter(TaskCounter.MapInputRecords).GetCounter());
    NUnit.Framework.Assert.AreEqual(mapRecs, counters.FindCounter(TaskCounter.MapOutputRecords).GetCounter());
    int redRecs = mapRecs - redBadRecords.Count;
    NUnit.Framework.Assert.AreEqual(redBadRecords.Count, counters.FindCounter(TaskCounter.ReduceSkippedRecords).GetCounter());
    NUnit.Framework.Assert.AreEqual(redBadRecords.Count, counters.FindCounter(TaskCounter.ReduceSkippedGroups).GetCounter());
    NUnit.Framework.Assert.AreEqual(redRecs, counters.FindCounter(TaskCounter.ReduceInputGroups).GetCounter());
    NUnit.Framework.Assert.AreEqual(redRecs, counters.FindCounter(TaskCounter.ReduceInputRecords).GetCounter());
    NUnit.Framework.Assert.AreEqual(redRecs, counters.FindCounter(TaskCounter.ReduceOutputRecords).GetCounter());

    // validate skipped records
    Path skipDir = SkipBadRecords.GetSkipOutputPath(conf);
    NUnit.Framework.Assert.IsNotNull(skipDir);
    Path[] skips = FileUtil.Stat2Paths(GetFileSystem().ListStatus(skipDir));
    IList<string> mapSkipped = new AList<string>();
    IList<string> redSkipped = new AList<string>();
    foreach (Path skipPath in skips)
    {
        Log.Info("skipPath: " + skipPath);
        SequenceFile.Reader reader = new SequenceFile.Reader(GetFileSystem(), skipPath, conf);
        try
        {
            object key = ReflectionUtils.NewInstance(reader.GetKeyClass(), conf);
            object value = ReflectionUtils.NewInstance(reader.GetValueClass(), conf);
            key = reader.Next(key);
            while (key != null)
            {
                value = reader.GetCurrentValue(value);
                Log.Debug("key:" + key + " value:" + value.ToString());
                // Skip files whose name contains "_r_" are counted as reducer skips,
                // everything else as mapper skips.
                if (skipPath.GetName().Contains("_r_"))
                {
                    redSkipped.AddItem(value.ToString());
                }
                else
                {
                    mapSkipped.AddItem(value.ToString());
                }
                key = reader.Next(key);
            }
        }
        finally
        {
            // Release the reader even when reading throws part-way through the file.
            reader.Close();
        }
    }
    NUnit.Framework.Assert.IsTrue(mapSkipped.ContainsAll(mapperBadRecords));
    NUnit.Framework.Assert.IsTrue(redSkipped.ContainsAll(redBadRecords));

    // validate job output: only the first output file is inspected
    Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(GetOutputDir(), new Utils.OutputFileUtils.OutputFilesFilter()));
    IList<string> mapperOutput = GetProcessed(input, mapperBadRecords);
    Log.Debug("mapperOutput " + mapperOutput.Count);
    IList<string> reducerOutput = GetProcessed(mapperOutput, redBadRecords);
    Log.Debug("reducerOutput " + reducerOutput.Count);
    if (outputFiles.Length > 0)
    {
        InputStream @is = GetFileSystem().Open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
        int counter = 0;
        try
        {
            string line = reader.ReadLine();
            while (line != null)
            {
                counter++;
                // Each output line is "<key>\t<value>".
                StringTokenizer tokeniz = new StringTokenizer(line, "\t");
                string key = tokeniz.NextToken();
                string value = tokeniz.NextToken();
                Log.Debug("Output: key:" + key + " value:" + value);
                NUnit.Framework.Assert.IsTrue(value.Contains("hello"));
                NUnit.Framework.Assert.IsTrue(reducerOutput.Contains(value));
                line = reader.ReadLine();
            }
        }
        finally
        {
            // Close the reader even when one of the per-line assertions fails.
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual(reducerOutput.Count, counter);
    }
}