/// <summary>
/// Verifies that the combine output collector reports progress once per
/// default-sized batch of collected records.
/// </summary>
public virtual void TestDefaultCollect()
{
    // Create mocks for the reporter and the underlying writer.
    Task.TaskReporter mockTaskReporter = Org.Mockito.Mockito.Mock<Task.TaskReporter>();
    IFile.Writer<string, int> mockWriter = Org.Mockito.Mockito.Mock<IFile.Writer>();
    Configuration conf = new Configuration();
    coc = new Task.CombineOutputCollector<string, int>(outCounter, mockTaskReporter, conf);
    coc.SetWriter(mockWriter);
    // Nothing collected yet, so no progress call may have happened.
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Never()).Progress();
    // Each full batch of DefaultCombineRecordsBeforeProgress records must
    // trigger exactly one additional progress report.
    for (int batch = 1; batch <= 2; batch++)
    {
        for (int record = 0; record < Task.DefaultCombineRecordsBeforeProgress; record++)
        {
            coc.Collect("dummy", record);
        }
        Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Times(batch)).Progress();
    }
}
/// <summary>
/// Runs the reducer using the new (org.apache.hadoop.mapreduce) API over the
/// shuffled, sorted input, writing output through a tracking record writer.
/// </summary>
/// <remarks>
/// Fix: the generic type parameters were declared as
/// <c>Inkey, Invalue, Outkey, Outvalue</c> while the signature and body use
/// <c>INKEY, INVALUE, OUTKEY, OUTVALUE</c>; C# type parameters are
/// case-sensitive, so the declared names never resolved. The declaration now
/// matches the usage (callers are unaffected — type arguments are positional).
/// </remarks>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
private void RunNewReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>(JobConf job, TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter, RawKeyValueIterator rIter, RawComparator<INKEY> comparator)
{
    System.Type keyClass = typeof(INKEY);
    System.Type valueClass = typeof(INVALUE);
    // Wrap the raw iterator so every advance also reports progress.
    RawKeyValueIterator rawIter = rIter;
    rIter = new _RawKeyValueIterator_587(rawIter, reporter);
    // Build a task context so the configured reducer class can be resolved.
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job, GetTaskID(), reporter);
    // Instantiate the user's reducer reflectively from the job configuration.
    Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE> reducer = (Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>)ReflectionUtils.NewInstance(taskContext.GetReducerClass(), job);
    RecordWriter<OUTKEY, OUTVALUE> trackedRW = new ReduceTask.NewTrackingRecordWriter<OUTKEY, OUTVALUE>(this, taskContext);
    job.SetBoolean("mapred.skip.on", IsSkipping());
    job.SetBoolean(JobContext.SkipRecords, IsSkipping());
    Reducer.Context reducerContext = CreateReduceContext(reducer, job, GetTaskID(), rIter, reduceInputKeyCounter, reduceInputValueCounter, trackedRW, committer, reporter, comparator, keyClass, valueClass);
    try
    {
        reducer.Run(reducerContext);
    }
    finally
    {
        // Always close the output writer, even if the reducer throws.
        trackedRW.Close(reducerContext);
    }
}
/// <summary>
/// Runs the reducer using the old (org.apache.hadoop.mapred) API, grouping
/// values by key and feeding each group to the reducer through an output
/// collector that tracks written records and bytes.
/// </summary>
/// <remarks>
/// Fix: the generic type parameters were declared as
/// <c>Inkey, Invalue, Outkey, Outvalue</c> while the signature
/// (<c>RawComparator&lt;INKEY&gt;</c>) and body use the upper-case names;
/// C# type parameters are case-sensitive, so the declared names never
/// resolved. The declaration now matches the usage (callers are unaffected —
/// type arguments are positional).
/// </remarks>
/// <exception cref="System.IO.IOException"/>
private void RunOldReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>(JobConf job, TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter, RawKeyValueIterator rIter, RawComparator<INKEY> comparator)
{
    System.Type keyClass = typeof(INKEY);
    System.Type valueClass = typeof(INVALUE);
    Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE> reducer = ReflectionUtils.NewInstance(job.GetReducerClass(), job);
    // make output collector
    string finalName = GetOutputName(GetPartition());
    RecordWriter<OUTKEY, OUTVALUE> @out = new ReduceTask.OldTrackingRecordWriter<OUTKEY, OUTVALUE>(this, job, reporter, finalName);
    RecordWriter<OUTKEY, OUTVALUE> finalOut = @out;
    // The collector wraps the writer and indicates that a progress update needs to be sent.
    OutputCollector<OUTKEY, OUTVALUE> collector = new _OutputCollector_419(finalOut, reporter);
    // apply reduce function
    try
    {
        // Increment the processed-group counter only if the skipping feature is enabled.
        bool incrProcCount = SkipBadRecords.GetReducerMaxSkipGroups(job) > 0 && SkipBadRecords.GetAutoIncrReducerProcCount(job);
        // When skipping is active, use the iterator that steps over bad record groups.
        ReduceTask.ReduceValuesIterator<INKEY, INVALUE> values = IsSkipping() ? new ReduceTask.SkippingReduceValuesIterator<INKEY, INVALUE>(this, rIter, comparator, keyClass, valueClass, job, reporter, umbilical) : new ReduceTask.ReduceValuesIterator<INKEY, INVALUE>(this, rIter, job.GetOutputValueGroupingComparator(), keyClass, valueClass, job, reporter);
        values.InformReduceProgress();
        while (values.More())
        {
            reduceInputKeyCounter.Increment(1);
            reducer.Reduce(values.GetKey(), values, collector, reporter);
            if (incrProcCount)
            {
                reporter.IncrCounter(SkipBadRecords.CounterGroup, SkipBadRecords.CounterReduceProcessedGroups, 1);
            }
            values.NextKey();
            values.InformReduceProgress();
        }
        // Close cleanly, then null out so the finally block does not close twice.
        reducer.Close();
        reducer = null;
        @out.Close(reporter);
        @out = null;
    }
    finally
    {
        // Best-effort cleanup of whatever was not closed on the happy path.
        IOUtils.Cleanup(Log, reducer);
        CloseQuietly(@out, reporter);
    }
}
/// <summary>
/// Captures the task context and allocates one in-memory output stream plus
/// key/value writer per reduce partition.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public override void Init(MapOutputCollector.Context context)
{
    this.mapTask = context.GetMapTask();
    this.jobConf = context.GetJobConf();
    this.reporter = context.GetReporter();
    numberOfPartitions = jobConf.GetNumReduceTasks();
    keyClass = (Type)jobConf.GetMapOutputKeyClass();
    valueClass = (Type)jobConf.GetMapOutputValueClass();
    recordWriters = new TestMerge.KeyValueWriter[numberOfPartitions];
    outStreams = new ByteArrayOutputStream[numberOfPartitions];
    // One byte-array-backed writer per partition.
    for (int partition = 0; partition < numberOfPartitions; partition++)
    {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        outStreams[partition] = stream;
        recordWriters[partition] = new TestMerge.KeyValueWriter<K, V>(jobConf, stream, keyClass, valueClass);
    }
}
/// <summary>
/// Builds a values iterator that skips over bad record groups using the
/// enclosing task's configured skip ranges, then positions itself on the
/// first non-skipped group via <c>MayBeSkip()</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public SkippingReduceValuesIterator(ReduceTask _enclosing, RawKeyValueIterator @in
    , RawComparator<KEY> comparator, Type keyClass, Type valClass, Configuration conf
    , Task.TaskReporter reporter, TaskUmbilicalProtocol umbilical)
    : base(_enclosing)
{
    this._enclosing = _enclosing;
    this.umbilical = umbilical;
    // Counters that surface how many groups/records were skipped.
    this.skipGroupCounter = ((Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedGroups));
    this.skipRecCounter = ((Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedRecords));
    // Skipped records are only written out when a skip output path is configured.
    this.toWriteSkipRecs = this._enclosing.ToWriteSkipRecs() && SkipBadRecords.GetSkipOutputPath(conf) != null;
    this.keyClass = keyClass;
    this.valClass = valClass;
    this.reporter = reporter;
    this.skipIt = this._enclosing.GetSkipRanges().SkipRangeIterator();
    // Must run last: advances past any leading groups inside a skip range,
    // relying on the fields assigned above.
    this.MayBeSkip();
}
/// <summary>
/// Wraps the job's configured record writer and charges the bytes written
/// while opening it to the file-output byte counter.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public OldTrackingRecordWriter(ReduceTask reduce, JobConf job, Task.TaskReporter reporter, string finalName)
{
    this.reduceOutputCounter = reduce.reduceOutputCounter;
    this.fileOutputByteCounter = reduce.fileOutputByteCounter;
    // Filesystem statistics are only tracked for FileOutputFormat outputs.
    IList<FileSystem.Statistics> stats = null;
    if (job.GetOutputFormat() is FileOutputFormat)
    {
        stats = GetFsStatistics(FileOutputFormat.GetOutputPath(job), job);
    }
    fsStats = stats;
    FileSystem fileSystem = FileSystem.Get(job);
    // Snapshot the byte count before and after opening the real writer so
    // any bytes written during creation are attributed to this task's output.
    long bytesBefore = GetOutputBytes(fsStats);
    this.real = job.GetOutputFormat().GetRecordWriter(fileSystem, job, finalName, reporter);
    long bytesAfter = GetOutputBytes(fsStats);
    fileOutputByteCounter.Increment(bytesAfter - bytesBefore);
}
/// <summary>
/// Verifies that the combine output collector honors a custom
/// records-before-progress threshold (here: every 2 records).
/// </summary>
public virtual void TestCustomCollect()
{
    // Create mocks for the reporter and the underlying writer.
    Task.TaskReporter mockTaskReporter = Org.Mockito.Mockito.Mock<Task.TaskReporter>();
    IFile.Writer<string, int> mockWriter = Org.Mockito.Mockito.Mock<IFile.Writer>();
    // Lower the progress-report threshold to every 2 combined records.
    Configuration conf = new Configuration();
    conf.Set(MRJobConfig.CombineRecordsBeforeProgress, "2");
    coc = new Task.CombineOutputCollector<string, int>(outCounter, mockTaskReporter, conf);
    coc.SetWriter(mockWriter);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Never()).Progress();
    // One record is below the threshold: still no progress reported.
    coc.Collect("dummy", 1);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Never()).Progress();
    // The second record completes a batch and must report progress exactly once.
    coc.Collect("dummy", 2);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Times(1)).Progress();
}
/// <summary>
/// Verifies that the combine-file record reader reports progress even when
/// every input file in the split is empty.
/// </summary>
public virtual void TestProgressIsReportedIfInputASeriesOfEmptyFiles()
{
    JobConf conf = new JobConf();
    Path[] paths = new Path[3];
    FilePath[] files = new FilePath[3];
    long[] fileLength = new long[3];
    try
    {
        // Lay down three empty files under outDir; the split records their
        // lengths as 0, 1 and 2 bytes respectively.
        for (int idx = 0; idx < 3; idx++)
        {
            FilePath dir = new FilePath(outDir.ToString());
            dir.Mkdir();
            files[idx] = new FilePath(dir, "testfile" + idx);
            FileWriter fileWriter = new FileWriter(files[idx]);
            fileWriter.Flush();
            fileWriter.Close();
            fileLength[idx] = idx;
            paths[idx] = new Path(outDir + "/testfile" + idx);
        }
        CombineFileSplit combineFileSplit = new CombineFileSplit(paths, fileLength);
        TaskAttemptID taskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
        Task.TaskReporter reporter = Org.Mockito.Mockito.Mock<Task.TaskReporter>();
        TaskAttemptContextImpl taskAttemptContext = new TaskAttemptContextImpl(conf, taskAttemptID, reporter);
        CombineFileRecordReader cfrr = new CombineFileRecordReader(combineFileSplit, taskAttemptContext, typeof(TestCombineFileRecordReader.TextRecordReaderWrapper));
        cfrr.Initialize(combineFileSplit, taskAttemptContext);
        // Initialization reports progress once; exhausting the empty input
        // must leave the reporter with three progress calls in total.
        Org.Mockito.Mockito.Verify(reporter).Progress();
        NUnit.Framework.Assert.IsFalse(cfrr.NextKeyValue());
        Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Times(3)).Progress();
    }
    finally
    {
        // Always remove the scratch directory, pass or fail.
        FileUtil.FullyDelete(new FilePath(outDir.ToString()));
    }
}
/// <summary>
/// Bundles the map task, its job configuration, and its reporter so a
/// collector can be initialized from a single object.
/// </summary>
public Context(MapTask mapTask, JobConf jobConf, Task.TaskReporter reporter)
{
    this.reporter = reporter;
    this.jobConf = jobConf;
    this.mapTask = mapTask;
}
/// <summary>
/// Captures the wrapped raw iterator and the reporter used to signal
/// progress as the iterator is consumed.
/// </summary>
public _RawKeyValueIterator_587(RawKeyValueIterator rawIter, Task.TaskReporter reporter)
{
    this.reporter = reporter;
    this.rawIter = rawIter;
}
/// <summary>
/// Captures the final record writer and the reporter this anonymous
/// collector forwards to.
/// </summary>
public _OutputCollector_419(RecordWriter<OUTKEY, OUTVALUE> finalOut, Task.TaskReporter reporter)
{
    this.reporter = reporter;
    this.finalOut = finalOut;
}
/// <summary>
/// Main entry point for the reduce task: runs setup/cleanup variants if this
/// attempt is one, otherwise shuffles map output, completes the sort phase,
/// and dispatches to the old- or new-API reducer.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public override void Run(JobConf job, TaskUmbilicalProtocol umbilical)
{
    job.SetBoolean(JobContext.SkipRecords, IsSkipping());
    if (IsMapOrReduce())
    {
        // A real reduce attempt progresses through three phases.
        copyPhase = GetProgress().AddPhase("copy");
        sortPhase = GetProgress().AddPhase("sort");
        reducePhase = GetProgress().AddPhase("reduce");
    }
    // start thread that will handle communication with parent
    Task.TaskReporter reporter = StartReporter(umbilical);
    bool useNewApi = job.GetUseNewReducer();
    Initialize(job, GetJobID(), reporter, useNewApi);
    // check if it is a cleanupJobTask; these variants short-circuit the shuffle.
    if (jobCleanup)
    {
        RunJobCleanupTask(umbilical, reporter);
        return;
    }
    if (jobSetup)
    {
        RunJobSetupTask(umbilical, reporter);
        return;
    }
    if (taskCleanup)
    {
        RunTaskCleanupTask(umbilical, reporter);
        return;
    }
    // Initialize the codec
    codec = InitCodec();
    RawKeyValueIterator rIter = null;
    ShuffleConsumerPlugin shuffleConsumerPlugin = null;
    Type combinerClass = conf.GetCombinerClass();
    // A combine collector is only needed when a combiner class is configured.
    Task.CombineOutputCollector combineCollector = (null != combinerClass) ? new Task.CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
    // Resolve the shuffle plugin (defaults to the built-in Shuffle class).
    Type clazz = job.GetClass<ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin, typeof(Shuffle));
    shuffleConsumerPlugin = ReflectionUtils.NewInstance(clazz, job);
    Log.Info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
    ShuffleConsumerPlugin.Context shuffleContext = new ShuffleConsumerPlugin.Context(GetTaskID(), job, FileSystem.GetLocal(job), umbilical, base.lDirAlloc, reporter, codec, combinerClass, combineCollector, spilledRecordsCounter, reduceCombineInputCounter, shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter, mergedMapOutputsCounter, taskStatus, copyPhase, sortPhase, this, mapOutputFile, localMapFiles);
    shuffleConsumerPlugin.Init(shuffleContext);
    // Run the shuffle; returns an iterator over the merged, sorted map output.
    rIter = shuffleConsumerPlugin.Run();
    // free up the data structures
    mapOutputFilesOnDisk.Clear();
    sortPhase.Complete();
    // sort is complete
    SetPhase(TaskStatus.Phase.Reduce);
    StatusUpdate(umbilical);
    Type keyClass = job.GetMapOutputKeyClass();
    Type valueClass = job.GetMapOutputValueClass();
    RawComparator comparator = job.GetOutputValueGroupingComparator();
    // NOTE(review): these calls pass keyClass/valueClass explicitly, but the
    // RunNewReducer/RunOldReducer definitions visible in this file take only
    // five parameters — presumably matching overloads exist elsewhere; verify.
    if (useNewApi)
    {
        RunNewReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
    }
    else
    {
        RunOldReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
    }
    shuffleConsumerPlugin.Close();
    Done(umbilical, reporter);
}