public virtual void TestDefaultCollect() {
    //mock creation
    Task.TaskReporter mockTaskReporter = Org.Mockito.Mockito.Mock<Task.TaskReporter>();
    // cast needed for mock with generic
    IFile.Writer<string, int> mockWriter = (IFile.Writer<string, int>)Org.Mockito.Mockito.Mock<IFile.Writer>();
    Configuration conf = new Configuration();
    coc = new Task.CombineOutputCollector<string, int>(outCounter, mockTaskReporter, conf);
    coc.SetWriter(mockWriter);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Never()).Progress();
    // the default collector reports progress once per DefaultCombineRecordsBeforeProgress records
    for (int i = 0; i < Task.DefaultCombineRecordsBeforeProgress; i++) {
        coc.Collect("dummy", i);
    }
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Times(1)).Progress();
    for (int i = 0; i < Task.DefaultCombineRecordsBeforeProgress; i++) {
        coc.Collect("dummy", i);
    }
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Times(2)).Progress();
}
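// A minimal sketch (not the real Task.CombineOutputCollector source) of the
// progress-reporting pattern this test and TestCustomCollect exercise: the
// collector counts records and calls Progress() once every progressBar records,
// where progressBar defaults to Task.DefaultCombineRecordsBeforeProgress and can
// be overridden via MRJobConfig.CombineRecordsBeforeProgress.
// ProgressEveryNCollector is an illustrative name, not a type from this codebase.
internal class ProgressEveryNCollector<K, V> {
    private readonly Task.TaskReporter reporter;
    private readonly long progressBar;
    private long numRecords;

    internal ProgressEveryNCollector(Task.TaskReporter reporter, long progressBar) {
        this.reporter = reporter;
        this.progressBar = progressBar;
    }

    internal void Collect(K key, V value) {
        // a real collector writes (key, value) to its IFile.Writer here;
        // then it reports progress once every progressBar records
        if (++numRecords % progressBar == 0) {
            reporter.Progress();
        }
    }
}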
public Context(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
               TaskUmbilicalProtocol umbilical, LocalDirAllocator localDirAllocator,
               Reporter reporter, CompressionCodec codec, Type combinerClass,
               Task.CombineOutputCollector<K, V> combineCollector,
               Counters.Counter spilledRecordsCounter,
               Counters.Counter reduceCombineInputCounter,
               Counters.Counter shuffledMapsCounter,
               Counters.Counter reduceShuffleBytes,
               Counters.Counter failedShuffleCounter,
               Counters.Counter mergedMapOutputsCounter,
               TaskStatus status, Progress copyPhase, Progress mergePhase,
               Task reduceTask, MapOutputFile mapOutputFile,
               IDictionary<TaskAttemptID, MapOutputFile> localMapFiles) {
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localFS = localFS;
    this.umbilical = umbilical;
    this.localDirAllocator = localDirAllocator;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.shuffledMapsCounter = shuffledMapsCounter;
    this.reduceShuffleBytes = reduceShuffleBytes;
    this.failedShuffleCounter = failedShuffleCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.status = status;
    this.copyPhase = copyPhase;
    this.mergePhase = mergePhase;
    this.reduceTask = reduceTask;
    this.mapOutputFile = mapOutputFile;
    this.localMapFiles = localMapFiles;
}
public virtual void TestSucceedAndFailedCopyMap<K, V>() {
    JobConf job = new JobConf();
    job.SetNumMapTasks(2);
    //mock creation
    TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter mockReporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem mockFileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = job.GetCombinerClass();
    // cast needed for mock with generic
    Task.CombineOutputCollector<K, V> mockCombineOutputCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID mockTaskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter mockCounter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus mockTaskStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress mockProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mockMapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task mockTask = Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    // cast needed for mock with generic
    MapOutput<K, V> output = (MapOutput<K, V>)Org.Mockito.Mockito.Mock<MapOutput>();
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
        mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
        mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
        mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
        mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new _TaskStatus_251();
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status,
        null, null, progress, context.GetShuffledMapsCounter(),
        context.GetReduceShuffleBytes(), context.GetFailedShuffleCounter());
    MapHost host1 = new MapHost("host1", null);
    TaskAttemptID failedAttemptID = new TaskAttemptID(
        new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
    TaskAttemptID succeedAttemptID = new TaskAttemptID(
        new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);
    // handle output fetch failure for failedAttemptID, part I
    scheduler.HostFailed(host1.GetHostName());
    // handle output fetch success for succeedAttemptID
    long bytes = (long)500 * 1024 * 1024;
    scheduler.CopySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
    // handle output fetch failure for failedAttemptID, part II
    // for MAPREDUCE-6361: verify that no NPE is thrown
    scheduler.CopyFailed(failedAttemptID, host1, true, false);
}
public virtual void TestConsumerApi() {
    JobConf jobConf = new JobConf();
    ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShufflePlugin.TestShuffleConsumerPlugin<K, V>();
    //mock creation
    ReduceTask mockReduceTask = Org.Mockito.Mockito.Mock<ReduceTask>();
    TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter mockReporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem mockFileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = jobConf.GetCombinerClass();
    // cast needed for mock with generic
    Task.CombineOutputCollector<K, V> mockCombineOutputCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID mockTaskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter mockCounter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus mockTaskStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress mockProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mockMapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task mockTask = Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    try {
        string[] dirs = jobConf.GetLocalDirs();
        // verify that these APIs are available through the super class handler
        ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
            mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
            mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
            mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
            mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
        shuffleConsumerPlugin.Init(context);
        shuffleConsumerPlugin.Run();
        shuffleConsumerPlugin.Close();
    } catch (Exception e) {
        NUnit.Framework.Assert.Fail("Threw exception: " + e);
    }
    // verify that these APIs are available for 3rd party plugins
    mockReduceTask.GetTaskID();
    mockReduceTask.GetJobID();
    mockReduceTask.GetNumMaps();
    mockReduceTask.GetPartition();
    mockReporter.Progress();
}
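// A minimal sketch of the surface a third-party plugin implements, inferred from
// the Init/Run/Close calls in the test above. NoOpShuffleConsumerPlugin is an
// illustrative name, and the member shapes assume ShuffleConsumerPlugin<K, V> is
// an interface here, as it is in upstream Hadoop.
internal class NoOpShuffleConsumerPlugin<K, V> : ShuffleConsumerPlugin<K, V> {
    private ShuffleConsumerPlugin.Context<K, V> context;

    public void Init(ShuffleConsumerPlugin.Context<K, V> context) {
        // a real plugin would set up its fetchers and merge manager here
        this.context = context;
    }

    public RawKeyValueIterator Run() {
        // a real plugin fetches map outputs, merges them, and returns the merged iterator
        return null;
    }

    public void Close() {
        // release fetcher threads, sockets, and buffers
    }
}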
public virtual void TestCustomCollect() {
    //mock creation
    Task.TaskReporter mockTaskReporter = Org.Mockito.Mockito.Mock<Task.TaskReporter>();
    // cast needed for mock with generic
    IFile.Writer<string, int> mockWriter = (IFile.Writer<string, int>)Org.Mockito.Mockito.Mock<IFile.Writer>();
    Configuration conf = new Configuration();
    // report progress after every 2 records instead of the default
    conf.Set(MRJobConfig.CombineRecordsBeforeProgress, "2");
    coc = new Task.CombineOutputCollector<string, int>(outCounter, mockTaskReporter, conf);
    coc.SetWriter(mockWriter);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Never()).Progress();
    coc.Collect("dummy", 1);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Never()).Progress();
    coc.Collect("dummy", 2);
    Org.Mockito.Mockito.Verify(mockTaskReporter, Org.Mockito.Mockito.Times(1)).Progress();
}
public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
                        LocalDirAllocator localDirAllocator, Reporter reporter,
                        CompressionCodec codec, Type combinerClass,
                        Task.CombineOutputCollector<K, V> combineCollector,
                        Counters.Counter spilledRecordsCounter,
                        Counters.Counter reduceCombineInputCounter,
                        Counters.Counter mergedMapOutputsCounter,
                        ExceptionReporter exceptionReporter, Progress mergePhase,
                        MapOutputFile mapOutputFile) {
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.SetConf(jobConf);
    this.localFS = localFS;
    this.rfs = ((LocalFileSystem)localFS).GetRaw();
    float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent,
        MRJobConfig.DefaultShuffleInputBufferPercent);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new ArgumentException("Invalid value for "
            + MRJobConfig.ShuffleInputBufferPercent + ": " + maxInMemCopyUse);
    }
    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes,
        Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
    this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);
    // maximum percentage of the in-memory limit that a single shuffle can consume
    float singleShuffleMemoryLimitPercent = jobConf.GetFloat(
        MRJobConfig.ShuffleMemoryLimitPercent, DefaultShuffleMemoryLimitPercent);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new ArgumentException("Invalid value for "
            + MRJobConfig.ShuffleMemoryLimitPercent + ": " + singleShuffleMemoryLimitPercent);
    }
    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold,
        ioSortFactor);
    this.mergeThreshold = (long)(this.memoryLimit
        * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent, 0.90f));
    Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", "
        + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", "
        + "mergeThreshold=" + mergeThreshold + ", "
        + "ioSortFactor=" + ioSortFactor + ", "
        + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
        throw new RuntimeException("Invalid configuration: "
            + "maxSingleShuffleLimit should be less than mergeThreshold. "
            + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
            + ", mergeThreshold: " + this.mergeThreshold);
    }
    bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);
    if (allowMemToMemMerge) {
        this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this,
            this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.Start();
    } else {
        this.memToMemMerger = null;
    }
    this.inMemoryMerger = CreateInMemoryMerger();
    this.inMemoryMerger.Start();
    this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
    this.onDiskMerger.Start();
    this.mergePhase = mergePhase;
}
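// Worked example of the sizing arithmetic in the constructor above. The percentages
// are the upstream Hadoop defaults; the 1 GiB heap and the method name are
// illustrative only, not values from this codebase.
internal static void SizingExample() {
    long heapBytes = 1024L * 1024 * 1024;          // stand-in for Runtime.GetRuntime().MaxMemory()
    float inputBufferPercent = 0.70f;              // mapreduce.reduce.shuffle.input.buffer.percent
    float singleLimitPercent = 0.25f;              // mapreduce.reduce.shuffle.memory.limit.percent
    float mergePercent = 0.90f;                    // mapreduce.reduce.shuffle.merge.percent
    long memoryLimit = (long)(heapBytes * inputBufferPercent);             // ~717 MiB
    long maxSingleShuffleLimit = (long)(memoryLimit * singleLimitPercent); // ~179 MiB
    long mergeThreshold = (long)(memoryLimit * mergePercent);              // ~645 MiB
    // ~179 MiB < ~645 MiB, so the constructor's maxSingleShuffleLimit < mergeThreshold check passes
}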
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public override void Run(JobConf job, TaskUmbilicalProtocol umbilical) {
    job.SetBoolean(JobContext.SkipRecords, IsSkipping());
    if (IsMapOrReduce()) {
        copyPhase = GetProgress().AddPhase("copy");
        sortPhase = GetProgress().AddPhase("sort");
        reducePhase = GetProgress().AddPhase("reduce");
    }
    // start thread that will handle communication with parent
    Task.TaskReporter reporter = StartReporter(umbilical);
    bool useNewApi = job.GetUseNewReducer();
    Initialize(job, GetJobID(), reporter, useNewApi);
    // check if it is a cleanupJobTask
    if (jobCleanup) {
        RunJobCleanupTask(umbilical, reporter);
        return;
    }
    if (jobSetup) {
        RunJobSetupTask(umbilical, reporter);
        return;
    }
    if (taskCleanup) {
        RunTaskCleanupTask(umbilical, reporter);
        return;
    }
    // Initialize the codec
    codec = InitCodec();
    RawKeyValueIterator rIter = null;
    ShuffleConsumerPlugin shuffleConsumerPlugin = null;
    Type combinerClass = conf.GetCombinerClass();
    Task.CombineOutputCollector combineCollector = (null != combinerClass)
        ? new Task.CombineOutputCollector(reduceCombineOutputCounter, reporter, conf)
        : null;
    Type clazz = job.GetClass<ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin, typeof(Shuffle));
    shuffleConsumerPlugin = ReflectionUtils.NewInstance(clazz, job);
    Log.Info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
    ShuffleConsumerPlugin.Context shuffleContext = new ShuffleConsumerPlugin.Context(
        GetTaskID(), job, FileSystem.GetLocal(job), umbilical, base.lDirAlloc, reporter,
        codec, combinerClass, combineCollector, spilledRecordsCounter,
        reduceCombineInputCounter, shuffledMapsCounter, reduceShuffleBytes,
        failedShuffleCounter, mergedMapOutputsCounter, taskStatus, copyPhase, sortPhase,
        this, mapOutputFile, localMapFiles);
    shuffleConsumerPlugin.Init(shuffleContext);
    rIter = shuffleConsumerPlugin.Run();
    // free up the data structures
    mapOutputFilesOnDisk.Clear();
    sortPhase.Complete(); // sort is complete
    SetPhase(TaskStatus.Phase.Reduce);
    StatusUpdate(umbilical);
    Type keyClass = job.GetMapOutputKeyClass();
    Type valueClass = job.GetMapOutputValueClass();
    RawComparator comparator = job.GetOutputValueGroupingComparator();
    if (useNewApi) {
        RunNewReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
    } else {
        RunOldReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
    }
    shuffleConsumerPlugin.Close();
    Done(umbilical, reporter);
}
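// A hedged usage sketch of the plugin selection in Run() above: the task loads
// whatever class the MRConfig.ShuffleConsumerPlugin key names, defaulting to the
// built-in Shuffle. CustomShuffle and ConfigureCustomShufflePlugin are hypothetical
// names, not types from this codebase, and the SetClass call assumes the usual
// Configuration.SetClass(string, Type, Type) shape of this conversion.
internal static JobConf ConfigureCustomShufflePlugin() {
    JobConf job = new JobConf();
    // Run() will instantiate this class via ReflectionUtils.NewInstance(clazz, job)
    job.SetClass(MRConfig.ShuffleConsumerPlugin, typeof(CustomShuffle), typeof(ShuffleConsumerPlugin));
    return job;
}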
public virtual void TestAggregatedTransferRate<K, V>() {
    JobConf job = new JobConf();
    job.SetNumMapTasks(10);
    //mock creation
    TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter mockReporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem mockFileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = job.GetCombinerClass();
    // cast needed for mock with generic
    Task.CombineOutputCollector<K, V> mockCombineOutputCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID mockTaskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter mockCounter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus mockTaskStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress mockProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mockMapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task mockTask = Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    // cast needed for mock with generic
    MapOutput<K, V> output = (MapOutput<K, V>)Org.Mockito.Mockito.Mock<MapOutput>();
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
        mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
        mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
        mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
        mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new _TaskStatus_115();
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status,
        null, null, progress, context.GetShuffledMapsCounter(),
        context.GetReduceShuffleBytes(), context.GetFailedShuffleCounter());
    TaskAttemptID attemptID0 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
    //adding the 1st interval, 40MB from 60s to 100s
    long bytes = (long)40 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(1, 1, 1), progress.ToString());
    TaskAttemptID attemptID1 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);
    //adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
    bytes = (long)50 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(2, 1, 1), progress.ToString());
    TaskAttemptID attemptID2 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 2), 2);
    //adding the 3rd interval overlapping with the 1st and the 2nd intervals,
    //110MB from 25s to 80s
    bytes = (long)110 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(3, 2, 2), progress.ToString());
    TaskAttemptID attemptID3 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 3), 3);
    //adding the 4th interval just after the 1st interval, 100MB from 100s to 300s
    bytes = (long)100 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(4, 0.5, 1), progress.ToString());
    TaskAttemptID attemptID4 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 4), 4);
    //adding the 5th interval after the 4th, 50MB from 350s to 400s
    bytes = (long)50 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(5, 1, 1), progress.ToString());
    TaskAttemptID attemptID5 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 5), 5);
    //adding the 6th interval after the 5th, 50MB from 450s to 500s
    bytes = (long)50 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(6, 1, 1), progress.ToString());
    TaskAttemptID attemptID6 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 6), 6);
    //adding the 7th interval between the 4th and 5th intervals, 20MB from 320s to 340s
    bytes = (long)20 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(7, 1, 1), progress.ToString());
    TaskAttemptID attemptID7 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 7), 7);
    //adding the 8th interval overlapping the 4th, 5th, and 7th intervals, 30MB from 290s to 350s
    bytes = (long)30 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(8, 0.5, 1), progress.ToString());
    TaskAttemptID attemptID8 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 8), 8);
    //adding the 9th interval overlapping the 5th and 6th intervals, 50MB from 400s to 450s
    bytes = (long)50 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(9, 1, 1), progress.ToString());
    TaskAttemptID attemptID9 = new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 9), 9);
    //adding the 10th interval overlapping all intervals, 500MB from 0s to 500s
    bytes = (long)500 * 1024 * 1024;
    scheduler.CopySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(10, 1, 2), progress.ToString());
}
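// Worked example of the aggregated-rate arithmetic checked above (derived from the
// test's own numbers): after the 1st copy, 40MB arrived over the 40s window
// [60s, 100s], so both the last-transfer rate and the aggregated rate are 1 MB/s,
// matching CopyMessage(1, 1, 1). After the 4th copy, the union of all busy windows
// is [0s, 300s] = 300s carrying 300MB in total, so the aggregated rate stays 1 MB/s
// while the 4th copy alone ran at 100MB / 200s = 0.5 MB/s, matching
// CopyMessage(4, 0.5, 1). After the 10th copy, 1000MB over the 500s union gives the
// aggregated 2 MB/s in CopyMessage(10, 1, 2).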