/// <summary>
/// Convenience constructor: delegates to the full constructor, supplying the
/// raw local file system obtained from <paramref name="conf"/> and the path
/// that <paramref name="mapOutputFile"/> returns from
/// <c>GetInputFileForWrite</c> for this map's task id and
/// <paramref name="size"/>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public OnDiskMapOutput(
    TaskAttemptID mapId,
    TaskAttemptID reduceId,
    MergeManagerImpl<K, V> merger,
    long size,
    JobConf conf,
    MapOutputFile mapOutputFile,
    int fetcher,
    bool primaryMapOutput)
    : this(
        mapId,
        reduceId,
        merger,
        size,
        conf,
        mapOutputFile,
        fetcher,
        primaryMapOutput,
        FileSystem.GetLocal(conf).GetRaw(),
        mapOutputFile.GetInputFileForWrite(mapId.GetTaskID(), size))
{
}
/// <summary>
/// Drives a <c>ShuffleSchedulerImpl</c> through a host failure, a successful
/// copy of one map attempt, and then a failed copy of another attempt from
/// the same host. The test passes as long as no exception escapes
/// (MAPREDUCE-6361: no NPE may be thrown by the final <c>CopyFailed</c>).
/// </summary>
public virtual void TestSucceedAndFailedCopyMap<K, V>()
{
    JobConf conf = new JobConf();
    conf.SetNumMapTasks(2);

    // mock creation
    TaskUmbilicalProtocol umbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter reporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem fileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = conf.GetCombinerClass();
    // needed for mock with generic
    Task.CombineOutputCollector<K, V> combineCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID reduceAttemptId = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator dirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec codec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter counter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus contextStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress contextProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task task = Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    MapOutput<K, V> output = Org.Mockito.Mockito.Mock<MapOutput>();

    // The same mocked counter and progress instances are reused for every
    // counter/progress slot of the context.
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
        reduceAttemptId, conf, fileSystem, umbilical, dirAllocator,
        reporter, codec, combinerClass, combineCollector,
        counter, counter, counter, counter, counter, counter,
        contextStatus, contextProgress, contextProgress, task, mapOutputFile, null);

    TaskStatus schedulerStatus = new _TaskStatus_251();
    Progress schedulerProgress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(
        conf, schedulerStatus, null, null, schedulerProgress,
        context.GetShuffledMapsCounter(),
        context.GetReduceShuffleBytes(),
        context.GetFailedShuffleCounter());

    MapHost host1 = new MapHost("host1", null);
    JobID failedJob = new JobID("test", 0);
    TaskAttemptID failedAttemptID = new TaskAttemptID(new TaskID(failedJob, TaskType.Map, 0), 0);
    JobID succeededJob = new JobID("test", 0);
    TaskAttemptID succeedAttemptID = new TaskAttemptID(new TaskID(succeededJob, TaskType.Map, 1), 1);

    // handle output fetch failure for failedAttemptID, part I
    scheduler.HostFailed(host1.GetHostName());

    // handle output fetch succeed for succeedAttemptID
    long bytes = (long)500 * 1024 * 1024;
    scheduler.CopySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

    // handle output fetch failure for failedAttemptID, part II
    // for MAPREDUCE-6361: verify no NPE exception get thrown out
    scheduler.CopyFailed(failedAttemptID, host1, true, false);
}
/// <summary>
/// Checks that the shuffle consumer plugin API is usable: a
/// <c>ShuffleConsumerPlugin.Context</c> can be built from mocks and the
/// plugin's <c>Init</c>, <c>Run</c> and <c>Close</c> can be called without
/// throwing. It then calls the <c>ReduceTask</c> and <c>Reporter</c>
/// accessors that third-party plugins rely on.
/// </summary>
public virtual void TestConsumerApi()
{
    JobConf jobConf = new JobConf();
    ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin =
        new TestShufflePlugin.TestShuffleConsumerPlugin<K, V>();

    // mock creation
    ReduceTask mockReduceTask = Org.Mockito.Mockito.Mock<ReduceTask>();
    TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter mockReporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem mockFileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = jobConf.GetCombinerClass();
    // needed for mock with generic
    Task.CombineOutputCollector<K, V> mockCombineOutputCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID mockTaskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter mockCounter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus mockTaskStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress mockProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mockMapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task mockTask = Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();

    try
    {
        string[] dirs = jobConf.GetLocalDirs();

        // verify that these APIs are available through super class handler
        ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
            mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
            mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
            mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
            mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
        shuffleConsumerPlugin.Init(context);
        shuffleConsumerPlugin.Run();
        shuffleConsumerPlugin.Close();
    }
    catch (Exception e)
    {
        // The original call passed (message, condition) in JUnit order, but
        // NUnit's Assert.IsTrue expects the condition first. Assert.Fail
        // expresses the unconditional failure directly and keeps the message.
        NUnit.Framework.Assert.Fail("Threw exception:" + e);
    }

    // verify that these APIs are available for 3rd party plugins
    mockReduceTask.GetTaskID();
    mockReduceTask.GetJobID();
    mockReduceTask.GetNumMaps();
    mockReduceTask.GetPartition();
    mockReporter.Progress();
}
/// <summary>
/// Full constructor. Stores the collaborators, derives the temporary output
/// path from <paramref name="outputPath"/> and <paramref name="fetcher"/>
/// via <c>GetTempPath</c>, and opens that temporary path on
/// <paramref name="fs"/>, passing the created stream through
/// <c>CryptoUtils.WrapIfNecessary</c>.
/// </summary>
internal OnDiskMapOutput(
    TaskAttemptID mapId,
    TaskAttemptID reduceId,
    MergeManagerImpl<K, V> merger,
    long size,
    JobConf conf,
    MapOutputFile mapOutputFile,
    int fetcher,
    bool primaryMapOutput,
    FileSystem fs,
    Path outputPath)
    : base(mapId, size, primaryMapOutput)
{
    // Plain collaborator references.
    this.fs = fs;
    this.merger = merger;
    this.outputPath = outputPath;

    // Data is written to a temporary path first; the stream is created on
    // the supplied file system before being handed to CryptoUtils.
    this.tmpOutputPath = GetTempPath(outputPath, fetcher);
    this.disk = CryptoUtils.WrapIfNecessary(conf, fs.Create(this.tmpOutputPath));

    this.conf = conf;
}
/// <summary>
/// Starts a fetcher whose input stream blocks, shuts the fetcher down while
/// it is reading, and then verifies that the stream was closed properly,
/// that the temporary on-disk output was created and deleted on the mocked
/// file system, and that the on-disk map output was aborted.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestInterruptOnDisk()
{
    int fetcherId = 7;
    Path outputPath = new Path("file:///tmp/foo");
    Path tmpPath = OnDiskMapOutput.GetTempPath(outputPath, fetcherId);

    FileSystem mockFs = Org.Mockito.Mockito.Mock<FileSystem>(ReturnsDeepStubs);
    MapOutputFile mapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Mockito.Mockito
        .When(mapOutputFile.GetInputFileForWrite(Matchers.Any<TaskID>(), Matchers.AnyLong()))
        .ThenReturn(outputPath);

    // Spy on a real OnDiskMapOutput so that Abort() can be verified later.
    OnDiskMapOutput<Text, Text> diskOutput = Org.Mockito.Mockito.Spy(
        new OnDiskMapOutput<Text, Text>(map1ID, id, mm, 100L, job, mapOutputFile, fetcherId, true, mockFs, outputPath));
    Org.Mockito.Mockito
        .When(mm.Reserve(Matchers.Any<TaskAttemptID>(), Matchers.AnyLong(), Matchers.AnyInt()))
        .ThenReturn(diskOutput);
    Org.Mockito.Mockito.DoNothing().When(mm).WaitForResource();
    Org.Mockito.Mockito.When(ss.GetHost()).ThenReturn(host);

    // Stub the HTTP connection.
    string replyHash = SecureShuffleUtils.GenerateHash(Sharpen.Runtime.GetBytesForString(encHash), key);
    Org.Mockito.Mockito.When(connection.GetResponseCode()).ThenReturn(200);
    Org.Mockito.Mockito
        .When(connection.GetHeaderField(SecureShuffleUtils.HttpHeaderReplyUrlHash))
        .ThenReturn(replyHash);

    // The response body holds only a serialized shuffle header, served through
    // a StuckInputStream.
    ShuffleHeader header = new ShuffleHeader(map1ID.ToString(), 10, 10, 1);
    ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
    header.Write(new DataOutputStream(headerBytes));
    TestFetcher.StuckInputStream stuckStream =
        new TestFetcher.StuckInputStream(new ByteArrayInputStream(headerBytes.ToByteArray()));
    Org.Mockito.Mockito.When(connection.GetInputStream()).ThenReturn(stuckStream);
    Org.Mockito.Mockito
        .When(connection.GetHeaderField(ShuffleHeader.HttpHeaderName))
        .ThenReturn(ShuffleHeader.DefaultHttpHeaderName);
    Org.Mockito.Mockito
        .When(connection.GetHeaderField(ShuffleHeader.HttpHeaderVersion))
        .ThenReturn(ShuffleHeader.DefaultHttpHeaderVersion);
    Org.Mockito.Mockito.DoAnswer(new _Answer_610(stuckStream)).When(connection).Disconnect();

    Fetcher<Text, Text> underTest = new TestFetcher.FakeFetcher<Text, Text>(
        job, id, ss, mm, r, metrics, except, key, connection, fetcherId);
    underTest.Start();

    // wait for read in inputstream
    stuckStream.WaitForFetcher();
    underTest.ShutDown();
    underTest.Join();

    // rely on test timeout to kill if stuck
    NUnit.Framework.Assert.IsTrue(stuckStream.WasClosedProperly());
    Org.Mockito.Mockito.Verify(mockFs).Create(Matchers.Eq(tmpPath));
    Org.Mockito.Mockito.Verify(mockFs).Delete(Matchers.Eq(tmpPath), Matchers.Eq(false));
    Org.Mockito.Mockito.Verify(diskOutput).Abort();
}
/// <summary>
/// Builds the merge manager: stores its collaborators, reads and validates
/// the shuffle/merge memory settings from <paramref name="jobConf"/>, logs
/// the resulting limits, and starts the background merger threads (the
/// in-memory merger, the on-disk merger and, when enabled by
/// <c>MRJobConfig.ReduceMemtomemEnabled</c>, the memory-to-memory merger).
/// </summary>
/// <param name="localFS">
/// Local file system; it is cast to <c>LocalFileSystem</c> to obtain the raw
/// file system, so any other implementation fails the cast.
/// </param>
/// <param name="mapOutputFile">
/// Map output file helper; <c>SetConf(jobConf)</c> is invoked on it here.
/// </param>
/// <exception cref="System.ArgumentException">
/// <c>MRJobConfig.ShuffleInputBufferPercent</c> is outside [0, 1], or
/// <c>MRJobConfig.ShuffleMemoryLimitPercent</c> is outside (0, 1].
/// </exception>
/// <exception cref="RuntimeException">
/// The computed single-shuffle limit is not smaller than the merge threshold.
/// </exception>
public MergeManagerImpl(
    TaskAttemptID reduceId,
    JobConf jobConf,
    FileSystem localFS,
    LocalDirAllocator localDirAllocator,
    Reporter reporter,
    CompressionCodec codec,
    Type combinerClass,
    Task.CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter,
    Counters.Counter reduceCombineInputCounter,
    Counters.Counter mergedMapOutputsCounter,
    ExceptionReporter exceptionReporter,
    Progress mergePhase,
    MapOutputFile mapOutputFile)
{
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    // Assigned here, before any merger thread is started below, so that the
    // instance handed to those threads already has this field set.
    this.mergePhase = mergePhase;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.SetConf(jobConf);
    this.localFS = localFS;
    this.rfs = ((LocalFileSystem)localFS).GetRaw();

    float maxInMemCopyUse = jobConf.GetFloat(
        MRJobConfig.ShuffleInputBufferPercent,
        MRJobConfig.DefaultShuffleInputBufferPercent);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
    {
        throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleInputBufferPercent
            + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long)(jobConf.GetLong(
        MRJobConfig.ReduceMemoryTotalBytes,
        Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
    this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);

    // Maximum percentage of the in-memory limit that a single shuffle can consume.
    float singleShuffleMemoryLimitPercent = jobConf.GetFloat(
        MRJobConfig.ShuffleMemoryLimitPercent,
        DefaultShuffleMemoryLimitPercent);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f)
    {
        throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleMemoryLimitPercent
            + ": " + singleShuffleMemoryLimitPercent);
    }

    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold, ioSortFactor);
    this.mergeThreshold = (long)(this.memoryLimit * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent, 0.90f));
    Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
        + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
        + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold)
    {
        // A ", " separator was added before "mergeThreshold: " so the two
        // reported values no longer run together in the message.
        throw new RuntimeException("Invalid configuration: "
            + "maxSingleShuffleLimit should be less than mergeThreshold "
            + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
            + ", mergeThreshold: " + this.mergeThreshold);
    }

    bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);
    if (allowMemToMemMerge)
    {
        this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(
            this, this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.Start();
    }
    else
    {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = CreateInMemoryMerger();
    this.inMemoryMerger.Start();

    this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
    this.onDiskMerger.Start();
}
/// <summary>
/// Shuffles a well-formed IFile payload into an <c>OnDiskMapOutput</c>, then
/// shuffles a copy with one byte zeroed and expects a
/// <c>ChecksumException</c>. Finally it re-reads the temporary shuffled file
/// through an <c>IFileInputStream</c>.
/// </summary>
public virtual void TestCorruptedIFile()
{
    int fetcher = 7;
    Path finalPath = new Path(name.GetMethodName() + "/foo");
    Path shuffledToDisk = OnDiskMapOutput.GetTempPath(finalPath, fetcher);
    fs = FileSystem.GetLocal(job).GetRaw();
    MapOutputFile mapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    OnDiskMapOutput<Text, Text> diskOutput = new OnDiskMapOutput<Text, Text>(
        map1ID, id, mm, 100L, job, mapOutputFile, fetcher, true, fs, finalPath);

    // Serialize a shuffle header followed by the map data, which is written
    // through an IFileOutputStream.
    string mapData = "MAPDATA12345678901234567890";
    ShuffleHeader header = new ShuffleHeader(map1ID.ToString(), 14, 10, 1);
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(buffer);
    IFileOutputStream ifileOut = new IFileOutputStream(dataOut);
    header.Write(dataOut);
    int headerSize = dataOut.Size();
    try
    {
        ifileOut.Write(Sharpen.Runtime.GetBytesForString(mapData));
    }
    finally
    {
        ifileOut.Close();
    }
    int dataSize = buffer.Size() - headerSize;

    // Ensure that the OnDiskMapOutput shuffler can successfully read the data.
    MapHost host = new MapHost("TestHost", "http://test/url");
    ByteArrayInputStream input = new ByteArrayInputStream(buffer.ToByteArray());
    try
    {
        // Read past the shuffle header.
        input.Read(new byte[headerSize], 0, headerSize);
        diskOutput.Shuffle(host, input, dataSize, dataSize, metrics, Reporter.Null);
    }
    finally
    {
        input.Close();
    }

    // Now corrupt the IFile data.
    byte[] corrupted = buffer.ToByteArray();
    int corruptIndex = headerSize + (dataSize / 2);
    corrupted[corruptIndex] = (byte)0x0;
    try
    {
        input = new ByteArrayInputStream(corrupted);
        // Read past the shuffle header.
        input.Read(new byte[headerSize], 0, headerSize);
        diskOutput.Shuffle(host, input, dataSize, dataSize, metrics, Reporter.Null);
        NUnit.Framework.Assert.Fail("OnDiskMapOutput.shuffle didn't detect the corrupted map partition file");
    }
    catch (ChecksumException e)
    {
        Log.Info("The expected checksum exception was thrown.", e);
    }
    finally
    {
        input.Close();
    }

    // Ensure that the shuffled file can be read.
    IFileInputStream shuffledIn = new IFileInputStream(fs.Open(shuffledToDisk), dataSize, job);
    try
    {
        shuffledIn.Read(new byte[dataSize], 0, dataSize);
    }
    finally
    {
        shuffledIn.Close();
    }
}
/// <summary>
/// Reports ten successful copies with various (partly overlapping) time
/// intervals to a <c>ShuffleSchedulerImpl</c> and, after each one, compares
/// the scheduler's progress string with the expected <c>CopyMessage</c>.
/// </summary>
public virtual void TestAggregatedTransferRate<K, V>()
{
    JobConf conf = new JobConf();
    conf.SetNumMapTasks(10);

    // mock creation
    TaskUmbilicalProtocol umbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter reporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem fileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = conf.GetCombinerClass();
    // needed for mock with generic
    Task.CombineOutputCollector<K, V> combineCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID reduceAttemptId = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator dirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec codec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter counter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus contextStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress contextProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task task = Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    MapOutput<K, V> output = Org.Mockito.Mockito.Mock<MapOutput>();

    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
        reduceAttemptId, conf, fileSystem, umbilical, dirAllocator,
        reporter, codec, combinerClass, combineCollector,
        counter, counter, counter, counter, counter, counter,
        contextStatus, contextProgress, contextProgress, task, mapOutputFile, null);

    TaskStatus status = new _TaskStatus_115();
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(
        conf, status, null, null, progress,
        context.GetShuffledMapsCounter(),
        context.GetReduceShuffleBytes(),
        context.GetFailedShuffleCounter());

    // One column per reported copy, in the order the copies are reported:
    //   1st: 40MB from 60s to 100s
    //   2nd: before the 1st interval, 50MB from 0s to 50s
    //   3rd: overlapping with the 1st and the 2nd interval, 110MB from 25s to 80s
    //   4th: 100MB from 100s to 300s
    //   5th: after the 4th, 50MB from 350s to 400s
    //   6th: after the 5th, 50MB from 450s to 500s
    //   7th: 20MB from 320s to 340s
    //   8th: overlapping with 4th, 5th, and 7th, 30MB from 290s to 350s
    //   9th: 50MB from 400s to 450s
    //  10th: overlapping with all intervals, 500MB from 0s to 500s
    int[] sizeInMb = { 40, 50, 110, 100, 50, 50, 20, 30, 50, 500 };
    int[] startMillis = { 60000, 0, 25000, 100000, 350000, 450000, 320000, 290000, 400000, 0 };
    int[] endMillis = { 100000, 50000, 80000, 300000, 400000, 500000, 340000, 350000, 450000, 500000 };
    // Second and third arguments of the expected CopyMessage after each copy
    // (CopyMessage is defined outside this method; presumably these are
    // transfer rates - confirm against its definition).
    double[] expectedSecond = { 1, 1, 2, 0.5, 1, 1, 1, 0.5, 1, 1 };
    int[] expectedThird = { 1, 1, 2, 1, 1, 1, 1, 1, 1, 2 };

    for (int i = 0; i < sizeInMb.Length; i++)
    {
        TaskAttemptID attemptId = new TaskAttemptID(
            new TaskID(new JobID("test", 0), TaskType.Map, i), i);
        long bytes = (long)sizeInMb[i] * 1024 * 1024;
        scheduler.CopySucceeded(attemptId, new MapHost(null, null), bytes,
            startMillis[i], endMillis[i], output);
        NUnit.Framework.Assert.AreEqual(
            CopyMessage(i + 1, expectedSecond[i], expectedThird[i]),
            progress.ToString());
    }
}