/// <summary>
/// Checks that <c>FSDownload</c> places the localized resource directly under
/// the destination directory it was handed.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestUniqueDestinationPath()
{
    Configuration conf = new Configuration();
    FileContext localFiles = FileContext.GetLocalFSFileContext(conf);
    string allocatorContext = typeof(TestFSDownload).FullName;

    // Create the test's base directory and publish it under the key the
    // allocator below is constructed with.
    Path baseDir = localFiles.MakeQualified(new Path("target", typeof(TestFSDownload).Name));
    localFiles.Mkdir(baseDir, null, true);
    conf.SetStrings(allocatorContext, baseDir.ToString());

    ExecutorService executor = Executors.NewSingleThreadExecutor();
    LocalDirAllocator allocator = new LocalDirAllocator(allocatorContext);

    // Destination = allocated path + a fresh number from the shared generator.
    Path allocated = allocator.GetLocalPathForWrite(baseDir.ToString(), conf);
    Path destPath = new Path(allocated,
        System.Convert.ToString(uniqueNumberGenerator.IncrementAndGet()));

    Path jarPath = new Path(baseDir, "dir" + 0 + ".jar");
    LocalResourceVisibility visibility = LocalResourceVisibility.Private;
    LocalResource resource = CreateJar(localFiles, jarPath, visibility);

    FSDownload download = new FSDownload(
        localFiles, UserGroupInformation.GetCurrentUser(), conf, destPath, resource);
    Future<Path> localized = executor.Submit(download);

    // No more work will be submitted; poll until the executor has terminated.
    executor.Shutdown();
    while (!executor.AwaitTermination(1000, TimeUnit.Milliseconds))
    {
    }
    NUnit.Framework.Assert.IsTrue(localized.IsDone());

    // FSDownload does not create a random directory to localize the resource,
    // so the parent of the localized path must be exactly the destination
    // directory passed in.
    NUnit.Framework.Assert.AreEqual(destPath, localized.Get().GetParent());
}
/// <summary>
/// Builds a file cache bound to its enclosing <c>BackupStore</c>.
/// </summary>
/// <param name="_enclosing">The enclosing store instance; kept as a back-reference.</param>
/// <param name="conf">Configuration stored on the cache and passed to <c>FileSystem.GetLocal</c>.</param>
/// <exception cref="System.IO.IOException"/>
public FileCache(BackupStore<K, V> _enclosing, Configuration conf)
{
    this._enclosing = _enclosing;
    this.conf = conf;

    // File system handle obtained via FileSystem.GetLocal for the supplied configuration.
    fs = FileSystem.GetLocal(conf);

    // Directory allocator keyed by the MRConfig.LocalDir setting.
    lDirAlloc = new LocalDirAllocator(MRConfig.LocalDir);
}
/// <summary>
/// Creates a context by storing every argument in the field of the same name.
/// No validation or copying is performed.
/// </summary>
public Context(
    TaskAttemptID reduceId,
    JobConf jobConf,
    FileSystem localFS,
    TaskUmbilicalProtocol umbilical,
    LocalDirAllocator localDirAllocator,
    Reporter reporter,
    CompressionCodec codec,
    Type combinerClass,
    Task.CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter,
    Counters.Counter reduceCombineInputCounter,
    Counters.Counter shuffledMapsCounter,
    Counters.Counter reduceShuffleBytes,
    Counters.Counter failedShuffleCounter,
    Counters.Counter mergedMapOutputsCounter,
    TaskStatus status,
    Progress copyPhase,
    Progress mergePhase,
    Task reduceTask,
    MapOutputFile mapOutputFile,
    IDictionary<TaskAttemptID, MapOutputFile> localMapFiles)
{
    // Task identity and configuration.
    this.reduceId = reduceId;
    this.reduceTask = reduceTask;
    this.jobConf = jobConf;
    this.status = status;

    // File system and output-file handles.
    this.localFS = localFS;
    this.localDirAllocator = localDirAllocator;
    this.mapOutputFile = mapOutputFile;
    this.localMapFiles = localMapFiles;

    // Reporting channels.
    this.umbilical = umbilical;
    this.reporter = reporter;
    this.copyPhase = copyPhase;
    this.mergePhase = mergePhase;

    // Compression and combining.
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;

    // Counters.
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.shuffledMapsCounter = shuffledMapsCounter;
    this.reduceShuffleBytes = reduceShuffleBytes;
    this.failedShuffleCounter = failedShuffleCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
}
/// <summary>
/// Drives a <c>ShuffleSchedulerImpl</c> through a host failure, a successful
/// copy and then a failed copy on the same host. The test passes when the
/// final <c>CopyFailed</c> call returns without throwing (MAPREDUCE-6361).
/// </summary>
public virtual void TestSucceedAndFailedCopyMap<K, V>()
{
    JobConf job = new JobConf();
    job.SetNumMapTasks(2);

    // Mock every collaborator the shuffle context needs.
    TaskUmbilicalProtocol umbilicalMock = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter reporterMock = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem fileSystemMock = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = job.GetCombinerClass();
    // The cast is needed because the mock is created from the non-generic type.
    Task.CombineOutputCollector<K, V> collectorMock =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID attemptIdMock = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator allocatorMock = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec codecMock = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter counterMock = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus taskStatusMock = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress progressMock = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mapOutputFileMock = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task taskMock =
        Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    MapOutput<K, V> output = Org.Mockito.Mockito.Mock<MapOutput>();

    // The same counter mock stands in for all six counters.
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
        attemptIdMock, job, fileSystemMock, umbilicalMock, allocatorMock,
        reporterMock, codecMock, combinerClass, collectorMock,
        counterMock, counterMock, counterMock, counterMock, counterMock, counterMock,
        taskStatusMock, progressMock, progressMock, taskMock, mapOutputFileMock, null);

    TaskStatus status = new _TaskStatus_251();
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(
        job, status, null, null, progress,
        context.GetShuffledMapsCounter(),
        context.GetReduceShuffleBytes(),
        context.GetFailedShuffleCounter());

    MapHost host1 = new MapHost("host1", null);
    TaskAttemptID failedAttemptID =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
    TaskAttemptID succeedAttemptID =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);

    // Fetch failure for failedAttemptID, part I: mark the host as failed.
    scheduler.HostFailed(host1.GetHostName());

    // Successful fetch for succeedAttemptID.
    long bytes = 500L * 1024 * 1024;
    scheduler.CopySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

    // Fetch failure for failedAttemptID, part II.
    // MAPREDUCE-6361: verify that no NPE is thrown here.
    scheduler.CopyFailed(failedAttemptID, host1, true, false);
}
/// <summary>
/// Creates a file under a <c>077</c> umask, requests it with
/// <c>LocalResourceVisibility.Public</c>, and expects the download to fail with
/// an <c>ExecutionException</c> whose cause is an <c>IOException</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.URISyntaxException"/>
/// <exception cref="System.Exception"/>
public virtual void TestDownloadBadPublic()
{
    Configuration conf = new Configuration();
    conf.Set(CommonConfigurationKeys.FsPermissionsUmaskKey, "077");
    FileContext localFiles = FileContext.GetLocalFSFileContext(conf);
    string allocatorContext = typeof(TestFSDownload).FullName;

    Path baseDir = localFiles.MakeQualified(new Path("target", typeof(TestFSDownload).Name));
    localFiles.Mkdir(baseDir, null, true);
    conf.SetStrings(allocatorContext, baseDir.ToString());

    IDictionary<LocalResource, LocalResourceVisibility> visibilityByResource =
        new Dictionary<LocalResource, LocalResourceVisibility>();

    // Print the seed so a failing run can be reproduced.
    Random rand = new Random();
    long sharedSeed = rand.NextLong();
    rand.SetSeed(sharedSeed);
    System.Console.Out.WriteLine("SEED: " + sharedSeed);

    IDictionary<LocalResource, Future<Path>> pending =
        new Dictionary<LocalResource, Future<Path>>();
    ExecutorService executor = Executors.NewSingleThreadExecutor();
    LocalDirAllocator allocator = new LocalDirAllocator(allocatorContext);

    int size = 512;
    LocalResourceVisibility visibility = LocalResourceVisibility.Public;
    Path sourcePath = new Path(baseDir, "test-file");
    LocalResource resource = CreateFile(localFiles, sourcePath, size, rand, visibility);
    visibilityByResource[resource] = visibility;

    Path allocated = allocator.GetLocalPathForWrite(baseDir.ToString(), size, conf);
    Path destPath = new Path(allocated,
        System.Convert.ToString(uniqueNumberGenerator.IncrementAndGet()));
    FSDownload download = new FSDownload(
        localFiles, UserGroupInformation.GetCurrentUser(), conf, destPath, resource);
    pending[resource] = executor.Submit(download);

    // Poll until the single download task has run to completion.
    executor.Shutdown();
    while (!executor.AwaitTermination(1000, TimeUnit.Milliseconds))
    {
    }
    NUnit.Framework.Assert.IsTrue(pending[resource].IsDone());

    try
    {
        foreach (KeyValuePair<LocalResource, Future<Path>> entry in pending)
        {
            // Get() is expected to throw; reaching the next line is a failure.
            entry.Value.Get();
            NUnit.Framework.Assert.Fail("We localized a file that is not public.");
        }
    }
    catch (ExecutionException e)
    {
        NUnit.Framework.Assert.IsTrue(e.InnerException is IOException);
    }
}
/// <summary>
/// Exercises the <c>ShuffleConsumerPlugin</c> API surface: builds a context from
/// mocks, runs <c>Init</c>/<c>Run</c>/<c>Close</c> on the test plugin, and then
/// calls a handful of <c>ReduceTask</c>/<c>Reporter</c> members on mocks. The
/// test fails if any call inside the <c>try</c> block throws.
/// </summary>
public virtual void TestConsumerApi()
{
    JobConf jobConf = new JobConf();
    ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin =
        new TestShufflePlugin.TestShuffleConsumerPlugin<K, V>();

    // mock creation
    ReduceTask mockReduceTask = Org.Mockito.Mockito.Mock<ReduceTask>();
    TaskUmbilicalProtocol mockUmbilical = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter mockReporter = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem mockFileSystem = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = jobConf.GetCombinerClass();
    // The cast is needed because the mock is created from the non-generic type.
    Task.CombineOutputCollector<K, V> mockCombineOutputCollector =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID mockTaskAttemptID = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec mockCompressionCodec = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter mockCounter = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus mockTaskStatus = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress mockProgress = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mockMapOutputFile = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task mockTask =
        Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();

    try
    {
        // The result is unused; the call itself is part of the API check.
        string[] dirs = jobConf.GetLocalDirs();

        // verify that these APIs are available through super class handler
        ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
            mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
            mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
            mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
            mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
        shuffleConsumerPlugin.Init(context);
        shuffleConsumerPlugin.Run();
        shuffleConsumerPlugin.Close();
    }
    catch (Exception e)
    {
        // NUnit's IsTrue takes (condition, message); the earlier call passed
        // them in JUnit order. Assert.Fail states the intent directly.
        NUnit.Framework.Assert.Fail("Threw exception:" + e);
    }

    // verify that these APIs are available for 3rd party plugins
    mockReduceTask.GetTaskID();
    mockReduceTask.GetJobID();
    mockReduceTask.GetNumMaps();
    mockReduceTask.GetPartition();
    mockReporter.Progress();
}
/// <summary>
/// Checks that <c>LocalDirAllocator.GetLocalPathToRead(string, JobConf)</c> can
/// be called on a mock without throwing.
/// </summary>
public virtual void TestProviderApi()
{
    LocalDirAllocator mockLocalDirAllocator = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    JobConf mockJobConf = Org.Mockito.Mockito.Mock<JobConf>();
    try
    {
        mockLocalDirAllocator.GetLocalPathToRead(string.Empty, mockJobConf);
    }
    catch (Exception e)
    {
        // NUnit's IsTrue takes (condition, message); the earlier call passed
        // them in JUnit order. Assert.Fail states the intent directly.
        NUnit.Framework.Assert.Fail("Threw exception:" + e);
    }
}
/// <summary>Configure mapred-local dirs.</summary>
/// <remarks>
/// Copies the directories listed in the <c>LocalDirs</c> environment variable
/// into <c>MRConfig.LocalDir</c>, then locates (or creates) a <c>work</c>
/// directory through a <c>LocalDirAllocator</c> and records it in
/// <c>MRJobConfig.JobLocalDir</c>. The task uses this setting to find its
/// output directory.
/// </remarks>
/// <param name="task">Not used by this method.</param>
/// <param name="job">Job configuration that is read and updated.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when the work directory is missing and <c>Mkdirs</c> reports failure.
/// </exception>
private static void ConfigureLocalDirs(Task task, JobConf job)
{
    string localDirsEnv = Runtime.Getenv(ApplicationConstants.Environment.LocalDirs.ToString());
    string[] localSysDirs = StringUtils.GetTrimmedStrings(localDirsEnv);
    job.SetStrings(MRConfig.LocalDir, localSysDirs);
    Log.Info(MRConfig.LocalDir + " for child: " + job.Get(MRConfig.LocalDir));

    LocalDirAllocator allocator = new LocalDirAllocator(MRConfig.LocalDir);

    // First, try to find the JOB_LOCAL_DIR on this host.
    Path workDir = null;
    try
    {
        workDir = allocator.GetLocalPathToRead("work", job);
    }
    catch (DiskChecker.DiskErrorException)
    {
        // DiskErrorException means the directory was not found; it is
        // created below in that case.
    }

    if (workDir == null)
    {
        // JOB_LOCAL_DIR doesn't exist on this host -- create it.
        workDir = allocator.GetLocalPathForWrite("work", job);
        FileSystem rawLocalFs = FileSystem.GetLocal(job).GetRaw();

        bool created;
        try
        {
            created = rawLocalFs.Mkdirs(workDir);
        }
        catch (FileAlreadyExistsException)
        {
            // Every task runs in its own JVM, so several tasks may race to
            // create this directory. Losing the race is fine: the directory
            // exists, so look it up again for reading.
            created = true;
            workDir = allocator.GetLocalPathToRead("work", job);
        }

        if (!created)
        {
            throw new IOException("Mkdirs failed to create " + workDir.ToString());
        }
    }

    job.Set(MRJobConfig.JobLocalDir, workDir.ToString());
}
/// <summary>
/// Runs one localization pass: initializes directories, loads and deletes the
/// token file, builds the remote-user and user contexts, and localizes files
/// on a download thread pool.
/// </summary>
/// <param name="nmAddr">Address handed to the privileged action that produces the <c>LocalizationProtocol</c> proxy.</param>
/// <returns><c>0</c> when <c>LocalizeFiles</c> completes, <c>-1</c> when it throws.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual int RunLocalization(IPEndPoint nmAddr)
{
    // load credentials
    InitDirs(conf, user, appId, lfs, localDirs);
    Credentials credentials = new Credentials();
    DataInputStream tokenStream = null;
    try
    {
        // assume credentials in cwd
        // TODO: Fix
        Path tokenPath = new Path(string.Format(TokenFileNameFmt, localizerId));
        tokenStream = lfs.Open(tokenPath);
        credentials.ReadTokenStorageStream(tokenStream);
        // Explicitly deleting token file.
        lfs.Delete(tokenPath, false);
    }
    finally
    {
        if (tokenStream != null)
        {
            tokenStream.Close();
        }
    }

    // create localizer context
    UserGroupInformation remoteUser = UserGroupInformation.CreateRemoteUser(user);
    remoteUser.AddToken(credentials.GetToken(LocalizerTokenIdentifier.Kind));
    LocalizationProtocol nodeManager = remoteUser.DoAs(new _PrivilegedAction_151(this, nmAddr));

    // create user context carrying every token from the credentials
    UserGroupInformation ugi = UserGroupInformation.CreateRemoteUser(user);
    foreach (Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> token in credentials.GetAllTokens())
    {
        ugi.AddToken(token);
    }

    ExecutorService downloadPool = null;
    try
    {
        downloadPool = CreateDownloadThreadPool();
        CompletionService<Path> completionService = CreateCompletionService(downloadPool);
        LocalizeFiles(nodeManager, completionService, ugi);
        return 0;
    }
    catch (Exception e)
    {
        // Print traces to stdout so that they can be logged by the NM address
        // space.
        Sharpen.Runtime.PrintStackTrace(e, System.Console.Out);
        return -1;
    }
    finally
    {
        try
        {
            if (downloadPool != null)
            {
                downloadPool.ShutdownNow();
            }
            LocalDirAllocator.RemoveContext(appCacheDirContextName);
        }
        finally
        {
            // Runs even if the shutdown or context removal above throws.
            CloseFileSystems(ugi);
        }
    }
}
/// <summary>
/// Localizes five jar resources, alternating between <c>Private</c> and
/// <c>Application</c> visibility, and checks that each localized path is a
/// directory whose permissions match the requested visibility.
/// </summary>
/// <exception cref="System.IO.IOException">Wraps an <c>ExecutionException</c> raised by a download.</exception>
/// <exception cref="System.Exception"/>
public virtual void TestDirDownload()
{
    Configuration conf = new Configuration();
    FileContext files = FileContext.GetLocalFSFileContext(conf);
    Path basedir = files.MakeQualified(new Path("target", typeof(TestFSDownload).Name));
    files.Mkdir(basedir, null, true);
    conf.SetStrings(typeof(TestFSDownload).FullName, basedir.ToString());

    IDictionary<LocalResource, LocalResourceVisibility> rsrcVis =
        new Dictionary<LocalResource, LocalResourceVisibility>();

    // Print the seed so a failing run can be reproduced.
    Random rand = new Random();
    long sharedSeed = rand.NextLong();
    rand.SetSeed(sharedSeed);
    System.Console.Out.WriteLine("SEED: " + sharedSeed);

    IDictionary<LocalResource, Future<Path>> pending =
        new Dictionary<LocalResource, Future<Path>>();
    ExecutorService exec = Executors.NewSingleThreadExecutor();
    LocalDirAllocator dirs = new LocalDirAllocator(typeof(TestFSDownload).FullName);

    for (int i = 0; i < 5; ++i)
    {
        // Even indices are Private, odd indices are Application.
        LocalResourceVisibility vis = LocalResourceVisibility.Private;
        if (i % 2 == 1)
        {
            vis = LocalResourceVisibility.Application;
        }
        Path p = new Path(basedir, "dir" + i + ".jar");
        LocalResource rsrc = CreateJar(files, p, vis);
        rsrcVis[rsrc] = vis;

        Path destPath = dirs.GetLocalPathForWrite(basedir.ToString(), conf);
        destPath = new Path(destPath,
            System.Convert.ToString(uniqueNumberGenerator.IncrementAndGet()));
        FSDownload fsd = new FSDownload(
            files, UserGroupInformation.GetCurrentUser(), conf, destPath, rsrc);
        pending[rsrc] = exec.Submit(fsd);
    }

    // Poll until every submitted download has finished.
    exec.Shutdown();
    while (!exec.AwaitTermination(1000, TimeUnit.Milliseconds))
    {
    }
    foreach (Future<Path> path in pending.Values)
    {
        NUnit.Framework.Assert.IsTrue(path.IsDone());
    }

    try
    {
        foreach (KeyValuePair<LocalResource, Future<Path>> p in pending)
        {
            Path localized = p.Value.Get();
            FileStatus status = files.GetFileStatus(localized);
            System.Console.Out.WriteLine("Testing path " + localized);

            // Use test-framework assertions: Debug.Assert is removed from
            // non-DEBUG builds and would let the test pass without checking.
            NUnit.Framework.Assert.IsTrue(status.IsDirectory());
            NUnit.Framework.Assert.IsTrue(rsrcVis.ContainsKey(p.Key));
            VerifyPermsRecursively(localized.GetFileSystem(conf), files, localized, rsrcVis[p.Key]);
        }
    }
    catch (ExecutionException e)
    {
        throw new IOException("Failed exec", e);
    }
}
/// <summary>
/// Creates one archive of the requested type, localizes it with
/// <c>FSDownload</c>, and then scans the sub-directories of the base directory.
/// The test fails if a name starting with <c>tmp</c> is left behind, or (for
/// zip input) if an entry named after the archive is a file rather than a
/// directory.
/// </summary>
/// <param name="fileType">Which archive format to create and download.</param>
/// <exception cref="System.IO.IOException">Wraps any non-assertion exception raised during the download or the scan.</exception>
/// <exception cref="Sharpen.URISyntaxException"/>
/// <exception cref="System.Exception"/>
private void DownloadWithFileType(TestFSDownload.TEST_FILE_TYPE fileType)
{
    Configuration conf = new Configuration();
    conf.Set(CommonConfigurationKeys.FsPermissionsUmaskKey, "077");
    FileContext files = FileContext.GetLocalFSFileContext(conf);
    Path basedir = files.MakeQualified(new Path("target", typeof(TestFSDownload).Name));
    files.Mkdir(basedir, null, true);
    conf.SetStrings(typeof(TestFSDownload).FullName, basedir.ToString());

    // Print the seed so a failing run can be reproduced.
    Random rand = new Random();
    long sharedSeed = rand.NextLong();
    rand.SetSeed(sharedSeed);
    System.Console.Out.WriteLine("SEED: " + sharedSeed);

    IDictionary<LocalResource, Future<Path>> pending =
        new Dictionary<LocalResource, Future<Path>>();
    ExecutorService exec = Executors.NewSingleThreadExecutor();
    LocalDirAllocator dirs = new LocalDirAllocator(typeof(TestFSDownload).FullName);

    int size = rand.Next(512) + 512;
    LocalResourceVisibility vis = LocalResourceVisibility.Private;
    Path p = new Path(basedir, string.Empty + 1);

    // Only set for zip input; stays empty for every other type so the zip
    // check below is skipped.
    string strFileName = string.Empty;
    LocalResource rsrc = null;
    switch (fileType)
    {
        case TestFSDownload.TEST_FILE_TYPE.Tar:
        {
            rsrc = CreateTarFile(files, p, size, rand, vis);
            break;
        }

        case TestFSDownload.TEST_FILE_TYPE.Jar:
        {
            rsrc = CreateJarFile(files, p, size, rand, vis);
            rsrc.SetType(LocalResourceType.Pattern);
            break;
        }

        case TestFSDownload.TEST_FILE_TYPE.Zip:
        {
            rsrc = CreateZipFile(files, p, size, rand, vis);
            strFileName = p.GetName() + ".ZIP";
            break;
        }

        case TestFSDownload.TEST_FILE_TYPE.Tgz:
        {
            rsrc = CreateTgzFile(files, p, size, rand, vis);
            break;
        }
    }

    Path destPath = dirs.GetLocalPathForWrite(basedir.ToString(), size, conf);
    destPath = new Path(destPath,
        System.Convert.ToString(uniqueNumberGenerator.IncrementAndGet()));
    FSDownload fsd = new FSDownload(
        files, UserGroupInformation.GetCurrentUser(), conf, destPath, rsrc);
    pending[rsrc] = exec.Submit(fsd);

    // Poll until the download task has finished.
    exec.Shutdown();
    while (!exec.AwaitTermination(1000, TimeUnit.Milliseconds))
    {
    }

    try
    {
        // see if there was an Exception during download
        pending[rsrc].Get();

        FileStatus[] filesstatus = files.GetDefaultFileSystem().ListStatus(basedir);
        foreach (FileStatus filestatus in filesstatus)
        {
            if (!filestatus.IsDirectory())
            {
                continue;
            }
            FileStatus[] childFiles = files.GetDefaultFileSystem().ListStatus(filestatus.GetPath());
            foreach (FileStatus childfile in childFiles)
            {
                string childName = childfile.GetPath().GetName();
                if (strFileName.EndsWith(".ZIP", StringComparison.Ordinal)
                    && childName.Equals(strFileName)
                    && !childfile.IsDirectory())
                {
                    NUnit.Framework.Assert.Fail("Failure...After unzip, there should have been a"
                        + " directory formed with zip file name but found a file. "
                        + childfile.GetPath());
                }
                if (childName.StartsWith("tmp", StringComparison.Ordinal))
                {
                    NUnit.Framework.Assert.Fail("Tmp File should not have been there "
                        + childfile.GetPath());
                }
            }
        }
    }
    catch (NUnit.Framework.AssertionException)
    {
        // Assertion failures must surface unchanged; the general handler
        // below would otherwise rewrap them as "Failed exec".
        throw;
    }
    catch (Exception e)
    {
        throw new IOException("Failed exec", e);
    }
}
/// <summary>
/// Stores the supplied collaborators, derives the memory limits and merge
/// thresholds from <paramref name="jobConf"/>, validates them, and starts the
/// merger threads.
/// </summary>
/// <remarks>
/// <paramref name="localFS"/> is cast to <c>LocalFileSystem</c> to obtain the
/// raw file system. The in-memory and on-disk mergers are always started; the
/// memory-to-memory merger is started only when
/// <c>MRJobConfig.ReduceMemtomemEnabled</c> is true.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// <c>ShuffleInputBufferPercent</c> is outside [0, 1], or
/// <c>ShuffleMemoryLimitPercent</c> is outside (0, 1].
/// </exception>
/// <exception cref="RuntimeException">
/// The single-shuffle limit is not strictly below the merge threshold.
/// </exception>
public MergeManagerImpl(
    TaskAttemptID reduceId,
    JobConf jobConf,
    FileSystem localFS,
    LocalDirAllocator localDirAllocator,
    Reporter reporter,
    CompressionCodec codec,
    Type combinerClass,
    Task.CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter,
    Counters.Counter reduceCombineInputCounter,
    Counters.Counter mergedMapOutputsCounter,
    ExceptionReporter exceptionReporter,
    Progress mergePhase,
    MapOutputFile mapOutputFile)
{
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.SetConf(jobConf);
    this.localFS = localFS;
    this.rfs = ((LocalFileSystem)localFS).GetRaw();

    // Fraction of the total memory budget usable for in-memory copies.
    float maxInMemCopyUse = jobConf.GetFloat(
        MRJobConfig.ShuffleInputBufferPercent,
        MRJobConfig.DefaultShuffleInputBufferPercent);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
    {
        throw new ArgumentException("Invalid value for "
            + MRJobConfig.ShuffleInputBufferPercent + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long)(jobConf.GetLong(
        MRJobConfig.ReduceMemoryTotalBytes,
        Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
    this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);

    // Maximum fraction of the in-memory limit that a single shuffle can consume.
    float singleShuffleMemoryLimitPercent = jobConf.GetFloat(
        MRJobConfig.ShuffleMemoryLimitPercent,
        DefaultShuffleMemoryLimitPercent);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f)
    {
        throw new ArgumentException("Invalid value for "
            + MRJobConfig.ShuffleMemoryLimitPercent + ": " + singleShuffleMemoryLimitPercent);
    }

    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = jobConf.GetInt(
        MRJobConfig.ReduceMemtomemThreshold, ioSortFactor);
    this.mergeThreshold = (long)(this.memoryLimit
        * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent, 0.90f));
    Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", "
        + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", "
        + "mergeThreshold=" + mergeThreshold + ", "
        + "ioSortFactor=" + ioSortFactor + ", "
        + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold)
    {
        // A separator is placed before the second label so the two values
        // cannot be read as one number.
        throw new RuntimeException("Invalid configuration: "
            + "maxSingleShuffleLimit should be less than mergeThreshold "
            + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
            + ", mergeThreshold: " + this.mergeThreshold);
    }

    bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);
    if (allowMemToMemMerge)
    {
        this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(
            this, this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.Start();
    }
    else
    {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = CreateInMemoryMerger();
    this.inMemoryMerger.Start();
    this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
    this.onDiskMerger.Start();
    this.mergePhase = mergePhase;
}
/// <summary>
/// Runs two in-memory merges (each over two map outputs), checks that each
/// produces one on-disk output with sorted keys, then merges the two on-disk
/// outputs with a second <c>MergeManagerImpl</c> and checks the combined
/// result and that closing the manager empties its output collections.
/// </summary>
public virtual void TestInMemoryAndOnDiskMerger()
{
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LocalDir);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId1, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());

    // write map outputs
    IDictionary<string, string> map1 = new SortedDictionary<string, string>();
    map1["apple"] = "disgusting";
    map1["carrot"] = "delicious";
    IDictionary<string, string> map2 = new SortedDictionary<string, string>();
    // "banana" belongs to the second output so that the merge below combines
    // two non-empty inputs; the sorted result is the same either way.
    map2["banana"] = "pretty good";
    byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
    byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(
        conf, mapId1, mergeManager, mapOutputBytes1.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(
        conf, mapId2, mergeManager, mapOutputBytes2.Length, null, true);
    System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.Length);
    System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.Length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs1 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.AddItem(mapOutput1);
    mapOutputs1.AddItem(mapOutput2);
    inMemoryMerger.Merge(mapOutputs1);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);

    // write map outputs
    IDictionary<string, string> map3 = new SortedDictionary<string, string>();
    map3["apple"] = "awesome";
    map3["carrot"] = "amazing";
    IDictionary<string, string> map4 = new SortedDictionary<string, string>();
    map4["banana"] = "bla";
    byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
    byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(
        conf, mapId3, mergeManager, mapOutputBytes3.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(
        conf, mapId4, mergeManager, mapOutputBytes4.Length, null, true);
    System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.Length);
    System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.Length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs2 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.AddItem(mapOutput3);
    mapOutputs2.AddItem(mapOutput4);
    inMemoryMerger2.Merge(mapOutputs2);
    NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);

    // Read back both on-disk outputs, remembering their paths for the
    // on-disk merge below.
    IList<MergeManagerImpl.CompressAwarePath> paths = new AList<MergeManagerImpl.CompressAwarePath>();
    IList<string> keys = new AList<string>();
    IList<string> values = new AList<string>();
    foreach (MergeManagerImpl.CompressAwarePath next in mergeManager.onDiskMapOutputs)
    {
        ReadOnDiskMapOutput(conf, fs, next, keys, values);
        paths.AddItem(next);
    }
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("apple", "banana", "carrot", "apple", "banana", "carrot"), keys);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"),
        values);
    mergeManager.Close();

    // A fresh manager merges the two on-disk outputs into one.
    mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId2, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());
    MergeThread<MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger =
        mergeManager.CreateOnDiskMerger();
    onDiskMerger.Merge(paths);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

    keys = new AList<string>();
    values = new AList<string>();
    ReadOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.GetEnumerator().Next(), keys, values);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("apple", "apple", "banana", "banana", "carrot", "carrot"), keys);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"),
        values);
    mergeManager.Close();

    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
}
/// <summary>
/// Reports ten successful copies with varying byte counts and time intervals
/// to a <c>ShuffleSchedulerImpl</c> and, after each one, compares the
/// <c>Progress</c> status text with the string built by <c>CopyMessage</c>.
/// </summary>
public virtual void TestAggregatedTransferRate<K, V>()
{
    JobConf job = new JobConf();
    job.SetNumMapTasks(10);

    // Mock every collaborator the shuffle context needs.
    TaskUmbilicalProtocol umbilicalMock = Org.Mockito.Mockito.Mock<TaskUmbilicalProtocol>();
    Reporter reporterMock = Org.Mockito.Mockito.Mock<Reporter>();
    FileSystem fileSystemMock = Org.Mockito.Mockito.Mock<FileSystem>();
    Type combinerClass = job.GetCombinerClass();
    // The cast is needed because the mock is created from the non-generic type.
    Task.CombineOutputCollector<K, V> collectorMock =
        (Task.CombineOutputCollector<K, V>)Org.Mockito.Mockito.Mock<Task.CombineOutputCollector>();
    TaskAttemptID attemptIdMock = Org.Mockito.Mockito.Mock<TaskAttemptID>();
    LocalDirAllocator allocatorMock = Org.Mockito.Mockito.Mock<LocalDirAllocator>();
    CompressionCodec codecMock = Org.Mockito.Mockito.Mock<CompressionCodec>();
    Counters.Counter counterMock = Org.Mockito.Mockito.Mock<Counters.Counter>();
    TaskStatus taskStatusMock = Org.Mockito.Mockito.Mock<TaskStatus>();
    Progress progressMock = Org.Mockito.Mockito.Mock<Progress>();
    MapOutputFile mapOutputFileMock = Org.Mockito.Mockito.Mock<MapOutputFile>();
    Org.Apache.Hadoop.Mapred.Task taskMock =
        Org.Mockito.Mockito.Mock<Org.Apache.Hadoop.Mapred.Task>();
    MapOutput<K, V> output = Org.Mockito.Mockito.Mock<MapOutput>();

    // The same counter mock stands in for all six counters.
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
        attemptIdMock, job, fileSystemMock, umbilicalMock, allocatorMock,
        reporterMock, codecMock, combinerClass, collectorMock,
        counterMock, counterMock, counterMock, counterMock, counterMock, counterMock,
        taskStatusMock, progressMock, progressMock, taskMock, mapOutputFileMock, null);

    TaskStatus status = new _TaskStatus_115();
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(
        job, status, null, null, progress,
        context.GetShuffledMapsCounter(),
        context.GetReduceShuffleBytes(),
        context.GetFailedShuffleCounter());

    // 1st interval: 40MB from 60s to 100s
    TaskAttemptID attemptID0 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
    long bytes = 40L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(1, 1, 1), progress.ToString());

    // 2nd interval: 50MB from 0s to 50s
    TaskAttemptID attemptID1 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);
    bytes = 50L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(2, 1, 1), progress.ToString());

    // 3rd interval: 110MB from 25s to 80s
    TaskAttemptID attemptID2 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 2), 2);
    bytes = 110L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(3, 2, 2), progress.ToString());

    // 4th interval: 100MB from 100s to 300s
    TaskAttemptID attemptID3 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 3), 3);
    bytes = 100L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(4, 0.5, 1), progress.ToString());

    // 5th interval: 50MB from 350s to 400s
    TaskAttemptID attemptID4 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 4), 4);
    bytes = 50L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(5, 1, 1), progress.ToString());

    // 6th interval: 50MB from 450s to 500s
    TaskAttemptID attemptID5 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 5), 5);
    bytes = 50L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(6, 1, 1), progress.ToString());

    // 7th interval: 20MB from 320s to 340s
    TaskAttemptID attemptID6 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 6), 6);
    bytes = 20L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(7, 1, 1), progress.ToString());

    // 8th interval: 30MB from 290s to 350s
    TaskAttemptID attemptID7 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 7), 7);
    bytes = 30L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(8, 0.5, 1), progress.ToString());

    // 9th interval: 50MB from 400s to 450s
    TaskAttemptID attemptID8 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 8), 8);
    bytes = 50L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(9, 1, 1), progress.ToString());

    // 10th interval, spanning all earlier ones: 500MB from 0s to 500s
    TaskAttemptID attemptID9 =
        new TaskAttemptID(new TaskID(new JobID("test", 0), TaskType.Map, 9), 9);
    bytes = 500L * 1024 * 1024;
    scheduler.CopySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
    NUnit.Framework.Assert.AreEqual(CopyMessage(10, 1, 2), progress.ToString());
}
/// <summary>
/// Set up the distributed cache by localizing the resources, and updating
/// the configuration with references to the localized resources.
/// </summary>
/// <remarks>
/// Every resource reported by <c>MRApps.SetupDistributedCache</c> is fetched through an
/// <c>FSDownload</c> task, handed to <c>Symlink</c> together with the working directory
/// (the <c>user.dir</c> property), and recorded in <c>localArchives</c> or
/// <c>localFiles</c>. Resources whose source path was configured as an archive or file
/// classpath entry are additionally recorded in <c>localClasspaths</c>.
/// <c>setupCalled</c> is set once the configuration has been updated.
/// </remarks>
/// <param name="conf">Job configuration the cache settings are read from and the localized lists are written to.</param>
/// <exception cref="System.IO.IOException">
/// If waiting for a download fails or a resource URL cannot be converted to a path.
/// </exception>
/// <exception cref="System.ArgumentException">If a resource of type PATTERN is encountered.</exception>
public virtual void Setup(JobConf conf)
{
    FilePath workDir = new FilePath(Runtime.GetProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    IDictionary<string, LocalResource> localResources = new LinkedHashMap<string, LocalResource>();
    MRApps.SetupDistributedCache(conf, localResources);

    // Generating unique numbers for FSDownload.
    AtomicLong uniqueNumberGenerator = new AtomicLong(Runtime.CurrentTimeMillis());

    // Find which resources are to be put on the local classpath
    IDictionary<string, Path> classpaths = new Dictionary<string, Path>();
    Path[] archiveClassPaths = DistributedCache.GetArchiveClassPaths(conf);
    if (archiveClassPaths != null)
    {
        foreach (Path p in archiveClassPaths)
        {
            classpaths[p.ToUri().GetPath().ToString()] = p;
        }
    }
    Path[] fileClassPaths = DistributedCache.GetFileClassPaths(conf);
    if (fileClassPaths != null)
    {
        foreach (Path p in fileClassPaths)
        {
            classpaths[p.ToUri().GetPath().ToString()] = p;
        }
    }

    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LocalDir);
    FileContext localFSFileContext = FileContext.GetLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();
    ExecutorService exec = null;
    try
    {
        ThreadFactory tf = new ThreadFactoryBuilder()
            .SetNameFormat("LocalDistributedCacheManager Downloader #%d")
            .Build();
        exec = Executors.NewCachedThreadPool(tf);
        Path destPath = localDirAllocator.GetLocalPathForWrite(".", conf);

        // Submit every download first so they can proceed concurrently.
        IDictionary<LocalResource, Future<Path>> resourcesToPaths = Maps.NewHashMap();
        foreach (LocalResource resource in localResources.Values)
        {
            // Each download gets its own uniquely numbered destination directory.
            Path uniqueDest = new Path(destPath,
                System.Convert.ToString(uniqueNumberGenerator.IncrementAndGet()));
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, uniqueDest, resource);
            Future<Path> future = exec.Submit(download);
            resourcesToPaths[resource] = future;
        }

        // Then collect the results in the order the resources were registered.
        foreach (KeyValuePair<string, LocalResource> entry in localResources)
        {
            LocalResource resource_1 = entry.Value;
            Path path;
            try
            {
                path = resourcesToPaths[resource_1].Get();
            }
            catch (ExecutionException e)
            {
                // Most specific handler first; a general handler placed before it
                // would make this clause unreachable.
                throw new IOException(e);
            }
            catch (Exception e)
            {
                throw new IOException(e);
            }

            string pathString = path.ToUri().ToString();
            string link = entry.Key;
            string target = new FilePath(path.ToUri()).GetPath();
            Symlink(workDir, target, link);

            if (resource_1.GetType() == LocalResourceType.Archive)
            {
                localArchives.AddItem(pathString);
            }
            else if (resource_1.GetType() == LocalResourceType.File)
            {
                localFiles.AddItem(pathString);
            }
            else if (resource_1.GetType() == LocalResourceType.Pattern)
            {
                //PATTERN is not currently used in local mode
                throw new ArgumentException("Resource type PATTERN is not " + "implemented yet. "
                    + resource_1.GetResource());
            }

            Path resourcePath;
            try
            {
                resourcePath = ConverterUtils.GetPathFromYarnURL(resource_1.GetResource());
            }
            catch (URISyntaxException e)
            {
                throw new IOException(e);
            }

            // string.Format uses indexed placeholders; "%s" would be emitted verbatim.
            Log.Info(string.Format("Localized {0} as {1}", resourcePath, path));

            string cp = resourcePath.ToUri().GetPath();
            if (classpaths.ContainsKey(cp))
            {
                localClasspaths.AddItem(path.ToUri().GetPath().ToString());
            }
        }
    }
    finally
    {
        if (exec != null)
        {
            exec.Shutdown();
        }
    }

    // Update the configuration object with localized data.
    if (!localArchives.IsEmpty())
    {
        conf.Set(MRJobConfig.CacheLocalarchives, StringUtils.ArrayToString(
            Sharpen.Collections.ToArray(localArchives, new string[localArchives.Count])));
    }
    if (!localFiles.IsEmpty())
    {
        // Size the target array from localFiles itself, not from localArchives.
        conf.Set(MRJobConfig.CacheLocalfiles, StringUtils.ArrayToString(
            Sharpen.Collections.ToArray(localFiles, new string[localFiles.Count])));
    }
    setupCalled = true;
}
/// <summary>
/// Writes and reads a file of the configured size through the local file, the raw local
/// file system, the checksummed local file system and a <c>MiniDFSCluster</c>.
/// </summary>
/// <param name="args">
/// Either empty (one repetition) or a single element holding the number of repetitions.
/// </param>
/// <returns>
/// 0 when all passes complete; -1 after printing usage when more than one argument is
/// given or the repetition count is not a valid 32-bit integer.
/// </returns>
/// <exception cref="System.IO.IOException"/>
public virtual int Run(string[] args)
{
    // silence the minidfs cluster
    Log hadoopLog = LogFactory.GetLog("org");
    if (hadoopLog is Log4JLogger)
    {
        ((Log4JLogger)hadoopLog).GetLogger().SetLevel(Level.Warn);
    }

    if (args.Length > 1)
    {
        PrintUsage();
        return -1;
    }
    int reps = 1;
    // TryParse rejects malformed and out-of-range input alike, so an overflowing
    // count prints usage instead of escaping as an OverflowException.
    if (args.Length == 1 && !int.TryParse(args[0], out reps))
    {
        PrintUsage();
        return -1;
    }

    Configuration conf = GetConf();
    // the size of the file to write
    long fileSize = conf.GetLong("dfsthroughput.file.size", 10L * 1024 * 1024 * 1024);
    BufferSize = conf.GetInt("dfsthroughput.buffer.size", 4 * 1024);

    // Fall back to hadoop.tmp.dir when no mapred.temp.dir is configured.
    string localDir = conf.Get("mapred.temp.dir");
    if (localDir == null)
    {
        localDir = conf.Get("hadoop.tmp.dir");
        conf.Set("mapred.temp.dir", localDir);
    }
    dir = new LocalDirAllocator("mapred.temp.dir");
    Runtime.SetProperty("test.build.data", localDir);
    System.Console.Out.WriteLine("Local = " + localDir);

    ChecksumFileSystem checkedLocal = FileSystem.GetLocal(conf);
    FileSystem rawLocal = checkedLocal.GetRawFileSystem();
    for (int i = 0; i < reps; ++i)
    {
        WriteAndReadLocalFile("local", conf, fileSize);
        WriteAndReadFile(rawLocal, "raw", conf, fileSize);
        WriteAndReadFile(checkedLocal, "checked", conf, fileSize);
    }

    MiniDFSCluster cluster = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).Racks(new string[] { "/foo" }).Build();
        cluster.WaitActive();
        FileSystem dfs = cluster.GetFileSystem();
        for (int i = 0; i < reps; ++i)
        {
            WriteAndReadFile(dfs, "dfs", conf, fileSize);
        }
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
            // clean up minidfs junk
            rawLocal.Delete(new Path(localDir, "dfs"), true);
        }
    }
    return 0;
}
/// <summary>
/// Creates ten randomly sized files, localizes each through an <c>FSDownload</c> task
/// and verifies the localized size, the permissions of the containing directory and
/// the permissions of the localized file.
/// </summary>
/// <exception cref="System.IO.IOException">If one of the download tasks failed.</exception>
/// <exception cref="Sharpen.URISyntaxException"/>
/// <exception cref="System.Exception"/>
public virtual void TestDownload()
{
    Configuration conf = new Configuration();
    conf.Set(CommonConfigurationKeys.FsPermissionsUmaskKey, "077");
    FileContext files = FileContext.GetLocalFSFileContext(conf);
    Path basedir = files.MakeQualified(new Path("target", typeof(TestFSDownload).Name));
    files.Mkdir(basedir, null, true);
    conf.SetStrings(typeof(TestFSDownload).FullName, basedir.ToString());

    IDictionary<LocalResource, LocalResourceVisibility> rsrcVis =
        new Dictionary<LocalResource, LocalResourceVisibility>();

    // Print the seed so a failing run can be reproduced.
    Random rand = new Random();
    long sharedSeed = rand.NextLong();
    rand.SetSeed(sharedSeed);
    System.Console.Out.WriteLine("SEED: " + sharedSeed);

    IDictionary<LocalResource, Future<Path>> pending = new Dictionary<LocalResource, Future<Path>>();
    ExecutorService exec = Executors.NewSingleThreadExecutor();
    LocalDirAllocator dirs = new LocalDirAllocator(typeof(TestFSDownload).FullName);
    int[] sizes = new int[10];
    for (int i = 0; i < 10; ++i)
    {
        sizes[i] = rand.Next(512) + 512;
        // Odd-numbered files are APPLICATION-visible, even-numbered ones PRIVATE.
        LocalResourceVisibility vis = LocalResourceVisibility.Private;
        if (i % 2 == 1)
        {
            vis = LocalResourceVisibility.Application;
        }
        // The file is named after its index so the expected size can be looked up later.
        Path p = new Path(basedir, string.Empty + i);
        LocalResource rsrc = CreateFile(files, p, sizes[i], rand, vis);
        rsrcVis[rsrc] = vis;
        Path destPath = dirs.GetLocalPathForWrite(basedir.ToString(), sizes[i], conf);
        destPath = new Path(destPath, System.Convert.ToString(uniqueNumberGenerator.IncrementAndGet()));
        FSDownload fsd = new FSDownload(files, UserGroupInformation.GetCurrentUser(), conf, destPath, rsrc);
        pending[rsrc] = exec.Submit(fsd);
    }

    exec.Shutdown();
    // Keep waiting until every submitted download has terminated.
    while (!exec.AwaitTermination(1000, TimeUnit.Milliseconds))
    {
    }
    foreach (Future<Path> path in pending.Values)
    {
        NUnit.Framework.Assert.IsTrue(path.IsDone());
    }

    try
    {
        foreach (KeyValuePair<LocalResource, Future<Path>> p in pending)
        {
            Path localized = p.Value.Get();
            NUnit.Framework.Assert.AreEqual(
                sizes[Sharpen.Extensions.ValueOf(localized.GetName())], p.Key.GetSize());

            FileStatus status = files.GetFileStatus(localized.GetParent());
            FsPermission perm = status.GetPermission();
            // NUnit takes (expected, actual, message); 0x1ed is octal 755.
            NUnit.Framework.Assert.AreEqual(new FsPermission((short)0x1ed), perm,
                "Cache directory permissions are incorrect");

            status = files.GetFileStatus(localized);
            perm = status.GetPermission();
            System.Console.Out.WriteLine("File permission " + perm + " for rsrc vis "
                + p.Key.GetVisibility().ToString());
            System.Diagnostics.Debug.Assert(rsrcVis.ContainsKey(p.Key));
            // NUnit takes (condition, message).
            NUnit.Framework.Assert.IsTrue(
                perm.ToShort() == FSDownload.PrivateFilePerms.ToShort(),
                "Private file should be 500");
        }
    }
    catch (ExecutionException e)
    {
        throw new IOException("Failed exec", e);
    }
}