/// <summary>
/// Registers one cache archive and one cache file (both on the mock file
/// system) in a fresh configuration, runs
/// <c>MRApps.SetupDistributedCache</c>, and checks that exactly two local
/// resources come back with the size, timestamp and type that were configured.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestSetupDistributedCache()
{
    // Route the "mockfs" scheme to the mock file system and unwrap the raw instance
    // so its ResolvePath calls can be stubbed.
    Configuration conf = new Configuration();
    conf.SetClass("fs.mockfs.impl", typeof(TestMRApps.MockFileSystem), typeof(FileSystem));
    URI mockFsUri = URI.Create("mockfs://mock/");
    FilterFileSystem filterFs = (FilterFileSystem)FileSystem.Get(mockFsUri, conf);
    FileSystem rawMockFs = filterFs.GetRawFileSystem();

    URI archiveUri = new URI("mockfs://mock/tmp/something.zip");
    Path archivePath = new Path(archiveUri);
    // The "#something" fragment is the key the file is looked up under below.
    URI fileUri = new URI("mockfs://mock/tmp/something.txt#something");
    Path filePath = new Path(fileUri);

    // Each path resolves to itself.
    Org.Mockito.Mockito.When(rawMockFs.ResolvePath(archivePath)).ThenReturn(archivePath);
    Org.Mockito.Mockito.When(rawMockFs.ResolvePath(filePath)).ThenReturn(filePath);

    // Archive entry: size 10, timestamp 10.
    DistributedCache.AddCacheArchive(archiveUri, conf);
    conf.Set(MRJobConfig.CacheArchivesTimestamps, "10");
    conf.Set(MRJobConfig.CacheArchivesSizes, "10");
    conf.Set(MRJobConfig.CacheArchivesVisibilities, "true");

    // File entry: size 11, timestamp 11.
    DistributedCache.AddCacheFile(fileUri, conf);
    conf.Set(MRJobConfig.CacheFileTimestamps, "11");
    conf.Set(MRJobConfig.CacheFilesSizes, "11");
    conf.Set(MRJobConfig.CacheFileVisibilities, "true");

    IDictionary<string, LocalResource> localResources = new Dictionary<string, LocalResource>();
    MRApps.SetupDistributedCache(conf, localResources);
    NUnit.Framework.Assert.AreEqual(2, localResources.Count);

    LocalResource archiveResource = localResources["something.zip"];
    NUnit.Framework.Assert.IsNotNull(archiveResource);
    NUnit.Framework.Assert.AreEqual(10L, archiveResource.GetSize());
    NUnit.Framework.Assert.AreEqual(10L, archiveResource.GetTimestamp());
    NUnit.Framework.Assert.AreEqual(LocalResourceType.Archive, archiveResource.GetType());

    LocalResource fileResource = localResources["something"];
    NUnit.Framework.Assert.IsNotNull(fileResource);
    NUnit.Framework.Assert.AreEqual(11L, fileResource.GetSize());
    NUnit.Framework.Assert.AreEqual(11L, fileResource.GetTimestamp());
    NUnit.Framework.Assert.AreEqual(LocalResourceType.File, fileResource.GetType());
}
/// <summary>
/// Adds the MapReduce application framework named by
/// <c>MRJobConfig.MapreduceApplicationFrameworkPath</c> to the distributed
/// cache as a cache archive. Does nothing when that setting is empty.
/// </summary>
/// <param name="conf">
/// configuration the framework path is read from and the cache archive is
/// added to
/// </param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.ArgumentException">
/// the configured framework path cannot be parsed as a URI, or the resolved
/// URI cannot be rebuilt with the original fragment
/// </exception>
private static void AddMRFrameworkToDistributedCache(Configuration conf)
{
    string framework = conf.Get(MRJobConfig.MapreduceApplicationFrameworkPath, string.Empty);
    if (framework.IsEmpty())
    {
        return;
    }

    URI uri;
    try
    {
        uri = new URI(framework);
    }
    catch (URISyntaxException e)
    {
        throw new ArgumentException("Unable to parse '" + framework + "' as a URI, check the setting for "
             + MRJobConfig.MapreduceApplicationFrameworkPath, e);
    }

    // The fragment is dropped when the path is qualified and resolved below,
    // so keep it and re-attach it afterwards.
    string linkedName = uri.GetFragment();

    // resolve any symlinks in the URI path so using a "current" symlink
    // to point to a specific version shows the specific version
    // in the distributed cache configuration
    FileSystem fs = FileSystem.Get(conf);
    Path frameworkPath = fs.MakeQualified(new Path(uri.GetScheme(), uri.GetAuthority(), uri.GetPath()));
    FileContext fc = FileContext.GetFileContext(frameworkPath.ToUri(), conf);
    frameworkPath = fc.ResolvePath(frameworkPath);
    uri = frameworkPath.ToUri();
    try
    {
        uri = new URI(uri.GetScheme(), uri.GetAuthority(), uri.GetPath(), null, linkedName);
    }
    catch (URISyntaxException e)
    {
        // ArgumentException has no (Exception) constructor; pass the message
        // and keep the original exception as the inner exception.
        throw new ArgumentException(e.Message, e);
    }

    DistributedCache.AddCacheArchive(uri, conf);
}
/// <summary>
/// Walks through the deprecated <c>DistributedCache</c> accessors (local
/// archives, local files, archive/file timestamps, symlink flag, file status
/// and cache archive/file registration) and checks after each mutation that
/// the configuration and the matching getter agree.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestDeprecatedFunctions()
{
    const string archive1 = "Test Local Archives 1";
    const string archive2 = "Test Local Archives 2";
    const string archive3 = "Test Local Archives 3";
    const string file1 = "Test Local Files 1";
    const string file2 = "Test Local Files 2";
    const string file3 = "Test Local Files 3";
    const string timestampText = "1234567890";
    const int timestampValue = 1234567890;

    // ---- local archives: add, add again (comma-joined), then overwrite ----
    DistributedCache.AddLocalArchives(conf, archive1);
    NUnit.Framework.Assert.AreEqual(archive1, conf.Get(DistributedCache.CacheLocalarchives));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual(archive1, DistributedCache.GetLocalCacheArchives(conf)[0].GetName());

    DistributedCache.AddLocalArchives(conf, archive2);
    NUnit.Framework.Assert.AreEqual(archive1 + "," + archive2, conf.Get(DistributedCache.CacheLocalarchives));
    NUnit.Framework.Assert.AreEqual(2, DistributedCache.GetLocalCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual(archive2, DistributedCache.GetLocalCacheArchives(conf)[1].GetName());

    DistributedCache.SetLocalArchives(conf, archive3);
    NUnit.Framework.Assert.AreEqual(archive3, conf.Get(DistributedCache.CacheLocalarchives));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual(archive3, DistributedCache.GetLocalCacheArchives(conf)[0].GetName());

    // ---- local files: same sequence as for archives ----
    DistributedCache.AddLocalFiles(conf, file1);
    NUnit.Framework.Assert.AreEqual(file1, conf.Get(DistributedCache.CacheLocalfiles));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual(file1, DistributedCache.GetLocalCacheFiles(conf)[0].GetName());

    DistributedCache.AddLocalFiles(conf, file2);
    NUnit.Framework.Assert.AreEqual(file1 + "," + file2, conf.Get(DistributedCache.CacheLocalfiles));
    NUnit.Framework.Assert.AreEqual(2, DistributedCache.GetLocalCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual(file2, DistributedCache.GetLocalCacheFiles(conf)[1].GetName());

    DistributedCache.SetLocalFiles(conf, file3);
    NUnit.Framework.Assert.AreEqual(file3, conf.Get(DistributedCache.CacheLocalfiles));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual(file3, DistributedCache.GetLocalCacheFiles(conf)[0].GetName());

    // ---- archive and file timestamps ----
    DistributedCache.SetArchiveTimestamps(conf, timestampText);
    NUnit.Framework.Assert.AreEqual(timestampValue, conf.GetLong(DistributedCache.CacheArchivesTimestamps, 0));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetArchiveTimestamps(conf).Length);
    NUnit.Framework.Assert.AreEqual(timestampValue, DistributedCache.GetArchiveTimestamps(conf)[0]);

    DistributedCache.SetFileTimestamps(conf, timestampText);
    NUnit.Framework.Assert.AreEqual(timestampValue, conf.GetLong(DistributedCache.CacheFilesTimestamps, 0));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetFileTimestamps(conf).Length);
    NUnit.Framework.Assert.AreEqual(timestampValue, DistributedCache.GetFileTimestamps(conf)[0]);

    // ---- symlink flag: the config key stays unset, yet GetSymlink reports true ----
    FilePath jobCacheDir = new FilePath("Test Job Cache Dir");
    FilePath workDir = new FilePath("Test Work Dir");
    DistributedCache.CreateAllSymlink(conf, jobCacheDir, workDir);
    NUnit.Framework.Assert.IsNull(conf.Get(DistributedCache.CacheSymlink));
    NUnit.Framework.Assert.IsTrue(DistributedCache.GetSymlink(conf));

    // ---- file status / timestamp of a real file, which is removed again afterwards ----
    NUnit.Framework.Assert.IsTrue(symlinkFile.CreateNewFile());
    FileStatus fileStatus = DistributedCache.GetFileStatus(conf, symlinkFile.ToURI());
    NUnit.Framework.Assert.IsNotNull(fileStatus);
    NUnit.Framework.Assert.AreEqual(fileStatus.GetModificationTime(),
        DistributedCache.GetTimestamp(conf, symlinkFile.ToURI()));
    NUnit.Framework.Assert.IsTrue(symlinkFile.Delete());

    // ---- cache archive registration ----
    DistributedCache.AddCacheArchive(symlinkFile.ToURI(), conf);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI().ToString(), conf.Get(DistributedCache.CacheArchives));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI(), DistributedCache.GetCacheArchives(conf)[0]);

    // ---- cache file registration ----
    DistributedCache.AddCacheFile(symlinkFile.ToURI(), conf);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI().ToString(), conf.Get(DistributedCache.CacheFiles));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI(), DistributedCache.GetCacheFiles(conf)[0]);
}
/// <summary>
/// Upload and configure files, libjars, jobjars, and archives pertaining to
/// the passed job.
/// </summary>
/// <remarks>
/// Reads the comma-separated <c>tmpfiles</c>, <c>tmpjars</c> and
/// <c>tmparchives</c> settings from the job configuration, copies each entry
/// under <paramref name="submitJobDir"/> and registers it with the
/// distributed cache. A job jar with no scheme or the <c>file</c> scheme is
/// copied as well, and the job is pointed at the copy.
/// </remarks>
/// <param name="job">the job containing the files to be uploaded</param>
/// <param name="submitJobDir">the submission directory of the job</param>
/// <exception cref="System.IO.IOException">
/// the submission directory already exists, or a cache URI cannot be built
/// for a copied file or archive
/// </exception>
/// <exception cref="System.ArgumentException">
/// an entry of <c>tmpfiles</c> or <c>tmparchives</c> is not a valid URI
/// </exception>
public virtual void UploadFiles(Job job, Path submitJobDir)
{
    Configuration conf = job.GetConfiguration();
    short replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication);
    if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
    {
        Log.Warn("Hadoop command-line option parsing not performed. " + "Implement the Tool interface and execute your application "
             + "with ToolRunner to remedy this.");
    }

    // get all the command line arguments passed in by the user conf
    string files = conf.Get("tmpfiles");
    string libjars = conf.Get("tmpjars");
    string archives = conf.Get("tmparchives");
    string jobJar = job.GetJar();

    //
    // Figure out what fs the JobTracker is using. Copy the
    // job to it, under a temporary name. This allows DFS to work,
    // and under the local fs also provides UNIX-like object loading
    // semantics. (that is, if the job file is deleted right after
    // submission, we can still run the submission to completion)
    //
    // Create a number of filenames in the JobTracker's fs namespace
    Log.Debug("default FileSystem: " + jtFs.GetUri());
    if (jtFs.Exists(submitJobDir))
    {
        throw new IOException("Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in"
             + " that directory");
    }
    submitJobDir = jtFs.MakeQualified(submitJobDir);
    // Keep only the path component of the qualified directory.
    submitJobDir = new Path(submitJobDir.ToUri().GetPath());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);
    FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);
    Path filesDir = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
    Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
    Path libjarsDir = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);

    // add all the command line files/ jars and archive
    // first copy them to jobtrackers filesystem
    if (files != null)
    {
        FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
        string[] fileArr = files.Split(",");
        foreach (string tmpFile in fileArr)
        {
            URI tmpURI;
            try
            {
                tmpURI = new URI(tmpFile);
            }
            catch (URISyntaxException e)
            {
                // ArgumentException has no (Exception) constructor; pass the
                // message and keep the cause as the inner exception.
                throw new ArgumentException(e.Message, e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
            try
            {
                // Carry the fragment of the original entry over to the copied path.
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheFile(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // should not throw a uri exception
                throw new IOException("Failed to create uri for " + tmpFile, ue);
            }
        }
    }

    if (libjars != null)
    {
        FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
        string[] libjarsArr = libjars.Split(",");
        foreach (string tmpjars in libjarsArr)
        {
            Path tmp = new Path(tmpjars);
            Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
            DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs);
        }
    }

    if (archives != null)
    {
        FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
        string[] archivesArr = archives.Split(",");
        foreach (string tmpArchives in archivesArr)
        {
            URI tmpURI;
            try
            {
                tmpURI = new URI(tmpArchives);
            }
            catch (URISyntaxException e)
            {
                // Same constructor fix as in the files loop.
                throw new ArgumentException(e.Message, e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
            try
            {
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheArchive(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // should not throw a uri exception
                throw new IOException("Failed to create uri for " + tmpArchives, ue);
            }
        }
    }

    if (jobJar != null)
    {
        // copy jar to JobTracker's fs
        Path jobJarPath = new Path(jobJar);
        // use jar name if job is not named.
        if (string.Empty.Equals(job.GetJobName()))
        {
            job.SetJobName(jobJarPath.GetName());
        }
        URI jobJarURI = jobJarPath.ToUri();
        // If the job jar is already in a global fs,
        // we don't need to copy it from local fs
        if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
        {
            Path submittedJobJar = JobSubmissionFiles.GetJobJar(submitJobDir);
            CopyJar(jobJarPath, submittedJobJar, replication);
            job.SetJar(submittedJobJar.ToString());
        }
    }
    else
    {
        Log.Warn("No job jar file set.  User classes may not be found. " + "See Job or Job#setJar(String).");
    }

    AddLog4jToDistributedCache(job, submitJobDir);

    // set the timestamps of the archives and files
    // set the public/private visibility of the archives and files
    ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);
    // get DelegationToken for cached file
    ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
}
/// <summary>
/// Writes <paramref name="input"/> to a single input file, runs the
/// "cachetest" job with one cache file and five cache archives taken from
/// <paramref name="cacheDir"/>, and then verifies the job's output file and
/// the cache sizes recorded in the job configuration.
/// </summary>
/// <param name="indir">input directory; created, and "part-0" is written into it</param>
/// <param name="outdir">output directory; deleted recursively before the job runs</param>
/// <param name="cacheDir">directory the six test.* cache entries are resolved against</param>
/// <param name="conf">job configuration to populate and run</param>
/// <param name="input">text written to the job's input file</param>
/// <returns>
/// a result flagged <c>false</c> when a line of the output file differs from
/// the expected test string or the number of lines is not 6; otherwise a
/// result flagged <c>true</c>
/// </returns>
/// <exception cref="System.IO.IOException"/>
public static MRCaching.TestResult LaunchMRCache(string indir, string outdir, string cacheDir, JobConf conf, string input)
{
    string testRootDir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).ToString().Replace(' ', '+');
    conf.Set("test.build.data", testRootDir);

    Path inDir = new Path(indir);
    Path outDir = new Path(outdir);
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(outDir, true);
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }

    System.Console.Out.WriteLine("HERE:" + inDir);
    DataOutputStream inputFile = fs.Create(new Path(inDir, "part-0"));
    try
    {
        inputFile.WriteBytes(input);
    }
    finally
    {
        // Close the stream even when the write fails.
        inputFile.Close();
    }

    conf.SetJobName("cachetest");
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetCombinerClass(typeof(MRCaching.ReduceClass));
    conf.SetReducerClass(typeof(MRCaching.ReduceClass));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(1);
    conf.SetSpeculativeExecution(false);

    URI[] uris = new URI[6];
    conf.SetMapperClass(typeof(MRCaching.MapClass2));
    uris[0] = fs.GetUri().Resolve(cacheDir + "/test.txt");
    uris[1] = fs.GetUri().Resolve(cacheDir + "/test.jar");
    uris[2] = fs.GetUri().Resolve(cacheDir + "/test.zip");
    uris[3] = fs.GetUri().Resolve(cacheDir + "/test.tgz");
    uris[4] = fs.GetUri().Resolve(cacheDir + "/test.tar.gz");
    uris[5] = fs.GetUri().Resolve(cacheDir + "/test.tar");

    // The first entry is a plain cache file; save its expected size.
    DistributedCache.AddCacheFile(uris[0], conf);
    long[] fileSizes = new long[1];
    fileSizes[0] = fs.GetFileStatus(new Path(uris[0].GetPath())).GetLen();

    // The remaining five entries are archives; archiveSizes[i - 1] belongs to uris[i].
    long[] archiveSizes = new long[5];
    for (int i = 1; i < 6; i++)
    {
        DistributedCache.AddCacheArchive(uris[i], conf);
        archiveSizes[i - 1] = fs.GetFileStatus(new Path(uris[i].GetPath())).GetLen();
    }

    RunningJob job = JobClient.RunJob(conf);

    // after the job ran check to see if the input from the localized cache
    // match the real string. Exactly 6 matching lines are expected.
    int count = 0;
    Path result = new Path(testRootDir + "/test.txt");
    BufferedReader reader = new BufferedReader(new InputStreamReader(FileSystem.GetLocal(conf).Open(result)));
    try
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            if (!testStr.Equals(line))
            {
                return new MRCaching.TestResult(job, false);
            }
            count++;
            line = reader.ReadLine();
        }
    }
    finally
    {
        // Also runs on the early return above, which previously leaked the reader.
        reader.Close();
    }

    if (count != 6)
    {
        return new MRCaching.TestResult(job, false);
    }

    // Check to ensure the filesizes of files in DC were correctly saved.
    // Note, the underlying job clones the original conf before determine
    // various stats (timestamps etc.), so we have to getConfiguration here.
    ValidateCacheFileSizes(job.GetConfiguration(), fileSizes, MRJobConfig.CacheFilesSizes);
    ValidateCacheFileSizes(job.GetConfiguration(), archiveSizes, MRJobConfig.CacheArchivesSizes);
    return new MRCaching.TestResult(job, true);
}