/// <summary>
/// Verifies that when two distributed-cache files request the same symlink
/// name ("something"), only the first registered file survives localization.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestSetupDistributedCacheConflictsFiles()
{
    Configuration configuration = new Configuration();
    configuration.SetClass("fs.mockfs.impl", typeof(TestMRApps.MockFileSystem), typeof(FileSystem));
    URI mockFsUri = URI.Create("mockfs://mock/");
    // Unwrap the raw (mocked) filesystem behind the filter wrapper so its
    // path resolution can be stubbed directly.
    FileSystem rawMockFs = ((FilterFileSystem)FileSystem.Get(mockFsUri, configuration)).GetRawFileSystem();
    // Two different cache files that both claim the fragment/symlink "something".
    URI zipUri = new URI("mockfs://mock/tmp/something.zip#something");
    Path zipPath = new Path(zipUri);
    URI txtUri = new URI("mockfs://mock/tmp/something.txt#something");
    Path txtPath = new Path(txtUri);
    Org.Mockito.Mockito.When(rawMockFs.ResolvePath(zipPath)).ThenReturn(zipPath);
    Org.Mockito.Mockito.When(rawMockFs.ResolvePath(txtPath)).ThenReturn(txtPath);
    DistributedCache.AddCacheFile(zipUri, configuration);
    DistributedCache.AddCacheFile(txtUri, configuration);
    // Matching timestamp/size/visibility lists for the two cache entries.
    configuration.Set(MRJobConfig.CacheFileTimestamps, "10,11");
    configuration.Set(MRJobConfig.CacheFilesSizes, "10,11");
    configuration.Set(MRJobConfig.CacheFileVisibilities, "true,true");
    IDictionary<string, LocalResource> resources = new Dictionary<string, LocalResource>();
    MRApps.SetupDistributedCache(configuration, resources);
    // The name conflict must collapse the two entries down to one.
    NUnit.Framework.Assert.AreEqual(1, resources.Count);
    LocalResource winner = resources["something"];
    //First one wins
    NUnit.Framework.Assert.IsNotNull(winner);
    NUnit.Framework.Assert.AreEqual(10L, winner.GetSize());
    NUnit.Framework.Assert.AreEqual(10L, winner.GetTimestamp());
    NUnit.Framework.Assert.AreEqual(LocalResourceType.File, winner.GetType());
}
/// <summary>
/// Registers the same cache file URI twice and checks that localization still
/// produces the symlink while the manager is open, and that closing the
/// manager removes it. Relies on the class-level mocked filesystem
/// (<c>mockfs</c>) and the <c>_Answer_*</c> stub classes defined elsewhere
/// in this file.
/// </summary>
public virtual void TestDuplicateDownload()
{
    JobConf conf = new JobConf();
    conf.SetClass("fs.mock.impl", typeof(TestLocalDistributedCacheManager.MockFileSystem), typeof(FileSystem));
    URI mockBase = new URI("mock://test-nn1/");
    // Stub out the mock filesystem's identity and working directory.
    Org.Mockito.Mockito.When(mockfs.GetUri()).ThenReturn(mockBase);
    Path working = new Path("mock://test-nn1/user/me/");
    Org.Mockito.Mockito.When(mockfs.GetWorkingDirectory()).ThenReturn(working);
    // Path resolution is answered by a helper stub (identity-style answer).
    Org.Mockito.Mockito.When(mockfs.ResolvePath(Matchers.Any<Path>())).ThenAnswer(new _Answer_234());
    // The cache file carries the fragment "#link", which names the symlink
    // the localizer is expected to create in the working directory.
    URI file = new URI("mock://test-nn1/user/me/file.txt#link");
    Path filePath = new Path(file);
    FilePath link = new FilePath("link");
    Org.Mockito.Mockito.When(mockfs.GetFileStatus(Matchers.Any<Path>())).ThenAnswer(new _Answer_245(filePath));
    Org.Mockito.Mockito.When(mockfs.GetConf()).ThenReturn(conf);
    // Any open() on the mock filesystem returns this canned stream content.
    FSDataInputStream @in = new FSDataInputStream(new TestLocalDistributedCacheManager.MockInputStream(Sharpen.Runtime.GetBytesForString("This is a test file\n")));
    Org.Mockito.Mockito.When(mockfs.Open(Matchers.Any<Path>(), Matchers.AnyInt())).ThenAnswer(new _Answer_261(@in));
    // Add the identical URI twice — the duplicate must be tolerated, so the
    // timestamp/size/visibility lists each carry two (equal) entries.
    DistributedCache.AddCacheFile(file, conf);
    DistributedCache.AddCacheFile(file, conf);
    conf.Set(MRJobConfig.CacheFileTimestamps, "101,101");
    conf.Set(MRJobConfig.CacheFilesSizes, "201,201");
    conf.Set(MRJobConfig.CacheFileVisibilities, "false,false");
    conf.Set(MRConfig.LocalDir, localDir.GetAbsolutePath());
    LocalDistributedCacheManager manager = new LocalDistributedCacheManager();
    try
    {
        // Setup localizes the file; the symlink must exist afterwards.
        manager.Setup(conf);
        NUnit.Framework.Assert.IsTrue(link.Exists());
    }
    finally
    {
        manager.Close();
    }
    // Close() is expected to clean up the symlink it created.
    NUnit.Framework.Assert.IsFalse(link.Exists());
}
/// <summary>
/// Exercises the deprecated DistributedCache static helpers (add/set local
/// archives and files, timestamps, symlinks, and cache archive/file
/// registration) and checks each one's effect on the backing Configuration.
/// Relies on the class-level <c>conf</c> and <c>symlinkFile</c> fields.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestDeprecatedFunctions()
{
    // --- local archives: add appends (comma-separated), set replaces ---
    DistributedCache.AddLocalArchives(conf, "Test Local Archives 1");
    NUnit.Framework.Assert.AreEqual("Test Local Archives 1", conf.Get(DistributedCache.CacheLocalarchives));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual("Test Local Archives 1", DistributedCache.GetLocalCacheArchives(conf)[0].GetName());
    DistributedCache.AddLocalArchives(conf, "Test Local Archives 2");
    NUnit.Framework.Assert.AreEqual("Test Local Archives 1,Test Local Archives 2", conf.Get(DistributedCache.CacheLocalarchives));
    NUnit.Framework.Assert.AreEqual(2, DistributedCache.GetLocalCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual("Test Local Archives 2", DistributedCache.GetLocalCacheArchives(conf)[1].GetName());
    DistributedCache.SetLocalArchives(conf, "Test Local Archives 3");
    NUnit.Framework.Assert.AreEqual("Test Local Archives 3", conf.Get(DistributedCache.CacheLocalarchives));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual("Test Local Archives 3", DistributedCache.GetLocalCacheArchives(conf)[0].GetName());
    // --- local files: same add-appends / set-replaces contract ---
    DistributedCache.AddLocalFiles(conf, "Test Local Files 1");
    NUnit.Framework.Assert.AreEqual("Test Local Files 1", conf.Get(DistributedCache.CacheLocalfiles));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual("Test Local Files 1", DistributedCache.GetLocalCacheFiles(conf)[0].GetName());
    DistributedCache.AddLocalFiles(conf, "Test Local Files 2");
    NUnit.Framework.Assert.AreEqual("Test Local Files 1,Test Local Files 2", conf.Get(DistributedCache.CacheLocalfiles));
    NUnit.Framework.Assert.AreEqual(2, DistributedCache.GetLocalCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual("Test Local Files 2", DistributedCache.GetLocalCacheFiles(conf)[1].GetName());
    DistributedCache.SetLocalFiles(conf, "Test Local Files 3");
    NUnit.Framework.Assert.AreEqual("Test Local Files 3", conf.Get(DistributedCache.CacheLocalfiles));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual("Test Local Files 3", DistributedCache.GetLocalCacheFiles(conf)[0].GetName());
    // --- archive/file timestamps round-trip through the conf as longs ---
    DistributedCache.SetArchiveTimestamps(conf, "1234567890");
    NUnit.Framework.Assert.AreEqual(1234567890, conf.GetLong(DistributedCache.CacheArchivesTimestamps, 0));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetArchiveTimestamps(conf).Length);
    NUnit.Framework.Assert.AreEqual(1234567890, DistributedCache.GetArchiveTimestamps(conf)[0]);
    DistributedCache.SetFileTimestamps(conf, "1234567890");
    NUnit.Framework.Assert.AreEqual(1234567890, conf.GetLong(DistributedCache.CacheFilesTimestamps, 0));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetFileTimestamps(conf).Length);
    NUnit.Framework.Assert.AreEqual(1234567890, DistributedCache.GetFileTimestamps(conf)[0]);
    // --- symlinks: createAllSymlink is a no-op here (no conf key written)
    // but getSymlink still reports true ---
    DistributedCache.CreateAllSymlink(conf, new FilePath("Test Job Cache Dir"), new FilePath("Test Work Dir"));
    NUnit.Framework.Assert.IsNull(conf.Get(DistributedCache.CacheSymlink));
    NUnit.Framework.Assert.IsTrue(DistributedCache.GetSymlink(conf));
    // --- file status / timestamp lookup against a real temp file ---
    NUnit.Framework.Assert.IsTrue(symlinkFile.CreateNewFile());
    FileStatus fileStatus = DistributedCache.GetFileStatus(conf, symlinkFile.ToURI());
    NUnit.Framework.Assert.IsNotNull(fileStatus);
    NUnit.Framework.Assert.AreEqual(fileStatus.GetModificationTime(), DistributedCache.GetTimestamp(conf, symlinkFile.ToURI()));
    NUnit.Framework.Assert.IsTrue(symlinkFile.Delete());
    // --- cache archive / cache file registration round-trips the URI ---
    DistributedCache.AddCacheArchive(symlinkFile.ToURI(), conf);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI().ToString(), conf.Get(DistributedCache.CacheArchives));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetCacheArchives(conf).Length);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI(), DistributedCache.GetCacheArchives(conf)[0]);
    DistributedCache.AddCacheFile(symlinkFile.ToURI(), conf);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI().ToString(), conf.Get(DistributedCache.CacheFiles));
    NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetCacheFiles(conf).Length);
    NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI(), DistributedCache.GetCacheFiles(conf)[0]);
}
/// <summary>The main driver for sort program.</summary>
/// <remarks>
/// The main driver for sort program.
/// Invoke this method to submit the map/reduce job.
/// </remarks>
/// <exception cref="System.IO.IOException">
/// When there is communication problems with the
/// job tracker.
/// </exception>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    Configuration conf = GetConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.GetClusterStatus();
    // Default the reducer count to 90% of the cluster's reduce capacity,
    // unless an explicit reduces-per-host setting overrides it.
    int numReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
    string reducesPerHostSetting = conf.Get(ReducesPerHost);
    if (reducesPerHostSetting != null)
    {
        numReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(reducesPerHostSetting);
    }
    Type inputFormatClass = typeof(SequenceFileInputFormat);
    Type outputFormatClass = typeof(SequenceFileOutputFormat);
    Type outputKeyClass = typeof(BytesWritable);
    Type outputValueClass = typeof(BytesWritable);
    IList<string> remainingArgs = new AList<string>();
    InputSampler.Sampler<K, V> sampler = null;
    // Parse the command line; anything not recognized as an option is kept
    // as a positional argument (input dir, output dir).
    for (int i = 0; i < args.Length; ++i)
    {
        try
        {
            if ("-r".Equals(args[i]))
            {
                numReduces = System.Convert.ToInt32(args[++i]);
            }
            else if ("-inFormat".Equals(args[i]))
            {
                inputFormatClass = Sharpen.Runtime.GetType(args[++i]).AsSubclass<InputFormat>();
            }
            else if ("-outFormat".Equals(args[i]))
            {
                outputFormatClass = Sharpen.Runtime.GetType(args[++i]).AsSubclass<OutputFormat>();
            }
            else if ("-outKey".Equals(args[i]))
            {
                outputKeyClass = Sharpen.Runtime.GetType(args[++i]).AsSubclass<WritableComparable>();
            }
            else if ("-outValue".Equals(args[i]))
            {
                outputValueClass = Sharpen.Runtime.GetType(args[++i]).AsSubclass<Writable>();
            }
            else if ("-totalOrder".Equals(args[i]))
            {
                // -totalOrder <pcnt> <numSamples> <maxSplits>
                double pcnt = double.ParseDouble(args[++i]);
                int numSamples = System.Convert.ToInt32(args[++i]);
                int maxSplits = System.Convert.ToInt32(args[++i]);
                if (0 >= maxSplits)
                {
                    // Non-positive means "no limit".
                    maxSplits = int.MaxValue;
                }
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            }
            else
            {
                remainingArgs.AddItem(args[i]);
            }
        }
        catch (FormatException)
        {
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return PrintUsage();
        }
        catch (IndexOutOfRangeException)
        {
            // An option consumed more values than were supplied.
            System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i - 1]);
            return PrintUsage();
        }
    }
    // exits
    // Set user-supplied (possibly default) job configs
    job = Job.GetInstance(conf);
    job.SetJobName("sorter");
    job.SetJarByClass(typeof(Sort));
    job.SetMapperClass(typeof(Mapper));
    job.SetReducerClass(typeof(Reducer));
    job.SetNumReduceTasks(numReduces);
    job.SetInputFormatClass(inputFormatClass);
    job.SetOutputFormatClass(outputFormatClass);
    job.SetOutputKeyClass(outputKeyClass);
    job.SetOutputValueClass(outputValueClass);
    // Make sure there are exactly 2 parameters left.
    if (remainingArgs.Count != 2)
    {
        System.Console.Out.WriteLine("ERROR: Wrong number of parameters: " + remainingArgs.Count + " instead of 2.");
        return PrintUsage();
    }
    FileInputFormat.SetInputPaths(job, remainingArgs[0]);
    FileOutputFormat.SetOutputPath(job, new Path(remainingArgs[1]));
    if (sampler != null)
    {
        // Total-order sort: sample the input, write a partition file, and
        // distribute it to the tasks via the distributed cache.
        System.Console.Out.WriteLine("Sampling input to effect total-order sort...");
        job.SetPartitionerClass(typeof(TotalOrderPartitioner));
        Path inputDir = FileInputFormat.GetInputPaths(job)[0];
        inputDir = inputDir.MakeQualified(inputDir.GetFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
        InputSampler.WritePartitionFile<K, V>(job, sampler);
        URI partitionUri = new URI(partitionFile.ToString() + "#" + "_sortPartitioning");
        DistributedCache.AddCacheFile(partitionUri, conf);
    }
    System.Console.Out.WriteLine("Running on " + cluster.GetTaskTrackers() + " nodes to sort from " + FileInputFormat.GetInputPaths(job)[0] + " into " + FileOutputFormat.GetOutputPath(job) + " with " + numReduces + " reduces.");
    DateTime startTime = new DateTime();
    System.Console.Out.WriteLine("Job started: " + startTime);
    int exitCode = job.WaitForCompletion(true) ? 0 : 1;
    DateTime endTime = new DateTime();
    System.Console.Out.WriteLine("Job ended: " + endTime);
    System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime()) / 1000 + " seconds.");
    return exitCode;
}
/// <summary>
/// Upload and configure files, libjars, jobjars, and archives pertaining to
/// the passed job.
/// </summary>
/// <param name="job">the job containing the files to be uploaded</param>
/// <param name="submitJobDir">the submission directory of the job</param>
/// <exception cref="System.IO.IOException"/>
public virtual void UploadFiles(Job job, Path submitJobDir)
{
    Configuration conf = job.GetConfiguration();
    short replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication);
    if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
    {
        Log.Warn("Hadoop command-line option parsing not performed. " + "Implement the Tool interface and execute your application " + "with ToolRunner to remedy this.");
    }
    // get all the command line arguments passed in by the user conf
    string files = conf.Get("tmpfiles");
    string libjars = conf.Get("tmpjars");
    string archives = conf.Get("tmparchives");
    string jobJar = job.GetJar();
    //
    // Figure out what fs the JobTracker is using. Copy the
    // job to it, under a temporary name. This allows DFS to work,
    // and under the local fs also provides UNIX-like object loading
    // semantics. (that is, if the job file is deleted right after
    // submission, we can still run the submission to completion)
    //
    // Create a number of filenames in the JobTracker's fs namespace
    Log.Debug("default FileSystem: " + jtFs.GetUri());
    if (jtFs.Exists(submitJobDir))
    {
        // Refuse to reuse an existing submission directory.
        throw new IOException("Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in" + " that directory");
    }
    // Re-qualify and then strip scheme/authority so the directory is a
    // bare path on the job tracker's filesystem.
    submitJobDir = jtFs.MakeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.ToUri().GetPath());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);
    FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);
    Path filesDir = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
    Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
    Path libjarsDir = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);
    // add all the command line files/ jars and archive
    // first copy them to jobtrackers filesystem
    if (files != null)
    {
        FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
        string[] fileArr = files.Split(",");
        foreach (string tmpFile in fileArr)
        {
            URI tmpURI = null;
            try
            {
                tmpURI = new URI(tmpFile);
            }
            catch (URISyntaxException e)
            {
                throw new ArgumentException(e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
            try
            {
                // Preserve the original URI fragment (symlink name) on the
                // copied path before registering it with the cache.
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheFile(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // should not throw a uri exception
                throw new IOException("Failed to create uri for " + tmpFile, ue);
            }
        }
    }
    if (libjars != null)
    {
        FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
        string[] libjarsArr = libjars.Split(",");
        foreach (string tmpjars in libjarsArr)
        {
            // Libjars go on the task classpath rather than the plain cache.
            Path tmp = new Path(tmpjars);
            Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
            DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs);
        }
    }
    if (archives != null)
    {
        FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
        string[] archivesArr = archives.Split(",");
        foreach (string tmpArchives in archivesArr)
        {
            URI tmpURI;
            try
            {
                tmpURI = new URI(tmpArchives);
            }
            catch (URISyntaxException e)
            {
                throw new ArgumentException(e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
            try
            {
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheArchive(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // should not throw an uri excpetion
                throw new IOException("Failed to create uri for " + tmpArchives, ue);
            }
        }
    }
    if (jobJar != null)
    {
        // copy jar to JobTracker's fs
        // use jar name if job is not named.
        if (string.Empty.Equals(job.GetJobName()))
        {
            job.SetJobName(new Path(jobJar).GetName());
        }
        Path jobJarPath = new Path(jobJar);
        URI jobJarURI = jobJarPath.ToUri();
        // If the job jar is already in a global fs,
        // we don't need to copy it from local fs
        if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
        {
            CopyJar(jobJarPath, JobSubmissionFiles.GetJobJar(submitJobDir), replication);
            job.SetJar(JobSubmissionFiles.GetJobJar(submitJobDir).ToString());
        }
    }
    else
    {
        Log.Warn("No job jar file set. User classes may not be found. " + "See Job or Job#setJar(String).");
    }
    AddLog4jToDistributedCache(job, submitJobDir);
    // set the timestamps of the archives and files
    // set the public/private visibility of the archives and files
    ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);
    // get DelegationToken for cached file
    ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
}
/// <summary>
/// Runs a "cachetest" word-count style job that registers one cache file and
/// five cache archives of assorted formats, then validates the localized
/// output (six copies of the canned test line) and the recorded cache sizes.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public static MRCaching.TestResult LaunchMRCache(string indir, string outdir, string cacheDir, JobConf conf, string input)
{
    // Replace spaces so the test data path survives being carried around as
    // a configuration value.
    string testRoot = new Path(Runtime.GetProperty("test.build.data", "/tmp")).ToString().Replace(' ', '+');
    //if (TEST_ROOT_DIR.startsWith("C:")) TEST_ROOT_DIR = "/tmp";
    conf.Set("test.build.data", testRoot);
    Path inputPath = new Path(indir);
    Path outputPath = new Path(outdir);
    FileSystem fileSystem = FileSystem.Get(conf);
    fileSystem.Delete(outputPath, true);
    if (!fileSystem.Mkdirs(inputPath))
    {
        throw new IOException("Mkdirs failed to create " + inputPath.ToString());
    }
    // Write the single input split that the lone map task will consume.
    System.Console.Out.WriteLine("HERE:" + inputPath);
    DataOutputStream partFile = fileSystem.Create(new Path(inputPath, "part-0"));
    partFile.WriteBytes(input);
    partFile.Close();
    conf.SetJobName("cachetest");
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetCombinerClass(typeof(MRCaching.ReduceClass));
    conf.SetReducerClass(typeof(MRCaching.ReduceClass));
    FileInputFormat.SetInputPaths(conf, inputPath);
    FileOutputFormat.SetOutputPath(conf, outputPath);
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(1);
    conf.SetSpeculativeExecution(false);
    conf.SetMapperClass(typeof(MRCaching.MapClass2));
    // One plain cache file (index 0) plus five archives of assorted formats.
    string[] cacheSuffixes = new string[] { "/test.txt", "/test.jar", "/test.zip", "/test.tgz", "/test.tar.gz", "/test.tar" };
    URI[] cacheUris = new URI[6];
    for (int i = 0; i < cacheUris.Length; i++)
    {
        cacheUris[i] = fileSystem.GetUri().Resolve(cacheDir + cacheSuffixes[i]);
    }
    DistributedCache.AddCacheFile(cacheUris[0], conf);
    // Save expected file sizes
    long[] expectedFileSizes = new long[1];
    expectedFileSizes[0] = fileSystem.GetFileStatus(new Path(cacheUris[0].GetPath())).GetLen();
    long[] expectedArchiveSizes = new long[5];
    // track last 5, starting with second archive
    for (int i = 1; i < 6; i++)
    {
        DistributedCache.AddCacheArchive(cacheUris[i], conf);
        expectedArchiveSizes[i - 1] = fileSystem.GetFileStatus(new Path(cacheUris[i].GetPath())).GetLen();
    }
    RunningJob job = JobClient.RunJob(conf);
    // after the job ran check to see if the input from the localized cache
    // match the real string. check if there are 3 instances or not.
    int lineCount = 0;
    Path result = new Path(testRoot + "/test.txt");
    BufferedReader reader = new BufferedReader(new InputStreamReader(FileSystem.GetLocal(conf).Open(result)));
    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
    {
        if (!testStr.Equals(line))
        {
            return new MRCaching.TestResult(job, false);
        }
        lineCount++;
    }
    reader.Close();
    if (lineCount != 6)
    {
        return new MRCaching.TestResult(job, false);
    }
    // Check to ensure the filesizes of files in DC were correctly saved.
    // Note, the underlying job clones the original conf before determine
    // various stats (timestamps etc.), so we have to getConfiguration here.
    ValidateCacheFileSizes(job.GetConfiguration(), expectedFileSizes, MRJobConfig.CacheFilesSizes);
    ValidateCacheFileSizes(job.GetConfiguration(), expectedArchiveSizes, MRJobConfig.CacheArchivesSizes);
    return new MRCaching.TestResult(job, true);
}