/// <summary>
/// Submits a job built from <paramref name="conf"/> that registers a symlinked
/// cache file, a classpath file, a classpath archive and a cache archive, and
/// asserts that the job completes successfully.
/// </summary>
/// <param name="conf">Configuration the job instance is created from.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="Sharpen.URISyntaxException"/>
private void TestWithConf(Configuration conf)
{
    // Input file whose content is the single character "x".
    Path inputFile = CreateTempFile("distributed.first", "x");

    // Three jars under the test root, one per distribution mechanism exercised below.
    Path classpathJar = MakeJar(new Path(TestRootDir, "distributed.second.jar"), 2);
    Path classpathArchive = MakeJar(new Path(TestRootDir, "distributed.third.jar"), 3);
    Path cacheArchive = MakeJar(new Path(TestRootDir, "distributed.fourth.jar"), 4);

    Job job = Job.GetInstance(conf);
    job.SetMapperClass(typeof(TestMRWithDistributedCache.DistributedCacheCheckerMapper));
    job.SetReducerClass(typeof(TestMRWithDistributedCache.DistributedCacheCheckerReducer));
    job.SetOutputFormatClass(typeof(NullOutputFormat));
    FileInputFormat.SetInputPaths(job, inputFile);

    // Register the input file in the cache under a "#fragment" symlink name.
    URI symlinkedInput = new URI(inputFile.ToUri().ToString() + "#distributed.first.symlink");
    job.AddCacheFile(symlinkedInput);
    job.AddFileToClassPath(classpathJar);
    job.AddArchiveToClassPath(classpathArchive);
    job.AddCacheArchive(cacheArchive.ToUri());

    // A single map attempt makes failures surface quickly.
    job.SetMaxMapAttempts(1);

    job.Submit();
    bool completed = job.WaitForCompletion(false);
    NUnit.Framework.Assert.IsTrue(completed);
}
/// <summary>
/// Registers two cache files on a job, runs
/// <c>ClientDistributedCacheManager.DetermineTimestamps</c>, and checks that
/// the stat cache holds exactly one entry per file and that the configuration
/// value under <c>MRJobConfig.CacheFileTimestamps</c> is the two modification
/// times joined by a comma, in registration order.
/// </summary>
public virtual void TestDetermineTimestamps()
{
    Job job = Job.GetInstance(conf);
    job.AddCacheFile(firstCacheFile.ToUri());
    job.AddCacheFile(secondCacheFile.ToUri());
    Configuration jobConf = job.GetConfiguration();

    IDictionary<URI, FileStatus> statCache = new Dictionary<URI, FileStatus>();
    ClientDistributedCacheManager.DetermineTimestamps(jobConf, statCache);

    // The dictionary indexer throws KeyNotFoundException for a missing key
    // (unlike Java's Map.get, which returns null). TryGetValue leaves the
    // out value null instead, so the IsNotNull assertions below are the
    // ones that report a missing entry.
    FileStatus firstStatus;
    FileStatus secondStatus;
    statCache.TryGetValue(firstCacheFile.ToUri(), out firstStatus);
    statCache.TryGetValue(secondCacheFile.ToUri(), out secondStatus);

    NUnit.Framework.Assert.IsNotNull(firstStatus);
    NUnit.Framework.Assert.IsNotNull(secondStatus);
    NUnit.Framework.Assert.AreEqual(2, statCache.Count);

    string expected = firstStatus.GetModificationTime() + "," + secondStatus.GetModificationTime();
    NUnit.Framework.Assert.AreEqual(expected, jobConf.Get(MRJobConfig.CacheFileTimestamps));
}
/// <summary>
/// Runs a distributed-cache job on the mini MR cluster using
/// <paramref name="jobJarPath"/> as the job jar, then asserts that the job
/// succeeds and that its tracking URL ends with the trailing segment of the
/// job ID (from its last underscore onward) followed by "/".
/// Returns without running anything when the MR application jar is absent.
/// </summary>
/// <param name="jobJarPath">Path of the jar to set as the job jar.</param>
/// <exception cref="System.Exception"/>
public virtual void _testDistributedCache(string jobJarPath)
{
    if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
    {
        Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
        return;
    }

    // Create a temporary file whose content is the single character "x".
    Path first = CreateTempFile("distributed.first", "x");

    // Create three jars under the test root directory.
    Path second = MakeJar(new Path(TestRootDir, "distributed.second.jar"), 2);
    Path third = MakeJar(new Path(TestRootDir, "distributed.third.jar"), 3);
    Path fourth = MakeJar(new Path(TestRootDir, "distributed.fourth.jar"), 4);

    Job job = Job.GetInstance(mrCluster.GetConfig());

    // Set the job jar to a new "dummy" jar so we can check that it's
    // extracted properly.
    job.SetJar(jobJarPath);

    // Because the job jar is a "dummy" jar, we need to include the jar with
    // DistributedCacheChecker or it won't be able to find it.
    Path distributedCacheCheckerJar = new Path(JarFinder.GetJar(typeof(TestMRJobs.DistributedCacheChecker)));
    job.AddFileToClassPath(distributedCacheCheckerJar.MakeQualified(localFs.GetUri(), distributedCacheCheckerJar.GetParent()));

    job.SetMapperClass(typeof(TestMRJobs.DistributedCacheChecker));
    job.SetOutputFormatClass(typeof(NullOutputFormat));
    FileInputFormat.SetInputPaths(job, first);

    // Register the input file in the cache under a "#fragment" symlink name.
    job.AddCacheFile(new URI(first.ToUri().ToString() + "#distributed.first.symlink"));
    job.AddFileToClassPath(second);

    // The AppMaster jar itself.
    job.AddFileToClassPath(AppJar.MakeQualified(localFs.GetUri(), AppJar.GetParent()));
    job.AddArchiveToClassPath(third);
    job.AddCacheArchive(fourth.ToUri());

    // A single map attempt speeds up failures.
    job.SetMaxMapAttempts(1);

    job.Submit();
    string trackingUrl = job.GetTrackingURL();
    string jobId = job.GetJobID().ToString();
    NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(false));

    // Character search and an ordinal suffix check keep this comparison
    // culture-independent, matching the ordinal semantics of the Java original.
    string expectedSuffix = Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf('_')) + "/";

    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(
        trackingUrl.EndsWith(expectedSuffix, System.StringComparison.Ordinal),
        "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId);
}
/// <summary>
/// Configures and runs the TeraSort job, reading from <c>args[0]</c> and
/// writing to <c>args[1]</c>.
/// </summary>
/// <param name="args">
/// Command-line arguments: the input directory followed by the output
/// directory. Any further arguments are ignored.
/// </param>
/// <returns>
/// 0 when the job completes successfully, 1 when it does not, -1 when writing
/// the partition file fails, and 2 when fewer than two arguments are given.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    // Fail with a usage message instead of an IndexOutOfRangeException when
    // the input or output directory is missing.
    if (args == null || args.Length < 2)
    {
        Log.Error("Usage: terasort <input dir> <output dir>");
        return 2;
    }

    Log.Info("starting");
    Job job = Job.GetInstance(GetConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    bool useSimplePartitioner = GetUseSimplePartitioner(job);

    TeraInputFormat.SetInputPaths(job, inputDir);
    FileOutputFormat.SetOutputPath(job, outputDir);
    job.SetJobName("TeraSort");
    job.SetJarByClass(typeof(TeraSort));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(Text));
    job.SetInputFormatClass(typeof(TeraInputFormat));
    job.SetOutputFormatClass(typeof(TeraOutputFormat));

    if (useSimplePartitioner)
    {
        job.SetPartitionerClass(typeof(TeraSort.SimplePartitioner));
    }
    else
    {
        long start = Runtime.CurrentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PartitionFilename);

        // The "#name" fragment is the name the cached partition file is registered under.
        URI partitionUri = new URI(partitionFile.ToString() + "#" + TeraInputFormat.PartitionFilename);
        try
        {
            TeraInputFormat.WritePartitionFile(job, partitionFile);
        }
        catch (Exception e)
        {
            // Any failure while writing the partition file aborts the run.
            Log.Error(e.Message);
            return -1;
        }
        job.AddCacheFile(partitionUri);
        long end = Runtime.CurrentTimeMillis();
        System.Console.Out.WriteLine("Spent " + (end - start) + "ms computing partitions.");
        job.SetPartitionerClass(typeof(TeraSort.TotalOrderPartitioner));
    }

    job.GetConfiguration().SetInt("dfs.replication", GetOutputReplication(job));
    TeraOutputFormat.SetFinalSync(job, true);

    int ret = job.WaitForCompletion(true) ? 0 : 1;
    Log.Info("done");
    return ret;
}