/// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="Sharpen.URISyntaxException"/>
        /// <summary>
        /// Builds a job from <paramref name="conf"/>, registers one temporary
        /// file and three generated jars with it (cache file, class-path file,
        /// class-path archive and cache archive), submits the job and asserts
        /// that it completes successfully.
        /// </summary>
        /// <param name="conf">Configuration passed to <c>Job.GetInstance</c>.</param>
        private void TestWithConf(Configuration conf)
        {
            // Fixture inputs: a temporary file of length 1 ...
            Path plainFile = CreateTempFile("distributed.first", "x");

            // ... followed by three jars, created in this order.
            Path secondJarLocation = new Path(TestRootDir, "distributed.second.jar");
            Path classPathJar      = MakeJar(secondJarLocation, 2);
            Path thirdJarLocation  = new Path(TestRootDir, "distributed.third.jar");
            Path classPathArchive  = MakeJar(thirdJarLocation, 3);
            Path fourthJarLocation = new Path(TestRootDir, "distributed.fourth.jar");
            Path cacheArchive      = MakeJar(fourthJarLocation, 4);

            Job testJob = Job.GetInstance(conf);

            // Mapper and reducer are the checker classes declared by the test fixture.
            testJob.SetMapperClass(typeof(TestMRWithDistributedCache.DistributedCacheCheckerMapper));
            testJob.SetReducerClass(typeof(TestMRWithDistributedCache.DistributedCacheCheckerReducer));
            testJob.SetOutputFormatClass(typeof(NullOutputFormat));
            FileInputFormat.SetInputPaths(testJob, plainFile);

            // The temporary file is registered under a URI carrying a
            // "#distributed.first.symlink" fragment.
            string symlinkedLocation = plainFile.ToUri().ToString() + "#distributed.first.symlink";
            testJob.AddCacheFile(new URI(symlinkedLocation));

            // Each jar is registered through a different cache/class-path entry point.
            testJob.AddFileToClassPath(classPathJar);
            testJob.AddArchiveToClassPath(classPathArchive);
            testJob.AddCacheArchive(cacheArchive.ToUri());

            // A single map attempt speeds up failures.
            testJob.SetMaxMapAttempts(1);

            testJob.Submit();
            bool completedSuccessfully = testJob.WaitForCompletion(false);
            NUnit.Framework.Assert.IsTrue(completedSuccessfully);
        }
示例#2
0
        /// <summary>
        /// Adds two cache files to a job, runs
        /// <c>ClientDistributedCacheManager.DetermineTimestamps</c> against the
        /// job configuration and checks that (a) the stat cache holds exactly one
        /// entry per cache file and (b) the configuration value stored under
        /// <c>MRJobConfig.CacheFileTimestamps</c> is the two modification times
        /// joined by a comma, in the order the files were added.
        /// </summary>
        public virtual void TestDetermineTimestamps()
        {
            Job job = Job.GetInstance(conf);

            job.AddCacheFile(firstCacheFile.ToUri());
            job.AddCacheFile(secondCacheFile.ToUri());
            Configuration jobConf = job.GetConfiguration();
            IDictionary<URI, FileStatus> statCache = new Dictionary<URI, FileStatus>();

            ClientDistributedCacheManager.DetermineTimestamps(jobConf, statCache);

            // Use TryGetValue instead of the indexer: the indexer throws
            // KeyNotFoundException for a missing key, which would bypass the
            // IsNotNull assertions below. TryGetValue leaves the out variable
            // null on a miss, so a missing entry surfaces as an assertion failure.
            FileStatus firstStatus;
            FileStatus secondStatus;
            statCache.TryGetValue(firstCacheFile.ToUri(), out firstStatus);
            statCache.TryGetValue(secondCacheFile.ToUri(), out secondStatus);

            NUnit.Framework.Assert.IsNotNull(firstStatus);
            NUnit.Framework.Assert.IsNotNull(secondStatus);
            NUnit.Framework.Assert.AreEqual(2, statCache.Count);

            string expected = firstStatus.GetModificationTime() + "," + secondStatus.GetModificationTime();

            NUnit.Framework.Assert.AreEqual(expected, jobConf.Get(MRJobConfig.CacheFileTimestamps));
        }
示例#3
0
        /// <summary>
        /// Submits a job on the mini cluster that uses <paramref name="jobJarPath"/>
        /// as its job jar and registers a temporary file plus several jars through
        /// the cache / class-path APIs, then asserts that the job succeeds and that
        /// its tracking URL ends with the tail of the job ID (from the last
        /// underscore onwards) followed by "/".
        /// The test logs a message and returns without running when the file at
        /// <c>MiniMRYarnCluster.Appjar</c> does not exist.
        /// </summary>
        /// <param name="jobJarPath">Path passed to <c>Job.SetJar</c>.</param>
        /// <exception cref="System.Exception"/>
        public virtual void _testDistributedCache(string jobJarPath)
        {
            if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
            {
                Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
                return;
            }

            // Create a temporary file of length 1.
            Path first = CreateTempFile("distributed.first", "x");
            // Create three jars for the class-path / cache registrations below.
            Path second = MakeJar(new Path(TestRootDir, "distributed.second.jar"), 2);
            Path third  = MakeJar(new Path(TestRootDir, "distributed.third.jar"), 3);
            Path fourth = MakeJar(new Path(TestRootDir, "distributed.fourth.jar"), 4);
            Job  job    = Job.GetInstance(mrCluster.GetConfig());

            // Set the job jar to a new "dummy" jar so we can check that it's
            // extracted properly.
            job.SetJar(jobJarPath);

            // Because the job jar is a "dummy" jar, we need to include the jar with
            // DistributedCacheChecker or it won't be able to find it.
            Path distributedCacheCheckerJar = new Path(
                JarFinder.GetJar(typeof(TestMRJobs.DistributedCacheChecker)));

            job.AddFileToClassPath(distributedCacheCheckerJar.MakeQualified(
                localFs.GetUri(), distributedCacheCheckerJar.GetParent()));
            job.SetMapperClass(typeof(TestMRJobs.DistributedCacheChecker));
            job.SetOutputFormatClass(typeof(NullOutputFormat));
            FileInputFormat.SetInputPaths(job, first);

            // Register the temporary file under a URI with a symlink fragment.
            job.AddCacheFile(new URI(first.ToUri().ToString() + "#distributed.first.symlink"));
            job.AddFileToClassPath(second);
            // The AppMaster jar itself
            job.AddFileToClassPath(AppJar.MakeQualified(localFs.GetUri(), AppJar.GetParent()));
            job.AddArchiveToClassPath(third);
            job.AddCacheArchive(fourth.ToUri());

            // A single map attempt speeds up failures.
            job.SetMaxMapAttempts(1);

            job.Submit();
            string trackingUrl = job.GetTrackingURL();
            string jobId       = job.GetJobID().ToString();

            NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(false));

            // The char overload of LastIndexOf and the explicit Ordinal comparison
            // keep the check independent of the current culture.
            string expectedSuffix = Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf('_')) + "/";

            // NUnit's Assert.IsTrue takes the condition first and the message second.
            NUnit.Framework.Assert.IsTrue(
                trackingUrl.EndsWith(expectedSuffix, System.StringComparison.Ordinal),
                "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId);
        }
示例#4
0
        /// <summary>
        /// Configures and runs the TeraSort job.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the input directory and <c>args[1]</c> the output
        /// directory. Any further elements are ignored.
        /// </param>
        /// <returns>
        /// 0 when the job completes successfully, 1 when it does not, -1 when
        /// writing the partition file throws, and 2 when fewer than two
        /// arguments are supplied.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            Log.Info("starting");

            // Reject a missing or short argument list up front instead of failing
            // later with a null-reference or index-out-of-range exception.
            if (args == null || args.Length < 2)
            {
                Log.Error("Usage: TeraSort <input dir> <output dir>");
                return 2;
            }

            Job  job                  = Job.GetInstance(GetConf());
            Path inputDir             = new Path(args[0]);
            Path outputDir            = new Path(args[1]);
            bool useSimplePartitioner = GetUseSimplePartitioner(job);

            TeraInputFormat.SetInputPaths(job, inputDir);
            FileOutputFormat.SetOutputPath(job, outputDir);
            job.SetJobName("TeraSort");
            job.SetJarByClass(typeof(TeraSort));
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(Text));
            job.SetInputFormatClass(typeof(TeraInputFormat));
            job.SetOutputFormatClass(typeof(TeraOutputFormat));

            if (useSimplePartitioner)
            {
                job.SetPartitionerClass(typeof(TeraSort.SimplePartitioner));
            }
            else
            {
                // Total-order partitioning: write a partition file under the
                // output directory and register it as a cache file. The URI
                // fragment is the partition file name itself.
                long start         = Runtime.CurrentTimeMillis();
                Path partitionFile = new Path(outputDir, TeraInputFormat.PartitionFilename);
                URI  partitionUri  = new URI(partitionFile.ToString() + "#" + TeraInputFormat.PartitionFilename);
                try
                {
                    TeraInputFormat.WritePartitionFile(job, partitionFile);
                }
                catch (Exception e)
                {
                    // Any failure while writing the partition file aborts the run
                    // with -1; only the exception message is logged.
                    Log.Error(e.Message);
                    return -1;
                }
                job.AddCacheFile(partitionUri);
                long end = Runtime.CurrentTimeMillis();
                System.Console.Out.WriteLine("Spent " + (end - start) + "ms computing partitions.");
                job.SetPartitionerClass(typeof(TeraSort.TotalOrderPartitioner));
            }

            job.GetConfiguration().SetInt("dfs.replication", GetOutputReplication(job));
            TeraOutputFormat.SetFinalSync(job, true);
            int ret = job.WaitForCompletion(true) ? 0 : 1;

            Log.Info("done");
            return ret;
        }