Beispiel #1
0
        /// <summary>
        /// Registers one archive and one file, both addressed through the mocked
        /// <c>mockfs</c> scheme, in the distributed cache settings of a fresh configuration and
        /// checks that <c>MRApps.SetupDistributedCache</c> yields one local resource for each,
        /// carrying the configured size, timestamp and resource type.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestSetupDistributedCache()
        {
            Configuration conf = new Configuration();
            conf.SetClass("fs.mockfs.impl", typeof(TestMRApps.MockFileSystem), typeof(FileSystem));

            // The file system returned for the mock scheme is a FilterFileSystem wrapper;
            // the object to stub is the raw file system underneath it.
            URI mockUri = URI.Create("mockfs://mock/");
            FilterFileSystem wrapper = (FilterFileSystem)FileSystem.Get(mockUri, conf);
            FileSystem mockFs = wrapper.GetRawFileSystem();

            URI archive = new URI("mockfs://mock/tmp/something.zip");
            Path archivePath = new Path(archive);
            // The "#something" fragment is the key the file is looked up under further down.
            URI file = new URI("mockfs://mock/tmp/something.txt#something");
            Path filePath = new Path(file);

            // Path resolution on the mock simply echoes the path it was given.
            Org.Mockito.Mockito.When(mockFs.ResolvePath(archivePath)).ThenReturn(archivePath);
            Org.Mockito.Mockito.When(mockFs.ResolvePath(filePath)).ThenReturn(filePath);

            // Archive entry: timestamp 10, size 10.
            DistributedCache.AddCacheArchive(archive, conf);
            conf.Set(MRJobConfig.CacheArchivesTimestamps, "10");
            conf.Set(MRJobConfig.CacheArchivesSizes, "10");
            conf.Set(MRJobConfig.CacheArchivesVisibilities, "true");

            // File entry: timestamp 11, size 11.
            DistributedCache.AddCacheFile(file, conf);
            conf.Set(MRJobConfig.CacheFileTimestamps, "11");
            conf.Set(MRJobConfig.CacheFilesSizes, "11");
            conf.Set(MRJobConfig.CacheFileVisibilities, "true");

            IDictionary<string, LocalResource> localResources = new Dictionary<string, LocalResource>();
            MRApps.SetupDistributedCache(conf, localResources);

            NUnit.Framework.Assert.AreEqual(2, localResources.Count);

            LocalResource archiveResource = localResources["something.zip"];
            NUnit.Framework.Assert.IsNotNull(archiveResource);
            NUnit.Framework.Assert.AreEqual(10L, archiveResource.GetSize());
            NUnit.Framework.Assert.AreEqual(10L, archiveResource.GetTimestamp());
            NUnit.Framework.Assert.AreEqual(LocalResourceType.Archive, archiveResource.GetType());

            LocalResource fileResource = localResources["something"];
            NUnit.Framework.Assert.IsNotNull(fileResource);
            NUnit.Framework.Assert.AreEqual(11L, fileResource.GetSize());
            NUnit.Framework.Assert.AreEqual(11L, fileResource.GetTimestamp());
            NUnit.Framework.Assert.AreEqual(LocalResourceType.File, fileResource.GetType());
        }
Beispiel #2
0
        /// <summary>
        /// Adds the configured MapReduce application framework, if any, to the distributed
        /// cache as a cache archive.
        /// </summary>
        /// <remarks>
        /// The location is read from <c>MRJobConfig.MapreduceApplicationFrameworkPath</c>. When
        /// that setting is empty nothing happens. Otherwise the path is qualified against the
        /// default file system, resolved through a <c>FileContext</c>, and re-combined with the
        /// fragment of the configured URI before being registered.
        /// </remarks>
        /// <param name="conf">configuration that is read and that receives the cache archive entry</param>
        /// <exception cref="System.ArgumentException">
        /// the configured value cannot be parsed as a URI, or the URI rebuilt from the resolved
        /// path and the original fragment is invalid
        /// </exception>
        /// <exception cref="System.IO.IOException"/>
        private static void AddMRFrameworkToDistributedCache(Configuration conf)
        {
            string framework = conf.Get(MRJobConfig.MapreduceApplicationFrameworkPath, string.Empty);

            if (string.IsNullOrEmpty(framework))
            {
                // No framework configured: nothing to add.
                return;
            }

            URI uri;
            try
            {
                uri = new URI(framework);
            }
            catch (URISyntaxException e)
            {
                throw new ArgumentException("Unable to parse '" + framework + "' as a URI, check the setting for "
                                            + MRJobConfig.MapreduceApplicationFrameworkPath, e);
            }

            // Remember the fragment now; it is dropped while the path is resolved and
            // re-attached afterwards.
            string linkedName = uri.GetFragment();

            // resolve any symlinks in the URI path so using a "current" symlink
            // to point to a specific version shows the specific version
            // in the distributed cache configuration
            FileSystem fs = FileSystem.Get(conf);
            Path frameworkPath = fs.MakeQualified(new Path(uri.GetScheme(), uri.GetAuthority(), uri.GetPath()));
            FileContext fc = FileContext.GetFileContext(frameworkPath.ToUri(), conf);
            frameworkPath = fc.ResolvePath(frameworkPath);
            uri = frameworkPath.ToUri();

            try
            {
                uri = new URI(uri.GetScheme(), uri.GetAuthority(), uri.GetPath(), null, linkedName);
            }
            catch (URISyntaxException e)
            {
                // ArgumentException has no (Exception) constructor; supply a message and keep
                // the original failure as the inner exception.
                throw new ArgumentException("Unable to build a URI from resolved framework path '" + frameworkPath
                                            + "' and fragment '" + linkedName + "'", e);
            }

            DistributedCache.AddCacheArchive(uri, conf);
        }
        /// <summary>
        /// Walks through a series of <c>DistributedCache</c> helper methods (deprecated ones,
        /// going by the test name) and checks after each call what ends up in the configuration
        /// and what the matching getter returns.
        /// </summary>
        /// <remarks>
        /// Uses the <c>conf</c> and <c>symlinkFile</c> members, which are declared outside this
        /// method. The statements build on each other's effect on <c>conf</c>, so their order
        /// matters.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDeprecatedFunctions()
        {
            // Local archives: Add* appends to a comma-separated list, Set* replaces the list.
            DistributedCache.AddLocalArchives(conf, "Test Local Archives 1");
            NUnit.Framework.Assert.AreEqual("Test Local Archives 1", conf.Get(DistributedCache
                                                                              .CacheLocalarchives));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheArchives(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual("Test Local Archives 1", DistributedCache.GetLocalCacheArchives
                                                (conf)[0].GetName());
            DistributedCache.AddLocalArchives(conf, "Test Local Archives 2");
            NUnit.Framework.Assert.AreEqual("Test Local Archives 1,Test Local Archives 2", conf
                                            .Get(DistributedCache.CacheLocalarchives));
            NUnit.Framework.Assert.AreEqual(2, DistributedCache.GetLocalCacheArchives(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual("Test Local Archives 2", DistributedCache.GetLocalCacheArchives
                                                (conf)[1].GetName());
            DistributedCache.SetLocalArchives(conf, "Test Local Archives 3");
            NUnit.Framework.Assert.AreEqual("Test Local Archives 3", conf.Get(DistributedCache
                                                                              .CacheLocalarchives));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheArchives(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual("Test Local Archives 3", DistributedCache.GetLocalCacheArchives
                                                (conf)[0].GetName());
            // Local files: same add / add / set sequence as for the archives above.
            DistributedCache.AddLocalFiles(conf, "Test Local Files 1");
            NUnit.Framework.Assert.AreEqual("Test Local Files 1", conf.Get(DistributedCache.CacheLocalfiles
                                                                           ));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheFiles(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual("Test Local Files 1", DistributedCache.GetLocalCacheFiles
                                                (conf)[0].GetName());
            DistributedCache.AddLocalFiles(conf, "Test Local Files 2");
            NUnit.Framework.Assert.AreEqual("Test Local Files 1,Test Local Files 2", conf.Get
                                                (DistributedCache.CacheLocalfiles));
            NUnit.Framework.Assert.AreEqual(2, DistributedCache.GetLocalCacheFiles(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual("Test Local Files 2", DistributedCache.GetLocalCacheFiles
                                                (conf)[1].GetName());
            DistributedCache.SetLocalFiles(conf, "Test Local Files 3");
            NUnit.Framework.Assert.AreEqual("Test Local Files 3", conf.Get(DistributedCache.CacheLocalfiles
                                                                           ));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetLocalCacheFiles(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual("Test Local Files 3", DistributedCache.GetLocalCacheFiles
                                                (conf)[0].GetName());
            // Archive timestamps: the string is stored in the configuration and read back as a number.
            DistributedCache.SetArchiveTimestamps(conf, "1234567890");
            NUnit.Framework.Assert.AreEqual(1234567890, conf.GetLong(DistributedCache.CacheArchivesTimestamps
                                                                     , 0));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetArchiveTimestamps(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual(1234567890, DistributedCache.GetArchiveTimestamps
                                                (conf)[0]);
            // File timestamps: same checks as for the archive timestamps.
            DistributedCache.SetFileTimestamps(conf, "1234567890");
            NUnit.Framework.Assert.AreEqual(1234567890, conf.GetLong(DistributedCache.CacheFilesTimestamps
                                                                     , 0));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetFileTimestamps(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual(1234567890, DistributedCache.GetFileTimestamps(conf
                                                                                           )[0]);
            // Symlink handling: after CreateAllSymlink the CacheSymlink key is expected to be
            // unset while GetSymlink is still expected to report true.
            DistributedCache.CreateAllSymlink(conf, new FilePath("Test Job Cache Dir"), new FilePath
                                                  ("Test Work Dir"));
            NUnit.Framework.Assert.IsNull(conf.Get(DistributedCache.CacheSymlink));
            NUnit.Framework.Assert.IsTrue(DistributedCache.GetSymlink(conf));
            // File status and timestamp lookup need the file to exist, so it is created here
            // and deleted again once the two values have been compared.
            NUnit.Framework.Assert.IsTrue(symlinkFile.CreateNewFile());
            FileStatus fileStatus = DistributedCache.GetFileStatus(conf, symlinkFile.ToURI());

            NUnit.Framework.Assert.IsNotNull(fileStatus);
            NUnit.Framework.Assert.AreEqual(fileStatus.GetModificationTime(), DistributedCache
                                            .GetTimestamp(conf, symlinkFile.ToURI()));
            NUnit.Framework.Assert.IsTrue(symlinkFile.Delete());
            // Cache archives: the URI is stored as a string and returned as a one-element array.
            DistributedCache.AddCacheArchive(symlinkFile.ToURI(), conf);
            NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI().ToString(), conf.Get(DistributedCache
                                                                                     .CacheArchives));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetCacheArchives(conf).Length
                                            );
            NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI(), DistributedCache.GetCacheArchives
                                                (conf)[0]);
            // Cache files: same checks as for the cache archives.
            DistributedCache.AddCacheFile(symlinkFile.ToURI(), conf);
            NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI().ToString(), conf.Get(DistributedCache
                                                                                     .CacheFiles));
            NUnit.Framework.Assert.AreEqual(1, DistributedCache.GetCacheFiles(conf).Length);
            NUnit.Framework.Assert.AreEqual(symlinkFile.ToURI(), DistributedCache.GetCacheFiles
                                                (conf)[0]);
        }
        /// <summary>
        /// Upload and configure files, libjars, jobjars, and archives pertaining to
        /// the passed job.
        /// </summary>
        /// <remarks>
        /// The comma-separated lists are read from the <c>tmpfiles</c>, <c>tmpjars</c> and
        /// <c>tmparchives</c> configuration keys. Each entry is copied below
        /// <paramref name="submitJobDir"/> and then registered with the distributed cache. The
        /// job jar is copied only when its URI has no scheme or the <c>file</c> scheme.
        /// </remarks>
        /// <param name="job">the job containing the files to be uploaded</param>
        /// <param name="submitJobDir">the submission directory of the job</param>
        /// <exception cref="System.ArgumentException">
        /// an entry of <c>tmpfiles</c> or <c>tmparchives</c> cannot be parsed as a URI
        /// </exception>
        /// <exception cref="System.IO.IOException">
        /// the submission directory already exists, or a cache URI cannot be built for a copied
        /// file or archive
        /// </exception>
        public virtual void UploadFiles(Job job, Path submitJobDir)
        {
            Configuration conf = job.GetConfiguration();
            short replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication);

            if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
            {
                Log.Warn("Hadoop command-line option parsing not performed. " + "Implement the Tool interface and execute your application "
                         + "with ToolRunner to remedy this.");
            }

            // get all the command line arguments passed in by the user conf
            string files = conf.Get("tmpfiles");
            string libjars = conf.Get("tmpjars");
            string archives = conf.Get("tmparchives");
            string jobJar = job.GetJar();

            //
            // Figure out what fs the JobTracker is using. Copy the
            // job to it, under a temporary name. This allows DFS to work,
            // and under the local fs also provides UNIX-like object loading
            // semantics. (that is, if the job file is deleted right after
            // submission, we can still run the submission to completion)
            //
            // Create a number of filenames in the JobTracker's fs namespace
            Log.Debug("default FileSystem: " + jtFs.GetUri());
            if (jtFs.Exists(submitJobDir))
            {
                throw new IOException("Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in"
                                      + " that directory");
            }

            // Qualify against the JobTracker file system, then keep only the path component.
            submitJobDir = jtFs.MakeQualified(submitJobDir);
            submitJobDir = new Path(submitJobDir.ToUri().GetPath());
            FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);

            FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);
            Path filesDir = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
            Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
            Path libjarsDir = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);

            // add all the command line files/ jars and archive
            // first copy them to jobtrackers filesystem
            if (files != null)
            {
                FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
                string[] fileArr = files.Split(",");
                foreach (string tmpFile in fileArr)
                {
                    URI tmpURI = ParseCacheUri(tmpFile);
                    Path tmp = new Path(tmpURI);
                    Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
                    try
                    {
                        // Carry the fragment of the user-supplied URI over to the copied location.
                        URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                        DistributedCache.AddCacheFile(pathURI, conf);
                    }
                    catch (URISyntaxException ue)
                    {
                        // should not throw a uri exception
                        throw new IOException("Failed to create uri for " + tmpFile, ue);
                    }
                }
            }

            if (libjars != null)
            {
                FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
                string[] libjarsArr = libjars.Split(",");
                foreach (string tmpjars in libjarsArr)
                {
                    Path tmp = new Path(tmpjars);
                    Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
                    DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs);
                }
            }

            if (archives != null)
            {
                FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
                string[] archivesArr = archives.Split(",");
                foreach (string tmpArchives in archivesArr)
                {
                    URI tmpURI = ParseCacheUri(tmpArchives);
                    Path tmp = new Path(tmpURI);
                    Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
                    try
                    {
                        // Carry the fragment of the user-supplied URI over to the copied location.
                        URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                        DistributedCache.AddCacheArchive(pathURI, conf);
                    }
                    catch (URISyntaxException ue)
                    {
                        // should not throw a uri exception
                        throw new IOException("Failed to create uri for " + tmpArchives, ue);
                    }
                }
            }

            if (jobJar != null)
            {
                // copy jar to JobTracker's fs
                // use jar name if job is not named.
                if (string.Empty.Equals(job.GetJobName()))
                {
                    job.SetJobName(new Path(jobJar).GetName());
                }
                Path jobJarPath = new Path(jobJar);
                URI jobJarURI = jobJarPath.ToUri();
                // If the job jar is already in a global fs,
                // we don't need to copy it from local fs
                if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
                {
                    CopyJar(jobJarPath, JobSubmissionFiles.GetJobJar(submitJobDir), replication);
                    job.SetJar(JobSubmissionFiles.GetJobJar(submitJobDir).ToString());
                }
            }
            else
            {
                Log.Warn("No job jar file set.  User classes may not be found. " + "See Job or Job#setJar(String).");
            }

            AddLog4jToDistributedCache(job, submitJobDir);
            // set the timestamps of the archives and files
            // set the public/private visibility of the archives and files
            ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);
            // get DelegationToken for cached file
            ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
        }

        /// <summary>
        /// Parses one user-supplied cache entry as a URI, reporting a malformed entry as an
        /// <see cref="System.ArgumentException"/> that names the entry and keeps the cause.
        /// </summary>
        private static URI ParseCacheUri(string entry)
        {
            try
            {
                return new URI(entry);
            }
            catch (URISyntaxException e)
            {
                // ArgumentException has no (Exception) constructor; pass a message and the cause.
                throw new ArgumentException("Unable to parse '" + entry + "' as a URI", e);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Writes <paramref name="input"/> to a single input file, runs the "cachetest" job with
        /// one cache file and five cache archives taken from <paramref name="cacheDir"/>, and
        /// then verifies the <c>test.txt</c> file found under the test root directory.
        /// </summary>
        /// <param name="indir">input directory; created, and receives the file <c>part-0</c></param>
        /// <param name="outdir">output directory; deleted recursively before the job runs</param>
        /// <param name="cacheDir">directory, resolved against the file system URI, holding the <c>test.*</c> cache entries</param>
        /// <param name="conf">job configuration; modified and used to run the job</param>
        /// <param name="input">text written to the input file</param>
        /// <returns>
        /// a result flagged <c>false</c> when a line of the checked file differs from
        /// <c>testStr</c> or the file does not contain exactly 6 lines; otherwise a result
        /// flagged <c>true</c>, returned after the recorded cache sizes have been validated
        /// </returns>
        /// <exception cref="System.IO.IOException">the input directory cannot be created</exception>
        public static MRCaching.TestResult LaunchMRCache(string indir, string outdir, string
                                                         cacheDir, JobConf conf, string input)
        {
            string testRootDir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).ToString().Replace(' ', '+');

            conf.Set("test.build.data", testRootDir);
            Path inDir = new Path(indir);
            Path outDir = new Path(outdir);
            FileSystem fs = FileSystem.Get(conf);

            fs.Delete(outDir, true);
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }

            System.Console.Out.WriteLine("HERE:" + inDir);
            DataOutputStream inputFile = fs.Create(new Path(inDir, "part-0"));
            try
            {
                inputFile.WriteBytes(input);
            }
            finally
            {
                // Close even when the write fails so the stream is not leaked.
                inputFile.Close();
            }

            conf.SetJobName("cachetest");
            // the keys are words (strings)
            conf.SetOutputKeyClass(typeof(Text));
            // the values are counts (ints)
            conf.SetOutputValueClass(typeof(IntWritable));
            conf.SetCombinerClass(typeof(MRCaching.ReduceClass));
            conf.SetReducerClass(typeof(MRCaching.ReduceClass));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(1);
            conf.SetNumReduceTasks(1);
            conf.SetSpeculativeExecution(false);
            conf.SetMapperClass(typeof(MRCaching.MapClass2));

            // Entry 0 is registered as a cache file, entries 1..5 as cache archives.
            URI[] uris = new URI[6];
            uris[0] = fs.GetUri().Resolve(cacheDir + "/test.txt");
            uris[1] = fs.GetUri().Resolve(cacheDir + "/test.jar");
            uris[2] = fs.GetUri().Resolve(cacheDir + "/test.zip");
            uris[3] = fs.GetUri().Resolve(cacheDir + "/test.tgz");
            uris[4] = fs.GetUri().Resolve(cacheDir + "/test.tar.gz");
            uris[5] = fs.GetUri().Resolve(cacheDir + "/test.tar");
            DistributedCache.AddCacheFile(uris[0], conf);

            // Save expected file sizes
            long[] fileSizes = new long[1];
            fileSizes[0] = fs.GetFileStatus(new Path(uris[0].GetPath())).GetLen();
            long[] archiveSizes = new long[5];
            for (int i = 1; i < 6; i++)
            {
                DistributedCache.AddCacheArchive(uris[i], conf);
                archiveSizes[i - 1] = fs.GetFileStatus(new Path(uris[i].GetPath())).GetLen();
            }

            RunningJob job = JobClient.RunJob(conf);
            int count = 0;

            // After the job ran, every line of test.txt under the test root directory must
            // equal testStr, and there must be exactly 6 such lines.
            Path result = new Path(testRootDir + "/test.txt");
            BufferedReader reader = new BufferedReader(new InputStreamReader(FileSystem.GetLocal(conf).Open(result)));
            try
            {
                string line = reader.ReadLine();
                while (line != null)
                {
                    if (!testStr.Equals(line))
                    {
                        return(new MRCaching.TestResult(job, false));
                    }
                    count++;
                    line = reader.ReadLine();
                }
            }
            finally
            {
                // Runs on the early return above as well, so the reader is always closed.
                reader.Close();
            }

            if (count != 6)
            {
                return(new MRCaching.TestResult(job, false));
            }

            // Check to ensure the filesizes of files in DC were correctly saved.
            // Note, the underlying job clones the original conf before determine
            // various stats (timestamps etc.), so we have to getConfiguration here.
            ValidateCacheFileSizes(job.GetConfiguration(), fileSizes, MRJobConfig.CacheFilesSizes);
            ValidateCacheFileSizes(job.GetConfiguration(), archiveSizes, MRJobConfig.CacheArchivesSizes);
            return(new MRCaching.TestResult(job, true));
        }