// Copy the user-specified log4j.properties file from the local filesystem
        // to HDFS, put it on the distributed cache, and add its parent
        // directory to the classpath.
        /// <exception cref="System.IO.IOException"/>
        private void CopyLog4jPropertyFile(Job job, Path submitJobDir, short replication)
        {
            Configuration conf = job.GetConfiguration();
            string        file = ValidateFilePath(conf.Get(MRJobConfig.MapreduceJobLog4jPropertiesFile
                                                           ), conf);

            Log.Debug("default FileSystem: " + jtFs.GetUri());
            FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission
                                                           );

            if (!jtFs.Exists(submitJobDir))
            {
                throw new IOException("Cannot find job submission directory! " + "It should just be created, so something wrong here."
                                      );
            }
            Path fileDir = JobSubmissionFiles.GetJobLog4jFile(submitJobDir);

            // first copy local log4j.properties file to HDFS under submitJobDir
            if (file != null)
            {
                FileSystem.Mkdirs(jtFs, fileDir, mapredSysPerms);
                URI tmpURI = null;
                try
                {
                    tmpURI = new URI(file);
                }
                catch (URISyntaxException e)
                {
                    throw new ArgumentException(e);
                }
                Path tmp     = new Path(tmpURI);
                Path newPath = CopyRemoteFiles(fileDir, tmp, conf, replication);
                DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf);
            }
        }
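
        // CopyRemoteFiles and ValidateFilePath are referenced above but are not
        // shown on this page. The sketch below is a hypothetical reconstruction
        // of a CopyRemoteFiles helper, assuming the FileSystem/FileUtil APIs used
        // elsewhere in this file: if the source already lives on the JobTracker's
        // filesystem it is returned unchanged; otherwise it is copied into
        // parentDir with the requested replication factor. The URI-equality check
        // is a simplification of a fuller filesystem comparison.
        private Path CopyRemoteFiles(Path parentDir, Path originalPath, Configuration
                                     conf, short replication)
        {
            FileSystem remoteFs = originalPath.GetFileSystem(conf);
            if (remoteFs.GetUri().Equals(jtFs.GetUri()))
            {
                // Source already resides on the target filesystem; no copy needed.
                return originalPath;
            }
            Path newPath = new Path(parentDir, originalPath.GetName());
            FileUtil.Copy(remoteFs, originalPath, jtFs, newPath, false, conf);
            jtFs.SetReplication(newPath, replication);
            return newPath;
        }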
        /// <exception cref="System.Exception"/>
        public virtual void TestJobSubmissionSpecsAndFiles()
        {
            Configuration conf = CreateJobConf();
            Job           job  = MapReduceTestUtil.CreateJob(conf, GetInputDir(), GetOutputDir(), 1, 1);

            job.SetOutputFormatClass(typeof(TestMRJobClient.BadOutputFormat));
            try
            {
                job.Submit();
                Fail("Should've thrown an exception while checking output specs.");
            }
            catch (Exception e)
            {
                NUnit.Framework.Assert.IsTrue(e is IOException);
            }
            Cluster cluster        = new Cluster(conf);
            Path    jobStagingArea = JobSubmissionFiles.GetStagingDir(cluster, job.GetConfiguration
                                                                          ());
            Path submitJobDir  = new Path(jobStagingArea, "JobId");
            Path submitJobFile = JobSubmissionFiles.GetJobConfPath(submitJobDir);

            NUnit.Framework.Assert.IsFalse("Shouldn't have created a job file if job specs failed."
                                           , FileSystem.Get(conf).Exists(submitJobFile));
        }
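
        // BadOutputFormat is defined in TestMRJobClient and not shown on this
        // page. A minimal sketch of what such a class might look like, assuming
        // it only needs to fail the output-spec check so that Submit() throws an
        // IOException before any job files are written:
        internal class BadOutputFormat : TextOutputFormat<object, object>
        {
            public override void CheckOutputSpecs(JobContext context)
            {
                // Always reject the output specification.
                throw new IOException("deliberately failing output-spec check");
            }
        }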
        /// <summary>
        /// Upload and configure files, libjars, jobjars, and archives pertaining to
        /// the passed job.
        /// </summary>
        /// <param name="job">the job containing the files to be uploaded</param>
        /// <param name="submitJobDir">the submission directory of the job</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void UploadFiles(Job job, Path submitJobDir)
        {
            Configuration conf        = job.GetConfiguration();
            short         replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication
                                                           );

            if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
            {
                Log.Warn("Hadoop command-line option parsing not performed. " + "Implement the Tool interface and execute your application "
                         + "with ToolRunner to remedy this.");
            }
            // get all the command-line arguments passed in by the user via conf
            string files    = conf.Get("tmpfiles");
            string libjars  = conf.Get("tmpjars");
            string archives = conf.Get("tmparchives");
            string jobJar   = job.GetJar();

            //
            // Figure out what fs the JobTracker is using. Copy the
            // job to it, under a temporary name. This allows DFS to work,
            // and under the local fs also provides UNIX-like object loading
            // semantics. (that is, if the job file is deleted right after
            // submission, we can still run the submission to completion)
            //
            // Create a number of filenames in the JobTracker's fs namespace
            Log.Debug("default FileSystem: " + jtFs.GetUri());
            if (jtFs.Exists(submitJobDir))
            {
                throw new IOException("Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in"
                                      + " that directory");
            }
            submitJobDir = jtFs.MakeQualified(submitJobDir);
            submitJobDir = new Path(submitJobDir.ToUri().GetPath());
            FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission
                                                           );

            FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);
            Path filesDir    = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
            Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
            Path libjarsDir  = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);

            // add all the command-line files, jars, and archives;
            // first copy them to the JobTracker's filesystem
            if (files != null)
            {
                FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
                string[] fileArr = files.Split(",");
                foreach (string tmpFile in fileArr)
                {
                    URI tmpURI = null;
                    try
                    {
                        tmpURI = new URI(tmpFile);
                    }
                    catch (URISyntaxException e)
                    {
                        throw new ArgumentException(e);
                    }
                    Path tmp     = new Path(tmpURI);
                    Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
                    try
                    {
                        URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                        DistributedCache.AddCacheFile(pathURI, conf);
                    }
                    catch (URISyntaxException ue)
                    {
                        // should not throw a URI exception
                        throw new IOException("Failed to create uri for " + tmpFile, ue);
                    }
                }
            }
            if (libjars != null)
            {
                FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
                string[] libjarsArr = libjars.Split(",");
                foreach (string tmpjars in libjarsArr)
                {
                    Path tmp     = new Path(tmpjars);
                    Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
                    DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs
                                                        );
                }
            }
            if (archives != null)
            {
                FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
                string[] archivesArr = archives.Split(",");
                foreach (string tmpArchives in archivesArr)
                {
                    URI tmpURI;
                    try
                    {
                        tmpURI = new URI(tmpArchives);
                    }
                    catch (URISyntaxException e)
                    {
                        throw new ArgumentException(e);
                    }
                    Path tmp     = new Path(tmpURI);
                    Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
                    try
                    {
                        URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                        DistributedCache.AddCacheArchive(pathURI, conf);
                    }
                    catch (URISyntaxException ue)
                    {
                        // should not throw a URI exception
                        throw new IOException("Failed to create uri for " + tmpArchives, ue);
                    }
                }
            }
            if (jobJar != null)
            {
                // copy jar to JobTracker's fs
                // use jar name if job is not named.
                if (string.Empty.Equals(job.GetJobName()))
                {
                    job.SetJobName(new Path(jobJar).GetName());
                }
                Path jobJarPath = new Path(jobJar);
                URI  jobJarURI  = jobJarPath.ToUri();
                // If the job jar is already in a global fs,
                // we don't need to copy it from local fs
                if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
                {
                    CopyJar(jobJarPath, JobSubmissionFiles.GetJobJar(submitJobDir), replication);
                    job.SetJar(JobSubmissionFiles.GetJobJar(submitJobDir).ToString());
                }
            }
            else
            {
                Log.Warn("No job jar file set.  User classes may not be found. " + "See Job or Job#setJar(String)."
                         );
            }
            AddLog4jToDistributedCache(job, submitJobDir);
            // set the timestamps of the archives and files
            // set the public/private visibility of the archives and files
            ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);
            // get DelegationToken for cached file
            ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
        }
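
        // GetPathURI is referenced above but not shown on this page. A
        // hypothetical reconstruction: it re-attaches the user-supplied
        // "#fragment" (the symlink name) to the destination path's URI, falling
        // back to the file name, so the DistributedCache can recreate the link
        // on the task nodes.
        private URI GetPathURI(Path destPath, string fragment)
        {
            URI pathURI = destPath.ToUri();
            if (pathURI.GetFragment() == null)
            {
                string name = fragment == null ? destPath.GetName() : fragment;
                pathURI = new URI(pathURI.ToString() + "#" + name);
            }
            return pathURI;
        }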
        /// <summary>Internal method for submitting jobs to the system.</summary>
        /// <remarks>
        /// Internal method for submitting jobs to the system.
        /// <p>The job submission process involves:
        /// <ol>
        /// <li>
        /// Checking the input and output specifications of the job.
        /// </li>
        /// <li>
        /// Computing the
        /// <see cref="InputSplit"/>
        /// s for the job.
        /// </li>
        /// <li>
        /// Setup the requisite accounting information for the
        /// <see cref="Org.Apache.Hadoop.Mapreduce.Filecache.DistributedCache"/>
        /// of the job, if necessary.
        /// </li>
        /// <li>
        /// Copying the job's jar and configuration to the map-reduce system
        /// directory on the distributed file-system.
        /// </li>
        /// <li>
        /// Submitting the job to the <code>JobTracker</code> and optionally
        /// monitoring its status.
        /// </li>
        /// </ol></p>
        /// </remarks>
        /// <param name="job">the configuration to submit</param>
        /// <param name="cluster">the handle to the Cluster</param>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.IO.IOException"/>
        internal virtual JobStatus SubmitJobInternal(Job job, Cluster cluster)
        {
            // validate the job's output specs
            CheckSpecs(job);
            Configuration conf = job.GetConfiguration();

            AddMRFrameworkToDistributedCache(conf);
            Path jobStagingArea = JobSubmissionFiles.GetStagingDir(cluster, conf);
            // configure the command-line options correctly on the submitting dfs
            IPAddress ip = Sharpen.Runtime.GetLocalHost();

            if (ip != null)
            {
                submitHostAddress = ip.GetHostAddress();
                submitHostName    = ip.GetHostName();
                conf.Set(MRJobConfig.JobSubmithost, submitHostName);
                conf.Set(MRJobConfig.JobSubmithostaddr, submitHostAddress);
            }
            JobID jobId = submitClient.GetNewJobID();

            job.SetJobID(jobId);
            Path      submitJobDir = new Path(jobStagingArea, jobId.ToString());
            JobStatus status       = null;

            try
            {
                conf.Set(MRJobConfig.UserName, UserGroupInformation.GetCurrentUser().GetShortUserName
                             ());
                conf.Set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer"
                         );
                conf.Set(MRJobConfig.MapreduceJobDir, submitJobDir.ToString());
                Log.Debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir"
                          );
                // get delegation token for the dir
                TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), new Path[] { submitJobDir }, conf);
                PopulateTokenCache(conf, job.GetCredentials());
                // generate a secret to authenticate shuffle transfers
                if (TokenCache.GetShuffleSecretKey(job.GetCredentials()) == null)
                {
                    KeyGenerator keyGen;
                    try
                    {
                        keyGen = KeyGenerator.GetInstance(ShuffleKeygenAlgorithm);
                        keyGen.Init(ShuffleKeyLength);
                    }
                    catch (NoSuchAlgorithmException e)
                    {
                        throw new IOException("Error generating shuffle secret key", e);
                    }
                    SecretKey shuffleKey = keyGen.GenerateKey();
                    TokenCache.SetShuffleSecretKey(shuffleKey.GetEncoded(), job.GetCredentials());
                }
                if (CryptoUtils.IsEncryptedSpillEnabled(conf))
                {
                    conf.SetInt(MRJobConfig.MrAmMaxAttempts, 1);
                    Log.Warn("Max job attempts set to 1 since encrypted intermediate" + "data spill is enabled"
                             );
                }
                CopyAndConfigureFiles(job, submitJobDir);
                Path submitJobFile = JobSubmissionFiles.GetJobConfPath(submitJobDir);
                // Create the splits for the job
                Log.Debug("Creating splits at " + jtFs.MakeQualified(submitJobDir));
                int maps = WriteSplits(job, submitJobDir);
                conf.SetInt(MRJobConfig.NumMaps, maps);
                Log.Info("number of splits:" + maps);
                // write "queue admins of the queue to which job is being submitted"
                // to job file.
                string            queue = conf.Get(MRJobConfig.QueueName, JobConf.DefaultQueueName);
                AccessControlList acl   = submitClient.GetQueueAdmins(queue);
                conf.Set(QueueManager.ToFullPropertyName(queue, QueueACL.AdministerJobs.GetAclName
                                                             ()), acl.GetAclString());
                // removing jobtoken referrals before copying the jobconf to HDFS;
                // the tasks don't need this setting and may even break if it is
                // present, since the referral would point to a different job.
                TokenCache.CleanUpTokenReferral(conf);
                if (conf.GetBoolean(MRJobConfig.JobTokenTrackingIdsEnabled, MRJobConfig.DefaultJobTokenTrackingIdsEnabled
                                    ))
                {
                    // Add HDFS tracking ids
                    AList <string> trackingIds = new AList <string>();
                    foreach (Org.Apache.Hadoop.Security.Token.Token <TokenIdentifier> t in job.GetCredentials
                                 ().GetAllTokens())
                    {
                        trackingIds.AddItem(t.DecodeIdentifier().GetTrackingId());
                    }
                    conf.SetStrings(MRJobConfig.JobTokenTrackingIds, Sharpen.Collections.ToArray(trackingIds
                                                                                                 , new string[trackingIds.Count]));
                }
                // Set reservation info if it exists
                ReservationId reservationId = job.GetReservationId();
                if (reservationId != null)
                {
                    conf.Set(MRJobConfig.ReservationId, reservationId.ToString());
                }
                // Write job file to submit dir
                WriteConf(conf, submitJobFile);
                //
                // Now, actually submit the job (using the submit name)
                //
                PrintTokens(jobId, job.GetCredentials());
                status = submitClient.SubmitJob(jobId, submitJobDir.ToString(), job.GetCredentials
                                                    ());
                if (status != null)
                {
                    return(status);
                }
                else
                {
                    throw new IOException("Could not launch job");
                }
            }
            finally
            {
                if (status == null)
                {
                    Log.Info("Cleaning up the staging area " + submitJobDir);
                    if (jtFs != null && submitJobDir != null)
                    {
                        jtFs.Delete(submitJobDir, true);
                    }
                }
            }
        }
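
        // For context: applications do not call SubmitJobInternal directly; they
        // go through the public Job API, which constructs a JobSubmitter and
        // eventually lands here. A minimal hypothetical driver (the job name and
        // paths are illustrative only):
        public static bool RunExampleJob(Configuration conf)
        {
            Job job = Job.GetInstance(conf, "example-job");
            FileInputFormat.AddInputPath(job, new Path("/tmp/example/in"));
            FileOutputFormat.SetOutputPath(job, new Path("/tmp/example/out"));
            // WaitForCompletion() calls Submit(), which delegates to
            // JobSubmitter.SubmitJobInternal(job, cluster), then polls status.
            return job.WaitForCompletion(true);
        }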