/// <summary>
/// Copies the user-specified log4j.properties file from the local filesystem
/// to HDFS, puts it on the distributed cache, and adds its parent directory
/// to the classpath.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void CopyLog4jPropertyFile(Job job, Path submitJobDir, short replication)
{
    Configuration conf = job.GetConfiguration();
    string file = ValidateFilePath(conf.Get(MRJobConfig.MapreduceJobLog4jPropertiesFile), conf);
    Log.Debug("default FileSystem: " + jtFs.GetUri());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);
    if (!jtFs.Exists(submitJobDir))
    {
        throw new IOException("Cannot find job submission directory! " +
            "It should have just been created, so something is wrong here.");
    }
    Path fileDir = JobSubmissionFiles.GetJobLog4jFile(submitJobDir);
    // First copy the local log4j.properties file to HDFS under submitJobDir.
    if (file != null)
    {
        FileSystem.Mkdirs(jtFs, fileDir, mapredSysPerms);
        URI tmpURI = null;
        try
        {
            tmpURI = new URI(file);
        }
        catch (URISyntaxException e)
        {
            throw new ArgumentException(e);
        }
        Path tmp = new Path(tmpURI);
        Path newPath = CopyRemoteFiles(fileDir, tmp, conf, replication);
        DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf);
    }
}
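// A minimal usage sketch (not part of the original source): the copy above is driven
// entirely by MRJobConfig.MapreduceJobLog4jPropertiesFile, so a client opts in by
// pointing that key at a local log4j.properties file before submitting. The method
// name and path below are hypothetical.
private static void EnableJobLog4jProperties(Configuration conf)
{
    // CopyLog4jPropertyFile() validates this path, copies the file into the job's
    // submission directory on HDFS, and adds it to the task classpath.
    conf.Set(MRJobConfig.MapreduceJobLog4jPropertiesFile, "/home/user/log4j.properties");
}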
/// <exception cref="System.Exception"/>
public virtual void TestJobSubmissionSpecsAndFiles()
{
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, GetInputDir(), GetOutputDir(), 1, 1);
    job.SetOutputFormatClass(typeof(TestMRJobClient.BadOutputFormat));
    try
    {
        job.Submit();
        Fail("Should've thrown an exception while checking output specs.");
    }
    catch (Exception e)
    {
        NUnit.Framework.Assert.IsTrue(e is IOException);
    }
    Cluster cluster = new Cluster(conf);
    Path jobStagingArea = JobSubmissionFiles.GetStagingDir(cluster, job.GetConfiguration());
    Path submitJobDir = new Path(jobStagingArea, "JobId");
    Path submitJobFile = JobSubmissionFiles.GetJobConfPath(submitJobDir);
    NUnit.Framework.Assert.IsFalse("Shouldn't have created a job file if job specs failed.",
        FileSystem.Get(conf).Exists(submitJobFile));
}
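// Sketch (an assumption, not the actual TestMRJobClient.BadOutputFormat source) of the
// kind of OutputFormat the test above depends on: CheckOutputSpecs() always throws, so
// Job.Submit() must fail before any submission files are written.
internal class AlwaysFailingOutputFormat<K, V> : OutputFormat<K, V>
{
    /// <exception cref="System.IO.IOException"/>
    public override void CheckOutputSpecs(JobContext context)
    {
        // Rejecting the specs here is what forces Submit() to throw an IOException.
        throw new IOException("output specs rejected on purpose");
    }

    public override RecordWriter<K, V> GetRecordWriter(TaskAttemptContext context)
    {
        return null; // never reached: CheckOutputSpecs() fails first
    }

    public override OutputCommitter GetOutputCommitter(TaskAttemptContext context)
    {
        return null; // never reached
    }
}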
/// <summary>
/// Upload and configure files, libjars, the job jar, and archives pertaining to
/// the passed job.
/// </summary>
/// <param name="job">the job containing the files to be uploaded</param>
/// <param name="submitJobDir">the submission directory of the job</param>
/// <exception cref="System.IO.IOException"/>
public virtual void UploadFiles(Job job, Path submitJobDir)
{
    Configuration conf = job.GetConfiguration();
    short replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication);
    if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
    {
        Log.Warn("Hadoop command-line option parsing not performed. " +
            "Implement the Tool interface and execute your application " +
            "with ToolRunner to remedy this.");
    }
    // Get all the command-line arguments passed in via the user's conf.
    string files = conf.Get("tmpfiles");
    string libjars = conf.Get("tmpjars");
    string archives = conf.Get("tmparchives");
    string jobJar = job.GetJar();
    //
    // Figure out what fs the JobTracker is using. Copy the
    // job to it, under a temporary name. This allows DFS to work,
    // and under the local fs also provides UNIX-like object loading
    // semantics. (That is, if the job file is deleted right after
    // submission, we can still run the submission to completion.)
    //
    // Create a number of filenames in the JobTracker's fs namespace.
    Log.Debug("default FileSystem: " + jtFs.GetUri());
    if (jtFs.Exists(submitJobDir))
    {
        throw new IOException("Not submitting job. Job directory " + submitJobDir +
            " already exists! This is unexpected. Please check what is in" +
            " that directory.");
    }
    submitJobDir = jtFs.MakeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.ToUri().GetPath());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);
    FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);
    Path filesDir = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
    Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
    Path libjarsDir = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);
    // Add all the command-line files, jars, and archives:
    // first copy them to the jobtracker's filesystem.
    if (files != null)
    {
        FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
        string[] fileArr = files.Split(",");
        foreach (string tmpFile in fileArr)
        {
            URI tmpURI = null;
            try
            {
                tmpURI = new URI(tmpFile);
            }
            catch (URISyntaxException e)
            {
                throw new ArgumentException(e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
            try
            {
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheFile(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // Should not throw a URI exception.
                throw new IOException("Failed to create uri for " + tmpFile, ue);
            }
        }
    }
    if (libjars != null)
    {
        FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
        string[] libjarsArr = libjars.Split(",");
        foreach (string tmpjars in libjarsArr)
        {
            Path tmp = new Path(tmpjars);
            Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
            DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs);
        }
    }
    if (archives != null)
    {
        FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
        string[] archivesArr = archives.Split(",");
        foreach (string tmpArchives in archivesArr)
        {
            URI tmpURI;
            try
            {
                tmpURI = new URI(tmpArchives);
            }
            catch (URISyntaxException e)
            {
                throw new ArgumentException(e);
            }
            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
            try
            {
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheArchive(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // Should not throw a URI exception.
                throw new IOException("Failed to create uri for " + tmpArchives, ue);
            }
        }
    }
    if (jobJar != null)
    {
        // Copy the jar to the JobTracker's fs.
        // Use the jar name if the job is not named.
        if (string.Empty.Equals(job.GetJobName()))
        {
            job.SetJobName(new Path(jobJar).GetName());
        }
        Path jobJarPath = new Path(jobJar);
        URI jobJarURI = jobJarPath.ToUri();
        // If the job jar is already on a global fs,
        // we don't need to copy it from the local fs.
        if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
        {
            CopyJar(jobJarPath, JobSubmissionFiles.GetJobJar(submitJobDir), replication);
            job.SetJar(JobSubmissionFiles.GetJobJar(submitJobDir).ToString());
        }
    }
    else
    {
        Log.Warn("No job jar file set. User classes may not be found. " +
            "See Job or Job#setJar(String).");
    }
    AddLog4jToDistributedCache(job, submitJobDir);
    // Set the timestamps of the archives and files, and
    // set the public/private visibility of the archives and files.
    ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);
    // Get delegation tokens for the cached files.
    ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
}
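// A minimal sketch (assumption, for illustration only) of how the "tmpfiles",
// "tmpjars", and "tmparchives" keys read above are normally populated: Hadoop's
// GenericOptionsParser fills them from the -files/-libjars/-archives command-line
// options, but they can also be set directly as comma-separated URI lists. All
// paths and the method name below are hypothetical.
private static void ConfigureDistCacheEntries(Configuration conf)
{
    conf.Set("tmpfiles", "file:///home/user/lookup.txt#lookup"); // side file; the fragment becomes the cache link name
    conf.Set("tmpjars", "file:///home/user/lib/helper.jar");     // extra jar added to the task classpath
    conf.Set("tmparchives", "file:///home/user/dict.zip");       // archive unpacked on each node
}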
/// <summary>Internal method for submitting jobs to the system.</summary>
/// <remarks>
/// Internal method for submitting jobs to the system.
/// <p>The job submission process involves:
/// <ol>
/// <li>
/// Checking the input and output specifications of the job.
/// </li>
/// <li>
/// Computing the
/// <see cref="InputSplit"/>
/// s for the job.
/// </li>
/// <li>
/// Setting up the requisite accounting information for the
/// <see cref="Org.Apache.Hadoop.Mapreduce.Filecache.DistributedCache"/>
/// of the job, if necessary.
/// </li>
/// <li>
/// Copying the job's jar and configuration to the map-reduce system
/// directory on the distributed file-system.
/// </li>
/// <li>
/// Submitting the job to the <code>JobTracker</code> and optionally
/// monitoring its status.
/// </li>
/// </ol></p>
/// </remarks>
/// <param name="job">the configuration to submit</param>
/// <param name="cluster">the handle to the Cluster</param>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.IO.IOException"/>
internal virtual JobStatus SubmitJobInternal(Job job, Cluster cluster)
{
    // Validate the job's output specs.
    CheckSpecs(job);
    Configuration conf = job.GetConfiguration();
    AddMRFrameworkToDistributedCache(conf);
    Path jobStagingArea = JobSubmissionFiles.GetStagingDir(cluster, conf);
    // Configure the command line options correctly on the submitting dfs.
    IPAddress ip = Sharpen.Runtime.GetLocalHost();
    if (ip != null)
    {
        submitHostAddress = ip.GetHostAddress();
        submitHostName = ip.GetHostName();
        conf.Set(MRJobConfig.JobSubmithost, submitHostName);
        conf.Set(MRJobConfig.JobSubmithostaddr, submitHostAddress);
    }
    JobID jobId = submitClient.GetNewJobID();
    job.SetJobID(jobId);
    Path submitJobDir = new Path(jobStagingArea, jobId.ToString());
    JobStatus status = null;
    try
    {
        conf.Set(MRJobConfig.UserName, UserGroupInformation.GetCurrentUser().GetShortUserName());
        conf.Set("hadoop.http.filter.initializers",
            "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
        conf.Set(MRJobConfig.MapreduceJobDir, submitJobDir.ToString());
        Log.Debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
        // Get delegation tokens for the dir.
        TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), new Path[] { submitJobDir }, conf);
        PopulateTokenCache(conf, job.GetCredentials());
        // Generate a secret to authenticate shuffle transfers.
        if (TokenCache.GetShuffleSecretKey(job.GetCredentials()) == null)
        {
            KeyGenerator keyGen;
            try
            {
                keyGen = KeyGenerator.GetInstance(ShuffleKeygenAlgorithm);
                keyGen.Init(ShuffleKeyLength);
            }
            catch (NoSuchAlgorithmException e)
            {
                throw new IOException("Error generating shuffle secret key", e);
            }
            SecretKey shuffleKey = keyGen.GenerateKey();
            TokenCache.SetShuffleSecretKey(shuffleKey.GetEncoded(), job.GetCredentials());
        }
        if (CryptoUtils.IsEncryptedSpillEnabled(conf))
        {
            conf.SetInt(MRJobConfig.MrAmMaxAttempts, 1);
            Log.Warn("Max job attempts set to 1 since encrypted intermediate " +
                "data spill is enabled");
        }
        CopyAndConfigureFiles(job, submitJobDir);
        Path submitJobFile = JobSubmissionFiles.GetJobConfPath(submitJobDir);
        // Create the splits for the job.
        Log.Debug("Creating splits at " + jtFs.MakeQualified(submitJobDir));
        int maps = WriteSplits(job, submitJobDir);
        conf.SetInt(MRJobConfig.NumMaps, maps);
        Log.Info("number of splits:" + maps);
        // Write "queue admins of the queue to which the job is being submitted"
        // to the job file.
        string queue = conf.Get(MRJobConfig.QueueName, JobConf.DefaultQueueName);
        AccessControlList acl = submitClient.GetQueueAdmins(queue);
        conf.Set(QueueManager.ToFullPropertyName(queue, QueueACL.AdministerJobs.GetAclName()),
            acl.GetAclString());
        // Remove jobtoken referrals before copying the jobconf to HDFS,
        // as the tasks don't need this setting; in fact they may break
        // because of it if present, since the referral would point to a
        // different job.
        TokenCache.CleanUpTokenReferral(conf);
        if (conf.GetBoolean(MRJobConfig.JobTokenTrackingIdsEnabled,
            MRJobConfig.DefaultJobTokenTrackingIdsEnabled))
        {
            // Add HDFS tracking ids.
            AList<string> trackingIds = new AList<string>();
            foreach (Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> t in
                job.GetCredentials().GetAllTokens())
            {
                trackingIds.AddItem(t.DecodeIdentifier().GetTrackingId());
            }
            conf.SetStrings(MRJobConfig.JobTokenTrackingIds,
                Sharpen.Collections.ToArray(trackingIds, new string[trackingIds.Count]));
        }
        // Set reservation info if it exists.
        ReservationId reservationId = job.GetReservationId();
        if (reservationId != null)
        {
            conf.Set(MRJobConfig.ReservationId, reservationId.ToString());
        }
        // Write the job file to the submit dir.
        WriteConf(conf, submitJobFile);
        //
        // Now, actually submit the job (using the submit name).
        //
        PrintTokens(jobId, job.GetCredentials());
        status = submitClient.SubmitJob(jobId, submitJobDir.ToString(), job.GetCredentials());
        if (status != null)
        {
            return status;
        }
        else
        {
            throw new IOException("Could not launch job");
        }
    }
    finally
    {
        if (status == null)
        {
            Log.Info("Cleaning up the staging area " + submitJobDir);
            if (jtFs != null && submitJobDir != null)
            {
                jtFs.Delete(submitJobDir, true);
            }
        }
    }
}
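// Client-side sketch (assumption): SubmitJobInternal() is not called directly; it is
// reached through Job.Submit() or Job.WaitForCompletion(), roughly as below. The
// method name and driver class are hypothetical.
private static void SubmitExample(Configuration conf)
{
    Job job = Job.GetInstance(conf, "example-job");
    job.SetJarByClass(typeof(ExampleDriver)); // hypothetical driver class, used to locate the job jar
    job.Submit();                             // internally creates a Cluster and calls SubmitJobInternal(job, cluster)
}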