/// <exception cref="System.Exception"/>
public virtual void TestJobSubmissionSpecsAndFiles()
{
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, GetInputDir(), GetOutputDir(), 1, 1);
    job.SetOutputFormatClass(typeof(TestMRJobClient.BadOutputFormat));
    try
    {
        job.Submit();
        Fail("Should've thrown an exception while checking output specs.");
    }
    catch (Exception e)
    {
        NUnit.Framework.Assert.IsTrue(e is IOException);
    }
    Cluster cluster = new Cluster(conf);
    Path jobStagingArea = JobSubmissionFiles.GetStagingDir(cluster, job.GetConfiguration());
    Path submitJobDir = new Path(jobStagingArea, "JobId");
    Path submitJobFile = JobSubmissionFiles.GetJobConfPath(submitJobDir);
    NUnit.Framework.Assert.IsFalse("Shouldn't have created a job file if job specs failed.",
        FileSystem.Get(conf).Exists(submitJobFile));
}
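
// The test above relies on TestMRJobClient.BadOutputFormat failing the output
// spec check. A minimal sketch of such an output format is shown below; the
// real class may differ in its details, so treat this as an illustrative
// assumption rather than the actual test helper.
internal class BadOutputFormat : TextOutputFormat<object, object>
{
    /// <exception cref="System.IO.IOException"/>
    public override void CheckOutputSpecs(JobContext job)
    {
        // Fail unconditionally so Job.Submit() aborts during spec validation,
        // before any job files are written to the staging directory.
        throw new IOException("output specs deliberately rejected");
    }
}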
/// <summary>Internal method for submitting jobs to the system.</summary>
/// <remarks>
/// Internal method for submitting jobs to the system.
/// <p>The job submission process involves:
/// <ol>
/// <li>
/// Checking the input and output specifications of the job.
/// </li>
/// <li>
/// Computing the
/// <see cref="InputSplit"/>
/// s for the job.
/// </li>
/// <li>
/// Setting up the requisite accounting information for the
/// <see cref="Org.Apache.Hadoop.Mapreduce.Filecache.DistributedCache"/>
/// of the job, if necessary.
/// </li>
/// <li>
/// Copying the job's jar and configuration to the map-reduce system
/// directory on the distributed file-system.
/// </li>
/// <li>
/// Submitting the job to the <code>JobTracker</code> and optionally
/// monitoring its status.
/// </li>
/// </ol></p>
/// </remarks>
/// <param name="job">the configuration to submit</param>
/// <param name="cluster">the handle to the Cluster</param>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.IO.IOException"/>
internal virtual JobStatus SubmitJobInternal(Job job, Cluster cluster)
{
    // validate the job's output specs
    CheckSpecs(job);
    Configuration conf = job.GetConfiguration();
    AddMRFrameworkToDistributedCache(conf);
    Path jobStagingArea = JobSubmissionFiles.GetStagingDir(cluster, conf);
    // configure the command line options correctly on the submitting dfs
    IPAddress ip = Sharpen.Runtime.GetLocalHost();
    if (ip != null)
    {
        submitHostAddress = ip.GetHostAddress();
        submitHostName = ip.GetHostName();
        conf.Set(MRJobConfig.JobSubmithost, submitHostName);
        conf.Set(MRJobConfig.JobSubmithostaddr, submitHostAddress);
    }
    JobID jobId = submitClient.GetNewJobID();
    job.SetJobID(jobId);
    Path submitJobDir = new Path(jobStagingArea, jobId.ToString());
    JobStatus status = null;
    try
    {
        conf.Set(MRJobConfig.UserName, UserGroupInformation.GetCurrentUser().GetShortUserName());
        conf.Set("hadoop.http.filter.initializers",
            "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
        conf.Set(MRJobConfig.MapreduceJobDir, submitJobDir.ToString());
        Log.Debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
        // get delegation token for the dir
        TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), new Path[] { submitJobDir }, conf);
        PopulateTokenCache(conf, job.GetCredentials());
        // generate a secret to authenticate shuffle transfers
        if (TokenCache.GetShuffleSecretKey(job.GetCredentials()) == null)
        {
            KeyGenerator keyGen;
            try
            {
                keyGen = KeyGenerator.GetInstance(ShuffleKeygenAlgorithm);
                keyGen.Init(ShuffleKeyLength);
            }
            catch (NoSuchAlgorithmException e)
            {
                throw new IOException("Error generating shuffle secret key", e);
            }
            SecretKey shuffleKey = keyGen.GenerateKey();
            TokenCache.SetShuffleSecretKey(shuffleKey.GetEncoded(), job.GetCredentials());
        }
        if (CryptoUtils.IsEncryptedSpillEnabled(conf))
        {
            conf.SetInt(MRJobConfig.MrAmMaxAttempts, 1);
            Log.Warn("Max job attempts set to 1 since encrypted intermediate " +
                "data spill is enabled");
        }
        CopyAndConfigureFiles(job, submitJobDir);
        Path submitJobFile = JobSubmissionFiles.GetJobConfPath(submitJobDir);
        // Create the splits for the job
        Log.Debug("Creating splits at " + jtFs.MakeQualified(submitJobDir));
        int maps = WriteSplits(job, submitJobDir);
        conf.SetInt(MRJobConfig.NumMaps, maps);
        Log.Info("number of splits:" + maps);
        // write "queue admins of the queue to which job is being submitted"
        // to job file.
        string queue = conf.Get(MRJobConfig.QueueName, JobConf.DefaultQueueName);
        AccessControlList acl = submitClient.GetQueueAdmins(queue);
        conf.Set(QueueManager.ToFullPropertyName(queue, QueueACL.AdministerJobs.GetAclName()),
            acl.GetAclString());
        // Remove jobtoken referrals before copying the jobconf to HDFS: the
        // tasks don't need this setting, and it may actually break them if
        // present, since the referral would point to a different job.
        TokenCache.CleanUpTokenReferral(conf);
        if (conf.GetBoolean(MRJobConfig.JobTokenTrackingIdsEnabled,
            MRJobConfig.DefaultJobTokenTrackingIdsEnabled))
        {
            // Add HDFS tracking ids
            AList<string> trackingIds = new AList<string>();
            foreach (Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> t in
                job.GetCredentials().GetAllTokens())
            {
                trackingIds.AddItem(t.DecodeIdentifier().GetTrackingId());
            }
            conf.SetStrings(MRJobConfig.JobTokenTrackingIds,
                Sharpen.Collections.ToArray(trackingIds, new string[trackingIds.Count]));
        }
        // Set reservation info if it exists
        ReservationId reservationId = job.GetReservationId();
        if (reservationId != null)
        {
            conf.Set(MRJobConfig.ReservationId, reservationId.ToString());
        }
        // Write job file to submit dir
        WriteConf(conf, submitJobFile);
        //
        // Now, actually submit the job (using the submit name)
        //
        PrintTokens(jobId, job.GetCredentials());
        status = submitClient.SubmitJob(jobId, submitJobDir.ToString(), job.GetCredentials());
        if (status != null)
        {
            return status;
        }
        else
        {
            throw new IOException("Could not launch job");
        }
    }
    finally
    {
        // if submission failed at any point, remove the staging directory
        if (status == null)
        {
            Log.Info("Cleaning up the staging area " + submitJobDir);
            if (jtFs != null && submitJobDir != null)
            {
                jtFs.Delete(submitJobDir, true);
            }
        }
    }
}
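
// For context, a minimal driver sketch (an assumption, not code from this
// file) showing the public path into SubmitJobInternal: Job.Submit() or
// Job.WaitForCompletion() connects to the cluster and hands the job to a
// JobSubmitter, which performs the spec checks, staging-directory setup,
// split writing, and RPC submission shown above. Method names follow the
// Sharpen-translated Hadoop mapreduce API used elsewhere in this file.
/// <exception cref="System.Exception"/>
public static bool SubmitExampleJob(Configuration conf, Path input, Path output)
{
    // an identity map/reduce job is enough to exercise the submission path
    Job job = Job.GetInstance(conf, "example-job");
    FileInputFormat.AddInputPath(job, input);
    FileOutputFormat.SetOutputPath(job, output);
    // WaitForCompletion submits the job (ultimately via
    // JobSubmitter.SubmitJobInternal) and polls its status until it finishes.
    return job.WaitForCompletion(true);
}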