/// <summary>
/// Configures a job that uses <c>IdentityMapper</c>, <c>IdentityReducer</c> and
/// <c>TestMRAppWithCombiner.MyCombinerToCheckReporter</c> as the combiner, then hands
/// the configuration to <c>RunJob</c>.
/// </summary>
/// <remarks>
/// The job is set up with 5 map tasks and 2 reduce tasks. Input and output folders
/// live under the mini-cluster's test work directory and are prepared by
/// <c>CreateInputOutPutFolder</c>.
/// </remarks>
public virtual void TestCombinerShouldUpdateTheReporter()
{
    const int mapTaskCount = 5;
    const int reduceTaskCount = 2;

    JobConf jobConf = new JobConf(mrCluster.GetConfig());

    // Input/output locations inside the cluster's test work directory.
    Path inputDir = new Path(
        mrCluster.GetTestWorkDir().GetAbsolutePath(),
        "testCombinerShouldUpdateTheReporter-in");
    Path outputDir = new Path(
        mrCluster.GetTestWorkDir().GetAbsolutePath(),
        "testCombinerShouldUpdateTheReporter-out");
    CreateInputOutPutFolder(inputDir, outputDir, mapTaskCount);

    // Job identity and the map / combine / reduce classes.
    jobConf.SetJobName("test-job-with-combiner");
    jobConf.SetMapperClass(typeof(IdentityMapper));
    jobConf.SetCombinerClass(typeof(TestMRAppWithCombiner.MyCombinerToCheckReporter));
    jobConf.SetReducerClass(typeof(IdentityReducer));

    // Make the application jar available on the task classpath.
    DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, jobConf);

    // Committer, input format and output key/value types.
    jobConf.SetOutputCommitter(typeof(CustomOutputCommitter));
    jobConf.SetInputFormat(typeof(TextInputFormat));
    jobConf.SetOutputKeyClass(typeof(LongWritable));
    jobConf.SetOutputValueClass(typeof(Text));

    // Paths and task counts.
    FileInputFormat.SetInputPaths(jobConf, inputDir);
    FileOutputFormat.SetOutputPath(jobConf, outputDir);
    jobConf.SetNumMapTasks(mapTaskCount);
    jobConf.SetNumReduceTasks(reduceTaskCount);

    RunJob(jobConf);
}
/// <summary>
/// Copies the user-specified log4j properties file, when one is configured under
/// <c>MRJobConfig.MapreduceJobLog4jPropertiesFile</c>, into the job's log4j directory
/// beneath <paramref name="submitJobDir"/> on <c>jtFs</c>, and registers the copied
/// file on the classpath through <c>DistributedCache.AddFileToClassPath</c>.
/// </summary>
/// <param name="job">the job whose configuration names the log4j properties file</param>
/// <param name="submitJobDir">the job submission directory; it must already exist on <c>jtFs</c></param>
/// <param name="replication">replication value forwarded to <c>CopyRemoteFiles</c></param>
/// <exception cref="System.IO.IOException">
/// thrown when <paramref name="submitJobDir"/> does not exist on <c>jtFs</c>
/// </exception>
/// <exception cref="System.ArgumentException">
/// thrown when the configured file path is not a syntactically valid URI
/// </exception>
private void CopyLog4jPropertyFile(Job job, Path submitJobDir, short replication)
{
    Configuration conf = job.GetConfiguration();
    string file = ValidateFilePath(conf.Get(MRJobConfig.MapreduceJobLog4jPropertiesFile), conf);
    Log.Debug("default FileSystem: " + jtFs.GetUri());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);

    // The submission directory is checked even when no log4j file is configured.
    if (!jtFs.Exists(submitJobDir))
    {
        throw new IOException("Cannot find job submission directory! " + "It should just be created, so something wrong here.");
    }

    Path fileDir = JobSubmissionFiles.GetJobLog4jFile(submitJobDir);

    // first copy local log4j.properties file to jtFs under submitJobDir
    if (file != null)
    {
        FileSystem.Mkdirs(jtFs, fileDir, mapredSysPerms);

        URI tmpURI = null;
        try
        {
            tmpURI = new URI(file);
        }
        catch (URISyntaxException e)
        {
            // ArgumentException has no (Exception) constructor; pass the message
            // and keep the original exception as the inner cause.
            throw new ArgumentException(e.Message, e);
        }

        Path tmp = new Path(tmpURI);
        Path newPath = CopyRemoteFiles(fileDir, tmp, conf, replication);

        // Only the path component of the copied file's URI is registered.
        DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf);
    }
}
/// <summary>
/// Prepares input data, configures the job, submits it through a <c>JobClient</c>
/// and monitors it.
/// </summary>
/// <remarks>
/// Steps performed:
/// deletes <paramref name="outDir"/> (recursively) if it exists; creates
/// <paramref name="inDir"/> if it is missing; writes one <c>part-i</c> file of
/// fixed text per map task; adds <c>TestMRJobs.AppJar</c> to the classpath; sets
/// the committer, input format, output key/value classes, paths and task counts;
/// then submits the job.
/// </remarks>
/// <param name="conf">the job configuration to populate and submit</param>
/// <param name="inDir">directory that receives the generated input files</param>
/// <param name="outDir">job output directory; removed first when it already exists</param>
/// <param name="numMaps">number of input files written and number of map tasks requested</param>
/// <param name="numReds">number of reduce tasks requested</param>
/// <returns>the value returned by <c>JobClient.MonitorAndPrintJob</c></returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal static bool RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int numReds)
{
    FileSystem fs = FileSystem.Get(conf);
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }
    if (!fs.Exists(inDir))
    {
        fs.Mkdirs(inDir);
    }

    string input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
    for (int i = 0; i < numMaps; ++i)
    {
        DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
        try
        {
            file.WriteBytes(input);
        }
        finally
        {
            // Close the stream even when the write fails so the handle is not leaked.
            file.Close();
        }
    }

    DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf, fs);
    conf.SetOutputCommitter(typeof(CustomOutputCommitter));
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReds);

    JobClient jobClient = new JobClient(conf);
    RunningJob job = jobClient.SubmitJob(conf);
    return jobClient.MonitorAndPrintJob(conf, job);
}
/// <summary>
/// Upload and configure files, libjars, jobjars, and archives pertaining to
/// the passed job.
/// </summary>
/// <remarks>
/// Reads the comma-separated <c>tmpfiles</c>, <c>tmpjars</c> and <c>tmparchives</c>
/// configuration values plus the job jar, copies each entry beneath
/// <paramref name="submitJobDir"/> on <c>jtFs</c>, and registers it with
/// <c>DistributedCache</c>. Afterwards it calls <c>AddLog4jToDistributedCache</c>,
/// <c>ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities</c>
/// and <c>ClientDistributedCacheManager.GetDelegationTokens</c>.
/// </remarks>
/// <param name="job">the job containing the files to be uploaded</param>
/// <param name="submitJobDir">the submission directory of the job; it must not exist yet</param>
/// <exception cref="System.IO.IOException">
/// thrown when <paramref name="submitJobDir"/> already exists, or when a cache URI
/// cannot be built for a copied file or archive
/// </exception>
/// <exception cref="System.ArgumentException">
/// thrown when an entry of <c>tmpfiles</c> or <c>tmparchives</c> is not a valid URI
/// </exception>
public virtual void UploadFiles(Job job, Path submitJobDir)
{
    Configuration conf = job.GetConfiguration();
    short replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication);

    if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
    {
        Log.Warn("Hadoop command-line option parsing not performed. " + "Implement the Tool interface and execute your application " + "with ToolRunner to remedy this.");
    }

    // get all the command line arguments passed in by the user conf
    string files = conf.Get("tmpfiles");
    string libjars = conf.Get("tmpjars");
    string archives = conf.Get("tmparchives");
    string jobJar = job.GetJar();

    //
    // Figure out what fs the JobTracker is using.  Copy the
    // job to it, under a temporary name.  This allows DFS to work,
    // and under the local fs also provides UNIX-like object loading
    // semantics.  (that is, if the job file is deleted right after
    // submission, we can still run the submission to completion)
    //
    // Create a number of filenames in the JobTracker's fs namespace
    Log.Debug("default FileSystem: " + jtFs.GetUri());
    if (jtFs.Exists(submitJobDir))
    {
        throw new IOException("Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in" + " that directory");
    }

    // Qualify against jtFs, then keep only the path component of the result.
    submitJobDir = jtFs.MakeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.ToUri().GetPath());
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission);
    FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);

    Path filesDir = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
    Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
    Path libjarsDir = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);

    // add all the command line files/ jars and archive
    // first copy them to jobtrackers filesystem
    if (files != null)
    {
        FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
        string[] fileArr = files.Split(",");
        foreach (string tmpFile in fileArr)
        {
            URI tmpURI = null;
            try
            {
                tmpURI = new URI(tmpFile);
            }
            catch (URISyntaxException e)
            {
                // ArgumentException has no (Exception) constructor; pass the
                // message and keep the original exception as the inner cause.
                throw new ArgumentException(e.Message, e);
            }

            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
            try
            {
                // The fragment of the user-supplied URI is carried over to the cache URI.
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheFile(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // should not throw a uri exception
                throw new IOException("Failed to create uri for " + tmpFile, ue);
            }
        }
    }

    if (libjars != null)
    {
        FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
        string[] libjarsArr = libjars.Split(",");
        foreach (string tmpjars in libjarsArr)
        {
            Path tmp = new Path(tmpjars);
            Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
            DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs);
        }
    }

    if (archives != null)
    {
        FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
        string[] archivesArr = archives.Split(",");
        foreach (string tmpArchives in archivesArr)
        {
            URI tmpURI;
            try
            {
                tmpURI = new URI(tmpArchives);
            }
            catch (URISyntaxException e)
            {
                // Same constructor fix as for the files entries above.
                throw new ArgumentException(e.Message, e);
            }

            Path tmp = new Path(tmpURI);
            Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
            try
            {
                URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                DistributedCache.AddCacheArchive(pathURI, conf);
            }
            catch (URISyntaxException ue)
            {
                // should not throw a uri exception
                throw new IOException("Failed to create uri for " + tmpArchives, ue);
            }
        }
    }

    if (jobJar != null)
    {
        // copy jar to JobTracker's fs
        // use jar name if job is not named.
        if (string.Empty.Equals(job.GetJobName()))
        {
            job.SetJobName(new Path(jobJar).GetName());
        }

        Path jobJarPath = new Path(jobJar);
        URI jobJarURI = jobJarPath.ToUri();

        // If the job jar is already in a global fs,
        // we don't need to copy it from local fs
        if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
        {
            CopyJar(jobJarPath, JobSubmissionFiles.GetJobJar(submitJobDir), replication);
            job.SetJar(JobSubmissionFiles.GetJobJar(submitJobDir).ToString());
        }
    }
    else
    {
        Log.Warn("No job jar file set.  User classes may not be found. " + "See Job or Job#setJar(String).");
    }

    AddLog4jToDistributedCache(job, submitJobDir);

    // set the timestamps of the archives and files
    // set the public/private visibility of the archives and files
    ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);

    // get DelegationToken for cached file
    ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
}