Example #1
0
        public virtual void TestCombinerShouldUpdateTheReporter()
        {
            JobConf conf    = new JobConf(mrCluster.GetConfig());
            int     numMaps = 5;
            int     numReds = 2;
            Path    @in     = new Path(mrCluster.GetTestWorkDir().GetAbsolutePath(), "testCombinerShouldUpdateTheReporter-in"
                                       );
            Path @out = new Path(mrCluster.GetTestWorkDir().GetAbsolutePath(), "testCombinerShouldUpdateTheReporter-out"
                                 );

            CreateInputOutPutFolder(@in, @out, numMaps);
            conf.SetJobName("test-job-with-combiner");
            conf.SetMapperClass(typeof(IdentityMapper));
            conf.SetCombinerClass(typeof(TestMRAppWithCombiner.MyCombinerToCheckReporter));
            //conf.setJarByClass(MyCombinerToCheckReporter.class);
            conf.SetReducerClass(typeof(IdentityReducer));
            DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf);
            conf.SetOutputCommitter(typeof(CustomOutputCommitter));
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            FileInputFormat.SetInputPaths(conf, @in);
            FileOutputFormat.SetOutputPath(conf, @out);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReds);
            RunJob(conf);
        }
        // copy user specified log4j.property file in local
        // to HDFS with putting on distributed cache and adding its parent directory
        // to classpath.
        /// <exception cref="System.IO.IOException"/>
        private void CopyLog4jPropertyFile(Job job, Path submitJobDir, short replication)
        {
            Configuration conf = job.GetConfiguration();
            string        file = ValidateFilePath(conf.Get(MRJobConfig.MapreduceJobLog4jPropertiesFile
                                                           ), conf);

            Log.Debug("default FileSystem: " + jtFs.GetUri());
            FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission
                                                           );

            if (!jtFs.Exists(submitJobDir))
            {
                throw new IOException("Cannot find job submission directory! " + "It should just be created, so something wrong here."
                                      );
            }
            Path fileDir = JobSubmissionFiles.GetJobLog4jFile(submitJobDir);

            // first copy local log4j.properties file to HDFS under submitJobDir
            if (file != null)
            {
                FileSystem.Mkdirs(jtFs, fileDir, mapredSysPerms);
                URI tmpURI = null;
                try
                {
                    tmpURI = new URI(file);
                }
                catch (URISyntaxException e)
                {
                    throw new ArgumentException(e);
                }
                Path tmp     = new Path(tmpURI);
                Path newPath = CopyRemoteFiles(fileDir, tmp, conf, replication);
                DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf);
            }
        }
Example #3
0
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal static bool RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int
                                    numReds)
        {
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            if (!fs.Exists(inDir))
            {
                fs.Mkdirs(inDir);
            }
            string input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";

            for (int i = 0; i < numMaps; ++i)
            {
                DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
                file.WriteBytes(input);
                file.Close();
            }
            DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf, fs);
            conf.SetOutputCommitter(typeof(CustomOutputCommitter));
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReds);
            JobClient  jobClient = new JobClient(conf);
            RunningJob job       = jobClient.SubmitJob(conf);

            return(jobClient.MonitorAndPrintJob(conf, job));
        }
        /// <summary>
        /// Upload and configure files, libjars, jobjars, and archives pertaining to
        /// the passed job.
        /// </summary>
        /// <param name="job">the job containing the files to be uploaded</param>
        /// <param name="submitJobDir">the submission directory of the job</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void UploadFiles(Job job, Path submitJobDir)
        {
            Configuration conf        = job.GetConfiguration();
            short         replication = (short)conf.GetInt(Job.SubmitReplication, Job.DefaultSubmitReplication
                                                           );

            if (!(conf.GetBoolean(Job.UsedGenericParser, false)))
            {
                Log.Warn("Hadoop command-line option parsing not performed. " + "Implement the Tool interface and execute your application "
                         + "with ToolRunner to remedy this.");
            }
            // get all the command line arguments passed in by the user conf
            string files    = conf.Get("tmpfiles");
            string libjars  = conf.Get("tmpjars");
            string archives = conf.Get("tmparchives");
            string jobJar   = job.GetJar();

            //
            // Figure out what fs the JobTracker is using. Copy the
            // job to it, under a temporary name. This allows DFS to work,
            // and under the local fs also provides UNIX-like object loading
            // semantics. (that is, if the job file is deleted right after
            // submission, we can still run the submission to completion)
            //
            // Create a number of filenames in the JobTracker's fs namespace
            Log.Debug("default FileSystem: " + jtFs.GetUri());
            if (jtFs.Exists(submitJobDir))
            {
                throw new IOException("Not submitting job. Job directory " + submitJobDir + " already exists!! This is unexpected.Please check what's there in"
                                      + " that directory");
            }
            submitJobDir = jtFs.MakeQualified(submitJobDir);
            submitJobDir = new Path(submitJobDir.ToUri().GetPath());
            FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JobDirPermission
                                                           );

            FileSystem.Mkdirs(jtFs, submitJobDir, mapredSysPerms);
            Path filesDir    = JobSubmissionFiles.GetJobDistCacheFiles(submitJobDir);
            Path archivesDir = JobSubmissionFiles.GetJobDistCacheArchives(submitJobDir);
            Path libjarsDir  = JobSubmissionFiles.GetJobDistCacheLibjars(submitJobDir);

            // add all the command line files/ jars and archive
            // first copy them to jobtrackers filesystem
            if (files != null)
            {
                FileSystem.Mkdirs(jtFs, filesDir, mapredSysPerms);
                string[] fileArr = files.Split(",");
                foreach (string tmpFile in fileArr)
                {
                    URI tmpURI = null;
                    try
                    {
                        tmpURI = new URI(tmpFile);
                    }
                    catch (URISyntaxException e)
                    {
                        throw new ArgumentException(e);
                    }
                    Path tmp     = new Path(tmpURI);
                    Path newPath = CopyRemoteFiles(filesDir, tmp, conf, replication);
                    try
                    {
                        URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                        DistributedCache.AddCacheFile(pathURI, conf);
                    }
                    catch (URISyntaxException ue)
                    {
                        // should not throw a uri exception
                        throw new IOException("Failed to create uri for " + tmpFile, ue);
                    }
                }
            }
            if (libjars != null)
            {
                FileSystem.Mkdirs(jtFs, libjarsDir, mapredSysPerms);
                string[] libjarsArr = libjars.Split(",");
                foreach (string tmpjars in libjarsArr)
                {
                    Path tmp     = new Path(tmpjars);
                    Path newPath = CopyRemoteFiles(libjarsDir, tmp, conf, replication);
                    DistributedCache.AddFileToClassPath(new Path(newPath.ToUri().GetPath()), conf, jtFs
                                                        );
                }
            }
            if (archives != null)
            {
                FileSystem.Mkdirs(jtFs, archivesDir, mapredSysPerms);
                string[] archivesArr = archives.Split(",");
                foreach (string tmpArchives in archivesArr)
                {
                    URI tmpURI;
                    try
                    {
                        tmpURI = new URI(tmpArchives);
                    }
                    catch (URISyntaxException e)
                    {
                        throw new ArgumentException(e);
                    }
                    Path tmp     = new Path(tmpURI);
                    Path newPath = CopyRemoteFiles(archivesDir, tmp, conf, replication);
                    try
                    {
                        URI pathURI = GetPathURI(newPath, tmpURI.GetFragment());
                        DistributedCache.AddCacheArchive(pathURI, conf);
                    }
                    catch (URISyntaxException ue)
                    {
                        // should not throw an uri excpetion
                        throw new IOException("Failed to create uri for " + tmpArchives, ue);
                    }
                }
            }
            if (jobJar != null)
            {
                // copy jar to JobTracker's fs
                // use jar name if job is not named.
                if (string.Empty.Equals(job.GetJobName()))
                {
                    job.SetJobName(new Path(jobJar).GetName());
                }
                Path jobJarPath = new Path(jobJar);
                URI  jobJarURI  = jobJarPath.ToUri();
                // If the job jar is already in a global fs,
                // we don't need to copy it from local fs
                if (jobJarURI.GetScheme() == null || jobJarURI.GetScheme().Equals("file"))
                {
                    CopyJar(jobJarPath, JobSubmissionFiles.GetJobJar(submitJobDir), replication);
                    job.SetJar(JobSubmissionFiles.GetJobJar(submitJobDir).ToString());
                }
            }
            else
            {
                Log.Warn("No job jar file set.  User classes may not be found. " + "See Job or Job#setJar(String)."
                         );
            }
            AddLog4jToDistributedCache(job, submitJobDir);
            // set the timestamps of the archives and files
            // set the public/private visibility of the archives and files
            ClientDistributedCacheManager.DetermineTimestampsAndCacheVisibilities(conf);
            // get DelegationToken for cached file
            ClientDistributedCacheManager.GetDelegationTokens(conf, job.GetCredentials());
        }