Ejemplo n.º 1
0
        /// <summary>
        /// Drives a <see cref="FileOutputCommitter"/> through job/task setup, writes
        /// output via a <see cref="MapFileOutputFormat"/> record writer, commits the
        /// task and the job, validates the map-file content under <c>outDir</c> and
        /// finally deletes <c>outDir</c>.
        /// </summary>
        /// <param name="version">
        /// value stored in the job configuration under
        /// <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestMapFileOutputCommitterInternal(int version)
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

            JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Prepare the job first, then the task attempt.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Produce the task output through a map-file record writer.
            MapFileOutputFormat mapFileFormat = new MapFileOutputFormat();
            RecordWriter mapFileWriter = mapFileFormat.GetRecordWriter(taskCtx);
            WriteMapFileOutput(mapFileWriter, taskCtx);

            // Commit the task attempt, then the whole job.
            outputCommitter.CommitTask(taskCtx);
            outputCommitter.CommitJob(jobCtx);

            // Inspect what ended up in the output directory, then remove it.
            FileSystem outputFs = FileSystem.Get(job.GetConfiguration());
            ValidateMapFileOutputContent(outputFs, outDir);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the default path and file name for the output format by combining
        /// the output committer's work path with a unique file name.
        /// </summary>
        /// <remarks>
        /// The committer returned by <c>GetOutputCommitter</c> is cast to
        /// <see cref="FileOutputCommitter"/>; the cast fails if a different
        /// committer type is returned.
        /// </remarks>
        /// <param name="context">the task context</param>
        /// <param name="extension">an extension to add to the filename</param>
        /// <returns>a full path $output/_temporary/$taskid/part-[mr]-$id</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual Path GetDefaultWorkFile(TaskAttemptContext context, string extension)
        {
            FileOutputCommitter fileCommitter = (FileOutputCommitter)GetOutputCommitter(context);
            Path workDir = fileCommitter.GetWorkPath();

            // The unique name is derived from the configured output name plus the extension.
            string baseName = GetOutputName(context);
            string uniqueName = GetUniqueFile(context, baseName, extension);

            return new Path(workDir, uniqueName);
        }
Ejemplo n.º 3
0
            /// <summary>
            /// Returns a work file located in the <c>TestFileOutputCommitter.SubDir</c>
            /// sub-directory of the enclosing test's output committer work path.
            /// </summary>
            /// <param name="context">the task attempt context</param>
            /// <param name="extension">an extension to add to the filename</param>
            /// <returns>the work path, extended by the sub-directory and a unique file name</returns>
            /// <exception cref="System.IO.IOException"/>
            public override Path GetDefaultWorkFile(TaskAttemptContext context, string extension)
            {
                // The committer is obtained from the outer-most enclosing instance.
                FileOutputCommitter outerCommitter =
                    (FileOutputCommitter)this._enclosing._enclosing.GetOutputCommitter(context);
                Path subDirPath = new Path(outerCommitter.GetWorkPath(), TestFileOutputCommitter.SubDir);

                string outputName = FileOutputFormat.GetOutputName(context);
                string uniqueName = FileOutputFormat.GetUniqueFile(context, outputName, extension);

                return new Path(subDirPath, uniqueName);
            }
Ejemplo n.º 4
0
 /// <summary>
 /// Returns the output committer for this format, creating a
 /// <see cref="FileOutputCommitter"/> on first use and reusing it afterwards.
 /// </summary>
 /// <param name="context">
 /// the task attempt context used to resolve the output path and to construct
 /// the committer the first time this method is called
 /// </param>
 /// <returns>the cached committer instance</returns>
 /// <exception cref="System.IO.IOException"/>
 public override OutputCommitter GetOutputCommitter(TaskAttemptContext context)
 {
     // NOTE(review): locking on `this` exposes the monitor to outside code; a private
     // lock object would be safer - confirm no other member relies on this monitor.
     lock (this)
     {
         if (committer != null)
         {
             return committer;
         }

         // First call: build the committer for the configured output path.
         committer = new FileOutputCommitter(GetOutputPath(context), context);
         return committer;
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Submits one <c>_Callable_524</c> per task attempt to a two-thread pool,
        /// waits for the pool to terminate, commits the job and then checks that
        /// no nested <c>SubDir</c> directory exists under <c>OutSubDir</c>.
        /// </summary>
        /// <remarks>
        /// The body of <c>_Callable_524</c> is not visible here; presumably it writes
        /// and commits the output of one task attempt - verify against its definition.
        /// </remarks>
        /// <param name="version">
        /// value stored in the job configuration under
        /// <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestConcurrentCommitTaskWithSubDir(int version)
        {
            Job job = Job.GetInstance();

            FileOutputFormat.SetOutputPath(job, outDir);
            Configuration conf = job.GetConfiguration();

            conf.Set(MRJobConfig.TaskAttemptId, attempt);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            // Register the test's RLFS class as the implementation for "fs.file.impl".
            conf.SetClass("fs.file.impl", typeof(TestFileOutputCommitter.RLFS), typeof(FileSystem));
            // NOTE(review): presumably closes cached file systems so that new instances
            // pick up the RLFS implementation - confirm.
            FileSystem.CloseAll();
            JobContext          jContext    = new JobContextImpl(conf, taskID.GetJobID());
            FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);

            amCommitter.SetupJob(jContext);
            TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
            taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
            taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
            TextOutputFormat[] tof = new TextOutputFormat[2];
            for (int i = 0; i < tof.Length; i++)
            {
                tof[i] = new _TextOutputFormat_508(this);
            }
            ExecutorService executor = Executors.NewFixedThreadPool(2);

            try
            {
                // NOTE(review): the values returned by Submit are discarded, so a failure
                // inside a task is not observed here - confirm that is intended.
                for (int i_1 = 0; i_1 < taCtx.Length; i_1++)
                {
                    int taskIdx = i_1;
                    executor.Submit(new _Callable_524(this, tof, taskIdx, taCtx));
                }
            }
            finally
            {
                // Always shut the pool down and block until every submitted task has finished.
                executor.Shutdown();
                while (!executor.AwaitTermination(1, TimeUnit.Seconds))
                {
                    Log.Info("Awaiting thread termination!");
                }
            }
            amCommitter.CommitJob(jContext);
            RawLocalFileSystem lfs = new RawLocalFileSystem();

            lfs.SetConf(conf);
            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsFalse(lfs.Exists(new Path(OutSubDir, SubDir)),
                                           "Must not end up with sub_dir/sub_dir");
            // validate output
            ValidateContent(OutSubDir);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs job/task setup followed directly by task and job commit on a
        /// <see cref="FileOutputCommitter"/> without writing any output, then
        /// deletes <c>outDir</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestEmptyOutput()
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);

            JobContext jobCtx = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Prepare the job and the task attempt.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Deliberately no output is written between setup and commit.
            outputCommitter.CommitTask(taskCtx);
            outputCommitter.CommitJob(jobCtx);

            // Remove the output directory.
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Writes output through a <see cref="TextOutputFormat"/> record writer,
        /// commits the task and the job with a <see cref="FileOutputCommitter"/>,
        /// and compares the committed part file under <c>outDir</c> with the
        /// expected text.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestCommitter()
        {
            Job job = Job.GetInstance();

            FileOutputFormat.SetOutputPath(job, outDir);
            Configuration conf = job.GetConfiguration();

            conf.Set(MRJobConfig.TaskAttemptId, attempt);
            JobContext          jContext  = new JobContextImpl(conf, taskID.GetJobID());
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

            // setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(tContext);

            WriteOutput(theRecordWriter, tContext);
            // do commit
            committer.CommitTask(tContext);
            committer.CommitJob(jContext);
            // validate output: build the text the committed part file is expected to hold
            FilePath      expectedFile   = new FilePath(new Path(outDir, partFile).ToString());
            StringBuilder expectedOutput = new StringBuilder();

            expectedOutput.Append(key1).Append('\t').Append(val1).Append("\n");
            expectedOutput.Append(val1).Append("\n");
            expectedOutput.Append(val2).Append("\n");
            expectedOutput.Append(key2).Append("\n");
            expectedOutput.Append(key1).Append("\n");
            expectedOutput.Append(key2).Append('\t').Append(val2).Append("\n");
            string output = UtilsForTests.Slurp(expectedFile);

            // Expected value goes first so that a failure report labels both sides correctly.
            NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Ejemplo n.º 8
0
        //test passed
        /// <summary>
        /// Writes task output, aborts the task and then the job on a
        /// <see cref="FileOutputCommitter"/>, and asserts that the task's part file,
        /// the job's pending directory and any content of <c>outDir</c> are gone.
        /// </summary>
        /// <param name="version">
        /// value stored in the job configuration under
        /// <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void TestAbortInternal(int version)
        {
            Job job = Job.GetInstance();

            FileOutputFormat.SetOutputPath(job, outDir);
            Configuration conf = job.GetConfiguration();

            conf.Set(MRJobConfig.TaskAttemptId, attempt);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            JobContext          jContext  = new JobContextImpl(conf, taskID.GetJobID());
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

            // do setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(tContext);

            WriteOutput(theRecordWriter, tContext);
            // do abort
            committer.AbortTask(tContext);
            FilePath expectedFile = new FilePath(new Path(committer.GetWorkPath(), partFile).ToString());

            // NUnit takes the condition (or expected/actual pair) first and the message last.
            NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "task temp dir still exists");
            committer.AbortJob(jContext, JobStatus.State.Failed);
            expectedFile = new FilePath(new Path(outDir, FileOutputCommitter.PendingDirName).ToString());
            NUnit.Framework.Assert.IsFalse(expectedFile.Exists(), "job temp dir still exists");
            NUnit.Framework.Assert.AreEqual(0, new FilePath(outDir.ToString()).ListFiles().Length,
                                            "Output directory not empty");
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Get the <see cref="Org.Apache.Hadoop.FS.Path"/> to the task's temporary
        /// output directory for the map-reduce job.
        /// </summary>
        /// <remarks>
        /// <para><b>Tasks' Side-Effect Files</b></para>
        /// <para>Some applications need to create/write-to side-files, which differ from
        /// the actual job-outputs.</para>
        /// <para>In such cases there could be issues with 2 instances of the same TIP
        /// (running simultaneously e.g. speculative tasks) trying to open/write-to the
        /// same file (path) on HDFS. Hence the application-writer will have to pick
        /// unique names per task-attempt (e.g. using the attemptid, say
        /// <c>attempt_200709221812_0001_m_000000_0</c>), not just per TIP.</para>
        /// <para>To get around this the Map-Reduce framework helps the application-writer
        /// out by maintaining a special
        /// <c>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</c>
        /// sub-directory for each task-attempt on HDFS where the output of the
        /// task-attempt goes. On successful completion of the task-attempt the files
        /// in the <c>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</c> (only)
        /// are <i>promoted</i> to <c>${mapreduce.output.fileoutputformat.outputdir}</c>. Of course, the
        /// framework discards the sub-directory of unsuccessful task-attempts. This
        /// is completely transparent to the application.</para>
        /// <para>The application-writer can take advantage of this by creating any
        /// side-files required in a work directory during execution of the task, i.e. via
        /// <see cref="FileOutputFormat{K, V}.GetWorkOutputPath(Org.Apache.Hadoop.Mapreduce.TaskInputOutputContext{KEYIN, VALUEIN, KEYOUT, VALUEOUT})"/>,
        /// and the framework will move them out similarly - thus the application-writer
        /// does not have to pick unique paths per task-attempt.</para>
        /// <para>The entire discussion holds true for maps of jobs with
        /// reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
        /// goes directly to HDFS.</para>
        /// <para>The committer obtained from <paramref name="context"/> is cast to
        /// <see cref="FileOutputCommitter"/>; the cast fails for any other committer type.</para>
        /// </remarks>
        /// <param name="context">the task context whose output committer supplies the work path</param>
        /// <returns>
        /// the <see cref="Org.Apache.Hadoop.FS.Path"/> to the task's temporary output
        /// directory for the map-reduce job.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public static Path GetWorkOutputPath <_T0>(TaskInputOutputContext <_T0> context)
        {
            return ((FileOutputCommitter)context.GetOutputCommitter()).GetWorkPath();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Configures the job to use the test's <c>FakeFileSystem</c> under the
        /// <c>faildel</c> scheme, writes task output, and asserts that both
        /// <c>AbortTask</c> and <c>AbortJob</c> throw an <c>IOException</c> whose
        /// message contains "fake delete failed" while the task part file and the
        /// job attempt directory remain on disk.
        /// </summary>
        /// <param name="version">
        /// value stored in the job configuration under
        /// <c>FileOutputCommitter.FileoutputcommitterAlgorithmVersion</c>
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void TestFailAbortInternal(int version)
        {
            Job           job  = Job.GetInstance();
            Configuration conf = job.GetConfiguration();

            // Route the default file system to the fake implementation registered below.
            conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
            conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem),
                          typeof(FileSystem));
            conf.Set(MRJobConfig.TaskAttemptId, attempt);
            conf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            FileOutputFormat.SetOutputPath(job, outDir);
            JobContext          jContext  = new JobContextImpl(conf, taskID.GetJobID());
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

            // do setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output
            TextOutputFormat <object, object> theOutputFormat = new TextOutputFormat();
            RecordWriter <object, object>     theRecordWriter = theOutputFormat.GetRecordWriter(tContext
                                                                                                );

            WriteOutput(theRecordWriter, tContext);
            // do abort: the task abort is expected to fail with an IOException
            Exception th = null;

            try
            {
                committer.AbortTask(tContext);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            Path     jtd          = committer.GetJobAttemptPath(jContext);
            FilePath jobTmpDir    = new FilePath(jtd.ToUri().GetPath());
            Path     ttd          = committer.GetTaskAttemptPath(tContext);
            FilePath taskTmpDir   = new FilePath(ttd.ToUri().GetPath());
            FilePath expectedFile = new FilePath(taskTmpDir, partFile);

            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");
            // The job abort is expected to fail the same way.
            th = null;
            try
            {
                committer.AbortJob(jContext, JobStatus.State.Failed);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Commits a task under application attempt 1 using
        /// <paramref name="commitVersion"/>, then recovers that task under
        /// application attempt 2 using <paramref name="recoveryVersion"/>, commits
        /// the job and validates the content of <c>outDir</c>.
        /// </summary>
        /// <remarks>
        /// For version 1 the committed-task path is asserted to exist and is
        /// validated; for any other version it is asserted not to exist.
        /// </remarks>
        /// <param name="commitVersion">algorithm version set before the first commit</param>
        /// <param name="recoveryVersion">algorithm version set before the recovery</param>
        /// <exception cref="System.Exception"/>
        private void TestRecoveryInternal(int commitVersion, int recoveryVersion)
        {
            Job job = Job.GetInstance();

            FileOutputFormat.SetOutputPath(job, outDir);
            Configuration conf = job.GetConfiguration();

            conf.Set(MRJobConfig.TaskAttemptId, attempt);
            conf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, commitVersion);
            JobContext          jContext  = new JobContextImpl(conf, taskID.GetJobID());
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

            // setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(tContext);

            WriteOutput(theRecordWriter, tContext);
            // do commit
            committer.CommitTask(tContext);
            Path     jobTempDir1 = committer.GetCommittedTaskPath(tContext);
            FilePath jtd         = new FilePath(jobTempDir1.ToUri().GetPath());

            // NUnit takes the condition first and the failure message second.
            if (commitVersion == 1)
            {
                NUnit.Framework.Assert.IsTrue(jtd.Exists(), "Version 1 commits to temporary dir " + jtd);
                ValidateContent(jtd);
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(jtd.Exists(), "Version 2 commits to output dir " + jtd);
            }
            //now while running the second app attempt,
            //recover the task output from first attempt
            Configuration conf2 = job.GetConfiguration();

            conf2.Set(MRJobConfig.TaskAttemptId, attempt);
            conf2.SetInt(MRJobConfig.ApplicationAttemptId, 2);
            conf2.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, recoveryVersion);
            JobContext          jContext2  = new JobContextImpl(conf2, taskID.GetJobID());
            TaskAttemptContext  tContext2  = new TaskAttemptContextImpl(conf2, taskID);
            FileOutputCommitter committer2 = new FileOutputCommitter(outDir, tContext2);

            committer2.SetupJob(tContext2);
            Path     jobTempDir2 = committer2.GetCommittedTaskPath(tContext2);
            FilePath jtd2        = new FilePath(jobTempDir2.ToUri().GetPath());

            committer2.RecoverTask(tContext2);
            if (recoveryVersion == 1)
            {
                NUnit.Framework.Assert.IsTrue(jtd2.Exists(), "Version 1 recovers to " + jtd2);
                ValidateContent(jtd2);
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(jtd2.Exists(), "Version 2 commits to output dir " + jtd2);
                if (commitVersion == 1)
                {
                    // The first attempt's committed-task directory must have been emptied.
                    NUnit.Framework.Assert.IsTrue(jtd.List().Length == 0,
                                                  "Version 2  recovery moves to output dir from " + jtd);
                }
            }
            committer2.CommitJob(jContext2);
            ValidateContent(outDir);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }