/// <exception cref="System.Exception"/>
        /// <summary>
        /// Drives one setup / write / commit cycle of a <c>FileOutputCommitter</c> whose task output is
        /// produced through <c>MapFileOutputFormat</c>, then hands the output directory to
        /// <c>ValidateMapFileOutputContent</c> and deletes it.
        /// </summary>
        /// <param name="version">Value stored under the committer's algorithm-version configuration key.</param>
        private void TestMapFileOutputCommitterInternal(int version)
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            // Configure the attempt id and the commit algorithm under test.
            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

            JobContext          jobCtx          = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext  taskCtx         = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Job-level setup first, then task-level setup.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Produce the task output through a map-file record writer.
            RecordWriter mapFileWriter = new MapFileOutputFormat().GetRecordWriter(taskCtx);
            WriteMapFileOutput(mapFileWriter, taskCtx);

            // Commit the task, then the job.
            outputCommitter.CommitTask(taskCtx);
            outputCommitter.CommitJob(jobCtx);

            // Check what landed in the output directory, then clean up.
            FileSystem fs = FileSystem.Get(job.GetConfiguration());
            ValidateMapFileOutputContent(fs, outDir);

            FilePath outputRoot = new FilePath(outDir.ToString());
            FileUtil.FullyDelete(outputRoot);
        }
Example #2
0
            /// <summary>From each split sampled, take the first numSamples / numSplits records.</summary>
            /// <remarks>
            /// At most <c>maxSplitsSampled</c> splits are read, in the order returned by the input format.
            /// The record counter is cumulative across splits, so split <c>i</c> stops once
            /// <c>(i + 1) * samplesPerSplit</c> records have been collected in total.
            /// When there are no splits to sample, an empty sample is returned instead of dividing by zero.
            /// Each reader is closed even if reading from it throws.
            /// </remarks>
            /// <param name="inf">Input format that supplies the splits and a record reader for each of them.</param>
            /// <param name="job">Job whose configuration is used for the sampling contexts and for copying keys.</param>
            /// <returns>The sampled keys, in the order they were read.</returns>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, Job job)
            {
                // ArrayList::toArray doesn't preserve type
                IList <InputSplit> splits  = inf.GetSplits(job);
                AList <K>          samples = new AList <K>(numSamples);
                int  splitsToSample        = Math.Min(maxSplitsSampled, splits.Count);

                if (splitsToSample == 0)
                {
                    // Nothing to read; returning here also avoids the division by zero below.
                    return((K[])Sharpen.Collections.ToArray(samples));
                }
                int  samplesPerSplit       = numSamples / splitsToSample;
                long records = 0;

                for (int i = 0; i < splitsToSample; ++i)
                {
                    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration
                                                                                        (), new TaskAttemptID());
                    RecordReader <K, V> reader = inf.CreateRecordReader(splits[i], samplingContext);
                    try
                    {
                        reader.Initialize(splits[i], samplingContext);
                        while (reader.NextKeyValue())
                        {
                            // Copy the key: the reader may reuse the instance it returns.
                            samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                     (), null));
                            ++records;
                            // Quota for this split reached (records counts across all splits so far).
                            if ((i + 1) * samplesPerSplit <= records)
                            {
                                break;
                            }
                        }
                    }
                    finally
                    {
                        // Release the reader even when Initialize/NextKeyValue throws.
                        reader.Close();
                    }
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
Example #3
0
 /// <summary>
 /// Instantiates the record reader registered under <c>ident</c> in <c>rrCstrMap</c>, wrapping the
 /// reader that <c>inf</c> creates for the given split.
 /// </summary>
 /// <param name="split">Split handed to the wrapped input format.</param>
 /// <param name="taskContext">Caller's context; its configuration and status reporting are reused.</param>
 /// <returns>The newly constructed reader.</returns>
 /// <exception cref="System.IO.IOException">
 /// No constructor is registered for <c>ident</c>, or constructing the reader failed reflectively.
 /// </exception>
 /// <exception cref="System.Exception"/>
 public override RecordReader CreateRecordReader(InputSplit split, TaskAttemptContext
                                                 taskContext)
 {
     try
     {
         if (!rrCstrMap.Contains(ident))
         {
             throw new IOException("No RecordReader for " + ident);
         }

         // Build a fresh attempt context from the configured attempt id, forwarding
         // status reports to the caller's context.
         Configuration conf      = GetConf(taskContext.GetConfiguration());
         var           attemptId = TaskAttemptID.ForName(conf.Get(MRJobConfig.TaskAttemptId));
         var           reporter  = new Parser.WrappedStatusReporter(taskContext);
         TaskAttemptContext wrappedContext = new TaskAttemptContextImpl(conf, attemptId, reporter);

         // Resolve the constructor first, then create the inner reader it will wrap.
         var readerCtor  = rrCstrMap[ident];
         var innerReader = inf.CreateRecordReader(split, wrappedContext);
         return(readerCtor.NewInstance(id, innerReader, cmpcl));
     }
     catch (MemberAccessException accessFailure)
     {
         throw new IOException(accessFailure);
     }
     catch (InstantiationException instantiationFailure)
     {
         throw new IOException(instantiationFailure);
     }
     catch (TargetInvocationException invocationFailure)
     {
         throw new IOException(invocationFailure);
     }
 }
Example #4
0
            public override void Run()
            {
                long records = 0;

                try
                {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.GetConfiguration(), new
                                                                            TaskAttemptID());
                    RecordReader <Text, Text> reader = inFormat.CreateRecordReader(splits[sampleStep *
                                                                                          idx], context);
                    reader.Initialize(splits[sampleStep * idx], context);
                    while (reader.NextKeyValue())
                    {
                        sampler.AddKey(new Text(reader.GetCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records)
                        {
                            break;
                        }
                    }
                }
                catch (IOException ie)
                {
                    System.Console.Error.WriteLine("Got an exception while reading splits " + StringUtils
                                                   .StringifyException(ie));
                    throw new RuntimeException(ie);
                }
                catch (Exception)
                {
                }
            }
Example #5
0
        // Use the LineRecordReader to read records from the file
        /// <summary>
        /// Reads the file behind <paramref name="testFileUrl"/> as consecutive splits of
        /// <paramref name="splitSize"/> bytes and returns every record value, converted to a string,
        /// in the order read.
        /// </summary>
        /// <remarks>
        /// The configuration sets <c>io.file.buffer.size</c> to 1. A new <c>LineRecordReader</c> is
        /// created for each split and closed once that split has been read, including when reading throws.
        /// </remarks>
        /// <param name="testFileUrl">Location of the file to read.</param>
        /// <param name="splitSize">Length of each split; must be positive when the file is non-empty.</param>
        /// <returns>The record values collected from all splits.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="splitSize"/> is not positive and the file is non-empty, so the split offset
        /// could never reach the end of the file.
        /// </exception>
        public virtual AList <string> ReadRecords(Uri testFileUrl, int splitSize)
        {
            // Set up context
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            long          testFileSize = testFile.Length();
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            Configuration conf         = new Configuration();

            if (splitSize <= 0 && testFileSize > 0)
            {
                // offset would never advance past the start of the file: fail fast instead of looping forever.
                throw new System.ArgumentException("splitSize must be positive: " + splitSize, "splitSize");
            }

            conf.SetInt("io.file.buffer.size", 1);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // Gather the records returned by the record reader
            AList <string> records = new AList <string>();
            long           offset  = 0;

            while (offset < testFileSize)
            {
                FileSplit        split  = new FileSplit(testFilePath, offset, splitSize, null);
                LineRecordReader reader = new LineRecordReader();
                try
                {
                    reader.Initialize(split, context);
                    while (reader.NextKeyValue())
                    {
                        records.AddItem(reader.GetCurrentValue().ToString());
                    }
                }
                finally
                {
                    // One reader per split: close each one so file handles do not accumulate.
                    reader.Close();
                }
                offset += splitSize;
            }
            return(records);
        }
Example #6
0
            /// <summary>
            /// Randomize the split order, then take the specified number of keys from
            /// each split sampled, where each key is selected with the specified
            /// probability and possibly replaced by a subsequently selected key when
            /// the quota of keys from that split is satisfied.
            /// </summary>
            /// <remarks>
            /// The seed used for shuffling and selection is logged at debug level. The <c>freq</c> member
            /// is reduced each time a selected key arrives after <c>numSamples</c> keys have been
            /// collected, so it is modified by this call. Each reader is closed even if reading from it throws.
            /// </remarks>
            /// <param name="inf">Input format that supplies the splits and a record reader for each of them.</param>
            /// <param name="job">Job whose configuration is used for the sampling contexts and for copying keys.</param>
            /// <returns>The sampled keys; at most <c>numSamples</c> of them.</returns>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            public virtual K[] GetSample(InputFormat <K, V> inf, Job job)
            {
                // ArrayList::toArray doesn't preserve type
                IList <InputSplit> splits  = inf.GetSplits(job);
                AList <K>          samples = new AList <K>(numSamples);
                int    splitsToSample      = Math.Min(maxSplitsSampled, splits.Count);
                Random r    = new Random();
                long   seed = r.NextLong();

                // Re-seed with a known value so the run can be identified from the debug log.
                r.SetSeed(seed);
                Log.Debug("seed: " + seed);
                // shuffle splits
                for (int i = 0; i < splits.Count; ++i)
                {
                    InputSplit tmp = splits[i];
                    int        j   = r.Next(splits.Count);
                    splits.Set(i, splits[j]);
                    splits.Set(j, tmp);
                }
                // our target rate is in terms of the maximum number of sample splits,
                // but we accept the possibility of sampling additional splits to hit
                // the target sample keyset
                for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Count && samples.Count <
                                                           numSamples); ++i_1)
                {
                    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration
                                                                                        (), new TaskAttemptID());
                    RecordReader <K, V> reader = inf.CreateRecordReader(splits[i_1], samplingContext);
                    try
                    {
                        reader.Initialize(splits[i_1], samplingContext);
                        while (reader.NextKeyValue())
                        {
                            if (r.NextDouble() <= freq)
                            {
                                if (samples.Count < numSamples)
                                {
                                    samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                             (), null));
                                }
                                else
                                {
                                    // When exceeding the maximum number of samples, replace a
                                    // random element with this one, then adjust the frequency
                                    // to reflect the possibility of existing elements being
                                    // pushed out
                                    int ind = r.Next(numSamples);
                                    // NOTE(review): if Next(n) is exclusive of n this check is always true; confirm.
                                    if (ind != numSamples)
                                    {
                                        samples.Set(ind, ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey
                                                                                  (), null));
                                    }
                                    freq *= (numSamples - 1) / (double)numSamples;
                                }
                            }
                        }
                    }
                    finally
                    {
                        // Release the reader even when Initialize/NextKeyValue throws.
                        reader.Close();
                    }
                }
                return((K[])Sharpen.Collections.ToArray(samples));
            }
Example #7
0
        /// <summary>
        /// Asserts that reading a file as two adjacent splits (the first
        /// <paramref name="firstSplitLength"/> bytes, then the remainder) yields the same total number
        /// of records as reading it as a single split.
        /// </summary>
        /// <param name="conf">Configuration to read with; its maximum line length is raised to <c>int.MaxValue</c>.</param>
        /// <param name="firstSplitLength">Length of the first split; must be smaller than the file size.</param>
        /// <param name="testFileSize">Total size of the file.</param>
        /// <param name="testFilePath">Path of the file to read.</param>
        /// <exception cref="System.IO.IOException"/>
        private void TestSplitRecordsForFile(Configuration conf, long firstSplitLength, long
                                             testFileSize, Path testFilePath)
        {
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            NUnit.Framework.Assert.IsTrue("unexpected test data at " + testFilePath,
                                          testFileSize > firstSplitLength);

            // Use the configured custom record delimiter, if there is one.
            string configuredDelimiter = conf.Get("textinputformat.record.delimiter");
            byte[] delimiterBytes      = null;
            if (configuredDelimiter != null)
            {
                delimiterBytes = Sharpen.Runtime.GetBytesForString(configuredDelimiter, Charsets.Utf8);
            }

            TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, new TaskAttemptID());

            // Whole file as one split.
            int wholeFileCount = CountRecordsInSplit(
                new FileSplit(testFilePath, 0, testFileSize, (string[])null), delimiterBytes, attemptContext);

            // First split only.
            int headCount = CountRecordsInSplit(
                new FileSplit(testFilePath, 0, firstSplitLength, (string[])null), delimiterBytes, attemptContext);

            // Everything after the first split.
            int tailCount = CountRecordsInSplit(
                new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength, (string[])null),
                delimiterBytes, attemptContext);

            NUnit.Framework.Assert.AreEqual("Unexpected number of records in split ", wholeFileCount,
                                            headCount + tailCount);
        }

        /// <summary>
        /// Reads <paramref name="split"/> to its end with a new <c>LineRecordReader</c>, closes the
        /// reader, and returns the number of records it produced.
        /// </summary>
        private static int CountRecordsInSplit(FileSplit split, byte[] recordDelimiterBytes,
                                               TaskAttemptContext context)
        {
            LineRecordReader lineReader = new LineRecordReader(recordDelimiterBytes);
            lineReader.Initialize(split, context);

            int count = 0;
            while (lineReader.NextKeyValue())
            {
                count++;
            }
            lineReader.Close();
            return count;
        }
        /// <summary>
        /// Submits two task attempts to a two-thread pool, waits for them to finish, commits the job,
        /// and asserts that no nested <c>SubDir</c> directory was created under <c>OutSubDir</c>.
        /// </summary>
        /// <param name="version">Value stored under the committer's algorithm-version configuration key.</param>
        /// <exception cref="System.Exception"/>
        private void TestConcurrentCommitTaskWithSubDir(int version)
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            // Route the "file" scheme to the test's RLFS implementation and drop any
            // file systems that were created before this setting was made.
            jobConf.SetClass("fs.file.impl", typeof(TestFileOutputCommitter.RLFS), typeof(FileSystem));
            FileSystem.CloseAll();

            JobContext          jobCtx      = new JobContextImpl(jobConf, taskID.GetJobID());
            FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jobCtx);
            amCommitter.SetupJob(jobCtx);

            // One context and one output format per concurrent task attempt.
            TaskAttemptContext[] attemptContexts = new TaskAttemptContextImpl[]
            {
                new TaskAttemptContextImpl(jobConf, taskID),
                new TaskAttemptContextImpl(jobConf, taskID1)
            };
            TextOutputFormat[] outputFormats = new TextOutputFormat[]
            {
                new _TextOutputFormat_508(this),
                new _TextOutputFormat_508(this)
            };

            ExecutorService pool = Executors.NewFixedThreadPool(2);
            try
            {
                for (int taskIdx = 0; taskIdx < attemptContexts.Length; taskIdx++)
                {
                    pool.Submit(new _Callable_524(this, outputFormats, taskIdx, attemptContexts));
                }
            }
            finally
            {
                // Stop accepting work, then poll once a second until the submitted tasks have finished.
                pool.Shutdown();
                bool finished = pool.AwaitTermination(1, TimeUnit.Seconds);
                while (!finished)
                {
                    Log.Info("Awaiting thread termination!");
                    finished = pool.AwaitTermination(1, TimeUnit.Seconds);
                }
            }

            amCommitter.CommitJob(jobCtx);

            RawLocalFileSystem localFs = new RawLocalFileSystem();
            localFs.SetConf(jobConf);
            Path nestedSubDir = new Path(OutSubDir, SubDir);
            NUnit.Framework.Assert.IsFalse("Must not end up with sub_dir/sub_dir", localFs.Exists(nestedSubDir));

            // Check the committed content, then clean up.
            ValidateContent(OutSubDir);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Example #9
0
        // Create a taskAttemptContext for the named output with
        // output format and output key/value types put in the context
        /// <summary>
        /// Returns the task attempt context for <paramref name="nameOutput"/>, creating it and storing
        /// it in <c>taskContexts</c> on first use.
        /// </summary>
        /// <remarks>
        /// The lookup uses <c>TryGetValue</c> rather than the indexer: on a generic .NET dictionary the
        /// indexer throws <c>KeyNotFoundException</c> for a missing key, so the first request for any
        /// output would fail before a context could be created.
        /// NOTE(review): assumes <c>taskContexts</c> is a generic dictionary keyed by output name;
        /// its declaration is outside this block, so confirm.
        /// </remarks>
        /// <param name="nameOutput">Name of the output whose context is requested.</param>
        /// <returns>The cached or newly created context.</returns>
        /// <exception cref="System.IO.IOException"/>
        private TaskAttemptContext GetContext(string nameOutput)
        {
            TaskAttemptContext taskContext;

            if (taskContexts.TryGetValue(nameOutput, out taskContext) && taskContext != null)
            {
                return(taskContext);
            }
            // The following trick leverages the instantiation of a record writer via
            // the job thus supporting arbitrary output formats.
            Job job = Job.GetInstance(context.GetConfiguration());

            job.SetOutputFormatClass(GetNamedOutputFormatClass(context, nameOutput));
            job.SetOutputKeyClass(GetNamedOutputKeyClass(context, nameOutput));
            job.SetOutputValueClass(GetNamedOutputValueClass(context, nameOutput));
            taskContext = new TaskAttemptContextImpl(job.GetConfiguration(), context.GetTaskAttemptID
                                                         (), new MultipleOutputs.WrappedStatusReporter(context));
            taskContexts[nameOutput] = taskContext;
            return(taskContext);
        }
        /// <summary>
        /// Expects constructing a <c>FileOutputCommitter</c> to throw an <c>IOException</c> when the
        /// algorithm version in the configuration is set to 3.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestInvalidVersionNumber()
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, 3);

            TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, taskID);
            try
            {
                new FileOutputCommitter(outDir, taskCtx);
                Fail("should've thrown an exception!");
            }
            catch (IOException)
            {
                // Expected outcome: the constructor rejected the configuration.
            }
        }
Example #11
0
        /// <summary>
        /// Runs the committer's setup and commit steps for a task that writes no output at all,
        /// then deletes the output directory.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestEmptyOutput()
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);

            JobContext          jobCtx          = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext  taskCtx         = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Job-level setup first, then task-level setup.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Deliberately no output is written between setup and commit.
            outputCommitter.CommitTask(taskCtx);
            outputCommitter.CommitJob(jobCtx);

            FilePath outputRoot = new FilePath(outDir.ToString());
            FileUtil.FullyDelete(outputRoot);
        }
Example #12
0
        /// <summary>
        /// Reads the <c>testBOM.txt</c> resource with a <c>LineRecordReader</c> and asserts that the
        /// first record does not begin with the byte-order mark U+FEFF.
        /// </summary>
        /// <remarks>
        /// The prefix check is ordinal. The default <c>string.StartsWith(string)</c> is culture-sensitive
        /// and treats U+FEFF as an ignorable character, so it can report a match for any string and
        /// fail this test even when the BOM was stripped correctly.
        /// </remarks>
        public virtual void TestStripBOM()
        {
            // the test data contains a BOM at the start of the file
            // confirm the BOM is skipped by LineRecordReader
            string utf8Bom     = "\uFEFF";
            Uri    testFileUrl = GetType().GetClassLoader().GetResource("testBOM.txt");

            NUnit.Framework.Assert.IsNotNull("Cannot find testBOM.txt", testFileUrl);
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // read the data and check whether BOM is skipped
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
            LineRecordReader reader = new LineRecordReader();
            bool firstLine = true;
            bool skipBOM   = true;

            try
            {
                reader.Initialize(split, context);
                while (reader.NextKeyValue())
                {
                    if (firstLine)
                    {
                        firstLine = false;
                        // Ordinal comparison: only a literal U+FEFF at index 0 counts as a BOM.
                        if (reader.GetCurrentValue().ToString().StartsWith(utf8Bom, System.StringComparison.Ordinal))
                        {
                            skipBOM = false;
                        }
                    }
                }
            }
            finally
            {
                // Close the reader even if reading throws.
                reader.Close();
            }
            NUnit.Framework.Assert.IsTrue("BOM is not skipped", skipBOM);
        }
Example #13
0
        /// <summary>
        /// Runs a setup / write / commit cycle with <c>TextOutputFormat</c> and compares the committed
        /// part file against the text built from <c>key1</c>, <c>key2</c>, <c>val1</c> and <c>val2</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestCommitter()
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);

            JobContext          jobCtx          = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext  taskCtx         = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Job-level setup first, then task-level setup.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Produce the task output as text.
            RecordWriter textWriter = new TextOutputFormat().GetRecordWriter(taskCtx);
            WriteOutput(textWriter, taskCtx);

            // Commit the task, then the job.
            outputCommitter.CommitTask(taskCtx);
            outputCommitter.CommitJob(jobCtx);

            // Build the text the committed part file is compared with, one line per Append chain.
            FilePath      committedFile = new FilePath(new Path(outDir, partFile).ToString());
            StringBuilder expectedText  = new StringBuilder();
            expectedText.Append(key1).Append('\t').Append(val1).Append("\n");
            expectedText.Append(val1).Append("\n");
            expectedText.Append(val2).Append("\n");
            expectedText.Append(key2).Append("\n");
            expectedText.Append(key1).Append("\n");
            expectedText.Append(key2).Append('\t').Append(val2).Append("\n");

            string actualText = UtilsForTests.Slurp(committedFile);
            NUnit.Framework.Assert.AreEqual(actualText, expectedText.ToString());

            FilePath outputRoot = new FilePath(outDir.ToString());
            FileUtil.FullyDelete(outputRoot);
        }
Example #14
0
        /// <summary>
        /// Reads a bzip2-compressed resource to the end, closes the reader twice, and then asserts
        /// that ten decompressors taken from <c>CodecPool</c> are ten distinct set members.
        /// </summary>
        public virtual void TestMultipleClose()
        {
            Uri resourceUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2");
            NUnit.Framework.Assert.IsNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", resourceUrl);

            FilePath localFile  = new FilePath(resourceUrl.GetFile());
            Path     inputPath  = new Path(localFile.GetAbsolutePath());
            long     inputBytes = localFile.Length();

            Configuration conf = new Configuration();
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, new TaskAttemptID());

            // Read the whole file as a single split; the records themselves are ignored.
            FileSplit        wholeFile  = new FileSplit(inputPath, 0, inputBytes, null);
            LineRecordReader lineReader = new LineRecordReader();
            lineReader.Initialize(wholeFile, attemptContext);
            while (lineReader.NextKeyValue())
            {
                // drain
            }

            // Close twice on purpose.
            lineReader.Close();
            lineReader.Close();

            BZip2Codec bzip2 = new BZip2Codec();
            bzip2.SetConf(conf);

            // Ten requests to the pool must give ten distinct set members.
            ICollection <Decompressor> borrowed = new HashSet <Decompressor>();
            int requested = 0;
            while (requested < 10)
            {
                borrowed.AddItem(CodecPool.GetDecompressor(bzip2));
                ++requested;
            }
            NUnit.Framework.Assert.AreEqual(10, borrowed.Count);
        }
        //test passed
        /// <summary>
        /// Writes task output, aborts the task and then the job, and asserts that the task's part file
        /// and the job's pending directory are gone and that the output directory is empty.
        /// </summary>
        /// <param name="version">Value stored under the committer's algorithm-version configuration key.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void TestAbortInternal(int version)
        {
            Job job = Job.GetInstance();
            FileOutputFormat.SetOutputPath(job, outDir);

            Configuration jobConf = job.GetConfiguration();
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

            JobContext          jobCtx          = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext  taskCtx         = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Job-level setup first, then task-level setup.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Produce the task output as text.
            RecordWriter textWriter = new TextOutputFormat().GetRecordWriter(taskCtx);
            WriteOutput(textWriter, taskCtx);

            // Abort the task: its part file under the work path must disappear.
            outputCommitter.AbortTask(taskCtx);
            Path     workPartPath = new Path(outputCommitter.GetWorkPath(), partFile);
            FilePath workPartFile = new FilePath(workPartPath.ToString());
            NUnit.Framework.Assert.IsFalse("task temp dir still exists", workPartFile.Exists());

            // Abort the job: the pending directory must disappear too.
            outputCommitter.AbortJob(jobCtx, JobStatus.State.Failed);
            Path     pendingPath = new Path(outDir, FileOutputCommitter.PendingDirName);
            FilePath pendingDir  = new FilePath(pendingPath.ToString());
            NUnit.Framework.Assert.IsFalse("job temp dir still exists", pendingDir.Exists());

            int remainingEntries = new FilePath(outDir.ToString()).ListFiles().Length;
            NUnit.Framework.Assert.AreEqual("Output directory not empty", 0, remainingEntries);

            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Example #16
0
        /// <summary>
        /// Creates three empty files, combines them into one <c>CombineFileSplit</c>, and verifies on a
        /// mocked reporter that <c>Progress()</c> has been called once after initialization and three
        /// times after the first <c>NextKeyValue()</c>, which must return false.
        /// </summary>
        public virtual void TestProgressIsReportedIfInputASeriesOfEmptyFiles()
        {
            JobConf jobConf = new JobConf();

            Path[]     inputPaths   = new Path[3];
            FilePath[] inputFiles   = new FilePath[3];
            long[]     inputLengths = new long[3];
            try
            {
                for (int n = 0; n < 3; n++)
                {
                    FilePath parentDir = new FilePath(outDir.ToString());
                    parentDir.Mkdir();

                    // Create the file by opening a writer and closing it without writing anything.
                    inputFiles[n] = new FilePath(parentDir, "testfile" + n);
                    FileWriter emptyWriter = new FileWriter(inputFiles[n]);
                    emptyWriter.Flush();
                    emptyWriter.Close();

                    inputLengths[n] = n;
                    inputPaths[n]   = new Path(outDir + "/testfile" + n);
                }

                CombineFileSplit  combinedSplit = new CombineFileSplit(inputPaths, inputLengths);
                TaskAttemptID     mockAttemptId = Org.Mockito.Mockito.Mock <TaskAttemptID>();
                Task.TaskReporter mockReporter  = Org.Mockito.Mockito.Mock <Task.TaskReporter>();

                TaskAttemptContextImpl attemptContext =
                    new TaskAttemptContextImpl(jobConf, mockAttemptId, mockReporter);
                CombineFileRecordReader combinedReader = new CombineFileRecordReader(
                    combinedSplit, attemptContext, typeof(TestCombineFileRecordReader.TextRecordReaderWrapper));

                combinedReader.Initialize(combinedSplit, attemptContext);
                Org.Mockito.Mockito.Verify(mockReporter).Progress();

                NUnit.Framework.Assert.IsFalse(combinedReader.NextKeyValue());
                Org.Mockito.Mockito.Verify(mockReporter, Org.Mockito.Mockito.Times(3)).Progress();
            }
            finally
            {
                // Remove the files whether or not the verification succeeded.
                FileUtil.FullyDelete(new FilePath(outDir.ToString()));
            }
        }
        /// <summary>
        /// Runs the committer against the <c>faildel</c> file system scheme (backed by the test's
        /// <c>FakeFileSystem</c>) and asserts that both <c>AbortTask</c> and <c>AbortJob</c> throw an
        /// <c>IOException</c> mentioning "fake delete failed", leaving the task's part file and the
        /// job attempt directory in place.
        /// </summary>
        /// <param name="version">Value stored under the committer's algorithm-version configuration key.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void TestFailAbortInternal(int version)
        {
            Job           job     = Job.GetInstance();
            Configuration jobConf = job.GetConfiguration();

            // Make "faildel" the default file system and map it to the fake implementation.
            jobConf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
            jobConf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem),
                             typeof(FileSystem));
            jobConf.Set(MRJobConfig.TaskAttemptId, attempt);
            jobConf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            FileOutputFormat.SetOutputPath(job, outDir);

            JobContext          jobCtx          = new JobContextImpl(jobConf, taskID.GetJobID());
            TaskAttemptContext  taskCtx         = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter(outDir, taskCtx);

            // Job-level setup first, then task-level setup.
            outputCommitter.SetupJob(jobCtx);
            outputCommitter.SetupTask(taskCtx);

            // Produce the task output as text.
            TextOutputFormat <object, object> textFormat = new TextOutputFormat();
            RecordWriter <object, object>     textWriter = textFormat.GetRecordWriter(taskCtx);
            WriteOutput(textWriter, taskCtx);

            // Aborting the task must fail with the fake delete error.
            Exception caught = null;
            try
            {
                outputCommitter.AbortTask(taskCtx);
            }
            catch (IOException abortTaskFailure)
            {
                caught = abortTaskFailure;
            }
            NUnit.Framework.Assert.IsNotNull(caught);
            NUnit.Framework.Assert.IsTrue(caught is IOException);
            NUnit.Framework.Assert.IsTrue(caught.Message.Contains("fake delete failed"));

            // The part file must still be there because the delete failed.
            Path     jobAttemptPath  = outputCommitter.GetJobAttemptPath(jobCtx);
            FilePath jobAttemptDir   = new FilePath(jobAttemptPath.ToUri().GetPath());
            Path     taskAttemptPath = outputCommitter.GetTaskAttemptPath(taskCtx);
            FilePath taskAttemptDir  = new FilePath(taskAttemptPath.ToUri().GetPath());
            FilePath partOutput      = new FilePath(taskAttemptDir, partFile);
            NUnit.Framework.Assert.IsTrue(partOutput + " does not exists", partOutput.Exists());

            // Aborting the job must fail the same way and leave the job attempt directory behind.
            caught = null;
            try
            {
                outputCommitter.AbortJob(jobCtx, JobStatus.State.Failed);
            }
            catch (IOException abortJobFailure)
            {
                caught = abortJobFailure;
            }
            NUnit.Framework.Assert.IsNotNull(caught);
            NUnit.Framework.Assert.IsTrue(caught is IOException);
            NUnit.Framework.Assert.IsTrue(caught.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue("job temp dir does not exists", jobAttemptDir.Exists());

            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
        /// <summary>
        /// Commits a task with one committer algorithm version, then builds a second
        /// committer for application attempt 2 that recovers the task with
        /// <paramref name="recoveryVersion"/> and commits the job. After each step the
        /// location of the task output is checked, and the final job output is validated.
        /// </summary>
        /// <param name="commitVersion">
        /// Algorithm version configured before the first attempt commits its task.
        /// </param>
        /// <param name="recoveryVersion">
        /// Algorithm version configured before the second attempt recovers the task.
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestRecoveryInternal(int commitVersion, int recoveryVersion)
        {
            Job job = Job.GetInstance();

            FileOutputFormat.SetOutputPath(job, outDir);
            Configuration conf = job.GetConfiguration();

            conf.Set(MRJobConfig.TaskAttemptId, attempt);
            conf.SetInt(MRJobConfig.ApplicationAttemptId, 1);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, commitVersion);
            JobContext          jContext  = new JobContextImpl(conf, taskID.GetJobID());
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

            // setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(tContext);

            WriteOutput(theRecordWriter, tContext);
            // do commit
            committer.CommitTask(tContext);
            Path     jobTempDir1 = committer.GetCommittedTaskPath(tContext);
            FilePath jtd         = new FilePath(jobTempDir1.ToUri().GetPath());

            // NUnit assertions take the condition first and the failure message last.
            if (commitVersion == 1)
            {
                NUnit.Framework.Assert.IsTrue(jtd.Exists(), "Version 1 commits to temporary dir " + jtd);
                ValidateContent(jtd);
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(jtd.Exists(), "Version 2 commits to output dir " + jtd);
            }
            //now while running the second app attempt,
            //recover the task output from first attempt
            // NOTE(review): conf2 is obtained from the same job as conf; presumably it is
            // the same Configuration instance -- confirm before relying on isolation.
            Configuration conf2 = job.GetConfiguration();

            conf2.Set(MRJobConfig.TaskAttemptId, attempt);
            conf2.SetInt(MRJobConfig.ApplicationAttemptId, 2);
            conf2.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, recoveryVersion);
            JobContext          jContext2  = new JobContextImpl(conf2, taskID.GetJobID());
            TaskAttemptContext  tContext2  = new TaskAttemptContextImpl(conf2, taskID);
            FileOutputCommitter committer2 = new FileOutputCommitter(outDir, tContext2);

            committer2.SetupJob(tContext2);
            Path     jobTempDir2 = committer2.GetCommittedTaskPath(tContext2);
            FilePath jtd2        = new FilePath(jobTempDir2.ToUri().GetPath());

            committer2.RecoverTask(tContext2);
            if (recoveryVersion == 1)
            {
                NUnit.Framework.Assert.IsTrue(jtd2.Exists(), "Version 1 recovers to " + jtd2);
                ValidateContent(jtd2);
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(jtd2.Exists(), "Version 2 commits to output dir " + jtd2);
                if (commitVersion == 1)
                {
                    // The first attempt's committed-task directory must have been emptied.
                    NUnit.Framework.Assert.IsTrue(jtd.List().Length == 0,
                                                  "Version 2  recovery moves to output dir from " + jtd);
                }
            }
            committer2.CommitJob(jContext2);
            ValidateContent(outDir);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Example #19
0
        /// <summary>
        /// Checks the value lengths and key positions reported by a
        /// <c>LineRecordReader</c> constructed with a null delimiter, reading
        /// "1234567890\r\n12\r\n345" through two splits and then "123456789\r\r\n"
        /// through a single split, with a 10-byte "io.file.buffer.size".
        /// </summary>
        public virtual void TestUncompressedInputDefaultDelimiterPosValue()
        {
            Configuration conf      = new Configuration();
            string        inputData = "1234567890\r\n12\r\n345";
            Path          inputFile = CreateInputFile(conf, inputData);

            conf.SetInt("io.file.buffer.size", 10);
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            FileSplit          split   = new FileSplit(inputFile, 0, 15, (string[])null);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
            LineRecordReader   reader  = new LineRecordReader(null);

            reader.Initialize(split, context);
            LongWritable key;
            Text         value;

            reader.NextKeyValue();
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get first record:"1234567890"
            NUnit.Framework.Assert.AreEqual(10, value.GetLength());
            NUnit.Framework.Assert.AreEqual(0, key.Get());
            reader.NextKeyValue();
            // Get second record:"12"
            NUnit.Framework.Assert.AreEqual(2, value.GetLength());
            // Key should be 12 right after "1234567890\r\n"
            NUnit.Framework.Assert.AreEqual(12, key.Get());
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 16 right after "1234567890\r\n12\r\n"
            NUnit.Framework.Assert.AreEqual(16, key.Get());
            // Release the first reader before replacing it with one for the next split.
            reader.Close();

            split  = new FileSplit(inputFile, 15, 4, (string[])null);
            reader = new LineRecordReader(null);
            reader.Initialize(split, context);
            // The second split dropped the first record "\n"
            reader.NextKeyValue();
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get third record:"345"
            NUnit.Framework.Assert.AreEqual(3, value.GetLength());
            // Key should be 16 right after "1234567890\r\n12\r\n"
            NUnit.Framework.Assert.AreEqual(16, key.Get());
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 19 right after "1234567890\r\n12\r\n345"
            NUnit.Framework.Assert.AreEqual(19, key.Get());
            reader.Close();

            inputData = "123456789\r\r\n";
            inputFile = CreateInputFile(conf, inputData);
            split     = new FileSplit(inputFile, 0, 12, (string[])null);
            reader    = new LineRecordReader(null);
            reader.Initialize(split, context);
            reader.NextKeyValue();
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get first record:"123456789"
            NUnit.Framework.Assert.AreEqual(9, value.GetLength());
            NUnit.Framework.Assert.AreEqual(0, key.Get());
            reader.NextKeyValue();
            // Get second record:""
            NUnit.Framework.Assert.AreEqual(0, value.GetLength());
            // Key should be 10 right after "123456789\r"
            NUnit.Framework.Assert.AreEqual(10, key.Get());
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 12 right after "123456789\r\r\n"
            NUnit.Framework.Assert.AreEqual(12, key.Get());
            reader.Close();
        }
Example #20
0
        /// <summary>
        /// Checks the record values and key positions reported by a
        /// <c>LineRecordReader</c> configured with custom multi-character delimiters
        /// ("++" and "|+|"): first with fixed splits over two inputs, then by sweeping
        /// the read-buffer size and the split size over a third input.
        /// </summary>
        public virtual void TestUncompressedInputCustomDelimiterPosValue()
        {
            Configuration conf = new Configuration();

            conf.SetInt("io.file.buffer.size", 10);
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            string inputData = "abcdefghij++kl++mno";
            Path   inputFile = CreateInputFile(conf, inputData);
            string delimiter = "++";

            byte[] recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8);
            int                splitLength = 15;
            FileSplit          split       = new FileSplit(inputFile, 0, splitLength, (string[])null);
            TaskAttemptContext context     = new TaskAttemptContextImpl(conf, new TaskAttemptID());
            LineRecordReader   reader      = new LineRecordReader(recordDelimiterBytes);

            reader.Initialize(split, context);
            // NUnit assertions take the checked value(s) first and the message last.
            // Get first record: "abcdefghij"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            LongWritable key   = reader.GetCurrentKey();
            Text         value = reader.GetCurrentValue();

            NUnit.Framework.Assert.AreEqual(10, value.GetLength(), "Wrong length for record value");
            NUnit.Framework.Assert.AreEqual(0, key.Get(), "Wrong position after record read");
            // Get second record: "kl"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            NUnit.Framework.Assert.AreEqual(2, value.GetLength(), "Wrong length for record value");
            // Key should be 12 right after "abcdefghij++"
            NUnit.Framework.Assert.AreEqual(12, key.Get(), "Wrong position after record read");
            // Get third record: "mno"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            NUnit.Framework.Assert.AreEqual(3, value.GetLength(), "Wrong length for record value");
            // Key should be 16 right after "abcdefghij++kl++"
            NUnit.Framework.Assert.AreEqual(16, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 19 right after "abcdefghij++kl++mno"
            NUnit.Framework.Assert.AreEqual(19, key.Get(), "Wrong position after record read");
            // after refresh should be empty
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
            reader.Close();

            split  = new FileSplit(inputFile, splitLength, inputData.Length - splitLength, (string[])null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            // No record is in the second split because the second split dropped
            // the first record, which was already reported by the first split.
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue(), "Unexpected record returned");
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
            reader.Close();

            // multi char delimiter with starting part of the delimiter in the data
            inputData   = "abcd+efgh++ijk++mno";
            inputFile   = CreateInputFile(conf, inputData);
            splitLength = 5;
            split       = new FileSplit(inputFile, 0, splitLength, (string[])null);
            reader      = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            // Get first record: "abcd+efgh"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            NUnit.Framework.Assert.AreEqual(0, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.AreEqual(9, value.GetLength(), "Wrong length for record value");
            // should have jumped over the delimiter, no record
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            NUnit.Framework.Assert.AreEqual(11, key.Get(), "Wrong position after record read");
            // after refresh should be empty
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
            reader.Close();

            // next split: check for duplicate or dropped records
            split  = new FileSplit(inputFile, splitLength, inputData.Length - splitLength, (string[])null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get second record: "ijk" first in this split
            NUnit.Framework.Assert.AreEqual(11, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.AreEqual(3, value.GetLength(), "Wrong length for record value");
            // Get third record: "mno" second in this split
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            NUnit.Framework.Assert.AreEqual(16, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.AreEqual(3, value.GetLength(), "Wrong length for record value");
            // should be at the end of the input
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            NUnit.Framework.Assert.AreEqual(19, key.Get(), "Wrong position after record read");
            reader.Close();

            inputData            = "abcd|efgh|+|ij|kl|+|mno|pqr";
            inputFile            = CreateInputFile(conf, inputData);
            delimiter            = "|+|";
            recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8);
            // walking over the buffer and split sizes checks for proper processing
            // of the ambiguous bytes of the delimiter
            for (int bufferSize = 1; bufferSize <= inputData.Length; bufferSize++)
            {
                for (int splitSize = 1; splitSize < inputData.Length; splitSize++)
                {
                    // track where we are in the inputdata
                    int keyPosition = 0;
                    conf.SetInt("io.file.buffer.size", bufferSize);
                    // The split is sized by splitSize and the read buffer by bufferSize,
                    // so each buffer/split combination is exercised independently.
                    split  = new FileSplit(inputFile, 0, splitSize, (string[])null);
                    reader = new LineRecordReader(recordDelimiterBytes);
                    reader.Initialize(split, context);
                    // Get the first record: "abcd|efgh" always possible
                    NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
                    key   = reader.GetCurrentKey();
                    value = reader.GetCurrentValue();
                    NUnit.Framework.Assert.AreEqual("abcd|efgh", value.ToString());
                    // Position should be 0 right at the start
                    NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                    "Wrong position after record read");
                    // Position should be 12 right after the first "|+|"
                    keyPosition = 12;
                    // get the next record: "ij|kl" if the split/buffer allows it
                    if (reader.NextKeyValue())
                    {
                        // check the record info: "ij|kl"
                        NUnit.Framework.Assert.AreEqual("ij|kl", value.ToString());
                        NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                        "Wrong position after record read");
                        // Position should be 20 after the second "|+|"
                        keyPosition = 20;
                    }
                    // get the third record: "mno|pqr" if the split/buffer allows it
                    if (reader.NextKeyValue())
                    {
                        // check the record info: "mno|pqr"
                        NUnit.Framework.Assert.AreEqual("mno|pqr", value.ToString());
                        NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                        "Wrong position after record read");
                        // Position should be the end of the input
                        keyPosition = inputData.Length;
                    }
                    NUnit.Framework.Assert.IsFalse(reader.NextKeyValue(), "Unexpected record returned");
                    // no more records can be read we should be at the last position
                    NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                    "Wrong position after record read");
                    // after refresh should be empty
                    key = reader.GetCurrentKey();
                    NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
                    reader.Close();
                }
            }
        }