Esempio n. 1
0
        public virtual void TestFormatWithCustomSeparator()
        {
            JobConf job       = new JobConf();
            string  separator = "\u0001";

            job.Set("mapreduce.output.textoutputformat.separator", separator);
            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, workDir);
            FileSystem fs = workDir.GetFileSystem(job);

            if (!fs.Mkdirs(workDir))
            {
                NUnit.Framework.Assert.Fail("Failed to create output directory");
            }
            string file = "test_custom.txt";
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            TextOutputFormat <object, object> theOutputFormat = new TextOutputFormat <object, object
                                                                                      >();
            RecordWriter <object, object> theRecordWriter = theOutputFormat.GetRecordWriter(localFs
                                                                                            , job, file, reporter);

            Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
            Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
            Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
            Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
            NullWritable nullWritable      = NullWritable.Get();

            try
            {
                theRecordWriter.Write(key1, val1);
                theRecordWriter.Write(null, nullWritable);
                theRecordWriter.Write(null, val1);
                theRecordWriter.Write(nullWritable, val2);
                theRecordWriter.Write(key2, nullWritable);
                theRecordWriter.Write(key1, null);
                theRecordWriter.Write(null, null);
                theRecordWriter.Write(key2, val2);
            }
            finally
            {
                theRecordWriter.Close(reporter);
            }
            FilePath      expectedFile   = new FilePath(new Path(workDir, file).ToString());
            StringBuilder expectedOutput = new StringBuilder();

            expectedOutput.Append(key1).Append(separator).Append(val1).Append("\n");
            expectedOutput.Append(val1).Append("\n");
            expectedOutput.Append(val2).Append("\n");
            expectedOutput.Append(key2).Append("\n");
            expectedOutput.Append(key1).Append("\n");
            expectedOutput.Append(key2).Append(separator).Append(val2).Append("\n");
            string output = UtilsForTests.Slurp(expectedFile);

            NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
        }
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestcheckOutputSpecsForbidRecordCompression()
        {
            JobConf    job       = new JobConf();
            FileSystem fs        = FileSystem.GetLocal(job);
            Path       dir       = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       outputdir = new Path(Runtime.GetProperty("test.build.data", ".") + "/output"
                                            );

            fs.Delete(dir, true);
            fs.Delete(outputdir, true);
            if (!fs.Mkdirs(dir))
            {
                Fail("Failed to create output directory");
            }
            FileOutputFormat.SetWorkOutputPath(job, dir);
            // Without outputpath, FileOutputFormat.checkoutputspecs will throw
            // InvalidJobConfException
            FileOutputFormat.SetOutputPath(job, outputdir);
            // SequenceFileAsBinaryOutputFormat doesn't support record compression
            // It should throw an exception when checked by checkOutputSpecs
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType
                                                                      .Block);
            try
            {
                new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(fs, job);
            }
            catch (Exception e)
            {
                Fail("Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat:"
                     + "Caught " + e.GetType().FullName);
            }
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType
                                                                      .Record);
            try
            {
                new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(fs, job);
                Fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat"
                     );
            }
            catch (InvalidJobConfException)
            {
            }
            catch (Exception e)
            {
                // expected
                Fail("Expected " + typeof(InvalidJobConfException).FullName + "but caught " + e.GetType
                         ().FullName);
            }
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestCommitter()
        {
            JobConf job = new JobConf();

            SetConfForFileOutputCommitter(job);
            JobContext          jContext  = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(job, taskID);
            FileOutputCommitter committer = new FileOutputCommitter();

            FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            string file = "test.txt";
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            // write output
            FileSystem       localFs         = FileSystem.GetLocal(job);
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file
                                                                               , reporter);

            WriteOutput(theRecordWriter, reporter);
            // do commit
            committer.CommitTask(tContext);
            committer.CommitJob(jContext);
            // validate output
            FilePath      expectedFile   = new FilePath(new Path(outDir, file).ToString());
            StringBuilder expectedOutput = new StringBuilder();

            expectedOutput.Append(key1).Append('\t').Append(val1).Append("\n");
            expectedOutput.Append(val1).Append("\n");
            expectedOutput.Append(val2).Append("\n");
            expectedOutput.Append(key2).Append("\n");
            expectedOutput.Append(key1).Append("\n");
            expectedOutput.Append(key2).Append('\t').Append(val2).Append("\n");
            string output = UtilsForTests.Slurp(expectedFile);

            NUnit.Framework.Assert.AreEqual(output, expectedOutput.ToString());
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestAbort()
        {
            JobConf job = new JobConf();

            SetConfForFileOutputCommitter(job);
            JobContext          jContext  = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(job, taskID);
            FileOutputCommitter committer = new FileOutputCommitter();

            FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));
            // do setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            string file = "test.txt";
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            // write output
            FileSystem       localFs         = FileSystem.GetLocal(job);
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, file
                                                                               , reporter);

            WriteOutput(theRecordWriter, reporter);
            // do abort
            committer.AbortTask(tContext);
            FilePath expectedFile = new FilePath(new Path(committer.GetTaskAttemptPath(tContext
                                                                                       ), file).ToString());

            NUnit.Framework.Assert.IsFalse("task temp dir still exists", expectedFile.Exists(
                                               ));
            committer.AbortJob(jContext, JobStatus.State.Failed);
            expectedFile = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName).ToString
                                            ());
            NUnit.Framework.Assert.IsFalse("job temp dir " + expectedFile + " still exists",
                                           expectedFile.Exists());
            NUnit.Framework.Assert.AreEqual("Output directory not empty", 0, new FilePath(outDir
                                                                                          .ToString()).ListFiles().Length);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
        // A random task attempt id for testing.
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestBinary()
        {
            JobConf    job = new JobConf();
            FileSystem fs  = FileSystem.GetLocal(job);
            Path       dir = new Path(new Path(new Path(Runtime.GetProperty("test.build.data", ".")
                                                        ), FileOutputCommitter.TempDirName), "_" + attempt);
            Path   file = new Path(dir, "testbinary.seq");
            Random r    = new Random();
            long   seed = r.NextLong();

            r.SetSeed(seed);
            fs.Delete(dir, true);
            if (!fs.Mkdirs(dir))
            {
                Fail("Failed to create output directory");
            }
            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, dir);
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable
                                                                                       ));
            SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable
                                                                                         ));
            SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
            SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType
                                                                      .Block);
            BytesWritable bkey = new BytesWritable();
            BytesWritable bval = new BytesWritable();
            RecordWriter <BytesWritable, BytesWritable> writer = new SequenceFileAsBinaryOutputFormat
                                                                     ().GetRecordWriter(fs, job, file.ToString(), Reporter.Null);
            IntWritable      iwritable = new IntWritable();
            DoubleWritable   dwritable = new DoubleWritable();
            DataOutputBuffer outbuf    = new DataOutputBuffer();

            Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
            try
            {
                for (int i = 0; i < Records; ++i)
                {
                    iwritable = new IntWritable(r.Next());
                    iwritable.Write(outbuf);
                    bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    dwritable = new DoubleWritable(r.NextDouble());
                    dwritable.Write(outbuf);
                    bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
                    outbuf.Reset();
                    writer.Write(bkey, bval);
                }
            }
            finally
            {
                writer.Close(Reporter.Null);
            }
            InputFormat <IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat <IntWritable
                                                                                             , DoubleWritable>();
            int count = 0;

            r.SetSeed(seed);
            DataInputBuffer buf       = new DataInputBuffer();
            int             NumSplits = 3;

            SequenceFileInputFormat.AddInputPath(job, file);
            Log.Info("Reading data by SequenceFileInputFormat");
            foreach (InputSplit split in iformat.GetSplits(job, NumSplits))
            {
                RecordReader <IntWritable, DoubleWritable> reader = iformat.GetRecordReader(split,
                                                                                            job, Reporter.Null);
                try
                {
                    int    sourceInt;
                    double sourceDouble;
                    while (reader.Next(iwritable, dwritable))
                    {
                        sourceInt    = r.Next();
                        sourceDouble = r.NextDouble();
                        NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":"
                                                        + sourceInt + "*", sourceInt, iwritable.Get());
                        NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":"
                                                      + sourceDouble + "*", double.Compare(dwritable.Get(), sourceDouble) == 0);
                        ++count;
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job = new JobConf();

            job.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, workDir);
            FileSystem fs = workDir.GetFileSystem(job);

            if (!fs.Mkdirs(workDir))
            {
                Fail("Failed to create output directory");
            }
            //System.out.printf("workdir: %s\n", workDir.toString());
            TestMultipleTextOutputFormat.Test1(job);
            TestMultipleTextOutputFormat.Test2(job);
            string   file_11         = "1-part-00000";
            FilePath expectedFile_11 = new FilePath(new Path(workDir, file_11).ToString());
            //System.out.printf("expectedFile_11: %s\n", new Path(workDir, file_11).toString());
            StringBuilder expectedOutput = new StringBuilder();

            for (int i = 10; i < 20; i++)
            {
                expectedOutput.Append(string.Empty + i).Append('\t').Append(string.Empty + i).Append
                    ("\n");
            }
            string output = UtilsForTests.Slurp(expectedFile_11);

            //System.out.printf("File_2 output: %s\n", output);
            NUnit.Framework.Assert.AreEqual(output, expectedOutput.ToString());
            string   file_12         = "2-part-00000";
            FilePath expectedFile_12 = new FilePath(new Path(workDir, file_12).ToString());

            //System.out.printf("expectedFile_12: %s\n", new Path(workDir, file_12).toString());
            expectedOutput = new StringBuilder();
            for (int i_1 = 20; i_1 < 30; i_1++)
            {
                expectedOutput.Append(string.Empty + i_1).Append('\t').Append(string.Empty + i_1)
                .Append("\n");
            }
            output = UtilsForTests.Slurp(expectedFile_12);
            //System.out.printf("File_2 output: %s\n", output);
            NUnit.Framework.Assert.AreEqual(output, expectedOutput.ToString());
            string   file_13         = "3-part-00000";
            FilePath expectedFile_13 = new FilePath(new Path(workDir, file_13).ToString());

            //System.out.printf("expectedFile_13: %s\n", new Path(workDir, file_13).toString());
            expectedOutput = new StringBuilder();
            for (int i_2 = 30; i_2 < 40; i_2++)
            {
                expectedOutput.Append(string.Empty + i_2).Append('\t').Append(string.Empty + i_2)
                .Append("\n");
            }
            output = UtilsForTests.Slurp(expectedFile_13);
            //System.out.printf("File_2 output: %s\n", output);
            NUnit.Framework.Assert.AreEqual(output, expectedOutput.ToString());
            string   file_2         = "2/3";
            FilePath expectedFile_2 = new FilePath(new Path(workDir, file_2).ToString());

            //System.out.printf("expectedFile_2: %s\n", new Path(workDir, file_2).toString());
            expectedOutput = new StringBuilder();
            for (int i_3 = 10; i_3 < 40; i_3++)
            {
                expectedOutput.Append(string.Empty + i_3).Append('\t').Append(string.Empty + i_3)
                .Append("\n");
            }
            output = UtilsForTests.Slurp(expectedFile_2);
            //System.out.printf("File_2 output: %s\n", output);
            NUnit.Framework.Assert.AreEqual(output, expectedOutput.ToString());
        }
Esempio n. 7
0
        public virtual void TestCompress()
        {
            JobConf job = new JobConf();

            job.Set(JobContext.TaskAttemptId, attempt);
            job.Set(FileOutputFormat.Compress, "true");
            FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(job, workDir);
            FileSystem fs = workDir.GetFileSystem(job);

            if (!fs.Mkdirs(workDir))
            {
                NUnit.Framework.Assert.Fail("Failed to create output directory");
            }
            string file = "test_compress.txt";
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            TextOutputFormat <object, object> theOutputFormat = new TextOutputFormat <object, object
                                                                                      >();
            RecordWriter <object, object> theRecordWriter = theOutputFormat.GetRecordWriter(localFs
                                                                                            , job, file, reporter);

            Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
            Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
            Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
            Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
            NullWritable nullWritable      = NullWritable.Get();

            try
            {
                theRecordWriter.Write(key1, val1);
                theRecordWriter.Write(null, nullWritable);
                theRecordWriter.Write(null, val1);
                theRecordWriter.Write(nullWritable, val2);
                theRecordWriter.Write(key2, nullWritable);
                theRecordWriter.Write(key1, null);
                theRecordWriter.Write(null, null);
                theRecordWriter.Write(key2, val2);
            }
            finally
            {
                theRecordWriter.Close(reporter);
            }
            StringBuilder expectedOutput = new StringBuilder();

            expectedOutput.Append(key1).Append("\t").Append(val1).Append("\n");
            expectedOutput.Append(val1).Append("\n");
            expectedOutput.Append(val2).Append("\n");
            expectedOutput.Append(key2).Append("\n");
            expectedOutput.Append(key1).Append("\n");
            expectedOutput.Append(key2).Append("\t").Append(val2).Append("\n");
            DefaultCodec codec = new DefaultCodec();

            codec.SetConf(job);
            Path                   expectedFile = new Path(workDir, file + codec.GetDefaultExtension());
            FileInputStream        istream      = new FileInputStream(expectedFile.ToString());
            CompressionInputStream cistream     = codec.CreateInputStream(istream);
            LineReader             reader       = new LineReader(cistream);
            string                 output       = string.Empty;

            Org.Apache.Hadoop.IO.Text @out = new Org.Apache.Hadoop.IO.Text();
            while (reader.ReadLine(@out) > 0)
            {
                output += @out;
                output += "\n";
            }
            reader.Close();
            NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
        }
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestFailAbort()
        {
            JobConf job = new JobConf();

            job.Set(FileSystem.FsDefaultNameKey, "faildel:///");
            job.SetClass("fs.faildel.impl", typeof(TestMRCJCFileOutputCommitter.FakeFileSystem
                                                   ), typeof(FileSystem));
            SetConfForFileOutputCommitter(job);
            JobContext          jContext  = new JobContextImpl(job, ((JobID)taskID.GetJobID()));
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(job, taskID);
            FileOutputCommitter committer = new FileOutputCommitter();

            FileOutputFormat.SetWorkOutputPath(job, committer.GetTaskAttemptPath(tContext));
            // do setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            string   file      = "test.txt";
            FilePath jobTmpDir = new FilePath(committer.GetJobAttemptPath(jContext).ToUri().GetPath
                                                  ());
            FilePath taskTmpDir = new FilePath(committer.GetTaskAttemptPath(tContext).ToUri()
                                               .GetPath());
            FilePath expectedFile = new FilePath(taskTmpDir, file);
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;
            // write output
            FileSystem       localFs         = new TestMRCJCFileOutputCommitter.FakeFileSystem();
            TextOutputFormat theOutputFormat = new TextOutputFormat();
            RecordWriter     theRecordWriter = theOutputFormat.GetRecordWriter(localFs, job, expectedFile
                                                                               .GetAbsolutePath(), reporter);

            WriteOutput(theRecordWriter, reporter);
            // do abort
            Exception th = null;

            try
            {
                committer.AbortTask(tContext);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue(expectedFile + " does not exists", expectedFile.Exists
                                              ());
            th = null;
            try
            {
                committer.AbortJob(jContext, JobStatus.State.Failed);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue("job temp dir does not exists", jobTmpDir.Exists());
        }