Ejemplo n.º 1
0
 /// <summary>
 /// Asserts that the "MapRecords" counter of a job equals <paramref name="expected"/>,
 /// once looked up through the enum constant and once through group and counter names.
 /// </summary>
 /// <param name="runningJob">Job whose counters are inspected.</param>
 /// <param name="expected">Value that both lookups must return.</param>
 /// <exception cref="System.IO.IOException"/>
 public static void VerifyCounters(RunningJob runningJob, int expected)
 {
     // Lookup keyed by the enum constant.
     var enumCounterValue = runningJob.GetCounters().GetCounter(
         TestUserDefinedCounters.EnumCounter.MapRecords);
     NUnit.Framework.Assert.AreEqual(expected, enumCounterValue);

     // Lookup keyed by group name and counter name; the counters are fetched again.
     var stringCounterValue = runningJob.GetCounters()
                                        .GetGroup("StringCounter")
                                        .GetCounter("MapRecords");
     NUnit.Framework.Assert.AreEqual(expected, stringCounterValue);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the job over five input files (creating the files at indices 3 and 4 first)
        /// with 4 map tasks and a sort factor of 3, then validates the resulting counters.
        /// </summary>
        public virtual void TestOldCounterC()
        {
            JobConf conf = CreateConfiguration();

            // This scenario needs both optional word files to be present.
            CreateWordsFile(inFiles[3], conf);
            CreateWordsFile(inFiles[4], conf);

            // Sum the sizes of inFiles[0] through inFiles[4], in index order.
            long inputSize = 0;
            for (int fileIndex = 0; fileIndex < 5; fileIndex++)
            {
                inputSize += GetFileSize(inFiles[fileIndex]);
            }

            conf.SetNumMapTasks(4);
            conf.SetInt(JobContext.IoSortFactor, 3);
            FileInputFormat.SetInputPaths(conf, InDir);
            FileOutputFormat.SetOutputPath(conf, new Path(OutDir, "outputO2"));

            RunningJob finishedJob = JobClient.RunJob(conf);
            Counters   jobCounters = finishedJob.GetCounters();

            // Derivation of the expected values, as recorded by the test author:
            //   - each map spills 2^14 records, so 5 maps spill 81920
            //   - 1st merge: read + write = 6 * 8192
            //   - final merge: unmerged = 2 * 8192
            //   - 5 files * 5120 (= 5 * 1024) rec/file = 25600 input records
            //   - 4 records/line = 102400 output records
            // NOTE(review): the author's note also gave "Total reduce: 45056" and
            // "15360 input records"; the latter contradicts the 25600 asserted below and
            // the former was not re-derived here -- confirm before relying on it.
            ValidateCounters(jobCounters, 122880, 25600, 102400);
            ValidateFileCounters(jobCounters, inputSize, 0, 0, 0);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs the job over four input files (creating the file at index 3 and removing the
        /// one at index 4) with 4 map tasks and a sort factor of 2, then validates the counters.
        /// </summary>
        public virtual void TestOldCounterB()
        {
            JobConf conf = CreateConfiguration();

            // Exactly four input files take part in this scenario.
            CreateWordsFile(inFiles[3], conf);
            RemoveWordsFile(inFiles[4], conf);

            // Sum the sizes of inFiles[0] through inFiles[3], in index order.
            long inputSize = 0;
            for (int fileIndex = 0; fileIndex < 4; fileIndex++)
            {
                inputSize += GetFileSize(inFiles[fileIndex]);
            }

            conf.SetNumMapTasks(4);
            conf.SetInt(JobContext.IoSortFactor, 2);
            FileInputFormat.SetInputPaths(conf, InDir);
            FileOutputFormat.SetOutputPath(conf, new Path(OutDir, "outputO1"));

            RunningJob finishedJob = JobClient.RunJob(conf);
            Counters   jobCounters = finishedJob.GetCounters();

            // Derivation of the expected values, as recorded by the test author:
            //   - each map spills 2^14 records, so 4 maps spill 2^16 records
            //   - the reduce side performs two intermediate merges before the reduce:
            //       1st merge:   read + write = 8192 * 4
            //       2nd merge:   read + write = 8192 * 4
            //       final merge: 0
            //   - total reduce: 32768
            //   - total: map + reduce = 2^16 + 2^15 = 98304
            //   - 4 files * 5120 (= 5 * 1024) rec/file = 20480 input records
            //   - 4 records/line = 81920 output records
            // NOTE(review): the author's note said "15360 input records", which contradicts
            // the 20480 asserted below -- presumably copied from the three-file variant.
            ValidateCounters(jobCounters, 98304, 20480, 81920);
            ValidateFileCounters(jobCounters, inputSize, 0, 0, 0);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Completes <paramref name="conf"/> with the test mapper, reducer and input format,
        /// runs the job, asserts that it succeeded and returns its counters. The "/out"
        /// path is deleted from the cluster file system afterwards if it exists, whether
        /// or not the run succeeded.
        /// </summary>
        /// <param name="conf">Job configuration to fill in and run; it is modified in place.</param>
        /// <returns>The counters of the finished job.</returns>
        /// <exception cref="System.Exception"/>
        public static Counters RunJob(JobConf conf)
        {
            // Job classes and key/value types.
            conf.SetMapperClass(typeof(TestReduceFetchFromPartialMem.MapMB));
            conf.SetReducerClass(typeof(TestReduceFetchFromPartialMem.MBValidate));
            conf.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));

            // Task layout and attempt limits.
            conf.SetNumReduceTasks(1);
            conf.SetInputFormat(typeof(TestReduceFetchFromPartialMem.FakeIF));
            conf.SetNumTasksToExecutePerJvm(1);
            conf.SetInt(JobContext.MapMaxAttempts, 0);
            conf.SetInt(JobContext.ReduceMaxAttempts, 0);

            // Input and output locations.
            FileInputFormat.SetInputPaths(conf, new Path("/in"));
            Path outputPath = new Path("/out");
            FileOutputFormat.SetOutputPath(conf, outputPath);

            RunningJob finishedJob = null;
            try
            {
                finishedJob = JobClient.RunJob(conf);
                NUnit.Framework.Assert.IsTrue(finishedJob.IsSuccessful());
            }
            finally
            {
                // Remove the output path even when the job or the assertion failed.
                FileSystem clusterFs    = dfsCluster.GetFileSystem();
                bool       outputExists = clusterFs.Exists(outputPath);
                if (outputExists)
                {
                    clusterFs.Delete(outputPath, true);
                }
            }

            return finishedJob.GetCounters();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Submits a job whose mapper is <c>UtilsForTests.KillMapper</c>, waits until one map
        /// has been launched, kills the job and then checks which files exist in the
        /// job's output directory.
        /// </summary>
        /// <param name="fileName">File expected to exist in the output directory after the
        /// kill; the check is skipped when this is <c>null</c>.</param>
        /// <param name="committer">Output committer type configured on the job.</param>
        /// <param name="exclude">File names that must not exist in the output directory.</param>
        /// <exception cref="System.IO.IOException"/>
        private void TestKilledJob(string fileName, Type committer, string[] exclude)
        {
            JobConf jc     = mr.CreateJobConf();
            Path    outDir = GetNewOutputDir();

            ConfigureJob(jc, "kill job with abort()", 1, 0, outDir);
            // set the job to wait for long
            jc.SetMapperClass(typeof(UtilsForTests.KillMapper));
            jc.SetOutputCommitter(committer);

            JobClient  submitter = new JobClient(jc);
            RunningJob job       = submitter.SubmitJob(jc);
            JobID      jobId     = job.GetID();

            // Poll the counters every 100 units (as passed to WaitFor) until exactly one
            // map has been launched.
            Counters counters = job.GetCounters();
            while (counters.GetCounter(JobCounter.TotalLaunchedMaps) != 1)
            {
                Log.Info("Waiting for a map task to be launched");
                UtilsForTests.WaitFor(100);
                counters = job.GetCounters();
            }

            // Kill the job, then wait for it to complete.
            job.KillJob();
            job.WaitForCompletion();
            NUnit.Framework.Assert.AreEqual("Job was not killed", JobStatus.Killed, job.GetJobState());

            // The named file, when given, must be present.
            if (fileName != null)
            {
                Path   testFile       = new Path(outDir, fileName);
                string missingMessage = "File " + testFile + " missing for job " + jobId;
                NUnit.Framework.Assert.IsTrue(missingMessage, fileSys.Exists(testFile));
            }

            // None of the excluded files may be present.
            foreach (string excludedName in exclude)
            {
                Path   excludedFile   = new Path(outDir, excludedName);
                string presentMessage = "File " + excludedFile + " should not be present for killed job " + jobId;
                NUnit.Framework.Assert.IsFalse(presentMessage, fileSys.Exists(excludedFile));
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs the job over three input files (removing the files at indices 3 and 4) with
        /// 3 map tasks and a sort factor of 2, then validates the record and file counters.
        /// </summary>
        public virtual void TestOldCounterA()
        {
            JobConf conf = CreateConfiguration();

            conf.SetNumMapTasks(3);
            conf.SetInt(JobContext.IoSortFactor, 2);

            // Only the first three input files take part in this scenario.
            RemoveWordsFile(inFiles[3], conf);
            RemoveWordsFile(inFiles[4], conf);

            // Sum the sizes of inFiles[0] through inFiles[2], in index order.
            long inputSize = 0;
            for (int fileIndex = 0; fileIndex < 3; fileIndex++)
            {
                inputSize += GetFileSize(inFiles[fileIndex]);
            }

            FileInputFormat.SetInputPaths(conf, InDir);
            FileOutputFormat.SetOutputPath(conf, new Path(OutDir, "outputO0"));

            RunningJob finishedJob = JobClient.RunJob(conf);
            Counters   jobCounters = finishedJob.GetCounters();

            // Derivation of the expected values, as recorded by the test author:
            //   - A record costs 16 bytes of metadata plus 16 serialized bytes
            //     (vint word len + word + IntWritable = 1 + 11 + 4).
            //   - (2^20 buf * .5 spill pcnt) / 32 bytes/record = 2^14 recs per spill.
            //   - Each file holds 5 replicas of 4096 words, so the first spill contains
            //     4 replicas (2^14 rec / 2^12 rec/replica) and the second just one.
            //   - Each map spills twice; the combiner emits 4096 records per spill. The
            //     merge adds another 8192 records because there are too few spills to
            //     combine (2 < 3).
            //   - Each map therefore spills 2^14 records: 49152 for all maps, combined.
            //   - The combiner has emitted 24576 records to the reducer, all fetched
            //     straight to memory from the map side. The intermediate merge adds 8192
            //     records per segment read; again too few spills to combine, so the
            //     reduce-side total is 8192 records / map * 3 maps = 24576.
            //   - Total: map + reduce = 49152 + 24576 = 73728.
            //   - 3 files * 5120 (= 5 * 1024) rec/file = 15360 input records.
            //   - 4 records/line = 61440 output records.
            ValidateCounters(jobCounters, 73728, 15360, 61440);
            ValidateFileCounters(jobCounters, inputSize, 0, 0, 0);
            ValidateOldFileCounters(jobCounters, inputSize, 61928, 0, 0);
        }
        /// <summary>
        /// Launches a MR job and tests the job's map-locality counters against the expected
        /// values, then waits for the cluster to become idle and shuts it down.
        /// </summary>
        /// <param name="jobName">The name for the job</param>
        /// <param name="mr">The MR cluster</param>
        /// <param name="fileSys">The FileSystem</param>
        /// <param name="in">Input path</param>
        /// <param name="out">Output path; deleted first if it already exists</param>
        /// <param name="numMaps">Number of maps</param>
        /// <param name="otherLocalMaps">Expected value of other local maps</param>
        /// <param name="dataLocalMaps">Expected value of data(node) local maps</param>
        /// <param name="rackLocalMaps">Expected value of rack local maps</param>
        /// <exception cref="System.IO.IOException"/>
        internal static void LaunchJobAndTestCounters(string jobName, MiniMRCluster mr, FileSystem
                                                      fileSys, Path @in, Path @out, int numMaps, int otherLocalMaps, int dataLocalMaps
                                                      , int rackLocalMaps)
        {
            JobConf jobConf = mr.CreateJobConf();

            // Start from a clean output location.
            if (fileSys.Exists(@out))
            {
                fileSys.Delete(@out, true);
            }
            RunningJob job      = LaunchJob(jobConf, @in, @out, numMaps, jobName);
            Counters   counters = job.GetCounters();

            // Arguments follow the (message, expected, actual) order used by the other
            // assertions in this file, so a failure reports the two values the right way round.
            NUnit.Framework.Assert.AreEqual("Number of local maps", otherLocalMaps,
                                            counters.GetCounter(JobCounter.OtherLocalMaps));
            NUnit.Framework.Assert.AreEqual("Number of Data-local maps", dataLocalMaps,
                                            counters.GetCounter(JobCounter.DataLocalMaps));
            NUnit.Framework.Assert.AreEqual("Number of Rack-local maps", rackLocalMaps,
                                            counters.GetCounter(JobCounter.RackLocalMaps));

            // Not reached when one of the assertions above fails.
            mr.WaitUntilIdle();
            mr.Shutdown();
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Runs a map-only job (zero reduce tasks) over three input files with 3 map tasks
        /// and validates the record and file counters.
        /// </summary>
        public virtual void TestOldCounterD()
        {
            JobConf conf = CreateConfiguration();

            conf.SetNumMapTasks(3);
            conf.SetInt(JobContext.IoSortFactor, 2);
            conf.SetNumReduceTasks(0);

            // Only the first three input files take part in this scenario.
            RemoveWordsFile(inFiles[3], conf);
            RemoveWordsFile(inFiles[4], conf);

            // Sum the sizes of inFiles[0] through inFiles[2], in index order.
            long inputSize = 0;
            for (int fileIndex = 0; fileIndex < 3; fileIndex++)
            {
                inputSize += GetFileSize(inFiles[fileIndex]);
            }

            FileInputFormat.SetInputPaths(conf, InDir);
            FileOutputFormat.SetOutputPath(conf, new Path(OutDir, "outputO3"));

            RunningJob finishedJob = JobClient.RunJob(conf);
            Counters   jobCounters = finishedJob.GetCounters();

            // No Reduces. Will go through the direct output collector. Spills=0
            ValidateCounters(jobCounters, 0, 15360, 61440);
            ValidateFileCounters(jobCounters, inputSize, 0, -1, -1);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Copies a pipes executable into the DFS, runs it as a job over
        /// <paramref name="inputPath"/>, and asserts that the job succeeded, that the
        /// "WORDCOUNT" counter group is not empty, and that each output file matches the
        /// corresponding entry of <paramref name="expectedResults"/>.
        /// </summary>
        /// <param name="mr">Cluster used to create a job configuration when <paramref name="conf"/> is null.</param>
        /// <param name="dfs">DFS cluster that receives the executable and holds the output.</param>
        /// <param name="program">Local path of the executable to copy into the DFS.</param>
        /// <param name="inputPath">Job input path.</param>
        /// <param name="outputPath">Job output path.</param>
        /// <param name="numMaps">Number of map tasks.</param>
        /// <param name="numReduces">Number of reduce tasks; with 0 the job is submitted and polled instead of run.</param>
        /// <param name="expectedResults">Expected content of each output file, by position.</param>
        /// <param name="conf">Optional base configuration; copied when not null.</param>
        /// <exception cref="System.IO.IOException"/>
        internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program
                                        , Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults
                                        , JobConf conf)
        {
            Path    executablePath = new Path("testing/bin/application");
            JobConf job            = (conf == null) ? mr.CreateJobConf() : new JobConf(conf);

            job.SetNumMapTasks(numMaps);
            job.SetNumReduceTasks(numReduces);

            // Stage the executable in the DFS and point the job at it.
            FileSystem fs = dfs.GetFileSystem();
            fs.Delete(executablePath.GetParent(), true);
            fs.CopyFromLocalFile(program, executablePath);
            Submitter.SetExecutable(job, fs.MakeQualified(executablePath).ToString());
            Submitter.SetIsJavaRecordReader(job, true);
            Submitter.SetIsJavaRecordWriter(job, true);
            FileInputFormat.SetInputPaths(job, inputPath);
            FileOutputFormat.SetOutputPath(job, outputPath);

            RunningJob rJob;
            if (numReduces != 0)
            {
                rJob = Submitter.RunJob(job);
            }
            else
            {
                // Map-only run: submit, then poll once per second until the job completes.
                rJob = Submitter.JobSubmit(job);
                while (!rJob.IsComplete())
                {
                    try
                    {
                        Sharpen.Thread.Sleep(1000);
                    }
                    catch (Exception ie)
                    {
                        throw new RuntimeException(ie);
                    }
                }
            }
            NUnit.Framework.Assert.IsTrue("pipes job failed", rJob.IsSuccessful());

            // Print every counter of the "WORDCOUNT" group and require at least one.
            Counters.Group wordCountGroup = rJob.GetCounters().GetGroup("WORDCOUNT");
            int            seenCounters   = 0;
            foreach (Counters.Counter counter in wordCountGroup)
            {
                System.Console.Out.WriteLine(counter);
                seenCounters++;
            }
            NUnit.Framework.Assert.IsTrue("No counters found!", (seenCounters > 0));

            // Read back every output file selected by the output-files filter.
            IList <string> results     = new AList <string>();
            Path[]         outputParts = FileUtil.Stat2Paths(
                dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()));
            foreach (Path part in outputParts)
            {
                results.AddItem(MapReduceTestUtil.ReadOutput(part, job));
            }

            NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length
                                            , results.Count);
            for (int index = 0; index < results.Count; index++)
            {
                string mismatchMessage = "pipes program " + program + " output " + index + " wrong";
                NUnit.Framework.Assert.AreEqual(mismatchMessage, expectedResults[index], results[index]);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Validates a finished job that skips bad records: checks the skip-related task
        /// counters, the values written to the skip output directory, and the lines of the
        /// first regular output file.
        /// </summary>
        /// <param name="conf">Job configuration; used to locate and read the skip output.</param>
        /// <param name="runningJob">The job to validate; it must have been successful.</param>
        /// <param name="mapperBadRecords">Records expected to have been skipped on the map side.</param>
        /// <param name="redBadRecords">Records expected to have been skipped on the reduce side.</param>
        /// <exception cref="System.Exception"/>
        private void ValidateOutput(JobConf conf, RunningJob runningJob, IList <string> mapperBadRecords
                                    , IList <string> redBadRecords)
        {
            Log.Info(runningJob.GetCounters().ToString());
            NUnit.Framework.Assert.IsTrue(runningJob.IsSuccessful());

            // Validate counters. The expected value is passed first, as in the other
            // message-less assertions in this file, so failures report the values correctly.
            Counters counters = runningJob.GetCounters();

            NUnit.Framework.Assert.AreEqual(mapperBadRecords.Count,
                                            counters.FindCounter(TaskCounter.MapSkippedRecords).GetCounter());
            int mapRecs = input.Count - mapperBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(mapRecs,
                                            counters.FindCounter(TaskCounter.MapInputRecords).GetCounter());
            NUnit.Framework.Assert.AreEqual(mapRecs,
                                            counters.FindCounter(TaskCounter.MapOutputRecords).GetCounter());
            int redRecs = mapRecs - redBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(redBadRecords.Count,
                                            counters.FindCounter(TaskCounter.ReduceSkippedRecords).GetCounter());
            NUnit.Framework.Assert.AreEqual(redBadRecords.Count,
                                            counters.FindCounter(TaskCounter.ReduceSkippedGroups).GetCounter());
            NUnit.Framework.Assert.AreEqual(redRecs,
                                            counters.FindCounter(TaskCounter.ReduceInputGroups).GetCounter());
            NUnit.Framework.Assert.AreEqual(redRecs,
                                            counters.FindCounter(TaskCounter.ReduceInputRecords).GetCounter());
            NUnit.Framework.Assert.AreEqual(redRecs,
                                            counters.FindCounter(TaskCounter.ReduceOutputRecords).GetCounter());

            // Validate skipped records.
            Path skipDir = SkipBadRecords.GetSkipOutputPath(conf);

            NUnit.Framework.Assert.IsNotNull(skipDir);
            Path[]         skips      = FileUtil.Stat2Paths(GetFileSystem().ListStatus(skipDir));
            IList <string> mapSkipped = new AList <string>();
            IList <string> redSkipped = new AList <string>();

            foreach (Path skipPath in skips)
            {
                Log.Info("skipPath: " + skipPath);
                SequenceFile.Reader reader = new SequenceFile.Reader(GetFileSystem(), skipPath, conf);
                try
                {
                    object key   = ReflectionUtils.NewInstance(reader.GetKeyClass(), conf);
                    object value = ReflectionUtils.NewInstance(reader.GetValueClass(), conf);

                    // Files whose name contains "_r_" are counted as reduce-side skips,
                    // every other file as map-side skips.
                    bool reduceSide = skipPath.GetName().Contains("_r_");

                    key = reader.Next(key);
                    while (key != null)
                    {
                        value = reader.GetCurrentValue(value);
                        Log.Debug("key:" + key + " value:" + value.ToString());
                        if (reduceSide)
                        {
                            redSkipped.AddItem(value.ToString());
                        }
                        else
                        {
                            mapSkipped.AddItem(value.ToString());
                        }
                        key = reader.Next(key);
                    }
                }
                finally
                {
                    // Release the reader even when reading fails part-way through.
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.IsTrue(mapSkipped.ContainsAll(mapperBadRecords));
            NUnit.Framework.Assert.IsTrue(redSkipped.ContainsAll(redBadRecords));

            // Validate the regular output against the records expected to survive both phases.
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(GetOutputDir(),
                                                                                new Utils.OutputFileUtils.OutputFilesFilter()));
            IList <string> mapperOutput = GetProcessed(input, mapperBadRecords);

            Log.Debug("mapperOutput " + mapperOutput.Count);
            IList <string> reducerOutput = GetProcessed(mapperOutput, redBadRecords);

            Log.Debug("reducerOutput " + reducerOutput.Count);
            if (outputFiles.Length > 0)
            {
                // Only the first output file is inspected.
                InputStream    @is     = GetFileSystem().Open(outputFiles[0]);
                BufferedReader reader  = new BufferedReader(new InputStreamReader(@is));
                int            counter = 0;
                try
                {
                    string line = reader.ReadLine();
                    while (line != null)
                    {
                        counter++;
                        // Each line is split on tabs into a key token and a value token.
                        StringTokenizer tokeniz = new StringTokenizer(line, "\t");
                        string          key     = tokeniz.NextToken();
                        string          value   = tokeniz.NextToken();
                        Log.Debug("Output: key:" + key + "  value:" + value);
                        NUnit.Framework.Assert.IsTrue(value.Contains("hello"));
                        NUnit.Framework.Assert.IsTrue(reducerOutput.Contains(value));
                        line = reader.ReadLine();
                    }
                }
                finally
                {
                    // Release the stream even when an assertion above fails.
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual(reducerOutput.Count, counter);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Runs a local job with a combiner and grouping comparators over five input lines
        /// and asserts that the combiner received records and emitted fewer than it
        /// received, and that the output file "part-00000" has exactly two lines whose
        /// extracted tokens are "A2" and "B5".
        /// </summary>
        public virtual void TestCombiner()
        {
            if (!new FilePath(TestRootDir).Mkdirs())
            {
                throw new RuntimeException("Could not create test dir: " + TestRootDir);
            }
            FilePath @in = new FilePath(TestRootDir, "input");

            if (!@in.Mkdirs())
            {
                throw new RuntimeException("Could not create test dir: " + @in);
            }
            FilePath    @out = new FilePath(TestRootDir, "output");
            PrintWriter pw   = new PrintWriter(new FileWriter(new FilePath(@in, "data.txt")));

            try
            {
                pw.WriteLine("A|a,1");
                pw.WriteLine("A|b,2");
                pw.WriteLine("B|a,3");
                pw.WriteLine("B|b,4");
                pw.WriteLine("B|c,5");
            }
            finally
            {
                // Close the input file even if a write fails.
                pw.Close();
            }
            JobConf job = new JobConf();

            job.Set("mapreduce.framework.name", "local");
            TextInputFormat.SetInputPaths(job, new Path(@in.GetPath()));
            TextOutputFormat.SetOutputPath(job, new Path(@out.GetPath()));
            job.SetMapperClass(typeof(TestOldCombinerGrouping.Map));
            job.SetReducerClass(typeof(TestOldCombinerGrouping.Reduce));
            job.SetInputFormat(typeof(TextInputFormat));
            job.SetMapOutputKeyClass(typeof(Text));
            job.SetMapOutputValueClass(typeof(LongWritable));
            job.SetOutputFormat(typeof(TextOutputFormat));
            job.SetOutputValueGroupingComparator(typeof(TestOldCombinerGrouping.GroupComparator));
            job.SetCombinerClass(typeof(TestOldCombinerGrouping.Combiner));
            job.SetCombinerKeyGroupingComparator(typeof(TestOldCombinerGrouping.GroupComparator));
            job.SetInt("min.num.spills.for.combine", 0);
            JobClient  client     = new JobClient(job);
            RunningJob runningJob = client.SubmitJob(job);

            runningJob.WaitForCompletion();
            if (!runningJob.IsSuccessful())
            {
                NUnit.Framework.Assert.Fail("Job failed");
                return;
            }

            Counters       counters    = runningJob.GetCounters();
            Counters.Group taskCounters;
            long combinerInputRecords = counters.GetGroup("org.apache.hadoop.mapreduce.TaskCounter"
                                                          ).GetCounter("COMBINE_INPUT_RECORDS");
            long combinerOutputRecords = counters.GetGroup("org.apache.hadoop.mapreduce.TaskCounter"
                                                           ).GetCounter("COMBINE_OUTPUT_RECORDS");
            NUnit.Framework.Assert.IsTrue(combinerInputRecords > 0);
            NUnit.Framework.Assert.IsTrue(combinerInputRecords > combinerOutputRecords);

            ICollection <string> output = new HashSet <string>();
            BufferedReader       br     = new BufferedReader(new FileReader(new FilePath(@out, "part-00000")));
            try
            {
                // Exactly two output lines are expected. From each, keep the piece at
                // (0, 1) joined with the piece at (4, 5) -- presumably Java-style
                // begin/end indices, i.e. the characters at index 0 and index 4.
                for (int lineNo = 0; lineNo < 2; lineNo++)
                {
                    string line = br.ReadLine();
                    NUnit.Framework.Assert.IsNotNull(line);
                    output.AddItem(Sharpen.Runtime.Substring(line, 0, 1) + Sharpen.Runtime.Substring(
                                       line, 4, 5));
                }
                // There must be no third line.
                NUnit.Framework.Assert.IsNull(br.ReadLine());
            }
            finally
            {
                // Close the output file even if an assertion above fails.
                br.Close();
            }
            ICollection <string> expected = new HashSet <string>();
            expected.AddItem("A2");
            expected.AddItem("B5");
            NUnit.Framework.Assert.AreEqual(expected, output);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Runs a MultipleOutputs job configured for Java serialization (long keys, string
        /// values) and verifies the named-output part files, the lines of "text-r-00000"
        /// and the MultipleOutputs counter group.
        /// </summary>
        /// <param name="withCounters">Whether MultipleOutputs counters are enabled on the
        /// job; selects which counter expectations are asserted.</param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
        {
            Path       inDir  = GetDir(InDir);
            Path       outDir = GetDir(OutDir);
            JobConf    conf   = CreateJobConf();
            FileSystem fs     = FileSystem.Get(conf);

            // NOTE(review): part-0 is written and then inDir is deleted recursively, so
            // presumably only part-1 remains as job input -- the "text" counter value
            // asserted at the end appears to rely on that; confirm before reordering.
            DataOutputStream firstInput = fs.Create(new Path(inDir, "part-0"));
            firstInput.WriteBytes("a\nb\n\nc\nd\ne");
            firstInput.Close();

            fs.Delete(inDir, true);
            fs.Delete(outDir, true);

            DataOutputStream secondInput = fs.Create(new Path(inDir, "part-1"));
            secondInput.WriteBytes("a\nb\n\nc\nd\ne");
            secondInput.Close();

            // Job configuration: Java serialization first, Writable serialization second.
            conf.SetJobName("mo");
            conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                     + "org.apache.hadoop.io.serializer.WritableSerialization");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetMapOutputKeyClass(typeof(long));
            conf.SetMapOutputValueClass(typeof(string));
            conf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));
            conf.SetOutputKeyClass(typeof(long));
            conf.SetOutputValueClass(typeof(string));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(long), typeof(string));
            MultipleOutputs.SetCountersEnabled(conf, withCounters);
            conf.SetMapperClass(typeof(TestMultipleOutputs.MOJavaSerDeMap));
            conf.SetReducerClass(typeof(TestMultipleOutputs.MOJavaSerDeReduce));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);

            // Submit and poll every 100 ms until the job completes.
            JobClient  jc  = new JobClient(conf);
            RunningJob job = jc.SubmitJob(conf);
            while (!job.IsComplete())
            {
                Sharpen.Thread.Sleep(100);
            }

            // Assert the number of named-output part files.
            int namedOutputCount = 0;
            foreach (FileStatus status in fs.ListStatus(outDir))
            {
                string partName      = status.GetPath().GetName();
                bool   isNamedOutput = partName.Equals("text-m-00000") || partName.Equals("text-r-00000");
                if (isNamedOutput)
                {
                    namedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(2, namedOutputCount);

            // Assert TextOutputFormat file correctness: every line ends with "text"
            // and the file is not empty.
            Path           textOutput = new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000");
            BufferedReader reader     = new BufferedReader(new InputStreamReader(fs.Open(textOutput)));
            int            count      = 0;
            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                NUnit.Framework.Assert.IsTrue(line.EndsWith("text"));
                count++;
            }
            reader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);

            // Assert the MultipleOutputs counter group.
            Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
            if (withCounters)
            {
                NUnit.Framework.Assert.AreEqual(1, counters.Size());
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("text"));
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(0, counters.Size());
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Runs a MultipleOutputs job with a "text" named output and a "sequence"
        /// multi-named output, then verifies the named-output part files, the contents of
        /// "text-r-00000" and "sequence_B-r-00000", and the MultipleOutputs counter group.
        /// </summary>
        /// <param name="withCounters">Whether MultipleOutputs counters are enabled on the
        /// job; selects which counter expectations are asserted.</param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            Path       inDir  = GetDir(InDir);
            Path       outDir = GetDir(OutDir);
            JobConf    conf   = CreateJobConf();
            FileSystem fs     = FileSystem.Get(conf);

            // Two identical input files.
            DataOutputStream firstInput = fs.Create(new Path(inDir, "part-0"));
            firstInput.WriteBytes("a\nb\n\nc\nd\ne");
            firstInput.Close();

            DataOutputStream secondInput = fs.Create(new Path(inDir, "part-1"));
            secondInput.WriteBytes("a\nb\n\nc\nd\ne");
            secondInput.Close();

            // Job configuration.
            conf.SetJobName("mo");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat),
                                           typeof(LongWritable), typeof(Text));
            MultipleOutputs.AddMultiNamedOutput(conf, "sequence", typeof(SequenceFileOutputFormat),
                                                typeof(LongWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(conf, withCounters);
            conf.SetMapperClass(typeof(TestMultipleOutputs.MOMap));
            conf.SetReducerClass(typeof(TestMultipleOutputs.MOReduce));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);

            // Submit and poll every 100 ms until the job completes.
            JobClient  jc  = new JobClient(conf);
            RunningJob job = jc.SubmitJob(conf);
            while (!job.IsComplete())
            {
                Sharpen.Thread.Sleep(100);
            }

            // Assert the number of named-output part files: count the entries of outDir
            // whose name is one of the nine names listed here.
            string[] namedOutputFiles =
            {
                "text-m-00000", "text-m-00001", "text-r-00000",
                "sequence_A-m-00000", "sequence_A-m-00001",
                "sequence_B-m-00000", "sequence_B-m-00001",
                "sequence_B-r-00000", "sequence_C-r-00000"
            };
            int namedOutputCount = 0;
            foreach (FileStatus status in fs.ListStatus(outDir))
            {
                string partName = status.GetPath().GetName();
                foreach (string candidate in namedOutputFiles)
                {
                    if (partName.Equals(candidate))
                    {
                        namedOutputCount++;
                        break;
                    }
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);

            // Assert TextOutputFormat file correctness: every line ends with "text"
            // and the file is not empty.
            Path           textOutput = new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000");
            BufferedReader reader     = new BufferedReader(new InputStreamReader(fs.Open(textOutput)));
            int            count      = 0;
            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                NUnit.Framework.Assert.IsTrue(line.EndsWith("text"));
                count++;
            }
            reader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);

            // Assert SequenceOutputFormat file correctness: key/value classes match, every
            // value reads "sequence", and the file is not empty.
            Path                sequenceOutput = new Path(FileOutputFormat.GetOutputPath(conf), "sequence_B-r-00000");
            SequenceFile.Reader seqReader      = new SequenceFile.Reader(fs, sequenceOutput, conf);
            NUnit.Framework.Assert.AreEqual(typeof(LongWritable), seqReader.GetKeyClass());
            NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());

            count = 0;
            LongWritable key   = new LongWritable();
            Text         value = new Text();
            while (seqReader.Next(key, value))
            {
                NUnit.Framework.Assert.AreEqual("sequence", value.ToString());
                count++;
            }
            seqReader.Close();
            NUnit.Framework.Assert.IsFalse(count == 0);

            // Assert the MultipleOutputs counter group.
            Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
            if (withCounters)
            {
                NUnit.Framework.Assert.AreEqual(4, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("text"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_A"));
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("sequence_B"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_C"));
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(0, counters.Size());
            }
        }