Beispiel #1
0
        /// <summary>
        /// Runs a field selection job (FieldSelectionMapper + FieldSelectionReducer,
        /// one reduce task) over generated input and asserts that the job output
        /// equals the generated expected output. The output directory is deleted
        /// once the comparison has passed.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public static void Launch()
        {
            const int     lineCount      = 10;
            Configuration jobConf        = new Configuration();
            FileSystem    fileSystem     = FileSystem.Get(jobConf);
            Path          outputPath     = new Path(testDir, "output_for_field_selection_test");
            Path          inputPath      = new Path(testDir, "input_for_field_selection_test");
            StringBuilder generatedInput = new StringBuilder();
            StringBuilder expected       = new StringBuilder();

            // Fill both buffers: the text fed to the job and the text it should produce.
            ConstructInputOutputData(generatedInput, expected, lineCount);

            // Field selection settings: "-" as the data field separator, followed by
            // the map-side and reduce-side key/value specs.
            jobConf.Set(FieldSelectionHelper.DataFieldSeperator, "-");
            jobConf.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
            jobConf.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");

            string jobInput = generatedInput.ToString();
            Job    fieldJob = MapReduceTestUtil.CreateJob(jobConf, inputPath, outputPath, 1, 1, jobInput);

            fieldJob.SetMapperClass(typeof(FieldSelectionMapper));
            fieldJob.SetReducerClass(typeof(FieldSelectionReducer));
            fieldJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            fieldJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
            fieldJob.SetNumReduceTasks(1);
            fieldJob.WaitForCompletion(true);

            bool succeeded = fieldJob.IsSuccessful();
            NUnit.Framework.Assert.IsTrue("Job Failed!", succeeded);

            // Read back what the job wrote and compare it with the expected text.
            string actual = MapReduceTestUtil.ReadOutput(outputPath, jobConf);
            NUnit.Framework.Assert.AreEqual("Outputs doesnt match.", expected.ToString(), actual);

            fileSystem.Delete(outputPath, true);
        }
        /// <summary>
        /// Runs the "mt" job with MultithreadedMapper (two threads, IDMap as the
        /// wrapped mapper) and asserts that the job succeeds exactly when neither
        /// failure flag was requested.
        /// </summary>
        /// <param name="ioEx">When true, sets "multithreaded.ioException" to true in the job configuration.</param>
        /// <param name="rtEx">When true, sets "multithreaded.runtimeException" to true in the job configuration.</param>
        /// <exception cref="System.Exception"/>
        private void Run(bool ioEx, bool rtEx)
        {
            string        dataRoot   = Runtime.GetProperty("test.build.data", "/tmp");
            Path          inputPath  = new Path(dataRoot, "testing/mt/input");
            Path          outputPath = new Path(dataRoot, "testing/mt/output");
            Configuration jobConf    = CreateJobConf();

            if (ioEx)
            {
                jobConf.SetBoolean("multithreaded.ioException", true);
            }

            if (rtEx)
            {
                jobConf.SetBoolean("multithreaded.runtimeException", true);
            }

            Job mtJob = MapReduceTestUtil.CreateJob(jobConf, inputPath, outputPath, 1, 1);

            mtJob.SetJobName("mt");
            mtJob.SetMapperClass(typeof(MultithreadedMapper));
            MultithreadedMapper.SetMapperClass(mtJob, typeof(TestMultithreadedMapper.IDMap));
            MultithreadedMapper.SetNumberOfThreads(mtJob, 2);
            mtJob.SetReducerClass(typeof(Reducer));
            mtJob.WaitForCompletion(true);

            // A failed job is only acceptable when a failure flag was set, and a
            // successful job only when none was.
            bool succeeded = mtJob.IsSuccessful();
            if (!succeeded)
            {
                NUnit.Framework.Assert.IsTrue(ioEx || rtEx);
            }
            else
            {
                NUnit.Framework.Assert.IsFalse(ioEx || rtEx);
            }
        }
        /// <summary>
        /// Runs a job sorted by KeyFieldBasedComparator with the given key spec and
        /// checks the order in which the two input lines (line1, line2) appear in
        /// the first output file.
        /// </summary>
        /// <param name="keySpec">Value stored under "mapreduce.partition.keycomparator.options".</param>
        /// <param name="expect">
        /// 1 when line1 is expected before line2, 2 when line2 is expected before
        /// line1. Any other value skips the ordering assertions.
        /// </param>
        /// <exception cref="System.Exception"/>
        private void TestComparator(string keySpec, int expect)
        {
            string root   = Runtime.GetProperty("test.build.data", "/tmp");
            Path   inDir  = new Path(root, "test_cmp/in");
            Path   outDir = new Path(root, "test_cmp/out");

            conf.Set("mapreduce.partition.keycomparator.options", keySpec);
            conf.Set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
            conf.Set(MRJobConfig.MapOutputKeyFieldSeperator, " ");
            Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, line1 + "\n" + line2
                                                  + "\n");

            job.SetMapperClass(typeof(InverseMapper));
            job.SetReducerClass(typeof(Reducer));
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(LongWritable));
            job.SetSortComparatorClass(typeof(KeyFieldBasedComparator));
            job.SetPartitionerClass(typeof(KeyFieldBasedPartitioner));
            job.WaitForCompletion(true);
            NUnit.Framework.Assert.IsTrue(job.IsSuccessful());

            // validate output
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter
                                                                                    ()));
            // NOTE: when no output file is listed the ordering checks are skipped,
            // exactly as before.
            if (outputFiles.Length > 0)
            {
                bool   checkOrder     = expect == 1 || expect == 2;
                string expectedFirst  = expect == 1 ? line1 : line2;
                string expectedSecond = expect == 1 ? line2 : line1;

                InputStream    @is    = GetFileSystem().Open(outputFiles[0]);
                BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
                try
                {
                    // Make sure we get what we expect as the first line, and also
                    // that we have two lines (both lines must end up in the same
                    // reducer since the partitioner takes the same key spec for
                    // all lines).
                    string line = reader.ReadLine();
                    if (checkOrder)
                    {
                        // A missing line is reported as an assertion failure
                        // rather than a NullReferenceException.
                        NUnit.Framework.Assert.IsTrue(line != null && line.StartsWith(expectedFirst));
                    }

                    line = reader.ReadLine();
                    if (checkOrder)
                    {
                        NUnit.Framework.Assert.IsTrue(line != null && line.StartsWith(expectedSecond));
                    }
                }
                finally
                {
                    // Close the reader even when one of the assertions above throws.
                    reader.Close();
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Tests Reducer throwing exception: builds a chain whose reducer is
        /// FailReduce and asserts that the job does not succeed.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestReducerFail()
        {
            Configuration jobConf  = CreateJobConf();
            Job           chainJob = MapReduceTestUtil.CreateJob(jobConf, inDir, outDir, 1, 1, input);

            chainJob.SetJobName("chain");

            // Chain layout: Mapper -> FailReduce -> Mapper.
            ChainMapper.AddMapper(
                chainJob, typeof(Mapper),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);
            ChainReducer.SetReducer(
                chainJob, typeof(TestChainErrors.FailReduce),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);
            ChainReducer.AddMapper(
                chainJob, typeof(Mapper),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);

            chainJob.WaitForCompletion(true);

            bool succeeded = chainJob.IsSuccessful();
            NUnit.Framework.Assert.IsTrue("Job Not failed", !succeeded);
        }
Beispiel #5
0
        /// <summary>
        /// Tests one of the maps consuming output: chains ConsumeMap in front of a
        /// plain Mapper and asserts that the job succeeds with empty output.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestChainMapNoOuptut()
        {
            Configuration jobConf  = CreateJobConf();
            string        expected = string.Empty;
            Job           chainJob = MapReduceTestUtil.CreateJob(jobConf, inDir, outDir, 1, 0, input);

            chainJob.SetJobName("chain");

            // Chain layout: ConsumeMap -> Mapper.
            ChainMapper.AddMapper(
                chainJob, typeof(TestChainErrors.ConsumeMap),
                typeof(IntWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);
            ChainMapper.AddMapper(
                chainJob, typeof(Mapper),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);

            chainJob.WaitForCompletion(true);

            bool succeeded = chainJob.IsSuccessful();
            NUnit.Framework.Assert.IsTrue("Job failed", succeeded);

            string actual = MapReduceTestUtil.ReadOutput(outDir, jobConf);
            NUnit.Framework.Assert.AreEqual("Outputs doesn't match", expected, actual);
        }
        /// <summary>
        /// Tests chain mapper and reducer by adding a single mapper
        /// (TokenCounterMapper) and a single reducer (IntSumReducer) to the chain,
        /// then asserts the job succeeds and produces the expected counts.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestNoChain()
        {
            Path          inputPath  = new Path(localPathRoot, "testing/chain/input");
            Path          outputPath = new Path(localPathRoot, "testing/chain/output");
            string        jobInput   = "a\nb\na\n";
            string        expected   = "a\t2\nb\t1\n";
            Configuration jobConf    = CreateJobConf();
            Job           chainJob   = MapReduceTestUtil.CreateJob(jobConf, inputPath, outputPath, 1, 1, jobInput);

            chainJob.SetJobName("chain");
            ChainMapper.AddMapper(
                chainJob, typeof(TokenCounterMapper),
                typeof(object), typeof(Text),
                typeof(Text), typeof(IntWritable),
                null);
            ChainReducer.SetReducer(
                chainJob, typeof(IntSumReducer),
                typeof(Text), typeof(IntWritable),
                typeof(Text), typeof(IntWritable),
                null);

            chainJob.WaitForCompletion(true);

            bool succeeded = chainJob.IsSuccessful();
            NUnit.Framework.Assert.IsTrue("Job failed", succeeded);

            string actual = MapReduceTestUtil.ReadOutput(outputPath, jobConf);
            NUnit.Framework.Assert.AreEqual("Outputs doesn't match", expected, actual);
        }
        /// <summary>
        /// Runs a job with the given output format to completion, then asserts
        /// that <paramref name="filename"/> exists in the job's output directory
        /// and that none of the <paramref name="exclude"/> files do.
        /// </summary>
        /// <param name="filename">Name of the file that must exist in the output directory.</param>
        /// <param name="output">Output format class set on the job.</param>
        /// <param name="exclude">Names of files that must not exist in the output directory.</param>
        /// <exception cref="System.Exception"/>
        private void TestSuccessfulJob(string filename, Type output, string[] exclude)
        {
            Path outputPath = GetNewOutputDir();
            Job  job        = MapReduceTestUtil.CreateJob(conf, inDir, outputPath, 1, 0);

            job.SetOutputFormatClass(output);

            bool completed = job.WaitForCompletion(true);
            NUnit.Framework.Assert.IsTrue("Job failed!", completed);

            // The expected marker file must be present ...
            Path   expectedFile   = new Path(outputPath, filename);
            string missingMessage = "Done file missing for job " + job.GetJobID();
            bool   expectedExists = fs.Exists(expectedFile);
            NUnit.Framework.Assert.IsTrue(missingMessage, expectedExists);

            // ... and every excluded file must be absent.
            for (int i = 0; i < exclude.Length; i++)
            {
                Path   file              = new Path(outputPath, exclude[i]);
                string unexpectedMessage = "File " + file + " should not be present for successful job "
                                           + job.GetJobID();
                bool   unexpectedExists  = fs.Exists(file);
                NUnit.Framework.Assert.IsFalse(unexpectedMessage, unexpectedExists);
            }
        }
Beispiel #8
0
        /// <summary>
        /// Tests errors during submission: building a chain with mismatched
        /// key/value classes must raise an ArgumentException, both for a
        /// mapper-to-mapper mismatch and for a reducer/mapper mismatch.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestChainSubmission()
        {
            Configuration jobConf  = CreateJobConf();
            Job           chainJob = MapReduceTestUtil.CreateJob(jobConf, inDir, outDir, 0, 0, input);

            chainJob.SetJobName("chain");

            // Case 1: output key,value classes of first map are not same as that
            // of second map.
            Exception mapperMismatch = null;
            try
            {
                ChainMapper.AddMapper(
                    chainJob, typeof(Mapper),
                    typeof(LongWritable), typeof(Text),
                    typeof(IntWritable), typeof(Text),
                    null);
                ChainMapper.AddMapper(
                    chainJob, typeof(Mapper),
                    typeof(LongWritable), typeof(Text),
                    typeof(LongWritable), typeof(Text),
                    null);
            }
            catch (ArgumentException caught)
            {
                mapperMismatch = caught;
            }
            NUnit.Framework.Assert.IsTrue(mapperMismatch != null);

            // Case 2: output key,value classes of reducer are not same as that of
            // mapper in the chain.
            Exception reducerMismatch = null;
            try
            {
                ChainReducer.SetReducer(
                    chainJob, typeof(Reducer),
                    typeof(LongWritable), typeof(Text),
                    typeof(IntWritable), typeof(Text),
                    null);
                ChainMapper.AddMapper(
                    chainJob, typeof(Mapper),
                    typeof(LongWritable), typeof(Text),
                    typeof(LongWritable), typeof(Text),
                    null);
            }
            catch (ArgumentException caught)
            {
                reducerMismatch = caught;
            }
            NUnit.Framework.Assert.IsTrue(reducerMismatch != null);
        }
        /// <summary>
        /// Runs the "mo" job with JavaSerialization listed first in
        /// "io.serializations" and one named output (Text), then checks the number
        /// of named and value-based output files, the content of "text-r-00000"
        /// and, optionally, the MultipleOutputs counters.
        /// </summary>
        /// <param name="withCounters">
        /// Passed to MultipleOutputs.SetCountersEnabled; when true the counter
        /// group of MultipleOutputs is also verified.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
        {
            string        input = "a\nb\nc\nd\ne\nc\nd\ne";
            Configuration conf  = CreateJobConf();

            conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                     + "org.apache.hadoop.io.serializer.WritableSerialization");
            Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);

            job.SetJobName("mo");
            MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(long),
                                           typeof(string));
            MultipleOutputs.SetCountersEnabled(job, withCounters);
            job.SetSortComparatorClass(typeof(JavaSerializationComparator));
            job.SetMapOutputKeyClass(typeof(long));
            job.SetMapOutputValueClass(typeof(string));
            job.SetOutputKeyClass(typeof(long));
            job.SetOutputValueClass(typeof(string));
            job.SetMapperClass(typeof(TestMRMultipleOutputs.MOJavaSerDeMap));
            job.SetReducerClass(typeof(TestMRMultipleOutputs.MOJavaSerDeReduce));
            job.WaitForCompletion(true);

            // assert number of named output part files
            int        namedOutputCount      = 0;
            int        valueBasedOutputCount = 0;
            FileSystem fs = OutDir.GetFileSystem(conf);

            FileStatus[] statuses = fs.ListStatus(OutDir);
            foreach (FileStatus status in statuses)
            {
                string fileName = status.GetPath().GetName();
                if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001") || fileName
                    .Equals("text-r-00000"))
                {
                    namedOutputCount++;
                }
                else if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000") || fileName.Equals
                             ("c-r-00000") || fileName.Equals("d-r-00000") || fileName.Equals("e-r-00000"))
                {
                    valueBasedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(3, namedOutputCount);
            NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);

            // assert TextOutputFormat files correctness
            int            count  = 0;
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
            try
            {
                string line = reader.ReadLine();
                while (line != null)
                {
                    NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
                    line = reader.ReadLine();
                    count++;
                }
            }
            finally
            {
                // Close the reader even when an assertion inside the loop throws.
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);

            if (withCounters)
            {
                CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                   );
                NUnit.Framework.Assert.AreEqual(6, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
            }
        }
        /// <summary>
        /// Runs the "mo" job with two named outputs (Text via TextOutputFormat and
        /// Sequence via SequenceFileOutputFormat), then checks the number of named
        /// and value-based output files, the content of "text-r-00000" and
        /// "sequence_B-r-00000" and, optionally, the MultipleOutputs counters.
        /// </summary>
        /// <param name="withCounters">
        /// Passed to MultipleOutputs.SetCountersEnabled; when true the counter
        /// group of MultipleOutputs is also verified.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            string        input = "a\nb\nc\nd\ne\nc\nd\ne";
            Configuration conf  = CreateJobConf();
            Job           job   = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);

            job.SetJobName("mo");
            MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(LongWritable
                                                                                       ), typeof(Text));
            MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat), typeof(
                                               IntWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(job, withCounters);
            job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
            job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
            job.WaitForCompletion(true);

            // assert number of named output part files
            int        namedOutputCount      = 0;
            int        valueBasedOutputCount = 0;
            FileSystem fs = OutDir.GetFileSystem(conf);

            FileStatus[] statuses = fs.ListStatus(OutDir);
            foreach (FileStatus status in statuses)
            {
                string fileName = status.GetPath().GetName();
                if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001") || fileName
                    .Equals("text-r-00000") || fileName.Equals("sequence_A-m-00000") || fileName.Equals
                        ("sequence_A-m-00001") || fileName.Equals("sequence_B-m-00000") || fileName.Equals
                        ("sequence_B-m-00001") || fileName.Equals("sequence_B-r-00000") || fileName.Equals
                        ("sequence_C-r-00000"))
                {
                    namedOutputCount++;
                }
                else if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000") || fileName.Equals
                             ("c-r-00000") || fileName.Equals("d-r-00000") || fileName.Equals("e-r-00000"))
                {
                    valueBasedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
            NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);

            // assert TextOutputFormat files correctness
            int            count  = 0;
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
            try
            {
                string line = reader.ReadLine();
                while (line != null)
                {
                    NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
                    line = reader.ReadLine();
                    count++;
                }
            }
            finally
            {
                // Close the reader even when an assertion inside the loop throws.
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);

            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat
                                                                                 .GetOutputPath(job), "sequence_B-r-00000"), conf);
            try
            {
                NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
                NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
                count = 0;
                IntWritable key   = new IntWritable();
                Text        value = new Text();

                while (seqReader.Next(key, value))
                {
                    NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
                    count++;
                }
            }
            finally
            {
                // Close the sequence file reader even when an assertion above throws.
                seqReader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);

            if (withCounters)
            {
                CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                   );
                NUnit.Framework.Assert.AreEqual(9, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
            }
        }
        /// <summary>
        /// Runs a full chain job (AMap, BMap, CMap, RReduce, DMap, EMap, FMap),
        /// then asserts that the job succeeded, that every setup, value and
        /// cleanup flag was recorded, and that the output matches the expected
        /// text.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestChain()
        {
            Path          inputPath  = new Path(localPathRoot, "testing/chain/input");
            Path          outputPath = new Path(localPathRoot, "testing/chain/output");
            string        jobInput   = "1\n2\n";
            string        expected   = "0\t1ABCRDEF\n2\t2ABCRDEF\n";
            Configuration jobConf    = CreateJobConf();

            CleanFlags(jobConf);
            jobConf.Set("a", "X");
            Job chainJob = MapReduceTestUtil.CreateJob(jobConf, inputPath, outputPath, 1, 1, jobInput);

            chainJob.SetJobName("chain");

            // Map side of the chain: A (with its own configuration), B, C.
            Configuration confForA = new Configuration(false);
            confForA.Set("a", "A");
            ChainMapper.AddMapper(
                chainJob, typeof(TestMapReduceChain.AMap),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                confForA);
            ChainMapper.AddMapper(
                chainJob, typeof(TestMapReduceChain.BMap),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);
            ChainMapper.AddMapper(
                chainJob, typeof(TestMapReduceChain.CMap),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);

            // Reduce side of the chain: R (with its own configuration), then D,
            // E (with its own configuration), F.
            Configuration confForR = new Configuration(false);
            confForR.Set("a", "C");
            ChainReducer.SetReducer(
                chainJob, typeof(TestMapReduceChain.RReduce),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                confForR);
            ChainReducer.AddMapper(
                chainJob, typeof(TestMapReduceChain.DMap),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);

            Configuration confForE = new Configuration(false);
            confForE.Set("a", "E");
            ChainReducer.AddMapper(
                chainJob, typeof(TestMapReduceChain.EMap),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                confForE);
            ChainReducer.AddMapper(
                chainJob, typeof(TestMapReduceChain.FMap),
                typeof(LongWritable), typeof(Text),
                typeof(LongWritable), typeof(Text),
                null);

            chainJob.WaitForCompletion(true);

            bool succeeded = chainJob.IsSuccessful();
            NUnit.Framework.Assert.IsTrue("Job failed", succeeded);

            // Setup and value flags are asserted with an explicit message, in
            // this exact order.
            string   flagMessage   = "flag not set";
            string[] reportedFlags = new string[]
            {
                "map.setup.A",
                "map.setup.B",
                "map.setup.C",
                "reduce.setup.R",
                "map.setup.D",
                "map.setup.E",
                "map.setup.F",
                "map.A.value.1",
                "map.A.value.2",
                "map.B.value.1A",
                "map.B.value.2A",
                "map.C.value.1AB",
                "map.C.value.2AB",
                "reduce.R.value.1ABC",
                "reduce.R.value.2ABC",
                "map.D.value.1ABCR",
                "map.D.value.2ABCR",
                "map.E.value.1ABCRD",
                "map.E.value.2ABCRD",
                "map.F.value.1ABCRDE",
                "map.F.value.2ABCRDE"
            };
            foreach (string flag in reportedFlags)
            {
                NUnit.Framework.Assert.IsTrue(flagMessage, GetFlag(jobConf, flag));
            }

            // Cleanup flags are asserted without a message.
            string[] cleanupFlags = new string[]
            {
                "map.cleanup.A",
                "map.cleanup.B",
                "map.cleanup.C",
                "reduce.cleanup.R",
                "map.cleanup.D",
                "map.cleanup.E",
                "map.cleanup.F"
            };
            foreach (string flag in cleanupFlags)
            {
                NUnit.Framework.Assert.IsTrue(GetFlag(jobConf, flag));
            }

            string actual = MapReduceTestUtil.ReadOutput(outputPath, jobConf);
            NUnit.Framework.Assert.AreEqual("Outputs doesn't match", expected, actual);
        }