Beispiel #1
0
 /// <summary>
 /// Builds a new <c>MultipleOutputs</c> from the given job configuration and
 /// stores it in <c>mos</c>.
 /// </summary>
 /// <param name="conf">Job configuration handed to the <c>MultipleOutputs</c> constructor.</param>
 public virtual void Configure(JobConf conf) => mos = new MultipleOutputs(conf);
Beispiel #2
0
        /// <summary>
        /// Runs a job configured with <c>JavaSerialization</c> and one named output ("text"),
        /// then checks the named-output part files, the content of <c>text-r-00000</c>
        /// and the <c>MultipleOutputs</c> counter group.
        /// </summary>
        /// <param name="withCounters">
        /// Value passed to <c>MultipleOutputs.SetCountersEnabled</c>; also selects which
        /// counter assertions are made at the end of the test.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
        {
            const string inputData = "a\nb\n\nc\nd\ne";

            Path       inDir  = GetDir(InDir);
            Path       outDir = GetDir(OutDir);
            JobConf    conf   = CreateJobConf();
            FileSystem fs     = FileSystem.Get(conf);

            DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
            try
            {
                file.WriteBytes(inputData);
            }
            finally
            {
                // Close even when the write throws so the stream is not leaked.
                file.Close();
            }

            // NOTE(review): part-0 is written above and inDir is deleted right after, so
            // presumably only part-1 is left as job input. The expected values asserted
            // below may depend on that - confirm before changing this sequence.
            fs.Delete(inDir, true);
            fs.Delete(outDir, true);

            file = fs.Create(new Path(inDir, "part-1"));
            try
            {
                file.WriteBytes(inputData);
            }
            finally
            {
                file.Close();
            }

            conf.SetJobName("mo");
            conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                     + "org.apache.hadoop.io.serializer.WritableSerialization");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetMapOutputKeyClass(typeof(long));
            conf.SetMapOutputValueClass(typeof(string));
            conf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));
            conf.SetOutputKeyClass(typeof(long));
            conf.SetOutputValueClass(typeof(string));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(long), typeof(string));
            MultipleOutputs.SetCountersEnabled(conf, withCounters);
            conf.SetMapperClass(typeof(TestMultipleOutputs.MOJavaSerDeMap));
            conf.SetReducerClass(typeof(TestMultipleOutputs.MOJavaSerDeReduce));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);

            JobClient  jc  = new JobClient(conf);
            RunningJob job = jc.SubmitJob(conf);

            // Poll every 100 ms until the submitted job reports completion.
            while (!job.IsComplete())
            {
                Sharpen.Thread.Sleep(100);
            }
            // Fail here with a clear message instead of on a missing output file later.
            NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "MultipleOutputs job did not complete successfully");

            // assert number of named output part files
            int namedOutputCount = 0;
            foreach (FileStatus status in fs.ListStatus(outDir))
            {
                string name = status.GetPath().GetName();
                if (name.Equals("text-m-00000") || name.Equals("text-r-00000"))
                {
                    namedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(2, namedOutputCount);

            // assert TextOutputFormat files correctness
            BufferedReader reader = new BufferedReader(new InputStreamReader(
                fs.Open(new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000"))));
            int count = 0;
            try
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    // Ordinal comparison: the suffix is a fixed token, not linguistic text.
                    NUnit.Framework.Assert.IsTrue(line.EndsWith("text", System.StringComparison.Ordinal));
                    count++;
                }
            }
            finally
            {
                // Release the reader even if an assertion above fails.
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);

            // assert counters of the MultipleOutputs group
            Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
            if (!withCounters)
            {
                NUnit.Framework.Assert.AreEqual(0, counters.Size());
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(1, counters.Size());
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("text"));
            }
        }
Beispiel #3
0
        /// <summary>
        /// Runs a job with a named output ("text") and a multi named output ("sequence"),
        /// then checks the named-output part files, the content of <c>text-r-00000</c> and
        /// <c>sequence_B-r-00000</c>, and the <c>MultipleOutputs</c> counter group.
        /// </summary>
        /// <param name="withCounters">
        /// Value passed to <c>MultipleOutputs.SetCountersEnabled</c>; also selects which
        /// counter assertions are made at the end of the test.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            const string inputData = "a\nb\n\nc\nd\ne";

            Path       inDir  = GetDir(InDir);
            Path       outDir = GetDir(OutDir);
            JobConf    conf   = CreateJobConf();
            FileSystem fs     = FileSystem.Get(conf);

            // Two input files with identical content: part-0 and part-1.
            DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
            try
            {
                file.WriteBytes(inputData);
            }
            finally
            {
                // Close even when the write throws so the stream is not leaked.
                file.Close();
            }

            file = fs.Create(new Path(inDir, "part-1"));
            try
            {
                file.WriteBytes(inputData);
            }
            finally
            {
                file.Close();
            }

            conf.SetJobName("mo");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(LongWritable), typeof(Text));
            MultipleOutputs.AddMultiNamedOutput(conf, "sequence", typeof(SequenceFileOutputFormat),
                                                typeof(LongWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(conf, withCounters);
            conf.SetMapperClass(typeof(TestMultipleOutputs.MOMap));
            conf.SetReducerClass(typeof(TestMultipleOutputs.MOReduce));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);

            JobClient  jc  = new JobClient(conf);
            RunningJob job = jc.SubmitJob(conf);

            // Poll every 100 ms until the submitted job reports completion.
            while (!job.IsComplete())
            {
                Sharpen.Thread.Sleep(100);
            }
            // Fail here with a clear message instead of on a missing output file later.
            NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "MultipleOutputs job did not complete successfully");

            // assert number of named output part files
            string[] expectedNames =
            {
                "text-m-00000", "text-m-00001", "text-r-00000",
                "sequence_A-m-00000", "sequence_A-m-00001",
                "sequence_B-m-00000", "sequence_B-m-00001", "sequence_B-r-00000",
                "sequence_C-r-00000"
            };
            int namedOutputCount = 0;
            foreach (FileStatus status in fs.ListStatus(outDir))
            {
                string name = status.GetPath().GetName();
                if (System.Array.IndexOf(expectedNames, name) >= 0)
                {
                    namedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);

            // assert TextOutputFormat files correctness
            BufferedReader reader = new BufferedReader(new InputStreamReader(
                fs.Open(new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000"))));
            int count = 0;
            try
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    // Ordinal comparison: the suffix is a fixed token, not linguistic text.
                    NUnit.Framework.Assert.IsTrue(line.EndsWith("text", System.StringComparison.Ordinal));
                    count++;
                }
            }
            finally
            {
                // Release the reader even if an assertion above fails.
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);

            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(
                fs, new Path(FileOutputFormat.GetOutputPath(conf), "sequence_B-r-00000"), conf);
            count = 0;
            try
            {
                NUnit.Framework.Assert.AreEqual(typeof(LongWritable), seqReader.GetKeyClass());
                NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());

                LongWritable key   = new LongWritable();
                Text         value = new Text();
                while (seqReader.Next(key, value))
                {
                    NUnit.Framework.Assert.AreEqual("sequence", value.ToString());
                    count++;
                }
            }
            finally
            {
                // Release the sequence-file reader even if an assertion above fails.
                seqReader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);

            // assert counters of the MultipleOutputs group
            Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
            if (!withCounters)
            {
                NUnit.Framework.Assert.AreEqual(0, counters.Size());
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(4, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("text"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_A"));
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("sequence_B"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_C"));
            }
        }