Ejemplo n.º 1
0
        /// <summary>
        /// Writes <paramref name="lines"/> key/value pairs to a block-compressed SequenceFile
        /// using the given codec, reads them back in order, checks every record and the
        /// total record count, and finally deletes the file.
        /// </summary>
        /// <param name="conf">
        /// Configuration used to obtain the file system and to create the writer and reader.
        /// Its <c>io.seqfile.compress.blocksize</c> entry is overwritten with
        /// <paramref name="blockSize"/>.
        /// </param>
        /// <param name="lines">Number of records to write and to expect when reading back.</param>
        /// <param name="codecClass">
        /// Type name of the compression codec; resolved through <c>Runtime.GetType</c> and
        /// instantiated reflectively. It is also used as the suffix of the file name.
        /// </param>
        /// <param name="blockSize">Value stored under <c>io.seqfile.compress.blocksize</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="InstantiationException"/>
        /// <exception cref="System.MemberAccessException"/>
        private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass
                                                  , int blockSize)
        {
            Path filePath = new Path("SequenceFileCodecTest." + codecClass);

            // Configuration
            conf.SetInt("io.seqfile.compress.blocksize", blockSize);
            // Create the SequenceFile
            FileSystem fs = FileSystem.Get(conf);

            Log.Info("Creating SequenceFile with codec \"" + codecClass + "\"");
            CompressionCodec codec = (CompressionCodec)System.Activator.CreateInstance(
                Runtime.GetType(codecClass));
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, filePath, typeof(Text),
                                                                   typeof(Text), SequenceFile.CompressionType.Block, codec);
            // Write some data
            Log.Info("Writing to SequenceFile...");
            try
            {
                for (int i = 0; i < lines; i++)
                {
                    Text key   = new Text("key" + i);
                    Text value = new Text("value" + i);
                    writer.Append(key, value);
                }
            }
            finally
            {
                // Close the writer even when Append throws, mirroring how the reader is handled below.
                writer.Close();
            }
            // Read the data back and check
            Log.Info("Reading from the SequenceFile...");
            SequenceFile.Reader reader  = new SequenceFile.Reader(fs, filePath, conf);
            int lc = 0;

            try
            {
                // Key and value holders are created from the classes recorded in the file itself.
                Writable key_1   = (Writable)System.Activator.CreateInstance(reader.GetKeyClass());
                Writable value_1 = (Writable)System.Activator.CreateInstance(reader.GetValueClass());

                while (reader.Next(key_1, value_1))
                {
                    Assert.Equal("key" + lc, key_1.ToString());
                    Assert.Equal("value" + lc, value_1.ToString());
                    lc++;
                }
            }
            finally
            {
                reader.Close();
            }
            Assert.Equal(lines, lc);
            // Delete temporary files
            fs.Delete(filePath, false);
            Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass +
                     "\"");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs a job configured with one <c>Text</c> named output and one <c>Sequence</c>
        /// named output, then verifies the produced part files, the contents of
        /// <c>text-r-00000</c> and <c>sequence_B-r-00000</c>, and (optionally) the
        /// <c>MultipleOutputs</c> counter group.
        /// </summary>
        /// <param name="withCounters">
        /// Passed to <c>MultipleOutputs.SetCountersEnabled</c>; when <c>true</c> the counter
        /// values are asserted as well.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            string        input = "a\nb\nc\nd\ne\nc\nd\ne";
            Configuration conf  = CreateJobConf();
            Job           job   = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);

            job.SetJobName("mo");
            MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(LongWritable
                                                                                       ), typeof(Text));
            MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat), typeof(
                                               IntWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(job, withCounters);
            job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
            job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
            job.WaitForCompletion(true);
            // assert number of named output part files
            int        namedOutputCount      = 0;
            int        valueBasedOutputCount = 0;
            FileSystem fs = OutDir.GetFileSystem(conf);

            FileStatus[] statuses = fs.ListStatus(OutDir);
            foreach (FileStatus status in statuses)
            {
                string fileName = status.GetPath().GetName();
                if (fileName.Equals("text-m-00000")
                    || fileName.Equals("text-m-00001")
                    || fileName.Equals("text-r-00000")
                    || fileName.Equals("sequence_A-m-00000")
                    || fileName.Equals("sequence_A-m-00001")
                    || fileName.Equals("sequence_B-m-00000")
                    || fileName.Equals("sequence_B-m-00001")
                    || fileName.Equals("sequence_B-r-00000")
                    || fileName.Equals("sequence_C-r-00000"))
                {
                    namedOutputCount++;
                }
                else if (fileName.Equals("a-r-00000")
                         || fileName.Equals("b-r-00000")
                         || fileName.Equals("c-r-00000")
                         || fileName.Equals("d-r-00000")
                         || fileName.Equals("e-r-00000"))
                {
                    valueBasedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
            NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);
            // assert TextOutputFormat files correctness
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
            int count = 0;

            try
            {
                string line = reader.ReadLine();
                while (line != null)
                {
                    NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
                    line = reader.ReadLine();
                    count++;
                }
            }
            finally
            {
                // Release the stream even when an assertion above fails.
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);
            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat
                                                                                 .GetOutputPath(job), "sequence_B-r-00000"), conf);
            count = 0;
            try
            {
                NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
                NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
                IntWritable key   = new IntWritable();
                Text        value = new Text();

                while (seqReader.Next(key, value))
                {
                    NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
                    count++;
                }
            }
            finally
            {
                // Release the sequence file even when an assertion above fails.
                seqReader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);
            if (withCounters)
            {
                CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                   );
                NUnit.Framework.Assert.AreEqual(9, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue
                                                    ());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
                NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
                NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Validates a finished job: checks that it succeeded, compares its task counters
        /// with the counts derived from <c>input</c> and the expected bad-record lists,
        /// reads every file in the skip output directory to confirm the skipped records,
        /// and finally checks the first job output file against the expected reducer output.
        /// </summary>
        /// <param name="conf">Job configuration; used to locate the skip output path and to open sequence files.</param>
        /// <param name="runningJob">The job whose success flag and counters are verified.</param>
        /// <param name="mapperBadRecords">Records expected to have been skipped on the map side.</param>
        /// <param name="redBadRecords">Records expected to have been skipped on the reduce side.</param>
        /// <exception cref="System.Exception"/>
        private void ValidateOutput(JobConf conf, RunningJob runningJob, IList<string> mapperBadRecords
                                    , IList<string> redBadRecords)
        {
            Log.Info(runningJob.GetCounters().ToString());
            NUnit.Framework.Assert.IsTrue(runningJob.IsSuccessful());
            //validate counters
            Counters counters = runningJob.GetCounters();

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapSkippedRecords
                                                                 ).GetCounter(), mapperBadRecords.Count);
            int mapRecs = input.Count - mapperBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapInputRecords)
                                            .GetCounter(), mapRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.MapOutputRecords
                                                                 ).GetCounter(), mapRecs);
            int redRecs = mapRecs - redBadRecords.Count;

            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceSkippedRecords
                                                                 ).GetCounter(), redBadRecords.Count);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceSkippedGroups
                                                                 ).GetCounter(), redBadRecords.Count);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceInputGroups
                                                                 ).GetCounter(), redRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceInputRecords
                                                                 ).GetCounter(), redRecs);
            NUnit.Framework.Assert.AreEqual(counters.FindCounter(TaskCounter.ReduceOutputRecords
                                                                 ).GetCounter(), redRecs);
            //validate skipped records
            Path skipDir = SkipBadRecords.GetSkipOutputPath(conf);

            NUnit.Framework.Assert.IsNotNull(skipDir);
            Path[]        skips      = FileUtil.Stat2Paths(GetFileSystem().ListStatus(skipDir));
            IList<string> mapSkipped = new AList<string>();
            IList<string> redSkipped = new AList<string>();

            foreach (Path skipPath in skips)
            {
                Log.Info("skipPath: " + skipPath);
                // Files whose name contains "_r_" feed redSkipped; all others feed mapSkipped.
                // The name does not change while the file is being read, so test it once.
                bool          isReduceSkipFile = skipPath.GetName().Contains("_r_");
                IList<string> skippedSink      = isReduceSkipFile ? redSkipped : mapSkipped;

                SequenceFile.Reader skipReader = new SequenceFile.Reader(GetFileSystem(), skipPath, conf
                                                                         );
                try
                {
                    object key   = ReflectionUtils.NewInstance(skipReader.GetKeyClass(), conf);
                    object value = ReflectionUtils.NewInstance(skipReader.GetValueClass(), conf);
                    key = skipReader.Next(key);
                    while (key != null)
                    {
                        value = skipReader.GetCurrentValue(value);
                        Log.Debug("key:" + key + " value:" + value.ToString());
                        skippedSink.AddItem(value.ToString());
                        key = skipReader.Next(key);
                    }
                }
                finally
                {
                    // Close the reader even if reading a record fails part-way through.
                    skipReader.Close();
                }
            }
            NUnit.Framework.Assert.IsTrue(mapSkipped.ContainsAll(mapperBadRecords));
            NUnit.Framework.Assert.IsTrue(redSkipped.ContainsAll(redBadRecords));
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(GetOutputDir(
                                                                                    ), new Utils.OutputFileUtils.OutputFilesFilter()));
            IList<string> mapperOutput = GetProcessed(input, mapperBadRecords);

            Log.Debug("mapperOutput " + mapperOutput.Count);
            IList<string> reducerOutput = GetProcessed(mapperOutput, redBadRecords);

            Log.Debug("reducerOutput " + reducerOutput.Count);
            if (outputFiles.Length > 0)
            {
                // Only the first output file is inspected.
                InputStream    @is          = GetFileSystem().Open(outputFiles[0]);
                BufferedReader outputReader = new BufferedReader(new InputStreamReader(@is));
                int            counter      = 0;
                try
                {
                    string line = outputReader.ReadLine();
                    while (line != null)
                    {
                        counter++;
                        // Each line is split on tab into a key token followed by a value token.
                        StringTokenizer tokeniz = new StringTokenizer(line, "\t");
                        string          key     = tokeniz.NextToken();
                        string          value   = tokeniz.NextToken();
                        Log.Debug("Output: key:" + key + "  value:" + value);
                        NUnit.Framework.Assert.IsTrue(value.Contains("hello"));
                        NUnit.Framework.Assert.IsTrue(reducerOutput.Contains(value));
                        line = outputReader.ReadLine();
                    }
                }
                finally
                {
                    // Close the stream even when an assertion above fails.
                    outputReader.Close();
                }
                NUnit.Framework.Assert.AreEqual(reducerOutput.Count, counter);
            }
        }
 /// <summary>
 /// Gets the key type that callers must supply to
 /// <see cref="SequenceFileRecordReader{K, V}.Next(object, object)"/>.
 /// </summary>
 /// <returns>The key class reported by the wrapped <c>@in</c> reader.</returns>
 public virtual Type GetKeyClass()
 {
     // Delegate straight to the underlying reader.
     Type keyClass = @in.GetKeyClass();
     return keyClass;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Writes two small input files, submits a job configured with a <c>"text"</c> named
        /// output and a <c>"sequence"</c> multi-named output, waits for it to complete, and
        /// then verifies the produced part files, the contents of <c>text-r-00000</c> and
        /// <c>sequence_B-r-00000</c>, and the <c>MultipleOutputs</c> counter group.
        /// </summary>
        /// <param name="withCounters">
        /// Passed to <c>MultipleOutputs.SetCountersEnabled</c>. When <c>false</c> the counter
        /// group is expected to be empty; when <c>true</c> its four counters are asserted.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected internal virtual void _testMultipleOutputs(bool withCounters)
        {
            Path       inDir  = GetDir(InDir);
            Path       outDir = GetDir(OutDir);
            JobConf    conf   = CreateJobConf();
            FileSystem fs     = FileSystem.Get(conf);

            // Both input parts carry the same content.
            foreach (string partName in new string[] { "part-0", "part-1" })
            {
                DataOutputStream file = fs.Create(new Path(inDir, partName));
                try
                {
                    file.WriteBytes("a\nb\n\nc\nd\ne");
                }
                finally
                {
                    // Close the stream even when the write fails.
                    file.Close();
                }
            }
            conf.SetJobName("mo");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(LongWritable
                                                                                          ), typeof(Text));
            MultipleOutputs.AddMultiNamedOutput(conf, "sequence", typeof(SequenceFileOutputFormat
                                                                         ), typeof(LongWritable), typeof(Text));
            MultipleOutputs.SetCountersEnabled(conf, withCounters);
            conf.SetMapperClass(typeof(TestMultipleOutputs.MOMap));
            conf.SetReducerClass(typeof(TestMultipleOutputs.MOReduce));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            JobClient  jc  = new JobClient(conf);
            RunningJob job = jc.SubmitJob(conf);

            // Poll until the job reports completion.
            while (!job.IsComplete())
            {
                Sharpen.Thread.Sleep(100);
            }
            // assert number of named output part files
            int namedOutputCount = 0;

            FileStatus[] statuses = fs.ListStatus(outDir);
            foreach (FileStatus status in statuses)
            {
                // Resolve the name once instead of once per comparison.
                string fileName = status.GetPath().GetName();
                if (fileName.Equals("text-m-00000")
                    || fileName.Equals("text-m-00001")
                    || fileName.Equals("text-r-00000")
                    || fileName.Equals("sequence_A-m-00000")
                    || fileName.Equals("sequence_A-m-00001")
                    || fileName.Equals("sequence_B-m-00000")
                    || fileName.Equals("sequence_B-m-00001")
                    || fileName.Equals("sequence_B-r-00000")
                    || fileName.Equals("sequence_C-r-00000"))
                {
                    namedOutputCount++;
                }
            }
            NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
            // assert TextOutputFormat files correctness
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path
                                                                                         (FileOutputFormat.GetOutputPath(conf), "text-r-00000"))));
            int count = 0;

            try
            {
                string line = reader.ReadLine();
                while (line != null)
                {
                    NUnit.Framework.Assert.IsTrue(line.EndsWith("text"));
                    line = reader.ReadLine();
                    count++;
                }
            }
            finally
            {
                // Release the stream even when an assertion above fails.
                reader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);
            // assert SequenceOutputFormat files correctness
            SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat
                                                                                 .GetOutputPath(conf), "sequence_B-r-00000"), conf);
            count = 0;
            try
            {
                NUnit.Framework.Assert.AreEqual(typeof(LongWritable), seqReader.GetKeyClass());
                NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
                LongWritable key   = new LongWritable();
                Text         value = new Text();

                while (seqReader.Next(key, value))
                {
                    NUnit.Framework.Assert.AreEqual("sequence", value.ToString());
                    count++;
                }
            }
            finally
            {
                // Release the sequence file even when an assertion above fails.
                seqReader.Close();
            }
            NUnit.Framework.Assert.IsFalse(count == 0);
            Counters.Group counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName
                                                                 );
            if (!withCounters)
            {
                NUnit.Framework.Assert.AreEqual(0, counters.Size());
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(4, counters.Size());
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("text"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_A"));
                NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("sequence_B"));
                NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_C"));
            }
        }