Example #1
0
        /// <summary>Test using the gzip codec for reading</summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestGzip()
        {
            Configuration    conf = new Configuration(defaultConf);
            CompressionCodec gzip = new GzipCodec();

            ReflectionUtils.SetConf(gzip, conf);
            localFs.Delete(workDir, true);
            WriteFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n"
                      );
            WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n"
                      );
            Job job = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(job, workDir);
            CombineTextInputFormat format = new CombineTextInputFormat();
            IList <InputSplit>     splits = format.GetSplits(job);

            NUnit.Framework.Assert.AreEqual("compressed splits == 1", 1, splits.Count);
            IList <Text> results = ReadSplit(format, splits[0], job);

            NUnit.Framework.Assert.AreEqual("splits[0] length", 8, results.Count);
            string[] firstList = new string[] { "the quick", "brown", "fox jumped", "over", " the lazy"
                                                , " dog" };
            string[] secondList = new string[] { "this is a test", "of gzip" };
            string   first      = results[0].ToString();

            if (first.Equals(firstList[0]))
            {
                TestResults(results, firstList, secondList);
            }
            else
            {
                if (first.Equals(secondList[0]))
                {
                    TestResults(results, secondList, firstList);
                }
                else
                {
                    NUnit.Framework.Assert.Fail("unexpected first token!");
                }
            }
        }
Example #2
0
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            Job    job    = Job.GetInstance(new Configuration(defaultConf));
            Random random = new Random();
            long   seed   = random.NextLong();

            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            // create files with various lengths
            CreateFiles(length, numFiles, random);
            // create a combined split for the files
            CombineTextInputFormat format = new CombineTextInputFormat();

            for (int i = 0; i < 3; i++)
            {
                int numSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                IList <InputSplit> splits = format.GetSplits(job);
                Log.Info("splitting: got =        " + splits.Count);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Count);
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit
                                                                                        ), split.GetType());
                // check the split
                BitSet bits = new BitSet(length);
                Log.Debug("split= " + split);
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                .GetConfiguration());
                RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context
                                                                                     );
                NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.", typeof(
                                                    CombineFileRecordReader), reader.GetType());
                MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl <
                    LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID
                                                                (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split);
                reader.Initialize(split, mcontext);
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        LongWritable key = reader.GetCurrentKey();
                        NUnit.Framework.Assert.IsNotNull("Key should not be null.", key);
                        Text value = reader.GetCurrentValue();
                        int  v     = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    Log.Debug("split=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                    ());
            }
        }