Example #1
0
        /// <summary>Test using the gzip codec for reading</summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestGzip()
        {
            JobConf          job  = new JobConf(defaultConf);
            CompressionCodec gzip = new GzipCodec();

            ReflectionUtils.SetConf(gzip, job);
            localFs.Delete(workDir, true);
            WriteFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n"
                      );
            WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n"
                      );
            FileInputFormat.SetInputPaths(job, workDir);
            CombineTextInputFormat format = new CombineTextInputFormat();

            InputSplit[] splits = format.GetSplits(job, 100);
            NUnit.Framework.Assert.AreEqual("compressed splits == 1", 1, splits.Length);
            IList <Text> results = ReadSplit(format, splits[0], job);

            NUnit.Framework.Assert.AreEqual("splits[0] length", 8, results.Count);
            string[] firstList = new string[] { "the quick", "brown", "fox jumped", "over", " the lazy"
                                                , " dog" };
            string[] secondList = new string[] { "this is a test", "of gzip" };
            string   first      = results[0].ToString();

            if (first.Equals(firstList[0]))
            {
                TestResults(results, firstList, secondList);
            }
            else
            {
                if (first.Equals(secondList[0]))
                {
                    TestResults(results, secondList, firstList);
                }
                else
                {
                    NUnit.Framework.Assert.Fail("unexpected first token!");
                }
            }
        }
Example #2
0
        // A reporter that does nothing
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf job    = new JobConf(defaultConf);
            Random  random = new Random();
            long    seed   = random.NextLong();

            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int length   = 10000;
            int numFiles = 10;

            CreateFiles(length, numFiles, random);
            // create a combined split for the files
            CombineTextInputFormat format = new CombineTextInputFormat();
            LongWritable           key    = new LongWritable();
            Text value = new Text();

            for (int i = 0; i < 3; i++)
            {
                int numSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + numSplits);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("splitting: got =        " + splits.Length);
                // we should have a single split as the length is comfortably smaller than
                // the block size
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length);
                InputSplit split = splits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit
                                                                                        ), split.GetType());
                // check the split
                BitSet bits = new BitSet(length);
                Log.Debug("split= " + split);
                RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, job, voidReporter
                                                                                  );
                try
                {
                    int count = 0;
                    while (reader.Next(key, value))
                    {
                        int v = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        if (bits.Get(v))
                        {
                            Log.Warn("conflict with " + v + " at position " + reader.GetPos());
                        }
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    Log.Info("splits=" + split + " count=" + count);
                }
                finally
                {
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                    ());
            }
        }