        public virtual void TestBzip2()
        {
            JobConf          jobConf = new JobConf(defaultConf);
            CompressionCodec bzip2   = new BZip2Codec();

            ReflectionUtils.SetConf(bzip2, jobConf);
            localFs.Delete(workDir, true);
            System.Console.Out.WriteLine(ColorBrCyan + "TestBzip2() using non-native CBZip2InputStream (presumably)"
                                         + ColorNormal);
            // copy prebuilt (correct!) version of concat.bz2 to HDFS
            string fn      = "concat" + bzip2.GetDefaultExtension();
            Path   fnLocal = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn);
            Path   fnHDFS  = new Path(workDir, fn);

            localFs.CopyFromLocalFile(fnLocal, fnHDFS);
            WriteFile(localFs, new Path(workDir, "part2.txt.bz2"), bzip2, "this is a test\nof bzip2\n");
            FileInputFormat.SetInputPaths(jobConf, workDir);
            TextInputFormat format = new TextInputFormat();   // extends FileInputFormat

            format.Configure(jobConf);
            // work around 2-byte splits issue
            // [135 splits for a 208-byte file and a 62-byte file(!)]
            format.SetMinSplitSize(256);
            InputSplit[] splits = format.GetSplits(jobConf, 100);
            NUnit.Framework.Assert.AreEqual("compressed splits == 2", 2, splits.Length);
            FileSplit tmp = (FileSplit)splits[0];

            // make sure splits[0] is the two-member concat file, not part2.txt.bz2
            if (tmp.GetPath().GetName().Equals("part2.txt.bz2"))
            {
                splits[0] = splits[1];
                splits[1] = tmp;
            }
            IList <Text> results = ReadSplit(format, splits[0], jobConf);

            NUnit.Framework.Assert.AreEqual("splits[0] num lines", 6, results.Count);
            NUnit.Framework.Assert.AreEqual("splits[0][5]", "member #3", results[5].ToString(
                                                ));
            results = ReadSplit(format, splits[1], jobConf);
            NUnit.Framework.Assert.AreEqual("splits[1] num lines", 2, results.Count);
            NUnit.Framework.Assert.AreEqual("splits[1][0]", "this is a test", results[0].ToString
                                                ());
            NUnit.Framework.Assert.AreEqual("splits[1][1]", "of bzip2", results[1].ToString()
                                            );
        }
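
        // WriteFile is not shown in this excerpt. Below is a minimal sketch of
        // what it presumably does, based on the call in TestBzip2() above:
        // write the given contents through the codec into a new file. The name
        // WriteFileSketch is this sketch's own; the real helper (and its
        // null-codec handling) may differ.
        private static void WriteFileSketch(FileSystem fs, Path name,
                                            CompressionCodec codec, string contents)
        {
            OutputStream stm;

            if (codec == null)
            {
                stm = fs.Create(name);
            }
            else
            {
                stm = codec.CreateOutputStream(fs.Create(name));
            }
            stm.Write(Runtime.GetBytesForString(contents));
            stm.Close();
        }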
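
        // ReadSplit is likewise defined elsewhere. A plausible sketch, given
        // how it is used above: open a RecordReader for the split and collect
        // every line of the split as a Text value. The name ReadSplitSketch
        // and the Reporter.Null reporter are assumptions of this sketch.
        private static IList<Text> ReadSplitSketch(TextInputFormat format,
                                                   InputSplit split, JobConf jobConf)
        {
            IList<Text> result = new List<Text>();
            RecordReader<LongWritable, Text> reader =
                format.GetRecordReader(split, jobConf, Reporter.Null);
            LongWritable key = reader.CreateKey();
            Text value = reader.CreateValue();

            while (reader.Next(key, value))
            {
                result.Add(value);
                value = reader.CreateValue();   // the reader reuses Text instances otherwise
            }
            reader.Close();
            return result;
        }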

        public virtual void TestMoreBzip2()
        {
            JobConf          jobConf = new JobConf(defaultConf);
            CompressionCodec bzip2   = new BZip2Codec();

            ReflectionUtils.SetConf(bzip2, jobConf);
            localFs.Delete(workDir, true);
            System.Console.Out.WriteLine(ColorBrMagenta + "TestMoreBzip2() using non-native CBZip2InputStream (presumably)"
                                         + ColorNormal);
            // copy single-member test file to HDFS
            string fn1      = "testConcatThenCompress.txt" + bzip2.GetDefaultExtension();
            Path   fnLocal1 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn1);
            Path   fnHDFS1  = new Path(workDir, fn1);

            localFs.CopyFromLocalFile(fnLocal1, fnHDFS1);
            // copy multiple-member test file to HDFS
            string fn2      = "testCompressThenConcat.txt" + bzip2.GetDefaultExtension();
            Path   fnLocal2 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn2);
            Path   fnHDFS2  = new Path(workDir, fn2);

            localFs.CopyFromLocalFile(fnLocal2, fnHDFS2);
            FileInputFormat.SetInputPaths(jobConf, workDir);
            // open local copies of both test files and sanity-check their compressed sizes
            FileInputStream in1 = new FileInputStream(fnLocal1.ToString());
            FileInputStream in2 = new FileInputStream(fnLocal2.ToString());

            NUnit.Framework.Assert.AreEqual("concat bytes available", 2567, in1.Available());
            NUnit.Framework.Assert.AreEqual("concat bytes available", 3056, in2.Available());

            /*
             * // FIXME
             * // The while-loop below dies at the beginning of the 2nd concatenated
             * // member (after 17 lines successfully read) with:
             * //
             * //   java.io.IOException: bad block header
             * //   at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock(
             * //   CBZip2InputStream.java:527)
             * //
             * // It is not critical to concatenated-gzip support, HADOOP-6835, so it's
             * // simply commented out for now (and HADOOP-6852 filed).  If and when the
             * // latter issue is resolved--perhaps by fixing an error here--this code
             * // should be reenabled.  Note that the DoMultipleBzip2BufferSizes() test
             * // below uses the same testCompressThenConcat.txt.bz2 file but works fine.
             *
             * CompressionInputStream cin2 = bzip2.CreateInputStream(in2);
             * LineReader @in = new LineReader(cin2);
             * Text @out = new Text();
             *
             * int numBytes, totalBytes = 0, lineNum = 0;
             * while ((numBytes = @in.ReadLine(@out)) > 0)
             * {
             *     ++lineNum;
             *     totalBytes += numBytes;
             * }
             * @in.Close();
             * NUnit.Framework.Assert.AreEqual(5346, totalBytes,
             *     "total uncompressed bytes in concatenated test file");
             * NUnit.Framework.Assert.AreEqual(84, lineNum,
             *     "total uncompressed lines in concatenated test file");
             */
            // test CBZip2InputStream with lots of different input-buffer sizes
            DoMultipleBzip2BufferSizes(jobConf, false);
        }
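
        // DoMultipleBzip2BufferSizes is defined elsewhere in this fixture.
        // Judging from its name and the comment above, it presumably re-reads
        // the test inputs while sweeping "io.file.buffer.size" across many
        // small values, exercising CBZip2InputStream across buffer boundaries.
        // The name ...Sketch, the exact size range, and the per-size helper
        // DoSingleBzip2BufferSize are assumptions of this sketch.
        private static void DoMultipleBzip2BufferSizesSketch(JobConf jobConf, bool useNative)
        {
            jobConf.SetBoolean("io.native.lib.available", useNative);
            // tiny buffers are the interesting cases: they force concatenated-
            // member boundaries to land mid-buffer and shake out off-by-one bugs
            for (int bufferSize = 1; bufferSize < 34; ++bufferSize)
            {
                jobConf.SetInt("io.file.buffer.size", bufferSize);
                DoSingleBzip2BufferSize(jobConf);   // assumed per-size read-and-verify helper
            }
        }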