コード例 #1
        /// <summary>
        /// Verifies gzip decoding with the native library switched off
        /// (<c>io.native.lib.available</c> = false): checks the decompressor type
        /// reported by the codec, copies two prebuilt gzip test files into
        /// <c>workDir</c>, reads the multi-member file line by line while counting
        /// bytes and lines, and finally runs <c>DoMultipleGzipBufferSizes</c> for
        /// both the non-native and the native configuration.
        /// </summary>
        /// <remarks>
        /// NOTE(review): assertions keep the (message, expected, actual) argument
        /// order already used in this method; confirm it matches the assertion
        /// helper this project actually binds to.
        /// </remarks>
        public virtual void TestBuiltInGzipDecompressor()
        {
            // NOTE:  This fails on RHEL4 with "java.io.IOException: header crc mismatch"
            //        due to the buggy version of zlib included with that platform.
            JobConf jobConf = new JobConf(defaultConf);

            jobConf.SetBoolean("io.native.lib.available", false);
            CompressionCodec gzip = new GzipCodec();

            ReflectionUtils.SetConf(gzip, jobConf);
            localFs.Delete(workDir, true);
            NUnit.Framework.Assert.AreEqual("[non-native (Java) codec]", typeof(BuiltInGzipDecompressor
                                                                                ), gzip.GetDecompressorType());
            System.Console.Out.WriteLine(ColorBrYellow + "testBuiltInGzipDecompressor() using"
                                         + " non-native (Java Inflater) Decompressor (" + gzip.GetDecompressorType() + ")"
                                         + ColorNormal);

            // copy single-member test file to HDFS
            string fn1      = "testConcatThenCompress.txt" + gzip.GetDefaultExtension();
            Path   fnLocal1 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn1);
            Path   fnHDFS1  = new Path(workDir, fn1);

            localFs.CopyFromLocalFile(fnLocal1, fnHDFS1);

            // copy multiple-member test file to HDFS
            // (actually in "seekable gzip" format, a la JIRA PIG-42)
            string fn2      = "testCompressThenConcat.txt" + gzip.GetDefaultExtension();
            Path   fnLocal2 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn2);
            Path   fnHDFS2  = new Path(workDir, fn2);

            localFs.CopyFromLocalFile(fnLocal2, fnHDFS2);
            FileInputFormat.SetInputPaths(jobConf, workDir);

            // here's first pair of DecompressorStreams:
            FileInputStream in1 = new FileInputStream(fnLocal1.ToString());
            FileInputStream in2 = new FileInputStream(fnLocal2.ToString());
            int totalBytes = 0;
            int lineNum    = 0;

            try
            {
                NUnit.Framework.Assert.AreEqual("concat bytes available", 2734, in1.Available());
                NUnit.Framework.Assert.AreEqual("concat bytes available", 3413, in2.Available());

                // w/hdr CRC
                CompressionInputStream cin2 = gzip.CreateInputStream(in2);
                LineReader             @in  = new LineReader(cin2);
                Text @out = new Text();
                int  numBytes;

                try
                {
                    // ReadLine's result is accumulated as a byte count; a
                    // non-positive result ends the loop.
                    while ((numBytes = @in.ReadLine(@out)) > 0)
                    {
                        // Count the line as well as its bytes; without this the
                        // 84-line assertion below could never pass.
                        ++lineNum;
                        totalBytes += numBytes;
                    }
                }
                finally
                {
                    @in.Close();
                }
            }
            finally
            {
                // Release the raw file handles even when an assertion above throws.
                // NOTE(review): in2 may already have been closed through the reader;
                // a second Close() is assumed to be harmless - confirm.
                in1.Close();
                in2.Close();
            }

            NUnit.Framework.Assert.AreEqual("total uncompressed bytes in concatenated test file"
                                            , 5346, totalBytes);
            NUnit.Framework.Assert.AreEqual("total uncompressed lines in concatenated test file"
                                            , 84, lineNum);

            // test BuiltInGzipDecompressor with lots of different input-buffer sizes
            DoMultipleGzipBufferSizes(jobConf, false);

            // test GzipZlibDecompressor (native), just to be sure
            // (FIXME?  could move this call to testGzip(), but would need filename
            // setup above) (alternatively, maybe just nuke testGzip() and extend this?)
            DoMultipleGzipBufferSizes(jobConf, true);
        }
コード例 #2
        /// <summary>
        /// Reads two gzip files through <c>TextInputFormat</c>: a prebuilt
        /// multi-member <c>concat</c> file copied into <c>workDir</c> and a small
        /// <c>part2.txt.gz</c> written by <c>WriteFile</c>. Expects exactly two
        /// splits, with six lines from the concatenated file and two lines from
        /// <c>part2.txt.gz</c>.
        /// </summary>
        /// <remarks>
        /// The test returns early, after logging a warning, when the decompressor
        /// type check below does not match.
        /// </remarks>
        public virtual void TestGzip()
        {
            JobConf          jobConf = new JobConf(defaultConf);
            CompressionCodec gzip    = new GzipCodec();

            ReflectionUtils.SetConf(gzip, jobConf);
            localFs.Delete(workDir, true);

            // preferred, but not compatible with Apache/trunk instance of Hudson:
            /*
             * assertFalse("[native (C/C++) codec]",
             * (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.class ==
             * gzip.getDecompressorType()) );
             * System.out.println(COLOR_BR_RED +
             * "testGzip() using native-zlib Decompressor (" +
             * gzip.getDecompressorType() + ")" + COLOR_NORMAL);
             */

            // alternative:
            // NOTE(review): the condition matches BuiltInGzipDecompressor while the
            // message says "native-zlib"; the original comparison is kept as written
            // - confirm which decompressor type this branch is meant to detect.
            if (typeof(BuiltInGzipDecompressor) == gzip.GetDecompressorType())
            {
                System.Console.Out.WriteLine(ColorBrRed + "testGzip() using native-zlib Decompressor ("
                                             + gzip.GetDecompressorType() + ")" + ColorNormal);
            }
            else
            {
                // Skip the remainder instead of falling through after announcing a skip.
                Log.Warn("testGzip() skipped:  native (C/C++) libs not loaded");
                return;
            }

            /*
             *      // THIS IS BUGGY: omits 2nd/3rd gzip headers; screws up 2nd/3rd CRCs--
             *      //                see https://issues.apache.org/jira/browse/HADOOP-6799
             *  Path fnHDFS = new Path(workDir, "concat" + gzip.getDefaultExtension());
             *  //OutputStream out = localFs.create(fnHDFS);
             *  //GzipCodec.GzipOutputStream gzOStm = new GzipCodec.GzipOutputStream(out);
             *      // can just combine those two lines, probably
             *  //GzipCodec.GzipOutputStream gzOStm =
             *  //  new GzipCodec.GzipOutputStream(localFs.create(fnHDFS));
             *      // oops, no:  this is a protected helper class; need to access
             *      //   it via createOutputStream() instead:
             *  OutputStream out = localFs.create(fnHDFS);
             *  Compressor gzCmp = gzip.createCompressor();
             *  CompressionOutputStream gzOStm = gzip.createOutputStream(out, gzCmp);
             *      // this SHOULD be going to HDFS:  got out from localFs == HDFS
             *      //   ...yup, works
             *  gzOStm.write("first gzip concat\n member\nwith three lines\n".getBytes());
             *  gzOStm.finish();
             *  gzOStm.resetState();
             *  gzOStm.write("2nd gzip concat member\n".getBytes());
             *  gzOStm.finish();
             *  gzOStm.resetState();
             *  gzOStm.write("gzip concat\nmember #3\n".getBytes());
             *  gzOStm.close();
             *      //
             *  String fn = "hdfs-to-local-concat" + gzip.getDefaultExtension();
             *  Path fnLocal = new Path(System.getProperty("test.concat.data","/tmp"), fn);
             *  localFs.copyToLocalFile(fnHDFS, fnLocal);
             */

            // copy prebuilt (correct!) version of concat.gz to HDFS
            string fn      = "concat" + gzip.GetDefaultExtension();
            Path   fnLocal = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn);
            Path   fnHDFS  = new Path(workDir, fn);

            localFs.CopyFromLocalFile(fnLocal, fnHDFS);
            WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n");
            FileInputFormat.SetInputPaths(jobConf, workDir);
            TextInputFormat format = new TextInputFormat();

            InputSplit[] splits = format.GetSplits(jobConf, 100);
            NUnit.Framework.Assert.AreEqual("compressed splits == 2", 2, splits.Length);
            FileSplit tmp = (FileSplit)splits[0];

            // Normalise the order so that splits[1] is always part2.txt.gz.
            if (tmp.GetPath().GetName().Equals("part2.txt.gz"))
            {
                splits[0] = splits[1];
                splits[1] = tmp;
            }

            IList <Text> results = ReadSplit(format, splits[0], jobConf);

            NUnit.Framework.Assert.AreEqual("splits[0] num lines", 6, results.Count);
            NUnit.Framework.Assert.AreEqual("splits[0][5]", "member #3", results[5].ToString());

            results = ReadSplit(format, splits[1], jobConf);
            NUnit.Framework.Assert.AreEqual("splits[1] num lines", 2, results.Count);
            NUnit.Framework.Assert.AreEqual("splits[1][0]", "this is a test", results[0].ToString());
            NUnit.Framework.Assert.AreEqual("splits[1][1]", "of gzip", results[1].ToString());
        }