Esempio n. 1
0
        /// <summary>
        /// Checks that a codec built for <paramref name="level"/> reports the name
        /// "bzip2" and a string form of "bzip2-" followed by the numeric level.
        /// </summary>
        /// <param name="level">Compression level the codec is constructed with.</param>
        public void ToStringAndName([Values] BZip2Level level)
        {
            var bzip2 = new BZip2Codec(level);
            string expectedText = $"bzip2-{(int)level}";

            Assert.AreEqual("bzip2", bzip2.GetName());
            Assert.AreEqual(expectedText, bzip2.ToString());
        }
        /// <summary>
        /// Reads a bzip2-compressed resource to the end with the
        /// <c>LineRecordReader(conf, split)</c> constructor, closes the reader twice,
        /// and then checks that ten decompressors fetched from <c>CodecPool</c> are
        /// all distinct.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably guards against a second <c>Close()</c> handing the
        /// same decompressor back to the pool twice -- confirm against the reader.
        /// </remarks>
        public virtual void TestMultipleClose()
        {
            Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2");

            // NUnit expects the value under test first and the message last. The
            // JUnit-style order checked the message literal and never the URL.
            NUnit.Framework.Assert.IsNotNull(testFileUrl, "Cannot find recordSpanningMultipleSplits.txt.bz2");

            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            // Lift the line-length cap so no record is truncated while draining.
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);

            // A single split covering the whole file.
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
            LineRecordReader reader = new LineRecordReader(conf, split);
            LongWritable     key    = new LongWritable();
            Text             value  = new Text();

            // Drain every record; the contents are irrelevant to this test.
            while (reader.Next(key, value))
            {
            }

            // The double close is the behavior under test.
            reader.Close();
            reader.Close();

            BZip2Codec codec = new BZip2Codec();

            codec.SetConf(conf);
            ICollection <Decompressor> decompressors = new HashSet <Decompressor>();

            for (int i = 0; i < 10; ++i)
            {
                decompressors.AddItem(CodecPool.GetDecompressor(codec));
            }

            // The set keeps distinct instances only, so a count of 10 means the
            // pool never handed out the same decompressor twice.
            NUnit.Framework.Assert.AreEqual(10, decompressors.Count);
        }
Esempio n. 3
0
        /// <summary>
        /// Checks that two codecs built with the same <paramref name="level"/>
        /// compare equal to themselves and to each other, in both directions.
        /// </summary>
        /// <param name="level">Compression level shared by both codecs.</param>
        public void Equal([Values] BZip2Level level)
        {
            var first  = new BZip2Codec(level);
            var second = new BZip2Codec(level);

            // Each instance equals itself.
            Assert.IsTrue(first.Equals(first));
            Assert.IsTrue(second.Equals(second));

            // Equality holds in both directions.
            Assert.IsTrue(first.Equals(second));
            Assert.IsTrue(second.Equals(first));
        }
Esempio n. 4
0
        /// <summary>
        /// Round-trips a generated byte array through <c>Compress</c> and
        /// <c>Decompress</c> and checks that the result matches the input.
        /// </summary>
        /// <param name="length">Number of bytes in the generated payload.</param>
        /// <param name="level">Compression level the codec is constructed with.</param>
        public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level)
        {
            // Payload is the sequence 0, 1, 2, ... with each value cast to a byte.
            byte[] payload = Enumerable.Range(0, length)
                                       .Select(index => (byte)index)
                                       .ToArray();

            var bzip2 = new BZip2Codec(level);

            byte[] packed   = bzip2.Compress(payload);
            byte[] restored = bzip2.Decompress(packed, packed.Length);

            Assert.IsTrue(payload.SequenceEqual(restored));
        }
        /// <summary>
        /// Copies a prebuilt <c>concat.bz2</c> file into the work directory, writes a
        /// second small bzip2 file next to it, and checks that
        /// <c>TextInputFormat</c> yields exactly two splits whose lines match the
        /// expected contents.
        /// </summary>
        public virtual void TestBzip2()
        {
            JobConf          jobConf = new JobConf(defaultConf);
            CompressionCodec bzip2   = new BZip2Codec();

            ReflectionUtils.SetConf(bzip2, jobConf);
            localFs.Delete(workDir, true);
            System.Console.Out.WriteLine(ColorBrCyan + "testBzip2() using non-native CBZip2InputStream (presumably)"
                                         + ColorNormal);

            // copy prebuilt (correct!) version of concat.bz2 to HDFS
            string fn      = "concat" + bzip2.GetDefaultExtension();
            Path   fnLocal = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn);
            Path   fnHDFS  = new Path(workDir, fn);

            localFs.CopyFromLocalFile(fnLocal, fnHDFS);
            WriteFile(localFs, new Path(workDir, "part2.txt.bz2"), bzip2, "this is a test\nof bzip2\n");
            FileInputFormat.SetInputPaths(jobConf, workDir);
            TextInputFormat format = new TextInputFormat();

            // extends FileInputFormat
            format.Configure(jobConf);
            format.SetMinSplitSize(256);

            // work around 2-byte splits issue
            // [135 splits for a 208-byte file and a 62-byte file(!)]
            InputSplit[] splits = format.GetSplits(jobConf, 100);

            // NUnit's AreEqual takes (expected, actual, message); the converted
            // JUnit order (message, expected, actual) compared the wrong operands.
            NUnit.Framework.Assert.AreEqual(2, splits.Length, "compressed splits == 2");
            FileSplit tmp = (FileSplit)splits[0];

            // Normalize the order so splits[0] is the concat file and splits[1]
            // is part2.txt.bz2.
            if (tmp.GetPath().GetName().Equals("part2.txt.bz2"))
            {
                splits[0] = splits[1];
                splits[1] = tmp;
            }
            IList <Text> results = ReadSplit(format, splits[0], jobConf);

            NUnit.Framework.Assert.AreEqual(6, results.Count, "splits[0] num lines");
            NUnit.Framework.Assert.AreEqual("member #3", results[5].ToString(), "splits[0][5]");

            results = ReadSplit(format, splits[1], jobConf);
            NUnit.Framework.Assert.AreEqual(2, results.Count, "splits[1] num lines");
            NUnit.Framework.Assert.AreEqual("this is a test", results[0].ToString(), "splits[1][0]");
            NUnit.Framework.Assert.AreEqual("of bzip2", results[1].ToString(), "splits[1][1]");
        }
Esempio n. 6
0
        /// <summary>
        /// Compresses a generated payload through the stream overload of
        /// <c>Compress</c>, decompresses the resulting bytes, and checks that the
        /// result matches the input.
        /// </summary>
        /// <param name="length">Number of bytes in the generated payload.</param>
        /// <param name="level">Compression level the codec is constructed with.</param>
        public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level)
        {
            // Payload is the sequence 0, 1, 2, ... with each value cast to a byte.
            byte[] payload = Enumerable.Range(0, length)
                                       .Select(index => (byte)index)
                                       .ToArray();

            var bzip2 = new BZip2Codec(level);

            using (var source = new MemoryStream(payload))
            {
                using (var sink = new MemoryStream())
                {
                    bzip2.Compress(source, sink);

                    byte[] packed   = sink.ToArray();
                    byte[] restored = bzip2.Decompress(packed, packed.Length);

                    Assert.IsTrue(payload.SequenceEqual(restored));
                }
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Reads a bzip2-compressed resource to the end with a
        /// <c>LineRecordReader</c> initialized from a task attempt context, closes
        /// the reader twice, and then checks that ten decompressors fetched from
        /// <c>CodecPool</c> are all distinct.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably guards against a second <c>Close()</c> handing the
        /// same decompressor back to the pool twice -- confirm against the reader.
        /// </remarks>
        public virtual void TestMultipleClose()
        {
            Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2");

            // NUnit expects the value under test first and the message last. The
            // JUnit-style order checked the message literal and never the URL.
            NUnit.Framework.Assert.IsNotNull(testFileUrl, "Cannot find recordSpanningMultipleSplits.txt.bz2");

            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            // Lift the line-length cap so no record is truncated while draining.
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

            // A single split covering the whole file.
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, null);
            LineRecordReader reader = new LineRecordReader();

            reader.Initialize(split, context);

            // Drain every record; the contents are irrelevant to this test.
            while (reader.NextKeyValue())
            {
            }

            // The double close is the behavior under test.
            reader.Close();
            reader.Close();

            BZip2Codec codec = new BZip2Codec();

            codec.SetConf(conf);
            ICollection <Decompressor> decompressors = new HashSet <Decompressor>();

            for (int i = 0; i < 10; ++i)
            {
                decompressors.AddItem(CodecPool.GetDecompressor(codec));
            }

            // The set keeps distinct instances only, so a count of 10 means the
            // pool never handed out the same decompressor twice.
            NUnit.Framework.Assert.AreEqual(10, decompressors.Count);
        }
        /// <summary>
        /// Copies a single-member and a multiple-member bzip2 test file into the
        /// work directory, checks the byte counts reported for the local copies,
        /// and then runs <c>DoMultipleBzip2BufferSizes</c> over the inputs.
        /// </summary>
        public virtual void TestMoreBzip2()
        {
            JobConf          jobConf = new JobConf(defaultConf);
            CompressionCodec bzip2   = new BZip2Codec();

            ReflectionUtils.SetConf(bzip2, jobConf);
            localFs.Delete(workDir, true);
            System.Console.Out.WriteLine(ColorBrMagenta + "testMoreBzip2() using non-native CBZip2InputStream (presumably)"
                                         + ColorNormal);

            // copy single-member test file to HDFS
            string fn1      = "testConcatThenCompress.txt" + bzip2.GetDefaultExtension();
            Path   fnLocal1 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn1);
            Path   fnHDFS1  = new Path(workDir, fn1);

            localFs.CopyFromLocalFile(fnLocal1, fnHDFS1);

            // copy multiple-member test file to HDFS
            string fn2      = "testCompressThenConcat.txt" + bzip2.GetDefaultExtension();
            Path   fnLocal2 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn2);
            Path   fnHDFS2  = new Path(workDir, fn2);

            localFs.CopyFromLocalFile(fnLocal2, fnHDFS2);
            FileInputFormat.SetInputPaths(jobConf, workDir);

            // here's first pair of BlockDecompressorStreams:
            // The streams are only used for the size checks below, so close them
            // deterministically instead of leaking the file handles.
            FileInputStream in1 = new FileInputStream(fnLocal1.ToString());
            try
            {
                FileInputStream in2 = new FileInputStream(fnLocal2.ToString());
                try
                {
                    // NUnit's AreEqual takes (expected, actual, message); the
                    // converted JUnit order put the message in the expected slot.
                    NUnit.Framework.Assert.AreEqual(2567, in1.Available(), "concat bytes available");
                    NUnit.Framework.Assert.AreEqual(3056, in2.Available(), "concat bytes available");
                }
                finally
                {
                    in2.Close();
                }
            }
            finally
            {
                in1.Close();
            }

            /*
             * // FIXME
             * // The while-loop below dies at the beginning of the 2nd concatenated
             * // member (after 17 lines successfully read) with:
             * //
             * //   java.io.IOException: bad block header
             * //   at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock(
             * //   CBZip2InputStream.java:527)
             * //
             * // It is not critical to concatenated-gzip support, HADOOP-6835, so it's
             * // simply commented out for now (and HADOOP-6852 filed).  If and when the
             * // latter issue is resolved--perhaps by fixing an error here--this code
             * // should be reenabled.  Note that the doMultipleBzip2BufferSizes() test
             * // below uses the same testCompressThenConcat.txt.bz2 file but works fine.
             *
             * CompressionInputStream cin2 = bzip2.createInputStream(in2);
             * LineReader in = new LineReader(cin2);
             * Text out = new Text();
             *
             * int numBytes, totalBytes=0, lineNum=0;
             * while ((numBytes = in.readLine(out)) > 0) {
             * ++lineNum;
             * totalBytes += numBytes;
             * }
             * in.close();
             * assertEquals("total uncompressed bytes in concatenated test file",
             * 5346, totalBytes);
             * assertEquals("total uncompressed lines in concatenated test file",
             * 84, lineNum);
             */
            // test CBZip2InputStream with lots of different input-buffer sizes
            DoMultipleBzip2BufferSizes(jobConf, false);
        }
Esempio n. 9
0
        /// <summary>
        /// Checks that a codec built for <paramref name="level"/> does not report a
        /// hash code of zero.
        /// </summary>
        /// <param name="level">Compression level the codec is constructed with.</param>
        public void HashCode([Values] BZip2Level level)
        {
            var bzip2 = new BZip2Codec(level);
            int hash  = bzip2.GetHashCode();

            Assert.AreNotEqual(0, hash);
        }
Esempio n. 10
0
        /// <summary>
        /// Checks that a codec created with the parameterless constructor exposes
        /// <c>BZip2Level.Default</c> through its <c>Level</c> property.
        /// </summary>
        public void DefaultLevel()
        {
            var bzip2       = new BZip2Codec();
            var actualLevel = bzip2.Level;

            Assert.AreEqual(BZip2Level.Default, actualLevel);
        }