/// <summary>
/// Verifies the codec's reported name and string representation for every compression level:
/// the name is always "bzip2" and ToString() embeds the numeric level.
/// </summary>
public void ToStringAndName([Values] BZip2Level level)
{
    var codec = new BZip2Codec(level);

    Assert.AreEqual("bzip2", codec.GetName());
    Assert.AreEqual($"bzip2-{(int)level}", codec.ToString());
}
/// <summary>
/// Regression test (mapred API): closing a LineRecordReader over a bzip2 input twice must not
/// corrupt the decompressor pool. After draining the reader and calling Close() twice, the
/// codec pool is asked for 10 decompressors; collecting them in a HashSet and asserting a
/// count of 10 proves all 10 instances are distinct (i.e. no pooled instance was returned twice).
/// </summary>
/// <remarks>
/// NOTE(review): assertions use JUnit-style (message, expected/actual) argument order —
/// presumably the NUnit.Framework reference here is a Sharpen/translation shim; confirm
/// against the actual Assert overloads in use.
/// </remarks>
public virtual void TestMultipleClose() { Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2" ); NUnit.Framework.Assert.IsNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2" , testFileUrl); FilePath testFile = new FilePath(testFileUrl.GetFile()); Path testFilePath = new Path(testFile.GetAbsolutePath()); long testFileSize = testFile.Length(); Configuration conf = new Configuration(); conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue); FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (string[])null); LineRecordReader reader = new LineRecordReader(conf, split); LongWritable key = new LongWritable(); Text value = new Text(); //noinspection StatementWithEmptyBody while (reader.Next(key, value)) { } reader.Close(); reader.Close(); BZip2Codec codec = new BZip2Codec(); codec.SetConf(conf); ICollection <Decompressor> decompressors = new HashSet <Decompressor>(); for (int i = 0; i < 10; ++i) { decompressors.AddItem(CodecPool.GetDecompressor(codec)); } NUnit.Framework.Assert.AreEqual(10, decompressors.Count); }
/// <summary>
/// Checks that codec equality is reflexive and symmetric for two codecs constructed
/// with the same compression level.
/// </summary>
public void Equal([Values] BZip2Level level)
{
    var first = new BZip2Codec(level);
    var second = new BZip2Codec(level);

    // Reflexive.
    Assert.IsTrue(first.Equals(first));
    Assert.IsTrue(second.Equals(second));

    // Symmetric.
    Assert.IsTrue(first.Equals(second));
    Assert.IsTrue(second.Equals(first));
}
/// <summary>
/// Round-trips a deterministic byte payload through Compress/Decompress at the given
/// level and asserts the output is byte-identical to the input.
/// </summary>
public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level)
{
    // Deterministic payload: 0, 1, 2, ... wrapping at 256.
    byte[] original = new byte[length];
    for (int i = 0; i < length; i++)
    {
        original[i] = (byte)i;
    }

    var codec = new BZip2Codec(level);
    byte[] compressed = codec.Compress(original);
    byte[] roundTripped = codec.Decompress(compressed, compressed.Length);

    Assert.IsTrue(original.SequenceEqual(roundTripped));
}
/// <summary>
/// Exercises the bzip2 codec through TextInputFormat (mapred API): copies the prebuilt
/// concatenated fixture concat.bz2 into the work dir, writes a second small bzip2 file,
/// raises the minimum split size to force exactly two splits, then checks each split's
/// line count and spot-checks line contents.
/// </summary>
/// <remarks>
/// NOTE(review): the splits are swapped when splits[0] happens to be part2.txt.bz2 —
/// apparently GetSplits() does not guarantee path ordering. Assertions use JUnit-style
/// (message, expected, actual) argument order — presumably a Sharpen/translation shim.
/// The expected values (6 lines / "member #3") pin the contents of the prebuilt
/// concat.bz2 fixture; update them if the fixture is regenerated.
/// </remarks>
public virtual void TestBzip2() { JobConf jobConf = new JobConf(defaultConf); CompressionCodec bzip2 = new BZip2Codec(); ReflectionUtils.SetConf(bzip2, jobConf); localFs.Delete(workDir, true); System.Console.Out.WriteLine(ColorBrCyan + "testBzip2() using non-native CBZip2InputStream (presumably)" + ColorNormal); // copy prebuilt (correct!) version of concat.bz2 to HDFS string fn = "concat" + bzip2.GetDefaultExtension(); Path fnLocal = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn); Path fnHDFS = new Path(workDir, fn); localFs.CopyFromLocalFile(fnLocal, fnHDFS); WriteFile(localFs, new Path(workDir, "part2.txt.bz2"), bzip2, "this is a test\nof bzip2\n" ); FileInputFormat.SetInputPaths(jobConf, workDir); TextInputFormat format = new TextInputFormat(); // extends FileInputFormat format.Configure(jobConf); format.SetMinSplitSize(256); // work around 2-byte splits issue // [135 splits for a 208-byte file and a 62-byte file(!)] InputSplit[] splits = format.GetSplits(jobConf, 100); NUnit.Framework.Assert.AreEqual("compressed splits == 2", 2, splits.Length); FileSplit tmp = (FileSplit)splits[0]; if (tmp.GetPath().GetName().Equals("part2.txt.bz2")) { splits[0] = splits[1]; splits[1] = tmp; } IList <Text> results = ReadSplit(format, splits[0], jobConf); NUnit.Framework.Assert.AreEqual("splits[0] num lines", 6, results.Count); NUnit.Framework.Assert.AreEqual("splits[0][5]", "member #3", results[5].ToString( )); results = ReadSplit(format, splits[1], jobConf); NUnit.Framework.Assert.AreEqual("splits[1] num lines", 2, results.Count); NUnit.Framework.Assert.AreEqual("splits[1][0]", "this is a test", results[0].ToString ()); NUnit.Framework.Assert.AreEqual("splits[1][1]", "of bzip2", results[1].ToString() ); }
/// <summary>
/// Round-trips a deterministic byte payload through the stream-based Compress overload,
/// then decompresses the captured bytes and asserts they match the original input.
/// </summary>
public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level)
{
    // Deterministic payload: 0, 1, 2, ... wrapping at 256.
    byte[] original = new byte[length];
    for (int i = 0; i < length; i++)
    {
        original[i] = (byte)i;
    }

    var codec = new BZip2Codec(level);

    using (var source = new MemoryStream(original))
    using (var sink = new MemoryStream())
    {
        codec.Compress(source, sink);

        byte[] compressed = sink.ToArray();
        byte[] roundTripped = codec.Decompress(compressed, compressed.Length);

        Assert.IsTrue(original.SequenceEqual(roundTripped));
    }
}
/// <summary>
/// Regression test (mapreduce API): closing a LineRecordReader over a bzip2 input twice must
/// not corrupt the decompressor pool. After draining the reader via NextKeyValue() and calling
/// Close() twice, the codec pool is asked for 10 decompressors; collecting them in a HashSet
/// and asserting a count of 10 proves all 10 instances are distinct.
/// </summary>
/// <remarks>
/// NOTE(review): the inline "check whether BOM is skipped" comment looks like a copy-paste
/// leftover from a different test — nothing here inspects a BOM. Assertions use JUnit-style
/// (message, expected/actual) argument order — presumably a Sharpen/translation shim.
/// </remarks>
public virtual void TestMultipleClose() { Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2" ); NUnit.Framework.Assert.IsNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2" , testFileUrl); FilePath testFile = new FilePath(testFileUrl.GetFile()); Path testFilePath = new Path(testFile.GetAbsolutePath()); long testFileSize = testFile.Length(); Configuration conf = new Configuration(); conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID() ); // read the data and check whether BOM is skipped FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null); LineRecordReader reader = new LineRecordReader(); reader.Initialize(split, context); //noinspection StatementWithEmptyBody while (reader.NextKeyValue()) { } reader.Close(); reader.Close(); BZip2Codec codec = new BZip2Codec(); codec.SetConf(conf); ICollection <Decompressor> decompressors = new HashSet <Decompressor>(); for (int i = 0; i < 10; ++i) { decompressors.AddItem(CodecPool.GetDecompressor(codec)); } NUnit.Framework.Assert.AreEqual(10, decompressors.Count); }
/// <summary>
/// Exercises CBZip2InputStream against both a single-member bzip2 file and a multi-member
/// (concatenated) one: copies both fixtures to the work dir, sanity-checks their raw sizes
/// via Available(), then delegates to DoMultipleBzip2BufferSizes() to re-read them across
/// many input-buffer sizes. The direct LineReader pass over the concatenated file is
/// commented out pending HADOOP-6852 ("bad block header" at the start of the second member).
/// </summary>
/// <remarks>
/// NOTE(review): the hard-coded byte counts (2567 / 3056) pin the sizes of the prebuilt
/// fixture files; regenerate the expected values if the fixtures change. The two
/// FileInputStreams (in1/in2) are never closed on this path — presumably acceptable in a
/// test, but worth confirming. Assertions use JUnit-style (message, expected, actual)
/// argument order — presumably a Sharpen/translation shim.
/// </remarks>
public virtual void TestMoreBzip2() { JobConf jobConf = new JobConf(defaultConf); CompressionCodec bzip2 = new BZip2Codec(); ReflectionUtils.SetConf(bzip2, jobConf); localFs.Delete(workDir, true); System.Console.Out.WriteLine(ColorBrMagenta + "testMoreBzip2() using non-native CBZip2InputStream (presumably)" + ColorNormal); // copy single-member test file to HDFS string fn1 = "testConcatThenCompress.txt" + bzip2.GetDefaultExtension(); Path fnLocal1 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn1); Path fnHDFS1 = new Path(workDir, fn1); localFs.CopyFromLocalFile(fnLocal1, fnHDFS1); // copy multiple-member test file to HDFS string fn2 = "testCompressThenConcat.txt" + bzip2.GetDefaultExtension(); Path fnLocal2 = new Path(Runtime.GetProperty("test.concat.data", "/tmp"), fn2); Path fnHDFS2 = new Path(workDir, fn2); localFs.CopyFromLocalFile(fnLocal2, fnHDFS2); FileInputFormat.SetInputPaths(jobConf, workDir); // here's first pair of BlockDecompressorStreams: FileInputStream in1 = new FileInputStream(fnLocal1.ToString()); FileInputStream in2 = new FileInputStream(fnLocal2.ToString()); NUnit.Framework.Assert.AreEqual("concat bytes available", 2567, in1.Available()); NUnit.Framework.Assert.AreEqual("concat bytes available", 3056, in2.Available()); /* * // FIXME * // The while-loop below dies at the beginning of the 2nd concatenated * // member (after 17 lines successfully read) with: * // * // java.io.IOException: bad block header * // at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock( * // CBZip2InputStream.java:527) * // * // It is not critical to concatenated-gzip support, HADOOP-6835, so it's * // simply commented out for now (and HADOOP-6852 filed). If and when the * // latter issue is resolved--perhaps by fixing an error here--this code * // should be reenabled. Note that the doMultipleBzip2BufferSizes() test * // below uses the same testCompressThenConcat.txt.bz2 file but works fine. 
* * CompressionInputStream cin2 = bzip2.createInputStream(in2); * LineReader in = new LineReader(cin2); * Text out = new Text(); * * int numBytes, totalBytes=0, lineNum=0; * while ((numBytes = in.readLine(out)) > 0) { ++lineNum; * totalBytes += numBytes; * } * in.close(); * assertEquals("total uncompressed bytes in concatenated test file", * 5346, totalBytes); * assertEquals("total uncompressed lines in concatenated test file", * 84, lineNum); */ // test CBZip2InputStream with lots of different input-buffer sizes DoMultipleBzip2BufferSizes(jobConf, false); }
/// <summary>
/// Sanity-checks that GetHashCode() produces a non-zero value for every compression level
/// (a zero hash would suggest the override was left unimplemented).
/// </summary>
public void HashCode([Values] BZip2Level level)
{
    var codec = new BZip2Codec(level);

    Assert.AreNotEqual(0, codec.GetHashCode());
}
/// <summary>
/// Verifies that the parameterless constructor selects the default compression level.
/// </summary>
public void DefaultLevel()
{
    var codec = new BZip2Codec();

    Assert.AreEqual(BZip2Level.Default, codec.Level);
}