/// <summary>
/// Checks that data written with <c>GZIPOutputStream</c> is restored byte-for-byte
/// by the decompressor that <c>GzipCodec</c> creates when the native-library
/// configuration flag is set to false.
/// </summary>
public virtual void TestGzipCompatibility()
{
    // Draw a seed, re-seed the generator with it and log the value.
    Random rng = new Random();
    long seed = rng.NextLong();
    rng.SetSeed(seed);
    Log.Info("seed: " + seed);

    // Gzip a randomly sized, randomly filled payload into an in-memory buffer.
    DataOutputBuffer scratch = new DataOutputBuffer();
    GZIPOutputStream gzipSink = new GZIPOutputStream(scratch);
    int payloadSize = rng.Next(128 * 1024 + 1);
    byte[] payload = new byte[payloadSize];
    rng.NextBytes(payload);
    gzipSink.Write(payload);
    gzipSink.Close();

    // Expose the compressed bytes as an input buffer.
    DataInputBuffer compressed = new DataInputBuffer();
    compressed.Reset(scratch.GetData(), scratch.GetLength());

    // Build a GzipCodec from a configuration with native libraries switched off
    // and make sure it hands out the built-in gzip decompressor.
    Configuration conf = new Configuration();
    conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
    CompressionCodec gzipCodec = ReflectionUtils.NewInstance<GzipCodec>(conf);
    Decompressor inflater = gzipCodec.CreateDecompressor();
    NUnit.Framework.Assert.IsNotNull(inflater);
    Assert.Equal(typeof(BuiltInGzipDecompressor), inflater.GetType());

    // Inflate into the (reset) scratch buffer and compare against the payload.
    InputStream inflated = gzipCodec.CreateInputStream(compressed, inflater);
    scratch.Reset();
    IOUtils.CopyBytes(inflated, scratch, 4096);
    byte[] roundTripped = Arrays.CopyOf(scratch.GetData(), scratch.GetLength());
    Assert.AssertArrayEquals(payload, roundTripped);
}
/// <summary>
/// Writes a gzip file through the codec machinery and checks each result with
/// <c>VerifyGzipFile</c>: first through a compressor leased from
/// <c>CodecPool</c> (when one is returned), then through
/// <c>codec.CreateOutputStream</c>.
/// </summary>
/// <param name="useNative">
/// value stored under the native-library configuration key; when it is true and
/// native zlib is not loaded, the test logs a warning and returns early
/// </param>
/// <exception cref="System.IO.IOException"/>
public virtual void TestGzipCodecWrite(bool useNative)
{
    // Use native libs per the parameter.
    Configuration conf = new Configuration();
    conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, useNative);
    bool nativeLoaded = ZlibFactory.IsNativeZlibLoaded(conf);
    if (!useNative)
    {
        NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request", nativeLoaded);
    }
    else if (!nativeLoaded)
    {
        Log.Warn("testGzipCodecWrite skipped: native libs not loaded");
        return;
    }

    // Lease a zlib compressor, check its flavour, and hand it to the CodecPool.
    Compressor zlibCompressor = ZlibFactory.GetZlibCompressor(conf);
    NUnit.Framework.Assert.IsNotNull("zlibCompressor is null!", zlibCompressor);
    bool expectedFlavour;
    if (useNative)
    {
        expectedFlavour = zlibCompressor is ZlibCompressor;
    }
    else
    {
        expectedFlavour = zlibCompressor is BuiltInZlibDeflater;
    }
    Assert.True("ZlibFactory returned unexpected deflator", expectedFlavour);
    CodecPool.ReturnCompressor(zlibCompressor);

    // Resolve the codec for a ".gz" file name.
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.GetCodec(new Path("foo.gz"));
    Assert.True("Codec for .gz file is not GzipCodec", codec is GzipCodec);

    string msg = "This is the message we are going to compress.";
    string tmpDir = Runtime.GetProperty("test.build.data", "/tmp/");
    Path target = new Path(new Path(tmpDir), "testGzipCodecWrite.txt.gz");
    string fileName = target.ToString();

    // Pass 1: create the file via the Compressor interface, if the pool gives
    // us a compressor at all.
    Compressor gzipCompressor = CodecPool.GetCompressor(codec);
    if (gzipCompressor != null)
    {
        OutputStream compressed = new CompressorStream(new FileOutputStream(fileName), gzipCompressor);
        BufferedWriter pooledWriter = new BufferedWriter(new OutputStreamWriter(compressed));
        pooledWriter.Write(msg);
        pooledWriter.Close();
        CodecPool.ReturnCompressor(gzipCompressor);
        VerifyGzipFile(fileName, msg);
    }

    // Pass 2: create the file via the codec's own output stream.
    OutputStream codecStream = codec.CreateOutputStream(new FileOutputStream(fileName));
    BufferedWriter codecWriter = new BufferedWriter(new OutputStreamWriter(codecStream));
    codecWriter.Write(msg);
    codecWriter.Close();
    VerifyGzipFile(fileName, msg);
}
/// <summary>
/// Checks which decompressors <c>ZlibFactory</c>, <c>GzipCodec</c> and
/// <c>CodecPool</c> hand out when native libraries are disabled, and how the
/// pool treats a returned gzip decompressor: an instance of
/// <c>BuiltInGzipDecompressor</c> must not be handed out again, any other
/// decompressor type must be reused.
/// </summary>
public virtual void TestCodecPoolAndGzipDecompressor()
{
    // BuiltInZlibInflater should not be used as the GzipCodec decompressor.
    // Assert that this is the case.
    // Don't use native libs for this test. The shared configuration-key constant
    // is used (as in the sibling gzip tests) instead of a hard-coded key string.
    Configuration conf = new Configuration();
    conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
    NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request", ZlibFactory.IsNativeZlibLoaded(conf));

    // This should give us a BuiltInZlibInflater.
    Decompressor zlibDecompressor = ZlibFactory.GetZlibDecompressor(conf);
    NUnit.Framework.Assert.IsNotNull("zlibDecompressor is null!", zlibDecompressor);
    Assert.True("ZlibFactory returned unexpected inflator", zlibDecompressor is BuiltInZlibInflater);

    // Resolve the codec registered for ".gz" files.
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.GetCodec(new Path("foo.gz"));
    Assert.True("Codec for .gz file is not GzipCodec", codec is GzipCodec);

    // make sure we don't get a null decompressor
    Decompressor codecDecompressor = codec.CreateDecompressor();
    if (null == codecDecompressor)
    {
        NUnit.Framework.Assert.Fail("Got null codecDecompressor");
    }

    // Asking the CodecPool for a decompressor for GzipCodec
    // should not return null
    Decompressor poolDecompressor = CodecPool.GetDecompressor(codec);
    if (null == poolDecompressor)
    {
        NUnit.Framework.Assert.Fail("Got null poolDecompressor");
    }

    // return a couple decompressors
    CodecPool.ReturnDecompressor(zlibDecompressor);
    CodecPool.ReturnDecompressor(poolDecompressor);

    // Lease again and compare identities with the instance returned above.
    Decompressor poolDecompressor2 = CodecPool.GetDecompressor(codec);
    if (poolDecompressor.GetType() == typeof(BuiltInGzipDecompressor))
    {
        if (poolDecompressor == poolDecompressor2)
        {
            NUnit.Framework.Assert.Fail("Reused java gzip decompressor in pool");
        }
    }
    else
    {
        if (poolDecompressor != poolDecompressor2)
        {
            NUnit.Framework.Assert.Fail("Did not reuse native gzip decompressor in pool");
        }
    }
}
/// <summary>
/// Resolves a codec through <see cref="GetCodecByName"/> and reports the
/// runtime type of the codec that was found.
/// </summary>
/// <remarks>
/// The name may be the codec's canonical class name or one of its aliases.
/// <p/>
/// Codec aliases are case insensitive.
/// <p/>
/// The alias is the short class name (without the package name). If the short
/// class name ends with 'Codec', the codec has two aliases: the complete short
/// class name and the short class name without the 'Codec' ending. For example,
/// the 'GzipCodec' class is reachable as 'gzip' and 'gzipcodec'.
/// </remarks>
/// <param name="codecName">the canonical class name or an alias of the codec</param>
/// <returns>the codec's type, or null when no codec matches</returns>
public virtual Type GetCodecClassByName(string codecName)
{
    CompressionCodec match = GetCodecByName(codecName);
    return match != null ? match.GetType() : null;
}
/// <summary>
/// Writes (4 * 1024 + 1) buffers of 1024 * 1024 NUL characters through a
/// <c>GZIPOutputStream</c> and reads them back through the codec resolved for
/// the file, using a decompressor leased from <c>CodecPool</c>; every read must
/// fill the whole buffer with NUL characters.
/// </summary>
public virtual void TestGzipLongOverflow()
{
    Log.Info("testGzipLongOverflow");

    // Don't use native libs for this test.
    Configuration conf = new Configuration();
    conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
    NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request", ZlibFactory.IsNativeZlibLoaded(conf));

    // Hand a BuiltInZlibInflater to the CodecPool.
    Decompressor zlibDecompressor = ZlibFactory.GetZlibDecompressor(conf);
    NUnit.Framework.Assert.IsNotNull("zlibDecompressor is null!", zlibDecompressor);
    Assert.True("ZlibFactory returned unexpected inflator", zlibDecompressor is BuiltInZlibInflater);
    CodecPool.ReturnDecompressor(zlibDecompressor);

    // Create the gzip file: chunkCount buffers of NUL characters.
    string tmpDir = Runtime.GetProperty("test.build.data", "/tmp/");
    Path gzFile = new Path(new Path(tmpDir), "testGzipLongOverflow.bin.gz");
    FileOutputStream rawOut = new FileOutputStream(gzFile.ToString());
    GZIPOutputStream gzOut = new GZIPOutputStream(rawOut);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(gzOut));
    int chunkCount = 1024 * 4 + 1;
    char[] chunk = new char[1024 * 1024];
    for (int pos = 0; pos < chunk.Length; pos++)
    {
        chunk[pos] = '\0';
    }
    for (int written = 0; written < chunkCount; written++)
    {
        writer.Write(chunk);
    }
    writer.Close();

    // Read it back, letting the CodecPool choose the decompressor.
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.GetCodec(gzFile);
    Decompressor decompressor = CodecPool.GetDecompressor(codec);
    FileSystem localFs = FileSystem.GetLocal(conf);
    InputStream rawIn = localFs.Open(gzFile);
    InputStream inflatedIn = codec.CreateInputStream(rawIn, decompressor);
    BufferedReader reader = new BufferedReader(new InputStreamReader(inflatedIn));
    for (int readNo = 0; readNo < chunkCount; readNo++)
    {
        int got = reader.Read(chunk);
        Assert.Equal("got wrong read length!", got, chunk.Length);
        for (int pos = 0; pos < chunk.Length; pos++)
        {
            Assert.Equal("got wrong byte!", chunk[pos], '\0');
        }
    }
    reader.Close();
}
/// <summary>
/// Registers a codec in the three lookup tables: by reversed default extension,
/// by canonical class name, and by lower-cased short class name (plus the short
/// name without a trailing "Codec", when present).
/// </summary>
/// <param name="codec">the codec to register</param>
private void AddCodec(CompressionCodec codec)
{
    // Keyed by the reversed extension.
    string suffix = codec.GetDefaultExtension();
    codecs[((StringBuilder) new StringBuilder(suffix).Reverse()).ToString()] = codec;
    codecsByClassName[codec.GetType().GetCanonicalName()] = codec;

    // First alias: the complete short class name, lower-cased.
    string codecName = codec.GetType().Name;
    codecsByName[StringUtils.ToLowerCase(codecName)] = codec;

    // Second alias: the short class name without the "Codec" ending. The suffix
    // test is ordinal: class names are identifiers, so culture-specific
    // comparison rules must not influence the match.
    if (codecName.EndsWith("Codec", System.StringComparison.Ordinal))
    {
        codecName = Runtime.Substring(codecName, 0, codecName.Length - "Codec".Length);
        codecsByName[StringUtils.ToLowerCase(codecName)] = codec;
    }
}
/// <summary>
/// Find the relevant compression codec for the codec's canonical class name
/// or by codec alias.
/// </summary>
/// <remarks>
/// The class-name table is consulted first (via <c>GetCodecByClassName</c>);
/// only when that yields nothing is the lower-cased name looked up as an alias.
/// <p/>
/// Codec aliases are case insensitive.
/// <p/>
/// The codec alias is the short class name (without the package name).
/// If the short class name ends with 'Codec', then there are two aliases for
/// the codec, the complete short class name and the short class name without
/// the 'Codec' ending. For example for the 'GzipCodec' codec class name the
/// aliases are 'gzip' and 'gzipcodec'.
/// </remarks>
/// <param name="codecName">the canonical class name or an alias of the codec</param>
/// <returns>
/// the codec object, or null when the class-name table is null or nothing matches
/// </returns>
public virtual CompressionCodec GetCodecByName(string codecName)
{
    if (codecsByClassName == null)
    {
        return null;
    }
    CompressionCodec codec = GetCodecByClassName(codecName);
    if (codec == null)
    {
        // Try the alias table in case a name was given instead of a class.
        // TryGetValue keeps the "unknown name => null" contract; a plain
        // dictionary indexer would throw for a key that is not present.
        if (!codecsByName.TryGetValue(StringUtils.ToLowerCase(codecName), out codec))
        {
            codec = null;
        }
    }
    return codec;
}
/// <summary>
/// Creates a compressor for <paramref name="codecClass"/> after setting the
/// compression level to NO_COMPRESSION, and checks that the output is not
/// smaller than the input.
/// </summary>
/// <param name="conf">configuration that receives the compression level and is used to load the codec</param>
/// <param name="codecClass">class name of the codec under test</param>
/// <exception cref="System.IO.IOException">
/// when the codec class cannot be loaded (the original failure is attached as
/// the inner exception) or when writing fails
/// </exception>
private static void CodecTestWithNOCompression(Configuration conf, string codecClass)
{
    // Create a compressor with NO_COMPRESSION and make sure that
    // output is not compressed by comparing the size with the
    // original input
    CompressionCodec codec = null;
    ZlibFactory.SetCompressionLevel(conf, ZlibCompressor.CompressionLevel.NoCompression);
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName(codecClass), conf);
    }
    catch (TypeLoadException e)
    {
        // Keep the cause so a bad class name can be diagnosed from the trace.
        throw new IOException("Illegal codec!", e);
    }
    Compressor c = codec.CreateCompressor();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    CompressionOutputStream cos = null;

    // write trivially compressable data
    byte[] b = new byte[1 << 15];
    Arrays.Fill(b, unchecked ((byte)43));
    try
    {
        cos = codec.CreateOutputStream(bos, c);
        cos.Write(b);
    }
    finally
    {
        if (cos != null)
        {
            cos.Close();
        }
    }
    byte[] outbytes = bos.ToByteArray();

    // verify data were not compressed
    Assert.True("Compressed bytes contrary to configuration(NO_COMPRESSION)", outbytes.Length >= b.Length);
}
/// <summary>
/// Leases a <see cref="Decompressor"/> for the given
/// <see cref="CompressionCodec"/>: one borrowed from the pool when available,
/// otherwise one freshly created by the codec.
/// </summary>
/// <param name="codec">
/// the <code>CompressionCodec</code> for which to get the
/// <code>Decompressor</code>
/// </param>
/// <returns>
/// <code>Decompressor</code> for the given
/// <code>CompressionCodec</code> from the pool or a new one
/// </returns>
public static Decompressor GetDecompressor(CompressionCodec codec)
{
    Decompressor leased = Borrow(decompressorPool, codec.GetDecompressorType());
    if (leased != null)
    {
        if (Log.IsDebugEnabled())
        {
            Log.Debug("Got recycled decompressor");
        }
    }
    else
    {
        // Nothing to borrow: let the codec build a new instance.
        leased = codec.CreateDecompressor();
        Log.Info("Got brand-new decompressor [" + codec.GetDefaultExtension() + "]");
    }

    // Record the lease in the decompressor counts.
    UpdateLeaseCount(decompressorCounts, leased, 1);
    return leased;
}
/// <summary>
/// Leases a compressor with BEST_COMPRESSION configured, returns it to the pool,
/// switches the configuration to NO_COMPRESSION and leases again with that
/// configuration; the same instance must come back and its output must not be
/// smaller than the input.
/// </summary>
/// <param name="conf">configuration whose compression level and strategy are modified</param>
/// <param name="codec">codec used to lease compressors and create the output stream</param>
/// <exception cref="System.IO.IOException"/>
private static void GzipReinitTest(Configuration conf, CompressionCodec codec)
{
    // Put a compressor into the pool while best compression is configured.
    ZlibFactory.SetCompressionLevel(conf, ZlibCompressor.CompressionLevel.BestCompression);
    ZlibFactory.SetCompressionStrategy(conf, ZlibCompressor.CompressionStrategy.DefaultStrategy);
    Compressor pooled = CodecPool.GetCompressor(codec);
    CodecPool.ReturnCompressor(pooled);

    // Reconfigure for no compression and lease with that configuration.
    ZlibFactory.SetCompressionLevel(conf, ZlibCompressor.CompressionLevel.NoCompression);
    Compressor reinitialised = CodecPool.GetCompressor(codec, conf);

    // The pool must hand back the very instance returned above.
    Assert.True("Got mismatched ZlibCompressor", pooled == reinitialised);

    // Push trivially compressible data through the compressor.
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    byte[] input = new byte[1 << 15];
    Arrays.Fill(input, unchecked ((byte)43));
    CompressionOutputStream compressing = null;
    try
    {
        compressing = codec.CreateOutputStream(sink, reinitialised);
        compressing.Write(input);
    }
    finally
    {
        if (compressing != null)
        {
            compressing.Close();
        }
        CodecPool.ReturnCompressor(reinitialised);
    }

    // The data must not have shrunk.
    byte[] produced = sink.ToByteArray();
    Assert.True("Compressed bytes contrary to configuration", produced.Length >= input.Length);
}
/// <summary>
/// Creates a gzip text file with <c>GZIPOutputStream</c> and reads it back
/// through the codec resolved for the file, using a decompressor leased from
/// <c>CodecPool</c>; the line read must equal the message written.
/// </summary>
public virtual void TestGzipCodecRead()
{
    // Don't use native libs for this test.
    Configuration conf = new Configuration();
    conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
    NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request", ZlibFactory.IsNativeZlibLoaded(conf));

    // Hand a BuiltInZlibInflater to the CodecPool.
    Decompressor zlibDecompressor = ZlibFactory.GetZlibDecompressor(conf);
    NUnit.Framework.Assert.IsNotNull("zlibDecompressor is null!", zlibDecompressor);
    Assert.True("ZlibFactory returned unexpected inflator", zlibDecompressor is BuiltInZlibInflater);
    CodecPool.ReturnDecompressor(zlibDecompressor);

    // Write the gzip text file.
    string tmpDir = Runtime.GetProperty("test.build.data", "/tmp/");
    Path gzFile = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
    FileOutputStream rawOut = new FileOutputStream(gzFile.ToString());
    GZIPOutputStream gzOut = new GZIPOutputStream(rawOut);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(gzOut));
    string msg = "This is the message in the file!";
    writer.Write(msg);
    writer.Close();

    // Read it back, letting the CodecPool choose the decompressor.
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.GetCodec(gzFile);
    Decompressor decompressor = CodecPool.GetDecompressor(codec);
    FileSystem localFs = FileSystem.GetLocal(conf);
    InputStream rawIn = localFs.Open(gzFile);
    InputStream inflatedIn = codec.CreateInputStream(rawIn, decompressor);
    BufferedReader reader = new BufferedReader(new InputStreamReader(inflatedIn));
    string line = reader.ReadLine();
    Assert.Equal("Didn't get the same message back!", msg, line);
    reader.Close();
}
/// <summary>
/// Leases a <see cref="Compressor"/> for the given
/// <see cref="CompressionCodec"/>: one borrowed from the pool and
/// re-initialised with <paramref name="conf"/> when available, otherwise one
/// freshly created by the codec.
/// </summary>
/// <param name="codec">
/// the <code>CompressionCodec</code> for which to get the
/// <code>Compressor</code>
/// </param>
/// <param name="conf">
/// the <code>Configuration</code> passed to <c>Reinit</c> when a pooled
/// compressor is recycled
/// </param>
/// <returns>
/// <code>Compressor</code> for the given
/// <code>CompressionCodec</code> from the pool or a new one
/// </returns>
public static Compressor GetCompressor(CompressionCodec codec, Configuration conf)
{
    Compressor leased = Borrow(compressorPool, codec.GetCompressorType());
    if (leased != null)
    {
        // A recycled instance is re-initialised with the caller's configuration.
        leased.Reinit(conf);
        if (Log.IsDebugEnabled())
        {
            Log.Debug("Got recycled compressor");
        }
    }
    else
    {
        leased = codec.CreateCompressor();
        Log.Info("Got brand-new compressor [" + codec.GetDefaultExtension() + "]");
    }

    // Record the lease in the compressor counts.
    UpdateLeaseCount(compressorCounts, leased, 1);
    return leased;
}
/// <summary>Write records to a codec-compressed file in the test dir until infLen is used up.</summary>
/// <remarks>
/// Each record is the base64 encoding of 1024 random bytes whose leading bytes
/// are overwritten with the bytes produced by <c>WriteInt</c> for the record's
/// sequence number, followed by a newline. <paramref name="infLen"/> is reduced
/// by the encoded record length after every record.
/// </remarks>
/// <param name="fs">file system used to qualify the directory and create the file</param>
/// <param name="rand">source of the random record bytes</param>
/// <param name="codec">codec that names the directory and file extension and compresses the output</param>
/// <param name="infLen">number of uncompressed bytes to write (the newline separators are not counted)</param>
/// <returns>the path of the file that was written</returns>
/// <exception cref="System.IO.IOException"/>
private static Path WriteSplitTestFile(FileSystem fs, Random rand, CompressionCodec codec, long infLen)
{
    int recordBytes = 1024;
    Path baseDir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    Path workDir = new Path(baseDir, codec.GetType().Name);
    Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
    byte[] raw = new byte[recordBytes];
    Base64 encoder = new Base64(0, null);
    DataOutputStream sink = null;
    Compressor compressor = CodecPool.GetCompressor(codec);
    try
    {
        sink = new DataOutputStream(codec.CreateOutputStream(fs.Create(file, true), compressor));
        DataOutputBuffer seqBytes = new DataOutputBuffer(recordBytes * 4 / 3 + 4);
        int seq = 0;
        for (; infLen > 0; ++seq)
        {
            rand.NextBytes(raw);

            // ensures rand printable, no LF
            byte[] encoded = encoder.Encode(raw);

            // Stamp the sequence number over the start of the record.
            seqBytes.Reset();
            seqBytes.WriteInt(seq);
            System.Array.Copy(seqBytes.GetData(), 0, encoded, 0, seqBytes.GetLength());

            sink.Write(encoded);
            sink.Write('\n');
            infLen -= encoded.Length;
        }
        Log.Info("Wrote " + seq + " records to " + file);
    }
    finally
    {
        IOUtils.Cleanup(Log, sink);
        CodecPool.ReturnCompressor(compressor);
    }
    return file;
}
/// <summary>
/// Find the relevant compression codec for the given file based on its
/// filename suffix.
/// </summary>
/// <remarks>
/// The codec table is keyed by reversed extension, so the reversed file name is
/// compared against the last key of <c>codecs.HeadMap(reversedFilename)</c>.
/// NOTE(review): <c>HeadMap</c> presumably yields the entries whose keys sort
/// before the reversed file name - confirm against its implementation.
/// </remarks>
/// <param name="file">the filename to check</param>
/// <returns>the codec object, or null when the table is null or no suffix matches</returns>
public virtual CompressionCodec GetCodec(Path file)
{
    CompressionCodec result = null;
    if (codecs != null)
    {
        string filename = file.GetName();
        string reversedFilename = ((StringBuilder) new StringBuilder(filename).Reverse()).ToString();
        SortedDictionary<string, CompressionCodec> subMap = codecs.HeadMap(reversedFilename);
        if (!subMap.IsEmpty())
        {
            string potentialSuffix = subMap.LastKey();

            // Ordinal prefix test: file names are matched character by character,
            // so culture-specific comparison rules must not affect the result.
            if (reversedFilename.StartsWith(potentialSuffix, System.StringComparison.Ordinal))
            {
                result = codecs[potentialSuffix];
            }
        }
    }
    return result;
}
/// <summary>
/// Concatenates several independently gzipped random slices into one buffer and
/// checks that the decompressor created by <c>GzipCodec</c> - which must be of
/// type <paramref name="decomClass"/> - restores the concatenation of the
/// original slices.
/// </summary>
/// <param name="conf">configuration used to instantiate the codec</param>
/// <param name="decomClass">decompressor type the codec is expected to create</param>
/// <exception cref="System.IO.IOException"/>
internal virtual void GzipConcatTest(Configuration conf, Type decomClass)
{
    // Draw a seed, re-seed the generator with it and log the value.
    Random rng = new Random();
    long seed = rng.NextLong();
    rng.SetSeed(seed);
    Log.Info(decomClass + " seed: " + seed);

    int memberCount = rng.Next(4) + 3;
    int sliceCapacity = 128 * 1024;
    DataOutputBuffer gzipped = new DataOutputBuffer();
    DataOutputBuffer expected = new DataOutputBuffer();
    byte[] noise = new byte[sliceCapacity];
    for (int member = 0; member < memberCount; ++member)
    {
        // One gzip stream per slice, all appended to the same buffer.
        GZIPOutputStream gzipSink = new GZIPOutputStream(gzipped);
        rng.NextBytes(noise);
        int length = rng.Next(sliceCapacity);
        int offset = rng.Next(sliceCapacity - length);
        expected.Write(noise, offset, length);
        gzipSink.Write(noise, offset, length);
        gzipSink.Close();
    }
    byte[] expectedBytes = Arrays.CopyOf(expected.GetData(), expected.GetLength());

    // The codec must create the decompressor type the caller asked for.
    CompressionCodec gzipCodec = ReflectionUtils.NewInstance<GzipCodec>(conf);
    Decompressor inflater = gzipCodec.CreateDecompressor();
    NUnit.Framework.Assert.IsNotNull(inflater);
    Assert.Equal(decomClass, inflater.GetType());

    // Inflate the concatenation into the (reset) buffer and compare.
    DataInputBuffer compressed = new DataInputBuffer();
    compressed.Reset(gzipped.GetData(), gzipped.GetLength());
    InputStream inflated = gzipCodec.CreateInputStream(compressed, inflater);
    gzipped.Reset();
    IOUtils.CopyBytes(inflated, gzipped, 4096);
    byte[] actualBytes = Arrays.CopyOf(gzipped.GetData(), gzipped.GetLength());
    Assert.AssertArrayEquals(expectedBytes, actualBytes);
}
/// <summary>Create an output stream backed by a compressor leased from the global CodecPool.</summary>
/// <remarks>
/// When the codec produces a stream, the leased compressor is registered on it
/// through <c>SetTrackedCompressor</c>; when it does not (including when stream
/// creation throws), the compressor is returned to the pool.
/// </remarks>
/// <param name="codec">The codec to use to create the output stream.</param>
/// <param name="conf">The configuration handed to <c>CodecPool.GetCompressor</c>.</param>
/// <param name="out">The output stream to wrap.</param>
/// <returns>The new output stream</returns>
/// <exception cref="System.IO.IOException"/>
internal static CompressionOutputStream CreateOutputStreamWithCodecPool(CompressionCodec codec, Configuration conf, OutputStream @out)
{
    Compressor leased = CodecPool.GetCompressor(codec, conf);
    CompressionOutputStream wrapped = null;
    try
    {
        wrapped = codec.CreateOutputStream(@out, leased);
    }
    finally
    {
        if (wrapped != null)
        {
            wrapped.SetTrackedCompressor(leased);
        }
        else
        {
            // No stream took the compressor, so give it back.
            CodecPool.ReturnCompressor(leased);
        }
    }
    return wrapped;
}
/// <summary>Create an input stream backed by a decompressor leased from the global CodecPool.</summary>
/// <remarks>
/// When the codec produces a stream, the leased decompressor is registered on it
/// through <c>SetTrackedDecompressor</c>; when it does not (including when
/// stream creation throws), the decompressor is returned to the pool.
/// </remarks>
/// <param name="codec">The codec to use to create the input stream.</param>
/// <param name="conf">The configuration; not read by this method.</param>
/// <param name="in">The input stream to wrap.</param>
/// <returns>The new input stream</returns>
/// <exception cref="System.IO.IOException"/>
internal static CompressionInputStream CreateInputStreamWithCodecPool(CompressionCodec codec, Configuration conf, InputStream @in)
{
    Decompressor leased = CodecPool.GetDecompressor(codec);
    CompressionInputStream wrapped = null;
    try
    {
        wrapped = codec.CreateInputStream(@in, leased);
    }
    finally
    {
        if (wrapped != null)
        {
            wrapped.SetTrackedDecompressor(leased);
        }
        else
        {
            // No stream took the decompressor, so give it back.
            CodecPool.ReturnDecompressor(leased);
        }
    }
    return wrapped;
}
/// <summary>
/// Exercises codec lookup by file suffix, by class name and by alias on a
/// factory built from a default configuration and on factories built by
/// <c>SetClasses</c> from several class lists, then checks that
/// <c>CompressionCodecFactory.GetCodecClasses</c> accepts a codec list whose
/// entries are surrounded by spaces.
/// </summary>
public static void TestFinding()
{
    // ---- factory built from a default configuration ----
    CompressionCodecFactory lookup = new CompressionCodecFactory(new Configuration());
    Assert.Equal("default factory foo codec", null, lookup.GetCodec(new Path("/tmp/foo.bar")));
    Assert.Equal("default factory foo codec", null, lookup.GetCodecByClassName(typeof(TestCodecFactory.BarCodec).GetCanonicalName()));

    CheckCodec("default factory for .gz", typeof(GzipCodec), lookup.GetCodec(new Path("/tmp/foo.gz")));
    CheckCodec("default factory for gzip codec", typeof(GzipCodec), lookup.GetCodecByClassName(typeof(GzipCodec).GetCanonicalName()));
    foreach (string alias in new string[] { "gzip", "GZIP", "GZIPCodec", "gzipcodec" })
    {
        CheckCodec("default factory for gzip codec", typeof(GzipCodec), lookup.GetCodecByName(alias));
    }
    Type gzipType = lookup.GetCodecClassByName("gzipcodec");
    Assert.Equal(typeof(GzipCodec), gzipType);

    CheckCodec("default factory for .bz2", typeof(BZip2Codec), lookup.GetCodec(new Path("/tmp/foo.bz2")));
    CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), lookup.GetCodecByClassName(typeof(BZip2Codec).GetCanonicalName()));
    foreach (string alias in new string[] { "bzip2", "bzip2codec", "BZIP2", "BZIP2CODEC" })
    {
        CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), lookup.GetCodecByName(alias));
    }

    CheckCodec("default factory for deflate codec", typeof(DeflateCodec), lookup.GetCodecByClassName(typeof(DeflateCodec).GetCanonicalName()));
    foreach (string alias in new string[] { "deflate", "deflatecodec", "DEFLATE", "DEFLATECODEC" })
    {
        CheckCodec("default factory for deflate codec", typeof(DeflateCodec), lookup.GetCodecByName(alias));
    }

    // ---- factory built from an empty class list ----
    lookup = SetClasses(new Type[0]);

    // gz, bz2, snappy, lz4 are picked up by service loader, but bar isn't
    Assert.Equal("empty factory bar codec", null, lookup.GetCodec(new Path("/tmp/foo.bar")));
    Assert.Equal("empty factory bar codec", null, lookup.GetCodecByClassName(typeof(TestCodecFactory.BarCodec).GetCanonicalName()));

    CheckCodec("empty factory gz codec", typeof(GzipCodec), lookup.GetCodec(new Path("/tmp/foo.gz")));
    CheckCodec("empty factory gz codec", typeof(GzipCodec), lookup.GetCodecByClassName(typeof(GzipCodec).GetCanonicalName()));

    CheckCodec("empty factory for .bz2", typeof(BZip2Codec), lookup.GetCodec(new Path("/tmp/foo.bz2")));
    CheckCodec("empty factory for bzip2 codec", typeof(BZip2Codec), lookup.GetCodecByClassName(typeof(BZip2Codec).GetCanonicalName()));

    CheckCodec("empty factory snappy codec", typeof(SnappyCodec), lookup.GetCodec(new Path("/tmp/foo.snappy")));
    CheckCodec("empty factory snappy codec", typeof(SnappyCodec), lookup.GetCodecByClassName(typeof(SnappyCodec).GetCanonicalName()));

    CheckCodec("empty factory lz4 codec", typeof(Lz4Codec), lookup.GetCodec(new Path("/tmp/foo.lz4")));
    CheckCodec("empty factory lz4 codec", typeof(Lz4Codec), lookup.GetCodecByClassName(typeof(Lz4Codec).GetCanonicalName()));

    // ---- factory built from the Bar, Foo and FooBar test codecs ----
    lookup = SetClasses(new Type[]
    {
        typeof(TestCodecFactory.BarCodec),
        typeof(TestCodecFactory.FooCodec),
        typeof(TestCodecFactory.FooBarCodec)
    });

    CheckCodec("full factory gz codec", typeof(GzipCodec), lookup.GetCodec(new Path("/tmp/.foo.bar.gz")));
    CheckCodec("full codec gz codec", typeof(GzipCodec), lookup.GetCodecByClassName(typeof(GzipCodec).GetCanonicalName()));

    CheckCodec("full factory for .bz2", typeof(BZip2Codec), lookup.GetCodec(new Path("/tmp/foo.bz2")));
    CheckCodec("full codec bzip2 codec", typeof(BZip2Codec), lookup.GetCodecByClassName(typeof(BZip2Codec).GetCanonicalName()));

    CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), lookup.GetCodec(new Path("/tmp/foo.bar")));
    CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), lookup.GetCodecByClassName(typeof(TestCodecFactory.BarCodec).GetCanonicalName()));
    foreach (string alias in new string[] { "bar", "BAR" })
    {
        CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), lookup.GetCodecByName(alias));
    }

    CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), lookup.GetCodec(new Path("/tmp/foo/baz.foo.bar")));
    CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), lookup.GetCodecByClassName(typeof(TestCodecFactory.FooBarCodec).GetCanonicalName()));
    foreach (string alias in new string[] { "foobar", "FOOBAR" })
    {
        CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), lookup.GetCodecByName(alias));
    }

    CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), lookup.GetCodec(new Path("/tmp/foo.foo")));
    CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), lookup.GetCodecByClassName(typeof(TestCodecFactory.FooCodec).GetCanonicalName()));
    foreach (string alias in new string[] { "foo", "FOO" })
    {
        CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), lookup.GetCodecByName(alias));
    }

    // ---- factory built from NewGzipCodec only ----
    lookup = SetClasses(new Type[] { typeof(TestCodecFactory.NewGzipCodec) });
    CheckCodec("overridden factory for .gz", typeof(TestCodecFactory.NewGzipCodec), lookup.GetCodec(new Path("/tmp/foo.gz")));
    CheckCodec("overridden factory for gzip codec", typeof(TestCodecFactory.NewGzipCodec), lookup.GetCodecByClassName(typeof(TestCodecFactory.NewGzipCodec).GetCanonicalName()));

    // ---- codec list whose entries are padded with spaces ----
    Configuration paddedConf = new Configuration();
    string paddedList = " org.apache.hadoop.io.compress.GzipCodec , " + " org.apache.hadoop.io.compress.DefaultCodec , " + " org.apache.hadoop.io.compress.BZip2Codec ";
    paddedConf.Set(CommonConfigurationKeys.IoCompressionCodecsKey, paddedList);
    try
    {
        CompressionCodecFactory.GetCodecClasses(paddedConf);
    }
    catch (ArgumentException)
    {
        Fail("IllegalArgumentException is unexpected");
    }
}
/// <summary>
/// Return the number of leased <see cref="Decompressor"/>s for this
/// <see cref="CompressionCodec"/>.
/// </summary>
/// <param name="codec">the codec to query; may be null</param>
/// <returns>0 for a null codec, otherwise the lease count for the codec's decompressor type</returns>
public static int GetLeasedDecompressorsCount(CompressionCodec codec)
{
    if (codec == null)
    {
        return 0;
    }
    return GetLeaseCount(decompressorCounts, codec.GetDecompressorType());
}
/// <summary>A little test program.</summary>
/// <remarks>
/// Arguments are processed left to right. <c>-in</c> switches to encode mode and
/// <c>-out</c> to decode mode (the initial mode is decode); every other argument
/// is a file name whose codec is resolved from its suffix. In encode mode the
/// file named by the argument minus the codec's default extension is compressed
/// into the argument's path; in decode mode the argument's file is decompressed
/// to standard output.
/// </remarks>
/// <param name="args">mode switches and file names</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Configuration conf = new Configuration();
    Org.Apache.Hadoop.IO.Compress.CompressionCodecFactory factory = new Org.Apache.Hadoop.IO.Compress.CompressionCodecFactory(conf);
    bool encode = false;
    for (int i = 0; i < args.Length; ++i)
    {
        if ("-in".Equals(args[i]))
        {
            encode = true;
            continue;
        }
        if ("-out".Equals(args[i]))
        {
            encode = false;
            continue;
        }

        CompressionCodec codec = factory.GetCodec(new Path(args[i]));
        if (codec == null)
        {
            System.Console.Out.WriteLine("Codec for " + args[i] + " not found.");
            continue;
        }

        if (encode)
        {
            CompressionOutputStream @out = null;
            InputStream @in = null;
            try
            {
                @out = codec.CreateOutputStream(new FileOutputStream(args[i]));
                byte[] buffer = new byte[100];
                string inFilename = RemoveSuffix(args[i], codec.GetDefaultExtension());
                @in = new FileInputStream(inFilename);
                int len = @in.Read(buffer);
                while (len > 0)
                {
                    @out.Write(buffer, 0, len);
                    len = @in.Read(buffer);
                }
            }
            finally
            {
                // Nested so that the input is closed even when closing the
                // output stream throws.
                try
                {
                    if (@out != null)
                    {
                        @out.Close();
                    }
                }
                finally
                {
                    if (@in != null)
                    {
                        @in.Close();
                    }
                }
            }
        }
        else
        {
            CompressionInputStream @in = null;
            try
            {
                @in = codec.CreateInputStream(new FileInputStream(args[i]));
                byte[] buffer = new byte[100];

                // The decompressed data is raw bytes, so it goes to the standard
                // output stream; Console.Out is a text writer and has no
                // byte[] overload.
                System.IO.Stream stdout = System.Console.OpenStandardOutput();
                int len = @in.Read(buffer);
                while (len > 0)
                {
                    stdout.Write(buffer, 0, len);
                    len = @in.Read(buffer);
                }
                stdout.Flush();
            }
            finally
            {
                if (@in != null)
                {
                    @in.Close();
                }
            }
        }
    }
}
/// <summary>
/// Writes a <c>MapFile</c> with <paramref name="records"/> entries whose key and
/// value are both the record index formatted as a zero-padded decimal of at
/// least three digits ("000", "001", ...).
/// </summary>
/// <param name="conf">configuration passed to the writer</param>
/// <param name="fs">file system; not read by this method</param>
/// <param name="path">location of the map file</param>
/// <param name="codec">compression codec passed to the writer</param>
/// <param name="type">compression type passed to the writer</param>
/// <param name="records">number of entries to append</param>
/// <exception cref="System.IO.IOException"/>
private static void CreateMapFile(Configuration conf, FileSystem fs, Path path, CompressionCodec codec, SequenceFile.CompressionType type, int records)
{
    MapFile.Writer writer = new MapFile.Writer(conf, path, MapFile.Writer.KeyClass(typeof(Text)), MapFile.Writer.ValueClass(typeof(Text)), MapFile.Writer.Compression(type, codec));
    try
    {
        Text key = new Text();
        for (int j = 0; j < records; j++)
        {
            // "%03d" is a Java format string; string.Format would return it
            // unchanged, making every key the literal "%03d". "D3" is the .NET
            // equivalent of zero-padding to three digits.
            key.Set(j.ToString("D3", System.Globalization.CultureInfo.InvariantCulture));
            writer.Append(key, key);
        }
    }
    finally
    {
        // Close the writer even when an append fails.
        writer.Close();
    }
}
/// <summary>
/// Round-trips <paramref name="count"/> generated key/value records through the
/// codec named by <paramref name="codecClass"/>: the records are compressed,
/// decompressed and compared with the originals record by record (equality and
/// dictionary lookup), and then once more byte by byte.
/// </summary>
/// <param name="conf">configuration used to load and instantiate the codec</param>
/// <param name="seed">seed for the record generator</param>
/// <param name="count">number of key/value records to generate</param>
/// <param name="codecClass">class name of the codec under test</param>
/// <exception cref="System.IO.IOException">
/// when the codec class cannot be loaded (the original failure is attached as
/// the inner exception) or when stream I/O fails
/// </exception>
private static void CodecTest(Configuration conf, int seed, int count, string codecClass)
{
    // Create the codec
    CompressionCodec codec = null;
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName(codecClass), conf);
    }
    catch (TypeLoadException e)
    {
        // Keep the cause so a bad class name can be diagnosed from the trace.
        throw new IOException("Illegal codec!", e);
    }
    Log.Info("Created a Codec object of type: " + codecClass);

    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i)
    {
        generator.Next();
        RandomDatum key = generator.GetKey();
        RandomDatum value = generator.GetValue();
        key.Write(data);
        value.Write(data);
    }
    Log.Info("Generated " + count + " records");

    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter = codec.CreateOutputStream(compressedDataBuffer);
    DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.Write(data.GetData(), 0, data.GetLength());
    deflateOut.Flush();
    deflateFilter.Finish();
    Log.Info("Finished compressing data");

    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.Reset(compressedDataBuffer.GetData(), 0, compressedDataBuffer.GetLength());
    CompressionInputStream inflateFilter = codec.CreateInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));

    // Check
    DataInputBuffer originalData = new DataInputBuffer();
    originalData.Reset(data.GetData(), 0, data.GetLength());
    DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
    for (int i_1 = 0; i_1 < count; ++i_1)
    {
        RandomDatum k1 = new RandomDatum();
        RandomDatum v1 = new RandomDatum();
        k1.ReadFields(originalIn);
        v1.ReadFields(originalIn);
        RandomDatum k2 = new RandomDatum();
        RandomDatum v2 = new RandomDatum();
        k2.ReadFields(inflateIn);
        v2.ReadFields(inflateIn);
        Assert.True("original and compressed-then-decompressed-output not equal", k1.Equals(k2) && v1.Equals(v2));

        // original and compressed-then-decompressed-output have the same hashCode
        IDictionary<RandomDatum, string> m = new Dictionary<RandomDatum, string>();
        m[k1] = k1.ToString();
        m[v1] = v1.ToString();

        // TryGetValue leaves result null on a miss, so a hash mismatch surfaces
        // as the assertion failure below rather than a KeyNotFoundException
        // from the indexer.
        string result;
        m.TryGetValue(k2, out result);
        Assert.Equal("k1 and k2 hashcode not equal", result, k1.ToString());
        m.TryGetValue(v2, out result);
        Assert.Equal("v1 and v2 hashcode not equal", result, v1.ToString());
    }

    // De-compress data byte-at-a-time
    originalData.Reset(data.GetData(), 0, data.GetLength());
    deCompressedDataBuffer.Reset(compressedDataBuffer.GetData(), 0, compressedDataBuffer.GetLength());
    inflateFilter = codec.CreateInputStream(deCompressedDataBuffer);

    // Check
    originalIn = new DataInputStream(new BufferedInputStream(originalData));
    int expected;
    do
    {
        expected = originalIn.Read();
        Assert.Equal("Inflated stream read by byte does not match", expected, inflateFilter.Read());
    }
    while (expected != -1);
    Log.Info("SUCCESS! Completed checking " + count + " records");
}
/// <summary>
/// Asserts that <paramref name="actual"/> is a codec whose runtime type has the
/// same full name as <paramref name="expected"/>.
/// </summary>
/// <param name="msg">prefix for the assertion messages</param>
/// <param name="expected">the codec type that should have been found</param>
/// <param name="actual">the codec returned by the lookup; a null value fails the check</param>
private static void CheckCodec(string msg, Type expected, CompressionCodec actual)
{
    // Fail with the caller's message when the lookup found nothing, instead of
    // hitting a NullReferenceException in the comparison below.
    Assert.True(msg + " no codec found", actual != null);
    Assert.Equal(msg + " unexpected codec found", expected.FullName, actual.GetType().FullName);
}
/// <summary>
/// Leases a <see cref="Compressor"/> for the given codec without supplying a
/// configuration; delegates to the two-argument overload with a null
/// <c>Configuration</c>.
/// </summary>
/// <param name="codec">the codec for which to get the compressor</param>
/// <returns>the compressor produced by the two-argument overload</returns>
public static Compressor GetCompressor(CompressionCodec codec)
{
    Configuration noConfiguration = null;
    return GetCompressor(codec, noConfiguration);
}