Beispiel #1
0
        /// <summary>
        /// Compresses a random payload with <c>GZIPOutputStream</c>, inflates it
        /// again through <c>GzipCodec</c> with native libraries disabled, and
        /// checks that the bytes come back unchanged.
        /// </summary>
        public virtual void TestGzipCompatibility()
        {
            // Log the seed so that a failing run can be replayed.
            Random rng       = new Random();
            long   seedValue = rng.NextLong();
            rng.SetSeed(seedValue);
            Log.Info("seed: " + seedValue);

            // Gzip a randomly sized block of random bytes into an in-memory buffer.
            DataOutputBuffer buffer   = new DataOutputBuffer();
            GZIPOutputStream gzipSink = new GZIPOutputStream(buffer);
            byte[]           payload  = new byte[rng.Next(128 * 1024 + 1)];
            rng.NextBytes(payload);
            gzipSink.Write(payload);
            gzipSink.Close();

            // Expose the compressed bytes as an input source.
            DataInputBuffer compressed = new DataInputBuffer();
            compressed.Reset(buffer.GetData(), buffer.GetLength());

            // With native libraries switched off, the codec is expected to hand
            // out a BuiltInGzipDecompressor.
            Configuration conf = new Configuration();
            conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
            CompressionCodec codec        = ReflectionUtils.NewInstance <GzipCodec>(conf);
            Decompressor     decompressor = codec.CreateDecompressor();
            NUnit.Framework.Assert.IsNotNull(decompressor);
            Assert.Equal(typeof(BuiltInGzipDecompressor), decompressor.GetType());

            // Inflate into the (reset) output buffer and compare with the input.
            InputStream inflated = codec.CreateInputStream(compressed, decompressor);
            buffer.Reset();
            IOUtils.CopyBytes(inflated, buffer, 4096);
            byte[] roundTripped = Arrays.CopyOf(buffer.GetData(), buffer.GetLength());
            Assert.AssertArrayEquals(payload, roundTripped);
        }
Beispiel #2
0
        /// <summary>
        /// Writes a gzip file twice - once through a <c>Compressor</c> leased from
        /// the <c>CodecPool</c> (when one is available) and once through
        /// <c>codec.CreateOutputStream</c> - and checks each result with
        /// <c>VerifyGzipFile</c>.
        /// </summary>
        /// <param name="useNative">
        /// whether native libraries are requested; when they are requested but
        /// not loaded the test logs a warning and returns early
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestGzipCodecWrite(bool useNative)
        {
            Configuration conf = new Configuration();
            conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, useNative);

            // Reconcile the request with what ZlibFactory actually reports.
            bool nativeLoaded = ZlibFactory.IsNativeZlibLoaded(conf);
            if (!useNative)
            {
                NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request"
                                               , nativeLoaded);
            }
            else if (!nativeLoaded)
            {
                Log.Warn("testGzipCodecWrite skipped: native libs not loaded");
                return;
            }

            // Put a zlib compressor of the expected flavour into the CodecPool.
            Compressor pooledZlib = ZlibFactory.GetZlibCompressor(conf);
            NUnit.Framework.Assert.IsNotNull("zlibCompressor is null!", pooledZlib);
            bool expectedFlavour;
            if (useNative)
            {
                expectedFlavour = pooledZlib is ZlibCompressor;
            }
            else
            {
                expectedFlavour = pooledZlib is BuiltInZlibDeflater;
            }
            Assert.True("ZlibFactory returned unexpected deflator", expectedFlavour);
            CodecPool.ReturnCompressor(pooledZlib);

            // Resolve the codec from the ".gz" extension.
            CompressionCodecFactory factory = new CompressionCodecFactory(conf);
            CompressionCodec        codec   = factory.GetCodec(new Path("foo.gz"));
            Assert.True("Codec for .gz file is not GzipCodec", codec is GzipCodec);

            string msg      = "This is the message we are going to compress.";
            string tmpDir   = Runtime.GetProperty("test.build.data", "/tmp/");
            Path   target   = new Path(new Path(tmpDir), "testGzipCodecWrite.txt.gz");
            string fileName = target.ToString();

            // First pass: write through the Compressor interface, if the pool
            // gives us a compressor for this codec.
            Compressor gzipCompressor = CodecPool.GetCompressor(codec);
            if (gzipCompressor != null)
            {
                OutputStream   rawOut       = new CompressorStream(new FileOutputStream(fileName), gzipCompressor);
                BufferedWriter pooledWriter = new BufferedWriter(new OutputStreamWriter(rawOut));
                pooledWriter.Write(msg);
                pooledWriter.Close();
                CodecPool.ReturnCompressor(gzipCompressor);
                VerifyGzipFile(fileName, msg);
            }

            // Second pass: write through the stream created by the codec itself.
            OutputStream   codecOut    = codec.CreateOutputStream(new FileOutputStream(fileName));
            BufferedWriter codecWriter = new BufferedWriter(new OutputStreamWriter(codecOut));
            codecWriter.Write(msg);
            codecWriter.Close();
            VerifyGzipFile(fileName, msg);
        }
Beispiel #3
0
        /// <summary>
        /// Checks how <c>CodecPool</c> hands out gzip decompressors when native
        /// libraries are disabled: neither the codec nor the pool may return
        /// null, a <c>BuiltInGzipDecompressor</c> must not be handed out twice,
        /// and any other decompressor type must be reused after being returned.
        /// </summary>
        public virtual void TestCodecPoolAndGzipDecompressor()
        {
            // Don't use native libs for this test. The shared key constant is
            // used (as in the sibling tests) instead of a hand-typed literal.
            Configuration conf = new Configuration();

            conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
            NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request"
                                           , ZlibFactory.IsNativeZlibLoaded(conf));
            // This should give us a BuiltInZlibInflater.
            Decompressor zlibDecompressor = ZlibFactory.GetZlibDecompressor(conf);

            NUnit.Framework.Assert.IsNotNull("zlibDecompressor is null!", zlibDecompressor);
            Assert.True("ZlibFactory returned unexpected inflator", zlibDecompressor
                        is BuiltInZlibInflater);
            // Resolve the codec for a ".gz" file name.
            CompressionCodecFactory ccf   = new CompressionCodecFactory(conf);
            CompressionCodec        codec = ccf.GetCodec(new Path("foo.gz"));

            Assert.True("Codec for .gz file is not GzipCodec", codec is GzipCodec
                        );
            // make sure we don't get a null decompressor
            Decompressor codecDecompressor = codec.CreateDecompressor();

            if (null == codecDecompressor)
            {
                NUnit.Framework.Assert.Fail("Got null codecDecompressor");
            }
            // Asking the CodecPool for a decompressor for GzipCodec
            // should not return null
            Decompressor poolDecompressor = CodecPool.GetDecompressor(codec);

            if (null == poolDecompressor)
            {
                NUnit.Framework.Assert.Fail("Got null poolDecompressor");
            }
            // return a couple decompressors
            CodecPool.ReturnDecompressor(zlibDecompressor);
            CodecPool.ReturnDecompressor(poolDecompressor);
            Decompressor poolDecompressor2 = CodecPool.GetDecompressor(codec);

            // A BuiltInGzipDecompressor must not be handed out again after it
            // was returned; any other type is expected to be recycled.
            if (poolDecompressor.GetType() == typeof(BuiltInGzipDecompressor))
            {
                if (poolDecompressor == poolDecompressor2)
                {
                    NUnit.Framework.Assert.Fail("Reused java gzip decompressor in pool");
                }
            }
            else
            {
                if (poolDecompressor != poolDecompressor2)
                {
                    NUnit.Framework.Assert.Fail("Did not reuse native gzip decompressor in pool");
                }
            }
        }
        /// <summary>
        /// Resolves a codec through <see cref="GetCodecByName"/> and returns the
        /// runtime type of the codec that was found.
        /// </summary>
        /// <remarks>
        /// The lookup rules (canonical class name or alias) are those of
        /// <see cref="GetCodecByName"/>.
        /// </remarks>
        /// <param name="codecName">the canonical class name or alias of the codec</param>
        /// <returns>the codec's type, or null when no codec was found</returns>
        public virtual Type GetCodecClassByName(string codecName)
        {
            CompressionCodec match = GetCodecByName(codecName);

            return match == null ? null : match.GetType();
        }
Beispiel #5
0
        /// <summary>
        /// Writes 4097 buffers of 1 Mi NUL characters (more than 2^32 in total)
        /// into a gzip file and reads them back through a decompressor leased
        /// from the <c>CodecPool</c>, checking every buffer read.
        /// </summary>
        public virtual void TestGzipLongOverflow()
        {
            Log.Info("testGzipLongOverflow");
            // Don't use native libs for this test.
            Configuration conf = new Configuration();

            conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
            NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request"
                                           , ZlibFactory.IsNativeZlibLoaded(conf));
            // Ensure that the CodecPool has a BuiltInZlibInflater in it.
            Decompressor zlibDecompressor = ZlibFactory.GetZlibDecompressor(conf);

            NUnit.Framework.Assert.IsNotNull("zlibDecompressor is null!", zlibDecompressor);
            Assert.True("ZlibFactory returned unexpected inflator", zlibDecompressor
                        is BuiltInZlibInflater);
            CodecPool.ReturnDecompressor(zlibDecompressor);
            // Now create a GZip text file.
            string         tmpDir = Runtime.GetProperty("test.build.data", "/tmp/");
            Path           f      = new Path(new Path(tmpDir), "testGzipLongOverflow.bin.gz");
            BufferedWriter bw     = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream
                                                                                  (new FileOutputStream(f.ToString()))));
            int Nbuf = 1024 * 4 + 1;

            // A freshly allocated char[] is already filled with '\0', so no
            // explicit zero-fill loop is needed.
            char[] buf = new char[1024 * 1024];
            for (int i = 0; i < Nbuf; i++)
            {
                bw.Write(buf);
            }
            bw.Close();
            // Now read it back, using the CodecPool to establish the
            // decompressor to use.
            CompressionCodecFactory ccf          = new CompressionCodecFactory(conf);
            CompressionCodec        codec        = ccf.GetCodec(f);
            Decompressor            decompressor = CodecPool.GetDecompressor(codec);

            try
            {
                FileSystem  fs  = FileSystem.GetLocal(conf);
                InputStream @is = fs.Open(f);

                @is = codec.CreateInputStream(@is, decompressor);
                BufferedReader br = new BufferedReader(new InputStreamReader(@is));
                try
                {
                    for (int j = 0; j < Nbuf; j++)
                    {
                        int n = br.Read(buf);
                        // Expected value first, so a failure message reads correctly.
                        Assert.Equal("got wrong read length!", buf.Length, n);
                        for (int k = 0; k < buf.Length; k++)
                        {
                            Assert.Equal("got wrong byte!", '\0', buf[k]);
                        }
                    }
                }
                finally
                {
                    // Close the reader even when an assertion fails.
                    br.Close();
                }
            }
            finally
            {
                // Hand the leased decompressor back to the pool in every case.
                if (decompressor != null)
                {
                    CodecPool.ReturnDecompressor(decompressor);
                }
            }
        }
        /// <summary>
        /// Registers a codec in the three lookup tables: by reversed default
        /// file extension, by canonical class name, and by lower-cased alias.
        /// </summary>
        /// <remarks>
        /// The alias is the short type name; when that name ends with "Codec",
        /// the name without that ending is registered as a second alias.
        /// </remarks>
        /// <param name="codec">the codec to register</param>
        private void AddCodec(CompressionCodec codec)
        {
            const string CodecSuffix = "Codec";
            string suffix = codec.GetDefaultExtension();

            // Extensions are stored reversed so that a lookup can match on the
            // reversed file name.
            codecs[((StringBuilder) new StringBuilder(suffix).Reverse()).ToString()] = codec;
            codecsByClassName[codec.GetType().GetCanonicalName()] = codec;
            string codecName = codec.GetType().Name;

            codecsByName[StringUtils.ToLowerCase(codecName)] = codec;
            // Type names are identifiers, so compare ordinally rather than with
            // the current culture's linguistic rules.
            if (codecName.EndsWith(CodecSuffix, System.StringComparison.Ordinal))
            {
                codecName = Runtime.Substring(codecName, 0, codecName.Length - CodecSuffix.Length
                                              );
                codecsByName[StringUtils.ToLowerCase(codecName)] = codec;
            }
        }
        /// <summary>
        /// Find the relevant compression codec for the codec's canonical class name
        /// or by codec alias.
        /// </summary>
        /// <remarks>
        /// The class-name lookup is tried first; only when it yields nothing is
        /// the lower-cased <paramref name="codecName"/> looked up as an alias,
        /// which makes aliases case insensitive.
        /// <p/>
        /// The alias is the short class name (without the package name).
        /// If the short class name ends with 'Codec', then there are two aliases for
        /// the codec, the complete short class name and the short class name without
        /// the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
        /// aliases are 'gzip' and 'gzipcodec'.
        /// </remarks>
        /// <param name="codecName">the canonical class name or alias of the codec</param>
        /// <returns>
        /// the codec object, or null when the factory has no codecs or no codec
        /// matches <paramref name="codecName"/>
        /// </returns>
        public virtual CompressionCodec GetCodecByName(string codecName)
        {
            if (codecsByClassName == null)
            {
                return(null);
            }
            CompressionCodec codec = GetCodecByClassName(codecName);

            if (codec == null)
            {
                // trying to get the codec by name in case the name was specified
                // instead a class. TryGetValue leaves codec null for an unknown
                // alias, whereas the indexer would throw KeyNotFoundException.
                codecsByName.TryGetValue(StringUtils.ToLowerCase(codecName), out codec);
            }
            return(codec);
        }
Beispiel #8
0
        /// <summary>
        /// Creates a compressor for the named codec with the compression level
        /// set to NoCompression and verifies that the output is not smaller
        /// than the input.
        /// </summary>
        /// <param name="conf">configuration that receives the NoCompression level and is used to instantiate the codec</param>
        /// <param name="codecClass">class name of the codec to instantiate</param>
        /// <exception cref="System.IO.IOException">
        /// when the codec class cannot be loaded, or on stream failures
        /// </exception>
        private static void CodecTestWithNOCompression(Configuration conf, string codecClass
                                                       )
        {
            CompressionCodec codec = null;

            ZlibFactory.SetCompressionLevel(conf, ZlibCompressor.CompressionLevel.NoCompression
                                            );
            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName(codecClass
                                                                                          ), conf);
            }
            catch (TypeLoadException e)
            {
                // Keep the loader failure as the inner exception so the reason
                // the codec could not be resolved is not lost.
                throw new IOException("Illegal codec!", e);
            }
            // A fresh compressor created after the level was configured.
            Compressor c = codec.CreateCompressor();
            ByteArrayOutputStream   bos = new ByteArrayOutputStream();
            CompressionOutputStream cos = null;

            // write trivially compressable data
            byte[] b = new byte[1 << 15];
            Arrays.Fill(b, unchecked ((byte)43));
            try
            {
                cos = codec.CreateOutputStream(bos, c);
                cos.Write(b);
            }
            finally
            {
                if (cos != null)
                {
                    cos.Close();
                }
            }
            byte[] outbytes = bos.ToByteArray();
            // verify data were not compressed
            Assert.True("Compressed bytes contrary to configuration(NO_COMPRESSION)"
                        , outbytes.Length >= b.Length);
        }
Beispiel #9
0
        /// <summary>
        /// Leases a <see cref="Decompressor"/> for the given
        /// <see cref="CompressionCodec"/>: a pooled instance is borrowed when one
        /// is available, otherwise the codec creates a new one. The lease count
        /// for the returned instance is incremented.
        /// </summary>
        /// <param name="codec">
        /// the <code>CompressionCodec</code> for which to get the
        /// <code>Decompressor</code>
        /// </param>
        /// <returns>
        /// a <code>Decompressor</code> for the given
        /// <code>CompressionCodec</code>, from the pool or newly created
        /// </returns>
        public static Decompressor GetDecompressor(CompressionCodec codec)
        {
            Decompressor leased = Borrow(decompressorPool, codec.GetDecompressorType());

            if (leased != null)
            {
                // Pool hit: only worth mentioning at debug level.
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("Got recycled decompressor");
                }
            }
            else
            {
                // Pool miss: let the codec build a new instance.
                leased = codec.CreateDecompressor();
                Log.Info("Got brand-new decompressor [" + codec.GetDefaultExtension() + "]");
            }
            UpdateLeaseCount(decompressorCounts, leased, 1);
            return leased;
        }
Beispiel #10
0
        /// <summary>
        /// Checks that a compressor taken from the <c>CodecPool</c> a second time
        /// is the same instance as before and that, after the configuration is
        /// switched to NoCompression, its output is not smaller than the input.
        /// </summary>
        /// <param name="conf">configuration whose zlib level and strategy are modified by the test</param>
        /// <param name="codec">codec whose pooled compressor is exercised</param>
        /// <exception cref="System.IO.IOException"/>
        private static void GzipReinitTest(Configuration conf, CompressionCodec codec)
        {
            // Lease a compressor under "best compression" settings and hand it
            // straight back so that it sits in the pool.
            ZlibFactory.SetCompressionLevel(conf, ZlibCompressor.CompressionLevel.BestCompression);
            ZlibFactory.SetCompressionStrategy(conf, ZlibCompressor.CompressionStrategy.DefaultStrategy);
            Compressor first = CodecPool.GetCompressor(codec);
            CodecPool.ReturnCompressor(first);

            // Switch the configuration to "no compression" and lease again,
            // this time passing the configuration along.
            ZlibFactory.SetCompressionLevel(conf, ZlibCompressor.CompressionLevel.NoCompression);
            Compressor second = CodecPool.GetCompressor(codec, conf);

            // The pool must have handed back the very same instance.
            Assert.True("Got mismatched ZlibCompressor", first == second);

            // Trivially compressible input: 32 KiB of one repeated byte.
            byte[] input = new byte[1 << 15];
            Arrays.Fill(input, unchecked ((byte)43));

            ByteArrayOutputStream   sink          = new ByteArrayOutputStream();
            CompressionOutputStream compressedOut = null;
            try
            {
                compressedOut = codec.CreateOutputStream(sink, second);
                compressedOut.Write(input);
            }
            finally
            {
                if (compressedOut != null)
                {
                    compressedOut.Close();
                }
                CodecPool.ReturnCompressor(second);
            }

            // With no compression the output cannot be shorter than the input.
            byte[] produced = sink.ToByteArray();
            Assert.True("Compressed bytes contrary to configuration", produced.Length >= input.Length);
        }
Beispiel #11
0
        /// <summary>
        /// Creates a gzipped text file with <c>GZIPOutputStream</c> and reads it
        /// back through a decompressor leased from the <c>CodecPool</c>, with
        /// native libraries disabled.
        /// </summary>
        public virtual void TestGzipCodecRead()
        {
            // Don't use native libs for this test.
            Configuration conf = new Configuration();

            conf.SetBoolean(CommonConfigurationKeys.IoNativeLibAvailableKey, false);
            NUnit.Framework.Assert.IsFalse("ZlibFactory is using native libs against request"
                                           , ZlibFactory.IsNativeZlibLoaded(conf));
            // Ensure that the CodecPool has a BuiltInZlibInflater in it.
            Decompressor zlibDecompressor = ZlibFactory.GetZlibDecompressor(conf);

            NUnit.Framework.Assert.IsNotNull("zlibDecompressor is null!", zlibDecompressor);
            Assert.True("ZlibFactory returned unexpected inflator", zlibDecompressor
                        is BuiltInZlibInflater);
            CodecPool.ReturnDecompressor(zlibDecompressor);
            // Now create a GZip text file.
            string         tmpDir = Runtime.GetProperty("test.build.data", "/tmp/");
            Path           f      = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
            BufferedWriter bw     = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream
                                                                                  (new FileOutputStream(f.ToString()))));
            string msg = "This is the message in the file!";

            bw.Write(msg);
            bw.Close();
            // Now read it back, using the CodecPool to establish the
            // decompressor to use.
            CompressionCodecFactory ccf          = new CompressionCodecFactory(conf);
            CompressionCodec        codec        = ccf.GetCodec(f);
            Decompressor            decompressor = CodecPool.GetDecompressor(codec);

            try
            {
                FileSystem  fs  = FileSystem.GetLocal(conf);
                InputStream @is = fs.Open(f);

                @is = codec.CreateInputStream(@is, decompressor);
                BufferedReader br = new BufferedReader(new InputStreamReader(@is));
                try
                {
                    string line = br.ReadLine();

                    Assert.Equal("Didn't get the same message back!", msg, line);
                }
                finally
                {
                    // Close the reader even when the assertion fails.
                    br.Close();
                }
            }
            finally
            {
                // Hand the leased decompressor back to the pool in every case.
                if (decompressor != null)
                {
                    CodecPool.ReturnDecompressor(decompressor);
                }
            }
        }
Beispiel #12
0
        /// <summary>
        /// Leases a <see cref="Compressor"/> for the given
        /// <see cref="CompressionCodec"/>: a pooled instance is borrowed and
        /// re-initialised with <paramref name="conf"/> when one is available,
        /// otherwise the codec creates a new one. The lease count for the
        /// returned instance is incremented.
        /// </summary>
        /// <param name="codec">
        /// the <code>CompressionCodec</code> for which to get the
        /// <code>Compressor</code>
        /// </param>
        /// <param name="conf">
        /// the <code>Configuration</code> passed to <c>Reinit</c> on a recycled
        /// compressor
        /// </param>
        /// <returns>
        /// a <code>Compressor</code> for the given
        /// <code>CompressionCodec</code>, from the pool or newly created
        /// </returns>
        public static Compressor GetCompressor(CompressionCodec codec, Configuration conf
                                               )
        {
            Compressor leased = Borrow(compressorPool, codec.GetCompressorType());

            if (leased != null)
            {
                // Pool hit: apply the caller's configuration before reuse.
                leased.Reinit(conf);
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("Got recycled compressor");
                }
            }
            else
            {
                // Pool miss: let the codec build a new instance.
                leased = codec.CreateCompressor();
                Log.Info("Got brand-new compressor [" + codec.GetDefaultExtension() + "]");
            }
            UpdateLeaseCount(compressorCounts, leased, 1);
            return leased;
        }
Beispiel #13
0
        /// <summary>Writes a compressed file of newline-terminated records for split tests.</summary>
        /// <remarks>
        /// Each record is the Base64 encoding of 1024 random bytes whose leading
        /// bytes are overwritten with the record's sequence number (as written by
        /// <c>DataOutputBuffer.WriteInt</c>), followed by a line feed. Records
        /// are written until their encoded lengths add up to at least
        /// <paramref name="infLen"/>.
        /// </remarks>
        /// <param name="fs">file system on which the file is created</param>
        /// <param name="rand">source of the random record content</param>
        /// <param name="codec">codec used to compress the file; its type name and default extension also determine the file path</param>
        /// <param name="infLen">number of uncompressed record bytes to produce (line feeds are not counted)</param>
        /// <returns>the path of the file that was written</returns>
        /// <exception cref="System.IO.IOException"/>
        private static Path WriteSplitTestFile(FileSystem fs, Random rand, CompressionCodec
                                               codec, long infLen)
        {
            const int RecSize = 1024;

            // <test.build.data>/<codec type name>/test<extension>
            Path baseDir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
            Path workDir = new Path(baseDir, codec.GetType().Name);
            Path file    = new Path(workDir, "test" + codec.GetDefaultExtension());

            byte[]           raw        = new byte[RecSize];
            Base64           encoder    = new Base64(0, null);
            DataOutputStream fout       = null;
            Compressor       compressor = CodecPool.GetCompressor(codec);

            try
            {
                fout = new DataOutputStream(codec.CreateOutputStream(fs.Create(file, true), compressor));
                DataOutputBuffer seqBuffer = new DataOutputBuffer(RecSize * 4 / 3 + 4);
                int  seq       = 0;
                long remaining = infLen;
                while (remaining > 0)
                {
                    // Base64 keeps the random content free of line feeds.
                    rand.NextBytes(raw);
                    byte[] record = encoder.Encode(raw);

                    // Stamp the sequence number over the start of the record.
                    seqBuffer.Reset();
                    seqBuffer.WriteInt(seq);
                    System.Array.Copy(seqBuffer.GetData(), 0, record, 0, seqBuffer.GetLength());

                    fout.Write(record);
                    fout.Write('\n');
                    ++seq;
                    remaining -= record.Length;
                }
                Log.Info("Wrote " + seq + " records to " + file);
            }
            finally
            {
                // Close the stream and give the compressor back in every case.
                IOUtils.Cleanup(Log, fout);
                CodecPool.ReturnCompressor(compressor);
            }
            return file;
        }
        /// <summary>
        /// Find the relevant compression codec for the given file based on its
        /// filename suffix.
        /// </summary>
        /// <remarks>
        /// Codecs are keyed by their reversed extension, so the reversed file
        /// name is used to find a candidate key, which is accepted only when
        /// the reversed name starts with it. The prefix test is ordinal because
        /// file extensions are not linguistic text.
        /// </remarks>
        /// <param name="file">the filename to check</param>
        /// <returns>the codec object, or null when no registered suffix matches</returns>
        public virtual CompressionCodec GetCodec(Path file)
        {
            if (codecs == null)
            {
                return(null);
            }
            string filename         = file.GetName();
            string reversedFilename = ((StringBuilder) new StringBuilder(filename).Reverse()).
                                      ToString();
            // NOTE(review): HeadMap/LastKey presumably follow Java's SortedMap
            // semantics (keys strictly below the argument; greatest such key).
            SortedDictionary <string, CompressionCodec> subMap = codecs.HeadMap(reversedFilename
                                                                                );
            if (subMap.IsEmpty())
            {
                return(null);
            }
            string potentialSuffix = subMap.LastKey();

            if (reversedFilename.StartsWith(potentialSuffix, System.StringComparison.Ordinal))
            {
                return(codecs[potentialSuffix]);
            }
            return(null);
        }
Beispiel #15
0
        /// <summary>
        /// Concatenates several independently gzipped random slices into one
        /// buffer and checks that reading it through <c>GzipCodec</c> yields the
        /// concatenation of the original slices.
        /// </summary>
        /// <param name="conf">configuration used to instantiate the codec</param>
        /// <param name="decomClass">the decompressor type the codec is expected to create</param>
        /// <exception cref="System.IO.IOException"/>
        internal virtual void GzipConcatTest(Configuration conf, Type decomClass)
        {
            // Log the seed so that a failing run can be replayed.
            Random rng       = new Random();
            long   seedValue = rng.NextLong();
            rng.SetSeed(seedValue);
            Log.Info(decomClass + " seed: " + seedValue);

            int memberCount = rng.Next(4) + 3;
            int bufferSize  = 128 * 1024;
            DataOutputBuffer gzipped  = new DataOutputBuffer();
            DataOutputBuffer expected = new DataOutputBuffer();
            byte[]           scratch  = new byte[bufferSize];

            // Each pass appends one complete gzip stream to "gzipped" and the
            // same raw slice to "expected".
            for (int member = 0; member < memberCount; ++member)
            {
                GZIPOutputStream gzipSink = new GZIPOutputStream(gzipped);
                rng.NextBytes(scratch);
                int length = rng.Next(bufferSize);
                int offset = rng.Next(bufferSize - length);
                expected.Write(scratch, offset, length);
                gzipSink.Write(scratch, offset, length);
                gzipSink.Close();
            }
            byte[] expectedBytes = Arrays.CopyOf(expected.GetData(), expected.GetLength());

            // The codec must create a decompressor of the requested type.
            CompressionCodec codec        = ReflectionUtils.NewInstance <GzipCodec>(conf);
            Decompressor     decompressor = codec.CreateDecompressor();
            NUnit.Framework.Assert.IsNotNull(decompressor);
            Assert.Equal(decomClass, decompressor.GetType());

            // Inflate the concatenated streams into the (reset) buffer and compare.
            DataInputBuffer compressed = new DataInputBuffer();
            compressed.Reset(gzipped.GetData(), gzipped.GetLength());
            InputStream inflated = codec.CreateInputStream(compressed, decompressor);
            gzipped.Reset();
            IOUtils.CopyBytes(inflated, gzipped, 4096);
            byte[] actualBytes = Arrays.CopyOf(gzipped.GetData(), gzipped.GetLength());
            Assert.AssertArrayEquals(expectedBytes, actualBytes);
        }
Beispiel #16
0
            /// <summary>
            /// Wraps an output stream in a compression stream whose compressor is
            /// leased from the global CodecPool.
            /// </summary>
            /// <remarks>
            /// When no stream results (the codec throws or returns null) the
            /// compressor is returned to the pool; otherwise it is registered on
            /// the stream via <c>SetTrackedCompressor</c>.
            /// </remarks>
            /// <param name="codec">The codec to use to create the output stream.</param>
            /// <param name="conf">The configuration passed to the CodecPool when leasing the compressor.</param>
            /// <param name="out">The output stream to wrap.</param>
            /// <returns>The new output stream</returns>
            /// <exception cref="System.IO.IOException"/>
            internal static CompressionOutputStream CreateOutputStreamWithCodecPool(CompressionCodec
                                                                                    codec, Configuration conf, OutputStream @out)
            {
                Compressor leased = CodecPool.GetCompressor(codec, conf);
                CompressionOutputStream wrapped = null;

                try
                {
                    wrapped = codec.CreateOutputStream(@out, leased);
                }
                finally
                {
                    if (wrapped != null)
                    {
                        // Success: the stream now carries the leased compressor.
                        wrapped.SetTrackedCompressor(leased);
                    }
                    else
                    {
                        // No stream was produced: do not leak the lease.
                        CodecPool.ReturnCompressor(leased);
                    }
                }
                return wrapped;
            }
Beispiel #17
0
            /// <summary>
            /// Wraps an input stream in a decompression stream whose decompressor
            /// is leased from the global CodecPool.
            /// </summary>
            /// <remarks>
            /// When no stream results (the codec throws or returns null) the
            /// decompressor is returned to the pool; otherwise it is registered
            /// on the stream via <c>SetTrackedDecompressor</c>.
            /// </remarks>
            /// <param name="codec">The codec to use to create the input stream.</param>
            /// <param name="conf">Not read by this method.</param>
            /// <param name="in">The input stream to wrap.</param>
            /// <returns>The new input stream</returns>
            /// <exception cref="System.IO.IOException"/>
            internal static CompressionInputStream CreateInputStreamWithCodecPool(CompressionCodec
                                                                                  codec, Configuration conf, InputStream @in)
            {
                Decompressor leased = CodecPool.GetDecompressor(codec);
                CompressionInputStream wrapped = null;

                try
                {
                    wrapped = codec.CreateInputStream(@in, leased);
                }
                finally
                {
                    if (wrapped != null)
                    {
                        // Success: the stream now carries the leased decompressor.
                        wrapped.SetTrackedDecompressor(leased);
                    }
                    else
                    {
                        // No stream was produced: do not leak the lease.
                        CodecPool.ReturnDecompressor(leased);
                    }
                }
                return wrapped;
            }
Beispiel #18
0
        public static void TestFinding()
        {
            CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration()
                                                                          );
            CompressionCodec codec = factory.GetCodec(new Path("/tmp/foo.bar"));

            Assert.Equal("default factory foo codec", null, codec);
            codec = factory.GetCodecByClassName(typeof(TestCodecFactory.BarCodec).GetCanonicalName
                                                    ());
            Assert.Equal("default factory foo codec", null, codec);
            codec = factory.GetCodec(new Path("/tmp/foo.gz"));
            CheckCodec("default factory for .gz", typeof(GzipCodec), codec);
            codec = factory.GetCodecByClassName(typeof(GzipCodec).GetCanonicalName());
            CheckCodec("default factory for gzip codec", typeof(GzipCodec), codec);
            codec = factory.GetCodecByName("gzip");
            CheckCodec("default factory for gzip codec", typeof(GzipCodec), codec);
            codec = factory.GetCodecByName("GZIP");
            CheckCodec("default factory for gzip codec", typeof(GzipCodec), codec);
            codec = factory.GetCodecByName("GZIPCodec");
            CheckCodec("default factory for gzip codec", typeof(GzipCodec), codec);
            codec = factory.GetCodecByName("gzipcodec");
            CheckCodec("default factory for gzip codec", typeof(GzipCodec), codec);
            Type klass = factory.GetCodecClassByName("gzipcodec");

            Assert.Equal(typeof(GzipCodec), klass);
            codec = factory.GetCodec(new Path("/tmp/foo.bz2"));
            CheckCodec("default factory for .bz2", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByClassName(typeof(BZip2Codec).GetCanonicalName());
            CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByName("bzip2");
            CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByName("bzip2codec");
            CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByName("BZIP2");
            CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByName("BZIP2CODEC");
            CheckCodec("default factory for bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByClassName(typeof(DeflateCodec).GetCanonicalName());
            CheckCodec("default factory for deflate codec", typeof(DeflateCodec), codec);
            codec = factory.GetCodecByName("deflate");
            CheckCodec("default factory for deflate codec", typeof(DeflateCodec), codec);
            codec = factory.GetCodecByName("deflatecodec");
            CheckCodec("default factory for deflate codec", typeof(DeflateCodec), codec);
            codec = factory.GetCodecByName("DEFLATE");
            CheckCodec("default factory for deflate codec", typeof(DeflateCodec), codec);
            codec = factory.GetCodecByName("DEFLATECODEC");
            CheckCodec("default factory for deflate codec", typeof(DeflateCodec), codec);
            factory = SetClasses(new Type[0]);
            // gz, bz2, snappy, lz4 are picked up by service loader, but bar isn't
            codec = factory.GetCodec(new Path("/tmp/foo.bar"));
            Assert.Equal("empty factory bar codec", null, codec);
            codec = factory.GetCodecByClassName(typeof(TestCodecFactory.BarCodec).GetCanonicalName
                                                    ());
            Assert.Equal("empty factory bar codec", null, codec);
            codec = factory.GetCodec(new Path("/tmp/foo.gz"));
            CheckCodec("empty factory gz codec", typeof(GzipCodec), codec);
            codec = factory.GetCodecByClassName(typeof(GzipCodec).GetCanonicalName());
            CheckCodec("empty factory gz codec", typeof(GzipCodec), codec);
            codec = factory.GetCodec(new Path("/tmp/foo.bz2"));
            CheckCodec("empty factory for .bz2", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByClassName(typeof(BZip2Codec).GetCanonicalName());
            CheckCodec("empty factory for bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodec(new Path("/tmp/foo.snappy"));
            CheckCodec("empty factory snappy codec", typeof(SnappyCodec), codec);
            codec = factory.GetCodecByClassName(typeof(SnappyCodec).GetCanonicalName());
            CheckCodec("empty factory snappy codec", typeof(SnappyCodec), codec);
            codec = factory.GetCodec(new Path("/tmp/foo.lz4"));
            CheckCodec("empty factory lz4 codec", typeof(Lz4Codec), codec);
            codec = factory.GetCodecByClassName(typeof(Lz4Codec).GetCanonicalName());
            CheckCodec("empty factory lz4 codec", typeof(Lz4Codec), codec);
            factory = SetClasses(new Type[] { typeof(TestCodecFactory.BarCodec), typeof(TestCodecFactory.FooCodec
                                                                                        ), typeof(TestCodecFactory.FooBarCodec) });
            codec   = factory.GetCodec(new Path("/tmp/.foo.bar.gz"));
            CheckCodec("full factory gz codec", typeof(GzipCodec), codec);
            codec = factory.GetCodecByClassName(typeof(GzipCodec).GetCanonicalName());
            CheckCodec("full codec gz codec", typeof(GzipCodec), codec);
            codec = factory.GetCodec(new Path("/tmp/foo.bz2"));
            CheckCodec("full factory for .bz2", typeof(BZip2Codec), codec);
            codec = factory.GetCodecByClassName(typeof(BZip2Codec).GetCanonicalName());
            CheckCodec("full codec bzip2 codec", typeof(BZip2Codec), codec);
            codec = factory.GetCodec(new Path("/tmp/foo.bar"));
            CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), codec);
            codec = factory.GetCodecByClassName(typeof(TestCodecFactory.BarCodec).GetCanonicalName
                                                    ());
            CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), codec);
            codec = factory.GetCodecByName("bar");
            CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), codec);
            codec = factory.GetCodecByName("BAR");
            CheckCodec("full factory bar codec", typeof(TestCodecFactory.BarCodec), codec);
            codec = factory.GetCodec(new Path("/tmp/foo/baz.foo.bar"));
            CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), codec
                       );
            codec = factory.GetCodecByClassName(typeof(TestCodecFactory.FooBarCodec).GetCanonicalName
                                                    ());
            CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), codec
                       );
            codec = factory.GetCodecByName("foobar");
            CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), codec
                       );
            codec = factory.GetCodecByName("FOOBAR");
            CheckCodec("full factory foo bar codec", typeof(TestCodecFactory.FooBarCodec), codec
                       );
            codec = factory.GetCodec(new Path("/tmp/foo.foo"));
            CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), codec);
            codec = factory.GetCodecByClassName(typeof(TestCodecFactory.FooCodec).GetCanonicalName
                                                    ());
            CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), codec);
            codec = factory.GetCodecByName("foo");
            CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), codec);
            codec = factory.GetCodecByName("FOO");
            CheckCodec("full factory foo codec", typeof(TestCodecFactory.FooCodec), codec);
            factory = SetClasses(new Type[] { typeof(TestCodecFactory.NewGzipCodec) });
            codec   = factory.GetCodec(new Path("/tmp/foo.gz"));
            CheckCodec("overridden factory for .gz", typeof(TestCodecFactory.NewGzipCodec), codec
                       );
            codec = factory.GetCodecByClassName(typeof(TestCodecFactory.NewGzipCodec).GetCanonicalName
                                                    ());
            CheckCodec("overridden factory for gzip codec", typeof(TestCodecFactory.NewGzipCodec
                                                                   ), codec);
            Configuration conf = new Configuration();

            conf.Set(CommonConfigurationKeys.IoCompressionCodecsKey, "   org.apache.hadoop.io.compress.GzipCodec   , "
                     + "    org.apache.hadoop.io.compress.DefaultCodec  , " + " org.apache.hadoop.io.compress.BZip2Codec   "
                     );
            try
            {
                CompressionCodecFactory.GetCodecClasses(conf);
            }
            catch (ArgumentException)
            {
                Fail("IllegalArgumentException is unexpected");
            }
        }
Beispiel #19
0
 /// <summary>
 /// Reports the lease count recorded for the
 /// <see cref="Decompressor"/>
 /// type of the given
 /// <see cref="CompressionCodec"/>.
 /// </summary>
 /// <param name="codec">Codec to look up; may be null.</param>
 /// <returns>
 /// 0 when <paramref name="codec"/> is null; otherwise whatever GetLeaseCount
 /// returns for the codec's decompressor type.
 /// </returns>
 public static int GetLeasedDecompressorsCount(CompressionCodec codec)
 {
     if (codec == null)
     {
         return 0;
     }
     return GetLeaseCount(decompressorCounts, codec.GetDecompressorType());
 }
        /// <summary>A little test program.</summary>
        /// <remarks>
        /// Arguments are processed left to right. "-in" switches to encode mode and
        /// "-out" switches back to decode mode (the initial mode). Any other argument
        /// is treated as a path whose codec is looked up through the codec factory:
        /// in encode mode the file named by RemoveSuffix(argument, default extension)
        /// is copied through the codec's output stream into the argument path; in
        /// decode mode the argument path is read through the codec's input stream and
        /// written to standard output. Paths with no matching codec are reported and
        /// skipped.
        /// </remarks>
        /// <param name="args">Mode switches and file paths.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Configuration conf = new Configuration();

            Org.Apache.Hadoop.IO.Compress.CompressionCodecFactory factory = new Org.Apache.Hadoop.IO.Compress.CompressionCodecFactory
                                                                                (conf);
            bool encode = false;

            for (int i = 0; i < args.Length; ++i)
            {
                // Mode switches only change state for the paths that follow.
                if ("-in".Equals(args[i]))
                {
                    encode = true;
                    continue;
                }
                if ("-out".Equals(args[i]))
                {
                    encode = false;
                    continue;
                }

                CompressionCodec codec = factory.GetCodec(new Path(args[i]));
                if (codec == null)
                {
                    System.Console.Out.WriteLine("Codec for " + args[i] + " not found.");
                    continue;
                }

                if (encode)
                {
                    CompressionOutputStream @out = null;
                    InputStream             @in  = null;
                    try
                    {
                        @out = codec.CreateOutputStream(new FileOutputStream(args[i]));
                        byte[] buffer     = new byte[100];
                        string inFilename = RemoveSuffix(args[i], codec.GetDefaultExtension());
                        @in = new FileInputStream(inFilename);
                        int len = @in.Read(buffer);
                        while (len > 0)
                        {
                            @out.Write(buffer, 0, len);
                            len = @in.Read(buffer);
                        }
                    }
                    finally
                    {
                        // Nested so the input is still closed when closing the
                        // output throws.
                        try
                        {
                            if (@out != null)
                            {
                                @out.Close();
                            }
                        }
                        finally
                        {
                            if (@in != null)
                            {
                                @in.Close();
                            }
                        }
                    }
                }
                else
                {
                    CompressionInputStream @in = null;
                    try
                    {
                        @in = codec.CreateInputStream(new FileInputStream(args[i]));
                        byte[] buffer = new byte[100];
                        int    len    = @in.Read(buffer);
                        while (len > 0)
                        {
                            System.Console.Out.Write(buffer, 0, len);
                            len = @in.Read(buffer);
                        }
                    }
                    finally
                    {
                        if (@in != null)
                        {
                            @in.Close();
                        }
                    }
                }
            }
        }
Beispiel #21
0
        /// <summary>
        /// Writes a MapFile at <paramref name="path"/> containing
        /// <paramref name="records"/> entries. Each entry uses the same Text as key
        /// and value: the record index zero-padded to three digits ("000", "001", ...).
        /// </summary>
        /// <param name="conf">Configuration handed to the MapFile writer.</param>
        /// <param name="fs">Not used by this method.</param>
        /// <param name="path">Location of the MapFile to create.</param>
        /// <param name="codec">Codec passed to the writer's compression option.</param>
        /// <param name="type">Compression type passed to the writer's compression option.</param>
        /// <param name="records">Number of entries to append.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void CreateMapFile(Configuration conf, FileSystem fs, Path path, CompressionCodec
                                          codec, SequenceFile.CompressionType type, int records)
        {
            MapFile.Writer writer = new MapFile.Writer(conf, path,
                                                       MapFile.Writer.KeyClass(typeof(Text)),
                                                       MapFile.Writer.ValueClass(typeof(Text)),
                                                       MapFile.Writer.Compression(type, codec));
            try
            {
                Text key = new Text();

                for (int j = 0; j < records; j++)
                {
                    // "D3" is the .NET counterpart of Java's "%03d". string.Format does
                    // not understand printf-style specifiers and would hand back the
                    // pattern text itself, giving every record the same key.
                    key.Set(j.ToString("D3"));
                    writer.Append(key, key);
                }
            }
            finally
            {
                // Close the writer even when an append fails.
                writer.Close();
            }
        }
Beispiel #22
0
        /// <summary>
        /// Round-trips generated records through the codec named by
        /// <paramref name="codecClass"/> and asserts that decompression reproduces
        /// the original data.
        /// </summary>
        /// <remarks>
        /// The compressed output is verified twice: first record by record through
        /// buffered data streams (including a dictionary lookup that checks the
        /// decompressed records hash like the originals), then byte by byte straight
        /// from the codec's input stream.
        /// </remarks>
        /// <param name="conf">Configuration used to resolve and instantiate the codec class.</param>
        /// <param name="seed">Seed handed to the RandomDatum generator.</param>
        /// <param name="count">Number of key/value pairs to generate and compare.</param>
        /// <param name="codecClass">Class name of the codec under test.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void CodecTest(Configuration conf, int seed, int count, string codecClass
                                      )
        {
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName(codecClass
                                                                                          ), conf);
            }
            catch (TypeLoadException e)
            {
                // Keep the original failure attached so the reason the class could not
                // be loaded is not lost.
                throw new IOException("Illegal codec!", e);
            }
            Log.Info("Created a Codec object of type: " + codecClass);
            // Generate data
            DataOutputBuffer data = new DataOutputBuffer();

            RandomDatum.Generator generator = new RandomDatum.Generator(seed);
            for (int i = 0; i < count; ++i)
            {
                generator.Next();
                RandomDatum key   = generator.GetKey();
                RandomDatum value = generator.GetValue();
                key.Write(data);
                value.Write(data);
            }
            Log.Info("Generated " + count + " records");
            // Compress data
            DataOutputBuffer        compressedDataBuffer = new DataOutputBuffer();
            CompressionOutputStream deflateFilter        = codec.CreateOutputStream(compressedDataBuffer
                                                                                    );
            DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter
                                                                                        ));

            deflateOut.Write(data.GetData(), 0, data.GetLength());
            // Push buffered bytes into the codec stream before finishing it.
            deflateOut.Flush();
            deflateFilter.Finish();
            Log.Info("Finished compressing data");
            // De-compress data
            DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();

            deCompressedDataBuffer.Reset(compressedDataBuffer.GetData(), 0, compressedDataBuffer
                                         .GetLength());
            CompressionInputStream inflateFilter = codec.CreateInputStream(deCompressedDataBuffer
                                                                           );
            DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter
                                                                                    ));
            // Check
            DataInputBuffer originalData = new DataInputBuffer();

            originalData.Reset(data.GetData(), 0, data.GetLength());
            DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData
                                                                                     ));

            for (int i_1 = 0; i_1 < count; ++i_1)
            {
                RandomDatum k1 = new RandomDatum();
                RandomDatum v1 = new RandomDatum();
                k1.ReadFields(originalIn);
                v1.ReadFields(originalIn);
                RandomDatum k2 = new RandomDatum();
                RandomDatum v2 = new RandomDatum();
                k2.ReadFields(inflateIn);
                v2.ReadFields(inflateIn);
                Assert.True("original and compressed-then-decompressed-output not equal"
                            , k1.Equals(k2) && v1.Equals(v2));
                // original and compressed-then-decompressed-output have the same hashCode
                IDictionary <RandomDatum, string> m = new Dictionary <RandomDatum, string>();
                m[k1] = k1.ToString();
                m[v1] = v1.ToString();
                // TryGetValue leaves result null on a miss, so a hash mismatch is
                // reported by the assertion below instead of surfacing as a
                // KeyNotFoundException from the indexer.
                string result;
                m.TryGetValue(k2, out result);
                Assert.Equal("k1 and k2 hashcode not equal", result, k1.ToString
                                 ());
                m.TryGetValue(v2, out result);
                Assert.Equal("v1 and v2 hashcode not equal", result, v1.ToString
                                 ());
            }
            // De-compress data byte-at-a-time
            originalData.Reset(data.GetData(), 0, data.GetLength());
            deCompressedDataBuffer.Reset(compressedDataBuffer.GetData(), 0, compressedDataBuffer
                                         .GetLength());
            inflateFilter = codec.CreateInputStream(deCompressedDataBuffer);
            // Check
            originalIn = new DataInputStream(new BufferedInputStream(originalData));
            int expected;

            // The loop also compares the terminating -1, so both streams must end
            // at the same position.
            do
            {
                expected = originalIn.Read();
                Assert.Equal("Inflated stream read by byte does not match", expected
                             , inflateFilter.Read());
            }while (expected != -1);
            Log.Info("SUCCESS! Completed checking " + count + " records");
        }
Beispiel #23
0
 /// <summary>
 /// Asserts that <paramref name="actual"/> has exactly the type
 /// <paramref name="expected"/>, compared by full type name.
 /// </summary>
 /// <param name="msg">Prefix for the assertion failure message.</param>
 /// <param name="expected">Type the codec is required to have.</param>
 /// <param name="actual">Codec to check; null is reported as an assertion failure.</param>
 private static void CheckCodec(string msg, Type expected, CompressionCodec actual
                                )
 {
     // A missing codec is reported through the assertion (carrying msg) rather
     // than as a NullReferenceException from dereferencing actual.
     string actualName = (actual == null) ? null : actual.GetType().FullName;

     Assert.Equal(msg + " unexpected codec found", expected.FullName
                  , actualName);
 }
Beispiel #24
0
 /// <summary>
 /// Convenience overload: forwards to the two-argument GetCompressor, passing
 /// null as the second argument.
 /// </summary>
 /// <param name="codec">Codec handed through unchanged.</param>
 /// <returns>Whatever the two-argument overload returns.</returns>
 public static Compressor GetCompressor(CompressionCodec codec)
 {
     Compressor result = GetCompressor(codec, null);

     return result;
 }