Exemplo n.º 1
0
            /// <summary>
            /// Obtains a compressor for the codec reported by <c>GetCodec()</c> from
            /// <c>CodecPool</c> and resets it before handing it out.
            /// </summary>
            /// <returns>
            /// The reset compressor, or <c>null</c> when there is no codec or
            /// <c>CodecPool</c> returned no compressor.
            /// </returns>
            /// <exception cref="System.IO.IOException"/>
            public Compressor GetCompressor()
            {
                CompressionCodec codec = GetCodec();
                if (codec == null)
                {
                    return null;
                }

                Compressor pooled = CodecPool.GetCompressor(codec);
                if (pooled == null)
                {
                    return null;
                }

                if (pooled.Finished())
                {
                    // Somebody returned the compressor to CodecPool but is still
                    // using it.
                    Log.Warn("Compressor obtained from CodecPool already finished()");
                }
                else if (Log.IsDebugEnabled())
                {
                    Log.Debug("Got a compressor: " + pooled.GetHashCode());
                }

                // Always reset, whether or not the instance looked finished.
                pooled.Reset();
                return pooled;
            }
Exemplo n.º 2
0
        /// <summary>
        /// Reads a bzip2-compressed resource to the end, closes the reader twice, and
        /// then verifies that ten decompressors taken from <c>CodecPool</c> are ten
        /// distinct instances.
        /// </summary>
        /// <remarks>
        /// Presumably this guards against a double <c>Close()</c> returning the same
        /// decompressor to the pool twice -- confirm against the reader implementation.
        /// </remarks>
        public virtual void TestMultipleClose()
        {
            Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2");

            // NUnit expects the value under test first and the failure message second;
            // the JUnit order (message, value) would assert on the literal string.
            NUnit.Framework.Assert.IsNotNull(testFileUrl, "Cannot find recordSpanningMultipleSplits.txt.bz2");
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
            LineRecordReader reader = new LineRecordReader(conf, split);
            LongWritable     key    = new LongWritable();
            Text             value  = new Text();

            // Drain the reader; the records themselves are not inspected.
            //noinspection StatementWithEmptyBody
            while (reader.Next(key, value))
            {
            }
            // Closing twice is the scenario under test.
            reader.Close();
            reader.Close();
            BZip2Codec codec = new BZip2Codec();

            codec.SetConf(conf);
            ICollection <Decompressor> decompressors = new HashSet <Decompressor>();

            for (int i = 0; i < 10; ++i)
            {
                decompressors.AddItem(CodecPool.GetDecompressor(codec));
            }
            // A set of size 10 means no instance was handed out more than once.
            NUnit.Framework.Assert.AreEqual(10, decompressors.Count);
        }
Exemplo n.º 3
0
 /// <summary>Construct an IFile Reader.</summary>
 /// <param name="conf">
 /// Configuration; when non-null it supplies the <c>io.file.buffer.size</c> setting.
 /// </param>
 /// <param name="in">The input stream</param>
 /// <param name="length">
 /// Length of the data in the stream, including the checksum
 /// bytes.
 /// </param>
 /// <param name="codec">
 /// Compression codec, or <c>null</c> to read the checksummed stream directly.
 /// </param>
 /// <param name="readsCounter">Counter for records read from disk</param>
 /// <exception cref="System.IO.IOException"/>
 public Reader(Configuration conf, FSDataInputStream @in, long length,
               CompressionCodec codec, Counters.Counter readsCounter)
 {
     readRecordsCounter = readsCounter;
     checksumIn = new IFileInputStream(@in, length, conf);

     // this.@in ends up being the (possibly decompressed) stream that is read.
     if (codec == null)
     {
         this.@in = checksumIn;
     }
     else
     {
         decompressor = CodecPool.GetDecompressor(codec);
         if (decompressor == null)
         {
             // No decompressor available: fall back to the checksummed stream as-is.
             Log.Warn("Could not obtain decompressor from CodecPool");
             this.@in = checksumIn;
         }
         else
         {
             this.@in = codec.CreateInputStream(checksumIn, decompressor);
         }
     }

     dataIn = new DataInputStream(this.@in);
     fileLength = length;
     if (conf != null)
     {
         bufferSize = conf.GetInt("io.file.buffer.size", DefaultBufferSize);
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Hands a decompressor back to <c>CodecPool</c>; a <c>null</c> argument is ignored.
 /// </summary>
 /// <param name="decompressor">The decompressor to return; may be <c>null</c>.</param>
 public void ReturnDecompressor(Decompressor decompressor)
 {
     if (decompressor == null)
     {
         return;
     }

     if (Log.IsDebugEnabled())
     {
         Log.Debug("Returned a decompressor: " + decompressor.GetHashCode());
     }

     CodecPool.ReturnDecompressor(decompressor);
 }
Exemplo n.º 5
0
 /// <summary>
 /// Hands a compressor back to <c>CodecPool</c>; a <c>null</c> argument is ignored.
 /// </summary>
 /// <param name="compressor">The compressor to return; may be <c>null</c>.</param>
 public void ReturnCompressor(Compressor compressor)
 {
     if (compressor == null)
     {
         return;
     }

     if (Log.IsDebugEnabled())
     {
         Log.Debug("Return a compressor: " + compressor.GetHashCode());
     }

     CodecPool.ReturnCompressor(compressor);
 }
Exemplo n.º 6
0
        /// <summary>
        /// Opens the file behind <paramref name="genericSplit"/> and sets up the line
        /// reader, choosing a compressed or uncompressed reader depending on whether a
        /// codec is found for the file.
        /// </summary>
        /// <param name="genericSplit">The split to read; it is cast to <c>FileSplit</c>.</param>
        /// <param name="context">Task context that supplies the configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Initialize(InputSplit genericSplit, TaskAttemptContext context)
        {
            FileSplit split = (FileSplit)genericSplit;
            Configuration job = context.GetConfiguration();

            this.maxLineLength = job.GetInt(MaxLineLength, int.MaxValue);
            start = split.GetStart();
            end = start + split.GetLength();
            Path file = split.GetPath();

            // open the file and seek to the start of the split
            fileIn = file.GetFileSystem(job).Open(file);
            CompressionCodec codec = new CompressionCodecFactory(job).GetCodec(file);

            if (codec == null)
            {
                // Plain file: position directly at the split start.
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(fileIn, job, this.recordDelimiterBytes, split.GetLength());
                filePosition = fileIn;
            }
            else
            {
                isCompressedInput = true;
                decompressor = CodecPool.GetDecompressor(codec);
                SplittableCompressionCodec splittable = codec as SplittableCompressionCodec;
                if (splittable != null)
                {
                    SplitCompressionInputStream blockStream = splittable.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
                    @in = new CompressedSplitLineReader(blockStream, job, this.recordDelimiterBytes);
                    // The stream reports the boundaries it actually uses.
                    start = blockStream.GetAdjustedStart();
                    end = blockStream.GetAdjustedEnd();
                    filePosition = blockStream;
                }
                else
                {
                    @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), job,
                                              this.recordDelimiterBytes);
                    filePosition = fileIn;
                }
            }

            // If this is not the first split, we always throw away first record
            // because we always (except the last split) read one extra line in
            // next() method.
            if (start != 0)
            {
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }
            this.pos = start;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates a line reader over <paramref name="split"/>, choosing a compressed or
        /// uncompressed reader depending on <c>IsCompressedInput()</c>.
        /// </summary>
        /// <param name="job">Configuration used for the line-length limit, codec lookup and file system.</param>
        /// <param name="split">The file split to read.</param>
        /// <param name="recordDelimiter">Record delimiter bytes passed through to the line reader.</param>
        /// <exception cref="System.IO.IOException"/>
        public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter)
        {
            this.maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            start = split.GetStart();
            end = start + split.GetLength();
            Path file = split.GetPath();

            compressionCodecs = new CompressionCodecFactory(job);
            codec = compressionCodecs.GetCodec(file);

            // open the file and seek to the start of the split
            fileIn = file.GetFileSystem(job).Open(file);

            if (!IsCompressedInput())
            {
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength());
                filePosition = fileIn;
            }
            else
            {
                decompressor = CodecPool.GetDecompressor(codec);
                SplittableCompressionCodec splittable = codec as SplittableCompressionCodec;
                if (splittable != null)
                {
                    SplitCompressionInputStream blockStream = splittable.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
                    @in = new CompressedSplitLineReader(blockStream, job, recordDelimiter);
                    // The stream reports the boundaries it actually uses.
                    start = blockStream.GetAdjustedStart();
                    end = blockStream.GetAdjustedEnd();
                    filePosition = blockStream;
                }
                else
                {
                    // take pos from compressed stream
                    @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter);
                    filePosition = fileIn;
                }
            }

            // If this is not the first split, we always throw away first record
            // because we always (except the last split) read one extra line in
            // next() method.
            if (start != 0)
            {
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }
            this.pos = start;
        }
        /// <summary>
        /// Opens <paramref name="file"/> for fixed-length record reading and skips the
        /// partial record, if any, at the start of the split.
        /// </summary>
        /// <remarks>
        /// This is also called from the old FixedLengthRecordReader API implementation.
        /// </remarks>
        /// <param name="job">Configuration used for file system and codec lookup.</param>
        /// <param name="splitStart">Byte offset at which the split begins.</param>
        /// <param name="splitLength">Length of the split in bytes.</param>
        /// <param name="file">The file to read.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Initialize(Configuration job, long splitStart, long splitLength, Path file)
        {
            start = splitStart;
            end = start + splitLength;

            // Bytes belonging to a record that began before this split.
            long leadingPartial = start % recordLength;
            long bytesToSkip = leadingPartial == 0 ? 0 : recordLength - leadingPartial;

            // open the file and seek to the start of the split
            fileIn = file.GetFileSystem(job).Open(file);
            CompressionCodec codec = new CompressionCodecFactory(job).GetCodec(file);

            if (codec == null)
            {
                fileIn.Seek(start);
                filePosition = fileIn;
                inputStream = fileIn;
                long splitSize = end - start - bytesToSkip;
                // Round up so that a trailing partial record still counts as one.
                long expectedRecords = (splitSize + recordLength - 1) / recordLength;
                if (expectedRecords < 0)
                {
                    expectedRecords = 0;
                }
                numRecordsRemainingInSplit = expectedRecords;
                Log.Info("Expecting " + numRecordsRemainingInSplit + " records each with a length of "
                         + recordLength + " bytes in the split with an effective size of " + splitSize +
                         " bytes");
            }
            else
            {
                isCompressedInput = true;
                decompressor = CodecPool.GetDecompressor(codec);
                CompressionInputStream compressedIn = codec.CreateInputStream(fileIn, decompressor);
                filePosition = compressedIn;
                inputStream = compressedIn;
                numRecordsRemainingInSplit = long.MaxValue;
                Log.Info("Compressed input; cannot compute number of records in the split");
            }

            if (bytesToSkip != 0)
            {
                // Advance by however many bytes the stream reports as skipped.
                start += inputStream.Skip(bytesToSkip);
            }
            this.pos = start;
        }
Exemplo n.º 9
0
 /// <summary>
 /// Finishes the IFile output: closes the serializers (when present), writes the
 /// two end-of-file markers, flushes and finishes the output streams, records the
 /// compressed byte count, returns the compressor to <c>CodecPool</c>, and
 /// updates the written-records counter.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     // When IFile writer is created by BackupStore, we do not have
     // Key and Value classes set. So, check before closing the
     // serializers
     if (keyClass != null)
     {
         keySerializer.Close();
         valueSerializer.Close();
     }
     // Write EOF_MARKER twice: once for the key length, once for the value length
     WritableUtils.WriteVInt(@out, EofMarker);
     WritableUtils.WriteVInt(@out, EofMarker);
     // Account for the two markers in the uncompressed byte count
     decompressedBytesWritten += 2 * WritableUtils.GetVIntSize(EofMarker);
     // Flush the stream
     @out.Flush();
     if (compressOutput)
     {
         // Finish the compressed stream and reset its state
         compressedOut.Finish();
         compressedOut.ResetState();
     }
     // Close the underlying stream iff we own it...
     if (ownOutputStream)
     {
         @out.Close();
     }
     else
     {
         // ...otherwise only write the checksum and leave the stream open
         checksumOut.Finish();
     }
     // Raw-stream position now, relative to the recorded start position
     compressedBytesWritten = rawOut.GetPos() - start;
     if (compressOutput)
     {
         // Return back the compressor
         CodecPool.ReturnCompressor(compressor);
         compressor = null;
     }
     // Drop the reference to the output stream
     @out = null;
     if (writtenRecordsCounter != null)
     {
         writtenRecordsCounter.Increment(numRecordsWritten);
     }
 }
Exemplo n.º 10
0
 /// <summary>
 /// Creates an in-memory map output backed by a bounded byte buffer of
 /// <paramref name="size"/> bytes.
 /// </summary>
 /// <param name="conf">Configuration stored on the instance.</param>
 /// <param name="mapId">Map task attempt id, passed to the base constructor.</param>
 /// <param name="merger">Merge manager stored on the instance.</param>
 /// <param name="size">Capacity of the backing buffer in bytes.</param>
 /// <param name="codec">Compression codec of the map outputs, or <c>null</c> if none.</param>
 /// <param name="primaryMapOutput">Passed through to the base constructor.</param>
 public InMemoryMapOutput(Configuration conf, TaskAttemptID mapId, MergeManagerImpl<K, V> merger,
                          int size, CompressionCodec codec, bool primaryMapOutput)
     : base(mapId, (long)size, primaryMapOutput)
 {
     this.conf = conf;
     this.merger = merger;
     this.codec = codec;

     byteStream = new BoundedByteArrayOutputStream(size);
     memory = byteStream.GetBuffer();

     // Decompression of map-outputs: a decompressor is only taken when a codec is set.
     decompressor = codec == null ? null : CodecPool.GetDecompressor(codec);
 }
Exemplo n.º 11
0
 /// <summary>
 /// Closes the underlying stream, releases the buffers, adds the number of records
 /// read to the counter (when one was supplied), and returns the decompressor to
 /// <c>CodecPool</c>.
 /// </summary>
 /// <remarks>
 /// The decompressor is returned in a <c>finally</c> block so that a failure while
 /// closing the stream does not leave it checked out of the pool.
 /// </remarks>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close()
 {
     try
     {
         // Close the underlying stream
         @in.Close();
         // Release the buffer
         dataIn = null;
         buffer = null;
         if (readRecordsCounter != null)
         {
             readRecordsCounter.Increment(numRecordsRead);
         }
     }
     finally
     {
         // Return the decompressor, even if closing the stream failed
         if (decompressor != null)
         {
             decompressor.Reset();
             CodecPool.ReturnDecompressor(decompressor);
             decompressor = null;
         }
     }
 }
Exemplo n.º 12
0
 /// <summary>
 /// Closes the line reader, if one exists, and returns any held decompressor to
 /// <c>CodecPool</c>. Runs while holding the lock on this instance.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public override void Close()
 {
     lock (this)
     {
         try
         {
             if (@in != null)
             {
                 @in.Close();
             }
         }
         finally
         {
             // Runs even when closing the reader throws.
             var held = decompressor;
             if (held != null)
             {
                 CodecPool.ReturnDecompressor(held);
                 decompressor = null;
             }
         }
     }
 }
Exemplo n.º 13
0
 /// <summary>
 /// Closes and clears the input stream, if one exists, and returns any held
 /// decompressor to <c>CodecPool</c>. Runs while holding the lock on this instance.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public override void Close()
 {
     lock (this)
     {
         try
         {
             if (inputStream != null)
             {
                 inputStream.Close();
                 // Only cleared when the close above succeeded.
                 inputStream = null;
             }
         }
         finally
         {
             // Runs even when closing the stream throws.
             var held = decompressor;
             if (held != null)
             {
                 CodecPool.ReturnDecompressor(held);
                 decompressor = null;
             }
         }
     }
 }
Exemplo n.º 14
0
 /// <summary>
 /// Creates an IFile writer on top of <paramref name="out"/>, wrapping it in a
 /// checksumming stream and, when a codec and a pooled compressor are available,
 /// in a compressing stream as well.
 /// </summary>
 /// <param name="conf">Configuration used to build the serialization factory.</param>
 /// <param name="out">The raw output stream.</param>
 /// <param name="keyClass">Key type, or <c>null</c> to skip setting up serializers.</param>
 /// <param name="valueClass">Value type.</param>
 /// <param name="codec">Compression codec, or <c>null</c> for uncompressed output.</param>
 /// <param name="writesCounter">Counter for written records; stored on the instance.</param>
 /// <param name="ownOutputStream">Whether this writer owns the stream; stored on the instance.</param>
 /// <exception cref="System.IO.IOException"/>
 public Writer(Configuration conf, FSDataOutputStream @out, Type keyClass, Type valueClass,
               CompressionCodec codec, Counters.Counter writesCounter, bool ownOutputStream)
 {
     writtenRecordsCounter = writesCounter;
     checksumOut = new IFileOutputStream(@out);
     rawOut = @out;
     // Remember the raw stream position at construction time.
     start = rawOut.GetPos();

     if (codec == null)
     {
         this.@out = new FSDataOutputStream(checksumOut, null);
     }
     else
     {
         compressor = CodecPool.GetCompressor(codec);
         if (compressor == null)
         {
             // No compressor available: write through the checksum stream only.
             Log.Warn("Could not obtain compressor from CodecPool");
             this.@out = new FSDataOutputStream(checksumOut, null);
         }
         else
         {
             compressor.Reset();
             compressedOut = codec.CreateOutputStream(checksumOut, compressor);
             this.@out = new FSDataOutputStream(compressedOut, null);
             compressOutput = true;
         }
     }

     this.keyClass = keyClass;
     this.valueClass = valueClass;
     if (keyClass != null)
     {
         SerializationFactory factory = new SerializationFactory(conf);
         keySerializer = factory.GetSerializer(keyClass);
         keySerializer.Open(buffer);
         valueSerializer = factory.GetSerializer(valueClass);
         valueSerializer.Open(buffer);
     }
     this.ownOutputStream = ownOutputStream;
 }
Exemplo n.º 15
0
        /// <summary>
        /// Reads a bzip2-compressed resource to the end through the task-context based
        /// reader API, closes the reader twice, and then verifies that ten decompressors
        /// taken from <c>CodecPool</c> are ten distinct instances.
        /// </summary>
        /// <remarks>
        /// Presumably this guards against a double <c>Close()</c> returning the same
        /// decompressor to the pool twice -- confirm against the reader implementation.
        /// </remarks>
        public virtual void TestMultipleClose()
        {
            Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2");

            // NUnit expects the value under test first and the failure message second;
            // the JUnit order (message, value) would assert on the literal string.
            NUnit.Framework.Assert.IsNotNull(testFileUrl, "Cannot find recordSpanningMultipleSplits.txt.bz2");
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // Read the whole file as a single split.
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, null);
            LineRecordReader reader = new LineRecordReader();

            reader.Initialize(split, context);
            // Drain the reader; the records themselves are not inspected.
            //noinspection StatementWithEmptyBody
            while (reader.NextKeyValue())
            {
            }
            // Closing twice is the scenario under test.
            reader.Close();
            reader.Close();
            BZip2Codec codec = new BZip2Codec();

            codec.SetConf(conf);
            ICollection <Decompressor> decompressors = new HashSet <Decompressor>();

            for (int i = 0; i < 10; ++i)
            {
                decompressors.AddItem(CodecPool.GetDecompressor(codec));
            }
            // A set of size 10 means no instance was handed out more than once.
            NUnit.Framework.Assert.AreEqual(10, decompressors.Count);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Reads one map output from <paramref name="input"/> into the in-memory buffer,
        /// going through a checksumming stream and, when a codec is set, a decompressing
        /// stream. Fails if the stream holds more bytes than the buffer.
        /// </summary>
        /// <param name="host">Source host (not used in this method body).</param>
        /// <param name="input">Stream carrying the map output.</param>
        /// <param name="compressedLength">Length passed to the checksumming stream.</param>
        /// <param name="decompressedLength">Decompressed length (not used in this method body).</param>
        /// <param name="metrics">Receives the number of bytes read.</param>
        /// <param name="reporter">Receives a progress notification after the read.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Shuffle(MapHost host, InputStream input, long compressedLength,
                                     long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
        {
            input = new IFileInputStream(input, compressedLength, conf);

            // Are map-outputs compressed?
            if (codec != null)
            {
                decompressor.Reset();
                input = codec.CreateInputStream(input, decompressor);
            }

            try
            {
                IOUtils.ReadFully(input, memory, 0, memory.Length);
                metrics.InputBytes(memory.Length);
                reporter.Progress();
                Log.Info("Read " + memory.Length + " bytes from map-output for " + GetMapId());

                // Anything readable past the buffer means the stream was longer than announced.
                bool hasTrailingData = input.Read() >= 0;
                if (hasTrailingData)
                {
                    throw new IOException("Unexpected extra bytes from input stream for " + GetMapId());
                }
            }
            catch (IOException)
            {
                // Close the streams
                IOUtils.Cleanup(Log, input);
                // Re-throw
                throw;
            }
            finally
            {
                CodecPool.ReturnDecompressor(decompressor);
            }
        }