Beispiel #1
0
        /// <summary>
        /// Reports whether <paramref name="file"/> may be divided into several input splits.
        /// </summary>
        /// <remarks>
        /// A file for which no compression codec is found is reported as splittable.
        /// Otherwise the file is splittable only when the codec found for it is a
        /// <c>SplittableCompressionCodec</c>.
        /// </remarks>
        /// <param name="fs">Not consulted by this implementation.</param>
        /// <param name="file">Path handed to the codec factory to look up a codec.</param>
        /// <returns><c>true</c> when the file can be split, <c>false</c> otherwise.</returns>
        protected internal override bool IsSplitable(FileSystem fs, Path file)
        {
            CompressionCodec matchedCodec = compressionCodecs.GetCodec(file);

            return matchedCodec == null || matchedCodec is SplittableCompressionCodec;
        }
Beispiel #2
0
        /// <summary>
        /// Creates a line reader over one split of a file, choosing the underlying line
        /// reader according to whether the input is compressed and, if so, whether its
        /// codec is a <c>SplittableCompressionCodec</c>.
        /// </summary>
        /// <param name="job">Configuration used to read the maximum line length, build the
        /// codec factory, resolve the file system and configure the line reader.</param>
        /// <param name="split">Split supplying the path, start offset and length.</param>
        /// <param name="recordDelimiter">Delimiter bytes forwarded to the line reader.</param>
        /// <exception cref="System.IO.IOException"/>
        public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter)
        {
            this.maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);

            // Byte range covered by this split, before any codec adjustment.
            start = split.GetStart();
            end   = start + split.GetLength();

            Path splitPath = split.GetPath();

            compressionCodecs = new CompressionCodecFactory(job);
            codec             = compressionCodecs.GetCodec(splitPath);

            // Open the file; how we position within it depends on the branch below.
            FileSystem fileSystem = splitPath.GetFileSystem(job);

            fileIn = fileSystem.Open(splitPath);

            if (!IsCompressedInput())
            {
                // Plain input: seek straight to the start of the split.
                fileIn.Seek(start);
                @in          = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength());
                filePosition = fileIn;
            }
            else
            {
                decompressor = CodecPool.GetDecompressor(codec);

                SplittableCompressionCodec splittableCodec = codec as SplittableCompressionCodec;
                if (splittableCodec == null)
                {
                    // Codec is not splittable: decompress the stream as a whole and
                    // take the position from the raw file stream.
                    @in = new SplitLineReader(
                        codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter);
                    filePosition = fileIn;
                }
                else
                {
                    SplitCompressionInputStream blockStream = splittableCodec.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);

                    @in = new CompressedSplitLineReader(blockStream, job, recordDelimiter);

                    // The codec reports its own adjusted split boundaries; use those.
                    start = blockStream.GetAdjustedStart();
                    end   = blockStream.GetAdjustedEnd();

                    // Take the position from the compressed stream.
                    filePosition = blockStream;
                }
            }

            // Any split other than the first discards its first record, because every
            // split (except the last) reads one extra line in the next() method.
            if (start != 0)
            {
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }

            this.pos = start;
        }
Beispiel #3
0
            /// <summary>
            /// Opens <paramref name="item"/> and picks a decoding stream for it by inspecting
            /// the leading bytes of the file and, failing that, the codec registered for its path.
            /// </summary>
            /// <remarks>
            /// Files too short to carry a full signature (including files that end right after
            /// a two-byte prefix such as 'S' 'E' or 'O' 'b') are never treated as a container
            /// format; running out of bytes while probing does not raise an exception.
            /// </remarks>
            /// <param name="item">Path data of the file to open.</param>
            /// <returns>
            /// A gzip, sequence-file text, Avro or codec-specific stream when the content is
            /// recognised; otherwise the raw stream rewound to offset 0.
            /// </returns>
            /// <exception cref="System.IO.IOException"/>
            protected internal override InputStream GetInputStream(PathData item)
            {
                FSDataInputStream i = (FSDataInputStream)base.GetInputStream(item);
                short leadBytes;

                try
                {
                    leadBytes = i.ReadShort();
                }
                catch (EOFException)
                {
                    // 0- and 1-byte files cannot hold a signature: hand back the raw stream.
                    i.Seek(0);
                    return i;
                }

                // Check the type of stream first, based on its two leading bytes.
                switch (leadBytes)
                {
                case 0x1f8b:
                {
                    // RFC 1952 magic number: must be gzip.
                    i.Seek(0);
                    return new GZIPInputStream(i);
                }

                case 0x5345:
                {
                    // 'S' 'E': might be a SequenceFile ("SEQ").
                    if (NextByteMatches(i, 'Q'))
                    {
                        i.Close();
                        return new Display.TextRecordInputStream(this, item.stat);
                    }
                    // Not a SequenceFile after all: fall back to path-based codec detection.
                    goto default;
                }

                case 0x4f62:
                {
                    // 'O' 'b': an Avro file when followed by 'j'.
                    if (NextByteMatches(i, 'j'))
                    {
                        i.Close();
                        return new Display.AvroFileInputStream(item.stat);
                    }
                    break;
                }

                default:
                {
                    // Check the type of compression instead, depending on the Codec class's
                    // own detection methods, based on the provided path.
                    CompressionCodecFactory cf    = new CompressionCodecFactory(GetConf());
                    CompressionCodec        codec = cf.GetCodec(item.path);
                    if (codec != null)
                    {
                        i.Seek(0);
                        return codec.CreateInputStream(i);
                    }
                    break;
                }
                }

                // File is non-compressed, or not a file container we know.
                i.Seek(0);
                return i;
            }

            /// <summary>
            /// Reads one more byte from <paramref name="stream"/> and reports whether it equals
            /// <paramref name="expected"/>. Hitting the end of the stream counts as a mismatch,
            /// so a file that stops right after its two-byte prefix is not an error.
            /// </summary>
            /// <param name="stream">Stream positioned just after the two leading bytes.</param>
            /// <param name="expected">Character the next byte is compared against.</param>
            /// <returns><c>true</c> when a byte was read and it equals <paramref name="expected"/>.</returns>
            private static bool NextByteMatches(FSDataInputStream stream, char expected)
            {
                try
                {
                    return stream.ReadByte() == expected;
                }
                catch (EOFException)
                {
                    return false;
                }
            }