Example #1
0
        /// <summary>
        /// Repositions the wrapped stream and reports the resulting position.
        /// </summary>
        /// <param name="offset">Offset handed to the underlying seekable, interpreted relative to <paramref name="origin"/>.</param>
        /// <param name="origin">
        /// Reference point for <paramref name="offset"/>. <c>Current</c> maps to <c>SeekType.Cur</c>,
        /// <c>End</c> maps to <c>SeekType.End</c>; <c>Begin</c> and any other value map to <c>SeekType.Set</c>.
        /// </param>
        /// <returns>The value of <c>Position</c> read after the seek was issued.</returns>
        /// <exception cref="NotSupportedException">
        /// <c>CanSeek</c> is false, or the wrapped stream cannot be used as a <c>Seekable</c>.
        /// </exception>
        /// <exception cref="ObjectDisposedException">The stream has already been disposed.</exception>
        public override long Seek(long offset, System.IO.SeekOrigin origin)
        {
            if (!CanSeek)
            {
                throw new NotSupportedException("This stream doesn't support seeking");
            }
            if (is_disposed)
            {
                // The one-argument constructor interprets its argument as the object name,
                // so the explanatory text has to go through the (objectName, message) overload.
                throw new ObjectDisposedException(GetType().Name, "The stream is closed");
            }

            Seekable seekable = stream as Seekable;
            if (seekable == null)
            {
                // Fail with a meaningful exception instead of a NullReferenceException below.
                throw new NotSupportedException("This stream doesn't support seeking");
            }

            SeekType seek_type;

            switch (origin)
            {
            case System.IO.SeekOrigin.Current:
                seek_type = SeekType.Cur;
                break;

            case System.IO.SeekOrigin.End:
                seek_type = SeekType.End;
                break;

            case System.IO.SeekOrigin.Begin:
            default:
                // Unrecognised origins are treated like Begin.
                seek_type = SeekType.Set;
                break;
            }

            seekable.Seek(offset, seek_type, null);
            return Position;
        }
Example #2
0
 /// <summary>
 /// Builds a reader on top of an already opened input stream.
 /// The maximum line length is looked up in <paramref name="job"/> under
 /// <c>LineRecordReader.MaxLineLength</c>, falling back to <c>int.MaxValue</c>.
 /// </summary>
 /// <param name="in">Stream that is wrapped in a <c>SplitLineReader</c>.</param>
 /// <param name="offset">Value stored as both the start and the current position.</param>
 /// <param name="endOffset">Value stored as the end position.</param>
 /// <param name="job">Configuration consulted for the line-length limit and passed to the line reader.</param>
 /// <param name="recordDelimiter">Delimiter bytes forwarded to the line reader.</param>
 /// <exception cref="System.IO.IOException"/>
 public LineRecordReader(InputStream @in, long offset, long endOffset,
                         Configuration job, byte[] recordDelimiter)
 {
     maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
     this.@in = new SplitLineReader(@in, job, recordDelimiter);

     // Positional bookkeeping: reading begins at the supplied offset.
     start = offset;
     pos = offset;
     end = endOffset;

     // This constructor leaves the file-position source unset.
     filePosition = null;
 }
Example #3
0
 /// <summary>
 /// Builds a reader on top of an already opened input stream using an
 /// explicitly supplied maximum line length.
 /// </summary>
 /// <param name="in">Stream that is wrapped in a <c>SplitLineReader</c>.</param>
 /// <param name="offset">Value stored as both the start and the current position.</param>
 /// <param name="endOffset">Value stored as the end position.</param>
 /// <param name="maxLineLength">Line-length limit stored on the reader.</param>
 /// <param name="recordDelimiter">Delimiter bytes forwarded to the line reader.</param>
 public LineRecordReader(InputStream @in, long offset, long endOffset,
                         int maxLineLength, byte[] recordDelimiter)
 {
     this.maxLineLength = maxLineLength;
     this.@in = new SplitLineReader(@in, recordDelimiter);

     // Positional bookkeeping: reading begins at the supplied offset.
     start = offset;
     pos = offset;
     end = endOffset;

     // This constructor leaves the file-position source unset.
     filePosition = null;
 }
Example #4
0
        /// <summary>
        /// Opens the file behind <paramref name="genericSplit"/>, selects a line reader
        /// depending on whether a compression codec is registered for the file, and
        /// positions the reader at the first record belonging to this split.
        /// </summary>
        /// <param name="genericSplit">Split to read; it is cast to <c>FileSplit</c>.</param>
        /// <param name="context">Task context supplying the job configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Initialize(InputSplit genericSplit, TaskAttemptContext context)
        {
            FileSplit fileSplit = (FileSplit)genericSplit;
            Configuration conf = context.GetConfiguration();

            maxLineLength = conf.GetInt(MaxLineLength, int.MaxValue);
            start = fileSplit.GetStart();
            end = start + fileSplit.GetLength();

            // Open the file; seeking (if any) happens in the branch that needs it.
            Path path = fileSplit.GetPath();
            FileSystem fileSystem = path.GetFileSystem(conf);
            fileIn = fileSystem.Open(path);

            CompressionCodec codec = new CompressionCodecFactory(conf).GetCodec(path);
            if (codec == null)
            {
                // Plain file: jump straight to the beginning of the split.
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(
                    fileIn, conf, this.recordDelimiterBytes, fileSplit.GetLength());
                filePosition = fileIn;
            }
            else
            {
                isCompressedInput = true;
                decompressor = CodecPool.GetDecompressor(codec);

                SplittableCompressionCodec splittable = codec as SplittableCompressionCodec;
                if (splittable != null)
                {
                    SplitCompressionInputStream blockStream = splittable.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
                    @in = new CompressedSplitLineReader(blockStream, conf, this.recordDelimiterBytes);
                    // The codec reports the split boundaries it actually uses.
                    start = blockStream.GetAdjustedStart();
                    end = blockStream.GetAdjustedEnd();
                    filePosition = blockStream;
                }
                else
                {
                    @in = new SplitLineReader(
                        codec.CreateInputStream(fileIn, decompressor), conf, this.recordDelimiterBytes);
                    filePosition = fileIn;
                }
            }

            // Every split except the first discards its leading record: the reader
            // of the preceding split always reads one extra line (except for the
            // last split), so that record has already been consumed there.
            if (start != 0)
            {
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }
            pos = start;
        }
Example #5
0
        /// <summary>
        /// Opens the file behind <paramref name="split"/>, selects a line reader
        /// depending on whether the input is compressed, and positions the reader
        /// at the first record belonging to this split.
        /// </summary>
        /// <param name="job">Configuration used for the line-length limit, codec lookup and file system access.</param>
        /// <param name="split">File split describing the path, start offset and length to read.</param>
        /// <param name="recordDelimiter">Delimiter bytes forwarded to the line reader.</param>
        /// <exception cref="System.IO.IOException"/>
        public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter)
        {
            maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            start = split.GetStart();
            end = start + split.GetLength();

            Path path = split.GetPath();
            compressionCodecs = new CompressionCodecFactory(job);
            codec = compressionCodecs.GetCodec(path);

            // Open the file; seeking (if any) happens in the branch that needs it.
            FileSystem fileSystem = path.GetFileSystem(job);
            fileIn = fileSystem.Open(path);

            if (!IsCompressedInput())
            {
                // Plain file: jump straight to the beginning of the split.
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength());
                filePosition = fileIn;
            }
            else
            {
                decompressor = CodecPool.GetDecompressor(codec);

                SplittableCompressionCodec splittable = codec as SplittableCompressionCodec;
                if (splittable != null)
                {
                    SplitCompressionInputStream blockStream = splittable.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
                    @in = new CompressedSplitLineReader(blockStream, job, recordDelimiter);
                    // The codec reports the split boundaries it actually uses.
                    start = blockStream.GetAdjustedStart();
                    end = blockStream.GetAdjustedEnd();
                    filePosition = blockStream;
                }
                else
                {
                    // Position is taken from the compressed (raw file) stream.
                    @in = new SplitLineReader(
                        codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter);
                    filePosition = fileIn;
                }
            }

            // Every split except the first discards its leading record: the reader
            // of the preceding split always reads one extra line (except for the
            // last split), so that record has already been consumed there.
            if (start != 0)
            {
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }
            pos = start;
        }
        // This is also called from the old FixedLengthRecordReader API implementation
        /// <summary>
        /// Opens <paramref name="file"/> and prepares the reader for the byte range
        /// starting at <paramref name="splitStart"/>. When the start does not fall on a
        /// multiple of <c>recordLength</c>, the leading partial record is skipped.
        /// For uncompressed input the number of records in the split is computed;
        /// for compressed input it is set to <c>long.MaxValue</c>.
        /// </summary>
        /// <param name="job">Configuration used for file system access and codec lookup.</param>
        /// <param name="splitStart">Byte offset at which the split begins.</param>
        /// <param name="splitLength">Length of the split in bytes.</param>
        /// <param name="file">Path of the file to read.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Initialize(Configuration job, long splitStart, long splitLength, Path file)
        {
            start = splitStart;
            end = start + splitLength;

            // Bytes belonging to a record that began before this split.
            long leadingPartial = start % recordLength;
            long numBytesToSkip = leadingPartial != 0 ? recordLength - leadingPartial : 0;

            // Open the file; seeking (if any) happens in the uncompressed branch.
            FileSystem fileSystem = file.GetFileSystem(job);
            fileIn = fileSystem.Open(file);

            CompressionCodec codec = new CompressionCodecFactory(job).GetCodec(file);
            if (codec == null)
            {
                fileIn.Seek(start);
                filePosition = fileIn;
                inputStream = fileIn;

                long splitSize = end - start - numBytesToSkip;
                // Round up so a trailing partial record still counts; never go below zero.
                long expectedRecords = (splitSize + recordLength - 1) / recordLength;
                numRecordsRemainingInSplit = expectedRecords < 0 ? 0 : expectedRecords;
                Log.Info("Expecting " + numRecordsRemainingInSplit + " records each with a length of "
                         + recordLength + " bytes in the split with an effective size of " + splitSize +
                         " bytes");
            }
            else
            {
                isCompressedInput = true;
                decompressor = CodecPool.GetDecompressor(codec);
                CompressionInputStream compressedIn = codec.CreateInputStream(fileIn, decompressor);
                filePosition = compressedIn;
                inputStream = compressedIn;
                numRecordsRemainingInSplit = long.MaxValue;
                Log.Info("Compressed input; cannot compute number of records in the split");
            }

            // Advance past the leading partial record; start moves by the amount Skip reports.
            if (numBytesToSkip != 0)
            {
                start += inputStream.Skip(numBytesToSkip);
            }
            pos = start;
        }