Ejemplo n.º 1
0
        /// <summary>
        /// Builds a line reader over one split of a file, choosing the underlying
        /// line reader according to whether (and how) the input is compressed.
        /// </summary>
        /// <param name="job">Configuration used to look up the maximum line length,
        /// the compression codec and the file system.</param>
        /// <param name="split">The split to read; supplies the path, start and length.</param>
        /// <param name="recordDelimiter">Delimiter handed to the underlying line reader.</param>
        /// <exception cref="System.IO.IOException"/>
        public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter)
        {
            this.maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            start = split.GetStart();
            end = start + split.GetLength();

            Path splitPath = split.GetPath();
            compressionCodecs = new CompressionCodecFactory(job);
            codec = compressionCodecs.GetCodec(splitPath);

            // Open the file backing this split.
            FileSystem fileSystem = splitPath.GetFileSystem(job);
            fileIn = fileSystem.Open(splitPath);

            if (!IsCompressedInput())
            {
                // Plain input: seek straight to the start of the split.
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength());
                filePosition = fileIn;
            }
            else
            {
                decompressor = CodecPool.GetDecompressor(codec);
                if (codec is SplittableCompressionCodec)
                {
                    SplittableCompressionCodec splittableCodec = (SplittableCompressionCodec)codec;
                    SplitCompressionInputStream compressedIn = splittableCodec.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);

                    @in = new CompressedSplitLineReader(compressedIn, job, recordDelimiter);
                    // The codec may move the split boundaries; adopt its adjusted values.
                    start = compressedIn.GetAdjustedStart();
                    end = compressedIn.GetAdjustedEnd();
                    // Position is tracked on the compressed stream.
                    filePosition = compressedIn;
                }
                else
                {
                    @in = new SplitLineReader(
                        codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter);
                    // Position is tracked on the raw file stream.
                    filePosition = fileIn;
                }
            }

            // If this is not the first split, we always throw away the first record
            // because we always (except for the last split) read one extra line in
            // the next() method.
            if (start != 0)
            {
                Text discarded = new Text();
                start += @in.ReadLine(discarded, 0, MaxBytesToConsume(start));
            }
            this.pos = start;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a fixed-length record reader that wraps the
 /// <c>Org.Apache.Hadoop.Mapreduce.Lib.Input.FixedLengthRecordReader</c> implementation.
 /// </summary>
 /// <param name="job">Configuration passed to the wrapped reader's <c>Initialize</c>.</param>
 /// <param name="split">Split whose start, length and path are passed to the wrapped reader.</param>
 /// <param name="recordLength">Record length stored on this instance and given to the wrapped reader.</param>
 /// <exception cref="System.IO.IOException"/>
 public FixedLengthRecordReader(Configuration job, FileSplit split, int recordLength)
 {
     this.recordLength = recordLength;

     // Reuse the new API implementation rather than duplicating its logic here.
     reader = new Org.Apache.Hadoop.Mapreduce.Lib.Input.FixedLengthRecordReader(recordLength);
     reader.Initialize(
         job,
         split.GetStart(),
         split.GetLength(),
         split.GetPath());
 }
        /// <summary>
        /// Opens a <c>SequenceFile.Reader</c> on the split's file and positions it
        /// for reading the given split.
        /// </summary>
        /// <param name="conf">Configuration used to resolve the file system and open the reader;
        /// also stored on this instance.</param>
        /// <param name="split">The split to read; supplies the path, start and length.</param>
        /// <exception cref="System.IO.IOException"/>
        public SequenceFileRecordReader(Configuration conf, FileSplit split)
        {
            Path filePath = split.GetPath();
            FileSystem fileSystem = filePath.GetFileSystem(conf);

            this.@in = new SequenceFile.Reader(fileSystem, filePath, conf);
            this.end = split.GetStart() + split.GetLength();
            this.conf = conf;

            // Sync to the split start only when it lies beyond the reader's current position.
            bool splitStartsAhead = split.GetStart() > @in.GetPosition();
            if (splitStartsAhead)
            {
                @in.Sync(split.GetStart());
            }

            // The effective start is wherever the reader is positioned now.
            this.start = @in.GetPosition();
            more = start < end;
        }
Ejemplo n.º 4
0
            /// <summary>
            /// Opens a <c>SequenceFile.Reader</c> on the split's file, positions it for
            /// the given split and creates the value-bytes holder from the reader.
            /// </summary>
            /// <param name="conf">Configuration used to resolve the file system and open the reader.</param>
            /// <param name="split">The split to read; supplies the path, start and length.</param>
            /// <exception cref="System.IO.IOException"/>
            public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
            {
                Path filePath = split.GetPath();
                FileSystem fileSystem = filePath.GetFileSystem(conf);

                this.@in = new SequenceFile.Reader(fileSystem, filePath, conf);
                this.end = split.GetStart() + split.GetLength();

                // Sync to the split start only when it lies beyond the reader's current position.
                bool splitStartsAhead = split.GetStart() > @in.GetPosition();
                if (splitStartsAhead)
                {
                    @in.Sync(split.GetStart());
                }

                // The effective start is wherever the reader is positioned now.
                this.start = @in.GetPosition();
                vbytes = @in.CreateValueBytes();

                // Nothing to read when the effective start is already at or past the end.
                done = start >= end;
            }
        /// <summary>
        /// Verifies that the splits <c>TextInputFormat</c> produces for a single input
        /// file line up with that file's block locations: each split must have the
        /// same offset and length as the block at the same index, and the same two
        /// hosts (in either order).
        /// </summary>
        /// <remarks>
        /// Also checks that the job configuration records exactly one input file
        /// under <c>FileInputFormat.NumInputFiles</c>.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestLocality()
        {
            JobConf job = new JobConf(conf);

            dfs = NewDFSCluster(job);
            FileSystem fs = dfs.GetFileSystem();

            System.Console.Out.WriteLine("FileSystem " + fs.GetUri());
            Path   inputDir = new Path("/foo/");
            string fileName = "part-0000";

            CreateInputs(fs, inputDir, fileName);
            // split it using a file input format
            TextInputFormat.AddInputPath(job, inputDir);
            TextInputFormat inFormat = new TextInputFormat();

            inFormat.Configure(job);
            InputSplit[] splits     = inFormat.GetSplits(job, 1);
            FileStatus   fileStatus = fs.GetFileStatus(new Path(inputDir, fileName));

            BlockLocation[] locations = fs.GetFileBlockLocations(fileStatus, 0, fileStatus.GetLen());
            System.Console.Out.WriteLine("Made splits");
            // make sure that each split is a block and the locations match
            for (int i = 0; i < splits.Length; ++i)
            {
                FileSplit fileSplit = (FileSplit)splits[i];
                // Fetch the split's hosts once; they are both logged and compared below.
                string[] splitLocs = fileSplit.GetLocations();

                System.Console.Out.WriteLine("File split: " + fileSplit);
                foreach (string h in splitLocs)
                {
                    System.Console.Out.WriteLine("Location: " + h);
                }
                System.Console.Out.WriteLine("Block: " + locations[i]);
                NUnit.Framework.Assert.AreEqual(locations[i].GetOffset(), fileSplit.GetStart());
                NUnit.Framework.Assert.AreEqual(locations[i].GetLength(), fileSplit.GetLength());

                string[] blockLocs = locations[i].GetHosts();
                NUnit.Framework.Assert.AreEqual(2, blockLocs.Length);
                NUnit.Framework.Assert.AreEqual(2, splitLocs.Length);

                // The two hosts must match, but their order is not significant.
                bool sameOrder    = blockLocs[0].Equals(splitLocs[0]) && blockLocs[1].Equals(splitLocs[1]);
                bool swappedOrder = blockLocs[1].Equals(splitLocs[0]) && blockLocs[0].Equals(splitLocs[1]);
                NUnit.Framework.Assert.IsTrue(sameOrder || swappedOrder);
            }
            // NUnit takes (expected, actual, message); the message goes last, not first.
            NUnit.Framework.Assert.AreEqual(1, job.GetLong(FileInputFormat.NumInputFiles, 0),
                                            "Expected value of " + FileInputFormat.NumInputFiles);
        }