/// <summary>
/// Creates a record reader over the part of a sequence file described by
/// <paramref name="split"/>. Opens a <c>SequenceFile.Reader</c> on the split's path,
/// computes the end offset of the split and initialises the <c>start</c> and
/// <c>more</c> fields from the reader's position.
/// </summary>
/// <param name="conf">Configuration used to resolve the file system and open the reader; also stored in <c>this.conf</c>.</param>
/// <param name="split">File split that supplies the path, start offset and length.</param>
/// <exception cref="System.IO.IOException"/>
        public SequenceFileRecordReader(Configuration conf, FileSplit split)
            // NOTE(review): the braces delimiting this constructor body are not present in this
            // excerpt; the indentation below is the only indication of the body's extent.

            // Resolve the file backing the split and the file system that owns it.
            Path       path = split.GetPath();
            FileSystem fs   = path.GetFileSystem(conf);

            this.@in  = new SequenceFile.Reader(fs, path, conf);
            // End offset of the split: its start offset plus its length.
            this.end  = split.GetStart() + split.GetLength();
            this.conf = conf;
            // Only relevant when the split begins beyond the reader's current position.
            // NOTE(review): the statement guarded by this `if` appears to be missing from the
            // excerpt (the comment below suggests a "sync to start" call on the reader). As
            // literally written, the `if` would govern the assignment to `start` - confirm
            // against the original source.
            if (split.GetStart() > @in.GetPosition())
            // sync to start
            // Record the reader's position as the effective start offset.
            this.start = @in.GetPosition();
            // There is something to read only if the start offset lies before the end offset.
            more       = start < end;
예제 #2
            /// <summary>
            /// Creates a record reader over the part of a sequence file described by
            /// <paramref name="split"/>. Opens a <c>SequenceFile.Reader</c> on the split's path,
            /// computes the end offset of the split, obtains a value-bytes holder from the
            /// reader and initialises the <c>start</c> and <c>done</c> fields.
            /// </summary>
            /// <param name="conf">Configuration used to resolve the file system and open the reader.</param>
            /// <param name="split">File split that supplies the path, start offset and length.</param>
            /// <exception cref="System.IO.IOException"/>
            public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
                // NOTE(review): the braces delimiting this constructor body are not present in
                // this excerpt; the indentation below is the only indication of the body's extent.

                // Resolve the file backing the split and the file system that owns it.
                Path       path = split.GetPath();
                FileSystem fs   = path.GetFileSystem(conf);

                this.@in = new SequenceFile.Reader(fs, path, conf);
                // End offset of the split: its start offset plus its length.
                this.end = split.GetStart() + split.GetLength();
                // Only relevant when the split begins beyond the reader's current position.
                // NOTE(review): the statement guarded by this `if` appears to be missing from
                // the excerpt (the comment below suggests a "sync to start" call on the reader).
                // As literally written, the `if` would govern the assignment to `start` -
                // confirm against the original source.
                if (split.GetStart() > @in.GetPosition())
                // sync to start
                // Record the reader's position as the effective start offset.
                this.start = @in.GetPosition();
                // Value-bytes object obtained from the reader and kept in the vbytes field.
                vbytes     = @in.CreateValueBytes();
                // Nothing to read when the start offset is at or past the end offset.
                done       = start >= end;
 /// <summary>
 /// Creates a fixed-length record reader for <paramref name="split"/> by delegating to
 /// <c>Org.Apache.Hadoop.Mapreduce.Lib.Input.FixedLengthRecordReader</c>: the delegate is
 /// constructed and then initialised with the job configuration and the split's start
 /// offset, length and path.
 /// </summary>
 /// <param name="job">Job configuration forwarded to the delegate's <c>Initialize</c> call.</param>
 /// <param name="split">File split that supplies the start offset, length and path.</param>
 /// <param name="recordLength">Record length stored on this instance and passed to the delegate's constructor.</param>
 /// <exception cref="System.IO.IOException"/>
 public FixedLengthRecordReader(Configuration job, FileSplit split, int recordLength
     // NOTE(review): the signature above and the constructor call below are cut off in this
     // excerpt (no closing parenthesis), and the body's braces are not present - confirm
     // against the original source.
     // Make use of the new API implementation to avoid code duplication.
     this.recordLength = recordLength;
     reader            = new Org.Apache.Hadoop.Mapreduce.Lib.Input.FixedLengthRecordReader(recordLength
     // Hand the split's coordinates to the delegate reader.
     reader.Initialize(job, split.GetStart(), split.GetLength(), split.GetPath());
예제 #4
        /// <summary>
        /// Creates a line record reader for <paramref name="split"/>. Opens the split's file,
        /// selects a line reader depending on whether the input is compressed and whether the
        /// codec is a <c>SplittableCompressionCodec</c>, and, when the split does not begin at
        /// offset 0, reads one line up front so that <c>start</c> and <c>pos</c> point past it.
        /// </summary>
        /// <param name="job">Job configuration; supplies the maximum line length and is passed to the codec factory, file system lookup and line readers.</param>
        /// <param name="split">File split that supplies the path, start offset and length.</param>
        /// <param name="recordDelimiter">Record delimiter bytes forwarded to the line reader that is created.</param>
        /// <exception cref="System.IO.IOException"/>
        public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter
            // NOTE(review): the signature above is cut off in this excerpt (no closing
            // parenthesis), and the braces and `else` keywords of the body are not present;
            // the indentation below is the only indication of the branch structure.

            // Maximum line length from the job configuration; int.MaxValue when the key is unset.
            this.maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            // Byte range of the split: [start, start + length).
            start = split.GetStart();
            end   = start + split.GetLength();
            Path file = split.GetPath();

            // Look up the compression codec for this file via the codec factory.
            compressionCodecs = new CompressionCodecFactory(job);
            codec             = compressionCodecs.GetCodec(file);
            // open the file and seek to the start of the split
            FileSystem fs = file.GetFileSystem(job);

            fileIn = fs.Open(file);
            if (IsCompressedInput())
                // Compressed input: obtain a decompressor for the codec from the pool.
                decompressor = CodecPool.GetDecompressor(codec);
                if (codec is SplittableCompressionCodec)
                    // Splittable codec: open a split-aware compressed stream over [start, end).
                    SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).CreateInputStream
                                                          (fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
                    @in          = new CompressedSplitLineReader(cIn, job, recordDelimiter);
                    // Replace the split boundaries with the adjusted ones reported by the stream.
                    start        = cIn.GetAdjustedStart();
                    end          = cIn.GetAdjustedEnd();
                    filePosition = cIn;
                    // take pos from compressed stream
                    // NOTE(review): indentation suggests the next two statements belong to an
                    // `else` branch (codec is not splittable); the `else` is missing from this
                    // excerpt and the first statement is cut off - confirm.
                    @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter
                    filePosition = fileIn;
                // NOTE(review): indentation suggests the next two statements belong to the
                // `else` branch of `if (IsCompressedInput())` (uncompressed input); the `else`
                // is missing from this excerpt and the first statement is cut off - confirm.
                @in = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength
                filePosition = fileIn;
            // If this is not the first split, we always throw away first record
            // because we always (except the last split) read one extra line in
            // next() method.
            if (start != 0)
                // Read one line into a throwaway Text and advance start by the value ReadLine
                // returns (presumably the number of bytes consumed - confirm).
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            // The current read position begins at the (possibly advanced) start offset.
            this.pos = start;
        /// <summary>
        /// Test that creates an input file on a new DFS cluster, splits it with
        /// <c>TextInputFormat</c>, and checks each split against the file's block locations:
        /// matching offset and length, two hosts on each side, and the same hosts in either
        /// order. Finally checks that the job's <c>FileInputFormat.NumInputFiles</c> value is 1.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestLocality()
            // NOTE(review): the braces of this method and of its loops are not present in this
            // excerpt, and several long lines below are cut off; the indentation is the only
            // indication of the block structure - confirm against the original source.
            JobConf job = new JobConf(conf);

            // Start a DFS cluster for this job and obtain its file system.
            dfs = NewDFSCluster(job);
            FileSystem fs = dfs.GetFileSystem();

            System.Console.Out.WriteLine("FileSystem " + fs.GetUri());
            Path   inputDir = new Path("/foo/");
            string fileName = "part-0000";

            // Create the input file under inputDir via the CreateInputs helper.
            CreateInputs(fs, inputDir, fileName);
            // split it using a file input format
            TextInputFormat.AddInputPath(job, inputDir);
            TextInputFormat inFormat = new TextInputFormat();

            // 1 is passed as the second GetSplits argument (presumably a split-count hint - confirm).
            InputSplit[] splits     = inFormat.GetSplits(job, 1);
            FileStatus   fileStatus = fs.GetFileStatus(new Path(inputDir, fileName));

            // Block locations of the file starting at offset 0 (the call is cut off in this excerpt).
            BlockLocation[] locations = fs.GetFileBlockLocations(fileStatus, 0, fileStatus.GetLen
            System.Console.Out.WriteLine("Made splits");
            // make sure that each split is a block and the locations match
            for (int i = 0; i < splits.Length; ++i)
                FileSplit fileSplit = (FileSplit)splits[i];
                System.Console.Out.WriteLine("File split: " + fileSplit);
                // Print every location reported by the split.
                foreach (string h in fileSplit.GetLocations())
                    System.Console.Out.WriteLine("Location: " + h);
                System.Console.Out.WriteLine("Block: " + locations[i]);
                // The i-th split must line up with the i-th block: same offset, same length.
                NUnit.Framework.Assert.AreEqual(locations[i].GetOffset(), fileSplit.GetStart());
                NUnit.Framework.Assert.AreEqual(locations[i].GetLength(), fileSplit.GetLength());
                string[] blockLocs = locations[i].GetHosts();
                string[] splitLocs = fileSplit.GetLocations();
                // Both the block and the split are expected to report exactly two hosts.
                NUnit.Framework.Assert.AreEqual(2, blockLocs.Length);
                NUnit.Framework.Assert.AreEqual(2, splitLocs.Length);
                // The two hosts must be the same on both sides, in either order
                // (the expression is cut off in this excerpt).
                NUnit.Framework.Assert.IsTrue((blockLocs[0].Equals(splitLocs[0]) && blockLocs[1].
                                               Equals(splitLocs[1])) || (blockLocs[1].Equals(splitLocs[0]) && blockLocs[0].Equals
            // The job's NumInputFiles value (read with default 0) is expected to be 1.
            NUnit.Framework.Assert.AreEqual("Expected value of " + FileInputFormat.NumInputFiles
                                            , 1, job.GetLong(FileInputFormat.NumInputFiles, 0));