/// <summary>
/// Opens a <c>SequenceFile.Reader</c> on the file named by <paramref name="split"/>
/// and positions it for reading that split.
/// </summary>
/// <param name="conf">
/// Configuration used to resolve the file system and open the reader; it is
/// also stored on this instance.
/// </param>
/// <param name="split">Supplies the file path, the start offset and the length.</param>
/// <exception cref="System.IO.IOException"/>
public SequenceFileRecordReader(Configuration conf, FileSplit split)
{
    Path splitPath = split.GetPath();
    FileSystem fileSystem = splitPath.GetFileSystem(conf);
    this.@in = new SequenceFile.Reader(fileSystem, splitPath, conf);

    long splitStart = split.GetStart();
    this.end = splitStart + split.GetLength();
    this.conf = conf;

    // Only sync when the split begins beyond the reader's current position.
    if (splitStart > this.@in.GetPosition())
    {
        this.@in.Sync(splitStart);
    }

    // The effective start is wherever the reader ended up after the optional sync.
    this.start = this.@in.GetPosition();
    this.more = this.start < this.end;
}
/// <summary>
/// Opens a <c>SequenceFile.Reader</c> on the file named by <paramref name="split"/>,
/// positions it for reading that split and allocates the value-bytes holder.
/// </summary>
/// <param name="conf">Configuration used to resolve the file system and open the reader.</param>
/// <param name="split">Supplies the file path, the start offset and the length.</param>
/// <exception cref="System.IO.IOException"/>
public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
{
    Path splitPath = split.GetPath();
    FileSystem fileSystem = splitPath.GetFileSystem(conf);
    this.@in = new SequenceFile.Reader(fileSystem, splitPath, conf);

    long splitStart = split.GetStart();
    this.end = splitStart + split.GetLength();

    // Only sync when the split begins beyond the reader's current position.
    if (splitStart > this.@in.GetPosition())
    {
        this.@in.Sync(splitStart);
    }

    // The effective start is wherever the reader ended up after the optional sync.
    this.start = this.@in.GetPosition();
    this.vbytes = this.@in.CreateValueBytes();

    // Nothing to read when the effective start is already at or past the end.
    this.done = this.start >= this.end;
}
/// <summary>
/// Creates a reader for fixed-length records by wrapping the new-API
/// <c>Org.Apache.Hadoop.Mapreduce.Lib.Input.FixedLengthRecordReader</c>.
/// </summary>
/// <param name="job">Configuration forwarded to the wrapped reader's <c>Initialize</c>.</param>
/// <param name="split">Supplies the start offset, length and path forwarded to <c>Initialize</c>.</param>
/// <param name="recordLength">Record length stored on this instance and passed to the wrapped reader.</param>
/// <exception cref="System.IO.IOException"/>
public FixedLengthRecordReader(Configuration job, FileSplit split, int recordLength)
{
    this.recordLength = recordLength;

    // Reuse the new API implementation rather than duplicating its logic here.
    this.reader = new Org.Apache.Hadoop.Mapreduce.Lib.Input.FixedLengthRecordReader(recordLength);

    long splitStart = split.GetStart();
    long splitLength = split.GetLength();
    Path splitPath = split.GetPath();
    this.reader.Initialize(job, splitStart, splitLength, splitPath);
}
/// <summary>
/// Opens the file named by <paramref name="split"/> and builds the line reader
/// for it. One of three readers is chosen: an uncompressed split reader, a
/// compressed split reader for a <c>SplittableCompressionCodec</c>, or a plain
/// split reader over a decompressing stream for any other codec.
/// </summary>
/// <param name="job">
/// Configuration used for the maximum line length, codec lookup, file system
/// resolution and the line readers.
/// </param>
/// <param name="split">Supplies the file path, the start offset and the length.</param>
/// <param name="recordDelimiter">Delimiter bytes handed to the line reader.</param>
/// <exception cref="System.IO.IOException"/>
public LineRecordReader(Configuration job, FileSplit split, byte[] recordDelimiter)
{
    this.maxLineLength = job.GetInt(LineRecordReader.MaxLineLength, int.MaxValue);
    start = split.GetStart();
    end = start + split.GetLength();

    Path splitFile = split.GetPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.GetCodec(splitFile);

    // Open the file; how it is positioned depends on the branch taken below.
    FileSystem fileSystem = splitFile.GetFileSystem(job);
    fileIn = fileSystem.Open(splitFile);

    if (!IsCompressedInput())
    {
        // Uncompressed input: seek straight to the start of the split.
        fileIn.Seek(start);
        @in = new UncompressedSplitLineReader(fileIn, job, recordDelimiter, split.GetLength());
        filePosition = fileIn;
    }
    else
    {
        decompressor = CodecPool.GetDecompressor(codec);
        SplittableCompressionCodec splittableCodec = codec as SplittableCompressionCodec;
        if (splittableCodec == null)
        {
            // Non-splittable codec: read through a decompressing stream and
            // track position on the underlying file stream.
            @in = new SplitLineReader(
                codec.CreateInputStream(fileIn, decompressor), job, recordDelimiter);
            filePosition = fileIn;
        }
        else
        {
            SplitCompressionInputStream blockStream = splittableCodec.CreateInputStream(
                fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
            @in = new CompressedSplitLineReader(blockStream, job, recordDelimiter);

            // The codec may move the split boundaries; adopt its adjusted values.
            start = blockStream.GetAdjustedStart();
            end = blockStream.GetAdjustedEnd();

            // Take the position from the compressed stream.
            filePosition = blockStream;
        }
    }

    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0)
    {
        int skippedBytes = @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
        start += skippedBytes;
    }

    this.pos = start;
}
/// <summary>
/// Checks that the splits <c>TextInputFormat</c> produces for a single file on
/// a DFS cluster match the file's block locations. Each split must have the
/// same offset and length as the block at the same index, and the same two
/// hosts in either order. Finally checks that the job configuration records
/// exactly one input file under <c>FileInputFormat.NumInputFiles</c>.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestLocality()
{
    JobConf job = new JobConf(conf);
    dfs = NewDFSCluster(job);
    FileSystem fs = dfs.GetFileSystem();
    System.Console.Out.WriteLine("FileSystem " + fs.GetUri());

    Path inputDir = new Path("/foo/");
    string fileName = "part-0000";
    CreateInputs(fs, inputDir, fileName);

    // Split it using a file input format.
    TextInputFormat.AddInputPath(job, inputDir);
    TextInputFormat inFormat = new TextInputFormat();
    inFormat.Configure(job);
    InputSplit[] splits = inFormat.GetSplits(job, 1);

    FileStatus fileStatus = fs.GetFileStatus(new Path(inputDir, fileName));
    BlockLocation[] locations = fs.GetFileBlockLocations(fileStatus, 0, fileStatus.GetLen());
    System.Console.Out.WriteLine("Made splits");

    // Make sure that each split is a block and the locations match.
    for (int i = 0; i < splits.Length; ++i)
    {
        FileSplit fileSplit = (FileSplit)splits[i];
        System.Console.Out.WriteLine("File split: " + fileSplit);

        // Fetch the split hosts once; they are used for logging and for the checks below.
        string[] splitLocs = fileSplit.GetLocations();
        foreach (string h in splitLocs)
        {
            System.Console.Out.WriteLine("Location: " + h);
        }
        System.Console.Out.WriteLine("Block: " + locations[i]);

        NUnit.Framework.Assert.AreEqual(locations[i].GetOffset(), fileSplit.GetStart());
        NUnit.Framework.Assert.AreEqual(locations[i].GetLength(), fileSplit.GetLength());

        string[] blockLocs = locations[i].GetHosts();
        NUnit.Framework.Assert.AreEqual(2, blockLocs.Length);
        NUnit.Framework.Assert.AreEqual(2, splitLocs.Length);

        // The two hosts must match, but their order is allowed to differ.
        bool sameOrder = blockLocs[0].Equals(splitLocs[0]) && blockLocs[1].Equals(splitLocs[1]);
        bool swappedOrder = blockLocs[1].Equals(splitLocs[0]) && blockLocs[0].Equals(splitLocs[1]);
        NUnit.Framework.Assert.IsTrue(sameOrder || swappedOrder);
    }

    // NUnit takes the failure message last: (expected, actual, message).
    NUnit.Framework.Assert.AreEqual(
        1,
        job.GetLong(FileInputFormat.NumInputFiles, 0),
        "Expected value of " + FileInputFormat.NumInputFiles);
}