示例#1
0
        /// <summary>
        /// Creates a zero-length file under the "test" directory, asks the dummy
        /// input format for its splits, and checks that exactly one split is
        /// returned with no locations, the file's name, start 0 and length 0.
        /// </summary>
        /// <remarks>
        /// The output stream is closed and the "test" directory is removed in
        /// <c>finally</c> blocks so that a failed write or a failed assertion does
        /// not leave an open stream or stale files behind for later test runs.
        /// </remarks>
        public virtual void TestForEmptyFile()
        {
            Configuration conf    = new Configuration();
            FileSystem    fileSys = FileSystem.Get(conf);
            Path          file    = new Path("test" + "/file");

            try
            {
                FSDataOutputStream @out = fileSys.Create(file, true, conf.GetInt("io.file.buffer.size"
                                                                                 , 4096), (short)1, (long)1024);

                try
                {
                    // Writing an empty buffer leaves the file at zero bytes.
                    @out.Write(new byte[0]);
                }
                finally
                {
                    @out.Close();
                }

                // split it using a File input format
                TestMRCJCFileInputFormat.DummyInputFormat inFormat = new TestMRCJCFileInputFormat.DummyInputFormat
                                                                         (this);
                Job job = Job.GetInstance(conf);

                FileInputFormat.SetInputPaths(job, "test");
                IList <InputSplit> splits = inFormat.GetSplits(job);

                NUnit.Framework.Assert.AreEqual(1, splits.Count);
                FileSplit fileSplit = (FileSplit)splits[0];

                NUnit.Framework.Assert.AreEqual(0, fileSplit.GetLocations().Length);
                NUnit.Framework.Assert.AreEqual(file.GetName(), fileSplit.GetPath().GetName());
                NUnit.Framework.Assert.AreEqual(0, fileSplit.GetStart());
                NUnit.Framework.Assert.AreEqual(0, fileSplit.GetLength());
            }
            finally
            {
                // Always remove the test directory, even when an assertion above failed.
                fileSys.Delete(file.GetParent(), true);
            }
        }
        /// <summary>
        /// Unpacks the file split and the task configuration, then forwards the
        /// split's start offset, length and path to the four-argument
        /// <c>Initialize</c> overload.
        /// </summary>
        /// <param name="genericSplit">The split to read; it is cast to <c>FileSplit</c>.</param>
        /// <param name="context">Task attempt context supplying the configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Initialize(InputSplit genericSplit, TaskAttemptContext context)
        {
            FileSplit     fileSplit = (FileSplit)genericSplit;
            Configuration conf      = context.GetConfiguration();
            Path          splitPath = fileSplit.GetPath();

            Initialize(conf, fileSplit.GetStart(), fileSplit.GetLength(), splitPath);
        }
示例#3
0
        /// <summary>
        /// Prepares this reader for one file split: records the split boundaries,
        /// opens the file, picks a line reader depending on whether a compression
        /// codec is registered for the file, and positions the reader at the first
        /// record that belongs to this split.
        /// </summary>
        /// <param name="genericSplit">The split to read; it is cast to <c>FileSplit</c>.</param>
        /// <param name="context">Task attempt context supplying the configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Initialize(InputSplit genericSplit, TaskAttemptContext context)
        {
            FileSplit     fileSplit = (FileSplit)genericSplit;
            Configuration conf      = context.GetConfiguration();

            this.maxLineLength = conf.GetInt(MaxLineLength, int.MaxValue);
            start = fileSplit.GetStart();
            end   = start + fileSplit.GetLength();
            Path path = fileSplit.GetPath();

            // Open the underlying file; seeking (if any) happens per branch below.
            FileSystem fileSystem = path.GetFileSystem(conf);

            fileIn = fileSystem.Open(path);
            CompressionCodec codec = new CompressionCodecFactory(conf).GetCodec(path);

            if (codec == null)
            {
                // Uncompressed input: jump straight to the split's start offset.
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(fileIn, conf, this.recordDelimiterBytes,
                                                      fileSplit.GetLength());
                filePosition = fileIn;
            }
            else
            {
                isCompressedInput = true;
                decompressor      = CodecPool.GetDecompressor(codec);

                SplittableCompressionCodec splittableCodec = codec as SplittableCompressionCodec;

                if (splittableCodec != null)
                {
                    // Splittable codec: the stream may adjust the split boundaries,
                    // so adopt the adjusted start/end it reports.
                    SplitCompressionInputStream compressedIn = splittableCodec.CreateInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);

                    @in          = new CompressedSplitLineReader(compressedIn, conf, this.recordDelimiterBytes);
                    start        = compressedIn.GetAdjustedStart();
                    end          = compressedIn.GetAdjustedEnd();
                    filePosition = compressedIn;
                }
                else
                {
                    // Non-splittable codec: read through a plain decompressing stream.
                    @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), conf,
                                              this.recordDelimiterBytes);
                    filePosition = fileIn;
                }
            }

            // Every split except the first discards its first record, because the
            // reader of the preceding split always reads one extra line in next().
            if (start != 0)
            {
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }
            this.pos = start;
        }
        /// <summary>
        /// Opens a <c>SequenceFile.Reader</c> on the split's path, records where the
        /// split ends, syncs the reader forward when the split starts beyond the
        /// reader's current position, and notes whether there is anything to read.
        /// </summary>
        /// <param name="split">The split to read; it is cast to <c>FileSplit</c>.</param>
        /// <param name="context">Task attempt context supplying the configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override void Initialize(InputSplit split, TaskAttemptContext context)
        {
            FileSplit sequenceSplit = (FileSplit)split;

            conf = context.GetConfiguration();
            Path       splitPath  = sequenceSplit.GetPath();
            FileSystem fileSystem = splitPath.GetFileSystem(conf);

            this.@in = new SequenceFile.Reader(fileSystem, splitPath, conf);

            long splitStart = sequenceSplit.GetStart();

            this.end = splitStart + sequenceSplit.GetLength();

            // Only sync when the reader has not already advanced past the split start.
            // NOTE(review): Sync presumably moves to the next sync marker at or after
            // the given offset — confirm against SequenceFile.Reader.
            if (@in.GetPosition() < splitStart)
            {
                @in.Sync(splitStart);
            }

            // The effective start is wherever the reader ended up.
            this.start = @in.GetPosition();
            more       = this.start < this.end;
        }
示例#5
0
        /// <summary>
        /// Checks that this instance's <c>fileSplit</c> agrees with the split
        /// described in the task configuration: same start offset, same length and
        /// same path string.
        /// </summary>
        /// <param name="context">Task attempt context supplying the configuration.</param>
        /// <returns><c>true</c> when all three values match; otherwise <c>false</c>.</returns>
        private bool FileSplitIsValid(TaskAttemptContext context)
        {
            Configuration jobConf = context.GetConfiguration();

            long expectedStart = jobConf.GetLong(MRJobConfig.MapInputStart, 0L);

            if (expectedStart != fileSplit.GetStart())
            {
                return false;
            }

            // NOTE(review): the length is read through MapInputPath. In upstream
            // Hadoop this oddly named constant appears to hold the input-length key;
            // confirm against MRJobConfig before renaming or "fixing" it.
            long expectedLength = jobConf.GetLong(MRJobConfig.MapInputPath, 0L);

            if (expectedLength != fileSplit.GetLength())
            {
                return false;
            }

            string expectedPath = jobConf.Get(MRJobConfig.MapInputFile);

            return fileSplit.GetPath().ToString().Equals(expectedPath);
        }