FileSplit, Org.Apache.Hadoop.Mapreduce.Lib.Input C# (CSharp) Code-Beispiele

Beispiel #1

0

Datei anzeigen

Datei: TestFixedLengthInputFormat.cs Projekt: orf53975/hadoop.net

        /// <summary>
        /// Reads two gzip-compressed files of 5-byte fixed-length records and checks
        /// that two splits are produced and that each split decodes to ten records.
        /// </summary>
        /// <remarks>
        /// Assertions use NUnit's classic argument order (expected, actual, message).
        /// With the message first, the three-string overloads would compare the label
        /// against the expected value instead of checking the record.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestGzipWithTwoInputs()
        {
            CompressionCodec gzip = new GzipCodec();

            localFs.Delete(workDir, true);
            Job job = Job.GetInstance(defaultConf);
            FixedLengthInputFormat format = new FixedLengthInputFormat();

            FixedLengthInputFormat.SetRecordLength(job.GetConfiguration(), 5);
            ReflectionUtils.SetConf(gzip, job.GetConfiguration());
            FileInputFormat.SetInputPaths(job, workDir);
            // Create files with fixed length records with 5 byte long records.
            WriteFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
                      "one  two  threefour five six  seveneightnine ten  ");
            WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
                      "ten  nine eightsevensix  five four threetwo  one  ");
            IList <InputSplit> splits = format.GetSplits(job);

            NUnit.Framework.Assert.AreEqual(2, splits.Count, "compressed splits == 2");
            // The splits may come back in either order; normalise so that
            // splits[0] is part1 and splits[1] is part2 before reading them.
            FileSplit tmp = (FileSplit)splits[0];

            if (tmp.GetPath().GetName().Equals("part2.txt.gz"))
            {
                splits.Set(0, splits[1]);
                splits.Set(1, tmp);
            }
            IList <string> results = ReadSplit(format, splits[0], job);

            NUnit.Framework.Assert.AreEqual(10, results.Count, "splits[0] length");
            NUnit.Framework.Assert.AreEqual("six  ", results[5], "splits[0][5]");
            results = ReadSplit(format, splits[1], job);
            NUnit.Framework.Assert.AreEqual(10, results.Count, "splits[1] length");
            NUnit.Framework.Assert.AreEqual("ten  ", results[0], "splits[1][0]");
            NUnit.Framework.Assert.AreEqual("nine ", results[1], "splits[1][1]");
        }

Beispiel #2

0

Datei anzeigen

        /// <summary>
        /// Writes a zero-byte file and checks that the input format returns exactly
        /// one split for it, with no locations, start 0 and length 0.
        /// </summary>
        /// <remarks>
        /// The output stream and the "test" directory are released in finally blocks
        /// so a failing write or assertion does not leak the stream or leave the
        /// directory behind for later tests.
        /// </remarks>
        public virtual void TestForEmptyFile()
        {
            Configuration conf    = new Configuration();
            FileSystem    fileSys = FileSystem.Get(conf);
            Path          file    = new Path("test" + "/file");

            try
            {
                FSDataOutputStream @out = fileSys.Create(file, true, conf.GetInt("io.file.buffer.size"
                                                                                 , 4096), (short)1, (long)1024);
                try
                {
                    @out.Write(new byte[0]);
                }
                finally
                {
                    @out.Close();
                }
                // split it using a File input format
                TestMRCJCFileInputFormat.DummyInputFormat inFormat = new TestMRCJCFileInputFormat.DummyInputFormat
                                                                         (this);
                Job job = Job.GetInstance(conf);

                FileInputFormat.SetInputPaths(job, "test");
                IList <InputSplit> splits = inFormat.GetSplits(job);

                NUnit.Framework.Assert.AreEqual(1, splits.Count);
                FileSplit fileSplit = (FileSplit)splits[0];

                NUnit.Framework.Assert.AreEqual(0, fileSplit.GetLocations().Length);
                NUnit.Framework.Assert.AreEqual(file.GetName(), fileSplit.GetPath().GetName());
                NUnit.Framework.Assert.AreEqual(0, fileSplit.GetStart());
                NUnit.Framework.Assert.AreEqual(0, fileSplit.GetLength());
            }
            finally
            {
                // Remove the whole "test" directory, not just the file.
                fileSys.Delete(file.GetParent(), true);
            }
        }

Beispiel #3

0

Datei anzeigen

        /// <summary>
        /// Reads a test file with LineRecordReader by walking it in consecutive
        /// splits of <paramref name="splitSize"/> bytes and collecting every value.
        /// </summary>
        /// <param name="testFileUrl">Location of the file to read.</param>
        /// <param name="splitSize">
        /// Length of each split. The offset advances by this amount per iteration,
        /// so it must be positive for the loop to terminate.
        /// </param>
        /// <returns>The record values in the order the readers returned them.</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual AList <string> ReadRecords(Uri testFileUrl, int splitSize)
        {
            // Set up context
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            long          testFileSize = testFile.Length();
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            Configuration conf         = new Configuration();

            conf.SetInt("io.file.buffer.size", 1);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // Gather the records returned by the record reader
            AList <string> records = new AList <string>();
            long           offset  = 0;

            while (offset < testFileSize)
            {
                FileSplit        split  = new FileSplit(testFilePath, offset, splitSize, null);
                LineRecordReader reader = new LineRecordReader();
                reader.Initialize(split, context);
                try
                {
                    while (reader.NextKeyValue())
                    {
                        records.AddItem(reader.GetCurrentValue().ToString());
                    }
                }
                finally
                {
                    // One reader is opened per split; close each one so the loop
                    // does not accumulate open readers, even when a read fails.
                    reader.Close();
                }
                offset += splitSize;
            }
            return(records);
        }

Beispiel #4

0

Datei anzeigen

 /// <summary>
 /// Builds a single-file split from entry <paramref name="idx"/> of the combined
 /// split and asks the input format for the record reader to delegate to.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 protected internal CombineFileRecordReaderWrapper(FileInputFormat <K, V> inputFormat,
                                                   CombineFileSplit split,
                                                   TaskAttemptContext context,
                                                   int idx)
 {
     // Pull the idx-th file's coordinates out of the combined split.
     Path     chunkPath   = split.GetPath(idx);
     long     chunkOffset = split.GetOffset(idx);
     long     chunkLength = split.GetLength(idx);
     string[] hosts       = split.GetLocations();

     fileSplit = new FileSplit(chunkPath, chunkOffset, chunkLength, hosts);
     delegate_ = inputFormat.CreateRecordReader(fileSplit, context);
 }

Beispiel #5

0

Datei anzeigen

Datei: FixedLengthRecordReader.cs Projekt: orf53975/hadoop.net

        /// <summary>
        /// Unpacks the file split and the task configuration and forwards them to
        /// the Initialize overload that takes the start, length and path directly.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public override void Initialize(InputSplit genericSplit, TaskAttemptContext context)
        {
            // Cast first so a wrong split type fails before anything else is touched.
            FileSplit fileSplit = (FileSplit)genericSplit;

            Initialize(context.GetConfiguration(),
                       fileSplit.GetStart(),
                       fileSplit.GetLength(),
                       fileSplit.GetPath());
        }

Beispiel #6

0

Datei anzeigen

        /// <summary>
        /// Checks that reading a file as two splits (the first
        /// <paramref name="firstSplitLength"/> bytes, then the remainder) returns the
        /// same number of records as reading it as a single split.
        /// </summary>
        /// <param name="conf">Configuration; its record delimiter setting, if any, is honoured.</param>
        /// <param name="firstSplitLength">Length of the first split; must be smaller than the file.</param>
        /// <param name="testFileSize">Total size of the file in bytes.</param>
        /// <param name="testFilePath">Path of the file to read.</param>
        /// <exception cref="System.IO.IOException"/>
        private void TestSplitRecordsForFile(Configuration conf, long firstSplitLength, long
                                             testFileSize, Path testFilePath)
        {
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(testFileSize > firstSplitLength,
                                          "unexpected test data at " + testFilePath);
            string delimiter = conf.Get("textinputformat.record.delimiter");

            byte[] recordDelimiterBytes = null;
            if (null != delimiter)
            {
                recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8
                                                                         );
            }
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // read the data without splitting to count the records
            int numRecordsNoSplits = CountRecordsInSplit(
                new FileSplit(testFilePath, 0, testFileSize, (string[])null),
                recordDelimiterBytes, context);
            // count the records in the first split
            int numRecordsFirstSplit = CountRecordsInSplit(
                new FileSplit(testFilePath, 0, firstSplitLength, (string[])null),
                recordDelimiterBytes, context);
            // count the records in the second split
            int numRecordsRemainingSplits = CountRecordsInSplit(
                new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength
                              , (string[])null),
                recordDelimiterBytes, context);

            NUnit.Framework.Assert.AreEqual(numRecordsNoSplits,
                                            numRecordsFirstSplit + numRecordsRemainingSplits,
                                            "Unexpected number of records in split ");
        }

        /// <summary>
        /// Counts the records a fresh LineRecordReader returns for one split and
        /// closes the reader afterwards, even if reading throws.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        private static int CountRecordsInSplit(FileSplit split, byte[] recordDelimiterBytes,
                                               TaskAttemptContext context)
        {
            LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);

            reader.Initialize(split, context);
            try
            {
                int numRecords = 0;

                while (reader.NextKeyValue())
                {
                    ++numRecords;
                }
                return numRecords;
            }
            finally
            {
                reader.Close();
            }
        }

Beispiel #7

0

Datei anzeigen

Datei: LineRecordReader.cs Projekt: orf53975/hadoop.net

        /// <summary>
        /// Prepares this reader for one file split: records the split's byte range,
        /// opens the file, chooses a line reader depending on whether a compression
        /// codec is found for the file (and whether that codec is splittable), and,
        /// for a split that does not start at offset 0, consumes the first line.
        /// </summary>
        /// <param name="genericSplit">The split to read; it is cast to FileSplit.</param>
        /// <param name="context">Task context supplying the configuration.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Initialize(InputSplit genericSplit, TaskAttemptContext context
                                        )
        {
            FileSplit     split = (FileSplit)genericSplit;
            Configuration job   = context.GetConfiguration();

            // Maximum line length comes from the configuration; int.MaxValue is
            // passed as the second (presumably fallback) argument.
            this.maxLineLength = job.GetInt(MaxLineLength, int.MaxValue);
            // Byte range covered by this split: [start, start + length).
            start = split.GetStart();
            end   = start + split.GetLength();
            Path file = split.GetPath();
            // open the file and seek to the start of the split
            FileSystem fs = file.GetFileSystem(job);

            fileIn = fs.Open(file);
            // Look up a codec for the file; null means no codec was found and the
            // file is read as uncompressed data.
            CompressionCodec codec = new CompressionCodecFactory(job).GetCodec(file);

            if (null != codec)
            {
                isCompressedInput = true;
                decompressor      = CodecPool.GetDecompressor(codec);
                if (codec is SplittableCompressionCodec)
                {
                    // Splittable codec: the codec builds a stream for [start, end).
                    // The stream reports adjusted boundaries, which replace the
                    // start/end taken from the split above.
                    SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).CreateInputStream
                                                          (fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.Byblock);
                    @in          = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                    start        = cIn.GetAdjustedStart();
                    end          = cIn.GetAdjustedEnd();
                    filePosition = cIn;
                }
                else
                {
                    // Non-splittable codec: no seek is performed in this branch; the
                    // decompressing stream wraps fileIn as opened.
                    // NOTE(review): start is not validated here, so a non-zero start
                    // with a non-splittable codec is not rejected — confirm intended.
                    @in = new SplitLineReader(codec.CreateInputStream(fileIn, decompressor), job, this
                                              .recordDelimiterBytes);
                    filePosition = fileIn;
                }
            }
            else
            {
                // Uncompressed input: position the raw stream at the split start.
                fileIn.Seek(start);
                @in = new UncompressedSplitLineReader(fileIn, job, this.recordDelimiterBytes, split
                                                      .GetLength());
                filePosition = fileIn;
            }
            // If this is not the first split, we always throw away first record
            // because we always (except the last split) read one extra line in
            // next() method.
            if (start != 0)
            {
                // ReadLine's return value is added to start, so start ends up just
                // past the discarded line.
                start += @in.ReadLine(new Text(), 0, MaxBytesToConsume(start));
            }
            this.pos = start;
        }

Beispiel #8

0

Datei anzeigen

Datei: SequenceFileRecordReader.cs Projekt: orf53975/hadoop.net

        /// <summary>
        /// Opens a SequenceFile reader for the split's path, syncs it forward when
        /// the split starts beyond the reader's current position, and records the
        /// start/end bounds that decide whether there is anything to read.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override void Initialize(InputSplit split, TaskAttemptContext context)
        {
            FileSplit sequenceSplit = (FileSplit)split;

            conf = context.GetConfiguration();
            Path       splitPath  = sequenceSplit.GetPath();
            FileSystem fileSystem = splitPath.GetFileSystem(conf);

            @in = new SequenceFile.Reader(fileSystem, splitPath, conf);

            long splitStart = sequenceSplit.GetStart();

            end = splitStart + sequenceSplit.GetLength();
            // sync to start
            if (@in.GetPosition() < splitStart)
            {
                @in.Sync(splitStart);
            }
            // The effective start is wherever the reader is positioned now.
            start = @in.GetPosition();
            more  = end > start;
        }

Beispiel #9

0

Datei anzeigen

        /// <summary>
        /// Writes two gzip-compressed key/value text files and checks that
        /// KeyValueTextInputFormat produces two splits whose values match the text
        /// after the tab on each line.
        /// </summary>
        /// <remarks>
        /// Assertions use NUnit's classic argument order (expected, actual, message).
        /// With the message first, the string overloads would compare the label with
        /// the expected text rather than with the value that was read.
        /// </remarks>
        public virtual void TestGzip()
        {
            Configuration    conf = new Configuration(defaultConf);
            CompressionCodec gzip = new GzipCodec();

            ReflectionUtils.SetConf(gzip, conf);
            localFs.Delete(workDir, true);
            WriteFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "line-1\tthe quick\nline-2\tbrown\nline-3\t"
                      + "fox jumped\nline-4\tover\nline-5\t the lazy\nline-6\t dog\n");
            WriteFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "line-1\tthis is a test\nline-1\tof gzip\n"
                      );
            Job job = Job.GetInstance(conf);

            FileInputFormat.SetInputPaths(job, workDir);
            KeyValueTextInputFormat format = new KeyValueTextInputFormat();
            IList <InputSplit>      splits = format.GetSplits(job);

            NUnit.Framework.Assert.AreEqual(2, splits.Count, "compressed splits == 2");
            // The splits may come back in either order; normalise so that
            // splits[0] is part1 and splits[1] is part2 before reading them.
            FileSplit tmp = (FileSplit)splits[0];

            if (tmp.GetPath().GetName().Equals("part2.txt.gz"))
            {
                splits.Set(0, splits[1]);
                splits.Set(1, tmp);
            }
            IList <Text> results = ReadSplit(format, splits[0], job);

            NUnit.Framework.Assert.AreEqual(6, results.Count, "splits[0] length");
            NUnit.Framework.Assert.AreEqual("the quick", results[0].ToString(), "splits[0][0]");
            NUnit.Framework.Assert.AreEqual("brown", results[1].ToString(), "splits[0][1]");
            NUnit.Framework.Assert.AreEqual("fox jumped", results[2].ToString(), "splits[0][2]");
            NUnit.Framework.Assert.AreEqual("over", results[3].ToString(), "splits[0][3]");
            NUnit.Framework.Assert.AreEqual(" the lazy", results[4].ToString(), "splits[0][4]");
            NUnit.Framework.Assert.AreEqual(" dog", results[5].ToString(), "splits[0][5]");
            results = ReadSplit(format, splits[1], job);
            NUnit.Framework.Assert.AreEqual(2, results.Count, "splits[1] length");
            NUnit.Framework.Assert.AreEqual("this is a test", results[0].ToString(), "splits[1][0]");
            NUnit.Framework.Assert.AreEqual("of gzip", results[1].ToString(), "splits[1][1]");
        }

Beispiel #10

0

Datei anzeigen

        /// <summary>
        /// Reads testBOM.txt, whose data starts with a byte order mark, and checks
        /// that the first value returned by LineRecordReader does not begin with it.
        /// </summary>
        /// <remarks>
        /// The prefix check is ordinal. A culture-sensitive StartsWith treats U+FEFF
        /// as an ignorable character and so reports a match for any string, which
        /// would make this test fail regardless of what the reader returned.
        /// </remarks>
        public virtual void TestStripBOM()
        {
            // the test data contains a BOM at the start of the file
            // confirm the BOM is skipped by LineRecordReader
            string Utf8Bom     = "\uFEFF";
            Uri    testFileUrl = GetType().GetClassLoader().GetResource("testBOM.txt");

            NUnit.Framework.Assert.IsNotNull(testFileUrl, "Cannot find testBOM.txt");
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // read the data and check whether BOM is skipped
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
            LineRecordReader reader = new LineRecordReader();

            reader.Initialize(split, context);
            bool firstLine = true;
            bool skipBOM   = true;

            try
            {
                // Only the first record is inspected; the rest of the file is still
                // read to the end so read errors surface as before.
                while (reader.NextKeyValue())
                {
                    if (firstLine)
                    {
                        firstLine = false;
                        if (reader.GetCurrentValue().ToString().StartsWith(Utf8Bom, System.StringComparison.Ordinal))
                        {
                            skipBOM = false;
                        }
                    }
                }
            }
            finally
            {
                reader.Close();
            }
            NUnit.Framework.Assert.IsTrue(skipBOM, "BOM is not skipped");
        }

Beispiel #11

0

Datei anzeigen

        /// <summary>
        /// Reads a bzip2 test file to the end, closes the reader twice, then takes
        /// ten decompressors from CodecPool and checks that all ten are distinct.
        /// </summary>
        /// <remarks>
        /// Presumably this guards against the second Close() handing the same
        /// decompressor back to the pool a second time — confirm against CodecPool.
        /// </remarks>
        public virtual void TestMultipleClose()
        {
            Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2"
                                                                     );

            // NUnit takes the checked object first and the failure message second.
            NUnit.Framework.Assert.IsNotNull(testFileUrl,
                                             "Cannot find recordSpanningMultipleSplits.txt.bz2");
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            // read the whole file as a single split, then close the reader twice
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, null);
            LineRecordReader reader = new LineRecordReader();

            reader.Initialize(split, context);
            //noinspection StatementWithEmptyBody
            while (reader.NextKeyValue())
            {
            }
            reader.Close();
            reader.Close();
            BZip2Codec codec = new BZip2Codec();

            codec.SetConf(conf);
            // A set drops duplicates, so a count of 10 means ten distinct instances.
            ICollection <Decompressor> decompressors = new HashSet <Decompressor>();

            for (int i = 0; i < 10; ++i)
            {
                decompressors.AddItem(CodecPool.GetDecompressor(codec));
            }
            NUnit.Framework.Assert.AreEqual(10, decompressors.Count);
        }

Beispiel #12

0

Datei anzeigen

        /// <summary>
        /// Checks the key (byte position) and value length that LineRecordReader
        /// reports for uncompressed input with the default delimiter, including a
        /// split boundary that falls inside a "\r\n" and a "\r\r\n" sequence.
        /// </summary>
        /// <remarks>
        /// Each reader is closed before the next one is created, matching the
        /// custom-delimiter test. The <c>key</c> and <c>value</c> locals are captured
        /// before closing, and the assertions that follow a false NextKeyValue()
        /// read those locals, not the reader.
        /// </remarks>
        public virtual void TestUncompressedInputDefaultDelimiterPosValue()
        {
            Configuration conf      = new Configuration();
            string        inputData = "1234567890\r\n12\r\n345";
            Path          inputFile = CreateInputFile(conf, inputData);

            conf.SetInt("io.file.buffer.size", 10);
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            FileSplit          split   = new FileSplit(inputFile, 0, 15, (string[])null);
            TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                    );
            LineRecordReader reader = new LineRecordReader(null);

            reader.Initialize(split, context);
            LongWritable key;
            Text         value;

            reader.NextKeyValue();
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get first record:"1234567890"
            NUnit.Framework.Assert.AreEqual(10, value.GetLength());
            NUnit.Framework.Assert.AreEqual(0, key.Get());
            reader.NextKeyValue();
            // Get second record:"12"
            NUnit.Framework.Assert.AreEqual(2, value.GetLength());
            // Key should be 12 right after "1234567890\r\n"
            NUnit.Framework.Assert.AreEqual(12, key.Get());
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 16 right after "1234567890\r\n12\r\n"
            NUnit.Framework.Assert.AreEqual(16, key.Get());
            reader.Close();
            split  = new FileSplit(inputFile, 15, 4, (string[])null);
            reader = new LineRecordReader(null);
            reader.Initialize(split, context);
            // The second split dropped the first record "\n"
            reader.NextKeyValue();
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get third record:"345"
            NUnit.Framework.Assert.AreEqual(3, value.GetLength());
            // Key should be 16 right after "1234567890\r\n12\r\n"
            NUnit.Framework.Assert.AreEqual(16, key.Get());
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 19 right after "1234567890\r\n12\r\n345"
            NUnit.Framework.Assert.AreEqual(19, key.Get());
            reader.Close();
            inputData = "123456789\r\r\n";
            inputFile = CreateInputFile(conf, inputData);
            split     = new FileSplit(inputFile, 0, 12, (string[])null);
            reader    = new LineRecordReader(null);
            reader.Initialize(split, context);
            reader.NextKeyValue();
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get first record:"123456789"
            NUnit.Framework.Assert.AreEqual(9, value.GetLength());
            NUnit.Framework.Assert.AreEqual(0, key.Get());
            reader.NextKeyValue();
            // Get second record:""
            NUnit.Framework.Assert.AreEqual(0, value.GetLength());
            // Key should be 10 right after "123456789\r"
            NUnit.Framework.Assert.AreEqual(10, key.Get());
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 12 right after "123456789\r\r\n"
            NUnit.Framework.Assert.AreEqual(12, key.Get());
            reader.Close();
        }

Beispiel #13

0

Datei anzeigen

        /// <summary>
        /// Checks the key (byte position) and value length that LineRecordReader
        /// reports for uncompressed input with multi-character custom delimiters
        /// ("++" and "|+|"), including splits that end inside or next to a delimiter.
        /// </summary>
        /// <remarks>
        /// Assertions use NUnit's classic argument order: (expected, actual, message)
        /// for AreEqual, and (value, message) for IsTrue, IsFalse and IsNull.
        /// The <c>key</c> and <c>value</c> locals are captured once per reader, and
        /// the assertions that follow a false NextKeyValue() read those locals.
        /// </remarks>
        public virtual void TestUncompressedInputCustomDelimiterPosValue()
        {
            Configuration conf = new Configuration();

            conf.SetInt("io.file.buffer.size", 10);
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            string inputData = "abcdefghij++kl++mno";
            Path   inputFile = CreateInputFile(conf, inputData);
            string delimiter = "++";

            byte[] recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets
                                                                            .Utf8);
            int                splitLength = 15;
            FileSplit          split       = new FileSplit(inputFile, 0, splitLength, (string[])null);
            TaskAttemptContext context     = new TaskAttemptContextImpl(conf, new TaskAttemptID()
                                                                        );
            LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);

            reader.Initialize(split, context);
            // Get first record: "abcdefghij"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            LongWritable key   = reader.GetCurrentKey();
            Text         value = reader.GetCurrentValue();

            NUnit.Framework.Assert.AreEqual(10, value.GetLength(), "Wrong length for record value");
            NUnit.Framework.Assert.AreEqual(0, key.Get(), "Wrong position after record read");
            // Get second record: "kl"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            NUnit.Framework.Assert.AreEqual(2, value.GetLength(), "Wrong length for record value");
            // Key should be 12 right after "abcdefghij++"
            NUnit.Framework.Assert.AreEqual(12, key.Get(), "Wrong position after record read");
            // Get third record: "mno"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            NUnit.Framework.Assert.AreEqual(3, value.GetLength(), "Wrong length for record value");
            // Key should be 16 right after "abcdefghij++kl++"
            NUnit.Framework.Assert.AreEqual(16, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            // Key should be 19 right after "abcdefghij++kl++mno"
            NUnit.Framework.Assert.AreEqual(19, key.Get(), "Wrong position after record read");
            // after refresh should be empty
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
            reader.Close();
            split = new FileSplit(inputFile, splitLength, inputData.Length - splitLength, (string
                                                                                           [])null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            // No record is in the second split because the second split dropped
            // the first record, which was already reported by the first split.
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue(), "Unexpected record returned");
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
            reader.Close();
            // multi char delimiter with starting part of the delimiter in the data
            inputData   = "abcd+efgh++ijk++mno";
            inputFile   = CreateInputFile(conf, inputData);
            splitLength = 5;
            split       = new FileSplit(inputFile, 0, splitLength, (string[])null);
            reader      = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            // Get first record: "abcd+efgh"
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            NUnit.Framework.Assert.AreEqual(0, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.AreEqual(9, value.GetLength(), "Wrong length for record value");
            // should have jumped over the delimiter, no record
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            NUnit.Framework.Assert.AreEqual(11, key.Get(), "Wrong position after record read");
            // after refresh should be empty
            key = reader.GetCurrentKey();
            NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
            reader.Close();
            // next split: check for duplicate or dropped records
            split = new FileSplit(inputFile, splitLength, inputData.Length - splitLength, (string
                                                                                           [])null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.Initialize(split, context);
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            key   = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            // Get second record: "ijk" first in this split
            NUnit.Framework.Assert.AreEqual(11, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.AreEqual(3, value.GetLength(), "Wrong length for record value");
            // Get third record: "mno" second in this split
            NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
            NUnit.Framework.Assert.AreEqual(16, key.Get(), "Wrong position after record read");
            NUnit.Framework.Assert.AreEqual(3, value.GetLength(), "Wrong length for record value");
            // should be at the end of the input
            NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
            NUnit.Framework.Assert.AreEqual(19, key.Get(), "Wrong position after record read");
            reader.Close();
            inputData            = "abcd|efgh|+|ij|kl|+|mno|pqr";
            inputFile            = CreateInputFile(conf, inputData);
            delimiter            = "|+|";
            recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8
                                                                     );
            // walking over the buffer and split sizes checks for proper processing
            // of the ambiguous bytes of the delimiter
            for (int bufferSize = 1; bufferSize <= inputData.Length; bufferSize++)
            {
                for (int splitSize = 1; splitSize < inputData.Length; splitSize++)
                {
                    // track where we are in the inputdata
                    int keyPosition = 0;
                    conf.SetInt("io.file.buffer.size", bufferSize);
                    // NOTE(review): the split length below is bufferSize, so the
                    // inner loop's splitSize is never used and every inner iteration
                    // repeats the same case. Left unchanged here; confirm whether
                    // splitSize was intended before altering the test's coverage.
                    split  = new FileSplit(inputFile, 0, bufferSize, (string[])null);
                    reader = new LineRecordReader(recordDelimiterBytes);
                    reader.Initialize(split, context);
                    // Get the first record: "abcd|efgh" always possible
                    NUnit.Framework.Assert.IsTrue(reader.NextKeyValue(), "Expected record got nothing");
                    key   = reader.GetCurrentKey();
                    value = reader.GetCurrentValue();
                    NUnit.Framework.Assert.IsTrue("abcd|efgh".Equals(value.ToString()));
                    // Position should be 0 right at the start
                    NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                    "Wrong position after record read");
                    // Position should be 12 right after the first "|+|"
                    keyPosition = 12;
                    // get the next record: "ij|kl" if the split/buffer allows it
                    if (reader.NextKeyValue())
                    {
                        // check the record info: "ij|kl"
                        NUnit.Framework.Assert.IsTrue("ij|kl".Equals(value.ToString()));
                        NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                        "Wrong position after record read");
                        // Position should be 20 after the second "|+|"
                        keyPosition = 20;
                    }
                    // get the third record: "mno|pqr" if the split/buffer allows it
                    if (reader.NextKeyValue())
                    {
                        // check the record info: "mno|pqr"
                        NUnit.Framework.Assert.IsTrue("mno|pqr".Equals(value.ToString()));
                        NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                        "Wrong position after record read");
                        // Position should be the end of the input
                        keyPosition = inputData.Length;
                    }
                    NUnit.Framework.Assert.IsFalse(reader.NextKeyValue(), "Unexpected record returned");
                    // no more records can be read we should be at the last position
                    NUnit.Framework.Assert.AreEqual(keyPosition, key.Get(),
                                                    "Wrong position after record read");
                    // after refresh should be empty
                    key = reader.GetCurrentKey();
                    NUnit.Framework.Assert.IsNull(key, "Unexpected key returned");
                    reader.Close();
                }
            }
        }

C# (CSharp) Org.Apache.Hadoop.Mapreduce.Lib.Input FileSplit Beispiele