예제 #1
0
        /// <summary>
        /// Verifies that asking for a record reader raises an <see cref="System.IO.IOException"/>
        /// when the job has no fixed record length configured.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestNoRecordLength()
        {
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, "testFormat.txt");

            // Uncompressed input: record length 10, 10 records.
            CreateFile(file, null, 10, 10);
            // Deliberately do NOT set the fixed record length on this job;
            // its absence is what the test expects the reader to reject.
            JobConf job = new JobConf(defaultConf);

            FileInputFormat.SetInputPaths(job, workDir);
            FixedLengthInputFormat format = new FixedLengthInputFormat();

            format.Configure(job);
            InputSplit[] splits          = format.GetSplits(job, 1);
            bool         exceptionThrown = false;

            foreach (InputSplit split in splits)
            {
                RecordReader <LongWritable, BytesWritable> reader = null;

                try
                {
                    reader = format.GetRecordReader(split, job, voidReporter);
                }
                catch (IOException ioe)
                {
                    exceptionThrown = true;
                    Log.Info("Exception message:" + ioe.Message);
                }
                // If a reader was handed back after all, release it. This happens
                // outside the try so that a failure in Close() is not mistaken for
                // the expected "missing record length" exception.
                if (reader != null)
                {
                    reader.Close();
                }
            }
            NUnit.Framework.Assert.IsTrue("Exception for not setting record length:", exceptionThrown
                                          );
        }
예제 #2
0
        /// <summary>
        /// Writes two gzip-compressed files of 5-byte records, expects exactly two splits,
        /// and checks selected records read back from each split.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestGzipWithTwoInputs()
        {
            CompressionCodec codec = new GzipCodec();

            localFs.Delete(workDir, true);
            FixedLengthInputFormat inputFormat = new FixedLengthInputFormat();
            JobConf conf = new JobConf(defaultConf);

            // Job setup: 5-byte records, input directory, codec wired to the job.
            FixedLengthInputFormat.SetRecordLength(conf, 5);
            FileInputFormat.SetInputPaths(conf, workDir);
            ReflectionUtils.SetConf(codec, conf);
            inputFormat.Configure(conf);

            // Two inputs, each holding ten 5-byte records.
            Path firstInput = new Path(workDir, "part1.txt.gz");
            WriteFile(localFs, firstInput, codec, "one  two  threefour five six  seveneightnine ten  ");
            Path secondInput = new Path(workDir, "part2.txt.gz");
            WriteFile(localFs, secondInput, codec, "ten  nine eightsevensix  five four threetwo  one  ");

            InputSplit[] splits = inputFormat.GetSplits(conf, 100);
            NUnit.Framework.Assert.AreEqual("compressed splits == 2", 2, splits.Length);

            // Normalise the order so that splits[0] always refers to part1.txt.gz.
            FileSplit leading = (FileSplit)splits[0];
            bool      swapped = leading.GetPath().GetName().Equals("part2.txt.gz");
            if (swapped)
            {
                splits[0] = splits[1];
                splits[1] = leading;
            }

            IList <string> firstRecords = ReadSplit(inputFormat, splits[0], conf);
            NUnit.Framework.Assert.AreEqual("splits[0] length", 10, firstRecords.Count);
            NUnit.Framework.Assert.AreEqual("splits[0][5]", "six  ", firstRecords[5]);

            IList <string> secondRecords = ReadSplit(inputFormat, splits[1], conf);
            NUnit.Framework.Assert.AreEqual("splits[1] length", 10, secondRecords.Count);
            NUnit.Framework.Assert.AreEqual("splits[1][0]", "ten  ", secondRecords[0]);
            NUnit.Framework.Assert.AreEqual("splits[1][1]", "nine ", secondRecords[1]);
        }
예제 #3
0
        /// <summary>
        /// Writes a file of 5-byte records whose last record is truncated and expects
        /// reading it to raise an <see cref="System.IO.IOException"/>.
        /// </summary>
        /// <param name="codec">
        /// Codec used to write the file and configured on the job; when <c>null</c> the
        /// file name carries no ".gz" suffix and no codec is configured.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private void RunPartialRecordTest(CompressionCodec codec)
        {
            localFs.Delete(workDir, true);

            bool   compressed = codec != null;
            string inputName  = compressed ? "testFormat.txt" + ".gz" : "testFormat.txt";

            FixedLengthInputFormat inputFormat = new FixedLengthInputFormat();
            JobConf conf = new JobConf(defaultConf);

            FixedLengthInputFormat.SetRecordLength(conf, 5);
            FileInputFormat.SetInputPaths(conf, workDir);
            if (compressed)
            {
                ReflectionUtils.SetConf(codec, conf);
            }
            inputFormat.Configure(conf);

            // Nine complete 5-byte records followed by a 3-byte fragment ("ten").
            Path inputPath = new Path(workDir, inputName);
            WriteFile(localFs, inputPath, codec, "one  two  threefour five six  seveneightnine ten");

            InputSplit[] splits = inputFormat.GetSplits(conf, 100);
            if (compressed)
            {
                NUnit.Framework.Assert.AreEqual("compressed splits == 1", 1, splits.Length);
            }

            bool sawIoFailure = false;
            foreach (InputSplit current in splits)
            {
                try
                {
                    // The records themselves are irrelevant; only the failure matters.
                    ReadSplit(inputFormat, current, conf);
                }
                catch (IOException failure)
                {
                    sawIoFailure = true;
                    Log.Info("Exception message:" + failure.Message);
                }
            }
            NUnit.Framework.Assert.IsTrue("Exception for partial record:", sawIoFailure);
        }
예제 #4
0
        /// <summary>
        /// Reads every record of <paramref name="split"/> and returns the record values
        /// converted to strings, in the order they were read.
        /// </summary>
        /// <param name="format">Input format that supplies the record reader.</param>
        /// <param name="split">Split to read.</param>
        /// <param name="job">Job configuration handed to the record reader.</param>
        /// <returns>One string per record read from the split.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit
                                                split, JobConf job)
        {
            IList <string> records = new AList <string>();
            RecordReader <LongWritable, BytesWritable> recordReader =
                format.GetRecordReader(split, job, voidReporter);
            LongWritable  recordKey   = recordReader.CreateKey();
            BytesWritable recordValue = recordReader.CreateValue();

            try
            {
                for (;;)
                {
                    if (!recordReader.Next(recordKey, recordValue))
                    {
                        break;
                    }
                    // Convert only the first GetLength() bytes of the value buffer.
                    string text = Sharpen.Runtime.GetStringForBytes(
                        recordValue.GetBytes(), 0, recordValue.GetLength());
                    records.AddItem(text);
                }
            }
            finally
            {
                // Always release the reader, even if reading failed part-way.
                recordReader.Close();
            }
            return records;
        }
예제 #5
0
        /// <summary>
        /// Runs 20 randomized round trips. Each iteration writes a file of fixed-length
        /// records through <c>CreateFile</c>, asks for a varying number of splits, reads
        /// every split back and checks each key, record length and record content against
        /// the list of records that <c>CreateFile</c> returned.
        /// </summary>
        /// <param name="codec">
        /// Codec passed to <c>CreateFile</c> and configured on the job; when <c>null</c>
        /// the file name carries no ".gz" suffix and no codec is configured.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private void RunRandomTests(CompressionCodec codec)
        {
            StringBuilder fileName = new StringBuilder("testFormat.txt");

            if (codec != null)
            {
                fileName.Append(".gz");
            }
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, fileName.ToString());
            // The seed is logged so that a failing run can be reproduced.
            int  seed = new Random().Next();

            Log.Info("Seed = " + seed);
            Random        random   = new Random(seed);
            const int     MaxTests = 20;
            LongWritable  key      = new LongWritable();
            BytesWritable value    = new BytesWritable();

            for (int i = 0; i < MaxTests; i++)
            {
                Log.Info("----------------------------------------------------------");
                // Maximum total records of 999
                int totalRecords = random.Next(999) + 1;
                // Test an empty file
                if (i == 8)
                {
                    totalRecords = 0;
                }
                // Maximum bytes in a record of 100K
                int recordLength = random.Next(1024 * 100) + 1;
                // For the 11th test, force a record length of 1
                if (i == 10)
                {
                    recordLength = 1;
                }
                // The total bytes in the test file
                int fileSize = (totalRecords * recordLength);
                Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
                // Create the job
                JobConf job = new JobConf(defaultConf);
                if (codec != null)
                {
                    ReflectionUtils.SetConf(codec, job);
                }
                // Create the test file
                AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords);
                NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
                //set the fixed length record length config property for the job
                FixedLengthInputFormat.SetRecordLength(job, recordLength);
                int numSplits = 1;
                // Arbitrarily set number of splits.
                if (i > 0)
                {
                    if (i == (MaxTests - 1))
                    {
                        // Test a split size that is less than record len.
                        // recordLength / 2 is integer division, so no Floor is needed;
                        // clamp to 1 so a 1-byte record length cannot give a zero divisor.
                        int halfRecordLength = Math.Max(1, recordLength / 2);
                        numSplits = fileSize / halfRecordLength;
                    }
                    else if (MaxTests % i == 0)
                    {
                        // Let us create a split size that is forced to be
                        // smaller than the end file itself, (ensures 1+ splits).
                        // The clamps keep the division defined for an empty file.
                        int divisor = Math.Max(1, fileSize - random.Next(fileSize));
                        numSplits = Math.Max(1, fileSize / divisor);
                    }
                    else
                    {
                        // Just pick a random split size with no upper bound.
                        // Next(int.MaxValue) may return 0, so clamp the divisor to 1.
                        int divisor = Math.Max(1, random.Next(int.MaxValue));
                        numSplits = Math.Max(1, fileSize / divisor);
                    }
                    Log.Info("Number of splits set to: " + numSplits);
                }
                // Setup the input path
                FileInputFormat.SetInputPaths(job, workDir);
                // Try splitting the file in a variety of sizes
                FixedLengthInputFormat format = new FixedLengthInputFormat();
                format.Configure(job);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("Actual number of splits = " + splits.Length);
                // Walk the splits in order, checking that the records read back
                // match the records written, one by one.
                int recordNumber = 0;
                foreach (InputSplit split in splits)
                {
                    RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split,
                                                                                               job, voidReporter);
                    try
                    {
                        Type clazz = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:"
                                                        , typeof(FixedLengthRecordReader), clazz);
                        // Plow through the records in this split
                        while (reader.Next(key, value))
                        {
                            // The key is expected to equal recordNumber * recordLength.
                            NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength
                                                                                   ), key.Get());
                            string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value
                                                                                   .GetLength());
                            NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength
                                                                ());
                            // Guard the index below: fail cleanly if more records come back than were written.
                            NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber
                                                          < totalRecords);
                            string origRecord = recordList[recordNumber];
                            NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString
                                                            );
                            recordNumber++;
                        }
                    }
                    finally
                    {
                        // Release the reader even when an assertion or read fails.
                        reader.Close();
                    }
                }
                NUnit.Framework.Assert.AreEqual("Total original records should be total read records:"
                                                , recordList.Count, recordNumber);
            }
        }