Exemplo n.º 1
0
        /// <summary>
        /// Runs the IO operation for one input record and emits its statistics.
        /// </summary>
        /// <remarks>
        /// The <tt>key</tt> holds the file name and the <tt>value</tt> holds the offset
        /// within the file. Both are forwarded to <c>DoIO</c>; the stream obtained from
        /// <c>GetIOStream</c> is closed whether or not <c>DoIO</c> throws. The elapsed
        /// time and the value returned by <c>DoIO</c> are then handed to
        /// <c>CollectStats</c> to prepare data for a subsequent reducer.
        /// </remarks>
        /// <param name="key">File name.</param>
        /// <param name="value">Offset within the file.</param>
        /// <param name="output">Collector passed through to <c>CollectStats</c>.</param>
        /// <param name="reporter">Receives "starting"/"finished" status messages.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Map(Text key, LongWritable value, OutputCollector <Text, Text> output,
                                Reporter reporter)
        {
            string fileName = key.ToString();
            long   offset   = value.Get();

            reporter.SetStatus("starting " + fileName + " ::host = " + hostName);
            this.stream = GetIOStream(fileName);
            T    ioResult    = null;
            long startMillis = Runtime.CurrentTimeMillis();

            try
            {
                ioResult = DoIO(reporter, fileName, offset);
            }
            finally
            {
                // Always release the stream, even when DoIO fails.
                if (stream != null)
                {
                    stream.Close();
                }
            }
            // The end timestamp is taken after the stream has been closed,
            // so the measured time includes the close.
            long elapsedMillis = Runtime.CurrentTimeMillis() - startMillis;

            CollectStats(output, fileName, elapsedMillis, ioResult);
            reporter.SetStatus("finished " + fileName + " ::host = " + hostName);
        }
        /// <summary>
        /// Builds a <c>CombineFileRecordReader</c> over three freshly created files and
        /// verifies, through a mocked <c>Reporter</c>, the <c>Progress()</c> calls made
        /// after construction and after the first <c>Next</c> call (which must return
        /// false). The output directory is deleted afterwards in all cases.
        /// </summary>
        public virtual void TestInitNextRecordReader()
        {
            const int fileCount = 3;
            JobConf   jobConf   = new JobConf();

            Path[]       splitPaths   = new Path[fileCount];
            long[]       splitLengths = new long[fileCount];
            FilePath[]   localFiles   = new FilePath[fileCount];
            LongWritable key          = new LongWritable(1);
            Text         value        = new Text();

            try
            {
                for (int index = 0; index < fileCount; index++)
                {
                    splitLengths[index] = index;
                    FilePath parentDir = new FilePath(outDir.ToString());
                    parentDir.Mkdir();
                    localFiles[index] = new FilePath(parentDir, "testfile" + index);
                    // Open and immediately close a writer; nothing is written to the file.
                    FileWriter emptyWriter = new FileWriter(localFiles[index]);
                    emptyWriter.Close();
                    splitPaths[index] = new Path(outDir + "/testfile" + index);
                }

                CombineFileSplit        combinedSplit = new CombineFileSplit(jobConf, splitPaths, splitLengths);
                Reporter                mockReporter  = Org.Mockito.Mockito.Mock <Reporter>();
                CombineFileRecordReader combineReader = new CombineFileRecordReader(
                    jobConf, combinedSplit, mockReporter,
                    typeof(TestCombineFileRecordReader.TextRecordReaderWrapper));

                // Checked right after construction.
                Org.Mockito.Mockito.Verify(mockReporter).Progress();
                NUnit.Framework.Assert.IsFalse(combineReader.Next(key, value));
                // Checked again once Next has run.
                Org.Mockito.Mockito.Verify(mockReporter, Org.Mockito.Mockito.Times(3)).Progress();
            }
            finally
            {
                FileUtil.FullyDelete(new FilePath(outDir.ToString()));
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the bzip2 test resource to the end with a <c>LineRecordReader</c>,
        /// closes the reader twice, then requests ten decompressors from
        /// <c>CodecPool</c> and asserts that the set holding them has ten entries.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably this guards against a double close returning the
        /// same decompressor to the pool twice - confirm against the reader's Close.
        /// </remarks>
        public virtual void TestMultipleClose()
        {
            Uri resourceUrl = GetType().GetClassLoader().GetResource(
                "recordSpanningMultipleSplits.txt.bz2");

            NUnit.Framework.Assert.IsNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
                                             resourceUrl);
            FilePath      localFile     = new FilePath(resourceUrl.GetFile());
            Path          hadoopPath    = new Path(localFile.GetAbsolutePath());
            long          byteLength    = localFile.Length();
            Configuration configuration = new Configuration();

            configuration.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            FileSplit        wholeFile  = new FileSplit(hadoopPath, 0, byteLength, (string[])null);
            LineRecordReader lineReader = new LineRecordReader(configuration, wholeFile);
            LongWritable     key        = new LongWritable();
            Text             value      = new Text();

            // Drain the reader; the records themselves are not inspected.
            while (lineReader.Next(key, value))
            {
            }
            // Close twice on purpose.
            lineReader.Close();
            lineReader.Close();

            BZip2Codec bzip2 = new BZip2Codec();
            bzip2.SetConf(configuration);

            const int requested = 10;
            ICollection <Decompressor> pooled = new HashSet <Decompressor>();
            for (int n = 0; n < requested; n++)
            {
                pooled.AddItem(CodecPool.GetDecompressor(bzip2));
            }
            NUnit.Framework.Assert.AreEqual(10, pooled.Count);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads every record of a file with <c>LineRecordReader</c>, one fixed-size
        /// split at a time.
        /// </summary>
        /// <remarks>
        /// The IO buffer size is set to 1 in the configuration used for reading.
        /// Each reader is closed before the next split is opened, including when
        /// reading fails. A <paramref name="splitSize"/> of zero or less never
        /// advances past the end of a non-empty file, so callers must pass a
        /// positive value.
        /// </remarks>
        /// <param name="testFileUrl">URL whose file part names the local file to read.</param>
        /// <param name="splitSize">
        /// Length given to each <c>FileSplit</c>, and the step by which the split
        /// offset advances.
        /// </param>
        /// <returns>The text of each record, in the order the readers returned them.</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual AList <string> ReadRecords(Uri testFileUrl, int splitSize)
        {
            // Set up context
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            long          testFileSize = testFile.Length();
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            Configuration conf         = new Configuration();

            conf.SetInt("io.file.buffer.size", 1);
            // Gather the records returned by the record reader
            AList <string> records = new AList <string>();
            long           offset  = 0;
            LongWritable   key     = new LongWritable();
            Text           value   = new Text();

            while (offset < testFileSize)
            {
                FileSplit        split  = new FileSplit(testFilePath, offset, splitSize, (string[])null);
                LineRecordReader reader = new LineRecordReader(conf, split);
                try
                {
                    while (reader.Next(key, value))
                    {
                        records.AddItem(value.ToString());
                    }
                }
                finally
                {
                    // One reader is opened per split; release it before moving on.
                    reader.Close();
                }
                offset += splitSize;
            }
            return(records);
        }
        /**
         * Populates the reusable event fields for one row and appends it to the writer.
         *
         * For delete and update operations the original transaction and the row id are
         * read from the record identifier stored in the row; for inserts the row id is
         * shifted by {@code rowIdOffset}. Delete events carry a null row payload.
         *
         * @param operation          one of the *_OPERATION constants
         * @param currentTransaction transaction performing this event
         * @param rowId              row id supplied by the caller (replaced for delete/update)
         * @param row                the row object; its record identifier is read for delete/update
         */
        private void addEvent(int operation, long currentTransaction, long rowId, Object row)
        {
            final boolean isDelete = (operation == DELETE_OPERATION);

            this.operation.set(operation);
            this.currentTransaction.set(currentTransaction);

            // Inserts keep the current transaction as the original one; delete and
            // update overwrite it below with the value stored in the row.
            long originalTransaction = currentTransaction;

            if (isDelete || operation == UPDATE_OPERATION)
            {
                Object recordId = rowInspector.getStructFieldData(row, recIdField);
                Object storedTxn = recIdInspector.getStructFieldData(recordId, originalTxnField);
                originalTransaction = origTxnInspector.get(storedTxn);
                Object storedRowId = recIdInspector.getStructFieldData(recordId, rowIdField);
                rowId = rowIdInspector.get(storedRowId);
            }
            else if (operation == INSERT_OPERATION)
            {
                rowId += rowIdOffset;
            }

            this.rowId.set(rowId);
            this.originalTransaction.set(originalTransaction);

            Object payload = row;
            if (isDelete)
            {
                payload = null;
            }
            item.setFieldValue(OrcRecordUpdater.ROW, payload);
            indexBuilder.addKey(operation, originalTransaction, bucket.get(), rowId);
            writer.addRow(item);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Writes a long to the stream by way of the shared <c>LongWritable</c>
        /// obtained from <c>TlData.Get()</c>.
        /// </summary>
        /// <param name="value">The value to write.</param>
        /// <param name="out">Destination stream.</param>
        /// <exception cref="System.IO.IOException"/>
        internal static void WriteLong(long value, DataOutputStream @out)
        {
            // Reuse the holder from TlData rather than allocating a new writable.
            LongWritable scratch = TlData.Get().ULong;
            scratch.Set(value);
            scratch.Write(@out);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads a long from the input by way of the shared <c>LongWritable</c>
        /// obtained from <c>TlData.Get()</c>.
        /// </summary>
        /// <param name="in">Source to read the fields from.</param>
        /// <returns>The value held by the writable after reading.</returns>
        /// <exception cref="System.IO.IOException"/>
        internal static long ReadLong(DataInput @in)
        {
            // Reuse the holder from TlData rather than allocating a new writable.
            LongWritable scratch = TlData.Get().ULong;
            scratch.ReadFields(@in);
            long decoded = scratch.Get();
            return decoded;
        }
Exemplo n.º 8
0
 /// <summary>
 /// Advances to the next result row, lazily creating the key, the value and
 /// the query results on first use.
 /// </summary>
 /// <returns>
 /// true when a row was read into the current key/value; false when the
 /// results are exhausted.
 /// </returns>
 /// <exception cref="System.IO.IOException">
 /// Wraps any <c>SQLException</c> raised while querying or reading.
 /// </exception>
 public override bool NextKeyValue()
 {
     try
     {
         if (key == null)
         {
             key = new LongWritable();
         }
         if (value == null)
         {
             value = CreateValue();
         }
         if (this.results == null)
         {
             // The query runs only on the first call.
             this.results = ExecuteQuery(GetSelectQuery());
         }

         bool hasRow = results.Next();
         if (hasRow)
         {
             // The output key is the row position offset by the split start.
             key.Set(pos + split.GetStart());
             value.ReadFields(results);
             pos++;
         }
         return hasRow;
     }
     catch (SQLException e)
     {
         throw new IOException("SQLException in nextKeyValue", e);
     }
 }
Exemplo n.º 9
0
        /// <summary>
        /// Transfers <paramref name="count"/> bytes from a <c>FileChannel</c> to this
        /// stream's channel using <c>FileChannel.TransferTo</c>, looping until the whole
        /// amount has been transferred (similar to readFully()).
        /// </summary>
        /// <remarks>
        /// The time spent in <c>WaitForWritable</c> and the time spent after it in each
        /// iteration are accumulated separately and stored into the two optional
        /// <c>LongWritable</c> arguments once the loop finishes.
        /// </remarks>
        /// <param name="fileCh">FileChannel to transfer data from.</param>
        /// <param name="position">Position within the channel where the transfer begins.</param>
        /// <param name="count">Number of bytes to transfer.</param>
        /// <param name="waitForWritableTime">
        /// If not null, receives the nanoseconds spent waiting for the socket to
        /// become writable.
        /// </param>
        /// <param name="transferToTime">
        /// If not null, receives the nanoseconds spent transferring data.
        /// </param>
        /// <exception cref="System.IO.EOFException">
        /// If the end of the input file is reached before the requested number of
        /// bytes has been transferred.
        /// </exception>
        /// <exception cref="SocketTimeoutException">
        /// If this channel blocks the transfer longer than the timeout for this stream.
        /// </exception>
        /// <exception cref="System.IO.IOException">
        /// If <c>TransferTo</c> returns a negative count; also includes any exception
        /// thrown by <c>FileChannel.TransferTo</c>.
        /// </exception>
        public virtual void TransferToFully(FileChannel fileCh, long position, int count,
                                            LongWritable waitForWritableTime, LongWritable transferToTime)
        {
            long totalWaitNanos     = 0;
            long totalTransferNanos = 0;

            while (count > 0)
            {
                /*
                 * Ideally we should wait after transferTo returns 0. But because of
                 * a bug in JRE on Linux (http://bugs.sun.com/view_bug.do?bug_id=5103988),
                 * which throws an exception instead of returning 0, we wait for the
                 * channel to be writable before writing to it. If you ever see
                 * IOException with message "Resource temporarily unavailable"
                 * thrown here, please let us know.
                 *
                 * Once we move to JAVA SE 7, wait should be moved to correct place.
                 */
                long waitBegin = Runtime.NanoTime();
                WaitForWritable();
                long transferBegin = Runtime.NanoTime();
                int  moved         = (int)fileCh.TransferTo(position, count, GetChannel());

                if (moved < 0)
                {
                    throw new IOException("Unexpected return of " + moved + " from transferTo()");
                }

                if (moved == 0)
                {
                    // Nothing moved: fail on end of file, otherwise assume the socket
                    // is full and go around again (the wait happens at the loop top).
                    if (position >= fileCh.Size())
                    {
                        throw new EOFException("EOF Reached. file size is " + fileCh.Size() + " and " + count
                                               + " more bytes left to be " + "transfered.");
                    }
                }
                else
                {
                    position += moved;
                    count    -= moved;
                }

                long transferEnd = Runtime.NanoTime();
                totalWaitNanos     += transferBegin - waitBegin;
                totalTransferNanos += transferEnd - transferBegin;
            }

            // Both timing outputs are optional.
            if (waitForWritableTime != null)
            {
                waitForWritableTime.Set(totalWaitNanos);
            }
            if (transferToTime != null)
            {
                transferToTime.Set(totalTransferNanos);
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Creates the reader on top of a <c>LineRecordReader</c> for the given split and
        /// reads the key/value separator from the job configuration.
        /// </summary>
        /// <remarks>
        /// Only the first character of the configured separator is used, narrowed to a
        /// byte; the default is a tab.
        /// </remarks>
        /// <param name="job">Configuration supplying the separator setting.</param>
        /// <param name="split">The split handed to the underlying line reader.</param>
        /// <exception cref="System.IO.IOException"/>
        public KeyValueLineRecordReader(Configuration job, FileSplit split)
        {
            lineRecordReader = new LineRecordReader(job, split);
            dummyKey         = lineRecordReader.CreateKey();
            innerValue       = lineRecordReader.CreateValue();

            string configuredSeparator = job.Get(
                "mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
            char leadingChar = configuredSeparator[0];

            this.separator = unchecked ((byte)leadingChar);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Checks record lengths and <c>GetPos()</c> values reported by
        /// <c>LineRecordReader</c> on uncompressed input when a null delimiter is passed:
        /// first for "\r\n"-terminated data read through two splits, then for data
        /// ending in "\r\r\n" read through a single split.
        /// </summary>
        public virtual void TestUncompressedInputDefaultDelimiterPosValue()
        {
            Configuration configuration = new Configuration();
            string        content       = "1234567890\r\n12\r\n345";
            Path          dataFile      = CreateInputFile(configuration, content);

            configuration.SetInt("io.file.buffer.size", 10);
            configuration.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);

            // ---- first split: bytes [0, 15) ----
            FileSplit        currentSplit = new FileSplit(dataFile, 0, 15, (string[])null);
            LineRecordReader lineReader   = new LineRecordReader(configuration, currentSplit, null);
            LongWritable     key          = new LongWritable();
            Text             value        = new Text();

            // Record 1 is "1234567890"; the position lands after "1234567890\r\n".
            lineReader.Next(key, value);
            NUnit.Framework.Assert.AreEqual(10, value.GetLength());
            NUnit.Framework.Assert.AreEqual(12, lineReader.GetPos());

            // Record 2 is "12"; the position lands after "1234567890\r\n12\r\n".
            lineReader.Next(key, value);
            NUnit.Framework.Assert.AreEqual(2, value.GetLength());
            NUnit.Framework.Assert.AreEqual(16, lineReader.GetPos());
            NUnit.Framework.Assert.IsFalse(lineReader.Next(key, value));

            // ---- second split: 4 bytes starting at offset 15 ----
            currentSplit = new FileSplit(dataFile, 15, 4, (string[])null);
            lineReader   = new LineRecordReader(configuration, currentSplit, null);
            // This split drops its leading "\n", so it starts at position 16,
            // right after "1234567890\r\n12\r\n".
            NUnit.Framework.Assert.AreEqual(16, lineReader.GetPos());

            // Record 3 is "345"; the position lands at the end of the data.
            lineReader.Next(key, value);
            NUnit.Framework.Assert.AreEqual(3, value.GetLength());
            NUnit.Framework.Assert.AreEqual(19, lineReader.GetPos());
            NUnit.Framework.Assert.IsFalse(lineReader.Next(key, value));
            NUnit.Framework.Assert.AreEqual(19, lineReader.GetPos());

            // ---- new input: a lone "\r" followed by "\r\n" ----
            content      = "123456789\r\r\n";
            dataFile     = CreateInputFile(configuration, content);
            currentSplit = new FileSplit(dataFile, 0, 12, (string[])null);
            lineReader   = new LineRecordReader(configuration, currentSplit, null);

            // Record 1 is "123456789"; the position lands after "123456789\r".
            lineReader.Next(key, value);
            NUnit.Framework.Assert.AreEqual(9, value.GetLength());
            NUnit.Framework.Assert.AreEqual(10, lineReader.GetPos());

            // Record 2 is empty; the position lands after "123456789\r\r\n".
            lineReader.Next(key, value);
            NUnit.Framework.Assert.AreEqual(0, value.GetLength());
            NUnit.Framework.Assert.AreEqual(12, lineReader.GetPos());
            NUnit.Framework.Assert.IsFalse(lineReader.Next(key, value));
            NUnit.Framework.Assert.AreEqual(12, lineReader.GetPos());
        }
Exemplo n.º 12
0
        /// <summary>
        /// Asserts that reading a file as two splits yields the same number of records
        /// as reading it as one split.
        /// </summary>
        /// <remarks>
        /// The record delimiter is taken from "textinputformat.record.delimiter" in
        /// <paramref name="conf"/> (UTF-8 encoded) and is null when that key is unset.
        /// The method also sets the maximum line length on <paramref name="conf"/>.
        /// </remarks>
        /// <param name="conf">Configuration used for all readers; modified by this method.</param>
        /// <param name="firstSplitLength">Length of the first split; must be below the file size.</param>
        /// <param name="testFileSize">Total size of the file.</param>
        /// <param name="testFilePath">Path of the file to read.</param>
        /// <exception cref="System.IO.IOException"/>
        private void TestSplitRecordsForFile(Configuration conf, long firstSplitLength, long
                                             testFileSize, Path testFilePath)
        {
            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            NUnit.Framework.Assert.IsTrue("unexpected test data at " + testFilePath, testFileSize
                                          > firstSplitLength);
            string delimiter = conf.Get("textinputformat.record.delimiter");

            byte[] recordDelimiterBytes = null;
            if (null != delimiter)
            {
                recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8
                                                                         );
            }
            // read the data without splitting to count the records
            int numRecordsNoSplits = CountRecordsInSplit(
                conf, new FileSplit(testFilePath, 0, testFileSize, (string[])null),
                recordDelimiterBytes);
            // count the records in the first split
            int numRecordsFirstSplit = CountRecordsInSplit(
                conf, new FileSplit(testFilePath, 0, firstSplitLength, (string[])null),
                recordDelimiterBytes);
            // count the records in the second split
            int numRecordsRemainingSplits = CountRecordsInSplit(
                conf, new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength,
                                    (string[])null),
                recordDelimiterBytes);

            NUnit.Framework.Assert.AreEqual("Unexpected number of records in split", numRecordsNoSplits
                                            , numRecordsFirstSplit + numRecordsRemainingSplits);
        }

        /// <summary>
        /// Counts the records a <c>LineRecordReader</c> returns for one split, closing
        /// the reader even when reading fails.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        private static int CountRecordsInSplit(Configuration conf, FileSplit split, byte[]
                                               recordDelimiterBytes)
        {
            LineRecordReader reader = new LineRecordReader(conf, split, recordDelimiterBytes);
            try
            {
                LongWritable key     = new LongWritable();
                Text         value   = new Text();
                int          records = 0;

                while (reader.Next(key, value))
                {
                    ++records;
                }
                return(records);
            }
            finally
            {
                reader.Close();
            }
        }
Exemplo n.º 13
0
            /// <summary>
            /// Seeks to random positions in one file and checks that the byte found there
            /// matches the byte regenerated from the file's seed.
            /// </summary>
            /// <remarks>
            /// The <tt>key</tt> is the file name, which is also parsed as the random seed;
            /// the <tt>value</tt> is the file size. Nothing is read when the size is zero.
            /// For each of <c>SeeksPerFile</c> iterations the generator is reset to the
            /// seed and advanced in steps of <c>check.Length</c> up to the chosen position.
            /// </remarks>
            /// <param name="key">File name, parseable as a long seed.</param>
            /// <param name="value">File size in bytes.</param>
            /// <param name="collector">Not used by this method.</param>
            /// <param name="reporter">Receives status messages.</param>
            /// <exception cref="System.IO.IOException"/>
            public virtual void Map(Text key, LongWritable value, OutputCollector <K, LongWritable
                                                                                   > collector, Reporter reporter)
            {
                string name = key.ToString();
                long   size = value.Get();
                long   seed = long.Parse(name);

                if (size == 0)
                {
                    return;
                }
                reporter.SetStatus("opening " + name);
                FSDataInputStream @in = fs.Open(new Path(DataDir, name));

                try
                {
                    for (int i = 0; i < SeeksPerFile; i++)
                    {
                        // generate a random position; Math.Abs throws OverflowException
                        // for long.MinValue, so that single value is mapped to 0.
                        long draw     = random.NextLong();
                        long position = (draw == long.MinValue ? 0 : Math.Abs(draw)) % size;
                        // seek file to that position
                        reporter.SetStatus("seeking " + name);
                        @in.Seek(position);
                        byte b = @in.ReadByte();
                        // check that byte matches
                        byte checkByte = 0;
                        // advance random state to that position
                        random.SetSeed(seed);
                        // The counter is a long: an int counter would wrap around before
                        // reaching positions beyond int.MaxValue and never terminate.
                        for (long p = 0; p <= position; p += check.Length)
                        {
                            reporter.SetStatus("generating data for " + name);
                            if (fastCheck)
                            {
                                checkByte = unchecked ((byte)random.Next(byte.MaxValue));
                            }
                            else
                            {
                                random.NextBytes(check);
                                checkByte = check[(int)(position % check.Length)];
                            }
                        }
                        NUnit.Framework.Assert.AreEqual(b, checkByte);
                    }
                }
                finally
                {
                    @in.Close();
                }
            }
        /// <summary>
        /// Writes <c>Records</c> identical key/value pairs to a sequence file using the
        /// given compression type, merges that file with <c>SequenceFile.Sorter</c>, and
        /// asserts that the merged iterator yields <c>Records</c> entries and ends with
        /// a progress of 1.0.
        /// </summary>
        /// <param name="compressionType">Compression type used when writing the file.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void RunTest(SequenceFile.CompressionType compressionType)
        {
            JobConf    jobConf   = new JobConf();
            FileSystem localFs   = FileSystem.GetLocal(jobConf);
            Path       baseDir   = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
            Path       seqFile   = new Path(baseDir, "test.seq");
            Path       mergeTemp = new Path(baseDir, "tmp");

            // Start from a clean directory.
            localFs.Delete(baseDir, true);
            FileInputFormat.SetInputPaths(jobConf, baseDir);
            localFs.Mkdirs(mergeTemp);

            LongWritable recordKey   = new LongWritable();
            Text         recordValue = new Text();

            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                localFs, jobConf, seqFile, typeof(LongWritable), typeof(Text), compressionType,
                new DefaultCodec());
            try
            {
                for (int written = 0; written < Records; written++)
                {
                    recordKey.Set(1234);
                    recordValue.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
                    seqWriter.Append(recordKey, recordValue);
                }
            }
            finally
            {
                seqWriter.Close();
            }

            long writtenBytes = localFs.GetFileStatus(seqFile).GetLen();
            Log.Info("With compression = " + compressionType + ": " + "compressed length = "
                     + writtenBytes);

            SequenceFile.Sorter merger = new SequenceFile.Sorter(
                localFs, jobConf.GetOutputKeyComparator(), jobConf.GetMapOutputKeyClass(),
                jobConf.GetMapOutputValueClass(), jobConf);
            Path[] inputs = new Path[] { seqFile };
            SequenceFile.Sorter.RawKeyValueIterator merged = merger.Merge(inputs, mergeTemp, false);

            int seen = 0;
            while (merged.Next())
            {
                seen++;
            }
            NUnit.Framework.Assert.AreEqual(Records, seen);
            NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
        }
        /// <summary>
        /// Reads all values of one split through the record reader supplied by
        /// <paramref name="format"/>.
        /// </summary>
        /// <remarks>
        /// A fresh <c>Text</c> is created for every record so that the returned list
        /// holds distinct instances. The reader is closed even when reading fails.
        /// </remarks>
        /// <param name="format">Input format that creates the record reader.</param>
        /// <param name="split">The split to read.</param>
        /// <param name="jobConf">Job configuration passed to the record reader.</param>
        /// <returns>The values read from the split, in reader order.</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <Text> ReadSplit(TextInputFormat format, InputSplit split, JobConf
                                              jobConf)
        {
            IList <Text> result = new AList <Text>();
            RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, jobConf,
                                                                              voidReporter);
            try
            {
                LongWritable key   = reader.CreateKey();
                Text         value = reader.CreateValue();

                while (reader.Next(key, value))
                {
                    result.AddItem(value);
                    // Do not reuse the instance that was just stored in the list.
                    value = reader.CreateValue();
                }
            }
            finally
            {
                reader.Close();
            }
            return(result);
        }
Exemplo n.º 16
0
            /// <summary>
            /// Builds one <c>IndirectSplit</c> per entry of the sequence file named by the
            /// <c>IndirectInputFile</c> configuration key.
            /// </summary>
            /// <remarks>
            /// Each entry's value is used as the split path and its key as the long passed
            /// to the split. The sequence file reader is closed before returning,
            /// including when reading fails.
            /// </remarks>
            /// <param name="job">Job context supplying the configuration.</param>
            /// <returns>The splits, in the order the entries were read.</returns>
            /// <exception cref="System.IO.IOException"/>
            public override IList <InputSplit> GetSplits(JobContext job)
            {
                Configuration      conf   = job.GetConfiguration();
                Path               src    = new Path(conf.Get(IndirectInputFile, null));
                FileSystem         fs     = src.GetFileSystem(conf);
                IList <InputSplit> splits = new AList <InputSplit>();
                LongWritable       key    = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                SequenceFile.Reader       sl    = new SequenceFile.Reader(fs, src, conf);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path
                                                                                                        (value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    // Release the underlying file handle.
                    sl.Close();
                }
                return(splits);
            }
Exemplo n.º 17
0
            /// <summary>
            /// Builds one <c>IndirectSplit</c> per entry of the sequence file named by the
            /// <c>GenericMRLoadGenerator.IndirectInputFile</c> configuration key.
            /// </summary>
            /// <remarks>
            /// Each entry's value is used as the split path and its key as the long passed
            /// to the split. The sequence file reader is closed before returning,
            /// including when reading fails.
            /// </remarks>
            /// <param name="job">Job configuration naming the indirect input file.</param>
            /// <param name="numSplits">
            /// Used only as the initial capacity of the split list; the number of
            /// returned splits is the number of entries read.
            /// </param>
            /// <returns>The splits, in the order the entries were read.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
            {
                Path       src = new Path(job.Get(GenericMRLoadGenerator.IndirectInputFile, null));
                FileSystem fs  = src.GetFileSystem(job);
                AList <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit> splits = new AList
                                                                                          <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit>(numSplits);
                LongWritable key = new LongWritable();

                Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text();
                SequenceFile.Reader       sl    = new SequenceFile.Reader(fs, src, job);
                try
                {
                    while (sl.Next(key, value))
                    {
                        splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path
                                                                                                        (value.ToString()), key.Get()));
                    }
                }
                finally
                {
                    // Release the underlying file handle.
                    sl.Close();
                }
                return(Sharpen.Collections.ToArray(splits, new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit
                                                   [splits.Count]));
            }
Exemplo n.º 18
0
        /// <summary>Tests <c>DBInputFormat.DBRecordReader</c>.</summary>
        /// <remarks>
        /// A reader built over mocked configuration objects must create a key holding 0,
        /// create a value of the <c>NullDBWritable</c> type, report position 0, and
        /// return false from its first <c>Next</c> call.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDBRecordReader()
        {
            JobConf         mockJob      = Org.Mockito.Mockito.Mock <JobConf>();
            DBConfiguration mockDbConfig = Org.Mockito.Mockito.Mock <DBConfiguration>();
            string[]        fieldNames   = new string[] { "field1", "filed2" };

            DBInputFormat.DBRecordReader dbReader = new DBInputFormat.DBRecordReader(
                this, new DBInputFormat.DBInputSplit(), typeof(DBInputFormat.NullDBWritable), mockJob,
                DriverForTest.GetConnection(), mockDbConfig, "condition", fieldNames, "table");

            // A freshly created key holds zero.
            LongWritable createdKey = dbReader.CreateKey();
            NUnit.Framework.Assert.AreEqual(0, createdKey.Get());

            // The created value has the type handed to the constructor.
            DBWritable createdValue = ((DBWritable)dbReader.CreateValue());
            NUnit.Framework.Assert.AreEqual("org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable",
                                            createdValue.GetType().FullName);

            NUnit.Framework.Assert.AreEqual(0, dbReader.GetPos());
            NUnit.Framework.Assert.IsFalse(dbReader.Next(createdKey, createdValue));
        }
Exemplo n.º 19
0
        /// <summary>
        /// Verifies the splits produced by <c>NLineInputFormat</c> for a job: every split
        /// except the last must have no locations, be read by a <c>LineRecordReader</c>
        /// with <c>LongWritable</c> keys and <c>Text</c> values, and contain exactly
        /// <paramref name="expectedN"/> lines.
        /// </summary>
        /// <param name="job">Job configuration used to configure the format and read the splits.</param>
        /// <param name="expectedN">Expected number of lines in each checked split.</param>
        /// <exception cref="System.IO.IOException"/>
        internal virtual void CheckFormat(JobConf job, int expectedN)
        {
            NLineInputFormat inputFormat = new NLineInputFormat();
            inputFormat.Configure(job);

            int          ignoredNumSplits = 1;
            InputSplit[] allSplits        = inputFormat.GetSplits(job, ignoredNumSplits);

            // check all splits except last one
            int lastIndex = allSplits.Length - 1;
            for (int index = 0; index < lastIndex; index++)
            {
                InputSplit current = allSplits[index];

                NUnit.Framework.Assert.AreEqual("There are no split locations", 0,
                                                current.GetLocations().Length);

                RecordReader <LongWritable, Text> lineReader =
                    inputFormat.GetRecordReader(current, job, voidReporter);
                Type readerType = lineReader.GetType();
                NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.",
                                                typeof(LineRecordReader), readerType);

                LongWritable key     = lineReader.CreateKey();
                Type         keyType = key.GetType();
                NUnit.Framework.Assert.AreEqual("Key class is LongWritable.", typeof(LongWritable),
                                                keyType);

                Text value     = lineReader.CreateValue();
                Type valueType = value.GetType();
                NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueType);

                int linesRead = 0;
                try
                {
                    while (lineReader.Next(key, value))
                    {
                        linesRead++;
                    }
                }
                finally
                {
                    lineReader.Close();
                }
                NUnit.Framework.Assert.AreEqual("number of lines in split is " + expectedN, expectedN,
                                                linesRead);
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Validates one joined key/tuple pair: the tuple must have the expected size,
        /// the key must be a multiple of 2, 3 and 4 within the <c>Items</c> bounds, and
        /// the three leaf values must be 0, 1 and 2.
        /// </summary>
        /// <param name="k">The key to validate.</param>
        /// <param name="v">The tuple to validate.</param>
        /// <param name="tupleSize">Expected size of <paramref name="v"/>.</param>
        /// <param name="firstTuple">
        /// true when element 0 of <paramref name="v"/> is a nested tuple holding the
        /// first two values.
        /// </param>
        /// <param name="secondTuple">
        /// Consulted only when <paramref name="firstTuple"/> is false: true when
        /// element 1 is a nested tuple holding the last two values.
        /// </param>
        private void ValidateInnerKeyValue(IntWritable k, TupleWritable v, int tupleSize,
                                           bool firstTuple, bool secondTuple)
        {
            string failureMessage = "Unexpected tuple: " + Stringify(k, v);

            NUnit.Framework.Assert.IsTrue(failureMessage, v.Size() == tupleSize);
            int keyValue = k.Get();

            NUnit.Framework.Assert.IsTrue(failureMessage, keyValue % 2 == 0 && keyValue / 2 <= Items);
            NUnit.Framework.Assert.IsTrue(failureMessage, keyValue % 3 == 0 && keyValue / 3 <= Items);
            NUnit.Framework.Assert.IsTrue(failureMessage, keyValue % 4 == 0 && keyValue / 4 <= Items);

            IntWritable  first  = null;
            IntWritable  second = null;
            LongWritable third  = null;

            if (firstTuple)
            {
                // Layout: ((first, second), third)
                TupleWritable nested = ((TupleWritable)v.Get(0));
                first  = (IntWritable)nested.Get(0);
                second = (IntWritable)nested.Get(1);
                third  = (LongWritable)v.Get(1);
            }
            else if (secondTuple)
            {
                // Layout: (first, (second, third))
                first = (IntWritable)v.Get(0);
                TupleWritable nested = ((TupleWritable)v.Get(1));
                second = (IntWritable)nested.Get(0);
                third  = (LongWritable)nested.Get(1);
            }
            else
            {
                // Layout: (first, second, third)
                first  = (IntWritable)v.Get(0);
                second = (IntWritable)v.Get(1);
                third  = (LongWritable)v.Get(2);
            }

            NUnit.Framework.Assert.IsTrue(failureMessage, first.Get() == 0);
            NUnit.Framework.Assert.IsTrue(failureMessage, second.Get() == 1);
            NUnit.Framework.Assert.IsTrue(failureMessage, third.Get() == 2);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Tests the iteration methods of
        /// <see cref="Reader"/>
        /// (<c>Next()</c> and <c>Seek()</c>) both inside and outside the written range.
        /// </summary>
        /// <remarks>
        /// Writes the values 0..Size-1 to an array file, reads them back sequentially,
        /// seeks to an existing position and reads on from there, and finally checks that
        /// seeking past the end is rejected. Any exception is reported through <c>Fail</c>
        /// together with its details.
        /// </remarks>
        public virtual void TestArrayFileIteration()
        {
            int           Size = 10;
            Configuration conf = new Configuration();

            try
            {
                FileSystem       fs     = FileSystem.Get(conf);
                ArrayFile.Writer writer = new ArrayFile.Writer(conf, fs, TestFile, typeof(LongWritable
                                                                                          ), SequenceFile.CompressionType.Record, defaultProgressable);
                NUnit.Framework.Assert.IsNotNull("testArrayFileIteration error !!!", writer);
                try
                {
                    for (int i = 0; i < Size; i++)
                    {
                        writer.Append(new LongWritable(i));
                    }
                }
                finally
                {
                    // Release the writer even when an append fails.
                    writer.Close();
                }
                ArrayFile.Reader reader = new ArrayFile.Reader(fs, TestFile, conf);
                try
                {
                    LongWritable nextWritable = new LongWritable(0);
                    for (int i_1 = 0; i_1 < Size; i_1++)
                    {
                        nextWritable = (LongWritable)reader.Next(nextWritable);
                        Assert.Equal(nextWritable.Get(), i_1);
                    }
                    Assert.True("testArrayFileIteration seek error !!!", reader.Seek
                                    (new LongWritable(6)));
                    nextWritable = (LongWritable)reader.Next(nextWritable);
                    Assert.True("testArrayFileIteration error !!!", reader.Key() ==
                                7);
                    Assert.True("testArrayFileIteration error !!!", nextWritable.Equals
                                    (new LongWritable(7)));
                    NUnit.Framework.Assert.IsFalse("testArrayFileIteration error !!!", reader.Seek(new
                                                                                                   LongWritable(Size + 5)));
                }
                finally
                {
                    // Release the reader even when a read or an assertion fails.
                    reader.Close();
                }
            }
            catch (Exception e)
            {
                // Name the right test and keep the cause: the previous message pointed at
                // testArrayFileWriterConstruction and dropped the exception entirely.
                // NOTE(review): if the Assert shim throws Exception-derived failures, they
                // also land here; their text is now preserved in the failure message.
                Fail("testArrayFileIteration error !!! " + e);
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Verifies that <c>LineRecordReader</c> does not hand the byte order mark at the
        /// start of <c>testBOM.txt</c> through as part of the first line.
        /// </summary>
        /// <remarks>
        /// The file is read as one split covering its whole length, with the maximum line
        /// length set to <c>int.MaxValue</c>. Only the first record is inspected.
        /// </remarks>
        public virtual void TestStripBOM()
        {
            // the test data contains a BOM at the start of the file
            // confirm the BOM is skipped by LineRecordReader
            string Utf8Bom     = "\uFEFF";
            Uri    testFileUrl = GetType().GetClassLoader().GetResource("testBOM.txt");

            NUnit.Framework.Assert.IsNotNull("Cannot find testBOM.txt", testFileUrl);
            FilePath      testFile     = new FilePath(testFileUrl.GetFile());
            Path          testFilePath = new Path(testFile.GetAbsolutePath());
            long          testFileSize = testFile.Length();
            Configuration conf         = new Configuration();

            conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
            // read the data and check whether BOM is skipped
            FileSplit        split  = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
            LineRecordReader reader = new LineRecordReader(conf, split);
            LongWritable     key    = new LongWritable();
            Text             value  = new Text();
            bool firstLine          = true;
            bool skipBOM            = true;

            try
            {
                while (reader.Next(key, value))
                {
                    if (firstLine)
                    {
                        firstLine = false;
                        // Ordinal comparison is required: U+FEFF is an ignorable character in
                        // culture-sensitive comparisons, so the default StartsWith(string)
                        // would report a match for every line and make this test always fail.
                        if (value.ToString().StartsWith(Utf8Bom, System.StringComparison.Ordinal))
                        {
                            skipBOM = false;
                        }
                    }
                }
            }
            finally
            {
                // Close the reader even if reading throws.
                reader.Close();
            }
            NUnit.Framework.Assert.IsTrue("BOM is not skipped", skipBOM);
        }
Exemplo n.º 23
0
        /// <summary>Reads every record of one split and returns them as strings.</summary>
        /// <param name="format">Input format used to open a record reader on the split.</param>
        /// <param name="split">The split to read.</param>
        /// <param name="job">Job configuration passed to the record reader.</param>
        /// <returns>
        /// One string per record, in read order, each built from the first
        /// <c>GetLength()</c> bytes of the record value.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit
                                                split, JobConf job)
        {
            IList <string> records = new AList <string>();
            RecordReader <LongWritable, BytesWritable> recordReader =
                format.GetRecordReader(split, job, voidReporter);
            LongWritable  recordKey   = recordReader.CreateKey();
            BytesWritable recordValue = recordReader.CreateValue();

            try
            {
                while (true)
                {
                    if (!recordReader.Next(recordKey, recordValue))
                    {
                        break;
                    }
                    // The backing buffer may be longer than the record, so honour GetLength().
                    string text = Sharpen.Runtime.GetStringForBytes(
                        recordValue.GetBytes(), 0, recordValue.GetLength());
                    records.AddItem(text);
                }
            }
            finally
            {
                recordReader.Close();
            }
            return records;
        }
Exemplo n.º 24
0
        /// <summary>
        /// Checks that <c>LineRecordReader.Next</c> returns false when reading from the
        /// <c>_InputStream_343</c> test stream, using both stream-based constructors.
        /// </summary>
        /// <remarks>
        /// The first reader is built from a configuration carrying the maximum line length
        /// and the I/O buffer size. After resetting the stream, the second reader is built
        /// from explicit numeric arguments.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestMRMaxLine()
        {
            int maxPos  = 1024 * 1024;
            int maxLine = 10 * 1024;
            int bufSize = 64 * 1024;

            // max LRR pos + LineReader buf
            InputStream  nullStream = new _InputStream_343(bufSize);
            LongWritable lineKey    = new LongWritable();
            Text         lineValue  = new Text();

            Log.Info("Reading a line from /dev/null");

            Configuration conf = new Configuration(false);
            conf.SetInt(LineRecordReader.MaxLineLength, maxLine);
            // used by LRR
            conf.SetInt("io.file.buffer.size", bufSize);

            // test another constructor
            LineRecordReader lineReader = new LineRecordReader(nullStream, 0, maxPos, conf);
            bool gotLine = lineReader.Next(lineKey, lineValue);
            NUnit.Framework.Assert.IsFalse("Read a line from null", gotLine);

            nullStream.Reset();
            lineReader = new LineRecordReader(nullStream, 0L, maxLine, maxPos);
            gotLine    = lineReader.Next(lineKey, lineValue);
            NUnit.Framework.Assert.IsFalse("Read a line from null", gotLine);
        }
Exemplo n.º 25
0
        /// <summary>
        /// Fills the shared <c>source</c> table and writes one sequence file per source.
        /// </summary>
        /// <remarks>
        /// Row <c>i</c> of the table holds the first <c>Items</c> multiples of <c>i + 2</c>.
        /// Each file receives those numbers as keys, every key paired with the source index
        /// as its value. The value is an <c>IntWritable</c> for all sources except the last
        /// one, which uses a <c>LongWritable</c>.
        /// </remarks>
        /// <param name="conf">Configuration passed on to <c>CreateWriters</c>.</param>
        /// <returns>
        /// The path array handed to <c>CreateWriters</c> (presumably populated there; confirm
        /// against that helper).
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private static Path[] GenerateSources(Configuration conf)
        {
            for (int row = 0; row < Sources; ++row)
            {
                int[] multiples = new int[Items];
                source[row] = multiples;
                for (int col = 0; col < Items; ++col)
                {
                    multiples[col] = (row + 2) * (col + 1);
                }
            }

            Path[] paths = new Path[Sources];
            SequenceFile.Writer[] writers = CreateWriters(@base, conf, Sources, paths);
            IntWritable keyHolder = new IntWritable();

            for (int index = 0; index < Sources; ++index)
            {
                Writable payload;
                if (index == Sources - 1)
                {
                    // The last source is the only one carrying a long payload.
                    LongWritable longPayload = new LongWritable();
                    longPayload.Set(index);
                    payload = longPayload;
                }
                else
                {
                    IntWritable intPayload = new IntWritable();
                    intPayload.Set(index);
                    payload = intPayload;
                }

                SequenceFile.Writer writer = writers[index];
                foreach (int item in source[index])
                {
                    keyHolder.Set(item);
                    writer.Append(keyHolder, payload);
                }
                writer.Close();
            }
            return paths;
        }
Exemplo n.º 26
0
        /// <summary>
        /// Checks that <c>CombineTextInputFormat</c> packs the generated files into a single
        /// <c>CombineFileSplit</c> and that reading the split yields the expected values.
        /// </summary>
        /// <remarks>
        /// The work directory is wiped and refilled through <c>CreateFiles</c>. Three rounds
        /// follow. Each round asserts that exactly one split is produced, that the split and
        /// the reader have the expected classes, that no parsed value occurs twice, and that
        /// <c>length</c> distinct values were read in total. The random seed is logged.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            int length   = 10000;
            int numFiles = 10;

            Job    job    = Job.GetInstance(new Configuration(defaultConf));
            Random random = new Random();
            long   seed   = random.NextLong();

            Log.Info("seed = " + seed);
            random.SetSeed(seed);
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);

            // create files with various lengths
            CreateFiles(length, numFiles, random);

            // create a combined split for the files
            CombineTextInputFormat inputFormat = new CombineTextInputFormat();

            for (int round = 0; round < 3; round++)
            {
                // The requested count is only logged; it is still drawn every round.
                int requestedSplits = random.Next(length / 20) + 1;
                Log.Info("splitting: requesting = " + requestedSplits);
                IList<InputSplit> allSplits = inputFormat.GetSplits(job);
                Log.Info("splitting: got =        " + allSplits.Count);

                // we should have a single split as the length is comfortably smaller than
                // the block size
                NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, allSplits.Count);
                InputSplit onlySplit = allSplits[0];
                NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit",
                                                typeof(CombineFileSplit), onlySplit.GetType());

                // check the split
                BitSet seenValues = new BitSet(length);
                Log.Debug("split= " + onlySplit);
                TaskAttemptContext attemptContext =
                    MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader<LongWritable, Text> recordReader =
                    inputFormat.CreateRecordReader(onlySplit, attemptContext);
                NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.",
                                                typeof(CombineFileRecordReader), recordReader.GetType());
                MapContext<LongWritable, Text, LongWritable, Text> mapContext =
                    new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                        job.GetConfiguration(), attemptContext.GetTaskAttemptID(), recordReader,
                        null, null, MapReduceTestUtil.CreateDummyReporter(), onlySplit);
                recordReader.Initialize(onlySplit, mapContext);

                try
                {
                    int recordsRead = 0;
                    while (recordReader.NextKeyValue())
                    {
                        LongWritable currentKey = recordReader.GetCurrentKey();
                        NUnit.Framework.Assert.IsNotNull("Key should not be null.", currentKey);
                        Text currentValue = recordReader.GetCurrentValue();
                        int  parsed       = System.Convert.ToInt32(currentValue.ToString());
                        Log.Debug("read " + parsed);
                        // A bit that is already set means the same value was read twice.
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", seenValues.Get(parsed));
                        seenValues.Set(parsed);
                        recordsRead++;
                    }
                    Log.Debug("split=" + onlySplit + " count=" + recordsRead);
                }
                finally
                {
                    recordReader.Close();
                }
                NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length,
                                                seenValues.Cardinality());
            }
        }
Exemplo n.º 27
0
 /// <summary>
 /// Reads the next record by delegating to the base class implementation.
 /// </summary>
 /// <param name="key">Key object passed through to the base implementation.</param>
 /// <param name="value">Value object passed through to the base implementation.</param>
 /// <returns>The result of the base implementation, unchanged.</returns>
 /// <exception cref="System.IO.IOException"/>
 public override bool Next(LongWritable key, T value)
 {
     bool baseResult = base.Next(key, value);
     return baseResult;
 }
Exemplo n.º 28
0
 /// <summary>
 /// Reads the next record by forwarding the call to the wrapped reader <c>rr</c>.
 /// </summary>
 /// <param name="key">Key object passed through to the wrapped reader.</param>
 /// <param name="value">Value object passed through to the wrapped reader.</param>
 /// <returns>The result of the wrapped reader, unchanged.</returns>
 /// <exception cref="System.IO.IOException"/>
 public virtual bool Next(LongWritable key, T value)
 {
     bool delegateResult = rr.Next(key, value);
     return delegateResult;
 }
Exemplo n.º 29
0
        /// <summary>
        /// Runs a series of randomized round-trip checks of <c>FixedLengthInputFormat</c>.
        /// </summary>
        /// <remarks>
        /// Each iteration creates a file of fixed-length records through <c>CreateFile</c>,
        /// asks the input format for a varying number of splits, reads all splits back and
        /// compares key offsets, record lengths and record contents with what was written.
        /// Iteration 8 uses an empty file, iteration 10 a record length of 1, and the last
        /// iteration requests splits smaller than one record. The random seed is logged.
        /// </remarks>
        /// <param name="codec">
        /// Codec used when creating the test file, or null for none. A non-null codec also
        /// gives the file name a <c>.gz</c> suffix.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private void RunRandomTests(CompressionCodec codec)
        {
            StringBuilder fileName = new StringBuilder("testFormat.txt");

            if (codec != null)
            {
                fileName.Append(".gz");
            }
            localFs.Delete(workDir, true);
            Path file = new Path(workDir, fileName.ToString());
            int  seed = new Random().Next();

            Log.Info("Seed = " + seed);
            Random        random   = new Random(seed);
            int           MaxTests = 20;
            LongWritable  key      = new LongWritable();
            BytesWritable value    = new BytesWritable();

            for (int i = 0; i < MaxTests; i++)
            {
                Log.Info("----------------------------------------------------------");
                // Maximum total records of 999
                int totalRecords = random.Next(999) + 1;
                // Test an empty file
                if (i == 8)
                {
                    totalRecords = 0;
                }
                // Maximum bytes in a record of 100K
                int recordLength = random.Next(1024 * 100) + 1;
                // For the 11th test, force a record length of 1
                if (i == 10)
                {
                    recordLength = 1;
                }
                // The total bytes in the test file
                int fileSize = (totalRecords * recordLength);
                Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
                // Create the job
                JobConf job = new JobConf(defaultConf);
                if (codec != null)
                {
                    ReflectionUtils.SetConf(codec, job);
                }
                // Create the test file
                AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords);
                NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
                //set the fixed length record length config property for the job
                FixedLengthInputFormat.SetRecordLength(job, recordLength);
                int numSplits = 1;
                // Arbitrarily set number of splits.
                if (i > 0)
                {
                    if (i == (MaxTests - 1))
                    {
                        // Test a split size that is less than record len.
                        // Integer division already floors, so Math.Floor is not needed (and
                        // an int argument is ambiguous between its double and decimal
                        // overloads). The clamp keeps a one-byte record from producing a
                        // zero divisor.
                        numSplits = fileSize / Math.Max(1, recordLength / 2);
                    }
                    else
                    {
                        if (MaxTests % i == 0)
                        {
                            // Let us create a split size that is forced to be
                            // smaller than the end file itself, (ensures 1+ splits)
                            numSplits = fileSize / (fileSize - random.Next(fileSize));
                        }
                        else
                        {
                            // Just pick a random split size with no upper bound.
                            // Next() may legitimately return 0; clamp to avoid dividing by it.
                            numSplits = Math.Max(1, fileSize / Math.Max(1, random.Next(int.MaxValue)));
                        }
                    }
                    Log.Info("Number of splits set to: " + numSplits);
                }
                // Setup the input path
                FileInputFormat.SetInputPaths(job, workDir);
                // Try splitting the file in a variety of sizes
                FixedLengthInputFormat format = new FixedLengthInputFormat();
                format.Configure(job);
                InputSplit[] splits = format.GetSplits(job, numSplits);
                Log.Info("Actual number of splits = " + splits.Length);
                // Test combined split lengths = total file size
                long recordOffset = 0;
                int  recordNumber = 0;
                foreach (InputSplit split in splits)
                {
                    RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split,
                                                                                               job, voidReporter);
                    try
                    {
                        Type clazz = reader.GetType();
                        NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:"
                                                        , typeof(FixedLengthRecordReader), clazz);
                        // Plow through the records in this split
                        while (reader.Next(key, value))
                        {
                            NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength
                                                                                   ), key.Get());
                            string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value
                                                                                   .GetLength());
                            NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength
                                                                ());
                            NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber
                                                          < totalRecords);
                            string origRecord = recordList[recordNumber];
                            NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString
                                                            );
                            recordNumber++;
                        }
                    }
                    finally
                    {
                        // Close the reader even when an assertion fails, as ReadSplit does.
                        reader.Close();
                    }
                }
                NUnit.Framework.Assert.AreEqual("Total original records should be total read records:"
                                                , recordList.Count, recordNumber);
            }
        }
Exemplo n.º 30
0
        /// <summary>
        /// Writes <c>Nentry</c> records keyed by <c>LongWritable</c> into a "gz" TFile that
        /// uses the LongWritable comparator, then scans the file and checks the values.
        /// </summary>
        /// <remarks>
        /// The key for record <c>i</c> is <c>Cube(i - Nentry / 2)</c> and the value is
        /// <c>BuildValue(i)</c>. On read-back, the n-th scanned value must equal
        /// <c>BuildValue(n)</c>, so the records must come back in the order they were written.
        /// </remarks>
        public virtual void TestSortedLongWritable()
        {
            Configuration      conf       = new Configuration();
            Path               tfilePath  = new Path(Root, name);
            FileSystem         fileSystem = tfilePath.GetFileSystem(conf);
            FSDataOutputStream output     = fileSystem.Create(tfilePath);

            // Phase 1: write the file.
            try
            {
                TFile.Writer tfileWriter = new TFile.Writer(output, BlockSize, "gz",
                                                            jClassLongWritableComparator, conf);
                try
                {
                    LongWritable sortKey = new LongWritable(0);
                    long         written = 0;
                    while (written < Nentry)
                    {
                        sortKey.Set(Cube(written - Nentry / 2));

                        // Key and value each get their own stream, closed before moving on.
                        DataOutputStream keyStream = tfileWriter.PrepareAppendKey(-1);
                        try
                        {
                            sortKey.Write(keyStream);
                        }
                        finally
                        {
                            keyStream.Close();
                        }

                        DataOutputStream valueStream = tfileWriter.PrepareAppendValue(-1);
                        try
                        {
                            valueStream.Write(Runtime.GetBytesForString(BuildValue(written)));
                        }
                        finally
                        {
                            valueStream.Close();
                        }
                        ++written;
                    }
                }
                finally
                {
                    tfileWriter.Close();
                }
            }
            finally
            {
                output.Close();
            }

            // Phase 2: read the file back and compare values position by position.
            FSDataInputStream input = fileSystem.Open(tfilePath);

            try
            {
                TFile.Reader tfileReader = new TFile.Reader(
                    input, fileSystem.GetFileStatus(tfilePath).GetLen(), conf);
                try
                {
                    TFile.Reader.Scanner cursor   = tfileReader.CreateScanner();
                    long                 position = 0;
                    BytesWritable        payload  = new BytesWritable();
                    while (!cursor.AtEnd())
                    {
                        cursor.Entry().GetValue(payload);
                        string expected = BuildValue(position);
                        string actual   = Runtime.GetStringForBytes(payload.GetBytes(), 0, payload.GetLength());
                        Assert.Equal(expected, actual);
                        ++position;
                        cursor.Advance();
                    }
                }
                finally
                {
                    tfileReader.Close();
                }
            }
            finally
            {
                input.Close();
            }
        }