/// <summary>Map file name and offset into statistical data.</summary>
/// <remarks>
/// The map task is to get the <tt>key</tt>, which contains the file name, and the
/// <tt>value</tt>, which is the offset within the file.
/// The parameters are passed to the abstract method
/// <see cref="IOMapperBase{T}.DoIO(Org.Apache.Hadoop.Mapred.Reporter, string, long)"/>,
/// which performs the I/O operation, usually reading or writing data, and then
/// <see cref="IOMapperBase{T}.CollectStats(Org.Apache.Hadoop.Mapred.OutputCollector{K, V}, string, long, object)"/>
/// is called to prepare stat data for a subsequent reducer.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void Map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter) {
    string name = key.ToString();
    long longValue = value.Get();
    reporter.SetStatus("starting " + name + " ::host = " + hostName);
    this.stream = GetIOStream(name);
    T statValue = null;
    long tStart = Runtime.CurrentTimeMillis();
    try {
        statValue = DoIO(reporter, name, longValue);
    } finally {
        if (stream != null) {
            stream.Close();
        }
    }
    long tEnd = Runtime.CurrentTimeMillis();
    long execTime = tEnd - tStart;
    CollectStats(output, name, execTime, statValue);
    reporter.SetStatus("finished " + name + " ::host = " + hostName);
}
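The mapper above simply brackets DoIO() between two timestamps and forwards the elapsed time to CollectStats(). As a rough stand-alone illustration of that pattern, here is a minimal plain-C# sketch; the names (RunAndCollect, doIo, stats) are hypothetical stand-ins for DoIO/CollectStats, not the Hadoop API.

// Minimal sketch (illustrative only): time an I/O operation and hand the
// elapsed milliseconds plus the operation's result to a stats collector.
using System;
using System.Collections.Generic;
using System.Diagnostics;

class IoTimingSketch {
    public static void RunAndCollect(string name, Func<string, long> doIo,
                                     IDictionary<string, string> stats) {
        Stopwatch timer = Stopwatch.StartNew();
        long statValue;
        try {
            statValue = doIo(name);   // the actual read or write
        } finally {
            timer.Stop();             // the real mapper also closes its stream here
        }
        // Analogous to CollectStats(output, name, execTime, statValue)
        stats["exectime:" + name] = timer.ElapsedMilliseconds.ToString();
        stats["result:" + name] = statValue.ToString();
    }

    public static void Main() {
        var stats = new Dictionary<string, string>();
        RunAndCollect("demo", name => { System.Threading.Thread.Sleep(10); return 42; }, stats);
        Console.WriteLine(stats["exectime:demo"] + " ms, result " + stats["result:demo"]);
    }
}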
public virtual void TestInitNextRecordReader() {
    JobConf conf = new JobConf();
    Path[] paths = new Path[3];
    long[] fileLength = new long[3];
    FilePath[] files = new FilePath[3];
    LongWritable key = new LongWritable(1);
    Text value = new Text();
    try {
        for (int i = 0; i < 3; i++) {
            fileLength[i] = i;
            FilePath dir = new FilePath(outDir.ToString());
            dir.Mkdir();
            files[i] = new FilePath(dir, "testfile" + i);
            FileWriter fileWriter = new FileWriter(files[i]);
            fileWriter.Close();
            paths[i] = new Path(outDir + "/testfile" + i);
        }
        CombineFileSplit combineFileSplit = new CombineFileSplit(conf, paths, fileLength);
        Reporter reporter = Org.Mockito.Mockito.Mock<Reporter>();
        CombineFileRecordReader cfrr = new CombineFileRecordReader(conf, combineFileSplit, reporter,
            typeof(TestCombineFileRecordReader.TextRecordReaderWrapper));
        Org.Mockito.Mockito.Verify(reporter).Progress();
        NUnit.Framework.Assert.IsFalse(cfrr.Next(key, value));
        Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Times(3)).Progress();
    } finally {
        FileUtil.FullyDelete(new FilePath(outDir.ToString()));
    }
}
public virtual void TestMultipleClose() {
    Uri testFileUrl = GetType().GetClassLoader().GetResource("recordSpanningMultipleSplits.txt.bz2");
    NUnit.Framework.Assert.IsNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    FilePath testFile = new FilePath(testFileUrl.GetFile());
    Path testFilePath = new Path(testFile.GetAbsolutePath());
    long testFileSize = testFile.Length();
    Configuration conf = new Configuration();
    conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
    LineRecordReader reader = new LineRecordReader(conf, split);
    LongWritable key = new LongWritable();
    Text value = new Text();
    //noinspection StatementWithEmptyBody
    while (reader.Next(key, value)) {
    }
    reader.Close();
    reader.Close();
    BZip2Codec codec = new BZip2Codec();
    codec.SetConf(conf);
    ICollection<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.AddItem(CodecPool.GetDecompressor(codec));
    }
    NUnit.Framework.Assert.AreEqual(10, decompressors.Count);
}
// Use the LineRecordReader to read records from the file
/// <exception cref="System.IO.IOException"/>
public virtual AList<string> ReadRecords(Uri testFileUrl, int splitSize) {
    // Set up context
    FilePath testFile = new FilePath(testFileUrl.GetFile());
    long testFileSize = testFile.Length();
    Path testFilePath = new Path(testFile.GetAbsolutePath());
    Configuration conf = new Configuration();
    conf.SetInt("io.file.buffer.size", 1);
    // Gather the records returned by the record reader
    AList<string> records = new AList<string>();
    long offset = 0;
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (offset < testFileSize) {
        FileSplit split = new FileSplit(testFilePath, offset, splitSize, (string[])null);
        LineRecordReader reader = new LineRecordReader(conf, split);
        while (reader.Next(key, value)) {
            records.AddItem(value.ToString());
        }
        offset += splitSize;
    }
    return records;
}
private void addEvent(int operation, long currentTransaction, long rowId, Object row) {
    this.operation.set(operation);
    this.currentTransaction.set(currentTransaction);
    // If this is an insert, originalTransaction should be set to this transaction. If not,
    // it will be reset by the following if anyway.
    long originalTransaction = currentTransaction;
    if (operation == DELETE_OPERATION || operation == UPDATE_OPERATION) {
        Object rowIdValue = rowInspector.getStructFieldData(row, recIdField);
        originalTransaction = origTxnInspector.get(
            recIdInspector.getStructFieldData(rowIdValue, originalTxnField));
        rowId = rowIdInspector.get(recIdInspector.getStructFieldData(rowIdValue, rowIdField));
    } else if (operation == INSERT_OPERATION) {
        rowId += rowIdOffset;
    }
    this.rowId.set(rowId);
    this.originalTransaction.set(originalTransaction);
    item.setFieldValue(OrcRecordUpdater.ROW, (operation == DELETE_OPERATION ? null : row));
    indexBuilder.addKey(operation, originalTransaction, bucket.get(), rowId);
    writer.addRow(item);
}
/// <summary>write the long value</summary>
/// <exception cref="System.IO.IOException"/>
internal static void WriteLong(long value, DataOutputStream @out) {
    LongWritable uLong = TlData.Get().ULong;
    uLong.Set(value);
    uLong.Write(@out);
}
/// <summary>read the long value</summary>
/// <exception cref="System.IO.IOException"/>
internal static long ReadLong(DataInput @in) {
    LongWritable uLong = TlData.Get().ULong;
    uLong.ReadFields(@in);
    return uLong.Get();
}
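WriteLong and ReadLong above reuse a thread-local LongWritable, which serializes a long as eight big-endian bytes via DataOutput. A minimal, self-contained C# sketch of the same round trip follows; the helpers below are illustrative only, not the Hadoop API.

// Sketch: round-trip a long through a stream in big-endian byte order,
// the same layout LongWritable uses on the wire.
using System;
using System.IO;

static class LongRoundTripSketch {
    public static void WriteLong(long value, Stream output) {
        byte[] buf = new byte[8];
        for (int i = 7; i >= 0; i--) {       // buf[0] ends up as the most significant byte
            buf[i] = (byte)(value & 0xFF);
            value >>= 8;
        }
        output.Write(buf, 0, 8);
    }

    public static long ReadLong(Stream input) {
        long result = 0;
        for (int i = 0; i < 8; i++) {
            int b = input.ReadByte();
            if (b < 0) throw new EndOfStreamException();
            result = (result << 8) | (long)b;
        }
        return result;
    }

    public static void Main() {
        using (MemoryStream ms = new MemoryStream()) {
            WriteLong(1234567890123L, ms);
            ms.Position = 0;
            Console.WriteLine(ReadLong(ms));   // 1234567890123
        }
    }
}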
/// <summary><inheritDoc/></summary>
/// <exception cref="System.IO.IOException"/>
public override bool NextKeyValue() {
    try {
        if (key == null) {
            key = new LongWritable();
        }
        if (value == null) {
            value = CreateValue();
        }
        if (null == this.results) {
            // First time into this method, run the query.
            this.results = ExecuteQuery(GetSelectQuery());
        }
        if (!results.Next()) {
            return false;
        }
        // Set the key field value as the output key value
        key.Set(pos + split.GetStart());
        value.ReadFields(results);
        pos++;
    } catch (SQLException e) {
        throw new IOException("SQLException in nextKeyValue", e);
    }
    return true;
}
/// <summary>
/// Transfers data from a FileChannel using
/// <see cref="FileChannel.TransferTo(long, long, WritableByteChannel)"/>.
/// Updates <code>waitForWritableTime</code> and <code>transferToTime</code>
/// with the time spent blocked on the network and the time spent transferring
/// data from disk to network, respectively.
/// Similar to readFully(), this waits until the requested amount of data has been transferred.
/// </summary>
/// <param name="fileCh">FileChannel to transfer data from.</param>
/// <param name="position">Position within the channel where the transfer begins.</param>
/// <param name="count">Number of bytes to transfer.</param>
/// <param name="waitForWritableTime">Nanoseconds spent waiting for the socket to become writable.</param>
/// <param name="transferTime">Nanoseconds spent transferring data.</param>
/// <exception cref="System.IO.EOFException">
/// If the end of the input file is reached before the requested number of bytes are transferred.
/// </exception>
/// <exception cref="SocketTimeoutException">
/// If this channel blocks the transfer longer than the timeout for this stream.
/// </exception>
/// <exception cref="System.IO.IOException">
/// Includes any exception thrown by
/// <see cref="FileChannel.TransferTo(long, long, WritableByteChannel)"/>.
/// </exception>
public virtual void TransferToFully(FileChannel fileCh, long position, int count,
    LongWritable waitForWritableTime, LongWritable transferToTime) {
    long waitTime = 0;
    long transferTime = 0;
    while (count > 0) {
        /*
         * Ideally we should wait after transferTo returns 0. But because of
         * a bug in JRE on Linux (http://bugs.sun.com/view_bug.do?bug_id=5103988),
         * which throws an exception instead of returning 0, we wait for the
         * channel to be writable before writing to it. If you ever see
         * IOException with message "Resource temporarily unavailable"
         * thrown here, please let us know.
         *
         * Once we move to JAVA SE 7, wait should be moved to correct place.
         */
        long start = Runtime.NanoTime();
        WaitForWritable();
        long wait = Runtime.NanoTime();
        int nTransfered = (int)fileCh.TransferTo(position, count, GetChannel());
        if (nTransfered == 0) {
            // Check if the end of the file has been reached.
            if (position >= fileCh.Size()) {
                throw new EOFException("EOF Reached. file size is " + fileCh.Size() + " and " +
                    count + " more bytes left to be " + "transfered.");
            }
        } else {
            // Otherwise assume the socket is full.
            //waitForWritable(); // see comment above.
            if (nTransfered < 0) {
                throw new IOException("Unexpected return of " + nTransfered + " from transferTo()");
            } else {
                position += nTransfered;
                count -= nTransfered;
            }
        }
        long transfer = Runtime.NanoTime();
        waitTime += wait - start;
        transferTime += transfer - wait;
    }
    if (waitForWritableTime != null) {
        waitForWritableTime.Set(waitTime);
    }
    if (transferToTime != null) {
        transferToTime.Set(transferTime);
    }
}
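The method above does two things at once: it keeps calling transferTo() until the full byte count has been sent, and it splits the elapsed time into "waiting for writability" versus "actually transferring". A rough stand-alone sketch of that accounting pattern is below; the delegates stand in for WaitForWritable() and FileChannel.TransferTo(), and none of this is the HDFS implementation.

// Illustrative sketch: loop until all bytes are pushed, accounting wait time
// and transfer time separately via a Stopwatch.
using System;
using System.Diagnostics;

class TransferAccountingSketch {
    public static void TransferFully(Action waitForWritable, Func<long, int, int> transferFrom,
                                     long position, int count,
                                     out long waitNanos, out long transferNanos) {
        waitNanos = 0;
        transferNanos = 0;
        double nanosPerTick = 1_000_000_000.0 / Stopwatch.Frequency;
        Stopwatch sw = new Stopwatch();
        while (count > 0) {
            sw.Restart();
            waitForWritable();                          // block until the sink can take data
            long waitedTicks = sw.ElapsedTicks;
            int sent = transferFrom(position, count);   // may move fewer bytes than asked for
            long totalTicks = sw.ElapsedTicks;
            if (sent <= 0) {
                // The real code distinguishes EOF (throws EOFException) from a momentarily full socket.
                throw new InvalidOperationException("transfer returned " + sent);
            }
            position += sent;
            count -= sent;
            waitNanos += (long)(waitedTicks * nanosPerTick);
            transferNanos += (long)((totalTicks - waitedTicks) * nanosPerTick);
        }
    }
}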
/// <exception cref="System.IO.IOException"/> public KeyValueLineRecordReader(Configuration job, FileSplit split) { lineRecordReader = new LineRecordReader(job, split); dummyKey = lineRecordReader.CreateKey(); innerValue = lineRecordReader.CreateValue(); string sepStr = job.Get("mapreduce.input.keyvaluelinerecordreader.key.value.separator" , "\t"); this.separator = unchecked ((byte)sepStr[0]); }
public virtual void TestUncompressedInputDefaultDelimiterPosValue() {
    Configuration conf = new Configuration();
    string inputData = "1234567890\r\n12\r\n345";
    Path inputFile = CreateInputFile(conf, inputData);
    conf.SetInt("io.file.buffer.size", 10);
    conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
    FileSplit split = new FileSplit(inputFile, 0, 15, (string[])null);
    LineRecordReader reader = new LineRecordReader(conf, split, null);
    LongWritable key = new LongWritable();
    Text value = new Text();
    reader.Next(key, value);
    // Get first record: "1234567890"
    NUnit.Framework.Assert.AreEqual(10, value.GetLength());
    // Position should be 12 right after "1234567890\r\n"
    NUnit.Framework.Assert.AreEqual(12, reader.GetPos());
    reader.Next(key, value);
    // Get second record: "12"
    NUnit.Framework.Assert.AreEqual(2, value.GetLength());
    // Position should be 16 right after "1234567890\r\n12\r\n"
    NUnit.Framework.Assert.AreEqual(16, reader.GetPos());
    NUnit.Framework.Assert.IsFalse(reader.Next(key, value));
    split = new FileSplit(inputFile, 15, 4, (string[])null);
    reader = new LineRecordReader(conf, split, null);
    // The second split dropped the first record "\n";
    // the position should be 16 right after "1234567890\r\n12\r\n"
    NUnit.Framework.Assert.AreEqual(16, reader.GetPos());
    reader.Next(key, value);
    // Get third record: "345"
    NUnit.Framework.Assert.AreEqual(3, value.GetLength());
    // Position should be 19 right after "1234567890\r\n12\r\n345"
    NUnit.Framework.Assert.AreEqual(19, reader.GetPos());
    NUnit.Framework.Assert.IsFalse(reader.Next(key, value));
    NUnit.Framework.Assert.AreEqual(19, reader.GetPos());
    inputData = "123456789\r\r\n";
    inputFile = CreateInputFile(conf, inputData);
    split = new FileSplit(inputFile, 0, 12, (string[])null);
    reader = new LineRecordReader(conf, split, null);
    reader.Next(key, value);
    // Get first record: "123456789"
    NUnit.Framework.Assert.AreEqual(9, value.GetLength());
    // Position should be 10 right after "123456789\r"
    NUnit.Framework.Assert.AreEqual(10, reader.GetPos());
    reader.Next(key, value);
    // Get second record: ""
    NUnit.Framework.Assert.AreEqual(0, value.GetLength());
    // Position should be 12 right after "123456789\r\r\n"
    NUnit.Framework.Assert.AreEqual(12, reader.GetPos());
    NUnit.Framework.Assert.IsFalse(reader.Next(key, value));
    NUnit.Framework.Assert.AreEqual(12, reader.GetPos());
}
/// <exception cref="System.IO.IOException"/> private void TestSplitRecordsForFile(Configuration conf, long firstSplitLength, long testFileSize, Path testFilePath) { conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue); NUnit.Framework.Assert.IsTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength); string delimiter = conf.Get("textinputformat.record.delimiter"); byte[] recordDelimiterBytes = null; if (null != delimiter) { recordDelimiterBytes = Sharpen.Runtime.GetBytesForString(delimiter, Charsets.Utf8 ); } // read the data without splitting to count the records FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (string[])null); LineRecordReader reader = new LineRecordReader(conf, split, recordDelimiterBytes); LongWritable key = new LongWritable(); Text value = new Text(); int numRecordsNoSplits = 0; while (reader.Next(key, value)) { ++numRecordsNoSplits; } reader.Close(); // count the records in the first split split = new FileSplit(testFilePath, 0, firstSplitLength, (string[])null); reader = new LineRecordReader(conf, split, recordDelimiterBytes); int numRecordsFirstSplit = 0; while (reader.Next(key, value)) { ++numRecordsFirstSplit; } reader.Close(); // count the records in the second split split = new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength , (string[])null); reader = new LineRecordReader(conf, split, recordDelimiterBytes); int numRecordsRemainingSplits = 0; while (reader.Next(key, value)) { ++numRecordsRemainingSplits; } reader.Close(); NUnit.Framework.Assert.AreEqual("Unexpected number of records in split", numRecordsNoSplits , numRecordsFirstSplit + numRecordsRemainingSplits); }
/// <exception cref="System.IO.IOException"/> public virtual void Map(Text key, LongWritable value, OutputCollector <K, LongWritable > collector, Reporter reporter) { string name = key.ToString(); long size = value.Get(); long seed = long.Parse(name); if (size == 0) { return; } reporter.SetStatus("opening " + name); FSDataInputStream @in = fs.Open(new Path(DataDir, name)); try { for (int i = 0; i < SeeksPerFile; i++) { // generate a random position long position = Math.Abs(random.NextLong()) % size; // seek file to that position reporter.SetStatus("seeking " + name); @in.Seek(position); byte b = @in.ReadByte(); // check that byte matches byte checkByte = 0; // advance random state to that position random.SetSeed(seed); for (int p = 0; p <= position; p += check.Length) { reporter.SetStatus("generating data for " + name); if (fastCheck) { checkByte = unchecked ((byte)random.Next(byte.MaxValue)); } else { random.NextBytes(check); checkByte = check[(int)(position % check.Length)]; } } NUnit.Framework.Assert.AreEqual(b, checkByte); } } finally { @in.Close(); } }
/// <exception cref="System.IO.IOException"/> public virtual void RunTest(SequenceFile.CompressionType compressionType) { JobConf job = new JobConf(); FileSystem fs = FileSystem.GetLocal(job); Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Path tempDir = new Path(dir, "tmp"); fs.Delete(dir, true); FileInputFormat.SetInputPaths(job, dir); fs.Mkdirs(tempDir); LongWritable tkey = new LongWritable(); Text tval = new Text(); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, job, file, typeof(LongWritable ), typeof(Text), compressionType, new DefaultCodec()); try { for (int i = 0; i < Records; ++i) { tkey.Set(1234); tval.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue"); writer.Append(tkey, tval); } } finally { writer.Close(); } long fileLength = fs.GetFileStatus(file).GetLen(); Log.Info("With compression = " + compressionType + ": " + "compressed length = " + fileLength); SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, job.GetOutputKeyComparator (), job.GetMapOutputKeyClass(), job.GetMapOutputValueClass(), job); Path[] paths = new Path[] { file }; SequenceFile.Sorter.RawKeyValueIterator rIter = sorter.Merge(paths, tempDir, false ); int count = 0; while (rIter.Next()) { count++; } NUnit.Framework.Assert.AreEqual(Records, count); NUnit.Framework.Assert.AreEqual(1.0f, rIter.GetProgress().Get()); }
/// <exception cref="System.IO.IOException"/> private static IList <Text> ReadSplit(TextInputFormat format, InputSplit split, JobConf jobConf) { IList <Text> result = new AList <Text>(); RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, jobConf, voidReporter); LongWritable key = reader.CreateKey(); Text value = reader.CreateValue(); while (reader.Next(key, value)) { result.AddItem(value); value = reader.CreateValue(); } reader.Close(); return(result); }
/// <exception cref="System.IO.IOException"/> public override IList <InputSplit> GetSplits(JobContext job) { Configuration conf = job.GetConfiguration(); Path src = new Path(conf.Get(IndirectInputFile, null)); FileSystem fs = src.GetFileSystem(conf); IList <InputSplit> splits = new AList <InputSplit>(); LongWritable key = new LongWritable(); Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text(); for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf); sl.Next(key , value);) { splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path (value.ToString()), key.Get())); } return(splits); }
/// <exception cref="System.IO.IOException"/> public virtual InputSplit[] GetSplits(JobConf job, int numSplits) { Path src = new Path(job.Get(GenericMRLoadGenerator.IndirectInputFile, null)); FileSystem fs = src.GetFileSystem(job); AList <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit> splits = new AList <GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit>(numSplits); LongWritable key = new LongWritable(); Org.Apache.Hadoop.IO.Text value = new Org.Apache.Hadoop.IO.Text(); for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job); sl.Next(key, value);) { splits.AddItem(new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit(new Path (value.ToString()), key.Get())); } return(Sharpen.Collections.ToArray(splits, new GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit [splits.Count])); }
/// <summary>test DBRecordReader.</summary>
/// <remarks>
/// Test DBRecordReader: the reader should create keys and values and track its position.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDBRecordReader() {
    JobConf job = Org.Mockito.Mockito.Mock<JobConf>();
    DBConfiguration dbConfig = Org.Mockito.Mockito.Mock<DBConfiguration>();
    string[] fields = new string[] { "field1", "filed2" };
    DBInputFormat.DBRecordReader reader = new DBInputFormat.DBRecordReader(this,
        new DBInputFormat.DBInputSplit(), typeof(DBInputFormat.NullDBWritable), job,
        DriverForTest.GetConnection(), dbConfig, "condition", fields, "table");
    LongWritable key = reader.CreateKey();
    NUnit.Framework.Assert.AreEqual(0, key.Get());
    DBWritable value = ((DBWritable)reader.CreateValue());
    NUnit.Framework.Assert.AreEqual("org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable",
        value.GetType().FullName);
    NUnit.Framework.Assert.AreEqual(0, reader.GetPos());
    NUnit.Framework.Assert.IsFalse(reader.Next(key, value));
}
// A reporter that does nothing
/// <exception cref="System.IO.IOException"/>
internal virtual void CheckFormat(JobConf job, int expectedN) {
    NLineInputFormat format = new NLineInputFormat();
    format.Configure(job);
    int ignoredNumSplits = 1;
    InputSplit[] splits = format.GetSplits(job, ignoredNumSplits);
    // check all splits except last one
    int count = 0;
    for (int j = 0; j < splits.Length - 1; j++) {
        NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[j].GetLocations().Length);
        RecordReader<LongWritable, Text> reader = format.GetRecordReader(splits[j], job, voidReporter);
        Type readerClass = reader.GetType();
        NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader), readerClass);
        LongWritable key = reader.CreateKey();
        Type keyClass = key.GetType();
        NUnit.Framework.Assert.AreEqual("Key class is LongWritable.", typeof(LongWritable), keyClass);
        Text value = reader.CreateValue();
        Type valueClass = value.GetType();
        NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass);
        try {
            count = 0;
            while (reader.Next(key, value)) {
                count++;
            }
        } finally {
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual("number of lines in split is " + expectedN, expectedN, count);
    }
}
private void ValidateInnerKeyValue(IntWritable k, TupleWritable v, int tupleSize,
    bool firstTuple, bool secondTuple) {
    string kvstr = "Unexpected tuple: " + Stringify(k, v);
    NUnit.Framework.Assert.IsTrue(kvstr, v.Size() == tupleSize);
    int key = k.Get();
    IntWritable val0 = null;
    IntWritable val1 = null;
    LongWritable val2 = null;
    NUnit.Framework.Assert.IsTrue(kvstr, key % 2 == 0 && key / 2 <= Items);
    NUnit.Framework.Assert.IsTrue(kvstr, key % 3 == 0 && key / 3 <= Items);
    NUnit.Framework.Assert.IsTrue(kvstr, key % 4 == 0 && key / 4 <= Items);
    if (firstTuple) {
        TupleWritable v0 = ((TupleWritable)v.Get(0));
        val0 = (IntWritable)v0.Get(0);
        val1 = (IntWritable)v0.Get(1);
        val2 = (LongWritable)v.Get(1);
    } else {
        if (secondTuple) {
            val0 = (IntWritable)v.Get(0);
            TupleWritable v1 = ((TupleWritable)v.Get(1));
            val1 = (IntWritable)v1.Get(0);
            val2 = (LongWritable)v1.Get(1);
        } else {
            val0 = (IntWritable)v.Get(0);
            val1 = (IntWritable)v.Get(1);
            val2 = (LongWritable)v.Get(2);
        }
    }
    NUnit.Framework.Assert.IsTrue(kvstr, val0.Get() == 0);
    NUnit.Framework.Assert.IsTrue(kvstr, val1.Get() == 1);
    NUnit.Framework.Assert.IsTrue(kvstr, val2.Get() == 2);
}
/// <summary> /// test on /// <see cref="Reader"/> /// iteration methods /// <pre> /// <c>next(), seek()</c> /// in and out of range. /// </pre> /// </summary> public virtual void TestArrayFileIteration() { int Size = 10; Configuration conf = new Configuration(); try { FileSystem fs = FileSystem.Get(conf); ArrayFile.Writer writer = new ArrayFile.Writer(conf, fs, TestFile, typeof(LongWritable ), SequenceFile.CompressionType.Record, defaultProgressable); NUnit.Framework.Assert.IsNotNull("testArrayFileIteration error !!!", writer); for (int i = 0; i < Size; i++) { writer.Append(new LongWritable(i)); } writer.Close(); ArrayFile.Reader reader = new ArrayFile.Reader(fs, TestFile, conf); LongWritable nextWritable = new LongWritable(0); for (int i_1 = 0; i_1 < Size; i_1++) { nextWritable = (LongWritable)reader.Next(nextWritable); Assert.Equal(nextWritable.Get(), i_1); } Assert.True("testArrayFileIteration seek error !!!", reader.Seek (new LongWritable(6))); nextWritable = (LongWritable)reader.Next(nextWritable); Assert.True("testArrayFileIteration error !!!", reader.Key() == 7); Assert.True("testArrayFileIteration error !!!", nextWritable.Equals (new LongWritable(7))); NUnit.Framework.Assert.IsFalse("testArrayFileIteration error !!!", reader.Seek(new LongWritable(Size + 5))); reader.Close(); } catch (Exception) { Fail("testArrayFileWriterConstruction error !!!"); } }
public virtual void TestStripBOM() {
    // the test data contains a BOM at the start of the file
    // confirm the BOM is skipped by LineRecordReader
    string Utf8Bom = "\uFEFF";
    Uri testFileUrl = GetType().GetClassLoader().GetResource("testBOM.txt");
    NUnit.Framework.Assert.IsNotNull("Cannot find testBOM.txt", testFileUrl);
    FilePath testFile = new FilePath(testFileUrl.GetFile());
    Path testFilePath = new Path(testFile.GetAbsolutePath());
    long testFileSize = testFile.Length();
    Configuration conf = new Configuration();
    conf.SetInt(LineRecordReader.MaxLineLength, int.MaxValue);
    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (string[])null);
    LineRecordReader reader = new LineRecordReader(conf, split);
    LongWritable key = new LongWritable();
    Text value = new Text();
    int numRecords = 0;
    bool firstLine = true;
    bool skipBOM = true;
    while (reader.Next(key, value)) {
        if (firstLine) {
            firstLine = false;
            if (value.ToString().StartsWith(Utf8Bom)) {
                skipBOM = false;
            }
        }
        ++numRecords;
    }
    reader.Close();
    NUnit.Framework.Assert.IsTrue("BOM is not skipped", skipBOM);
}
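The test above only verifies that the first record no longer starts with U+FEFF. A tiny stand-alone sketch of the BOM-stripping idea, with hypothetical names (this is not the LineRecordReader code):

// Sketch: skip a UTF-8 BOM at the start of decoded text, the behaviour
// the test expects LineRecordReader to apply to a file's first record.
using System;

class BomStripSketch {
    const char Utf8Bom = '\uFEFF';

    public static string StripLeadingBom(string firstRecord) {
        return firstRecord.Length > 0 && firstRecord[0] == Utf8Bom
            ? firstRecord.Substring(1)
            : firstRecord;
    }

    public static void Main() {
        Console.WriteLine(StripLeadingBom("\uFEFFhello") == "hello");   // True
    }
}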
/// <exception cref="System.IO.IOException"/> private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit split, JobConf job) { IList <string> result = new AList <string>(); RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split, job, voidReporter); LongWritable key = reader.CreateKey(); BytesWritable value = reader.CreateValue(); try { while (reader.Next(key, value)) { result.AddItem(Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength ())); } } finally { reader.Close(); } return(result); }
/// <exception cref="System.Exception"/> public virtual void TestMRMaxLine() { int Maxpos = 1024 * 1024; int Maxline = 10 * 1024; int Buf = 64 * 1024; InputStream infNull = new _InputStream_343(Buf); // max LRR pos + LineReader buf LongWritable key = new LongWritable(); Text val = new Text(); Log.Info("Reading a line from /dev/null"); Configuration conf = new Configuration(false); conf.SetInt(LineRecordReader.MaxLineLength, Maxline); conf.SetInt("io.file.buffer.size", Buf); // used by LRR // test another constructor LineRecordReader lrr = new LineRecordReader(infNull, 0, Maxpos, conf); NUnit.Framework.Assert.IsFalse("Read a line from null", lrr.Next(key, val)); infNull.Reset(); lrr = new LineRecordReader(infNull, 0L, Maxline, Maxpos); NUnit.Framework.Assert.IsFalse("Read a line from null", lrr.Next(key, val)); }
/// <exception cref="System.IO.IOException"/> private static Path[] GenerateSources(Configuration conf) { for (int i = 0; i < Sources; ++i) { source[i] = new int[Items]; for (int j = 0; j < Items; ++j) { source[i][j] = (i + 2) * (j + 1); } } Path[] src = new Path[Sources]; SequenceFile.Writer[] @out = CreateWriters(@base, conf, Sources, src); IntWritable k = new IntWritable(); for (int i_1 = 0; i_1 < Sources; ++i_1) { Writable v; if (i_1 != Sources - 1) { v = new IntWritable(); ((IntWritable)v).Set(i_1); } else { v = new LongWritable(); ((LongWritable)v).Set(i_1); } for (int j = 0; j < Items; ++j) { k.Set(source[i_1][j]); @out[i_1].Append(k, v); } @out[i_1].Close(); } return(src); }
/// <exception cref="System.Exception"/> public virtual void TestFormat() { Job job = Job.GetInstance(new Configuration(defaultConf)); Random random = new Random(); long seed = random.NextLong(); Log.Info("seed = " + seed); random.SetSeed(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int length = 10000; int numFiles = 10; // create files with various lengths CreateFiles(length, numFiles, random); // create a combined split for the files CombineTextInputFormat format = new CombineTextInputFormat(); for (int i = 0; i < 3; i++) { int numSplits = random.Next(length / 20) + 1; Log.Info("splitting: requesting = " + numSplits); IList <InputSplit> splits = format.GetSplits(job); Log.Info("splitting: got = " + splits.Count); // we should have a single split as the length is comfortably smaller than // the block size NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Count); InputSplit split = splits[0]; NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit ), split.GetType()); // check the split BitSet bits = new BitSet(length); Log.Debug("split= " + split); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context ); NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.", typeof( CombineFileRecordReader), reader.GetType()); MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl < LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split); reader.Initialize(split, mcontext); try { int count = 0; while (reader.NextKeyValue()) { LongWritable key = reader.GetCurrentKey(); NUnit.Framework.Assert.IsNotNull("Key should not be null.", key); Text value = reader.GetCurrentValue(); int v = System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Debug("split=" + split + " count=" + count); } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } }
/// <summary> /// <inheritDoc/> /// /// </summary> /// <exception cref="System.IO.IOException"/> public override bool Next(LongWritable key, T value) { return(base.Next(key, value)); }
/// <exception cref="System.IO.IOException"/> public virtual bool Next(LongWritable key, T value) { return(rr.Next(key, value)); }
/// <exception cref="System.IO.IOException"/> private void RunRandomTests(CompressionCodec codec) { StringBuilder fileName = new StringBuilder("testFormat.txt"); if (codec != null) { fileName.Append(".gz"); } localFs.Delete(workDir, true); Path file = new Path(workDir, fileName.ToString()); int seed = new Random().Next(); Log.Info("Seed = " + seed); Random random = new Random(seed); int MaxTests = 20; LongWritable key = new LongWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < MaxTests; i++) { Log.Info("----------------------------------------------------------"); // Maximum total records of 999 int totalRecords = random.Next(999) + 1; // Test an empty file if (i == 8) { totalRecords = 0; } // Maximum bytes in a record of 100K int recordLength = random.Next(1024 * 100) + 1; // For the 11th test, force a record length of 1 if (i == 10) { recordLength = 1; } // The total bytes in the test file int fileSize = (totalRecords * recordLength); Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength); // Create the job JobConf job = new JobConf(defaultConf); if (codec != null) { ReflectionUtils.SetConf(codec, job); } // Create the test file AList <string> recordList = CreateFile(file, codec, recordLength, totalRecords); NUnit.Framework.Assert.IsTrue(localFs.Exists(file)); //set the fixed length record length config property for the job FixedLengthInputFormat.SetRecordLength(job, recordLength); int numSplits = 1; // Arbitrarily set number of splits. if (i > 0) { if (i == (MaxTests - 1)) { // Test a split size that is less than record len numSplits = (int)(fileSize / Math.Floor(recordLength / 2)); } else { if (MaxTests % i == 0) { // Let us create a split size that is forced to be // smaller than the end file itself, (ensures 1+ splits) numSplits = fileSize / (fileSize - random.Next(fileSize)); } else { // Just pick a random split size with no upper bound numSplits = Math.Max(1, fileSize / random.Next(int.MaxValue)); } } Log.Info("Number of splits set to: " + numSplits); } // Setup the input path FileInputFormat.SetInputPaths(job, workDir); // Try splitting the file in a variety of sizes FixedLengthInputFormat format = new FixedLengthInputFormat(); format.Configure(job); InputSplit[] splits = format.GetSplits(job, numSplits); Log.Info("Actual number of splits = " + splits.Length); // Test combined split lengths = total file size long recordOffset = 0; int recordNumber = 0; foreach (InputSplit split in splits) { RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split, job, voidReporter); Type clazz = reader.GetType(); NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:" , typeof(FixedLengthRecordReader), clazz); // Plow through the records in this split while (reader.Next(key, value)) { NUnit.Framework.Assert.AreEqual("Checking key", (long)(recordNumber * recordLength ), key.Get()); string valueString = Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value .GetLength()); NUnit.Framework.Assert.AreEqual("Checking record length:", recordLength, value.GetLength ()); NUnit.Framework.Assert.IsTrue("Checking for more records than expected:", recordNumber < totalRecords); string origRecord = recordList[recordNumber]; NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString ); recordNumber++; } reader.Close(); } NUnit.Framework.Assert.AreEqual("Total original records should be total read records:" , recordList.Count, recordNumber); } }
public virtual void TestSortedLongWritable() {
    Configuration conf = new Configuration();
    Path path = new Path(Root, name);
    FileSystem fs = path.GetFileSystem(conf);
    FSDataOutputStream @out = fs.Create(path);
    try {
        TFile.Writer writer = new TFile.Writer(@out, BlockSize, "gz", jClassLongWritableComparator, conf);
        try {
            LongWritable key = new LongWritable(0);
            for (long i = 0; i < Nentry; ++i) {
                key.Set(Cube(i - Nentry / 2));
                DataOutputStream dos = writer.PrepareAppendKey(-1);
                try {
                    key.Write(dos);
                } finally {
                    dos.Close();
                }
                dos = writer.PrepareAppendValue(-1);
                try {
                    dos.Write(Runtime.GetBytesForString(BuildValue(i)));
                } finally {
                    dos.Close();
                }
            }
        } finally {
            writer.Close();
        }
    } finally {
        @out.Close();
    }
    FSDataInputStream @in = fs.Open(path);
    try {
        TFile.Reader reader = new TFile.Reader(@in, fs.GetFileStatus(path).GetLen(), conf);
        try {
            TFile.Reader.Scanner scanner = reader.CreateScanner();
            long i = 0;
            BytesWritable value = new BytesWritable();
            for (; !scanner.AtEnd(); scanner.Advance()) {
                scanner.Entry().GetValue(value);
                Assert.Equal(BuildValue(i), Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength()));
                ++i;
            }
        } finally {
            reader.Close();
        }
    } finally {
        @in.Close();
    }
}