/// <summary>Closes the currently open reader, if any, and drops the reference.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual void Close()
{
    // Nothing to do when no reader is open.
    if (curReader == null)
    {
        return;
    }
    curReader.Close();
    curReader = null;
}
/// <summary>Closes the currently open reader, if any, and drops the reference.</summary>
/// <exception cref="System.IO.IOException"/>
public override void Close()
{
    // Nothing to do when no reader is open.
    if (curReader == null)
    {
        return;
    }
    curReader.Close();
    curReader = null;
}
// Split sampler (new mapreduce API): samples at most maxSplitsSampled splits and takes
// the first numSamples / splitsToSample records from each. `records` accumulates across
// splits, so the break condition (i + 1) * samplesPerSplit caps the cumulative count.
// Keys are deep-copied with ReflectionUtils.Copy because the reader reuses its key buffer.
/// <summary>From each split sampled, take the first numSamples / numSplits records.</summary> /// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> public virtual K[] GetSample(InputFormat <K, V> inf, Job job) { // ArrayList::toArray doesn't preserve type IList <InputSplit> splits = inf.GetSplits(job); AList <K> samples = new AList <K>(numSamples); int splitsToSample = Math.Min(maxSplitsSampled, splits.Count); int samplesPerSplit = numSamples / splitsToSample; long records = 0; for (int i = 0; i < splitsToSample; ++i) { TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration (), new TaskAttemptID()); RecordReader <K, V> reader = inf.CreateRecordReader(splits[i], samplingContext); reader.Initialize(splits[i], samplingContext); while (reader.NextKeyValue()) { samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey (), null)); ++records; if ((i + 1) * samplesPerSplit <= records) { break; } } reader.Close(); } return((K[])Sharpen.Collections.ToArray(samples)); }
// Split sampler (old mapred API): strides across the splits array by splitStep so the
// sampled splits are spread over the whole input, taking the first records of each.
// A fresh key buffer is created after each stored key because the stored reference
// would otherwise be overwritten by the next reader.Next() call.
/// <summary>From each split sampled, take the first numSamples / numSplits records.</summary> /// <exception cref="System.IO.IOException"/> public virtual K[] GetSample(InputFormat <K, V> inf, JobConf job) { // ArrayList::toArray doesn't preserve type InputSplit[] splits = inf.GetSplits(job, job.GetNumMapTasks()); AList <K> samples = new AList <K>(numSamples); int splitsToSample = Math.Min(maxSplitsSampled, splits.Length); int splitStep = splits.Length / splitsToSample; int samplesPerSplit = numSamples / splitsToSample; long records = 0; for (int i = 0; i < splitsToSample; ++i) { RecordReader <K, V> reader = inf.GetRecordReader(splits[i * splitStep], job, Reporter .Null); K key = reader.CreateKey(); V value = reader.CreateValue(); while (reader.Next(key, value)) { samples.AddItem(key); key = reader.CreateKey(); ++records; if ((i + 1) * samplesPerSplit <= records) { break; } } reader.Close(); } return((K[])Sharpen.Collections.ToArray(samples)); }
// Test helper (new mapreduce API): reads every record of a FixedLengthInputFormat split
// through a dummy map context and returns each value's bytes decoded as a string.
// The reader is closed in a finally block so a failing NextKeyValue cannot leak it.
/// <exception cref="System.Exception"/> private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit split, Job job) { IList <string> result = new AList <string>(); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <LongWritable, BytesWritable> reader = format.CreateRecordReader(split , context); MapContext <LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext = new MapContextImpl <LongWritable, BytesWritable, LongWritable, BytesWritable>(job.GetConfiguration (), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter (), split); LongWritable key; BytesWritable value; try { reader.Initialize(split, mcontext); while (reader.NextKeyValue()) { key = reader.GetCurrentKey(); value = reader.GetCurrentValue(); result.AddItem(Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength ())); } } finally { reader.Close(); } return(result); }
/// <summary>Reads every value from the given split, in order (old mapred API).</summary>
/// <exception cref="System.IO.IOException"/>
private static IList <Text> ReadSplit(KeyValueTextInputFormat format, InputSplit split
    , JobConf job)
{
    IList <Text> collected = new AList <Text>();
    RecordReader <Text, Text> reader = null;
    try
    {
        reader = format.GetRecordReader(split, job, voidReporter);
        Text key = reader.CreateKey();
        Text value = reader.CreateValue();
        while (reader.Next(key, value))
        {
            collected.AddItem(value);
            // Allocate a fresh buffer: the stored value must not be reused by Next().
            value = (Text)reader.CreateValue();
        }
    }
    finally
    {
        if (reader != null)
        {
            reader.Close();
        }
    }
    return collected;
}
// Test helper (old mapred API): counts the records that SequenceFileInputFilter accepts
// across all splits. numSplits == 0 requests a randomized split count; key/value buffers
// are shared across splits since only the count matters. Reader closed in finally.
/// <exception cref="System.IO.IOException"/> private int CountRecords(int numSplits) { InputFormat <Text, BytesWritable> format = new SequenceFileInputFilter <Text, BytesWritable >(); Text key = new Text(); BytesWritable value = new BytesWritable(); if (numSplits == 0) { numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1; } InputSplit[] splits = format.GetSplits(job, numSplits); // check each split int count = 0; Log.Info("Generated " + splits.Length + " splits."); for (int j = 0; j < splits.Length; j++) { RecordReader <Text, BytesWritable> reader = format.GetRecordReader(splits[j], job, reporter); try { while (reader.Next(key, value)) { Log.Info("Accept record " + key.ToString()); count++; } } finally { reader.Close(); } } return(count); }
/// <summary>test DBInputFormat class.</summary>
/// <remarks>test DBInputFormat class. Class should split result for chunks</remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDBInputFormat()
{
    JobConf configuration = new JobConf();
    SetupDriver(configuration);
    DBInputFormat <DBInputFormat.NullDBWritable> format = new DBInputFormat <DBInputFormat.NullDBWritable
        >();
    // Fix: SetConf was called twice with the same configuration; once is enough.
    format.SetConf(configuration);
    DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
    Reporter reporter = Org.Mockito.Mockito.Mock <Reporter>();
    RecordReader <LongWritable, DBInputFormat.NullDBWritable> reader = format.GetRecordReader
        (splitter, configuration, reporter);
    try
    {
        configuration.SetInt(MRJobConfig.NumMaps, 3);
        InputSplit[] lSplits = format.GetSplits(configuration, 3);
        NUnit.Framework.Assert.AreEqual(5, lSplits[0].GetLength());
        NUnit.Framework.Assert.AreEqual(3, lSplits.Length);
        // test reader. Some simple tests
        NUnit.Framework.Assert.AreEqual(typeof(LongWritable), reader.CreateKey().GetType(
            ));
        NUnit.Framework.Assert.AreEqual(0, reader.GetPos());
        NUnit.Framework.Assert.AreEqual(0, reader.GetProgress(), 0.001);
    }
    finally
    {
        // Fix: close the reader even when an assertion above fails (was leaked before).
        reader.Close();
    }
}
// Benchmark helper: times a full sequential read of the configured input file through
// the job's InputFormat and returns elapsed wall-clock milliseconds (Sharpen's
// DateTime.GetTime). The whole file is presented as one FileSplit; reader closed in finally.
/// <exception cref="System.IO.IOException"/> internal static long ReadBench(JobConf conf) { // InputFormat instantiation InputFormat inf = conf.GetInputFormat(); string fn = conf.Get("test.filebench.name", string.Empty); Path pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn); FileStatus @in = pin.GetFileSystem(conf).GetFileStatus(pin); RecordReader rr = inf.GetRecordReader(new FileSplit(pin, 0, @in.GetLen(), (string [])null), conf, Reporter.Null); try { object key = rr.CreateKey(); object val = rr.CreateValue(); DateTime start = new DateTime(); while (rr.Next(key, val)) { } DateTime end = new DateTime(); return(end.GetTime() - start.GetTime()); } finally { rr.Close(); } }
// Random sampler (new mapreduce API): shuffles the split order, then samples each key
// with probability `freq`, reservoir-replacing random elements once numSamples is
// reached and shrinking freq proportionally. Keys are deep-copied since readers reuse
// buffers.
// NOTE(review): `ind != numSamples` is always true because r.Next(numSamples) returns
// values in [0, numSamples) — the guard appears to be dead code kept from upstream;
// confirm before removing.
/// <summary> /// Randomize the split order, then take the specified number of keys from /// each split sampled, where each key is selected with the specified /// probability and possibly replaced by a subsequently selected key when /// the quota of keys from that split is satisfied. /// </summary> /// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> public virtual K[] GetSample(InputFormat <K, V> inf, Job job) { // ArrayList::toArray doesn't preserve type IList <InputSplit> splits = inf.GetSplits(job); AList <K> samples = new AList <K>(numSamples); int splitsToSample = Math.Min(maxSplitsSampled, splits.Count); Random r = new Random(); long seed = r.NextLong(); r.SetSeed(seed); Log.Debug("seed: " + seed); // shuffle splits for (int i = 0; i < splits.Count; ++i) { InputSplit tmp = splits[i]; int j = r.Next(splits.Count); splits.Set(i, splits[j]); splits.Set(j, tmp); } // our target rate is in terms of the maximum number of sample splits, // but we accept the possibility of sampling additional splits to hit // the target sample keyset for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Count && samples.Count < numSamples); ++i_1) { TaskAttemptContext samplingContext = new TaskAttemptContextImpl(job.GetConfiguration (), new TaskAttemptID()); RecordReader <K, V> reader = inf.CreateRecordReader(splits[i_1], samplingContext); reader.Initialize(splits[i_1], samplingContext); while (reader.NextKeyValue()) { if (r.NextDouble() <= freq) { if (samples.Count < numSamples) { samples.AddItem(ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey (), null)); } else { // When exceeding the maximum number of samples, replace a // random element with this one, then adjust the frequency // to reflect the possibility of existing elements being // pushed out int ind = r.Next(numSamples); if (ind != numSamples) { samples.Set(ind, ReflectionUtils.Copy(job.GetConfiguration(), reader.GetCurrentKey (), null)); } freq *= (numSamples - 1) / 
(double)numSamples; } } } reader.Close(); } return((K[])Sharpen.Collections.ToArray(samples)); }
// Random sampler (old mapred API): same algorithm as the mapreduce variant — shuffle
// splits, sample with probability `freq`, reservoir-replace past numSamples. A fresh
// key buffer is created only after a key is kept, since stored references must not be
// overwritten by the next reader.Next().
// NOTE(review): `ind != numSamples` is always true (r.Next(numSamples) < numSamples);
// appears to be dead code kept from upstream — confirm before removing.
/// <summary> /// Randomize the split order, then take the specified number of keys from /// each split sampled, where each key is selected with the specified /// probability and possibly replaced by a subsequently selected key when /// the quota of keys from that split is satisfied. /// </summary> /// <exception cref="System.IO.IOException"/> public virtual K[] GetSample(InputFormat <K, V> inf, JobConf job) { // ArrayList::toArray doesn't preserve type InputSplit[] splits = inf.GetSplits(job, job.GetNumMapTasks()); AList <K> samples = new AList <K>(numSamples); int splitsToSample = Math.Min(maxSplitsSampled, splits.Length); Random r = new Random(); long seed = r.NextLong(); r.SetSeed(seed); Log.Debug("seed: " + seed); // shuffle splits for (int i = 0; i < splits.Length; ++i) { InputSplit tmp = splits[i]; int j = r.Next(splits.Length); splits[i] = splits[j]; splits[j] = tmp; } // our target rate is in terms of the maximum number of sample splits, // but we accept the possibility of sampling additional splits to hit // the target sample keyset for (int i_1 = 0; i_1 < splitsToSample || (i_1 < splits.Length && samples.Count < numSamples); ++i_1) { RecordReader <K, V> reader = inf.GetRecordReader(splits[i_1], job, Reporter.Null); K key = reader.CreateKey(); V value = reader.CreateValue(); while (reader.Next(key, value)) { if (r.NextDouble() <= freq) { if (samples.Count < numSamples) { samples.AddItem(key); } else { // When exceeding the maximum number of samples, replace a // random element with this one, then adjust the frequency // to reflect the possibility of existing elements being // pushed out int ind = r.Next(numSamples); if (ind != numSamples) { samples.Set(ind, key); } freq *= (numSamples - 1) / (double)numSamples; } key = reader.CreateKey(); } } reader.Close(); } return((K[])Sharpen.Collections.ToArray(samples)); }
// Test (old mapred API): CombineSequenceFileInputFormat should pack ten small files
// into exactly one CombineFileSplit. A BitSet over key values verifies every key is
// read exactly once (no duplicates, no gaps). Seed is logged so failures reproduce.
/// <exception cref="System.Exception"/> public virtual void TestFormat() { JobConf job = new JobConf(conf); Reporter reporter = Reporter.Null; Random random = new Random(); long seed = random.NextLong(); Log.Info("seed = " + seed); random.SetSeed(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int length = 10000; int numFiles = 10; // create a file with various lengths CreateFiles(length, numFiles, random); // create a combine split for the files InputFormat <IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat <IntWritable, BytesWritable>(); IntWritable key = new IntWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < 3; i++) { int numSplits = random.Next(length / (SequenceFile.SyncInterval / 20)) + 1; Log.Info("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(job, numSplits); Log.Info("splitting: got = " + splits.Length); // we should have a single split as the length is comfortably smaller than // the block size NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Length); InputSplit split = splits[0]; NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit ), split.GetType()); // check each split BitSet bits = new BitSet(length); RecordReader <IntWritable, BytesWritable> reader = format.GetRecordReader(split, job , reporter); try { while (reader.Next(key, value)) { NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(key.Get()) ); bits.Set(key.Get()); } } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } }
/// <summary>
/// Reads a ScopeData record from the reader; returns null when the record is
/// absent or does not close cleanly.
/// </summary>
public static DataRecord.ScopeData Read(RecordReader ins0)
{
    // Bail out early when there is no "ScopeData" record to read.
    if (!ins0.Open("ScopeData"))
    {
        return null;
    }
    DataRecord.ScopeData parsed = new DataRecord.ScopeData();
    parsed.prefix = ins0.String("prefix");
    parsed.requiresDigitPrefix = ins0.Bool("requiresDigitPrefix");
    parsed.suffix = ins0.String("suffix");
    // Only hand the record back if the reader closed successfully.
    return ins0.Close() ? parsed : null;
}
/// <summary>Reads every value from the given split, in order (old mapred API).</summary>
/// <exception cref="System.IO.IOException"/>
private static IList <Text> ReadSplit(TextInputFormat format, InputSplit split, JobConf
    jobConf)
{
    IList <Text> result = new AList <Text>();
    RecordReader <LongWritable, Text> reader = format.GetRecordReader(split, jobConf,
        voidReporter);
    try
    {
        LongWritable key = reader.CreateKey();
        Text value = reader.CreateValue();
        while (reader.Next(key, value))
        {
            result.AddItem(value);
            // A fresh buffer is needed because the stored value must not be reused by Next().
            value = reader.CreateValue();
        }
    }
    finally
    {
        // Fix: close the reader even when Next() throws (previously leaked on error),
        // matching the try/finally style of the sibling ReadSplit helpers.
        reader.Close();
    }
    return(result);
}
// Test helper (new mapreduce API): verifies NLineInputFormat produces splits with no
// locations, LineRecordReader readers, expectedN lines per split, and lastN lines in
// the final split. Reader closed in finally; count reset per split.
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> internal virtual void CheckFormat(Job job, int expectedN, int lastN) { NLineInputFormat format = new NLineInputFormat(); IList <InputSplit> splits = format.GetSplits(job); int count = 0; for (int i = 0; i < splits.Count; i++) { NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[i].GetLocations ().Length); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <LongWritable, Text> reader = format.CreateRecordReader(splits[i], context ); Type clazz = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader ), clazz); MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl < LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), splits[i]); reader.Initialize(splits[i], mcontext); try { count = 0; while (reader.NextKeyValue()) { count++; } } finally { reader.Close(); } if (i == splits.Count - 1) { NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong", lastN , count); } else { NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong", expectedN , count); } } }
// Test helper (old mapred API): verifies every NLineInputFormat split EXCEPT the last
// has no locations, a LineRecordReader, LongWritable/Text key-value types, and exactly
// expectedN lines. The requested split count is ignored by NLineInputFormat by design.
// A reporter that does nothing /// <exception cref="System.IO.IOException"/> internal virtual void CheckFormat(JobConf job, int expectedN) { NLineInputFormat format = new NLineInputFormat(); format.Configure(job); int ignoredNumSplits = 1; InputSplit[] splits = format.GetSplits(job, ignoredNumSplits); // check all splits except last one int count = 0; for (int j = 0; j < splits.Length - 1; j++) { NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[j].GetLocations ().Length); RecordReader <LongWritable, Text> reader = format.GetRecordReader(splits[j], job, voidReporter); Type readerClass = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader ), readerClass); LongWritable key = reader.CreateKey(); Type keyClass = key.GetType(); NUnit.Framework.Assert.AreEqual("Key class is LongWritable.", typeof(LongWritable ), keyClass); Text value = reader.CreateValue(); Type valueClass = value.GetType(); NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass); try { count = 0; while (reader.Next(key, value)) { count++; } } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("number of lines in split is " + expectedN, expectedN , count); } }
/// <summary>Reads every value from the given split, in order (new mapreduce API).</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private static IList <Text> ReadSplit(KeyValueTextInputFormat format, InputSplit split
    , Job job)
{
    IList <Text> result = new AList <Text>();
    Configuration conf = job.GetConfiguration();
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf
        );
    RecordReader <Text, Text> reader = format.CreateRecordReader(split, MapReduceTestUtil
        .CreateDummyMapTaskAttemptContext(conf));
    MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text
        , Text>(conf, context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.
        CreateDummyReporter(), split);
    try
    {
        reader.Initialize(split, mcontext);
        while (reader.NextKeyValue())
        {
            // Copy the value: the reader reuses its value buffer between records.
            result.AddItem(new Text(reader.GetCurrentValue()));
        }
    }
    finally
    {
        // Fix: close the reader even when Initialize/NextKeyValue throws (previously
        // leaked on error), matching the try/finally style of the sibling helpers.
        reader.Close();
    }
    return(result);
}
/// <summary>
/// Streams records from the reader into the writer as hashed rows, skipping the
/// first idOffset - 1 records, and reports header, column count and record count
/// (which includes the header record) through <paramref name="data"/>.
/// </summary>
public void Extract(out TableData data)
{
    data = new TableData();
    string[] row;
    // Advance past the leading records that precede the region of interest.
    for (int skipped = 0; skipped < idOffset - 1; skipped++)
    {
        reader.ReadNextRecord(out row);
    }
    int rowsSeen = 0;
    while (reader.ReadNextRecord(out row))
    {
        if (rowsSeen == 0)
        {
            // First record is the header: capture it and size the output fields.
            data.header = row;
            rowsSeen++;
            int fieldCount = row.Length;
            writer.CreateFields(fieldCount + ADDITIONAL_COLUMNS_COUNT);
            data.columnCount = fieldCount;
            continue;
        }
        writer.Write(BuildHashedRecord(row).ToArray());
        rowsSeen++;
        // Periodic progress output.
        if (rowsSeen % 1000 == 0)
        {
            Console.WriteLine("Extracted {0} rows.", rowsSeen);
        }
    }
    reader.Close();
    // Count includes the header record (it was counted before the continue above).
    data.recordCount = rowsSeen;
}
// Test helper (new mapreduce API): counts records accepted by SequenceFileInputFilter.
// numSplits == 0 picks a random count; the split size is derived from file length /
// numSplits via SetMaxInputSplitSize. Reader closed in finally for each split.
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> private int CountRecords(int numSplits) { InputFormat <Text, BytesWritable> format = new SequenceFileInputFilter <Text, BytesWritable >(); if (numSplits == 0) { numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1; } FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(inFile).GetLen() / numSplits ); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); // check each split int count = 0; foreach (InputSplit split in format.GetSplits(job)) { RecordReader <Text, BytesWritable> reader = format.CreateRecordReader(split, context ); MapContext <Text, BytesWritable, Text, BytesWritable> mcontext = new MapContextImpl <Text, BytesWritable, Text, BytesWritable>(job.GetConfiguration(), context.GetTaskAttemptID (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split); reader.Initialize(split, mcontext); try { while (reader.NextKeyValue()) { Log.Info("Accept record " + reader.GetCurrentKey().ToString()); count++; } } finally { reader.Close(); } } return(count); }
// Test helper (old mapred API): reads every fixed-length record of a split and returns
// each value's bytes decoded as a string. Reader closed in finally so a failing Next()
// cannot leak it.
/// <exception cref="System.IO.IOException"/> private static IList <string> ReadSplit(FixedLengthInputFormat format, InputSplit split, JobConf job) { IList <string> result = new AList <string>(); RecordReader <LongWritable, BytesWritable> reader = format.GetRecordReader(split, job, voidReporter); LongWritable key = reader.CreateKey(); BytesWritable value = reader.CreateValue(); try { while (reader.Next(key, value)) { result.AddItem(Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength ())); } } finally { reader.Close(); } return(result); }
// Test (new mapreduce API): CombineTextInputFormat should pack ten small files into a
// single CombineFileSplit served by a CombineFileRecordReader. Each line's integer
// value is tracked in a BitSet to prove every key appears exactly once. Seed is logged
// so failures reproduce; reader closed in finally.
/// <exception cref="System.Exception"/> public virtual void TestFormat() { Job job = Job.GetInstance(new Configuration(defaultConf)); Random random = new Random(); long seed = random.NextLong(); Log.Info("seed = " + seed); random.SetSeed(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); int length = 10000; int numFiles = 10; // create files with various lengths CreateFiles(length, numFiles, random); // create a combined split for the files CombineTextInputFormat format = new CombineTextInputFormat(); for (int i = 0; i < 3; i++) { int numSplits = random.Next(length / 20) + 1; Log.Info("splitting: requesting = " + numSplits); IList <InputSplit> splits = format.GetSplits(job); Log.Info("splitting: got = " + splits.Count); // we should have a single split as the length is comfortably smaller than // the block size NUnit.Framework.Assert.AreEqual("We got more than one splits!", 1, splits.Count); InputSplit split = splits[0]; NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit ), split.GetType()); // check the split BitSet bits = new BitSet(length); Log.Debug("split= " + split); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); RecordReader <LongWritable, Text> reader = format.CreateRecordReader(split, context ); NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.", typeof( CombineFileRecordReader), reader.GetType()); MapContext <LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl < LongWritable, Text, LongWritable, Text>(job.GetConfiguration(), context.GetTaskAttemptID (), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split); reader.Initialize(split, mcontext); try { int count = 0; while (reader.NextKeyValue()) { LongWritable key = reader.GetCurrentKey(); NUnit.Framework.Assert.IsNotNull("Key should not be null.", key); Text value = reader.GetCurrentValue(); int v = 
System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Debug("split=" + split + " count=" + count); } finally { reader.Close(); } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } }
/// <summary>Forwards the close request to the wrapped reader.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual void Close() => delegate_.Close();
// Test (old mapred API): exercises KeyValueTextInputFormat over files of varying
// lengths (lines "2i\ti") and varying split counts. For each split configuration a
// BitSet over the value column checks every line is read exactly once. Seed logged
// for reproducibility; writer and reader both closed in finally blocks.
/// <exception cref="System.Exception"/> public virtual void TestFormat() { JobConf job = new JobConf(); Path file = new Path(workDir, "test.txt"); // A reporter that does nothing Reporter reporter = Reporter.Null; int seed = new Random().Next(); Log.Info("seed = " + seed); Random random = new Random(seed); localFs.Delete(workDir, true); FileInputFormat.SetInputPaths(job, workDir); // for a variety of lengths for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 10) + 1) { Log.Debug("creating; entries = " + length); // create a file with length entries TextWriter writer = new OutputStreamWriter(localFs.Create(file)); try { for (int i = 0; i < length; i++) { writer.Write(Sharpen.Extensions.ToString(i * 2)); writer.Write("\t"); writer.Write(Sharpen.Extensions.ToString(i)); writer.Write("\n"); } } finally { writer.Close(); } // try splitting the file in a variety of sizes KeyValueTextInputFormat format = new KeyValueTextInputFormat(); format.Configure(job); for (int i_1 = 0; i_1 < 3; i_1++) { int numSplits = random.Next(MaxLength / 20) + 1; Log.Debug("splitting: requesting = " + numSplits); InputSplit[] splits = format.GetSplits(job, numSplits); Log.Debug("splitting: got = " + splits.Length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.Length; j++) { Log.Debug("split[" + j + "]= " + splits[j]); RecordReader <Text, Text> reader = format.GetRecordReader(splits[j], job, reporter ); Type readerClass = reader.GetType(); NUnit.Framework.Assert.AreEqual("reader class is KeyValueLineRecordReader.", typeof( KeyValueLineRecordReader), readerClass); Text key = reader.CreateKey(); Type keyClass = key.GetType(); Text value = reader.CreateValue(); Type valueClass = value.GetType(); NUnit.Framework.Assert.AreEqual("Key class is Text.", typeof(Text), keyClass); NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), valueClass); try { int count = 0; while (reader.Next(key, value)) { int v = 
System.Convert.ToInt32(value.ToString()); Log.Debug("read " + v); if (bits.Get(v)) { Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos ()); } NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v)); bits.Set(v); count++; } Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count); } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality ()); } } }
/// <summary>Forward close request to proxied RR.</summary>
/// <exception cref="System.IO.IOException"/>
public override void Close() => rr.Close();
/// <summary>Forward close request to proxied RR.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual void Close() => rr.Close();
// Round-trip test (new mapreduce API): writes (IntWritable, DoubleWritable) pairs as
// raw bytes through SequenceFileAsBinaryOutputFormat, then re-reads them with
// SequenceFileInputFormat. The PRNG is re-seeded with the same seed before reading so
// each record can be checked against a regenerated expected value. Writer and reader
// are both closed in finally blocks; commit goes through the OutputCommitter.
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> public virtual void TestBinary() { Configuration conf = new Configuration(); Job job = Job.GetInstance(conf); Path outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq"); Random r = new Random(); long seed = r.NextLong(); r.SetSeed(seed); FileOutputFormat.SetOutputPath(job, outdir); SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable )); SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable )); SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true); SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType .Block); BytesWritable bkey = new BytesWritable(); BytesWritable bval = new BytesWritable(); TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); OutputFormat <BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat (); OutputCommitter committer = outputFormat.GetOutputCommitter(context); committer.SetupJob(job); RecordWriter <BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter( context); IntWritable iwritable = new IntWritable(); DoubleWritable dwritable = new DoubleWritable(); DataOutputBuffer outbuf = new DataOutputBuffer(); Log.Info("Creating data by SequenceFileAsBinaryOutputFormat"); try { for (int i = 0; i < Records; ++i) { iwritable = new IntWritable(r.Next()); iwritable.Write(outbuf); bkey.Set(outbuf.GetData(), 0, outbuf.GetLength()); outbuf.Reset(); dwritable = new DoubleWritable(r.NextDouble()); dwritable.Write(outbuf); bval.Set(outbuf.GetData(), 0, outbuf.GetLength()); outbuf.Reset(); writer.Write(bkey, bval); } } finally { writer.Close(context); } committer.CommitTask(context); committer.CommitJob(job); InputFormat <IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat <IntWritable , DoubleWritable>(); int count = 0; 
r.SetSeed(seed); SequenceFileInputFormat.SetInputPaths(job, outdir); Log.Info("Reading data by SequenceFileInputFormat"); foreach (InputSplit split in iformat.GetSplits(job)) { RecordReader <IntWritable, DoubleWritable> reader = iformat.CreateRecordReader(split , context); MapContext <IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl <IntWritable, DoubleWritable, BytesWritable, BytesWritable>(job .GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil .CreateDummyReporter(), split); reader.Initialize(split, mcontext); try { int sourceInt; double sourceDouble; while (reader.NextKeyValue()) { sourceInt = r.Next(); sourceDouble = r.NextDouble(); iwritable = reader.GetCurrentKey(); dwritable = reader.GetCurrentValue(); NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*", sourceInt, iwritable.Get()); NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*", double.Compare(dwritable.Get(), sourceDouble) == 0); ++count; } } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some records not found", Records, count); }
// Round-trip test (old mapred API): writes (IntWritable, DoubleWritable) pairs as raw
// bytes via SequenceFileAsBinaryOutputFormat into a task-attempt temp dir, then
// re-reads them with SequenceFileInputFormat across 3 requested splits. Re-seeding
// the PRNG with the saved seed regenerates the expected values record by record.
// Writer and reader closed in finally blocks.
// A random task attempt id for testing. /// <exception cref="System.IO.IOException"/> public virtual void TestBinary() { JobConf job = new JobConf(); FileSystem fs = FileSystem.GetLocal(job); Path dir = new Path(new Path(new Path(Runtime.GetProperty("test.build.data", ".") ), FileOutputCommitter.TempDirName), "_" + attempt); Path file = new Path(dir, "testbinary.seq"); Random r = new Random(); long seed = r.NextLong(); r.SetSeed(seed); fs.Delete(dir, true); if (!fs.Mkdirs(dir)) { Fail("Failed to create output directory"); } job.Set(JobContext.TaskAttemptId, attempt); FileOutputFormat.SetOutputPath(job, dir.GetParent().GetParent()); FileOutputFormat.SetWorkOutputPath(job, dir); SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable )); SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable )); SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true); SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType .Block); BytesWritable bkey = new BytesWritable(); BytesWritable bval = new BytesWritable(); RecordWriter <BytesWritable, BytesWritable> writer = new SequenceFileAsBinaryOutputFormat ().GetRecordWriter(fs, job, file.ToString(), Reporter.Null); IntWritable iwritable = new IntWritable(); DoubleWritable dwritable = new DoubleWritable(); DataOutputBuffer outbuf = new DataOutputBuffer(); Log.Info("Creating data by SequenceFileAsBinaryOutputFormat"); try { for (int i = 0; i < Records; ++i) { iwritable = new IntWritable(r.Next()); iwritable.Write(outbuf); bkey.Set(outbuf.GetData(), 0, outbuf.GetLength()); outbuf.Reset(); dwritable = new DoubleWritable(r.NextDouble()); dwritable.Write(outbuf); bval.Set(outbuf.GetData(), 0, outbuf.GetLength()); outbuf.Reset(); writer.Write(bkey, bval); } } finally { writer.Close(Reporter.Null); } InputFormat <IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat <IntWritable , DoubleWritable>(); int count = 0; 
r.SetSeed(seed); DataInputBuffer buf = new DataInputBuffer(); int NumSplits = 3; SequenceFileInputFormat.AddInputPath(job, file); Log.Info("Reading data by SequenceFileInputFormat"); foreach (InputSplit split in iformat.GetSplits(job, NumSplits)) { RecordReader <IntWritable, DoubleWritable> reader = iformat.GetRecordReader(split, job, Reporter.Null); try { int sourceInt; double sourceDouble; while (reader.Next(iwritable, dwritable)) { sourceInt = r.Next(); sourceDouble = r.NextDouble(); NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*", sourceInt, iwritable.Get()); NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*", double.Compare(dwritable.Get(), sourceDouble) == 0); ++count; } } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some records not found", Records, count); }
/// <summary>
/// Writes Text key/value pairs derived from a seeded random generator into a
/// SequenceFile, then reads the file back through
/// SequenceFileAsBinaryInputFormat, decodes each raw BytesWritable pair back
/// into Text, and replays the same seed to check the decoded content matches
/// what was written.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestBinary()
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "testbinary.seq");
    // Capture the seed so the read-back phase can regenerate the identical
    // value sequence.
    Random rng = new Random();
    long seed = rng.NextLong();
    rng.SetSeed(seed);
    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);
    Text txtKey = new Text();
    Text txtVal = new Text();
    SequenceFile.Writer seqWriter =
        new SequenceFile.Writer(fs, job, file, typeof(Text), typeof(Text));
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            txtKey.Set(Sharpen.Extensions.ToString(rng.Next(), 36));
            txtVal.Set(System.Convert.ToString(rng.NextLong(), 36));
            seqWriter.Append(txtKey, txtVal);
        }
    }
    finally
    {
        seqWriter.Close();
    }
    InputFormat<BytesWritable, BytesWritable> binFormat =
        new SequenceFileAsBinaryInputFormat();
    int found = 0;
    // Replay the generator to reproduce the expected key/value stream.
    rng.SetSeed(seed);
    BytesWritable rawKey = new BytesWritable();
    BytesWritable rawVal = new BytesWritable();
    Text decodedKey = new Text();
    Text decodedVal = new Text();
    DataInputBuffer inBuf = new DataInputBuffer();
    int numSplits = 3;
    FileInputFormat.SetInputPaths(job, file);
    foreach (InputSplit split in binFormat.GetSplits(job, numSplits))
    {
        RecordReader<BytesWritable, BytesWritable> reader =
            binFormat.GetRecordReader(split, job, Reporter.Null);
        try
        {
            while (reader.Next(rawKey, rawVal))
            {
                txtKey.Set(Sharpen.Extensions.ToString(rng.Next(), 36));
                txtVal.Set(System.Convert.ToString(rng.NextLong(), 36));
                // Deserialize the raw bytes back into Text for comparison.
                inBuf.Reset(rawKey.GetBytes(), rawKey.GetLength());
                decodedKey.ReadFields(inBuf);
                inBuf.Reset(rawVal.GetBytes(), rawVal.GetLength());
                decodedVal.ReadFields(inBuf);
                NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + decodedKey.ToString() + ":"
                    + txtKey.ToString() + "*", decodedKey.ToString().Equals(txtKey.ToString()));
                NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + decodedVal.ToString() + ":"
                    + txtVal.ToString() + "*", decodedVal.ToString().Equals(txtVal.ToString()));
                ++found;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual("Some records not found", Records, found);
}
/// <summary>
/// Drives FixedLengthInputFormat through a series of randomized
/// record-length / record-count / split-count combinations (including an
/// empty file and a record length of 1) and verifies every record comes back
/// with the correct key offset, record length, and content.
/// </summary>
/// <param name="codec">
/// Optional compression codec; when non-null the test file gets a ".gz"
/// suffix and the codec is configured on the job.
/// </param>
/// <exception cref="System.IO.IOException"/>
private void RunRandomTests(CompressionCodec codec)
{
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null)
    {
        fileName.Append(".gz");
    }
    localFs.Delete(workDir, true);
    Path file = new Path(workDir, fileName.ToString());
    // Log the seed so a failing combination can be reproduced.
    int seed = new Random().Next();
    Log.Info("Seed = " + seed);
    Random random = new Random(seed);
    int MaxTests = 20;
    LongWritable key = new LongWritable();
    BytesWritable value = new BytesWritable();
    for (int i = 0; i < MaxTests; i++)
    {
        Log.Info("----------------------------------------------------------");
        // Maximum total records of 999; the 9th iteration tests an empty file.
        int totalRecords = random.Next(999) + 1;
        if (i == 8)
        {
            totalRecords = 0;
        }
        // Maximum bytes in a record of 100K; the 11th iteration forces a
        // record length of 1.
        int recordLength = random.Next(1024 * 100) + 1;
        if (i == 10)
        {
            recordLength = 1;
        }
        // The total bytes in the test file.
        int fileSize = (totalRecords * recordLength);
        Log.Info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
        // Create the job.
        JobConf job = new JobConf(defaultConf);
        if (codec != null)
        {
            ReflectionUtils.SetConf(codec, job);
        }
        // Create the test file.
        AList<string> recordList = CreateFile(file, codec, recordLength, totalRecords);
        NUnit.Framework.Assert.IsTrue(localFs.Exists(file));
        // Set the fixed length record length config property for the job.
        FixedLengthInputFormat.SetRecordLength(job, recordLength);
        // Arbitrarily set the number of splits.
        int numSplits = 1;
        if (i > 0)
        {
            if (i == (MaxTests - 1))
            {
                // Test a split size that is less than the record length.
                // NOTE(review): recordLength / 2 is integer division, so a
                // random recordLength of 1 on this iteration would divide by
                // zero — presumably this relies on that being vanishingly
                // unlikely; confirm against the upstream test.
                numSplits = (int)(fileSize / Math.Floor(recordLength / 2));
            }
            else if (MaxTests % i == 0)
            {
                // Create a split size that is forced to be smaller than the
                // file itself (ensures 1+ splits).
                numSplits = fileSize / (fileSize - random.Next(fileSize));
            }
            else
            {
                // Just pick a random split size with no upper bound.
                numSplits = Math.Max(1, fileSize / random.Next(int.MaxValue));
            }
            Log.Info("Number of splits set to: " + numSplits);
        }
        // Setup the input path.
        FileInputFormat.SetInputPaths(job, workDir);
        // Try splitting the file in a variety of sizes.
        FixedLengthInputFormat format = new FixedLengthInputFormat();
        format.Configure(job);
        InputSplit[] splits = format.GetSplits(job, numSplits);
        Log.Info("Actual number of splits = " + splits.Length);
        int recordNumber = 0;
        foreach (InputSplit split in splits)
        {
            RecordReader<LongWritable, BytesWritable> reader =
                format.GetRecordReader(split, job, voidReporter);
            Type clazz = reader.GetType();
            NUnit.Framework.Assert.AreEqual("RecordReader class should be FixedLengthRecordReader:",
                typeof(FixedLengthRecordReader), clazz);
            // Plow through the records in this split.
            while (reader.Next(key, value))
            {
                // Keys are the byte offset of the record within the file.
                NUnit.Framework.Assert.AreEqual("Checking key",
                    (long)(recordNumber * recordLength), key.Get());
                string valueString =
                    Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength());
                NUnit.Framework.Assert.AreEqual("Checking record length:",
                    recordLength, value.GetLength());
                NUnit.Framework.Assert.IsTrue("Checking for more records than expected:",
                    recordNumber < totalRecords);
                string origRecord = recordList[recordNumber];
                NUnit.Framework.Assert.AreEqual("Checking record content:", origRecord, valueString);
                recordNumber++;
            }
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual("Total original records should be total read records:",
            recordList.Count, recordNumber);
    }
}
/// <summary>
/// New-API (mapreduce) variant: writes seeded-random Text pairs into a
/// SequenceFile, reads them back as raw bytes through
/// SequenceFileAsBinaryInputFormat, decodes each BytesWritable pair back
/// into Text, and replays the seed to verify the decoded content equals
/// what was written.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Job job = Job.GetInstance();
    FileSystem fs = FileSystem.GetLocal(job.GetConfiguration());
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "testbinary.seq");
    // Keep the seed so the verification pass can regenerate the same values.
    Random rng = new Random();
    long seed = rng.NextLong();
    rng.SetSeed(seed);
    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);
    Text txtKey = new Text();
    Text txtVal = new Text();
    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, job.GetConfiguration(),
        file, typeof(Text), typeof(Text));
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            txtKey.Set(Sharpen.Extensions.ToString(rng.Next(), 36));
            txtVal.Set(System.Convert.ToString(rng.NextLong(), 36));
            seqWriter.Append(txtKey, txtVal);
        }
    }
    finally
    {
        seqWriter.Close();
    }
    TaskAttemptContext context =
        MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    InputFormat<BytesWritable, BytesWritable> binFormat =
        new SequenceFileAsBinaryInputFormat();
    int found = 0;
    // Replay the generator to reproduce the expected key/value stream.
    rng.SetSeed(seed);
    BytesWritable rawKey = new BytesWritable();
    BytesWritable rawVal = new BytesWritable();
    Text decodedKey = new Text();
    Text decodedVal = new Text();
    DataInputBuffer inBuf = new DataInputBuffer();
    FileInputFormat.SetInputPaths(job, file);
    foreach (InputSplit split in binFormat.GetSplits(job))
    {
        RecordReader<BytesWritable, BytesWritable> reader =
            binFormat.CreateRecordReader(split, context);
        MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<BytesWritable, BytesWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                rawKey = reader.GetCurrentKey();
                rawVal = reader.GetCurrentValue();
                txtKey.Set(Sharpen.Extensions.ToString(rng.Next(), 36));
                txtVal.Set(System.Convert.ToString(rng.NextLong(), 36));
                // Deserialize the raw bytes back into Text for comparison.
                inBuf.Reset(rawKey.GetBytes(), rawKey.GetLength());
                decodedKey.ReadFields(inBuf);
                inBuf.Reset(rawVal.GetBytes(), rawVal.GetLength());
                decodedVal.ReadFields(inBuf);
                NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + decodedKey.ToString() + ":"
                    + txtKey.ToString() + "*", decodedKey.ToString().Equals(txtKey.ToString()));
                NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + decodedVal.ToString() + ":"
                    + txtVal.ToString() + "*", decodedVal.ToString().Equals(txtVal.ToString()));
                ++found;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual("Some records not found", Records, found);
}