/// <summary>
/// Logically splits the set of input files for the job; every N lines of
/// the input are grouped into one split.
/// </summary>
/// <seealso cref="Org.Apache.Hadoop.Mapred.FileInputFormat{K, V}.GetSplits(Org.Apache.Hadoop.Mapred.JobConf, int)"/>
/// <exception cref="System.IO.IOException"/>
public override InputSplit[] GetSplits(JobConf job, int numSplits)
{
    // numSplits is only a hint and is ignored here: split boundaries are
    // dictated entirely by the configured line count N.
    AList<FileSplit> result = new AList<FileSplit>();
    foreach (FileStatus fileStatus in ListStatus(job))
    {
        // Copy each per-file split so the returned array holds independent instances.
        foreach (FileSplit fileSplit in NLineInputFormat.GetSplitsForFile(fileStatus, job, N))
        {
            result.AddItem(new FileSplit(fileSplit));
        }
    }
    return Sharpen.Collections.ToArray(result, new FileSplit[result.Count]);
}
// A reporter that does nothing
/// <summary>
/// Runs an NLineInputFormat over the job's configured input and verifies that
/// every split except the last yields exactly <paramref name="expectedN"/> lines,
/// and that the reader, key, and value are of the expected concrete types.
/// </summary>
/// <exception cref="System.IO.IOException"/>
internal virtual void CheckFormat(JobConf job, int expectedN)
{
    NLineInputFormat format = new NLineInputFormat();
    format.Configure(job);
    // The split-count hint is ignored by NLineInputFormat; any value works.
    int ignoredNumSplits = 1;
    InputSplit[] splits = format.GetSplits(job, ignoredNumSplits);
    // Check all splits except the last one, which may contain fewer lines.
    int lineCount = 0;
    for (int i = 0; i < splits.Length - 1; i++)
    {
        NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[i].GetLocations().Length);
        RecordReader<LongWritable, Text> reader = format.GetRecordReader(splits[i], job, voidReporter);
        NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader), reader.GetType());
        LongWritable key = reader.CreateKey();
        NUnit.Framework.Assert.AreEqual("Key class is LongWritable.", typeof(LongWritable), key.GetType());
        Text value = reader.CreateValue();
        NUnit.Framework.Assert.AreEqual("Value class is Text.", typeof(Text), value.GetType());
        try
        {
            lineCount = 0;
            while (reader.Next(key, value))
            {
                lineCount++;
            }
        }
        finally
        {
            // Always release the record reader, even if an assertion fires mid-read.
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual("number of lines in split is " + expectedN, expectedN, lineCount);
    }
}