/// <summary>
/// Registers two input paths with different input formats through
/// <c>MultipleInputs.AddInputPath</c> and checks that
/// <c>MultipleInputs.GetInputFormatMap</c> reports the matching format type
/// for each path.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestAddInputPathWithFormat()
{
    Job job = Job.GetInstance();

    Path fooPath = new Path("/foo");
    MultipleInputs.AddInputPath(job, fooPath, typeof(TextInputFormat));

    Path barPath = new Path("/bar");
    MultipleInputs.AddInputPath(job, barPath, typeof(KeyValueTextInputFormat));

    IDictionary<Path, InputFormat> formatsByPath = MultipleInputs.GetInputFormatMap(job);

    // Look the formats up with freshly constructed Path instances rather than
    // the ones used for registration.
    InputFormat fooFormat = formatsByPath[new Path("/foo")];
    NUnit.Framework.Assert.AreEqual(typeof(TextInputFormat), fooFormat.GetType());

    InputFormat barFormat = formatsByPath[new Path("/bar")];
    NUnit.Framework.Assert.AreEqual(typeof(KeyValueTextInputFormat), barFormat.GetType());
}
/// <summary>
/// Starts a four-datanode <c>MiniDFSCluster</c> (two racks, two hosts each),
/// registers four input paths with <c>MultipleInputs</c>, and checks how many
/// <c>TaggedInputSplit</c>s <c>DelegatingInputFormat.GetSplits</c> produces for
/// each (input format, mapper) combination.
/// </summary>
/// <remarks>
/// The maximum split size is set to the length of the first path divided by
/// <c>numSplits</c>. The paths come from <c>GetPath</c>, which is defined
/// outside this block; the expected counts presumably rely on all four files
/// having the same length (TODO confirm against <c>GetPath</c>).
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestSplitting()
{
    Job job = Job.GetInstance();
    MiniDFSCluster dfs = null;
    try
    {
        dfs = new MiniDFSCluster.Builder(job.GetConfiguration())
            .NumDataNodes(4)
            .Racks(new string[] { "/rack0", "/rack0", "/rack1", "/rack1" })
            .Hosts(new string[] { "host0", "host1", "host2", "host3" })
            .Build();
        FileSystem fs = dfs.GetFileSystem();

        Path path = GetPath("/foo/bar", fs);
        Path path2 = GetPath("/foo/baz", fs);
        Path path3 = GetPath("/bar/bar", fs);
        Path path4 = GetPath("/bar/baz", fs);

        int numSplits = 100;
        FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(path).GetLen() / numSplits);

        MultipleInputs.AddInputPath(job, path, typeof(TextInputFormat),
            typeof(TestDelegatingInputFormat.MapClass));
        MultipleInputs.AddInputPath(job, path2, typeof(TextInputFormat),
            typeof(TestDelegatingInputFormat.MapClass2));
        MultipleInputs.AddInputPath(job, path3, typeof(KeyValueTextInputFormat),
            typeof(TestDelegatingInputFormat.MapClass));
        MultipleInputs.AddInputPath(job, path4, typeof(TextInputFormat),
            typeof(TestDelegatingInputFormat.MapClass2));

        DelegatingInputFormat inFormat = new DelegatingInputFormat();

        // bins[0]: KeyValueTextInputFormat            (path3)
        // bins[1]: TextInputFormat + MapClass         (path)
        // bins[2]: everything else, i.e. MapClass2    (path2 and path4)
        int[] bins = new int[3];
        foreach (InputSplit split in (IList<InputSplit>)inFormat.GetSplits(job))
        {
            NUnit.Framework.Assert.IsTrue(split is TaggedInputSplit);
            TaggedInputSplit tis = (TaggedInputSplit)split;

            int index;
            if (tis.GetInputFormatClass().Equals(typeof(KeyValueTextInputFormat)))
            {
                // path3
                index = 0;
            }
            else if (tis.GetMapperClass().Equals(typeof(TestDelegatingInputFormat.MapClass)))
            {
                // path
                index = 1;
            }
            else
            {
                // path2 and path4
                index = 2;
            }
            bins[index]++;
        }

        // NUnit's classic Assert.AreEqual takes (expected, actual, message);
        // the message goes last, unlike JUnit where it goes first.
        NUnit.Framework.Assert.AreEqual(numSplits, bins[0],
            "count is not equal to num splits");
        NUnit.Framework.Assert.AreEqual(numSplits, bins[1],
            "count is not equal to num splits");
        NUnit.Framework.Assert.AreEqual(numSplits * 2, bins[2],
            "count is not equal to 2 * num splits");
    }
    finally
    {
        // Always tear the mini cluster down, even if an assertion failed.
        if (dfs != null)
        {
            dfs.Shutdown();
        }
    }
}
/// <summary>
/// End-to-end check of <c>MultipleInputs</c>: writes one plain-text input file
/// and one tab-delimited input file, runs a job that maps each input with its
/// own input format and mapper, and verifies the five lines of reducer output
/// in <c>part-r-00000</c>.
/// </summary>
/// <remarks>
/// Any exception thrown while waiting for the job is wrapped in a
/// <c>RuntimeException</c>; a job that completes unsuccessfully also raises a
/// <c>RuntimeException</c>.
/// </remarks>
public virtual void TestDoMultipleInputs()
{
    Path in1Dir = GetDir(In1Dir);
    Path in2Dir = GetDir(In2Dir);
    Path outDir = GetDir(OutDir);

    Configuration conf = CreateJobConf();
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(outDir, true);

    DataOutputStream file1 = fs.Create(new Path(in1Dir, "part-0"));
    try
    {
        file1.WriteBytes("a\nb\nc\nd\ne");
    }
    finally
    {
        file1.Close();
    }

    // write tab delimited to second file because we're doing
    // KeyValueInputFormat
    DataOutputStream file2 = fs.Create(new Path(in2Dir, "part-0"));
    try
    {
        file2.WriteBytes("a\tblah\nb\tblah\nc\tblah\nd\tblah\ne\tblah");
    }
    finally
    {
        file2.Close();
    }

    Job job = Job.GetInstance(conf);
    job.SetJobName("mi");

    MultipleInputs.AddInputPath(job, in1Dir, typeof(TextInputFormat),
        typeof(TestMultipleInputs.MapClass));
    MultipleInputs.AddInputPath(job, in2Dir, typeof(KeyValueTextInputFormat),
        typeof(TestMultipleInputs.KeyValueMapClass));

    job.SetMapOutputKeyClass(typeof(Text));
    job.SetMapOutputValueClass(typeof(Text));
    job.SetOutputKeyClass(typeof(NullWritable));
    job.SetOutputValueClass(typeof(Text));
    job.SetReducerClass(typeof(TestMultipleInputs.ReducerClass));
    FileOutputFormat.SetOutputPath(job, outDir);

    bool success = false;
    try
    {
        success = job.WaitForCompletion(true);
    }
    catch (TypeLoadException tle)
    {
        // The more specific handler must come first: a catch (Exception)
        // placed before it makes this clause unreachable (compiler error CS0160).
        throw new RuntimeException(tle);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (!success)
    {
        throw new RuntimeException("Job failed!");
    }

    // Wrap the output stream in a BufferedReader so the result can be read
    // line by line.
    BufferedReader output = new BufferedReader(
        new InputStreamReader(fs.Open(new Path(outDir, "part-r-00000"))));
    try
    {
        // reducer should have counted one key from each file
        // AreEqual reports expected vs. actual and copes with a null line
        // (truncated output) instead of failing with a null dereference.
        NUnit.Framework.Assert.AreEqual("a 2", output.ReadLine());
        NUnit.Framework.Assert.AreEqual("b 2", output.ReadLine());
        NUnit.Framework.Assert.AreEqual("c 2", output.ReadLine());
        NUnit.Framework.Assert.AreEqual("d 2", output.ReadLine());
        NUnit.Framework.Assert.AreEqual("e 2", output.ReadLine());
    }
    finally
    {
        // Release the underlying file system stream even if an assertion fails.
        output.Close();
    }
}