/// <summary>
/// Registers two input paths, each with its own input format type, and checks that
/// <c>MultipleInputs.GetInputFormatMap</c> reports an instance of the expected format
/// type for each path.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestAddInputPathWithFormat()
{
    Job job = Job.GetInstance();

    MultipleInputs.AddInputPath(job, new Path("/foo"), typeof(TextInputFormat));
    MultipleInputs.AddInputPath(job, new Path("/bar"), typeof(KeyValueTextInputFormat));

    IDictionary<Path, InputFormat> formatsByPath = MultipleInputs.GetInputFormatMap(job);

    // Look each path up with a freshly constructed key, exactly as it was registered.
    InputFormat fooFormat = formatsByPath[new Path("/foo")];
    NUnit.Framework.Assert.AreEqual(typeof(TextInputFormat), fooFormat.GetType());

    InputFormat barFormat = formatsByPath[new Path("/bar")];
    NUnit.Framework.Assert.AreEqual(typeof(KeyValueTextInputFormat), barFormat.GetType());
}
/// <summary>
/// Computes the splits for every path returned by <c>MultipleInputs.GetInputFormatMap</c>
/// and wraps each one in a <c>TaggedInputSplit</c> carrying the input format type and
/// the mapper type to use for it.
/// </summary>
/// <remarks>
/// Paths are grouped first by input format type and then by mapper type, so that every
/// group sharing both can be split with a single call to the format's <c>GetSplits</c>.
/// Paths that have no (or a null) entry in the mapper map are grouped together and fall
/// back to <c>job.GetMapperClass()</c>.
/// </remarks>
/// <param name="job">Job context supplying the configuration and the default mapper class.</param>
/// <returns>The tagged splits produced for all path groups.</returns>
/// <exception cref="System.IO.IOException">
/// Thrown when resolving the job's default mapper class raises a <c>TypeLoadException</c>.
/// </exception>
/// <exception cref="System.Exception"/>
public override IList<InputSplit> GetSplits(JobContext job)
{
    Configuration conf = job.GetConfiguration();
    Job jobCopy = Job.GetInstance(conf);
    IList<InputSplit> splits = new AList<InputSplit>();
    IDictionary<Path, InputFormat> formatMap = MultipleInputs.GetInputFormatMap(job);
    IDictionary<Path, Type> mapperMap = MultipleInputs.GetMapperTypeMap(job);
    IDictionary<Type, IList<Path>> formatPaths = new Dictionary<Type, IList<Path>>();

    // First, build a map of InputFormat types to the paths that use them.
    foreach (KeyValuePair<Path, InputFormat> entry in formatMap)
    {
        Type formatType = entry.Value.GetType();
        IList<Path> pathsForFormat;
        if (!formatPaths.TryGetValue(formatType, out pathsForFormat))
        {
            pathsForFormat = new List<Path>();
            formatPaths[formatType] = pathsForFormat;
        }
        pathsForFormat.Add(entry.Key);
    }

    foreach (KeyValuePair<Type, IList<Path>> formatEntry in formatPaths)
    {
        Type formatClass = formatEntry.Key;
        InputFormat format = (InputFormat)ReflectionUtils.NewInstance(formatClass, conf);

        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for.
        IDictionary<Type, IList<Path>> mapperPaths = new Dictionary<Type, IList<Path>>();

        // Paths without a mapper are kept apart: a Dictionary cannot hold a null key,
        // and the dictionary indexer throws on a missing key instead of yielding null.
        IList<Path> defaultMapperPaths = new List<Path>();

        foreach (Path path in formatEntry.Value)
        {
            Type registeredMapper;
            if (!mapperMap.TryGetValue(path, out registeredMapper) || registeredMapper == null)
            {
                defaultMapperPaths.Add(path);
                continue;
            }

            IList<Path> pathsForMapper;
            if (!mapperPaths.TryGetValue(registeredMapper, out pathsForMapper))
            {
                pathsForMapper = new List<Path>();
                mapperPaths[registeredMapper] = pathsForMapper;
            }
            pathsForMapper.Add(path);
        }

        // Treat the mapper-less paths as one more group, keyed by a null mapper type
        // that is resolved to the job's default mapper below.
        List<KeyValuePair<Type, IList<Path>>> mapperGroups =
            new List<KeyValuePair<Type, IList<Path>>>(mapperPaths);
        if (defaultMapperPaths.Count > 0)
        {
            mapperGroups.Add(new KeyValuePair<Type, IList<Path>>(null, defaultMapperPaths));
        }

        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        foreach (KeyValuePair<Type, IList<Path>> mapEntry in mapperGroups)
        {
            IList<Path> groupPaths = mapEntry.Value;
            Type mapperClass = mapEntry.Key;
            if (mapperClass == null)
            {
                try
                {
                    mapperClass = job.GetMapperClass();
                }
                catch (TypeLoadException e)
                {
                    throw new IOException("Mapper class is not found", e);
                }
            }

            // jobCopy is reused for every group; its input paths are set anew each time.
            FileInputFormat.SetInputPaths(jobCopy,
                Sharpen.Collections.ToArray(groupPaths, new Path[groupPaths.Count]));

            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            IList<InputSplit> pathSplits = format.GetSplits(jobCopy);
            foreach (InputSplit pathSplit in pathSplits)
            {
                splits.Add(new TaggedInputSplit(pathSplit, conf, format.GetType(), mapperClass));
            }
        }
    }

    return splits;
}