/// <summary>
/// Retrieves a map of <see cref="Org.Apache.Hadoop.FS.Path"/>s to the
/// <see cref="Org.Apache.Hadoop.Mapred.Mapper{K1, V1, K2, V2}"/> class that
/// should be used for them.
/// </summary>
/// <param name="conf">The configuration of the job</param>
/// <seealso cref="AddInputPath(Org.Apache.Hadoop.Mapred.JobConf, Org.Apache.Hadoop.FS.Path, System.Type{T}, System.Type{T})"/>
/// <returns>A map of paths to mappers for the job</returns>
internal static IDictionary<Path, Type> GetMapperTypeMap(JobConf conf)
{
    string mappings = conf.Get("mapreduce.input.multipleinputs.dir.mappers");
    if (mappings == null)
    {
        // Nothing registered for this job: hand back an empty map.
        return Sharpen.Collections.EmptyMap();
    }
    IDictionary<Path, Type> result = new Dictionary<Path, Type>();
    foreach (string entry in mappings.Split(","))
    {
        // Each entry has the form "<path>;<mapper class name>".
        string[] parts = entry.Split(";");
        Type mapperClass;
        try
        {
            mapperClass = (Type)conf.GetClassByName(parts[1]);
        }
        catch (TypeLoadException e)
        {
            throw new RuntimeException(e);
        }
        result[new Path(parts[0])] = mapperClass;
    }
    return result;
}
/// <summary>Loads the test jar's word-count class, if it is available.</summary>
/// <param name="jobConf">configuration used to resolve the class by name</param>
/// <returns>the <c>testjar.ClassWordCount</c> type, or null when it cannot be loaded</returns>
private Type LoadLibJar(JobConf jobConf)
{
    try
    {
        return jobConf.GetClassByName("testjar.ClassWordCount");
    }
    catch (TypeLoadException)
    {
        // The test jar is not on the classpath; signal that with null.
        return null;
    }
}
/// <summary>
/// Get the <see cref="Org.Apache.Hadoop.IO.Compress.CompressionCodec"/> for
/// compressing the job outputs.
/// </summary>
/// <param name="conf">the <see cref="JobConf"/> to look in</param>
/// <param name="defaultValue">
/// the <see cref="Org.Apache.Hadoop.IO.Compress.CompressionCodec"/> to return if not set
/// </param>
/// <returns>
/// the <see cref="Org.Apache.Hadoop.IO.Compress.CompressionCodec"/> to be used to
/// compress the job outputs
/// </returns>
/// <exception cref="System.ArgumentException">if the class was specified, but not found</exception>
public static Type GetOutputCompressorClass(JobConf conf, Type defaultValue)
{
    string name = conf.Get(FileOutputFormat.CompressCodec);
    if (name == null)
    {
        // No codec configured; fall back to the caller-supplied default.
        return defaultValue;
    }
    try
    {
        return conf.GetClassByName(name).AsSubclass<CompressionCodec>();
    }
    catch (TypeLoadException e)
    {
        throw new ArgumentException("Compression codec " + name + " was not found.", e);
    }
}
/// <summary>
/// Let the first actual define the InputFormat and the second define
/// the <tt>mapred.input.dir</tt> property.
/// </summary>
/// <param name="ll">token stream: class-name tokens up to a comma, then a quoted path</param>
/// <param name="job">job configuration used to resolve the InputFormat class by name</param>
/// <exception cref="System.IO.IOException"/>
internal override void Parse(IList<Parser.Token> ll, JobConf job)
{
    StringBuilder sb = new StringBuilder();
    IEnumerator<Parser.Token> i = ll.GetEnumerator();
    // Accumulate token text into the class name until the comma separator.
    while (i.HasNext())
    {
        Parser.Token t = i.Next();
        if (Parser.TType.Comma.Equals(t.GetType()))
        {
            try
            {
                // Everything seen before the comma names the InputFormat implementation.
                inf = (InputFormat)ReflectionUtils.NewInstance(job.GetClassByName(sb.ToString()), job);
            }
            catch (TypeLoadException e)
            {
                // Wrap resolution failures as IOException, preserving the cause.
                throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
            }
            catch (ArgumentException e)
            {
                throw (IOException)Sharpen.Extensions.InitCause(new IOException(), e);
            }
            break;
        }
        sb.Append(t.GetStr());
    }
    // The comma must be followed by at least one more token (the input dir);
    // running off the end here also means no comma was ever found.
    if (!i.HasNext())
    {
        throw new IOException("Parse error");
    }
    Parser.Token t_1 = i.Next();
    if (!Parser.TType.Quot.Equals(t_1.GetType()))
    {
        throw new IOException("Expected quoted string");
    }
    // Second actual: the quoted input directory (mapred.input.dir).
    indir = t_1.GetStr();
}
/// <summary>
/// Retrieves a map of <see cref="Org.Apache.Hadoop.FS.Path"/>s to the
/// <see cref="Org.Apache.Hadoop.Mapred.InputFormat{K, V}"/> class
/// that should be used for them.
/// </summary>
/// <param name="conf">The configuration of the job</param>
/// <seealso cref="AddInputPath(Org.Apache.Hadoop.Mapred.JobConf, Org.Apache.Hadoop.FS.Path, System.Type{T})"/>
/// <returns>A map of paths to inputformats for the job</returns>
internal static IDictionary<Path, InputFormat> GetInputFormatMap(JobConf conf)
{
    string mappings = conf.Get("mapreduce.input.multipleinputs.dir.formats");
    if (mappings == null)
    {
        // Consistent with GetMapperTypeMap: when nothing has been registered,
        // return an empty map instead of letting Split throw a
        // NullReferenceException on the null configuration value.
        return Sharpen.Collections.EmptyMap();
    }
    IDictionary<Path, InputFormat> m = new Dictionary<Path, InputFormat>();
    foreach (string pathMapping in mappings.Split(","))
    {
        // Each entry has the form "<path>;<InputFormat class name>".
        string[] split = pathMapping.Split(";");
        InputFormat inputFormat;
        try
        {
            inputFormat = (InputFormat)ReflectionUtils.NewInstance(conf.GetClassByName(split[1]), conf);
        }
        catch (TypeLoadException e)
        {
            throw new RuntimeException(e);
        }
        m[new Path(split[0])] = inputFormat;
    }
    return m;
}
/// <summary>
/// Writes sequentially numbered lines through a BZip2-compressed stream, then
/// checks that reading back every generated input split yields each key exactly
/// once: no key appears in two partitions, and none is dropped.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestSplitableCodecs()
{
    JobConf conf = new JobConf(defaultConf);
    int seed = new Random().Next();
    // Create the codec
    CompressionCodec codec = null;
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    }
    catch (TypeLoadException)
    {
        throw new IOException("Illegal codec!");
    }
    Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
    // A reporter that does nothing
    Reporter reporter = Reporter.Null;
    // Log the seed so a failing run can be reproduced with the same randomness.
    Log.Info("seed = " + seed);
    Random random = new Random(seed);
    FileSystem localFs = FileSystem.GetLocal(conf);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(conf, workDir);
    int MaxLength = 500000;
    // for a variety of lengths
    for (int length = MaxLength / 2; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
    {
        Log.Info("creating; entries = " + length);
        // create a file with length entries
        TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create(file)));
        try
        {
            // One integer per line; the line number doubles as the expected key.
            for (int i = 0; i < length; i++)
            {
                writer.Write(Sharpen.Extensions.ToString(i));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }
        // try splitting the file in a variety of sizes
        TextInputFormat format = new TextInputFormat();
        format.Configure(conf);
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i_1 = 0; i_1 < 3; i_1++)
        {
            int numSplits = random.Next(MaxLength / 2000) + 1;
            Log.Info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.GetSplits(conf, numSplits);
            Log.Info("splitting: got = " + splits.Length);
            // check each split
            // One bit per written value; a set bit means that value was read once.
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.Length; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                RecordReader<LongWritable, Text> reader = format.GetRecordReader(splits[j], conf, reporter);
                try
                {
                    int counter = 0;
                    while (reader.Next(key, value))
                    {
                        int v = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        // A bit already set means another split produced this value.
                        if (bits.Get(v))
                        {
                            Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos());
                        }
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        counter++;
                    }
                    if (counter > 0)
                    {
                        Log.Info("splits[" + j + "]=" + splits[j] + " count=" + counter);
                    }
                    else
                    {
                        Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + counter);
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            // Every value written must have been read by exactly one split.
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality());
        }
    }
}
/// <summary>
/// Resolves the class named by the given command-line option and narrows it
/// to <typeparamref name="InterfaceType"/>.
/// </summary>
/// <param name="cl">parsed command line holding the option value</param>
/// <param name="key">name of the option whose value is the class name</param>
/// <param name="conf">configuration used to load the class by name</param>
/// <returns>the resolved type as a subclass of <typeparamref name="InterfaceType"/></returns>
/// <exception cref="System.TypeLoadException"/>
private static Type GetClass<InterfaceType>(CommandLine cl, string key, JobConf conf)
{
    string className = cl.GetOptionValue(key);
    return conf.GetClassByName(className).AsSubclass(typeof(InterfaceType));
}