/// <summary>
/// Reads the key and value classes from the job configuration and stores
/// them in <c>keyclass</c> and <c>valclass</c>.
/// </summary>
/// <remarks>
/// The properties consulted are <c>test.fakeif.keyclass</c> and
/// <c>test.fakeif.valclass</c>; <c>NullWritable</c> is passed as the
/// fallback type for both lookups.
/// </remarks>
/// <param name="job">job configuration to read the class settings from.</param>
public virtual void Configure(JobConf job)
{
    Type fallback = typeof(NullWritable);
    keyclass = (Type)job.GetClass<WritableComparable>("test.fakeif.keyclass", fallback);
    valclass = (Type)job.GetClass<WritableComparable>("test.fakeif.valclass", fallback);
}
/// <summary>
/// Creates the checker: asserts that the job's class loader is a
/// <c>TestWrappedRecordReaderClassloader.Fake_ClassLoader</c>, then reads the
/// key and value classes from the job configuration.
/// </summary>
/// <param name="job">job configuration whose class loader is verified and
/// from which <c>test.fakeif.keyclass</c> / <c>test.fakeif.valclass</c> are read.</param>
public RR_ClassLoaderChecker(JobConf job)
{
    // NUnit takes the condition first and the failure message second; the
    // JUnit-style (message, condition) order does not match any NUnit overload.
    NUnit.Framework.Assert.IsTrue(
        job.GetClassLoader() is TestWrappedRecordReaderClassloader.Fake_ClassLoader,
        "The class loader has not been inherited from " + typeof(CompositeRecordReader).Name);

    keyclass = (Type)job.GetClass<WritableComparable>("test.fakeif.keyclass", typeof(NullWritable));
    valclass = (Type)job.GetClass<WritableComparable>("test.fakeif.valclass", typeof(NullWritable));
}
/// <summary>Configures all the chain elements for the task.</summary>
/// <remarks>
/// Instantiates each mapper listed in the configuration (their number is read
/// from the <c>ChainMapperSize</c> property, defaulting to 0) and, when a
/// reducer class is configured, the reducer as well. For every element whose
/// by-value flag is true (the default passed to <c>GetBoolean</c>) the
/// serializations of its output key and value classes are recorded; otherwise
/// <c>null</c> is recorded in their place.
/// </remarks>
/// <param name="jobConf">chain job's JobConf.</param>
public virtual void Configure(JobConf jobConf)
{
    string prefix = GetPrefix(isMap);
    chainJobConf = jobConf;
    SerializationFactory serializationFactory = new SerializationFactory(chainJobConf);

    // Mappers: one configuration entry per position in the chain.
    int mapperCount = jobConf.GetInt(prefix + ChainMapperSize, 0);
    for (int position = 0; position < mapperCount; position++)
    {
        Type mapperType = jobConf.GetClass<Mapper>(prefix + ChainMapperClass + position, null);
        JobConf mapperConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainMapperConfig + position));
        Mapper mapper = ReflectionUtils.NewInstance(mapperType, mapperConf);
        mappers.AddItem(mapper);

        if (!mapperConf.GetBoolean(MapperByValue, true))
        {
            // Keep the serialization lists aligned with the mapper list.
            mappersKeySerialization.AddItem(null);
            mappersValueSerialization.AddItem(null);
            continue;
        }

        mappersKeySerialization.AddItem(
            serializationFactory.GetSerialization(mapperConf.GetClass(MapperOutputKeyClass, null)));
        mappersValueSerialization.AddItem(
            serializationFactory.GetSerialization(mapperConf.GetClass(MapperOutputValueClass, null)));
    }

    // Reducer: optional, at most one per chain.
    Type reducerType = jobConf.GetClass<Reducer>(prefix + ChainReducerClass, null);
    if (reducerType == null)
    {
        return;
    }

    JobConf reducerConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainReducerConfig));
    reducer = ReflectionUtils.NewInstance(reducerType, reducerConf);
    if (!reducerConf.GetBoolean(ReducerByValue, true))
    {
        reducerKeySerialization = null;
        reducerValueSerialization = null;
    }
    else
    {
        reducerKeySerialization =
            serializationFactory.GetSerialization(reducerConf.GetClass(ReducerOutputKeyClass, null));
        reducerValueSerialization =
            serializationFactory.GetSerialization(reducerConf.GetClass(ReducerOutputValueClass, null));
    }
}
/// <summary>
/// Given an expression and an optional comparator, build a tree of
/// InputFormats using the comparator to sort keys.
/// </summary>
/// <remarks>
/// The comparator class is looked up under <c>mapred.join.keycomparator</c>
/// in <paramref name="job"/>; when that lookup yields <c>null</c> the
/// resulting node's key comparator is left untouched.
/// </remarks>
/// <param name="expr">expression to parse; must not be <c>null</c>.</param>
/// <param name="job">job configuration used for the comparator lookup and
/// passed on to <c>Reduce</c>.</param>
/// <returns>the node obtained from the single token left on the stack.</returns>
/// <exception cref="System.IO.IOException">
/// if <paramref name="expr"/> is <c>null</c>, or if the tokens do not reduce
/// to exactly one token of type <c>Cif</c>.
/// </exception>
internal static Parser.Node Parse(string expr, JobConf job)
{
    if (expr == null)
    {
        throw new IOException("Expression is null");
    }

    Type comparatorType = job.GetClass<WritableComparator>("mapred.join.keycomparator", null);
    Parser.Lexer lexer = new Parser.Lexer(expr);
    Stack<Parser.Token> pending = new Stack<Parser.Token>();

    for (Parser.Token token = lexer.Next(); token != null; token = lexer.Next())
    {
        if (!Parser.TType.Rparen.Equals(token.GetType()))
        {
            pending.Push(token);
        }
        else
        {
            // A closing parenthesis collapses the pending tokens into one.
            pending.Push(Reduce(pending, job));
        }
    }

    bool reducedToSingleNode = pending.Count == 1
        && Parser.TType.Cif.Equals(pending.Peek().GetType());
    if (!reducedToSingleNode)
    {
        throw new IOException("Missing ')'");
    }

    Parser.Node root = pending.Pop().GetNode();
    if (comparatorType != null)
    {
        root.SetKeyComparator(comparatorType);
    }
    return root;
}
/// <summary>Get a PathFilter instance of the filter set for the input paths.</summary>
/// <param name="conf">job configuration holding the filter class setting;
/// it is also passed to the newly created filter instance.</param>
/// <returns>the PathFilter instance set for the job, NULL if none has been set.</returns>
public static PathFilter GetInputPathFilter(JobConf conf)
{
    Type filterClass = conf.GetClass<PathFilter>(FileInputFormat.PathfilterClass, null);
    if (filterClass == null)
    {
        return null;
    }
    return ReflectionUtils.NewInstance(filterClass, conf);
}
/// <summary>
/// Instantiates the output format class configured under
/// <c>mapreduce.output.lazyoutputformat.outputformat</c> and stores the
/// instance in <c>baseOut</c>.
/// </summary>
/// <param name="job">job configuration naming the output format class; it is
/// also passed to the new instance.</param>
/// <exception cref="System.IO.IOException">
/// if no output format class is configured, or no instance was produced.
/// </exception>
private void GetBaseOutputFormat(JobConf job)
{
    Type formatClass = job.GetClass<OutputFormat>("mapreduce.output.lazyoutputformat.outputformat", null);
    if (formatClass == null)
    {
        // Check before instantiating: handing a null class to the reflection
        // helper would fail there and hide the intended IOException.
        throw new IOException("Ouput format not set for LazyOutputFormat");
    }

    baseOut = ReflectionUtils.NewInstance(formatClass, job);
    if (baseOut == null)
    {
        throw new IOException("Ouput format not set for LazyOutputFormat");
    }
}
/// <summary>
/// Creates a record reader for an indirect split by delegating to the input
/// format configured under <c>GenericMRLoadGenerator.IndirectInputFormat</c>
/// (<c>SequenceFileInputFormat</c> is passed as the fallback class).
/// </summary>
/// <param name="split">split to read; cast to <c>IndirectSplit</c>.</param>
/// <param name="job">job configuration.</param>
/// <param name="reporter">reporter handed on to the delegate format.</param>
/// <returns>the delegate's reader over a <c>FileSplit</c> that starts at offset 0
/// and spans the split's length.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual RecordReader GetRecordReader(InputSplit split, JobConf job, Reporter reporter)
{
    InputFormat delegateFormat = (InputFormat)ReflectionUtils.NewInstance(
        job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, typeof(SequenceFileInputFormat)),
        job);

    GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit indirectSplit =
        (GenericMRLoadGenerator.IndirectInputFormat.IndirectSplit)split;

    // No host locations are supplied for the wrapped file split.
    FileSplit wholeFile = new FileSplit(
        indirectSplit.GetPath(), 0, indirectSplit.GetLength(), (string[])null);
    return delegateFormat.GetRecordReader(wholeFile, job, reporter);
}
/// <summary>Tests the database-related job configuration helpers.</summary>
/// <remarks>
/// Checks that both <c>DBInputFormat.SetInput</c> overloads and both
/// <c>DBConfiguration.ConfigureDB</c> overloads store their arguments under
/// the corresponding <c>DBConfiguration</c> properties.
/// </remarks>
public virtual void TestSetInput()
{
    // Table-based input: class, table name, field names, conditions, ordering.
    JobConf tableConf = new JobConf();
    string[] expectedFields = new string[] { "field1", "field2" };
    DBInputFormat.SetInput(tableConf, typeof(DBInputFormat.NullDBWritable), "table",
        "conditions", "orderBy", expectedFields);
    NUnit.Framework.Assert.AreEqual(
        "org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable",
        tableConf.GetClass(DBConfiguration.InputClassProperty, null).FullName);
    NUnit.Framework.Assert.AreEqual("table",
        tableConf.Get(DBConfiguration.InputTableNameProperty, null));
    string[] storedFields = tableConf.GetStrings(DBConfiguration.InputFieldNamesProperty);
    NUnit.Framework.Assert.AreEqual("field1", storedFields[0]);
    NUnit.Framework.Assert.AreEqual("field2", storedFields[1]);
    NUnit.Framework.Assert.AreEqual("conditions",
        tableConf.Get(DBConfiguration.InputConditionsProperty, null));
    NUnit.Framework.Assert.AreEqual("orderBy",
        tableConf.Get(DBConfiguration.InputOrderByProperty, null));

    // Query-based input: query and count query.
    JobConf queryConf = new JobConf();
    DBInputFormat.SetInput(queryConf, typeof(DBInputFormat.NullDBWritable), "query", "countQuery");
    NUnit.Framework.Assert.AreEqual("query",
        queryConf.Get(DBConfiguration.InputQuery, null));
    NUnit.Framework.Assert.AreEqual("countQuery",
        queryConf.Get(DBConfiguration.InputCountQuery, null));

    // Connection settings with credentials.
    JobConf credentialedConf = new JobConf();
    DBConfiguration.ConfigureDB(credentialedConf, "driverClass", "dbUrl", "user", "password");
    NUnit.Framework.Assert.AreEqual("driverClass",
        credentialedConf.Get(DBConfiguration.DriverClassProperty));
    NUnit.Framework.Assert.AreEqual("dbUrl",
        credentialedConf.Get(DBConfiguration.UrlProperty));
    NUnit.Framework.Assert.AreEqual("user",
        credentialedConf.Get(DBConfiguration.UsernameProperty));
    NUnit.Framework.Assert.AreEqual("password",
        credentialedConf.Get(DBConfiguration.PasswordProperty));

    // Connection settings without credentials: user and password stay unset.
    JobConf anonymousConf = new JobConf();
    DBConfiguration.ConfigureDB(anonymousConf, "driverClass", "dbUrl");
    NUnit.Framework.Assert.AreEqual("driverClass",
        anonymousConf.Get(DBConfiguration.DriverClassProperty));
    NUnit.Framework.Assert.AreEqual("dbUrl",
        anonymousConf.Get(DBConfiguration.UrlProperty));
    NUnit.Framework.Assert.IsNull(anonymousConf.Get(DBConfiguration.UsernameProperty));
    NUnit.Framework.Assert.IsNull(anonymousConf.Get(DBConfiguration.PasswordProperty));
}
/// <summary>Inform the parser of user-defined types.</summary>
/// <remarks>
/// Every configuration key of the form <c>mapred.join.define.&lt;name&gt;</c>
/// registers <c>&lt;name&gt;</c> with <c>Parser.CNode</c>, together with the
/// class configured under that key.
/// </remarks>
/// <param name="job">job configuration whose entries are scanned.</param>
/// <exception cref="System.IO.IOException">
/// if registering an identifier raises a <c>MissingMethodException</c>; the
/// original exception is attached as the cause.
/// </exception>
private void AddUserIdentifiers(JobConf job)
{
    Sharpen.Pattern definePattern = Sharpen.Pattern.Compile("^mapred\\.join\\.define\\.(\\w+)$");
    foreach (KeyValuePair<string, string> entry in job)
    {
        Matcher match = definePattern.Matcher(entry.Key);
        if (!match.Matches())
        {
            continue;
        }

        try
        {
            // Group 1 is the identifier; group 0 is the whole configuration key.
            Parser.CNode.AddIdentifier(
                match.Group(1),
                job.GetClass<ComposableRecordReader>(match.Group(0), null));
        }
        catch (MissingMethodException cause)
        {
            IOException failure = new IOException("Invalid define for " + match.Group(1));
            throw (IOException)Sharpen.Extensions.InitCause(failure, cause);
        }
    }
}
/// <summary>
/// Verifies that a shuffle consumer plugin class set in the job configuration
/// can be looked up and instantiated through <c>ReflectionUtils.NewInstance</c>.
/// </summary>
public virtual void TestPluginAbility()
{
    try
    {
        // create JobConf with mapreduce.job.shuffle.consumer.plugin=TestShuffleConsumerPlugin
        JobConf jobConf = new JobConf();
        jobConf.SetClass(MRConfig.ShuffleConsumerPlugin,
            typeof(TestShufflePlugin.TestShuffleConsumerPlugin), typeof(ShuffleConsumerPlugin));

        Type clazz = jobConf.GetClass<ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin, typeof(Shuffle));
        // NUnit order is (value, message); with the arguments swapped the
        // assertion would inspect the message string, which is never null.
        NUnit.Framework.Assert.IsNotNull(clazz, "Unable to get " + MRConfig.ShuffleConsumerPlugin);

        // load 3rd party plugin through core's factory method
        ShuffleConsumerPlugin shuffleConsumerPlugin = ReflectionUtils.NewInstance(clazz, jobConf);
        NUnit.Framework.Assert.IsNotNull(shuffleConsumerPlugin,
            "Unable to load " + MRConfig.ShuffleConsumerPlugin);
    }
    catch (Exception e)
    {
        // Any exception on the way is a test failure; report it with its details.
        NUnit.Framework.Assert.Fail("Threw exception:" + e);
    }
}
/// <summary>Get the user's original partitioner.</summary>
/// <remarks>
/// <c>HashPartitioner</c> is passed as the fallback class for the lookup.
/// </remarks>
/// <param name="conf">the configuration to look in</param>
/// <returns>the class that the user submitted</returns>
internal static Type GetJavaPartitioner(JobConf conf)
{
    Type fallback = typeof(HashPartitioner);
    return conf.GetClass<Partitioner>(Org.Apache.Hadoop.Mapred.Pipes.Submitter.Partitioner, fallback);
}
/// <summary>
/// Get the
/// <see cref="Org.Apache.Hadoop.Mapreduce.InputFormat{K, V}"/>
/// class for the job.
/// </summary>
/// <remarks>
/// The class is read from the <c>InputFormatClassAttr</c> property, with
/// <c>TextInputFormat</c> passed as the fallback.
/// </remarks>
/// <returns>
/// the
/// <see cref="Org.Apache.Hadoop.Mapreduce.InputFormat{K, V}"/>
/// class for the job.
/// </returns>
/// <exception cref="System.TypeLoadException"/>
public virtual Type GetInputFormatClass()
{
    Type inputFormatClass = (Type)conf.GetClass(InputFormatClassAttr, typeof(TextInputFormat));
    return inputFormatClass;
}
/// <summary>Returns the value class for a named output.</summary>
/// <remarks>
/// The named output is first passed through <c>CheckNamedOutput</c>; the
/// class is then read from the property <c>MoPrefix + namedOutput + Value</c>.
/// </remarks>
/// <param name="conf">job conf</param>
/// <param name="namedOutput">named output</param>
/// <returns>class of named output value</returns>
public static Type GetNamedOutputValueClass(JobConf conf, string namedOutput)
{
    CheckNamedOutput(conf, namedOutput, false);
    string valueClassProperty = MoPrefix + namedOutput + Value;
    return conf.GetClass<object>(valueClassProperty, null);
}
/// <summary>
/// Runs this task: handles the job-cleanup, job-setup and task-cleanup
/// variants, and otherwise runs the shuffle through the configured
/// <c>ShuffleConsumerPlugin</c> followed by the reducer.
/// </summary>
/// <param name="job">job configuration.</param>
/// <param name="umbilical">protocol used to communicate with the parent.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public override void Run(JobConf job, TaskUmbilicalProtocol umbilical)
{
    job.SetBoolean(JobContext.SkipRecords, IsSkipping());

    if (IsMapOrReduce())
    {
        copyPhase = GetProgress().AddPhase("copy");
        sortPhase = GetProgress().AddPhase("sort");
        reducePhase = GetProgress().AddPhase("reduce");
    }

    // start thread that will handle communication with parent
    Task.TaskReporter reporter = StartReporter(umbilical);
    bool useNewApi = job.GetUseNewReducer();
    Initialize(job, GetJobID(), reporter, useNewApi);

    // The setup/cleanup task variants do their own work and return early;
    // the job-cleanup check comes first.
    if (jobCleanup)
    {
        RunJobCleanupTask(umbilical, reporter);
        return;
    }
    if (jobSetup)
    {
        RunJobSetupTask(umbilical, reporter);
        return;
    }
    if (taskCleanup)
    {
        RunTaskCleanupTask(umbilical, reporter);
        return;
    }

    // Initialize the codec
    codec = InitCodec();

    // A combine collector is only created when a combiner class is configured.
    Type combinerClass = conf.GetCombinerClass();
    Task.CombineOutputCollector combineCollector = null;
    if (combinerClass != null)
    {
        combineCollector = new Task.CombineOutputCollector(reduceCombineOutputCounter, reporter, conf);
    }

    Type pluginType = job.GetClass<ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin, typeof(Shuffle));
    ShuffleConsumerPlugin shufflePlugin = ReflectionUtils.NewInstance(pluginType, job);
    Log.Info("Using ShuffleConsumerPlugin: " + shufflePlugin);

    ShuffleConsumerPlugin.Context shuffleContext = new ShuffleConsumerPlugin.Context(
        GetTaskID(), job, FileSystem.GetLocal(job), umbilical, base.lDirAlloc, reporter,
        codec, combinerClass, combineCollector, spilledRecordsCounter, reduceCombineInputCounter,
        shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter, mergedMapOutputsCounter,
        taskStatus, copyPhase, sortPhase, this, mapOutputFile, localMapFiles);
    shufflePlugin.Init(shuffleContext);
    RawKeyValueIterator mergedInput = shufflePlugin.Run();

    // free up the data structures
    mapOutputFilesOnDisk.Clear();

    // sort is complete
    sortPhase.Complete();
    SetPhase(TaskStatus.Phase.Reduce);
    StatusUpdate(umbilical);

    Type keyClass = job.GetMapOutputKeyClass();
    Type valueClass = job.GetMapOutputValueClass();
    RawComparator comparator = job.GetOutputValueGroupingComparator();

    if (!useNewApi)
    {
        RunOldReducer(job, umbilical, reporter, mergedInput, comparator, keyClass, valueClass);
    }
    else
    {
        RunNewReducer(job, umbilical, reporter, mergedInput, comparator, keyClass, valueClass);
    }

    shufflePlugin.Close();
    Done(umbilical, reporter);
}
/// <summary>
/// Get the value class for the
/// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>
/// </summary>
/// <remarks>
/// The job's output value class, narrowed with <c>AsSubclass&lt;Writable&gt;</c>,
/// is passed as the fallback for the
/// <c>SequenceFileAsBinaryOutputFormat.ValueClass</c> lookup.
/// </remarks>
/// <param name="conf">job configuration to read from.</param>
/// <returns>
/// the value class of the
/// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>
/// </returns>
public static Type GetSequenceFileOutputValueClass(JobConf conf)
{
    Type jobValueClass = conf.GetOutputValueClass().AsSubclass<Writable>();
    return conf.GetClass<Writable>(SequenceFileAsBinaryOutputFormat.ValueClass, jobValueClass);
}
/// <summary>
/// Computes the input splits by delegating to the input format configured
/// under <c>Submitter.InputFormat</c> (<c>TextInputFormat</c> is passed as
/// the fallback class).
/// </summary>
/// <param name="job">job configuration.</param>
/// <param name="numSplits">split count forwarded to the delegate.</param>
/// <returns>the splits produced by the delegate input format.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
{
    // Delegate the generation of input splits to the 'original' InputFormat
    Type originalFormat = job.GetClass<InputFormat>(Submitter.InputFormat, typeof(TextInputFormat));
    return ReflectionUtils.NewInstance(originalFormat, job).GetSplits(job, numSplits);
}
/// <summary>Returns the named output OutputFormat.</summary>
/// <remarks>
/// The named output is first passed through <c>CheckNamedOutput</c>; the
/// class is then read from the property <c>MoPrefix + namedOutput + Format</c>.
/// </remarks>
/// <param name="conf">job conf</param>
/// <param name="namedOutput">named output</param>
/// <returns>namedOutput OutputFormat</returns>
public static Type GetNamedOutputFormatClass(JobConf conf, string namedOutput)
{
    CheckNamedOutput(conf, namedOutput, false);
    string formatClassProperty = MoPrefix + namedOutput + Format;
    return conf.GetClass<OutputFormat>(formatClassProperty, null);
}
/// <summary>
/// Configures and runs the load-generator job, printing its start time, end
/// time and duration to standard output.
/// </summary>
/// <remarks>
/// With no output path the job uses <c>NullOutputFormat</c>. With no input
/// paths <c>ConfRandom</c> is invoked. Otherwise, if an indirect input format
/// is configured, the input paths are walked (directories whose name starts
/// with "_" are not descended into) and every file's length and URI are
/// written to a sequence file under the <c>.staging</c> directory of the
/// user's home directory.
/// </remarks>
/// <param name="argv">command-line arguments, handed to <c>ParseArgs</c>.</param>
/// <returns>-1 if the arguments could not be parsed, 0 otherwise.</returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] argv)
{
    JobConf job = new JobConf(GetConf());
    job.SetJarByClass(typeof(GenericMRLoadGenerator));
    job.SetMapperClass(typeof(GenericMRLoadGenerator.SampleMapper));
    job.SetReducerClass(typeof(GenericMRLoadGenerator.SampleReducer));
    if (!ParseArgs(argv, job))
    {
        return -1;
    }

    if (null == FileOutputFormat.GetOutputPath(job))
    {
        // No output dir? No writes
        job.SetOutputFormat(typeof(NullOutputFormat));
    }

    if (0 == FileInputFormat.GetInputPaths(job).Length)
    {
        // No input dir? Generate random data
        System.Console.Error.WriteLine("No input path; ignoring InputFormat");
        ConfRandom(job);
    }
    else if (null != job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, null))
    {
        // specified IndirectInputFormat? Build src list
        JobClient jClient = new JobClient(job);
        Path tmpDir = new Path(jClient.GetFs().GetHomeDirectory(), ".staging");
        Random r = new Random();
        Path indirInputFile = new Path(tmpDir,
            Sharpen.Extensions.ToString(r.Next(int.MaxValue), 36) + "_files");
        job.Set(GenericMRLoadGenerator.IndirectInputFile, indirInputFile.ToString());
        SequenceFile.Writer writer = SequenceFile.CreateWriter(tmpDir.GetFileSystem(job), job,
            indirInputFile, typeof(LongWritable), typeof(Text), SequenceFile.CompressionType.None);
        try
        {
            foreach (Path p in FileInputFormat.GetInputPaths(job))
            {
                FileSystem fs = p.GetFileSystem(job);
                // Explicit stack instead of recursion to walk the directory tree.
                Stack<Path> pathstack = new Stack<Path>();
                pathstack.Push(p);
                while (!pathstack.Empty())
                {
                    foreach (FileStatus stat in fs.ListStatus(pathstack.Pop()))
                    {
                        if (stat.IsDirectory())
                        {
                            if (!stat.GetPath().GetName().StartsWith("_"))
                            {
                                pathstack.Push(stat.GetPath());
                            }
                        }
                        else
                        {
                            writer.Sync();
                            writer.Append(new LongWritable(stat.GetLen()),
                                new Text(stat.GetPath().ToUri().ToString()));
                        }
                    }
                }
            }
        }
        finally
        {
            writer.Close();
        }
    }

    // DateTime.Now captures the current time; a parameterless "new DateTime()"
    // is DateTime.MinValue and would make every run report a duration of 0.
    DateTime startTime = DateTime.Now;
    System.Console.Out.WriteLine("Job started: " + startTime);
    JobClient.RunJob(job);
    DateTime endTime = DateTime.Now;
    System.Console.Out.WriteLine("Job ended: " + endTime);

    // Subtract in UTC so a daylight-saving shift during the run cannot skew
    // the result; truncate to whole seconds.
    long elapsedSeconds = (long)(endTime.ToUniversalTime() - startTime.ToUniversalTime()).TotalSeconds;
    System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
    return 0;
}