internal ChainRecordWriter(TaskInputOutputContext<object, object, KEYOUT, VALUEOUT> context)
{
    // ChainRecordWriter either writes to a blocking queue or to the task
    // context; this is the constructor to write to the context.
    outputContext = context;
}
public RecordWriterWithCounter(RecordWriter writer, string counterName, TaskInputOutputContext context)
{
    this.writer = writer;
    this.counterName = counterName;
    this.context = context;
}
internal ChainMapContextImpl(TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> @base,
    RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw, Configuration conf)
{
    this.reader = rr;
    this.output = rw;
    this.@base = @base;
    this.conf = conf;
}
/// <summary>
/// Creates and initializes multiple-outputs support.
/// It should be instantiated in the Mapper/Reducer setup method.
/// </summary>
/// <param name="context">the TaskInputOutputContext object</param>
public MultipleOutputs(TaskInputOutputContext<object, object, KEYOUT, VALUEOUT> context)
{
    // Instance code, to be used from Mapper/Reducer code.
    this.context = context;
    namedOutputs = Sharpen.Collections.UnmodifiableSet(
        new HashSet<string>(MultipleOutputs.GetNamedOutputsList(context)));
    recordWriters = new Dictionary<string, RecordWriter<object, object>>();
    countersEnabled = GetCountersEnabled(context);
}
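// Usage sketch (an assumption, not code from this repo): the doc above says
// MultipleOutputs belongs in the Mapper/Reducer setup method. A hypothetical
// WordMapper, assuming a named output "text" was registered on the Job via
// MultipleOutputs.AddNamedOutput, could manage the lifecycle like this:
internal class WordMapper : Mapper<LongWritable, Text, Text, IntWritable>
{
    private MultipleOutputs<Text, IntWritable> mos;

    protected override void Setup(Mapper.Context context)
    {
        // Create the MultipleOutputs once per task, as the doc recommends.
        mos = new MultipleOutputs<Text, IntWritable>(context);
    }

    protected override void Map(LongWritable key, Text value, Mapper.Context context)
    {
        // Write to the named output alongside (or instead of) the normal output.
        mos.Write("text", value, new IntWritable(1));
    }

    protected override void Cleanup(Mapper.Context context)
    {
        // Close flushes and closes the extra record writers.
        mos.Close();
    }
}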
/// <summary>Run the reducer directly.</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal virtual void RunReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
    TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context)
{
    RecordWriter<KEYOUT, VALUEOUT> rw = new Chain.ChainRecordWriter<KEYOUT, VALUEOUT>(context);
    Reducer.Context reducerContext = CreateReduceContext(rw, (ReduceContext)context, rConf);
    reducer.Run(reducerContext);
    rw.Close(context);
}
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal virtual void RunMapper(TaskInputOutputContext context, int index)
{
    Mapper mapper = mappers[index];
    RecordReader rr = new Chain.ChainRecordReader(context);
    RecordWriter rw = new Chain.ChainRecordWriter(context);
    Mapper.Context mapperContext = CreateMapContext(rr, rw, context, GetConf(index));
    mapper.Run(mapperContext);
    rr.Close();
    rw.Close(context);
}
/// <summary>Add a reducer that reads from the context and writes to a queue.</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal virtual void AddReducer(TaskInputOutputContext inputContext,
    Chain.ChainBlockingQueue<Chain.KeyValuePair<object, object>> outputQueue)
{
    Type keyOutClass = rConf.GetClass(ReducerOutputKeyClass, typeof(object));
    Type valueOutClass = rConf.GetClass(ReducerOutputValueClass, typeof(object));
    RecordWriter rw = new Chain.ChainRecordWriter(keyOutClass, valueOutClass, outputQueue, rConf);
    Reducer.Context reducerContext = CreateReduceContext(rw, (ReduceContext)inputContext, rConf);
    Chain.ReduceRunner runner = new Chain.ReduceRunner(this, reducerContext, reducer, rw);
    threads.AddItem(runner);
}
/// <summary>
/// Add a mapper (the first mapper) that reads input from the input
/// context and writes to a queue.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal virtual void AddMapper(TaskInputOutputContext inputContext,
    Chain.ChainBlockingQueue<Chain.KeyValuePair<object, object>> output, int index)
{
    Configuration conf = GetConf(index);
    Type keyOutClass = conf.GetClass(MapperOutputKeyClass, typeof(object));
    Type valueOutClass = conf.GetClass(MapperOutputValueClass, typeof(object));
    RecordReader rr = new Chain.ChainRecordReader(inputContext);
    RecordWriter rw = new Chain.ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
    Mapper.Context mapperContext = CreateMapContext(rr, rw, (MapContext)inputContext, GetConf(index));
    Chain.MapRunner runner = new Chain.MapRunner(this, mappers[index], mapperContext, rr, rw);
    threads.AddItem(runner);
}
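// For orientation (a sketch, not code from this repo): the AddMapper/AddReducer
// plumbing above backs the public ChainMapper/ChainReducer API. Wiring a chain
// job with hypothetical AMap/BMap mappers and a WCReduce reducer would look
// roughly like this; each stage's output key/value classes must match the next
// stage's input classes.
Job job = Job.GetInstance(new Configuration(), "chain-example");
ChainMapper.AddMapper(job, typeof(AMap), typeof(LongWritable), typeof(Text),
    typeof(Text), typeof(Text), new Configuration(false));
ChainMapper.AddMapper(job, typeof(BMap), typeof(Text), typeof(Text),
    typeof(Text), typeof(Text), new Configuration(false));
ChainReducer.SetReducer(job, typeof(WCReduce), typeof(Text), typeof(Text),
    typeof(Text), typeof(IntWritable), new Configuration(false));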
/// <summary>Compute sigma.</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal static void Compute<_T0>(Summation sigma, TaskInputOutputContext<_T0> context)
{
    string s;
    Log.Info(s = "sigma=" + sigma);
    context.SetStatus(s);
    long start = Runtime.CurrentTimeMillis();
    sigma.Compute();
    long duration = Runtime.CurrentTimeMillis() - start;
    TaskResult result = new TaskResult(sigma, duration);
    Log.Info(s = "result=" + result);
    context.SetStatus(s);
    context.Write(NullWritable.Get(), result);
}
/// <exception cref="System.IO.IOException"/>
public virtual void Setup<_T0>(TaskInputOutputContext<_T0> context)
{
    Configuration conf = context.GetConfiguration();
    Path[] localFiles = context.GetLocalCacheFiles();
    URI[] files = context.GetCacheFiles();
    Path[] localArchives = context.GetLocalCacheArchives();
    URI[] archives = context.GetCacheArchives();
    FileSystem fs = LocalFileSystem.Get(conf);
    // Check that 2 files and 2 archives are present.
    NUnit.Framework.Assert.AreEqual(2, localFiles.Length);
    NUnit.Framework.Assert.AreEqual(2, localArchives.Length);
    NUnit.Framework.Assert.AreEqual(2, files.Length);
    NUnit.Framework.Assert.AreEqual(2, archives.Length);
    // Check the file names.
    NUnit.Framework.Assert.IsTrue(files[0].GetPath().EndsWith("distributed.first"));
    NUnit.Framework.Assert.IsTrue(files[1].GetPath().EndsWith("distributed.second.jar"));
    // Check the lengths of the files.
    NUnit.Framework.Assert.AreEqual(1, fs.GetFileStatus(localFiles[0]).GetLen());
    NUnit.Framework.Assert.IsTrue(fs.GetFileStatus(localFiles[1]).GetLen() > 1);
    // Check extraction of the archives.
    NUnit.Framework.Assert.IsTrue(fs.Exists(new Path(localArchives[0], "distributed.jar.inside3")));
    NUnit.Framework.Assert.IsTrue(fs.Exists(new Path(localArchives[1], "distributed.jar.inside4")));
    // Check the class loaders.
    Log.Info("Java Classpath: " + Runtime.GetProperty("java.class.path"));
    ClassLoader cl = Sharpen.Thread.CurrentThread().GetContextClassLoader();
    // Both the file and the archive were added to the classpath, so both
    // should be reachable via the class loader.
    NUnit.Framework.Assert.IsNotNull(cl.GetResource("distributed.jar.inside2"));
    NUnit.Framework.Assert.IsNotNull(cl.GetResource("distributed.jar.inside3"));
    NUnit.Framework.Assert.IsNull(cl.GetResource("distributed.jar.inside4"));
    // Check that the symlink for the renaming was created in the cwd.
    NUnit.Framework.Assert.IsTrue("symlink distributed.first.symlink doesn't exist",
        symlinkFile.Exists());
    NUnit.Framework.Assert.AreEqual("symlink distributed.first.symlink length not 1",
        1, symlinkFile.Length());
    // This last one is a difference between MRv2 and MRv1.
    NUnit.Framework.Assert.IsTrue("second file should be symlinked too",
        expectedAbsentSymlinkFile.Exists());
}
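// For context (illustrative, not part of the test): the cache layout asserted
// above would come from job setup along these lines; the "#fragment" on a URI
// names the symlink created in the task's working directory, and the paths
// here are placeholders.
Job job = Job.GetInstance(new Configuration(), "distributed-cache-test");
job.AddCacheFile(new URI("/cache/distributed.first#distributed.first.symlink"));
job.AddCacheFile(new URI("/cache/distributed.second.jar"));
job.AddCacheArchive(new URI("/cache/distributed.third.jar"));
job.AddCacheArchive(new URI("/cache/distributed.fourth.jar"));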
/// <summary>
/// Helper function to generate a
/// <see cref="Org.Apache.Hadoop.FS.Path"/>
/// for a file that is unique for
/// the task within the job output directory.
/// <p>The path can be used to create custom files from within the map and
/// reduce tasks. The path name will be unique for each task. The path parent
/// will be the job output directory.</p>
/// <p>This method uses the
/// <see cref="FileOutputFormat{K, V}.GetUniqueFile(Org.Apache.Hadoop.Mapreduce.TaskAttemptContext, string, string)"/>
/// method to make the file name unique for the task.</p>
/// </summary>
/// <param name="context">the context for the task.</param>
/// <param name="name">the name for the file.</param>
/// <param name="extension">the extension for the file</param>
/// <returns>a unique path across all tasks of the job.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public static Path GetPathForWorkFile<_T0>(TaskInputOutputContext<_T0> context,
    string name, string extension)
{
    return new Path(GetWorkOutputPath(context), GetUniqueFile(context, name, extension));
}
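// A minimal usage sketch (assuming this port mirrors Hadoop's Java API; the
// name "stats" and extension ".txt" are illustrative): derive a per-task file
// under the job output directory from inside a task and write to it.
Path sideFile = FileOutputFormat.GetPathForWorkFile(context, "stats", ".txt");
FileSystem fs = sideFile.GetFileSystem(context.GetConfiguration());
FSDataOutputStream @out = fs.Create(sideFile);
@out.WriteBytes("records-seen=42\n");
@out.Close();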
/// <summary>
/// Get the
/// <see cref="Org.Apache.Hadoop.FS.Path"/>
/// to the task's temporary output directory
/// for the map-reduce job.
/// <b id="SideEffectFiles">Tasks' Side-Effect Files</b>
/// <p>Some applications need to create/write-to side-files, which differ from
/// the actual job-outputs.</p>
/// <p>In such cases there could be issues with two instances of the same TIP
/// (running simultaneously, e.g. speculative tasks) trying to open/write-to the
/// same file (path) on HDFS. Hence the application-writer will have to pick
/// unique names per task-attempt (e.g. using the attempt id, say
/// <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
/// <p>To get around this the Map-Reduce framework helps the application-writer
/// out by maintaining a special
/// <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
/// sub-directory for each task-attempt on HDFS where the output of the
/// task-attempt goes. On successful completion of the task-attempt, the files
/// in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
/// are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
/// framework discards the sub-directory of unsuccessful task-attempts. This
/// is completely transparent to the application.</p>
/// <p>The application-writer can take advantage of this by creating any
/// side-files required in a work directory during execution
/// of the task, i.e. via
/// <see cref="FileOutputFormat{K, V}.GetWorkOutputPath(Org.Apache.Hadoop.Mapreduce.TaskInputOutputContext{KEYIN, VALUEIN, KEYOUT, VALUEOUT})"/>,
/// and the framework will move them out similarly; thus the writer doesn't
/// have to pick unique paths per task-attempt.</p>
/// <p>The entire discussion holds true for maps of jobs with
/// reducer=NONE (i.e. 0 reduces) since the output of the map, in that case,
/// goes directly to HDFS.</p>
/// </summary>
/// <returns>
/// the
/// <see cref="Org.Apache.Hadoop.FS.Path"/>
/// to the task's temporary output directory
/// for the map-reduce job.
/// </returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public static Path GetWorkOutputPath<_T0>(TaskInputOutputContext<_T0> context)
{
    FileOutputCommitter committer = (FileOutputCommitter)context.GetOutputCommitter();
    return committer.GetWorkPath();
}
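// Note that GetPathForWorkFile above is just this helper composed with
// GetUniqueFile; the manual composition below (file name illustrative) makes
// the promote-on-success behaviour explicit: files written under the work
// path are moved to ${mapreduce.output.fileoutputformat.outputdir} only if
// the task-attempt succeeds.
Path workDir = FileOutputFormat.GetWorkOutputPath(context);
Path sideFile = new Path(workDir, FileOutputFormat.GetUniqueFile(context, "side", ".dat"));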
/// <summary>
/// Create a map context that is based on ChainMapContext and the given record
/// reader and record writer.
/// </summary>
private Mapper.Context CreateMapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
    RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw,
    TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context, Configuration conf)
{
    MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapContext =
        new ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(context, rr, rw, conf);
    Mapper.Context mapperContext =
        new WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>().GetMapContext(mapContext);
    return mapperContext;
}
internal ChainRecordReader(TaskInputOutputContext<KEYIN, VALUEIN, object, object> context)
{
    // Constructor to read from the context.
    inputContext = context;
}