Code example #1
File: Chain.cs Project: orf53975/hadoop.net
 internal ChainRecordWriter(TaskInputOutputContext<object, object, KEYOUT, VALUEOUT> context)
 {
     // ChainRecordWriter writes either to a blocking queue or to the task context;
     // this constructor sets it up to write to the context.
     outputContext = context;
 }
Code example #2
 public RecordWriterWithCounter(RecordWriter writer, string counterName, TaskInputOutputContext context)
 {
     this.writer      = writer;
     this.counterName = counterName;
     this.context     = context;
 }
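The constructor above only stores its collaborators; the point of the wrapper is a Write that increments a per-named-output counter before delegating to the wrapped writer. A minimal sketch of that method, assuming the port mirrors the Java MultipleOutputs inner class (the CountersGroup constant is an assumption):

 // Hypothetical Write override: count the record, then delegate.
 // CountersGroup is assumed to name the counter group (in the Java
 // original it is the MultipleOutputs class name).
 public override void Write(object key, object value)
 {
     context.GetCounter(CountersGroup, counterName).Increment(1);
     writer.Write(key, value);
 }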
Code example #3
 internal ChainMapContextImpl(TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> @base,
                              RecordReader<KEYIN, VALUEIN> rr,
                              RecordWriter<KEYOUT, VALUEOUT> rw,
                              Configuration conf)
 {
     this.reader = rr;
     this.output = rw;
     this.@base  = @base;
     this.conf   = conf;
 }
Code example #4
 /// <summary>
 /// Creates and initializes multiple-outputs support;
 /// it should be instantiated in the Mapper/Reducer setup method.
 /// </summary>
 /// <param name="context">the TaskInputOutputContext object</param>
 public MultipleOutputs(TaskInputOutputContext<object, object, KEYOUT, VALUEOUT> context)
 {
     // instance code, to be used from Mapper/Reducer code
     this.context = context;
     namedOutputs = Sharpen.Collections.UnmodifiableSet(
         new HashSet<string>(MultipleOutputs.GetNamedOutputsList(context)));
     recordWriters   = new Dictionary <string, RecordWriter <object, object> >();
     countersEnabled = GetCountersEnabled(context);
 }
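As the summary notes, this constructor is meant to be called from a task's setup method. A minimal usage sketch, assuming the ported Reducer API mirrors the Java original (the Setup/Reduce/Cleanup overrides, the "summary" output name, and WordCountReducer itself are illustrative):

// Hypothetical reducer showing the intended MultipleOutputs lifecycle:
// create in Setup, write to a named output in Reduce, close in Cleanup.
internal class WordCountReducer : Reducer<Text, IntWritable, Text, IntWritable>
{
    private MultipleOutputs<Text, IntWritable> mos;

    protected override void Setup(Context context)
    {
        mos = new MultipleOutputs<Text, IntWritable>(context);
    }

    protected override void Reduce(Text key, IEnumerable<IntWritable> values, Context context)
    {
        int sum = 0;
        foreach (IntWritable v in values)
        {
            sum += v.Get();
        }
        // "summary" must have been registered with AddNamedOutput at job setup.
        mos.Write("summary", key, new IntWritable(sum));
    }

    protected override void Cleanup(Context context)
    {
        mos.Close();  // flushes every named-output record writer
    }
}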
Code example #5
File: Chain.cs Project: orf53975/hadoop.net
        // Run the reducer directly.
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal virtual void RunReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
            TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context)
        {
            RecordWriter<KEYOUT, VALUEOUT> rw =
                new Chain.ChainRecordWriter<KEYOUT, VALUEOUT>(context);

            Reducer.Context reducerContext =
                CreateReduceContext(rw, (ReduceContext)context, rConf);
            reducer.Run(reducerContext);
            rw.Close(context);
        }
Code example #6
File: Chain.cs Project: orf53975/hadoop.net
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal virtual void RunMapper(TaskInputOutputContext context, int index)
        {
            Mapper       mapper = mappers[index];
            RecordReader rr     = new Chain.ChainRecordReader(context);
            RecordWriter rw     = new Chain.ChainRecordWriter(context);

            Mapper.Context mapperContext = CreateMapContext(rr, rw, context, GetConf(index));
            mapper.Run(mapperContext);
            rr.Close();
            rw.Close(context);
        }
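RunMapper and RunReducer execute stages that were registered on the chain when the job was configured. A rough sketch of that configuration step, assuming the port keeps the Java ChainMapper/ChainReducer helpers (AMap, BMap, XReduce, CMap and the key/value classes are placeholders):

// Hypothetical chain setup: [AMap -> BMap] -> XReduce -> CMap runs in
// a single map/reduce task, each stage with a private Configuration.
Configuration conf = new Configuration();
Job job = Job.GetInstance(conf, "chain-example");

ChainMapper.AddMapper(job, typeof(AMap), typeof(LongWritable), typeof(Text),
                      typeof(Text), typeof(Text), new Configuration(false));
ChainMapper.AddMapper(job, typeof(BMap), typeof(Text), typeof(Text),
                      typeof(LongWritable), typeof(Text), new Configuration(false));
ChainReducer.SetReducer(job, typeof(XReduce), typeof(LongWritable), typeof(Text),
                        typeof(Text), typeof(Text), new Configuration(false));
ChainReducer.AddMapper(job, typeof(CMap), typeof(Text), typeof(Text),
                       typeof(LongWritable), typeof(Text), new Configuration(false));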
Code example #7
File: Chain.cs Project: orf53975/hadoop.net
        /// <summary>Add reducer that reads from context and writes to a queue</summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal virtual void AddReducer(TaskInputOutputContext inputContext,
                                         Chain.ChainBlockingQueue<Chain.KeyValuePair<object, object>> outputQueue)
        {
            Type keyOutClass   = rConf.GetClass(ReducerOutputKeyClass, typeof(object));
            Type valueOutClass = rConf.GetClass(ReducerOutputValueClass, typeof(object));
            RecordWriter rw    = new Chain.ChainRecordWriter(keyOutClass, valueOutClass,
                                                             outputQueue, rConf);

            Reducer.Context reducerContext =
                CreateReduceContext(rw, (ReduceContext)inputContext, rConf);
            Chain.ReduceRunner runner =
                new Chain.ReduceRunner(this, reducerContext, reducer, rw);
            threads.AddItem(runner);
        }
Code example #8
File: Chain.cs Project: orf53975/hadoop.net
        /// <summary>
        /// Add a mapper (the first mapper) that reads input from the input
        /// context and writes to a queue
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal virtual void AddMapper(TaskInputOutputContext inputContext,
                                        Chain.ChainBlockingQueue<Chain.KeyValuePair<object, object>> output,
                                        int index)
        {
            Configuration conf = GetConf(index);
            Type keyOutClass   = conf.GetClass(MapperOutputKeyClass, typeof(object));
            Type valueOutClass = conf.GetClass(MapperOutputValueClass, typeof(object));
            RecordReader rr    = new Chain.ChainRecordReader(inputContext);
            RecordWriter rw    = new Chain.ChainRecordWriter(keyOutClass, valueOutClass,
                                                             output, conf);

            Mapper.Context mapperContext =
                CreateMapContext(rr, rw, (MapContext)inputContext, GetConf(index));
            Chain.MapRunner runner =
                new Chain.MapRunner(this, mappers[index], mapperContext, rr, rw);
            threads.AddItem(runner);
        }
Code example #9
File: DistSum.cs Project: orf53975/hadoop.net
            /// <summary>Compute sigma</summary>
            /// <exception cref="System.IO.IOException"/>
            /// <exception cref="System.Exception"/>
            internal static void Compute<_T0>(Summation sigma, TaskInputOutputContext<_T0> context)
            {
                string s;

                Log.Info(s = "sigma=" + sigma);
                context.SetStatus(s);
                long start = Runtime.CurrentTimeMillis();

                sigma.Compute();
                long       duration = Runtime.CurrentTimeMillis() - start;
                TaskResult result   = new TaskResult(sigma, duration);

                Log.Info(s = "result=" + result);
                context.SetStatus(s);
                context.Write(NullWritable.Get(), result);
            }
Code example #10
            /// <exception cref="System.IO.IOException"/>
            public virtual void Setup <_T0>(TaskInputOutputContext <_T0> context)
            {
                Configuration conf = context.GetConfiguration();

                Path[]     localFiles    = context.GetLocalCacheFiles();
                URI[]      files         = context.GetCacheFiles();
                Path[]     localArchives = context.GetLocalCacheArchives();
                URI[]      archives      = context.GetCacheArchives();
                FileSystem fs            = LocalFileSystem.Get(conf);

                // Check that 2 files and 2 archives are present
                NUnit.Framework.Assert.AreEqual(2, localFiles.Length);
                NUnit.Framework.Assert.AreEqual(2, localArchives.Length);
                NUnit.Framework.Assert.AreEqual(2, files.Length);
                NUnit.Framework.Assert.AreEqual(2, archives.Length);
                // Check the file name
                NUnit.Framework.Assert.IsTrue(files[0].GetPath().EndsWith("distributed.first"));
                NUnit.Framework.Assert.IsTrue(files[1].GetPath().EndsWith("distributed.second.jar"));
                // Check lengths of the files
                NUnit.Framework.Assert.AreEqual(1, fs.GetFileStatus(localFiles[0]).GetLen());
                NUnit.Framework.Assert.IsTrue(fs.GetFileStatus(localFiles[1]).GetLen() > 1);
                // Check extraction of the archive
                NUnit.Framework.Assert.IsTrue(fs.Exists(new Path(localArchives[0], "distributed.jar.inside3")));
                NUnit.Framework.Assert.IsTrue(fs.Exists(new Path(localArchives[1], "distributed.jar.inside4")));
                // Check the class loaders
                Log.Info("Java Classpath: " + Runtime.GetProperty("java.class.path"));
                ClassLoader cl = Sharpen.Thread.CurrentThread().GetContextClassLoader();

                // Both the file and the archive were added to classpath, so both
                // should be reachable via the class loader.
                NUnit.Framework.Assert.IsNotNull(cl.GetResource("distributed.jar.inside2"));
                NUnit.Framework.Assert.IsNotNull(cl.GetResource("distributed.jar.inside3"));
                NUnit.Framework.Assert.IsNull(cl.GetResource("distributed.jar.inside4"));
                // Check that the symlink for the renaming was created in the cwd.
                NUnit.Framework.Assert.IsTrue("symlink distributed.first.symlink doesn't exist",
                                              symlinkFile.Exists());
                NUnit.Framework.Assert.AreEqual("symlink distributed.first.symlink length not 1",
                                                1, symlinkFile.Length());
                // This last one is a difference between MRv2 and MRv1
                NUnit.Framework.Assert.IsTrue("second file should be symlinked too",
                                              expectedAbsentSymlinkFile.Exists());
            }
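The assertions above presuppose driver-side setup: two cache files, two archives, and classpath additions made before the job is submitted. A sketch of that setup, assuming the port mirrors the Java Job API (the /cache URIs and the '#' symlink fragment are illustrative):

// Hypothetical driver-side configuration matching the Setup assertions.
Job job = Job.GetInstance(conf, "distributed-cache-test");

// Two cache files; the '#' fragment names the symlink created in the
// task working directory (distributed.first.symlink).
job.AddCacheFile(new URI("/cache/distributed.first#distributed.first.symlink"));
job.AddCacheFile(new URI("/cache/distributed.second.jar"));

// Two archives, unpacked on the task side so that paths such as
// <archive>/distributed.jar.inside3 exist.
job.AddCacheArchive(new URI("/cache/distributed.third.jar"));
job.AddCacheArchive(new URI("/cache/distributed.fourth.jar"));

// Entries added to the classpath become visible through the context
// class loader, which is what the GetResource checks exercise.
job.AddFileToClassPath(new Path("/cache/distributed.second.jar"));
job.AddArchiveToClassPath(new Path("/cache/distributed.third.jar"));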
Code example #11
 /// <summary>
 /// Helper function to generate a
 /// <see cref="Org.Apache.Hadoop.FS.Path"/>
 /// for a file that is unique for
 /// the task within the job output directory.
 /// <p>The path can be used to create custom files from within the map and
 /// reduce tasks. The path name will be unique for each task. The path parent
 /// will be the job output directory.</p>
 /// <p>This method uses the
 /// <see cref="FileOutputFormat{K, V}.GetUniqueFile(Org.Apache.Hadoop.Mapreduce.TaskAttemptContext, string, string)"/>
 /// method to make the file name unique for the task.</p>
 /// </summary>
 /// <param name="context">the context for the task.</param>
 /// <param name="name">the name for the file.</param>
 /// <param name="extension">the extension for the file</param>
 /// <returns>a unique path across all tasks of the job.</returns>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.Exception"/>
 public static Path GetPathForWorkFile<_T0>(TaskInputOutputContext<_T0> context,
                                            string name, string extension)
 {
     return new Path(GetWorkOutputPath(context), GetUniqueFile(context, name, extension));
 }
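A short usage sketch from inside a task: the returned path is unique per task and lives under the work directory, so whatever is written there is promoted to the job output directory on commit (the file name, contents, and the explicit Close call are illustrative, assuming the ported FileSystem API matches the Java original):

// Hypothetical use inside a Mapper or Reducer.
Path sideFile = FileOutputFormat.GetPathForWorkFile(context, "debug-dump", ".txt");
FileSystem fs = sideFile.GetFileSystem(context.GetConfiguration());
FSDataOutputStream @out = fs.Create(sideFile);
@out.WriteBytes("records seen so far: 42\n");
@out.Close();
// On successful commit the file moves into the job output directory.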
Code example #12
        /// <summary>
        /// Get the
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to the task's temporary output directory
        /// for the map-reduce job
        /// <b id="SideEffectFiles">Tasks' Side-Effect Files</b>
        /// <p>Some applications need to create/write-to side-files, which differ from
        /// the actual job-outputs.
        /// <p>In such cases there could be issues with 2 instances of the same TIP
        /// (running simultaneously e.g. speculative tasks) trying to open/write-to the
        /// same file (path) on HDFS. Hence the application-writer will have to pick
        /// unique names per task-attempt (e.g. using the attemptid, say
        /// <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
        /// <p>To get around this the Map-Reduce framework helps the application-writer
        /// out by maintaining a special
        /// <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
        /// sub-directory for each task-attempt on HDFS where the output of the
        /// task-attempt goes. On successful completion of the task-attempt the files
        /// in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
        /// are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
        /// framework discards the sub-directory of unsuccessful task-attempts. This
        /// is completely transparent to the application.</p>
        /// <p>The application-writer can take advantage of this by creating any
        /// side-files required in a work directory during execution
        /// of the task, i.e. via
        /// <see cref="FileOutputFormat{K, V}.GetWorkOutputPath(Org.Apache.Hadoop.Mapreduce.TaskInputOutputContext{KEYIN, VALUEIN, KEYOUT, VALUEOUT})"/>
        /// , and the framework will move them out similarly - thus the writer doesn't
        /// have to pick unique paths per task-attempt.</p>
        /// <p>The entire discussion holds true for maps of jobs with
        /// reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
        /// goes directly to HDFS.</p>
        /// </summary>
        /// <returns>
        /// the
        /// <see cref="Org.Apache.Hadoop.FS.Path"/>
        /// to the task's temporary output directory
        /// for the map-reduce job.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public static Path GetWorkOutputPath <_T0>(TaskInputOutputContext <_T0> context)
        {
            FileOutputCommitter committer = (FileOutputCommitter)context.GetOutputCommitter();

            return committer.GetWorkPath();
        }
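To make the promotion behavior concrete: a minimal sketch in which every task attempt writes a side file with the same relative name under its own work directory, which is safe precisely because the directories are per-attempt (file name and contents are illustrative):

// Hypothetical side-effect file. Each attempt writes to
//   ${outputdir}/_temporary/_${taskid}/side-data.txt
// and the committer promotes the successful attempt's copy to
//   ${outputdir}/side-data.txt
Path work = FileOutputFormat.GetWorkOutputPath(context);
Path side = new Path(work, "side-data.txt");  // same name in every attempt
FileSystem fs = side.GetFileSystem(context.GetConfiguration());
FSDataOutputStream @out = fs.Create(side);
@out.WriteBytes("side-effect output\n");
@out.Close();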
Code example #13
File: Chain.cs Project: orf53975/hadoop.net
        /// <summary>
        /// Create a map context that is based on ChainMapContext and the given record
        /// reader and record writer
        /// </summary>
        private Mapper.Context CreateMapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
            RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw,
            TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
            Configuration conf)
        {
            MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapContext =
                new ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(context, rr, rw, conf);

            Mapper.Context mapperContext =
                new WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>().GetMapContext(mapContext);
            return mapperContext;
        }
Code example #14
File: Chain.cs Project: orf53975/hadoop.net
 internal ChainRecordReader(TaskInputOutputContext<KEYIN, VALUEIN, object, object> context)
 {
     // constructor to read from the context
     inputContext = context;
 }