示例#1
0
        /// <summary>Runs the map task through the external pipes application.</summary>
        /// <remarks>
        /// Creates the <c>Application</c>, asks its downlink to run the map and, when the
        /// job is configured with a Java record reader, feeds every record read from
        /// <paramref name="input"/> to the downlink before waiting for the application
        /// to finish. The application is always cleaned up at the end.
        /// </remarks>
        /// <param name="input">the set of inputs</param>
        /// <param name="output">the object to collect the outputs of the map</param>
        /// <param name="reporter">the object to update with status</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
                                 Reporter reporter)
        {
            Application<K1, V1, K2, V2> app = null;

            try
            {
                // The reader is handed to the application only when neither the record
                // reader nor the mapper is flagged as Java; otherwise null is passed.
                RecordReader<FloatWritable, NullWritable> pipedInput = null;
                if (!(Submitter.GetIsJavaRecordReader(job) || Submitter.GetIsJavaMapper(job)))
                {
                    pipedInput = (RecordReader<FloatWritable, NullWritable>)input;
                }
                app = new Application<K1, V1, K2, V2>(
                    job,
                    pipedInput,
                    output,
                    reporter,
                    (Type)job.GetOutputKeyClass(),
                    (Type)job.GetOutputValueClass());
            }
            catch (Exception ie)
            {
                throw new RuntimeException("interrupted", ie);
            }

            DownwardProtocol<K1, V1> link = app.GetDownlink();
            bool readsWithJava = Submitter.GetIsJavaRecordReader(job);

            link.RunMap(reporter.GetInputSplit(), job.GetNumReduceTasks(), readsWithJava);
            bool flushEveryRecord = job.GetBoolean(MRJobConfig.SkipRecords, false);

            try
            {
                if (readsWithJava)
                {
                    // A single key/value pair is allocated and re-used for all entries.
                    K1 reusedKey = input.CreateKey();
                    V1 reusedValue = input.CreateValue();
                    link.SetInputTypes(reusedKey.GetType().FullName, reusedValue.GetType().FullName);

                    while (input.Next(reusedKey, reusedValue))
                    {
                        // Forward the pair to the application.
                        link.MapItem(reusedKey, reusedValue);
                        if (!flushEveryRecord)
                        {
                            continue;
                        }
                        // In skip mode the streams are flushed on every record so that
                        // records surrounding a bad record are not left buffered.
                        link.Flush();
                    }
                    link.EndOfInput();
                }
                app.WaitForFinish();
            }
            catch (Exception failure)
            {
                // Any failure while feeding or waiting is handed to the application.
                app.Abort(failure);
            }
            finally
            {
                app.Cleanup();
            }
        }
示例#2
0
        /// <summary>Creates a proxy object that speaks the binary protocol on a socket.</summary>
        /// <remarks>
        /// Upward messages are passed on to the specified handler, while downward
        /// messages are sent through the public methods on this object.
        /// The constructor also names and starts the uplink reader thread.
        /// </remarks>
        /// <param name="sock">The socket to communicate on.</param>
        /// <param name="handler">The handler for the received messages.</param>
        /// <param name="key">The object to read keys into.</param>
        /// <param name="value">The object to read values into.</param>
        /// <param name="config">The job's configuration</param>
        /// <exception cref="System.IO.IOException"/>
        public BinaryProtocol(Socket sock, UpwardProtocol<K2, V2> handler, K2 key, V2 value,
                              JobConf config)
        {
            OutputStream downstream = sock.GetOutputStream();

            if (Submitter.GetKeepCommandFile(config))
            {
                // Debugging aid: also save a copy of the downlink commands to a file.
                downstream = new BinaryProtocol.TeeOutputStream("downlink.data", downstream);
            }

            BufferedOutputStream buffered = new BufferedOutputStream(downstream, BufferSize);
            stream = new DataOutputStream(buffered);

            // The reader thread consumes the socket's input side and reports to the handler.
            uplink = new BinaryProtocol.UplinkReaderThread<K2, V2>(
                sock.GetInputStream(),
                handler,
                key,
                value);
            uplink.SetName("pipe-uplink-handler");
            uplink.Start();
        }
示例#3
0
        /// <summary>
        /// Checks that <c>GetPartition</c> yields 0 before any partition has been set and
        /// yields the value passed to <c>PipesPartitioner.SetNextPartition</c> afterwards.
        /// </summary>
        public virtual void TestPipesPartitioner()
        {
            PipesPartitioner<IntWritable, Text> underTest =
                new PipesPartitioner<IntWritable, Text>();
            JobConf conf = new JobConf();

            Submitter.GetJavaPartitioner(conf);
            underTest.Configure(new JobConf());
            IntWritable sampleKey = new IntWritable(4);

            // Nothing has been cached yet.
            int beforeCaching = underTest.GetPartition(sampleKey, new Text("test"), 2);
            NUnit.Framework.Assert.AreEqual(0, beforeCaching);

            // Put a partition into the cache...
            PipesPartitioner.SetNextPartition(3);

            // ...and read it back.
            int afterCaching = underTest.GetPartition(sampleKey, new Text("test"), 2);
            NUnit.Framework.Assert.AreEqual(3, afterCaching);
        }
示例#4
0
 /// <summary>
 /// Creates the pipes application on first use, stores its downlink and tells the
 /// downlink to run the reduce. Does nothing when the application already exists.
 /// </summary>
 /// <param name="output">the collector passed on to the application</param>
 /// <param name="reporter">the reporter passed on to the application</param>
 /// <exception cref="System.IO.IOException"/>
 private void StartApplication(OutputCollector<K3, V3> output, Reporter reporter)
 {
     if (application != null)
     {
         // Already started by an earlier call.
         return;
     }

     try
     {
         Log.Info("starting application");
         application = new Application<K2, V2, K3, V3>(
             job,
             null,
             output,
             reporter,
             (Type)job.GetOutputKeyClass(),
             (Type)job.GetOutputValueClass());
         downlink = application.GetDownlink();
     }
     catch (Exception ie)
     {
         throw new RuntimeException("interrupted", ie);
     }

     int reduceArgument = 0;
     bool javaRecordWriter = Submitter.GetIsJavaRecordWriter(job);
     downlink.RunReduce(reduceArgument, javaRecordWriter);
 }
示例#5
0
        /// <summary>
        /// Exercises <c>Submitter</c> in three steps: the partitioner setter/getter round
        /// trip, the usage text printed when <c>Main</c> is called without arguments, and a
        /// complete command-line invocation that is expected to exit with status 0.
        /// </summary>
        /// <remarks>
        /// Both <c>Main</c> calls are expected to end in an <c>ExitUtil.ExitException</c>
        /// because system exit is disabled for the duration of the test.
        /// </remarks>
        public virtual void TestSubmitter()
        {
            JobConf conf = new JobConf();

            FilePath[] psw = CleanTokenPasswordFile();
            Runtime.SetProperty("test.build.data", "target/tmp/build/TEST_SUBMITTER_MAPPER/data");
            conf.Set("hadoop.log.dir", "target/tmp");
            // prepare configuration
            Submitter.SetIsJavaMapper(conf, false);
            Submitter.SetIsJavaReducer(conf, false);
            Submitter.SetKeepCommandFile(conf, false);
            Submitter.SetIsJavaRecordReader(conf, false);
            Submitter.SetIsJavaRecordWriter(conf, false);
            PipesPartitioner<IntWritable, Text> partitioner = new PipesPartitioner<IntWritable, Text>();

            partitioner.Configure(conf);
            Submitter.SetJavaPartitioner(conf, partitioner.GetType());
            NUnit.Framework.Assert.AreEqual(typeof(PipesPartitioner), (Submitter.GetJavaPartitioner(conf)));
            // test going to call main method with System.exit(). Change Security
            SecurityManager securityManager = Runtime.GetSecurityManager();
            // store System.out
            TextWriter            oldps = System.Console.Out;
            ByteArrayOutputStream @out  = new ByteArrayOutputStream();

            // Every fragment that must appear in the usage text printed by Main().
            string[] expectedUsageFragments = new string[]
            {
                "bin/hadoop pipes",
                "[-input <path>] // Input directory",
                "[-output <path>] // Output directory",
                "[-jar <jar file> // jar filename",
                "[-inputformat <class>] // InputFormat class",
                "[-map <class>] // Java Map class",
                "[-partitioner <class>] // Java Partitioner",
                "[-reduce <class>] // Java Reduce class",
                "[-writer <class>] // Java RecordWriter",
                "[-program <executable>] // executable URI",
                "[-reduces <num>] // number of reduces",
                "[-lazyOutput <true/false>] // createOutputLazily",
                "-conf <configuration file>     specify an application configuration file",
                "-D <property=value>            use value for given property",
                "-fs <local|namenode:port>      specify a namenode",
                "-jt <local|resourcemanager:port>    specify a ResourceManager",
                "-files <comma separated list of files>    specify comma separated files to be copied to the map reduce cluster",
                "-libjars <comma separated list of jars>    specify comma separated jar files to include in the classpath.",
                "-archives <comma separated list of archives>    specify comma separated archives to be unarchived on the compute machines."
            };

            ExitUtil.DisableSystemExit();
            // test without parameters
            try
            {
                Runtime.SetOut(new TextWriter(@out));
                Submitter.Main(new string[0]);
                NUnit.Framework.Assert.Fail();
            }
            catch (ExitUtil.ExitException)
            {
                // System.exit prohibited! output message test
                // Capture the output once and name the missing fragment on failure.
                string usage = @out.ToString();
                foreach (string fragment in expectedUsageFragments)
                {
                    NUnit.Framework.Assert.IsTrue(usage.Contains(fragment),
                                                  "usage output is missing: " + fragment);
                }
            }
            finally
            {
                Runtime.SetOut(oldps);
                // restore
                Runtime.SetSecurityManager(securityManager);
                if (psw != null)
                {
                    // remove password files
                    foreach (FilePath file in psw)
                    {
                        file.DeleteOnExit();
                    }
                }
            }
            // test call Submitter from command line
            try
            {
                FilePath fCommand = GetFileCommand(null);
                FilePath input    = new FilePath(workSpace + FilePath.separator + "input");
                if (!input.Exists())
                {
                    NUnit.Framework.Assert.IsTrue(input.CreateNewFile());
                }
                FilePath outPut = new FilePath(workSpace + FilePath.separator + "output");
                FileUtil.FullyDelete(outPut);

                string[] args = new string[]
                {
                    "-input", input.GetAbsolutePath(),
                    "-output", outPut.GetAbsolutePath(),
                    "-inputformat", "org.apache.hadoop.mapred.TextInputFormat",
                    "-map", "org.apache.hadoop.mapred.lib.IdentityMapper",
                    "-partitioner", "org.apache.hadoop.mapred.pipes.PipesPartitioner",
                    "-reduce", "org.apache.hadoop.mapred.lib.IdentityReducer",
                    "-writer", "org.apache.hadoop.mapred.TextOutputFormat",
                    "-program", fCommand.GetAbsolutePath(),
                    "-reduces", "2",
                    "-lazyOutput", "lazyOutput",
                    "-jobconf", "mapreduce.pipes.isjavarecordwriter=false,mapreduce.pipes.isjavarecordreader=false"
                };
                Submitter.Main(args);
                NUnit.Framework.Assert.Fail();
            }
            catch (ExitUtil.ExitException e)
            {
                // status should be 0 (expected value first, actual value second)
                NUnit.Framework.Assert.AreEqual(0, e.status);
            }
            finally
            {
                Runtime.SetOut(oldps);
                Runtime.SetSecurityManager(securityManager);
            }
        }
示例#6
0
        /// <summary>
        /// Runs a pipes program as a job on the given mini clusters and compares the job
        /// output with the expected results.
        /// </summary>
        /// <remarks>
        /// The program is copied to <c>testing/bin/application</c> on the DFS file system and
        /// registered as the job executable, with the Java record reader and writer enabled.
        /// With zero reduces the job is submitted and polled once per second until it is
        /// complete; otherwise it is run through <c>Submitter.RunJob</c>. The job must
        /// succeed and must report at least one counter in the <c>WORDCOUNT</c> group.
        /// </remarks>
        /// <param name="mr">cluster used to create the job configuration when <paramref name="conf"/> is null</param>
        /// <param name="dfs">cluster whose file system holds the executable and the output</param>
        /// <param name="program">local path of the pipes executable</param>
        /// <param name="inputPath">input path of the job</param>
        /// <param name="outputPath">output path of the job</param>
        /// <param name="numMaps">number of map tasks to configure</param>
        /// <param name="numReduces">number of reduce tasks to configure</param>
        /// <param name="expectedResults">expected content of each output file, in listing order</param>
        /// <param name="conf">base configuration to copy, or null to ask <paramref name="mr"/> for one</param>
        /// <exception cref="System.IO.IOException"/>
        internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program
                                        , Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults
                                        , JobConf conf)
        {
            Path    wordExec = new Path("testing/bin/application");
            JobConf job      = conf == null ? mr.CreateJobConf() : new JobConf(conf);

            job.SetNumMapTasks(numMaps);
            job.SetNumReduceTasks(numReduces);

            // Install the executable on the DFS and point the job at it.
            FileSystem fs = dfs.GetFileSystem();
            fs.Delete(wordExec.GetParent(), true);
            fs.CopyFromLocalFile(program, wordExec);
            Submitter.SetExecutable(job, fs.MakeQualified(wordExec).ToString());
            Submitter.SetIsJavaRecordReader(job, true);
            Submitter.SetIsJavaRecordWriter(job, true);
            FileInputFormat.SetInputPaths(job, inputPath);
            FileOutputFormat.SetOutputPath(job, outputPath);

            RunningJob rJob = null;
            if (numReduces == 0)
            {
                rJob = Submitter.JobSubmit(job);
                while (!rJob.IsComplete())
                {
                    try
                    {
                        Sharpen.Thread.Sleep(1000);
                    }
                    catch (Exception ie)
                    {
                        throw new RuntimeException(ie);
                    }
                }
            }
            else
            {
                rJob = Submitter.RunJob(job);
            }

            // NUnit takes the failure message as the LAST argument.
            NUnit.Framework.Assert.IsTrue(rJob.IsSuccessful(), "pipes job failed");

            Counters       counters          = rJob.GetCounters();
            Counters.Group wordCountCounters = counters.GetGroup("WORDCOUNT");
            int            numCounters       = 0;
            foreach (Counters.Counter c in wordCountCounters)
            {
                System.Console.Out.WriteLine(c);
                ++numCounters;
            }
            NUnit.Framework.Assert.IsTrue(numCounters > 0, "No counters found!");

            // Collect the content of every output file of the job.
            IList<string> results = new AList<string>();

            foreach (Path p in FileUtil.Stat2Paths(dfs.GetFileSystem().ListStatus(outputPath,
                                                                                  new Utils.OutputFileUtils.OutputFilesFilter())))
            {
                results.AddItem(MapReduceTestUtil.ReadOutput(p, job));
            }

            // Expected value first, actual value second, message last.
            NUnit.Framework.Assert.AreEqual(expectedResults.Length, results.Count,
                                            "number of reduces is wrong");
            for (int i = 0; i < results.Count; i++)
            {
                NUnit.Framework.Assert.AreEqual(expectedResults[i], results[i],
                                                "pipes program " + program + " output " + i + " wrong");
            }
        }
示例#7
0
 /// <summary>
 /// Instantiates the partitioner reported by <c>Submitter.GetJavaPartitioner</c> for
 /// the given configuration and stores it in <c>part</c>.
 /// </summary>
 /// <param name="conf">the job configuration to read the partitioner from</param>
 public virtual void Configure(JobConf conf)
 {
     var partitionerClass = Submitter.GetJavaPartitioner(conf);
     part = ReflectionUtils.NewInstance(partitionerClass, conf);
 }
示例#8
0
        /// <summary>Submits a pipes job based on the command line arguments.</summary>
        /// <remarks>
        /// Runs a new <c>Submitter</c> with the arguments and passes the resulting exit
        /// code to <c>ExitUtil.Terminate</c>.
        /// </remarks>
        /// <param name="args">the command line arguments handed to <c>Submitter.Run</c></param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            ExitUtil.Terminate(new Submitter().Run(args));
        }