Пример #1
0
        /// <summary>Run the map task.</summary>
        /// <remarks>
        /// Creates the child <c>Application</c>, asks its downlink to run the map over the
        /// reporter's input split and, when the record reader lives on the Java side, feeds
        /// every key/value pair read from <paramref name="input"/> to the downlink before
        /// waiting for the application to finish.
        /// Once the application has been created, <c>application.Cleanup()</c> is always
        /// invoked, including when <c>RunMap</c> itself fails.
        /// </remarks>
        /// <param name="input">the set of inputs</param>
        /// <param name="output">the object to collect the outputs of the map</param>
        /// <param name="reporter">the object to update with status</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
                                 Reporter reporter)
        {
            Application<K1, V1, K2, V2> application;
            bool isJavaInput;

            try
            {
                // Looked up once here and reused below instead of querying the job twice.
                isJavaInput = Submitter.GetIsJavaRecordReader(job);

                // The input is only handed to the application as a "fake" reader when
                // neither the record reader nor the mapper is implemented in Java.
                RecordReader<FloatWritable, NullWritable> fakeInput =
                    (!isJavaInput && !Submitter.GetIsJavaMapper(job))
                        ? (RecordReader<FloatWritable, NullWritable>)input
                        : null;

                application = new Application<K1, V1, K2, V2>(
                    job, fakeInput, output, reporter,
                    (Type)job.GetOutputKeyClass(), (Type)job.GetOutputValueClass());
            }
            catch (Exception ie)
            {
                throw new RuntimeException("interrupted", ie);
            }

            DownwardProtocol<K1, V1> downlink = application.GetDownlink();

            try
            {
                // These two calls used to sit outside any try/finally, so a failure here
                // skipped Cleanup(). Their exceptions still propagate unchanged (they are
                // not routed through Abort), but the application is now always cleaned up.
                downlink.RunMap(reporter.GetInputSplit(), job.GetNumReduceTasks(), isJavaInput);
                bool skipping = job.GetBoolean(MRJobConfig.SkipRecords, false);

                try
                {
                    if (isJavaInput)
                    {
                        // allocate key & value instances that are re-used for all entries
                        K1 key   = input.CreateKey();
                        V1 value = input.CreateValue();
                        downlink.SetInputTypes(key.GetType().FullName, value.GetType().FullName);
                        while (input.Next(key, value))
                        {
                            // map pair to output
                            downlink.MapItem(key, value);
                            if (skipping)
                            {
                                // flush the streams on every record input if running in skip mode
                                // so that we don't buffer other records surrounding a bad record.
                                downlink.Flush();
                            }
                        }
                        downlink.EndOfInput();
                    }
                    application.WaitForFinish();
                }
                catch (Exception t)
                {
                    application.Abort(t);
                }
            }
            finally
            {
                application.Cleanup();
            }
        }
Пример #2
0
 /// <summary>
 /// Creates the child application on first use and calls <c>RunReduce</c> on its downlink.
 /// </summary>
 /// <remarks>Returns immediately when <c>application</c> has already been set.</remarks>
 /// <param name="output">collector passed to the new application</param>
 /// <param name="reporter">reporter passed to the new application</param>
 /// <exception cref="System.IO.IOException"/>
 private void StartApplication(OutputCollector<K3, V3> output, Reporter reporter)
 {
     // Already started: nothing to do.
     if (application != null)
     {
         return;
     }

     try
     {
         Log.Info("starting application");

         Type keyClass   = (Type)job.GetOutputKeyClass();
         Type valueClass = (Type)job.GetOutputValueClass();

         application = new Application<K2, V2, K3, V3>(
             job, null, output, reporter, keyClass, valueClass);
         downlink = application.GetDownlink();
     }
     catch (Exception failure)
     {
         // Any failure while bringing up the application is rethrown wrapped.
         throw new RuntimeException("interrupted", failure);
     }

     int reduceIndex = 0;
     bool javaRecordWriter = Submitter.GetIsJavaRecordWriter(job);
     downlink.RunReduce(reduceIndex, javaRecordWriter);
 }
Пример #3
0
        /// <summary>Start the child process to handle the task for us.</summary>
        /// <remarks>
        /// The steps run in a fixed order: open a server socket, publish its port and the
        /// location of the shared-secret file through the child's environment, build and
        /// launch the command, accept the child's connection, then authenticate over the
        /// downlink before starting it and sending the job configuration.
        /// </remarks>
        /// <param name="conf">the task's configuration</param>
        /// <param name="recordReader">the fake record reader to update progress with</param>
        /// <param name="output">the collector to send output to</param>
        /// <param name="reporter">the reporter for the task</param>
        /// <param name="outputKeyClass">the class of the output keys</param>
        /// <param name="outputValueClass">the class of the output values</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal Application(JobConf conf, RecordReader <FloatWritable, NullWritable> recordReader
                             , OutputCollector <K2, V2> output, Reporter reporter, Type outputKeyClass, Type outputValueClass
                             )
        {
            // Port argument 0: presumably lets the OS choose a free port (TODO confirm
            // against CreateServerSocket); the actual port is read back below.
            serverSocket = Sharpen.Extensions.CreateServerSocket(0);
            IDictionary <string, string> env = new Dictionary <string, string>();

            // add TMPDIR environment variable with the value of java.io.tmpdir
            env["TMPDIR"]       = Runtime.GetProperty("java.io.tmpdir");
            // tell the child which local port to connect back to
            env[Submitter.Port] = Sharpen.Extensions.ToString(serverSocket.GetLocalPort());
            //Add token to the environment if security is enabled
            // NOTE(review): jobToken is dereferenced below without a null check.
            Org.Apache.Hadoop.Security.Token.Token <JobTokenIdentifier> jobToken = TokenCache.
                                                                                   GetJobToken(conf.GetCredentials());
            // This password is used as shared secret key between this application and
            // child pipes process
            byte[] password          = jobToken.GetPassword();
            string localPasswordFile = new FilePath(".") + Path.Separator + "jobTokenPassword";

            // The secret itself goes into a local file; only the file's location is
            // passed through the environment.
            WritePasswordToLocalFile(localPasswordFile, password, conf);
            env["hadoop.pipes.shared.secret.location"] = localPasswordFile;
            // Command line: optional interpreter first, then the executable.
            IList <string> cmd         = new AList <string>();
            string         interpretor = conf.Get(Submitter.Interpretor);

            if (interpretor != null)
            {
                cmd.AddItem(interpretor);
            }
            // The executable is taken to be the first local cache file.
            // NOTE(review): no check that the returned array is non-null and non-empty.
            string executable = DistributedCache.GetLocalCacheFiles(conf)[0].ToString();

            if (!FileUtil.CanExecute(new FilePath(executable)))
            {
                // LinuxTaskController sets +x permissions on all distcache files already.
                // In case of DefaultTaskController, set permissions here.
                FileUtil.Chmod(executable, "u+x");
            }
            cmd.AddItem(executable);
            // wrap the command in a stdout/stderr capture
            // we are starting map/reduce task of the pipes job. this is not a cleanup
            // attempt.
            TaskAttemptID taskid = ((TaskAttemptID)TaskAttemptID.ForName(conf.Get(MRJobConfig
                                                                                  .TaskAttemptId)));
            FilePath stdout    = TaskLog.GetTaskLogFile(taskid, false, TaskLog.LogName.Stdout);
            FilePath stderr    = TaskLog.GetTaskLogFile(taskid, false, TaskLog.LogName.Stderr);
            long     logLength = TaskLog.GetTaskLogLength(conf);

            cmd          = TaskLog.CaptureOutAndError(null, cmd, stdout, stderr, logLength, false);
            // Launch the child, then wait for it to connect back on the server socket.
            process      = RunClient(cmd, env);
            clientSocket = serverSocket.Accept();
            // digestToSend is derived from (password, challenge); digestExpected is
            // derived from (password, digestToSend) and is handed to the output handler.
            string challenge      = GetSecurityChallenge();
            string digestToSend   = CreateDigest(password, challenge);
            string digestExpected = CreateDigest(password, digestToSend);

            handler = new OutputHandler <K2, V2>(output, reporter, recordReader, digestExpected
                                                 );
            // Instances of the output key/value classes, passed to the protocol below.
            K2 outputKey   = (K2)ReflectionUtils.NewInstance(outputKeyClass, conf);
            V2 outputValue = (V2)ReflectionUtils.NewInstance(outputValueClass, conf);

            downlink = new BinaryProtocol <K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue
                                                           , conf);
            // Authenticate first; the downlink is only started, and the job
            // configuration only sent, after WaitForAuthentication() returns.
            downlink.Authenticate(digestToSend, challenge);
            WaitForAuthentication();
            Log.Debug("Authentication succeeded");
            downlink.Start();
            downlink.SetJobConf(conf);
        }
Пример #4
0
 /// <summary>
 /// Creates a protocol instance bound to a downward stream, a handler and an upward stream.
 /// </summary>
 /// <param name="down">stream stored as the downward channel</param>
 /// <param name="handler">downward-protocol handler stored on this instance</param>
 /// <param name="up">stream wrapped in a new <c>BinaryUpwardProtocol</c> for the uplink</param>
 public BinaryProtocol(Stream down, DownwardProtocol handler, Stream up)
 {
     // Plain references are stored first; the uplink wrapper is built last.
     this.handler = handler;
     this.down = down;
     uplink = new BinaryUpwardProtocol(up);
 }