/// <summary>Run the map task.</summary>
/// <param name="input">the set of inputs</param>
/// <param name="output">the object to collect the outputs of the map</param>
/// <param name="reporter">the object to update with status</param>
/// <exception cref="System.IO.IOException"/>
public override void Run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
    Reporter reporter)
{
    Application<K1, V1, K2, V2> application = null;
    try
    {
        RecordReader<FloatWritable, NullWritable> fakeInput =
            (!Submitter.GetIsJavaRecordReader(job) && !Submitter.GetIsJavaMapper(job))
                ? (RecordReader<FloatWritable, NullWritable>)input
                : null;
        application = new Application<K1, V1, K2, V2>(job, fakeInput, output, reporter,
            (Type)job.GetOutputKeyClass(), (Type)job.GetOutputValueClass());
    }
    catch (Exception ie)
    {
        throw new RuntimeException("interrupted", ie);
    }
    DownwardProtocol<K1, V1> downlink = application.GetDownlink();
    bool isJavaInput = Submitter.GetIsJavaRecordReader(job);
    downlink.RunMap(reporter.GetInputSplit(), job.GetNumReduceTasks(), isJavaInput);
    bool skipping = job.GetBoolean(MRJobConfig.SkipRecords, false);
    try
    {
        if (isJavaInput)
        {
            // allocate key & value instances that are re-used for all entries
            K1 key = input.CreateKey();
            V1 value = input.CreateValue();
            downlink.SetInputTypes(key.GetType().FullName, value.GetType().FullName);
            while (input.Next(key, value))
            {
                // map pair to output
                downlink.MapItem(key, value);
                if (skipping)
                {
                    // Flush the streams on every record input if running in skip mode
                    // so that we don't buffer other records surrounding a bad record.
                    downlink.Flush();
                }
            }
            downlink.EndOfInput();
        }
        application.WaitForFinish();
    }
    catch (Exception t)
    {
        application.Abort(t);
    }
    finally
    {
        application.Cleanup();
    }
}
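// For orientation, Run drives the child process through a fixed downlink
// sequence: announce the task, declare the input types once, stream records,
// then signal end of input. The interface sketch below is illustrative only;
// it trims DownwardProtocol to the members exercised above, and the interface
// name itself is an assumption of this sketch.
public interface IMapDownlinkSketch<K1, V1>
{
    void RunMap(InputSplit split, int numReduces, bool pipedInput); // announce task and split
    void SetInputTypes(string keyType, string valueType);           // once, before any record
    void MapItem(K1 key, V1 value);                                 // one call per input pair
    void Flush();                                                   // forced after each record in skip mode
    void EndOfInput();                                              // no more records will follow
}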
/// <exception cref="System.IO.IOException"/>
private void StartApplication(OutputCollector<K3, V3> output, Reporter reporter)
{
    if (application == null)
    {
        try
        {
            Log.Info("starting application");
            application = new Application<K2, V2, K3, V3>(job, null, output, reporter,
                (Type)job.GetOutputKeyClass(), (Type)job.GetOutputValueClass());
            downlink = application.GetDownlink();
        }
        catch (Exception ie)
        {
            throw new RuntimeException("interrupted", ie);
        }
        int reduce = 0;
        downlink.RunReduce(reduce, Submitter.GetIsJavaRecordWriter(job));
    }
}
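// Hypothetical caller, for illustration: StartApplication is a lazy-init
// guard, so a reducer that never receives a key never forks the child
// process. ReduceKey/ReduceValue mirror the downlink calls a pipes reducer
// makes; the enumerator-based values cursor is an assumption of this sketch.
public void Reduce(K2 key, IEnumerator<V2> values, OutputCollector<K3, V3> output,
    Reporter reporter)
{
    StartApplication(output, reporter);   // no-op on every call after the first
    downlink.ReduceKey(key);              // push the key down to the child
    while (values.MoveNext())
    {
        downlink.ReduceValue(values.Current);  // stream each value for this key
    }
}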
/// <summary>Start the child process to handle the task for us.</summary>
/// <param name="conf">the task's configuration</param>
/// <param name="recordReader">the fake record reader to update progress with</param>
/// <param name="output">the collector to send output to</param>
/// <param name="reporter">the reporter for the task</param>
/// <param name="outputKeyClass">the class of the output keys</param>
/// <param name="outputValueClass">the class of the output values</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
    OutputCollector<K2, V2> output, Reporter reporter, Type outputKeyClass,
    Type outputValueClass)
{
    serverSocket = Sharpen.Extensions.CreateServerSocket(0);
    IDictionary<string, string> env = new Dictionary<string, string>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env["TMPDIR"] = Runtime.GetProperty("java.io.tmpdir");
    env[Submitter.Port] = Sharpen.Extensions.ToString(serverSocket.GetLocalPort());
    // Add the token to the environment if security is enabled
    Org.Apache.Hadoop.Security.Token.Token<JobTokenIdentifier> jobToken =
        TokenCache.GetJobToken(conf.GetCredentials());
    // This password is used as the shared secret key between this application
    // and the child pipes process
    byte[] password = jobToken.GetPassword();
    string localPasswordFile = new FilePath(".") + Path.Separator + "jobTokenPassword";
    WritePasswordToLocalFile(localPasswordFile, password, conf);
    env["hadoop.pipes.shared.secret.location"] = localPasswordFile;
    IList<string> cmd = new AList<string>();
    string interpretor = conf.Get(Submitter.Interpretor);
    if (interpretor != null)
    {
        cmd.AddItem(interpretor);
    }
    string executable = DistributedCache.GetLocalCacheFiles(conf)[0].ToString();
    if (!FileUtil.CanExecute(new FilePath(executable)))
    {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.Chmod(executable, "u+x");
    }
    cmd.AddItem(executable);
    // Wrap the command in a stdout/stderr capture. We are starting a
    // map/reduce task of the pipes job; this is not a cleanup attempt.
    TaskAttemptID taskid =
        (TaskAttemptID)TaskAttemptID.ForName(conf.Get(MRJobConfig.TaskAttemptId));
    FilePath stdout = TaskLog.GetTaskLogFile(taskid, false, TaskLog.LogName.Stdout);
    FilePath stderr = TaskLog.GetTaskLogFile(taskid, false, TaskLog.LogName.Stderr);
    long logLength = TaskLog.GetTaskLogLength(conf);
    cmd = TaskLog.CaptureOutAndError(null, cmd, stdout, stderr, logLength, false);
    process = RunClient(cmd, env);
    clientSocket = serverSocket.Accept();
    string challenge = GetSecurityChallenge();
    string digestToSend = CreateDigest(password, challenge);
    string digestExpected = CreateDigest(password, digestToSend);
    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2)ReflectionUtils.NewInstance(outputKeyClass, conf);
    V2 outputValue = (V2)ReflectionUtils.NewInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey,
        outputValue, conf);
    downlink.Authenticate(digestToSend, challenge);
    WaitForAuthentication();
    Log.Debug("Authentication succeeded");
    downlink.Start();
    downlink.SetJobConf(conf);
}
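// The challenge/response pair above is what authenticates the child: the
// parent sends digest(password, challenge) and expects the child to reply
// with digest(password, digestToSend), so both sides prove they hold the
// job-token password without ever sending it over the socket. A minimal
// sketch of such a digest, assuming an HMAC keyed by the password; the
// concrete primitive behind CreateDigest is an assumption of this sketch.
using System;
using System.Security.Cryptography;
using System.Text;

internal static class DigestSketch
{
    // Base64(HMAC-SHA1(password, message)) — one plausible CreateDigest shape.
    internal static string Create(byte[] password, string message)
    {
        using (HMACSHA1 hmac = new HMACSHA1(password))
        {
            byte[] mac = hmac.ComputeHash(Encoding.UTF8.GetBytes(message));
            return Convert.ToBase64String(mac);
        }
    }
}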
public BinaryProtocol(Stream down, DownwardProtocol handler, Stream up)
{
    this.down = down;
    this.uplink = new BinaryUpwardProtocol(up);
    this.handler = handler;
}
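// Hypothetical usage: because this overload takes raw streams rather than a
// socket, the protocol can be exercised in-process, e.g. over MemoryStreams.
// The null handler is a stand-in for this sketch only; a real caller would
// supply a DownwardProtocol that reacts to each decoded command.
Stream fromParent = new MemoryStream(); // commands arriving on the downlink
Stream toParent = new MemoryStream();   // replies sent back on the uplink
DownwardProtocol handler = null;        // stand-in for this sketch only
BinaryProtocol protocol = new BinaryProtocol(fromParent, handler, toParent);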