Beispiel #1
0
        /// <summary>
        /// Submits <paramref name="config"/> to the cluster by creating a map-only streaming
        /// job (one mapper, zero reducers) named "YarnMapLauncher" whose input is the
        /// serialized configuration document.
        /// </summary>
        /// <param name="config">
        /// Configuration document; it is serialized as UTF-8 without a byte order mark and
        /// handed to <c>ConfigHelpers.MakeDfsResourceFromBuffer</c> under the name
        /// "&lt;job guid&gt;.xml".
        /// </param>
        /// <param name="errorLocation">
        /// Passed through unchanged to the returned <c>AzureYarnJob</c>; it is not otherwise
        /// used by this method.
        /// </param>
        /// <returns>
        /// An <c>AzureYarnJob</c> constructed from this client, <paramref name="errorLocation"/>,
        /// the job's output directory and the streaming job returned by
        /// <c>JobClient.CreateStreamingJob</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public ClusterJob Submit(XDocument config, Uri errorLocation)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // from config.WriteTo further down.
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            // Overall flow (per the original author's notes):
            //   1. upload the config xml document to blob storage
            //   2. launch a single-task, map-only job that submits the config xml to the yarn launcher
            //   3. the job output will be the application id
            Uri userDfs = this.DfsClient.Combine(this.baseUri, "user", Environment.UserName, "staging");
            Guid jobGuid = Guid.NewGuid();

            StreamingMapReduceJobCreateParameters jobParams = new StreamingMapReduceJobCreateParameters();

            // Map-only job: exactly one mapper and no reducers.
            jobParams.Defines.Add("mapreduce.job.reduces", "0");
            jobParams.Defines.Add("mapreduce.job.maps", "1");
            jobParams.JobName = "YarnMapLauncher";
            jobParams.Mapper = "Microsoft.Research.Peloponnese.YarnMapLauncher.exe";

            // Each submission gets its own output directory, named after the job guid.
            Uri outputDir = this.DfsClient.Combine(userDfs, jobGuid.ToString());

            // dfsClient removes the leading slash, but hadoop assumes job output is relative
            // to the user working directory, so the cluster-internal absolute URI is used.
            jobParams.Output = this.dfsClient.GetClusterInternalUri(outputDir).AbsoluteUri;

            using (MemoryStream upload = new MemoryStream())
            {
                XmlWriterSettings settings = new XmlWriterSettings();
                // 'false' suppresses the UTF-8 byte order mark in the serialized document.
                settings.Encoding = new UTF8Encoding(false);

                // The writer must be disposed (flushed) before the buffer is read back.
                using (XmlWriter xw = XmlWriter.Create(upload, settings))
                {
                    config.WriteTo(xw);
                }

                XElement configResource = ConfigHelpers.MakeDfsResourceFromBuffer(
                    jobGuid + ".xml", upload.ToArray(), this.DfsClient, userDfs);
                jobParams.Input = configResource.Value;
            }

            Uri peloponneseDfs = this.DfsClient.Combine(this.baseUri, "staging", "peloponnese");

            // Binaries the job needs: the mapper executable and the launcher jar, both read
            // from the local peloponneseHome directory.
            string[] files = { jobParams.Mapper, "Microsoft.Research.Peloponnese.YarnLauncher.jar" };

            foreach (string file in files)
            {
                XElement fileResource = ConfigHelpers.MakeDfsResourceFromFile(
                    Path.Combine(peloponneseHome, file), this.DfsClient, peloponneseDfs);

                // hash mark in resources allows a target name to be specified for the symlink
                jobParams.Files.Add(string.Format("{0}#{1}", fileResource.Value, file));
            }

            var launchJob = JobClient.CreateStreamingJob(jobParams);

            return new AzureYarnJob(this, errorLocation, outputDir, launchJob);
        }