/// <summary>
            /// build the ProcessStartInfo used to launch a new physical process: no window, no shell,
            /// optional stdout/stderr redirection, the configured environment, and the
            /// Peloponnese-specific environment variables the process needs to register itself
            /// </summary>
            /// <param name="wd">the new process' working directory</param>
            /// <param name="details">a descriptor indicating how to start the process</param>
            /// <param name="id">the process' unique identifier</param>
            /// <returns>the datastructure needed to start the process</returns>
            private ProcessStartInfo MakeStartInfo(string wd, ExeDetails details, string id)
            {
                ProcessStartInfo startInfo = new ProcessStartInfo();

                // UseShellExecute must be false for stream redirection and for setting
                // environment variables on the child process
                startInfo.CreateNoWindow   = true;
                startInfo.UseShellExecute  = false;
                startInfo.WorkingDirectory = wd;

                // only redirect the streams that the config supplied a destination file for
                startInfo.RedirectStandardOutput = (details.stdoutFile != null);
                startInfo.RedirectStandardError  = (details.stderrFile != null);

                startInfo.Arguments = details.commandLineArguments;

                // if the executable was specified as a full path, run it from there; otherwise
                // resolve it relative to the process' working directory
                startInfo.FileName = Path.IsPathRooted(details.commandLine)
                    ? details.commandLine
                    : Path.Combine(wd, details.commandLine);

                // add all the environment variables that were specified in the config. Assigning
                // through the indexer replaces any existing value for the same key
                foreach (var e in details.environment)
                {
                    startInfo.EnvironmentVariables[e.Key] = e.Value;
                }

                // add the Peloponnese-specific environment variables that will let the process contact
                // the server and register itself. These are assigned last, and through the indexer rather
                // than Add(), so that a key which is already present (set by the config above, or already
                // in the dictionary when it was created) is overwritten instead of making Add() throw an
                // ArgumentException and preventing the process from starting
                startInfo.EnvironmentVariables[Constants.EnvManagerServerUri]  = parent.parent.ServerAddress;
                startInfo.EnvironmentVariables[Constants.EnvJobGuid]           = parent.parent.JobGuid.ToString();
                startInfo.EnvironmentVariables[Constants.EnvProcessGroup]      = parent.GroupName;
                startInfo.EnvironmentVariables[Constants.EnvProcessIdentifier] = id;
                startInfo.EnvironmentVariables[Constants.EnvProcessHostName]   = Environment.MachineName;
                // for data locality purposes, every process in the group is assumed to be running on the
                // same rack
                startInfo.EnvironmentVariables[Constants.EnvProcessRackName]   = "localrack";

                return(startInfo);
            }
            /// <summary>
            /// launch the next version of the physical process: bump the version, register the
            /// new identifier with the parent, then attempt the actual start
            /// </summary>
            /// <param name="details">a descriptor of how to start the process</param>
            /// <returns>true if and only if the process was successfully started</returns>
            public async Task <bool> Start(ExeDetails details)
            {
                // identifier of the new version; computed under the lock, used after it is released
                string processId;

                lock (this)
                {
                    // only one version may be in flight at a time, so nothing should be recorded yet
                    Debug.Assert(systemProcess == null);
                    Debug.Assert(identifier == null);

                    // the version is bumped before the process is started
                    ++version;

                    // a non-positive NumberOfVersions means there is no cap on the version count
                    bool withinVersionLimit = parent.NumberOfVersions <= 0 || version <= parent.NumberOfVersions;
                    if (!withinVersionLimit)
                    {
                        // hack for now: give up rather than keep creating local process directories
                        // forever when the config is broken and no version will ever start
                        logger.Log("Local process " + index + " failed too many times: exiting");
                        parent.StartShuttingDown();
                        return(false);
                    }

                    // the unique identifier is derived from the process index and its version,
                    // each formatted as at least three digits
                    processId = String.Format("Process.{0,3:D3}.{1,3:D3}", index, version);
                }

                // tell the parent about the identifier before the process exists, so that the service
                // knows to accept the registration once the process comes up
                parent.OnRegisterProcess(processId);

                // try to actually start the process
                bool started = await StartInternal(details, processId);
                if (!started)
                {
                    // the start failed: report it to the parent as a process exit
                    logger.Log("Start reporting process exit");
                    parent.OnProcessExited(index, processId, 1, true);
                }

                return(started);
            }
        /// <summary>
        /// set this process group up from its configuration element. Exceptions raised while
        /// reading the config are not handled here; they are left for the parent to catch
        /// </summary>
        /// <param name="p">parent to use for callbacks</param>
        /// <param name="name">name of this group in the service</param>
        /// <param name="config">element describing configuration parameters</param>
        public void Initialize(IServiceManager p, string name, XElement config)
        {
            parent       = p;
            logger       = parent.Logger;
            groupName    = name;
            shuttingDown = false;

            // "numberOfProcesses" is optional: without it the group manages a single process.
            // A malformed value makes int.Parse throw, which is deliberately not handled here
            XAttribute processCountAttribute = config.Attribute("numberOfProcesses");
            int        numberOfProcesses     = (processCountAttribute == null)
                ? 1
                : int.Parse(processCountAttribute.Value);

            // "numberOfVersions" is optional as well and falls back to 5. Again a malformed
            // value is allowed to throw
            numberOfVersions = 5;
            XAttribute versionCountAttribute = config.Attribute("numberOfVersions");

            if (versionCountAttribute != null)
            {
                numberOfVersions = int.Parse(versionCountAttribute.Value);
            }

            // one logical process object per managed process, each given its index in the array
            processes = new LocalProcess[numberOfProcesses];
            for (int slot = 0; slot < processes.Length; slot++)
            {
                processes[slot] = new LocalProcess(this, slot);
            }

            // exactly one "Process" descendant must describe how physical processes are created;
            // Single() throws if it is missing or duplicated, and that is left to the caller too
            XElement processElement = config.Descendants("Process").Single();

            processDetails = new ExeDetails();
            processDetails.ReadFromConfig(processElement, logger);
        }
            /// <summary>
            /// do the work of preparing the physical process' working directory, copying resources
            /// into it, and starting the process. Failures in the directory, start-info and process
            /// start steps are logged and reported as false
            /// </summary>
            /// <param name="details">a descriptor of how to start the process</param>
            /// <param name="id">unique identifier for the process</param>
            /// <returns>true if and only if the process was started</returns>
            private async Task <bool> StartInternal(ExeDetails details, string id)
            {
                // make a working directory by combining the service's working directory
                // with the group name and unique process identifier
                var    groupWd = Path.Combine(Directory.GetCurrentDirectory(), parent.GroupName);
                string wd      = Path.Combine(groupWd, id);

                try
                {
                    // if there was already a directory of that name, try to delete it
                    Directory.Delete(wd, true);
                }
                catch (DirectoryNotFoundException)
                {
                    // there was no previous directory, so there is nothing to clean up
                }
                catch (Exception e)
                {
                    // if there's a directory there that we can't delete, don't even try to
                    // start the process because something bad is going on.
                    logger.Log("Failed to delete existing directory " + wd + ": " + e.Message);
                    return(false);
                }

                try
                {
                    // make the working directory for the new process
                    Directory.CreateDirectory(wd);
                    logger.Log("Created working directory " + wd);
                }
                catch (Exception e)
                {
                    // if we can't make the working directory, don't try to start the process
                    logger.Log("Failed to create working directory " + wd + ": " + e.Message);
                    return(false);
                }

                logger.Log("Copying resources to " + wd);
                // we will copy all the resource groups in parallel; this is the list of Tasks
                // to wait on
                var waiters = new List <Task <bool> >();

                foreach (var r in details.resources)
                {
                    waiters.Add(r.FetchToLocalDirectoryAsync(wd));
                }
                // the return values are an array of bools indicating for each group whether it
                // copied successfully or not
                var gotResourcesArray = await Task.WhenAll(waiters);

                // every group must have copied successfully (vacuously true when there are none)
                var gotResources = gotResourcesArray.All(copied => copied);

                if (!gotResources)
                {
                    // at least one resource failed to copy: we can't start the process
                    logger.Log("Failed to copy resources to working directory " + wd);
                    return(false);
                }

                ProcessStartInfo startInfo;

                try
                {
                    // make the actual datastructure for starting the process
                    startInfo = MakeStartInfo(wd, details, id);
                }
                catch (Exception e)
                {
                    logger.Log("Failed to make process start info for " + id + ": " + e.ToString());
                    return(false);
                }

                Process newProcess;

                lock (this)
                {
                    // make a new system process and copy it into a local variable to use outside
                    // the lock. Once we exit the lock here, an asynchronous call to Stop() could try to kill
                    // the process
                    systemProcess                     = new Process();
                    systemProcess.StartInfo           = startInfo;
                    systemProcess.EnableRaisingEvents = true;
                    systemProcess.Exited             += new EventHandler(ProcessExited);
                    identifier = id;
                    newProcess = systemProcess;
                }

                logger.Log("Trying to start process " + parent.GroupName + ":" + id + " -- " + startInfo.FileName + " " + startInfo.Arguments);

                try
                {
                    newProcess.Start();
                    logger.Log("Process " + newProcess.Id + " started for " + parent.GroupName + ":" + id);

                    if (details.stdoutFile != null)
                    {
                        string stdOutDest = details.stdoutFile;
                        if (details.redirectDirectory != null)
                        {
                            stdOutDest = Path.Combine(details.redirectDirectory, stdOutDest);
                        }

                        // take the reader from the local process reference here, not from the
                        // systemProcess field inside the lambda: the field is only safe to read under
                        // the lock and may have been cleared by the time the task actually runs
                        var stdOutReader = newProcess.StandardOutput;

                        // fire and forget: the copy runs for as long as the stream stays open. The
                        // task is kept in a local only so that the un-awaited call is explicit
                        Task copyTask = Task.Run(() => CopyStreamWithCatch(stdOutReader, stdOutDest, wd));
                    }

                    if (details.stderrFile != null)
                    {
                        string stdErrDest = details.stderrFile;
                        if (details.redirectDirectory != null)
                        {
                            stdErrDest = Path.Combine(details.redirectDirectory, stdErrDest);
                        }

                        // as above, read the stream from the local reference rather than the field
                        var stdErrReader = newProcess.StandardError;

                        Task copyTask = Task.Run(() => CopyStreamWithCatch(stdErrReader, stdErrDest, wd));
                    }

                    return(true);
                }
                catch (Exception e)
                {
                    // if we didn't manage to start the process, get rid of the pointer to it; the parent
                    // will call parent.OnProcessExited for us
                    lock (this)
                    {
                        systemProcess = null;
                        identifier    = null;
                    }

                    logger.Log("Process start failed for " + parent.GroupName + ":" + id + ": " + e.ToString());
                    return(false);
                }
            }
// Example #5
// 0
        /// <summary>
        /// set this process group up from its configuration element. Exceptions raised while
        /// reading the config are not handled here; they are left for the parent to catch
        /// </summary>
        /// <param name="p">parent to use for callbacks</param>
        /// <param name="name">name of this group in the service</param>
        /// <param name="config">element describing configuration parameters</param>
        public void Initialize(IServiceManager p, string name, XElement config)
        {
            parent    = p;
            logger    = parent.Logger;
            groupName = name;

            // every numeric setting below is optional and starts out as -1; a malformed
            // attribute value makes int.Parse throw, which is deliberately not handled here
            XAttribute setting;

            // target number of processes; -1 means use all the machines in the cluster
            maxProcesses = -1;
            setting      = config.Attribute("maxProcesses");
            if (setting != null)
            {
                maxProcesses = int.Parse(setting.Value);
            }

            // failure limits, per node and in total; -1 means tolerate arbitrary failures
            maxFailuresPerNode = -1;
            setting            = config.Attribute("maxFailuresPerNode");
            if (setting != null)
            {
                maxFailuresPerNode = int.Parse(setting.Value);
            }

            maxTotalFailures = -1;
            setting          = config.Attribute("maxTotalFailures");
            if (setting != null)
            {
                maxTotalFailures = int.Parse(setting.Value);
            }

            // amount of memory to request per container
            workerMemoryInMB = -1;
            setting          = config.Attribute("workerMemoryInMB");
            if (setting != null)
            {
                workerMemoryInMB = int.Parse(setting.Value);
            }

            // exactly one "Process" descendant must describe how physical processes are created;
            // Single() throws if it is missing or duplicated, and that is left to the caller too
            XElement processElement = config.Descendants("Process").Single();

            processDetails = new ExeDetails();
            processDetails.ReadFromConfig(processElement, logger);

            // reject the configuration at the first resource group that is not an HdfsResources
            foreach (var rg in processDetails.resources)
            {
                if (rg is HdfsResources)
                {
                    continue;
                }

                throw new ApplicationException("All YARN process resources must reside in HDFS: " + rg.ToString());
            }
        }