예제 #1
0
        /// <summary>
        /// Releases the wait handles held by this object. Only the first call does any work;
        /// once the disposed flag is set, later calls return immediately.
        /// </summary>
        /// <param name="disposing">
        /// When true, the assigned-to-node event and every registered state-change waiter are
        /// closed. When false, only the disposed flag is set.
        /// </param>
        private void Dispose(bool disposing)
        {
            if (m_disposed)
            {
                return;
            }

            if (disposing)
            {
                DryadLogger.LogInformation("Dispose Process", "Releasing resources for process id {0}", this.m_id);

                this.m_assignedToNodeEvent.Close();

                foreach (KeyValuePair<ProcessState, List<ManualResetEvent>> waitersForState in m_stateChangeWaiters)
                {
                    List<ManualResetEvent> waiters = waitersForState.Value;
                    foreach (ManualResetEvent waiter in waiters)
                    {
                        // A failure to close one waiter is logged and must not stop the rest from being closed
                        try
                        {
                            waiter.Close();
                        }
                        catch (Exception closeError)
                        {
                            DryadLogger.LogError(0, closeError);
                        }
                    }
                }
            }

            m_disposed = true;
        }
예제 #2
0
        /// <summary>
        /// Starts the logger, creates the task update queue and the vertex table, and, only when
        /// the job manager environment variable is empty or unset, reads the node limits from the
        /// environment and wires up the application master.
        /// </summary>
        public YarnSchedulerHelper()
        {
            // Start the logger first so that everything below can log
            DryadLogger.Start("xcompute.log");
            m_taskUpdateQueue = new BlockingCollection<VertexTask>();

            string jobManagerSetting = Environment.GetEnvironmentVariable(Constants.jobManager);
            bool   hasJobManager     = !String.IsNullOrEmpty(jobManagerSetting);

            if (hasJobManager)
            {
                // Job manager variable is present: allocate the vertex table only, no GM setup
                m_vertices = new VertexTask[JobMaxNodes + 2];
                DryadLogger.LogInformation("YarnSchedulerHelper()", "Not initializing JAVA GM");
                return;
            }

            // Node limits come from the environment; the starting node count equals the minimum
            string minNodesText = Environment.GetEnvironmentVariable("MINIMUM_COMPUTE_NODES");
            m_minNodes = int.Parse(minNodesText);
            string maxNodesText = Environment.GetEnvironmentVariable("MAXIMUM_COMPUTE_NODES");
            m_maxNodes = int.Parse(maxNodesText);
            m_startNodes = m_minNodes;

            // Allocated after the limits are set. NOTE(review): JobMaxNodes presumably depends on them - confirm
            m_vertices = new VertexTask[JobMaxNodes + 2];

            DryadLogger.LogInformation("YarnSchedulerHelper()", "Initializing JAVA GM");
            DryadLogger.LogInformation("YarnSchedulerHelper()", "m_maxNodes: {0}", m_maxNodes);

            // Register the update callback and the vertex change handler before creating the application master
            AMInstance.RegisterGMCallback(new UpdateProcessState(QueueYarnUpdate));
            ((ISchedulerHelper)this).OnVertexChange += new VertexChangeEventHandler(OnVertexChangeHandler);
            m_appMaster = new AMInstance();
        }
예제 #3
0
        /// <summary>
        /// Applies one task update: builds the change-event arguments from the update and from
        /// the entry previously recorded for the same task id, stores the update in the vertex
        /// table, and raises the vertex change event if anyone is subscribed.
        /// </summary>
        /// <param name="v">The task update. Its Id is used as the index into the vertex table.</param>
        public void ProcessYarnUpdate(VertexTask v)
        {
            DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} on node {1} is in state {2}", v.Id, v.Node,
                                       v.State);
            VertexChangeEventArgs e = new VertexChangeEventArgs(v.Id);

            e.NewNode         = v.Node;
            e.NewState        = YarnTaskStateToVertexTaskState(v.State);
            e.NewRequeueCount = v.RequeueCount;

            // The "old" fields are only filled in when an earlier update for this task was recorded
            VertexTask previous = m_vertices[v.Id];
            if (previous != null)
            {
                e.OldNode         = previous.Node;
                e.OldState        = YarnTaskStateToVertexTaskState(previous.State);
                e.OldRequeueCount = previous.RequeueCount;
            }

            if (e.NewRequeueCount != e.OldRequeueCount)
            {
                DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} requeue count changed from {1} to {2}",
                                           v.Id, e.OldRequeueCount, e.NewRequeueCount);
            }

            // Update current vertex state
            m_vertices[v.Id] = v;

            // Take a local copy of the delegate and skip the call when there are no subscribers;
            // invoking a null delegate would throw NullReferenceException.
            var handler = m_vertexChangeEvent;
            if (handler != null)
            {
                handler(this, e);
            }
        }
예제 #4
0
        /// <summary>
        /// Blocks until the number of running tasks reaches the configured start threshold.
        /// Each time the task change event is signalled, the running-task count is compared
        /// against the start node count; the method keeps waiting until the count is high enough.
        /// </summary>
        /// <returns>Always true; the method only returns once enough tasks are running.</returns>
        bool ISchedulerHelper.WaitForTasksReady()
        {
            while (true)
            {
                // Wait for the task monitor to report that a batch of changes has been processed
                m_taskChangeEvt.WaitOne();

                // Hold the lock so the counts cannot be updated while they are being compared
                lock (this)
                {
                    DryadLogger.LogInformation("Wait for vertex tasks",
                                               "{0} tasks are running, waiting for at least {1} before starting",
                                               m_runningTasks, m_startNodes);

                    if (m_runningTasks < m_startNodes)
                    {
                        // Not enough yet: go back to waiting for the next batch
                        continue;
                    }

                    DryadLogger.LogDebug("Wait for vertex tasks",
                                         "Sufficient number of tasks transitioned to running to begin: {0} running tasks",
                                         m_runningTasks);
                    return true;
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Body of the task monitoring thread. Each cycle drains task updates from the update
        /// queue for one poll interval, signals the task change event if any update was handled,
        /// then waits up to one poll interval for the stop event. The poll interval starts at one
        /// second and doubles after every cycle until it reaches sixteen seconds.
        /// </summary>
        private void TaskMonitorThread()
        {
            TimeSpan pollInterval    = TimeSpan.FromSeconds(1);
            TimeSpan maxPollInterval = TimeSpan.FromSeconds(16);

            // The main loop.  Each iteration polls for task changes.
            while (true)
            {
                bool foundUpdate = false;

                // Time the drain window with a Stopwatch instead of DateTime.Now so that wall-clock
                // adjustments (time sync, daylight saving) cannot stretch or cut short the window.
                System.Diagnostics.Stopwatch drainTimer = System.Diagnostics.Stopwatch.StartNew();

                //
                // Process change results from blocking queue
                //
                do
                {
                    VertexTask v;
                    if (m_taskUpdateQueue.TryTake(out v, pollInterval))
                    {
                        foundUpdate = true;
                        ProcessYarnUpdate(v);
                    }
                } while (drainTimer.Elapsed < pollInterval);

                if (foundUpdate)
                {
                    // Notify WaitForTasksReady once for each polling cycle
                    // so that it gets all the changes in one batch
                    m_taskChangeEvt.Set();
                }

                // Check to see if we've been told to stop.
                // Timeout after pollInterval.
                // TODO: For better shutdown perf, we may want to check this at other places
                // or just kill the thread - but this provides a more graceful exit.
                if (m_threadStopEvt.WaitOne(pollInterval, true))
                {
                    m_taskMonitorThreadRunning = false;
                    DryadLogger.LogInformation("Task Monitoring Thread", "Received shutdown event");
                    return;
                }

                // Double the polling interval each iteration, capped at maxPollInterval
                if (pollInterval < maxPollInterval)
                {
                    double doubledSeconds = 2 * pollInterval.TotalSeconds;
                    pollInterval = doubledSeconds < maxPollInterval.TotalSeconds
                                   ? TimeSpan.FromSeconds(doubledSeconds)
                                   : maxPollInterval;
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Marks the process as cancelled and completed, then tries to kill the underlying
        /// system process if there is one. Does nothing if the process is already completed.
        /// </summary>
        /// <param name="suppressNotifications">
        /// When true, the Exited handler is detached before the kill and Process_Exited is not
        /// invoked by this method. When false, Process_Exited is invoked directly if there is
        /// no system process or if killing it failed.
        /// </param>
        public void Cancel(bool suppressNotifications)
        {
            DryadLogger.LogMethodEntry(this.DryadId);

            lock (syncRoot)
            {
                if (state != ProcessState.Completed)
                {
                    DryadLogger.LogInformation("Cancel process", "Process {0} has not already exited", DryadId);
                    state          = ProcessState.Completed;
                    this.cancelled = true;
                }
                else
                {
                    // Completed before the cancellation arrived: nothing left to cancel
                    DryadLogger.LogInformation("Cancel process", "Process {0} has already exited", DryadId);
                    DryadLogger.LogMethodExit();
                    return;
                }
            }

            if (systemProcess == null)
            {
                DryadLogger.LogInformation("Cancel process", "Process {0} has not started yet", DryadId);
            }
            else
            {
                // The process was started, so try to kill it
                try
                {
                    // A successful kill raises Exited, which runs Process_Exited unless it was detached
                    DryadLogger.LogInformation("Cancel process", "Killing system process for process id {0}", DryadId);

                    if (suppressNotifications)
                    {
                        // Detach the handler so that the kill does not produce a notification
                        systemProcess.Exited -= this.Process_Exited;
                    }

                    systemProcess.Kill();
                    DryadLogger.LogMethodExit();
                    return;
                }
                catch (Exception killError)
                {
                    // The kill failed: log it and fall through to the manual exit handling below
                    DryadLogger.LogError(0, killError, "Failed to kill system process for process id {0}", DryadId);
                }
            }

            // Reached only when there was no process or the kill failed; run the exit handling ourselves
            if (!suppressNotifications)
            {
                Process_Exited(this, null);
            }

            DryadLogger.LogMethodExit();
        }
예제 #7
0
        /// <summary>
        /// Wraps a raw task status notification in a VertexTask and adds it to the task update queue.
        /// </summary>
        /// <param name="taskId">Id of the task the notification is about.</param>
        /// <param name="taskState">Numeric task state; cast to YarnTaskState.</param>
        /// <param name="nodeName">Name of the node reported for the task.</param>
        public void QueueYarnUpdate(int taskId, int taskState, string nodeName)
        {
            // Three arguments are supplied, so the placeholders must be {0}, {1} and {2};
            // an index of 3 is out of range for composite formatting.
            DryadLogger.LogInformation("QueueYarnUpdate", "Task {0} on node {1} is in state {2}", taskId, nodeName,
                                       taskState);

            YarnTaskState yTaskState = (YarnTaskState)taskState;

            // NOTE(review): int.MaxValue and the UTC timestamp are presumably the requeue count and
            // the change time - confirm against the VertexTask constructor.
            VertexTask v = new VertexTask(taskId, nodeName, yTaskState, int.MaxValue, DateTime.UtcNow);

            m_taskUpdateQueue.Add(v);
        }
예제 #8
0
        /// <summary>
        /// Handles the exit of the vertex host process. Runs its body at most once, records the
        /// exit code and final state, and queues ExitProcessThreadProc on the thread pool.
        /// </summary>
        /// <param name="sender">Not read by this method.</param>
        /// <param name="args">Not read by this method.</param>
        private void Process_Exited(object sender, EventArgs args)
        {
            DryadLogger.LogMethodEntry(DryadId);

            // The first caller sets the exited flag; any later caller leaves immediately
            lock (syncRoot)
            {
                if (exited)
                {
                    DryadLogger.LogInformation("Process exit", "Process {0} already exited", DryadId);
                    DryadLogger.LogMethodExit();
                    return;
                }
                exited = true;
            }

            if (!cancelled)
            {
                exitCode = systemProcess.ExitCode;
                DryadLogger.LogInformation("Process exit", "Process {0} exit code {1}", DryadId, exitCode);

                // Any exit marks the process completed; a non-zero exit code also marks it failed
                lock (syncRoot)
                {
                    state = ProcessState.Completed;
                    if (exitCode != 0)
                    {
                        this.failed = true;
                    }
                }
            }
            else
            {
                DryadLogger.LogInformation("Process exit", "Process {0} was cancelled", DryadId);
                exitCode = unchecked ((int)0x830A0003); // DrError_VertexReceivedTermination
            }

            //
            // Hand the remaining exit work (vertex complete event, pending properties) to a thread pool thread
            //
            WaitCallback exitWork = new WaitCallback(ExitProcessThreadProc);
            ThreadPool.QueueUserWorkItem(exitWork);

            DryadLogger.LogMethodExit();
        }
예제 #9
0
        /// <summary>
        /// Cancels this process. A process not yet assigned to a node is marked cancelled and
        /// moved straight to Completed. A completed process is left alone. Otherwise the
        /// cancellation is forwarded to the dispatcher, if there is one.
        /// </summary>
        public void Cancel()
        {
            lock (SyncRoot)
            {
                if (this.CurrentState < ProcessState.AssignedToNode)
                {
                    // Not assigned to a node yet: record the cancellation and complete the process here
                    this.m_cancelled = true;
                    this.ExitCode    = 0x830A0003; // DrError_VertexReceivedTermination
                    DryadLogger.LogInformation("Cancel process", "Cancelation received for vertex {0}.{1} before it was assigned to a node", m_graphManagerId, m_graphManagerVersion);
                    ChangeState(ProcessState.Completed);
                    return;
                }

                if (this.CurrentState == ProcessState.Completed)
                {
                    // Already completed, so there is nothing to cancel
                    DryadLogger.LogInformation("Cancel process", "Cancellation received for vertex {0}.{1} after it completed", m_graphManagerId, m_graphManagerVersion);
                    return;
                }

                if (Dispatcher == null)
                {
                    // Assigned or later but with no dispatcher: unexpected, log it and give up
                    DryadLogger.LogError(0, null, "Cancellation received for vertex {0}.{1} in state {2} with no dispatcher", m_graphManagerId, m_graphManagerVersion, CurrentState.ToString());
                    return;
                }

                DryadLogger.LogInformation("Cancel process", "Cancellation received for vertex {0}.{1} after it was assigned to node {2}", m_graphManagerId, m_graphManagerVersion, Dispatcher.NodeName);

                // Defensive re-check kept from the original before forwarding the cancellation
                if (Dispatcher != null)
                {
                    Dispatcher.CancelScheduleProcess(m_id);
                }
            }
        }
예제 #10
0
 /// <summary>
 /// Initialization work item. With the local scheduler type the initialized event is set
 /// right away. Otherwise job execution is initialized from the XC_RESOURCEFILES environment
 /// variable first. Any failure is reported through Surrender.
 /// </summary>
 /// <param name="state">Not read by this method.</param>
 void InitializationThreadProc(Object state)
 {
     try
     {
         string schedulerType = Environment.GetEnvironmentVariable(Constants.schedulerTypeEnvVar);

         if (schedulerType != Constants.schedulerTypeLocal)
         {
             string resourceFiles = Environment.GetEnvironmentVariable("XC_RESOURCEFILES");

             if (!ExecutionHelper.InitializeForJobExecution(resourceFiles))
             {
                 Surrender(new Exception("Failed to initialize vertex service for job execution"));
                 return;
             }

             DryadLogger.LogInformation("InitializationThreadProc", "InitializeForJobExecution was successful.");
         }

         // Reached for the local scheduler and after a successful job initialization
         initializedEvent.Set();
     }
     catch (Exception initError)
     {
         Surrender(initError);
     }
 }
        /// <summary>
        /// Creates and opens the vertex callback service host on the given address. While the
        /// address is reported as already in use, the attempt is repeated up to twenty times
        /// with a three second pause between attempts.
        /// </summary>
        /// <param name="listenUri">Base address the service host listens on.</param>
        /// <param name="schedulerHelper">Supplies the NetTcpBinding used for the service endpoint.</param>
        /// <returns>
        /// true once the host is open; false when a CommunicationException (which includes the
        /// final address-in-use failure) prevented the host from starting.
        /// </returns>
        public bool Start(string listenUri, ISchedulerHelper schedulerHelper)
        {
            DryadLogger.LogMethodEntry(listenUri);
            Uri serviceAddress = new Uri(listenUri);

            try
            {
                NetTcpBinding tcpBinding = schedulerHelper.GetVertexServiceBinding();

                selfHost = null;

                // Twenty attempts with a three second pause add up to roughly one minute of retrying
                const int maxAttempts  = 20;
                const int retryDelayMs = 3000;

                for (int attempt = 1; attempt <= maxAttempts; attempt++)
                {
                    try
                    {
                        // Create the host
                        selfHost = new ServiceHost(callbackService, serviceAddress);

                        // Add the callback endpoint and the throttling limits
                        selfHost.AddServiceEndpoint(typeof(IDryadVertexCallback), tcpBinding, Constants.vertexCallbackServiceName);
                        ServiceThrottlingBehavior throttling = new ServiceThrottlingBehavior
                        {
                            MaxConcurrentCalls    = Constants.MaxConnections,
                            MaxConcurrentSessions = Constants.MaxConnections
                        };
                        selfHost.Description.Behaviors.Add(throttling);

                        // Install the authorization manager
                        selfHost.Authorization.ServiceAuthorizationManager = new DryadVertexServiceAuthorizationManager();

                        // Start listening; success ends the retry loop
                        selfHost.Open();
                        break;
                    }
                    catch (AddressAlreadyInUseException)
                    {
                        // Discard the host that failed to open before trying again
                        if (selfHost != null)
                        {
                            selfHost.Abort();
                            selfHost = null;
                        }

                        // The last attempt does not sleep; it rethrows so the outer handler reports the failure
                        if (attempt == maxAttempts)
                        {
                            throw;
                        }

                        DryadLogger.LogInformation("Start Vertex Callback Service", "Address already in use. Retrying...");
                        System.Threading.Thread.Sleep(retryDelayMs);
                    }
                }

                DryadLogger.LogInformation("Start Vertex Callback Service", "Service Host started successfully");
                return true;
            }
            catch (CommunicationException startFailure)
            {
                DryadLogger.LogCritical(0, startFailure, "Failed to start vertex callback service");

                // Best-effort cleanup: a failure while aborting is deliberately ignored
                try
                {
                    if (selfHost != null)
                    {
                        selfHost.Abort();
                    }
                }
                catch
                {
                }

                return false;
            }
        }
예제 #12
0
        /// <summary>
        /// The main entry point for the application. Creates the job working directory, reads
        /// the task id from the environment, starts tracing, hosts the vertex service and blocks
        /// until the service's shutdown event is set.
        /// </summary>
        /// <param name="args">Command line arguments; not read by this method.</param>
        /// <returns>0 after a clean shutdown, 1 on any failure.</returns>
        private static int Main(string[] args)
        {
            //
            // Try to create working directory. Fail vertex service if unable to do so.
            //
            bool createdJobDir = false;
            int  retryCount    = 0;

            do
            {
                try
                {
                    ProcessPathHelper.CreateUserWorkingDirectory();

                    Directory.CreateDirectory(ProcessPathHelper.JobPath);

                    createdJobDir = true;
                }
                catch (Exception ex)
                {
                    Console.Error.WriteLine("Failed to create working directory, {0}. Error: {1}.", ProcessPathHelper.JobPath, ex.ToString());
                    retryCount++;
                }
            } while (retryCount < numRetries && !createdJobDir);

            if (!createdJobDir)
            {
                Console.Error.WriteLine("Vertex service cannot proceed because working directory could not be created.");
                return(1);
            }

            //
            // Get Task ID from environment
            //
            int taskId;

            if (!Int32.TryParse(Environment.GetEnvironmentVariable("CCP_TASKID"), out taskId))
            {
                // Written as a single string: with two string arguments WriteLine treats the first
                // as a format string and the second as its argument, so only "Program.Main" was printed.
                Console.Error.WriteLine("Program.Main: Failed to read CCP_TASKID from environment");
                return(1);
            }

            //
            // Initialize tracing subsystem; the trace file lives in the job directory and is named after the task id
            //
            string traceFile = Path.Combine(ProcessPathHelper.JobPath, String.Format("VertexServiceTrace_{0}.txt", taskId));

            DryadLogger.Start(traceFile);

            //
            // Initialize scheduler helper of the correct type
            //
            ISchedulerHelper schedulerHelper;

            try
            {
                schedulerHelper = SchedulerHelperFactory.GetInstance();
            }
            catch (Exception ex)
            {
                DryadLogger.LogCritical(0, ex, "Failed to get scheduler helper");
                DryadLogger.Stop();
                Console.Error.WriteLine("Failed to contact HPC scheduler. See log for details.");
                return(1);
            }

            //
            // Step 1 of the address configuration procedure: Create a URI to serve as the base address.
            //
            string strAddress  = schedulerHelper.GetVertexServiceBaseAddress("localhost", taskId);
            Uri    baseAddress = new Uri(strAddress);

            //
            // Step 2 of the hosting procedure: Create ServiceHost
            //
            ServiceHost selfHost = new ServiceHost(typeof(VertexService), baseAddress);

            try
            {
                //
                // Get the service binding
                //
                NetTcpBinding binding = schedulerHelper.GetVertexServiceBinding();

                //
                // Step 3 of the hosting procedure: Add service endpoints.
                //
                ServiceEndpoint vertexEndpoint = selfHost.AddServiceEndpoint(typeof(IDryadVertexService), binding, Constants.vertexServiceName);
                DryadLogger.LogInformation("Initialize vertex service", "listening on address {0}", vertexEndpoint.Address.ToString());

                //
                // Step 4 of hosting procedure : Add a security manager
                // TODO: Fix this for local scheduler and / or Azure scheduler when supported
                //
                selfHost.Authorization.ServiceAuthorizationManager = new DryadVertexServiceAuthorizationManager();

                // Step 5 of the hosting procedure: Start (and then stop) the service.
                selfHost.Open();

                Console.WriteLine("Vertex Service up and waiting for commands");

                // Wait (with no timeout) for the shutdown event to be set.
                VertexService.shutdownEvent.WaitOne(-1, true);

                // An internal shutdown is treated as a failure: report the reason, abort the host, exit with 1
                if (VertexService.internalShutdown)
                {
                    string errorMsg = string.Format("Vertex Service Task unable to continue after critical error in initialization or communication: {0}", VertexService.ShutdownReason.ToString());
                    Console.WriteLine(errorMsg);
                    DryadLogger.LogCritical(0, new Exception(errorMsg));
                    DryadLogger.Stop();
                    try
                    {
                        selfHost.Abort();
                    }
                    catch
                    {
                        // Best-effort abort: the process is exiting with an error code anyway
                    }

                    return(1);
                }

                // Close the ServiceHostBase to shutdown the service.
                selfHost.Close();
            }
            catch (CommunicationException ce)
            {
                //
                // Report any errors and fail task
                //
                DryadLogger.LogCritical(0, ce, "A communication exception occurred");
                DryadLogger.Stop();
                try
                {
                    selfHost.Abort();
                }
                catch
                {
                    // Best-effort abort: the process is exiting with an error code anyway
                }
                Console.Error.WriteLine("CommunicationException occured, aborting vertex service. See log for details.");
                return(1);
            }
            catch (Exception ex)
            {
                //
                // Report any errors and fail task
                //
                DryadLogger.LogCritical(0, ex, "An exception occurred");
                DryadLogger.Stop();
                try
                {
                    selfHost.Abort();
                }
                catch
                {
                    // Best-effort abort: the process is exiting with an error code anyway
                }
                Console.Error.WriteLine("An exception occured, aborting vertex service. See log for details.");
                return(1);
            }

            DryadLogger.LogInformation("Vertex Service", "Shut down cleanly");
            DryadLogger.Stop();
            return(0);
        }
예제 #13
0
        /// <summary>
        /// Copies the job's resource files from the staging location into the job directory.
        /// The resource string is a comma separated list whose first entry is the staging
        /// location (a directory or an hdfs:// URI) and whose remaining entries are file names.
        /// A leading '@' means the rest of the string is the path of a file containing that list.
        /// Files that already exist in the job directory are not copied again.
        /// </summary>
        /// <param name="resources">Resource list, or '@' followed by the path of a file holding the list.</param>
        /// <returns>
        /// true when every listed file is present or was copied; false when the list is null,
        /// has fewer than two entries, or a local file copy fails.
        /// </returns>
        private static bool CopyStagedJobResources(string resources)
        {
            if (resources == null)
            {
                Console.Error.WriteLine("[ExecutionHelper.CopyJobResources] resources = null");
                return false;
            }

            // The length check keeps an empty string from throwing on the indexer;
            // an empty list is then rejected by the entry count check below.
            if (resources.Length > 0 && resources[0] == '@')
            {
                resources = File.ReadAllText(resources.Substring(1));
            }

            // Drop one trailing separator so that it does not produce an empty last entry
            if (resources.EndsWith(",", StringComparison.Ordinal))
            {
                resources = resources.Substring(0, resources.Length - 1);
            }

            string[] files = resources.Split(',');

            // Values are passed as logger arguments rather than pre-formatted, so braces in
            // file names or messages are never interpreted as format placeholders.
            DryadLogger.LogInformation("CopyStagedJobResources", "Will copy {0} resource files.", files.Length);

            if (files.Length <= 1)
            {
                Console.Error.WriteLine("[ExecutionHelper.CopyJobResources] invalid XC_RESOURCEFILES length = {0}", files.Length);
                return false;
            }

            // Entry 0 is the staging location; the scheme prefix is compared ordinally, ignoring case
            string source       = files[0];
            bool   sourceIsHdfs = source.StartsWith("hdfs://", StringComparison.OrdinalIgnoreCase);

            for (int i = 1; i < files.Length; i++)
            {
                string jobFilePath = Path.Combine(ProcessPathHelper.JobPath, files[i]);

                //
                // File may already exist due to local resource copying
                //
                if (File.Exists(jobFilePath))
                {
                    continue;
                }

                if (sourceIsHdfs)
                {
                    // copy from HDFS
                    DryadLogger.LogDebug("CopyStagedJobResources",
                                         "[ExecutionHelper.CopyJobResources] Copying '{0}' to '{1}' from HDFS dir {2}",
                                         files[i], jobFilePath, source);

                    // NOTE(review): any result of GetHdfsFile is not checked here - confirm how it reports failure
                    GetHdfsFile(source, files[i], jobFilePath);
                }
                else
                {
                    string sourceFile = Path.Combine(source, files[i]);
                    try
                    {
                        DryadLogger.LogDebug("CopyStagedJobResources",
                                             "[ExecutionHelper.CopyJobResources] Copying '{0}' to '{1}'",
                                             sourceFile, jobFilePath);
                        File.Copy(sourceFile, jobFilePath);
                    }
                    catch (Exception e)
                    {
                        DryadLogger.LogInformation("CopyStagedJobResources",
                                                   "[ExecutionHelper.CopyJobResources] Exception copying '{0}' to '{1}': {2}",
                                                   sourceFile, jobFilePath, e.Message);
                        return false;
                    }
                }
            }

            return true;
        }
예제 #14
0
        /// <summary>
        /// Called in new thread in setgetproperty service operation
        /// </summary>
        /// <param name="obj"></param>
        void SetGetPropThreadProc(Object obj)
        {
            DryadLogger.LogMethodEntry(DryadId);
            PropertyRequest r = obj as PropertyRequest;

            ProcessInfo infoLocal = new ProcessInfo();

            ulong[]  propertyVersions = null;
            string[] propertyLabels   = null;

            //
            // Make sure process is started before continuing
            //
            if (this.State < ProcessState.Running)
            {
                try
                {
                    processStartEvent.WaitOne();
                }
                catch (ObjectDisposedException ex)
                {
                    // The process was cancelled and released before it started running, just return
                    if (exited)
                    {
                        DryadLogger.LogInformation("SetGetProp Thread", "Process {0} cancelled or exited before starting.", this.DryadId);
                    }
                    else
                    {
                        DryadLogger.LogError(0, ex);
                    }
                    DryadLogger.LogMethodExit();
                    return;
                }
            }

            //
            // Use status_pending if running, vertex initialization failure if process is failed and process exit code otherwise
            //
            infoLocal.processStatus = 0x103;  // WinNT.h STATUS_PENDING
            infoLocal.processState  = state;
            if (state == ProcessState.Running)
            {
                infoLocal.exitCode = 0x103; // WinNT.h STATUS_PENDING
            }
            else if (failed)
            {
                infoLocal.exitCode = Constants.DrError_VertexError;
            }
            else if (cancelled)
            {
                infoLocal.exitCode = Constants.DrError_VertexReceivedTermination;  // DryadError_VertexReceivedTermination
            }
            else
            {
                infoLocal.exitCode = (uint)systemProcess.ExitCode;
            }

            //
            // Record specified properties and update versions - wakes up anyone waiting for property changes
            //
            SetProperties(r.infos, out propertyLabels, out propertyVersions);

            //
            // Try to get property update
            //
            if (BlockOnProperty(r.blockOnLabel, r.blockOnVersion, r.maxBlockTime))
            {
                //
                // If property update was received, update the received property information
                // If received property marks vertex completed, record that
                //
                if (r.getPropLabel != null && r.getPropLabel.Length > 0)
                {
                    lock (syncRoot)
                    {
                        infoLocal.propertyInfos = new ProcessPropertyInfo[1];

                        int index;
                        if (TryGetProperty(r.getPropLabel, out infoLocal.propertyInfos[0], out index) == false)
                        {
                            DryadLogger.LogError(0, null, "Failed to get property for label {0}", r.getPropLabel);
                        }

                        if (StatusMessageContainsDryadError_VertexCompleted(infoLocal.propertyInfos[0].propertyLabel))
                        {
                            CopyProp(infoLocal.propertyInfos[0], out latestVertexStatusSent);
                        }
                    }
                }

                //
                // If request asks for statistics on vertex process, get them
                //
                if (r.ProcessStatistics)
                {
                    if (GetStatistics(out infoLocal.processStatistics) == false)
                    {
                        DryadLogger.LogError(0, null, "Failed to get vertex statistics");
                    }
                }
            }

            //
            // Try to report property change, if unsuccessful, kill the running vertex host process
            //
            if (!ReplyDispatcher.SetGetPropsComplete(r.replyUri, systemProcess, dryadProcessId, infoLocal, propertyLabels, propertyVersions))
            {
                try
                {
                    systemProcess.Kill();
                }
                catch (InvalidOperationException /* unused ioe */)
                {
                    // The process has already exited
                    // -or-
                    // There is no process associated with this Process object.
                }
                catch (Exception eInner)
                {
                    //
                    // all other exceptions
                    //
                    DryadLogger.LogError(0, eInner, "Exception calling back to '{0}'", r.replyUri);
                }
            }

            //
            // If a property was handled from the graph manager, decrement the waiter count
            //
            if (ReplyDispatcher.IsGraphMrgUri(r.replyUri))
            {
                int n = Interlocked.Decrement(ref propertyWaiters);
                DryadLogger.LogInformation("SetGetProp Thread", "Process {0} propertyWaiters = {1}", DryadId, n);
            }

            lock (syncRoot)
            {
                //
                // If vertex process has exited, and sending vertex completed event, we can stop worrying
                //
                if (!finalStatusMessageSent)
                {
                    if (latestVertexStatusSent != null)
                    {
                        if (!String.IsNullOrEmpty(latestVertexStatusSent.propertyString))
                        {
                            if (latestVertexStatusSent.propertyString.Contains(string.Format(@"(0x{0:x8})", Constants.DrError_VertexCompleted)))
                            {
                                finalStatusMessageSent = true;
                            }
                        }
                    }
                }
            }
            DryadLogger.LogMethodExit();
        }
예제 #15
0
        /// <summary>
        /// Asynchronously called on start command. Waits for the service to finish initializing,
        /// prepares the working directory, launches the vertex host process and then notifies the
        /// graph manager whether the process is running or failed to start.
        /// </summary>
        /// <remarks>
        /// The process start event is signaled on every exit path, including the early exit taken
        /// when the process was cancelled before it could be started, so that threads waiting for
        /// the start attempt to finish are always released.
        /// </remarks>
        /// <param name="obj">
        /// The <see cref="ManualResetEvent"/> that is signaled once the service has been initialized.
        /// </param>
        void StartProcessThreadProc(Object obj)
        {
            ManualResetEvent serviceInitializedEvent = obj as ManualResetEvent;
            bool             started = false;

            try
            {
                try
                {
                    if (serviceInitializedEvent == null)
                    {
                        // Report a descriptive error instead of the NullReferenceException that
                        // WaitOne would raise; it is logged by the catch block below and the
                        // start is then treated as failed.
                        throw new ArgumentException("Expected a ManualResetEvent that signals service initialization", "obj");
                    }

                    //
                    // Wait for service initialization
                    //
                    serviceInitializedEvent.WaitOne();

                    if (ExecutionHelper.InitializeForProcessExecution(dryadProcessId, Environment.GetEnvironmentVariable("XC_RESOURCEFILES")))
                    {
                        //
                        // Vertex working directory configured successfully, start the vertex host
                        //
                        environment.Add(Constants.vertexSvcLocalAddrEnvVar, localAddress);

                        ProcessStartInfo startInfo = new ProcessStartInfo();
                        startInfo.CreateNoWindow   = true;
                        startInfo.UseShellExecute  = false;
                        startInfo.WorkingDirectory = ProcessPathHelper.ProcessPath(dryadProcessId);

                        //
                        // The first space-separated token is the executable, the rest are its arguments
                        //
                        string[] args = commandLine.Split(' ');
                        startInfo.Arguments = String.Join(" ", args, 1, args.Length - 1);

                        //
                        // Use either FQ path or path relative to job path
                        //
                        if (Path.IsPathRooted(args[0]))
                        {
                            startInfo.FileName = args[0];
                        }
                        else
                        {
                            startInfo.FileName = Path.Combine(ProcessPathHelper.JobPath, args[0]);
                        }
                        DryadLogger.LogInformation("StartProcess", "FileName: '{0}'", startInfo.FileName);

                        //
                        // Add environment variables to the vertex host process; variables that are
                        // already present in the start info are left untouched
                        //
                        foreach (DictionaryEntry entry in environment)
                        {
                            string key = entry.Key.ToString();

                            if (key == null || startInfo.EnvironmentVariables.ContainsKey(key))
                            {
                                DryadLogger.LogInformation("StartProcess", "Attempting to add existing key '{0}' with value '{1}'",
                                                           entry.Key, entry.Value);
                            }
                            else
                            {
                                startInfo.EnvironmentVariables.Add(key, entry.Value.ToString());
                            }
                        }

                        lock (syncRoot)
                        {
                            //
                            // After taking lock, start the vertex host process and set up exited event handler
                            //
                            if (cancelled)
                            {
                                // If we've already been canceled, don't start the process.
                                // The enclosing finally block still signals the process start event.
                                DryadLogger.LogInformation("Process start", "Not starting process {0} due to receipt of cancellation", DryadId);
                                return;
                            }

                            systemProcess                     = new Process();
                            systemProcess.StartInfo           = startInfo;
                            systemProcess.EnableRaisingEvents = true;
                            systemProcess.Exited             += new EventHandler(Process_Exited);
                            Console.WriteLine("Process start - Vertex host process starting");
                            started = systemProcess.Start();
                            Console.WriteLine("Process start - Vertex host process started");
                            if (started)
                            {
                                DryadLogger.LogInformation("Process start", "Vertex host process started");
                                state = ProcessState.Running;
                            }
                            else
                            {
                                DryadLogger.LogError(0, null, "Vertex host process failed to start");
                            }
                        }
                    }
                    else
                    {
                        DryadLogger.LogError(0, null, "Initialization failed");
                    }
                }
                catch (Exception e)
                {
                    DryadLogger.LogError(0, e, "Error starting vertex");
                }

                if (started)
                {
                    //
                    // Notify Graph Manager that process started if successful
                    //
                    bool success = ReplyDispatcher.FireStateChange(this.graphManagerReplyUri, this.dryadProcessId, ProcessState.Running);
                    if (!success)
                    {
                        //
                        // Graph manager doesn't know we started and we have no way to tell it, so it's
                        // best to just fail the vertex service task and let the job manager inform the graph manager
                        //
                        VertexService.Surrender(new Exception("Unable to communicate with graph manager."));
                    }
                }
                else
                {
                    //
                    // Otherwise, notify GM that process has failed
                    //
                    lock (syncRoot)
                    {
                        // If we've already been canceled, we don't need to change state or record the initialization failure
                        if (!cancelled)
                        {
                            state       = ProcessState.Completed;
                            this.failed = true;
                            exitCode    = unchecked ((int)Constants.DrError_VertexInitialization); // DryadError_VertexInitialization
                        }
                    }

                    if (failed)  // This also means we weren't canceled
                    {
                        // Notify the Graph Manager that the process failed to start
                        Process_Exited(this, null);
                    }
                }
            }
            finally
            {
                //
                // Make sure process start event is set, whichever way this method exits
                //
                try
                {
                    processStartEvent.Set();
                }
                catch (ObjectDisposedException)
                {
                    // The event was released before the start attempt finished
                    // (presumably by cancellation followed by disposal - TODO confirm);
                    // there is nothing left to signal.
                    DryadLogger.LogWarning("Process start", "Process {0} disposed before start event could be set", DryadId);
                }
            }
        }
예제 #16
0
        /// <summary>
        /// Adds specified property to property wait list and waits for it.
        /// </summary>
        /// <remarks>
        /// The wait polls in 100 ms slices until the wait event for the label/version pair is
        /// signaled, the process has exited, or the event is disposed. The wait event is looked up
        /// once while holding <c>syncRoot</c> and then waited on through a local reference, so the
        /// polling loop never reads the shared wait-event dictionaries outside the lock.
        /// </remarks>
        /// <param name="blockOnLabel">Property label to wait for; null or empty means there is nothing to wait for</param>
        /// <param name="blockOnVersion">Version of property to wait for</param>
        /// <param name="maxBlockTime">Time to wait for property. Currently only logged; no timeout is enforced.</param>
        /// <returns>
        /// True if no label was given, if the property exists and either <paramref name="blockOnVersion"/>
        /// is 0 or the stored version is greater than it, or if the property exists and the process
        /// state is Completed. False if the property was requested but none was returned, or if the
        /// wait event was disposed while waiting.
        /// </returns>
        private bool BlockOnProperty(string blockOnLabel, ulong blockOnVersion, long maxBlockTime)
        {
            DryadLogger.LogMethodEntry();

            //
            // Return true if no label is provided
            //
            if (String.IsNullOrEmpty(blockOnLabel))
            {
                DryadLogger.LogMethodExit(true);
                return true;
            }

            DryadLogger.LogInformation("Block on property", "Label {0} Version {1} maxBlockTime {2}", blockOnLabel, blockOnVersion, maxBlockTime);

            ProcessPropertyInfo prop      = null;
            ManualResetEvent    waitEvent = null;

            //
            // If the process already exited, don't bother adding a wait event for
            // this property - if it's not already set it never will be.
            //
            lock (syncRoot)
            {
                if (!exited)
                {
                    //
                    // Add this label and version to the wait events list if needed,
                    // and keep a reference to the event so it can be waited on outside the lock
                    //
                    Dictionary <ulong, ManualResetEvent> versionWaiters;
                    if (!propertyWaitEvents.TryGetValue(blockOnLabel, out versionWaiters))
                    {
                        versionWaiters = new Dictionary <ulong, ManualResetEvent>();
                        propertyWaitEvents.Add(blockOnLabel, versionWaiters);
                    }

                    if (!versionWaiters.TryGetValue(blockOnVersion, out waitEvent))
                    {
                        waitEvent = new ManualResetEvent(false);
                        versionWaiters.Add(blockOnVersion, waitEvent);
                    }
                }
                else
                {
                    DryadLogger.LogInformation("Block on property", "Process {0} already exited, not adding waiter", this.DryadId);
                }
            }

            // todo: We still may want to implement timeouts to deal with deadlocks in the service / host but it hasn't been an issue yet.

            //
            // Wait forever (or until process exits or is disposed) for the property to be set or interrupted.
            // waitEvent is null only when the process had already exited, in which case there is nothing to wait for.
            //
            while (waitEvent != null && !exited)
            {
                try
                {
                    if (waitEvent.WaitOne(100, false))
                    {
                        break;
                    }
                }
                catch (ObjectDisposedException)
                {
                    DryadLogger.LogWarning("Block on property", "Process {0} disposed while waiting for label {1}, version {2}", DryadId, blockOnLabel, blockOnVersion);
                    DryadLogger.LogMethodExit(false);
                    return false;
                }
            }

            // Did we get the property, or did the process
            // terminate?
            int index;

            if (TryGetProperty(blockOnLabel, out prop, out index))
            {
                //
                // If a property was successfully returned, return true
                //
                if ((blockOnVersion == 0) || (prop.propertyVersion > blockOnVersion))
                {
                    DryadLogger.LogMethodExit(true);
                    return true;
                }

                if (state == ProcessState.Completed)
                {
                    DryadLogger.LogInformation("Block on property", "Vertex completed (wait) requested version:{0} returned version:{1} of label {2}", blockOnVersion, prop.propertyVersion, blockOnLabel);
                    DryadLogger.LogMethodExit(true);
                    return true;
                }
            }

            //
            // Return false if property was requested but none was found
            //
            DryadLogger.LogMethodExit(false);
            return false;
        }