예제 #1
0
        /// <summary>
        /// Moves this process into the Running state, first waiting up to ten
        /// seconds for the assigned-to-node event to be signalled.
        /// </summary>
        /// <param name="state">Not read by this method.</param>
        public void TransitionToRunning(object state)
        {
            DryadLogger.LogDebug("Change State", "Transitioning to Running with current state {0} for process {1}", this.m_currentState.ToString(), this.m_id);

            TimeSpan waitLimit = new TimeSpan(0, 0, 10);

            try
            {
                // The GM may, in rare cases (e.g. a cancelled duplicate), close the process handle
                // while this transition is in flight. Dispose then closes the wait handle and the
                // wait below throws; that is logged in the catch and otherwise ignored because the
                // GM no longer cares about this process.
                bool signalled = m_assignedToNodeEvent.WaitOne(waitLimit, false);
                if (!signalled)
                {
                    // Fall through and fire the state change regardless, otherwise the process
                    // becomes a zombie. The GM copes with the transition; it may just be delayed.
                    DryadLogger.LogWarning("Change State", "Timed out waiting for transition to AssignedToNode for process {0}", this.m_id);
                }
                else
                {
                    DryadLogger.LogDebug("Change State", "Successfully waited for transition to {0} for process {1}", this.m_currentState.ToString(), this.m_id);
                }

                ChangeState(ProcessState.Running);
            }
            catch (ObjectDisposedException ex)
            {
                DryadLogger.LogError(0, ex, "Process handle was closed while waiting for transition to assigned to node");
            }
        }
예제 #2
0
        /// <summary>
        /// Notifies and then unregisters property listeners whose awaited version has been
        /// reached, first for the properties that were set (<paramref name="propertyLabels"/> /
        /// <paramref name="propertyVersions"/>) and then for the properties reported back in
        /// <paramref name="info"/>. All work happens while holding <c>SyncRoot</c>.
        /// </summary>
        /// <param name="info">Process information forwarded to the listeners; may be null, in which case the "Get" pass is skipped.</param>
        /// <param name="propertyLabels">Labels of the properties that were set; may be null.</param>
        /// <param name="propertyVersions">Versions matching <paramref name="propertyLabels"/> by index; may be null.</param>
        public void SetGetPropsComplete(ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
        {
            lock (SyncRoot)
            {
                // For the Set part
                if (propertyLabels != null && propertyVersions != null)
                {
                    // Only walk the index range both arrays share, so a short versions array
                    // cannot cause an IndexOutOfRangeException.
                    int pairCount = propertyLabels.Length;
                    if (propertyVersions.Length < pairCount)
                    {
                        DryadLogger.LogWarning("SetGetProsComplete", "Received {0} property labels but only {1} property versions for process {2}", propertyLabels.Length, propertyVersions.Length, m_id);
                        pairCount = propertyVersions.Length;
                    }

                    for (int i = 0; i < pairCount; i++)
                    {
                        NotifyPropertyListeners(propertyLabels[i], propertyVersions[i], "Set complete - m_id: {0} state: {1}, label: {2}", info, propertyVersions);
                    }
                }

                // For the Get part
                if (info != null && info.propertyInfos != null)
                {
                    foreach (ProcessPropertyInfo propInfo in info.propertyInfos)
                    {
                        NotifyPropertyListeners(propInfo.propertyLabel, propInfo.propertyVersion, "Get complete - m_id: {0} state: {1}, label: {2}", info, propertyVersions);
                    }
                }
            }
        }

        /// <summary>
        /// Invokes every listener registered for <paramref name="label"/> whose key is at or below
        /// <paramref name="version"/> (or equal to <c>ulong.MaxValue</c>) and then removes those
        /// entries. Must be called with <c>SyncRoot</c> held.
        /// </summary>
        /// <param name="label">Property label to look up in the listener table.</param>
        /// <param name="version">Version that has now been reached for the property.</param>
        /// <param name="logFormat">Debug log format taking process id, process state and label.</param>
        /// <param name="info">Process information passed through to the listeners; may be null.</param>
        /// <param name="propertyVersions">Version array passed through to the listeners; may be null.</param>
        private void NotifyPropertyListeners(string label, ulong version, string logFormat, ProcessInfo info, ulong[] propertyVersions)
        {
            if (!m_propertyListeners.ContainsKey(label))
            {
                return;
            }

            // Removal is deferred because the listener table cannot be modified while it is enumerated.
            List <ulong> versionsToRemove = new List <ulong>();
            foreach (KeyValuePair <ulong, GetSetPropertyEventHandler> entry in m_propertyListeners[label])
            {
                if (entry.Key <= version || entry.Key == ulong.MaxValue)
                {
                    // info can be null on the Set path; do not let the log call throw in that case.
                    object stateForLog = (info != null) ? (object)info.processState : "unknown";
                    DryadLogger.LogDebug("SetGetProsComplete", logFormat, m_id, stateForLog, label);

                    XComputeProcessGetSetPropertyEventArgs e = new XComputeProcessGetSetPropertyEventArgs(m_id, info, propertyVersions);
                    entry.Value(this, e);

                    versionsToRemove.Add(entry.Key);
                }
            }

            foreach (ulong staleVersion in versionsToRemove)
            {
                m_propertyListeners[label].Remove(staleVersion);
            }
        }
예제 #3
0
        /// <summary>
        /// Blocks until the number of running vertex tasks reaches the start threshold.
        /// </summary>
        /// <returns>Always true; the method only returns once enough tasks are running.</returns>
        bool ISchedulerHelper.WaitForTasksReady()
        {
            // Strategy: wait for as many vertex tasks as is practical, starting from
            // AllocatedNodes.Count. The target shrinks as tasks fail or are cancelled (the
            // scheduler ends the job once it drops below Min) and grows when tasks are rerun.
            while (true)
            {
                // Signalled by the Task Monitor Thread each time it finishes a batch of changes.
                m_taskChangeEvt.WaitOne();

                // Hold the lock so OnVertexChangeHandler cannot update the counts mid-check.
                lock (this)
                {
                    DryadLogger.LogInformation("Wait for vertex tasks",
                                               "{0} tasks are running, waiting for at least {1} before starting",
                                               m_runningTasks, m_startNodes);

                    if (m_runningTasks < m_startNodes)
                    {
                        // Not enough yet; go back and wait for the next batch of changes.
                        continue;
                    }

                    DryadLogger.LogDebug("Wait for vertex tasks",
                                         "Sufficient number of tasks transitioned to running to begin: {0} running tasks",
                                         m_runningTasks);
                    return true;
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Completes an asynchronous schedule-process call. Timeouts and communication errors
        /// trigger a retry of the whole scheduling operation rather than of the end call.
        /// </summary>
        /// <param name="asyncResult">Async result handed to the client's EndScheduleProcess.</param>
        /// <returns>
        /// Success or Failure when the call completes (or the dispatcher is already faulted, or an
        /// unexpected error occurs); Pending when a retry was started; CommunicationError when the
        /// connection could not be reopened or the retry could not be scheduled.
        /// </returns>
        public SchedulingResult EndScheduleProcess(IAsyncResult asyncResult)
        {
            // We don't want to retry the async end operation - if it fails retry
            // the whole scheduling operation

            try
            {
                if (Faulted)
                {
                    return SchedulingResult.Failure;
                }

                return this.m_client.EndScheduleProcess(asyncResult)
                    ? SchedulingResult.Success
                    : SchedulingResult.Failure;
            }
            catch (FaultException <VertexServiceError> vse)
            {
                DryadLogger.LogWarning("Schedule Process", "Error completing schedule process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, vse.Reason);
                return SchedulingResult.Failure;
            }
            catch (TimeoutException te)
            {
                // Fall through to the retry logic below.
                DryadLogger.LogWarning("Schedule Process", "Timeout communicating with vertex service for process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, te.ToString());
            }
            catch (CommunicationException ce)
            {
                // Fall through to the retry logic below.
                DryadLogger.LogWarning("Schedule Process", "Error communicating with vertex service for process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, ce.ToString());
            }
            catch (Exception e)
            {
                // The node placeholder was previously {0}, which printed the process id twice
                // and never showed the node name.
                DryadLogger.LogError(0, e, "Error calling EndScheduleProcess for process {0} on node {1}", this.m_currentProcess.Id, m_nodeName);
                return SchedulingResult.Failure;
            }

            // If we make it here, then we need to retry the scheduling operation
            if (SafeOpenConnection())
            {
                // ScheduleProcess manages the retry count and returns false if it is exceeded
                DryadLogger.LogDebug("Schedule Process", "Communication error: retrying process {0} on node {1}", this.m_currentProcess.Id, this.m_nodeName);
                if (ScheduleProcess(m_currentReplyUri, m_currentProcess, m_currentAsyncCallback))
                {
                    return SchedulingResult.Pending;
                }
            }

            // SafeOpenConnection failed or retry count exceeded - fault the dispatcher.
            DryadLogger.LogWarning("Schedule Process", "Connection failed to node {0}", this.m_nodeName);
            return SchedulingResult.CommunicationError;
        }
예제 #5
0
        /// <summary>
        /// Computes the job's percent complete from the progress step counters and reports it,
        /// together with a message of at most 80 characters, through the scheduler helper.
        /// </summary>
        /// <param name="message">Progress text; null is reported as an empty string.</param>
        /// <param name="finished">True for the final update, which is allowed to reach 100%.</param>
        private void ShowProgress(string message, bool finished)
        {
            // Job progress messages have max length of 80
            const int maxMessageLength = 80;

            Int32 nPercent = 0;

            // Progress advances as active vertices complete. Once they are all done the GM still
            // has to seal the output stream, which can take a while, so everything before the
            // final update is scaled to 99%.
            double scalingFactor;
            if (finished)
            {
                scalingFactor = 100.0;
            }
            else
            {
                scalingFactor = 99.0;
            }

            try
            {
                double completedSteps = Convert.ToDouble(m_progressStepsCompleted);
                double totalSteps     = Convert.ToDouble(m_totalProgressSteps);
                nPercent = Convert.ToInt32(completedSteps / totalSteps * scalingFactor);
                DryadLogger.LogDebug("Set Job Progress", "{0} percent complete", nPercent);
            }
            catch (OverflowException e)
            {
                DryadLogger.LogWarning("Set Job Progress", "OverflowException calculating percent complete: {0}", e.ToString());
                nPercent = 100;
            }

            if (nPercent > 100)
            {
                DryadLogger.LogWarning("Set Job Progress", "Percent complete greater than 100: {0} / {1} steps reported complete", m_progressStepsCompleted, m_totalProgressSteps);
                nPercent = 100;
            }

            try
            {
                string text = message ?? String.Empty;
                if (text.Length > maxMessageLength)
                {
                    text = text.Substring(0, maxMessageLength);
                }
                m_schedulerHelper.SetJobProgress(nPercent, text);
            }
            catch (Exception e)
            {
                // Reporting progress is best effort; a failure is logged and otherwise ignored.
                DryadLogger.LogWarning("Set Job Progress", "Failed to set job progress: {0}", e.ToString());
            }
        }
예제 #6
0
        /// <summary>
        /// Translates a YARN task state into the corresponding vertex task state and logs the mapping.
        /// </summary>
        /// <param name="ts">YARN task state to translate.</param>
        /// <returns>
        /// NA for NA, Waiting for any state ordered before Running, Running for Running,
        /// Finished for Completed, Failed for Failed, and NA for any other state.
        /// </returns>
        private VertexTaskState YarnTaskStateToVertexTaskState(YarnTaskState ts)
        {
            VertexTaskState mapped;

            if (ts == YarnTaskState.NA)
            {
                mapped = VertexTaskState.NA;
            }
            else if (ts < YarnTaskState.Running)
            {
                mapped = VertexTaskState.Waiting;
            }
            else if (ts == YarnTaskState.Running)
            {
                mapped = VertexTaskState.Running;
            }
            else if (ts == YarnTaskState.Completed)
            {
                mapped = VertexTaskState.Finished;
            }
            else if (ts == YarnTaskState.Failed)
            {
                mapped = VertexTaskState.Failed;
            }
            else
            {
                // Any other state ordered after Running has no mapping and is reported as NA.
                mapped = VertexTaskState.NA;
            }

            DryadLogger.LogDebug("Task State", "Mapped ts: {0} to vts: {1}", ts, mapped);
            return mapped;
        }
예제 #7
0
        /// <summary>
        /// Advances the process to <paramref name="newState"/> and, while holding <c>SyncRoot</c>,
        /// notifies state-change listeners and signals waiters registered for that state or any
        /// earlier one. A request that does not move the state forward is logged and ignored.
        /// </summary>
        /// <param name="newState">State to move to; must be greater than the current state to take effect.</param>
        public void ChangeState(ProcessState newState)
        {
            lock (SyncRoot)
            {
                if (newState <= m_currentState)
                {
                    DryadLogger.LogWarning("Change State", "Unexpected state change attempted for process {0}: from {1} to {2}", this.m_id, this.m_currentState.ToString(), newState.ToString());
                    return;
                }

                DryadLogger.LogDebug("Change State", "Transition process {0} from state {1} to state {2}", m_id, m_currentState, newState);
                m_currentState = newState;

                // Listeners: entries for earlier states are included too, in case a state was
                // skipped (e.g. the process failed to start). Removal is deferred until after
                // the enumeration.
                List <ProcessState> notifiedStates = new List <ProcessState>();
                foreach (ProcessState listenedState in m_stateChangeListeners.Keys)
                {
                    if (listenedState > m_currentState)
                    {
                        continue;
                    }

                    var handler = m_stateChangeListeners[listenedState];
                    if (handler != null)
                    {
                        XComputeProcessStateChangeEventArgs args = new XComputeProcessStateChangeEventArgs(m_id, m_currentState, false);
                        handler(this, args);

                        // The handler has fired, so any timer registered for it is disposed and dropped.
                        if (m_stateChangeTimers.ContainsKey(handler))
                        {
                            m_stateChangeTimers[handler].Dispose();
                            m_stateChangeTimers.Remove(handler);
                        }
                    }
                    notifiedStates.Add(listenedState);
                }
                foreach (ProcessState notified in notifiedStates)
                {
                    m_stateChangeListeners.Remove(notified);
                }

                // Waiters: first signal every event for the reached states ...
                List <ProcessState> signalledStates = new List <ProcessState>();
                foreach (ProcessState awaitedState in m_stateChangeWaiters.Keys)
                {
                    if (awaitedState > m_currentState)
                    {
                        continue;
                    }

                    foreach (ManualResetEvent waiter in m_stateChangeWaiters[awaitedState])
                    {
                        waiter.Set();
                    }
                    signalledStates.Add(awaitedState);
                }

                // ... then close the events and drop the entries.
                foreach (ProcessState signalled in signalledStates)
                {
                    foreach (ManualResetEvent waiter in m_stateChangeWaiters[signalled])
                    {
                        try
                        {
                            waiter.Close();
                        }
                        catch (Exception ex)
                        {
                            DryadLogger.LogError(0, ex);
                        }
                    }
                    m_stateChangeWaiters.Remove(signalled);
                }

                if (m_currentState == ProcessState.AssignedToNode)
                {
                    m_assignedToNodeEvent.Set();
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Copy the resources from staging dir to working dir
        /// </summary>
        /// <param name="resources">
        /// Comma-separated list supplied by dryadlinq: the first entry is the staging location
        /// (a directory, or an "hdfs://" URI) and the remaining entries are file names. If the
        /// value starts with '@', the rest of it is the path of a file that holds the list.
        /// </param>
        /// <returns>
        /// true when every listed file is present in the job directory afterwards; false when
        /// the list is null, holds no file names, or a local copy fails.
        /// </returns>
        private static bool CopyStagedJobResources(string resources)
        {
            if (resources == null)
            {
                Console.Error.WriteLine("[ExecutionHelper.CopyJobResources] resources = null");
                return false;
            }

            // Length check first: indexing an empty string would throw. An empty list then
            // falls through to the "invalid length" path below.
            if (resources.Length > 0 && resources[0] == '@')
            {
                resources = File.ReadAllText(resources.Substring(1));
            }

            // Drop a single trailing separator so it does not produce an empty file name.
            if (resources.EndsWith(",", StringComparison.Ordinal))
            {
                resources = resources.Substring(0, resources.Length - 1);
            }

            string[] files = resources.Split(',');
            // Arguments are handed to the logger rather than pre-formatted, so braces in a
            // file name are never interpreted as format placeholders.
            DryadLogger.LogInformation("CopyStagedJobResources", "Will copy {0} resource files.", files.Length);

            if (files.Length <= 1)
            {
                Console.Error.WriteLine("[ExecutionHelper.CopyJobResources] invalid XC_RESOURCEFILES length = {0}", files.Length);
                return false;
            }

            string source = files[0];
            bool sourceIsHdfs = source.StartsWith("hdfs://", StringComparison.OrdinalIgnoreCase);

            for (int i = 1; i < files.Length; i++)
            {
                string jobFilePath = Path.Combine(ProcessPathHelper.JobPath, files[i]);

                //
                // File may already exist due to local resource copying
                //
                if (File.Exists(jobFilePath))
                {
                    continue;
                }

                //
                // If file doesn't exist today, get it from staging location
                //
                if (sourceIsHdfs)
                {
                    // copy from HDFS
                    DryadLogger.LogDebug("CopyStagedJobResources",
                                         "[ExecutionHelper.CopyJobResources] Copying '{0}' to '{1}' from HDFS dir {2}",
                                         files[i], jobFilePath, source);
                    GetHdfsFile(source, files[i], jobFilePath);
                    continue;
                }

                string sourceFile = Path.Combine(source, files[i]);
                try
                {
                    DryadLogger.LogDebug("CopyStagedJobResources",
                                         "[ExecutionHelper.CopyJobResources] Copying '{0}' to '{1}'",
                                         sourceFile, jobFilePath);
                    File.Copy(sourceFile, jobFilePath);
                }
                catch (Exception e)
                {
                    DryadLogger.LogInformation("CopyStagedJobResources",
                                               "[ExecutionHelper.CopyJobResources] Exception copying '{0}' to '{1}': {2}",
                                               sourceFile, jobFilePath, e.Message);
                    return false;
                }
            }

            return true;
        }