/// <summary>
/// Releases the wait handles held by this process. Only the first call does any work.
/// </summary>
/// <param name="disposing">
/// True to close the assigned-to-node event and every registered state-change waiter;
/// false to only mark the instance as disposed.
/// </param>
private void Dispose(bool disposing)
{
    if (m_disposed)
    {
        return;
    }

    if (disposing)
    {
        DryadLogger.LogInformation("Dispose Process", "Releasing resources for process id {0}", this.m_id);
        this.m_assignedToNodeEvent.Close();

        foreach (KeyValuePair<ProcessState, List<ManualResetEvent>> waitersForState in m_stateChangeWaiters)
        {
            foreach (ManualResetEvent waiter in waitersForState.Value)
            {
                // A failure to close one waiter is logged and must not stop the others from being closed.
                try
                {
                    waiter.Close();
                }
                catch (Exception closeError)
                {
                    DryadLogger.LogError(0, closeError);
                }
            }
        }
    }

    m_disposed = true;
}
/// <summary>
/// Starts logging, creates the task update queue and allocates the vertex table.
/// When the job manager environment variable is not set (i.e. not running in a vertex),
/// also reads the node limits from the environment and initializes the Java GM.
/// </summary>
public YarnSchedulerHelper()
{
    // init the DryadLogger, just to make sure
    DryadLogger.Start("xcompute.log");
    m_taskUpdateQueue = new BlockingCollection<VertexTask>();

    string jobManagerSetting = Environment.GetEnvironmentVariable(Constants.jobManager);
    if (!String.IsNullOrEmpty(jobManagerSetting))
    {
        // Running in a vertex: only the vertex table is needed.
        m_vertices = new VertexTask[JobMaxNodes + 2];
        DryadLogger.LogInformation("YarnSchedulerHelper()", "Not initializing JAVA GM");
        return;
    }

    // Not running in a vertex, so init the GM.
    string minNodesSetting = Environment.GetEnvironmentVariable("MINIMUM_COMPUTE_NODES");
    m_minNodes = int.Parse(minNodesSetting);
    string maxNodesSetting = Environment.GetEnvironmentVariable("MAXIMUM_COMPUTE_NODES");
    m_maxNodes = int.Parse(maxNodesSetting);
    m_startNodes = m_minNodes;

    // Allocated only after the node limits have been assigned.
    m_vertices = new VertexTask[JobMaxNodes + 2];

    DryadLogger.LogInformation("YarnSchedulerHelper()", "Initializing JAVA GM");
    DryadLogger.LogInformation("YarnSchedulerHelper()", "m_maxNodes: {0}", m_maxNodes);

    UpdateProcessState gmCallback = new UpdateProcessState(QueueYarnUpdate);
    AMInstance.RegisterGMCallback(gmCallback);

    VertexChangeEventHandler vertexChangeHandler = new VertexChangeEventHandler(OnVertexChangeHandler);
    ((ISchedulerHelper)this).OnVertexChange += vertexChangeHandler;

    m_appMaster = new AMInstance();
}
/// <summary>
/// Applies a single task update: builds the change arguments from the new task and the
/// previously stored task (if any), stores the new task in the vertex table and raises
/// the vertex change event.
/// </summary>
/// <param name="v">The updated task; its Id is used as the index into the vertex table.</param>
public void ProcessYarnUpdate(VertexTask v)
{
    DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} on node {1} is in state {2}", v.Id, v.Node, v.State);

    VertexChangeEventArgs e = new VertexChangeEventArgs(v.Id)
    {
        NewNode = v.Node,
        NewState = YarnTaskStateToVertexTaskState(v.State),
        NewRequeueCount = v.RequeueCount
    };

    // The "old" values are only filled in when this task id has been seen before.
    VertexTask previous = m_vertices[v.Id];
    if (previous != null)
    {
        e.OldNode = previous.Node;
        e.OldState = YarnTaskStateToVertexTaskState(previous.State);
        e.OldRequeueCount = previous.RequeueCount;
    }

    bool requeueCountChanged = e.NewRequeueCount != e.OldRequeueCount;
    if (requeueCountChanged)
    {
        DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} requeue count changed from {1} to {2}", v.Id, e.OldRequeueCount, e.NewRequeueCount);
    }

    // Update current vertex state
    m_vertices[v.Id] = v;
    m_vertexChangeEvent(this, e);
    //m_taskChangeEvt.Set();
}
/// <summary>
/// Blocks until at least m_startNodes tasks are running.
/// The running count is re-checked each time the task change event is signalled.
/// </summary>
/// <returns>Always true; the method only returns once enough tasks are running.</returns>
bool ISchedulerHelper.WaitForTasksReady()
{
    // The basic strategy is to wait for the maximum number of vertex tasks which is
    // practical. Start by waiting for AllocatedNodes.Count. As tasks fail or are cancelled,
    // decrement the number of tasks to wait for until we drop below Min at which time the
    // scheduler will end the job. Also, if tasks are rerun, increment the number of tasks to wait for.
    while (true)
    {
        // Signalled by the task monitor thread after it has processed a batch of changes.
        m_taskChangeEvt.WaitOne();

        // Hold the lock so OnVertexChangeHandler cannot update the counts while they are checked.
        lock (this)
        {
            DryadLogger.LogInformation("Wait for vertex tasks", "{0} tasks are running, waiting for at least {1} before starting", m_runningTasks, m_startNodes);

            if (m_runningTasks < m_startNodes)
            {
                // Not enough yet; go back to waiting for the next batch.
                continue;
            }

            DryadLogger.LogDebug("Wait for vertex tasks", "Sufficient number of tasks transitioned to running to begin: {0} running tasks", m_runningTasks);
            return true;
        }
    }
}
/// <summary>
/// Body of the task monitoring thread. Each iteration drains task updates from the
/// blocking queue for roughly one poll interval, signals the task change event once if
/// anything was processed, and then waits on the stop event for one poll interval.
/// The poll interval starts at 1 second and doubles each iteration up to 16 seconds.
/// Returns when the stop event is signalled.
/// </summary>
private void TaskMonitorThread()
{
    TimeSpan pollInterval = TimeSpan.FromSeconds(1);
    TimeSpan maxPollInterval = TimeSpan.FromSeconds(16);

    // The main loop. Each iteration polls for task changes.
    while (true)
    {
        bool foundUpdate = false;

        // Measure the batch window with a monotonic timer. Local wall-clock time
        // (DateTime.Now) jumps on DST transitions and clock adjustments, which could
        // end the window early or stretch it.
        System.Diagnostics.Stopwatch batchTimer = System.Diagnostics.Stopwatch.StartNew();

        //
        // Process change results from blocking queue
        //
        do
        {
            VertexTask v = null;
            if (m_taskUpdateQueue.TryTake(out v, pollInterval))
            {
                foundUpdate = true;
                ProcessYarnUpdate(v);
            }
        } while (batchTimer.Elapsed < pollInterval);

        if (foundUpdate)
        {
            // Notify WaitForTasksReady once for each polling cycle
            // so that it gets all the changes in one batch
            m_taskChangeEvt.Set();
        }

        // Check to see if we've been told to stop.
        // Timeout after pollInterval.
        // TODO: For better shutdown perf, we may want to check this at other places
        // or just kill the thread - but this provides a more graceful exit.
        if (m_threadStopEvt.WaitOne(pollInterval, true))
        {
            m_taskMonitorThreadRunning = false;
            DryadLogger.LogInformation("Task Monitoring Thread", "Received shutdown event");
            return;
        }

        // Double the polling interval each iteration up to maxPollInterval
        if (pollInterval < maxPollInterval)
        {
            double newSeconds = 2 * pollInterval.TotalSeconds;
            pollInterval = newSeconds < maxPollInterval.TotalSeconds
                ? TimeSpan.FromSeconds(newSeconds)
                : maxPollInterval;
        }
    }
}
/// <summary>
/// Marks the process as cancelled and completed, then kills the vertex host process if one exists.
/// </summary>
/// <param name="suppressNotifications">
/// When true, the Exited handler is detached before the kill and Process_Exited is not
/// invoked by this method.
/// </param>
public void Cancel(bool suppressNotifications)
{
    DryadLogger.LogMethodEntry(this.DryadId);

    lock (syncRoot)
    {
        bool alreadyCompleted = (state == ProcessState.Completed);
        if (alreadyCompleted)
        {
            // The process finished before the cancellation arrived; nothing to do.
            DryadLogger.LogInformation("Cancel process", "Process {0} has already exited", DryadId);
            DryadLogger.LogMethodExit();
            return;
        }

        DryadLogger.LogInformation("Cancel process", "Process {0} has not already exited", DryadId);
        state = ProcessState.Completed;
        this.cancelled = true;
    }

    if (systemProcess == null)
    {
        DryadLogger.LogInformation("Cancel process", "Process {0} has not started yet", DryadId);
    }
    else
    {
        // The process was started, so kill it.
        try
        {
            // Killing the process will trigger Process_Exited
            DryadLogger.LogInformation("Cancel process", "Killing system process for process id {0}", DryadId);

            if (suppressNotifications)
            {
                // Detach the Exited handler so the kill does not raise a notification.
                systemProcess.Exited -= this.Process_Exited;
            }

            systemProcess.Kill();
            DryadLogger.LogMethodExit();
            return;
        }
        catch (Exception killError)
        {
            // The kill failed; log it and fall through to the manual notification below.
            DryadLogger.LogError(0, killError, "Failed to kill system process for process id {0}", DryadId);
        }
    }

    // Process was either not running or failed to die, trigger Process_Exited ourself
    if (!suppressNotifications)
    {
        Process_Exited(this, null);
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Wraps a task state notification in a VertexTask and adds it to the task update queue.
/// </summary>
/// <param name="taskId">Id of the task whose state changed.</param>
/// <param name="taskState">New state, as the integer value of a YarnTaskState.</param>
/// <param name="nodeName">Name of the node the task is on.</param>
public void QueueYarnUpdate(int taskId, int taskState, string nodeName)
{
    // Placeholders must be {0}..{2} for the three arguments supplied. The previous
    // {2}/{3} indices printed the state in the node position and referenced an
    // argument that does not exist.
    DryadLogger.LogInformation("QueueYarnUpdate", "Task {0} on node {1} is in state {2}", taskId, nodeName, taskState);

    // Set change event arguments
    YarnTaskState yTaskState = (YarnTaskState)taskState;
    VertexTask v = new VertexTask(taskId, nodeName, yTaskState, int.MaxValue, DateTime.UtcNow);
    m_taskUpdateQueue.Add(v);
}
/// <summary>
/// Vertex host process exited event - marks process state and queues up exit process thread.
/// The body runs at most once per process; later calls log and return.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="args">Event arguments (not used; callers in this file pass null when invoking directly).</param>
private void Process_Exited(object sender, EventArgs args)
{
    DryadLogger.LogMethodEntry(DryadId);

    // Ensure the process exited code can only be executed once
    lock (syncRoot)
    {
        if (exited)
        {
            DryadLogger.LogInformation("Process exit", "Process {0} already exited", DryadId);
            DryadLogger.LogMethodExit();
            return;
        }

        exited = true;
    }

    if (cancelled)
    {
        DryadLogger.LogInformation("Process exit", "Process {0} was cancelled", DryadId);
        exitCode = unchecked ((int)0x830A0003); // DrError_VertexReceivedTermination
    }
    else if (failed || systemProcess == null)
    {
        // This method is also invoked directly when the vertex host could not be started.
        // In that case there is no started system process to query: reading
        // systemProcess.ExitCode would throw, and the failure code already recorded
        // in exitCode must not be overwritten.
        DryadLogger.LogInformation("Process exit", "Process {0} did not start, exit code {1}", DryadId, exitCode);
        lock (syncRoot)
        {
            state = ProcessState.Completed;
            this.failed = true;
        }
    }
    else
    {
        exitCode = systemProcess.ExitCode;
        DryadLogger.LogInformation("Process exit", "Process {0} exit code {1}", DryadId, exitCode);

        lock (syncRoot)
        {
            state = ProcessState.Completed;
            if (exitCode != 0)
            {
                // A non-zero exit code marks the process as failed.
                this.failed = true;
            }
        }
    }

    //
    // Ensure that the vertex complete event is sent to GM and that all pending properties are handled
    //
    ThreadPool.QueueUserWorkItem(new WaitCallback(ExitProcessThreadProc));
    DryadLogger.LogMethodExit();
}
/// <summary>
/// Cancels this process. A process not yet assigned to a node is marked cancelled and moved
/// straight to Completed; a process that already has a dispatcher is cancelled through that
/// dispatcher; a completed process is left untouched.
/// </summary>
public void Cancel()
{
    lock (SyncRoot)
    {
        // Not yet assigned to a node: there is nothing to cancel remotely.
        if (this.CurrentState < ProcessState.AssignedToNode)
        {
            this.m_cancelled = true;
            this.ExitCode = 0x830A0003; // DrError_VertexReceivedTermination
            DryadLogger.LogInformation("Cancel process", "Cancelation received for vertex {0}.{1} before it was assigned to a node", m_graphManagerId, m_graphManagerVersion);
            ChangeState(ProcessState.Completed);
            return;
        }

        // Already completed: nothing to do.
        if (this.CurrentState == ProcessState.Completed)
        {
            DryadLogger.LogInformation("Cancel process", "Cancellation received for vertex {0}.{1} after it completed", m_graphManagerId, m_graphManagerVersion);
            return;
        }

        // Assigned but no dispatcher: this is an unexpected condition.
        if (Dispatcher == null)
        {
            DryadLogger.LogError(0, null, "Cancellation received for vertex {0}.{1} in state {2} with no dispatcher", m_graphManagerId, m_graphManagerVersion, CurrentState.ToString());
            return;
        }

        // Assigned to a node: the cancellation has to be carried out at the node.
        DryadLogger.LogInformation("Cancel process", "Cancellation received for vertex {0}.{1} after it was assigned to node {2}", m_graphManagerId, m_graphManagerVersion, Dispatcher.NodeName);

        // Dispatcher is checked again immediately before use.
        if (Dispatcher != null)
        {
            Dispatcher.CancelScheduleProcess(m_id);
        }
    }
}
/// <summary>
/// Initialization thread body. For the local scheduler type the initialized event is set
/// immediately; otherwise the job working directory is initialized first. Any failure or
/// exception is reported through Surrender.
/// </summary>
/// <param name="state">Thread argument (not used).</param>
void InitializationThreadProc(Object state)
{
    try
    {
        string schedulerType = Environment.GetEnvironmentVariable(Constants.schedulerTypeEnvVar);

        if (schedulerType != Constants.schedulerTypeLocal)
        {
            string resourceFiles = Environment.GetEnvironmentVariable("XC_RESOURCEFILES");
            bool initialized = ExecutionHelper.InitializeForJobExecution(resourceFiles);

            if (!initialized)
            {
                Surrender(new Exception("Failed to initialize vertex service for job execution"));
                return;
            }

            DryadLogger.LogInformation("InitializationThreadProc", "InitializeForJobExecution was successful.");
        }

        // Reached for the local scheduler and after a successful job initialization.
        initializedEvent.Set();
    }
    catch (Exception initError)
    {
        Surrender(initError);
    }
}
/// <summary>
/// Opens the vertex callback service host on the given address, retrying while the
/// address is already in use.
/// </summary>
/// <param name="listenUri">Base address the service host listens on.</param>
/// <param name="schedulerHelper">Supplies the binding used for the service endpoint.</param>
/// <returns>
/// True when the host was opened; false when a CommunicationException prevented it
/// (including the address still being in use on the final attempt).
/// </returns>
public bool Start(string listenUri, ISchedulerHelper schedulerHelper)
{
    DryadLogger.LogMethodEntry(listenUri);
    Uri baseAddress = new Uri(listenUri);

    try
    {
        NetTcpBinding binding = schedulerHelper.GetVertexServiceBinding();
        selfHost = null;

        // Retry opening the service port if the address is already in use.
        // 20 attempts with a 3 second pause results in retrying for ~1 min.
        const int maxAttempts = 20;
        int attempt = 0;
        bool opened = false;

        while (!opened && attempt < maxAttempts)
        {
            attempt++;
            try
            {
                // Step 1 of the hosting procedure: Create ServiceHost
                selfHost = new ServiceHost(callbackService, baseAddress);

                // Step 2 of the hosting procedure: Add service endpoints.
                selfHost.AddServiceEndpoint(typeof(IDryadVertexCallback), binding, Constants.vertexCallbackServiceName);

                ServiceThrottlingBehavior throttling = new ServiceThrottlingBehavior
                {
                    MaxConcurrentCalls = Constants.MaxConnections,
                    MaxConcurrentSessions = Constants.MaxConnections
                };
                selfHost.Description.Behaviors.Add(throttling);

                // Step 3 of hosting procedure : Add a security manager
                selfHost.Authorization.ServiceAuthorizationManager = new DryadVertexServiceAuthorizationManager();

                // Step 4 of the hosting procedure: Start the service.
                selfHost.Open();
                opened = true;
            }
            catch (AddressAlreadyInUseException)
            {
                if (selfHost != null)
                {
                    selfHost.Abort();
                    selfHost = null;
                }

                // On the last attempt do not sleep; rethrow so the outer handler reports failure.
                if (attempt == maxAttempts)
                {
                    throw;
                }

                DryadLogger.LogInformation("Start Vertex Callback Service", "Address already in use. Retrying...");
                System.Threading.Thread.Sleep(3000);
            }
        }

        DryadLogger.LogInformation("Start Vertex Callback Service", "Service Host started successfully");
        return true;
    }
    catch (CommunicationException commError)
    {
        DryadLogger.LogCritical(0, commError, "Failed to start vertex callback service");

        // Best-effort cleanup: a failure to abort is ignored.
        try
        {
            if (selfHost != null)
            {
                selfHost.Abort();
            }
        }
        catch
        {
        }

        return false;
    }
}
/// <summary>
/// The main entry point for the application. Creates the working directory, starts
/// tracing, hosts the vertex service and blocks until the shutdown event is set.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
/// <returns>0 when the service shut down cleanly; 1 on any startup or runtime failure.</returns>
private static int Main(string[] args)
{
    //
    // Try to create working directory. Fail vertex service if unable to do so.
    //
    bool createdJobDir = false;
    int retryCount = 0;
    do
    {
        try
        {
            ProcessPathHelper.CreateUserWorkingDirectory();
            Directory.CreateDirectory(ProcessPathHelper.JobPath);
            createdJobDir = true;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("Failed to create working directory, {0}. Error: {1}.", ProcessPathHelper.JobPath, ex.ToString());
            retryCount++;
        }
    } while (retryCount < numRetries && !createdJobDir);

    if (!createdJobDir)
    {
        Console.Error.WriteLine("Vertex service cannot proceed because working directory could not be created.");
        return(1);
    }

    //
    // Get Task ID from environment
    //
    int taskId;
    if (Int32.TryParse(Environment.GetEnvironmentVariable("CCP_TASKID"), out taskId) == false)
    {
        // Single-argument overload: the former two-string call bound to the
        // (format, arg0) overload and printed only "Program.Main", dropping the message.
        Console.Error.WriteLine("Failed to read CCP_TASKID from environment");
        return(1);
    }

    //
    // Initialize tracing subsystem
    //
    string traceFile = Path.Combine(ProcessPathHelper.JobPath, String.Format("VertexServiceTrace_{0}.txt", taskId));
    DryadLogger.Start(traceFile);

    //
    // Initialize scheduler helper of the correct type
    //
    ISchedulerHelper schedulerHelper;
    try
    {
        schedulerHelper = SchedulerHelperFactory.GetInstance();
    }
    catch (Exception ex)
    {
        DryadLogger.LogCritical(0, ex, "Failed to get scheduler helper");
        DryadLogger.Stop();
        Console.Error.WriteLine("Failed to contact HPC scheduler. See log for details.");
        return(1);
    }

    //
    // Step 1 of the address configuration procedure: Create a URI to serve as the base address.
    //
    string strAddress = schedulerHelper.GetVertexServiceBaseAddress("localhost", taskId);
    Uri baseAddress = new Uri(strAddress);

    //
    // Step 2 of the hosting procedure: Create ServiceHost
    //
    ServiceHost selfHost = new ServiceHost(typeof(VertexService), baseAddress);
    try
    {
        //
        // Get the service binding
        //
        NetTcpBinding binding = schedulerHelper.GetVertexServiceBinding();

        //
        // Step 3 of the hosting procedure: Add service endpoints.
        //
        ServiceEndpoint vertexEndpoint = selfHost.AddServiceEndpoint(typeof(IDryadVertexService), binding, Constants.vertexServiceName);
        DryadLogger.LogInformation("Initialize vertex service", "listening on address {0}", vertexEndpoint.Address.ToString());

        //
        // Step 4 of hosting procedure : Add a security manager
        // TODO: Fix this for local scheduler and / or Azure scheduler when supported
        //
        selfHost.Authorization.ServiceAuthorizationManager = new DryadVertexServiceAuthorizationManager();

        // Step 5 of the hosting procedure: Start (and then stop) the service.
        selfHost.Open();
        Console.WriteLine("Vertex Service up and waiting for commands");

        // Wait for the shutdown event to be set.
        VertexService.shutdownEvent.WaitOne(-1, true);

        // Check vertex service shutdown condition
        if (VertexService.internalShutdown)
        {
            string errorMsg = string.Format("Vertex Service Task unable to continue after critical error in initialization or communication: {0}", VertexService.ShutdownReason.ToString());
            Console.WriteLine(errorMsg);
            DryadLogger.LogCritical(0, new Exception(errorMsg));
            DryadLogger.Stop();

            // Best-effort abort; failures while aborting are ignored.
            try
            {
                selfHost.Abort();
            }
            catch
            {
            }

            return(1);
        }

        // Close the ServiceHostBase to shutdown the service.
        selfHost.Close();
    }
    catch (CommunicationException ce)
    {
        //
        // Report any errors and fail task
        //
        DryadLogger.LogCritical(0, ce, "A communication exception occurred");
        DryadLogger.Stop();
        try
        {
            selfHost.Abort();
        }
        catch
        {
        }

        Console.Error.WriteLine("CommunicationException occured, aborting vertex service. See log for details.");
        return(1);
    }
    catch (Exception ex)
    {
        //
        // Report any errors and fail task
        //
        DryadLogger.LogCritical(0, ex, "An exception occurred");
        DryadLogger.Stop();
        try
        {
            selfHost.Abort();
        }
        catch
        {
        }

        Console.Error.WriteLine("An exception occured, aborting vertex service. See log for details.");
        return(1);
    }

    DryadLogger.LogInformation("Vertex Service", "Shut down cleanly");
    DryadLogger.Stop();
    return(0);
}
/// <summary>
/// Copy the resources from staging dir to working dir.
/// </summary>
/// <param name="resources">
/// List of resources supplied by dryadlinq: a comma-separated list whose first entry is the
/// staging location (a directory, or an "hdfs://" location) and whose remaining entries are
/// file names. A value starting with '@' names a file whose text contains that list.
/// </param>
/// <returns>
/// True when every listed file is present in the job directory afterwards (already there or
/// copied); false when the list is null, has no file entries, or a local copy fails.
/// </returns>
private static bool CopyStagedJobResources(string resources)
{
    if (resources == null)
    {
        Console.Error.WriteLine("[ExecutionHelper.CopyJobResources] resources = null");
        return(false);
    }

    // StartsWith is safe on an empty string, where indexing resources[0] would throw.
    // An empty list then falls through to the "invalid length" failure below.
    if (resources.StartsWith("@", StringComparison.Ordinal))
    {
        resources = File.ReadAllText(resources.Substring(1));
    }

    // Drop a single trailing separator so it does not produce an empty file entry.
    if (resources.EndsWith(",", StringComparison.Ordinal))
    {
        resources = resources.Substring(0, resources.Length - 1);
    }

    string[] files = resources.Split(',');

    // Values are passed as logger arguments rather than pre-formatted into the format
    // string, so braces inside paths or exception messages cannot be misread as placeholders.
    DryadLogger.LogInformation("CopyStagedJobResources", "Will copy {0} resource files.", files.Length);

    if (files.Length <= 1)
    {
        Console.Error.WriteLine("[ExecutionHelper.CopyJobResources] invalid XC_RESOURCEFILES length = {0}", files.Length);
        return(false);
    }

    // The first entry is the staging location; whether it is HDFS does not change per file.
    string source = files[0];
    bool sourceIsHdfs = source.StartsWith("hdfs://", StringComparison.InvariantCultureIgnoreCase);

    for (int i = 1; i < files.Length; i++)
    {
        string jobFilePath = Path.Combine(ProcessPathHelper.JobPath, files[i]);

        //
        // File may already exist due to local resource copying
        //
        if (File.Exists(jobFilePath))
        {
            continue;
        }

        //
        // If file doesn't exist today, get it from staging location
        //
        if (sourceIsHdfs)
        {
            // copy from HDFS
            DryadLogger.LogDebug("CopyStagedJobResources",
                "[ExecutionHelper.CopyJobResources] Copying '{0}' to '{1}' from HDFS dir {2}",
                files[i], jobFilePath, source);
            GetHdfsFile(source, files[i], jobFilePath);
        }
        else
        {
            string sourceFile = Path.Combine(source, files[i]);
            try
            {
                DryadLogger.LogDebug("CopyStagedJobResources",
                    "[ExecutionHelper.CopyJobResources] Copying '{0}' to '{1}'",
                    sourceFile, jobFilePath);
                File.Copy(sourceFile, jobFilePath);
            }
            catch (Exception e)
            {
                DryadLogger.LogInformation("CopyStagedJobResources",
                    "[ExecutionHelper.CopyJobResources] Exception copying '{0}' to '{1}': {2}",
                    sourceFile, jobFilePath, e.Message);
                return(false);
            }
        }
    }

    return(true);
}
/// <summary>
/// Called in new thread in setgetproperty service operation. Waits for the process to
/// start, records the properties carried by the request, optionally blocks on a property,
/// and reports the result back to the requester.
/// </summary>
/// <param name="obj">The request, expected to be a PropertyRequest.</param>
void SetGetPropThreadProc(Object obj)
{
    DryadLogger.LogMethodEntry(DryadId);
    PropertyRequest r = obj as PropertyRequest;
    ProcessInfo infoLocal = new ProcessInfo();
    ulong[] propertyVersions = null;
    string[] propertyLabels = null;

    //
    // Make sure process is started before continuing
    //
    if (this.State < ProcessState.Running)
    {
        try
        {
            processStartEvent.WaitOne();
        }
        catch (ObjectDisposedException ex)
        {
            // The process was cancelled and released before it started running, just return
            if (exited)
            {
                DryadLogger.LogInformation("SetGetProp Thread", "Process {0} cancelled or exited before starting.", this.DryadId);
            }
            else
            {
                DryadLogger.LogError(0, ex);
            }

            DryadLogger.LogMethodExit();
            return;
        }
    }

    //
    // Use status_pending if running, vertex initialization failure if process is failed and process exit code otherwise
    //
    infoLocal.processStatus = 0x103; // WinNT.h STATUS_PENDING
    infoLocal.processState = state;
    if (state == ProcessState.Running)
    {
        infoLocal.exitCode = 0x103; // WinNT.h STATUS_PENDING
    }
    else if (failed)
    {
        infoLocal.exitCode = Constants.DrError_VertexError;
    }
    else if (cancelled)
    {
        infoLocal.exitCode = Constants.DrError_VertexReceivedTermination; // DryadError_VertexReceivedTermination
    }
    else
    {
        infoLocal.exitCode = (uint)systemProcess.ExitCode;
    }

    //
    // Record specified properties and update versions - wakes up anyone waiting for property changes
    //
    SetProperties(r.infos, out propertyLabels, out propertyVersions);

    //
    // Try to get property update
    //
    if (BlockOnProperty(r.blockOnLabel, r.blockOnVersion, r.maxBlockTime))
    {
        //
        // If property update was received, update the received property information
        // If received property marks vertex completed, record that
        //
        if (r.getPropLabel != null && r.getPropLabel.Length > 0)
        {
            lock (syncRoot)
            {
                infoLocal.propertyInfos = new ProcessPropertyInfo[1];
                int index;
                if (TryGetProperty(r.getPropLabel, out infoLocal.propertyInfos[0], out index) == false)
                {
                    DryadLogger.LogError(0, null, "Failed to get property for label {0}", r.getPropLabel);
                }

                // Only inspect the property when the lookup actually produced one; a failed
                // lookup must not be dereferenced.
                ProcessPropertyInfo retrieved = infoLocal.propertyInfos[0];
                if (retrieved != null && StatusMessageContainsDryadError_VertexCompleted(retrieved.propertyLabel))
                {
                    CopyProp(retrieved, out latestVertexStatusSent);
                }
            }
        }

        //
        // If request asks for statistics on vertex process, get them
        //
        if (r.ProcessStatistics)
        {
            if (GetStatistics(out infoLocal.processStatistics) == false)
            {
                DryadLogger.LogError(0, null, "Failed to get vertex statistics");
            }
        }
    }

    //
    // Try to report property change, if unsuccessful, kill the running vertex host process
    //
    if (!ReplyDispatcher.SetGetPropsComplete(r.replyUri, systemProcess, dryadProcessId, infoLocal, propertyLabels, propertyVersions))
    {
        try
        {
            systemProcess.Kill();
        }
        catch (InvalidOperationException /* unused ioe */)
        {
            // The process has already exited
            // -or-
            // There is no process associated with this Process object.
        }
        catch (Exception eInner)
        {
            //
            // all other exceptions
            //
            DryadLogger.LogError(0, eInner, "Exception calling back to '{0}'", r.replyUri);
        }
    }

    //
    // If a property was handled from the graph manager, decrement the waiter count
    //
    if (ReplyDispatcher.IsGraphMrgUri(r.replyUri))
    {
        int n = Interlocked.Decrement(ref propertyWaiters);
        DryadLogger.LogInformation("SetGetProp Thread", "Process {0} propertyWaiters = {1}", DryadId, n);
    }

    lock (syncRoot)
    {
        //
        // If vertex process has exited, and sending vertex completed event, we can stop worrying
        //
        if (!finalStatusMessageSent
            && latestVertexStatusSent != null
            && !String.IsNullOrEmpty(latestVertexStatusSent.propertyString)
            && latestVertexStatusSent.propertyString.Contains(string.Format(@"(0x{0:x8})", Constants.DrError_VertexCompleted)))
        {
            finalStatusMessageSent = true;
        }
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Asynchronously called on start command. Waits for service initialization, prepares the
/// vertex working directory, builds the start info from the command line and environment,
/// starts the vertex host process, and reports the outcome: a Running state change on
/// success, or a Process_Exited call on failure.
/// </summary>
/// <param name="obj">Expected to be the ManualResetEvent that signals service initialization.</param>
void StartProcessThreadProc(Object obj)
{
    ManualResetEvent serviceInitializedEvent = obj as ManualResetEvent;
    bool started = false;
    try
    {
        //
        // Wait for service initialization
        //
        serviceInitializedEvent.WaitOne();
        if (ExecutionHelper.InitializeForProcessExecution(dryadProcessId, Environment.GetEnvironmentVariable("XC_RESOURCEFILES")))
        {
            //
            // Vertex working directory configured successfully, start the vertex host
            //
            environment.Add(Constants.vertexSvcLocalAddrEnvVar, localAddress);
            ProcessStartInfo startInfo = new ProcessStartInfo();
            startInfo.CreateNoWindow = true;
            startInfo.UseShellExecute = false;
            startInfo.WorkingDirectory = ProcessPathHelper.ProcessPath(dryadProcessId);

            //YARN Debugging
            //var procEnvVarKeys = startInfo.EnvironmentVariables.Keys;
            //foreach (string key in procEnvVarKeys)
            //{
            //    DryadLogger.LogInformation("StartProcess", "key: '{0}' value: '{1}'", key, startInfo.EnvironmentVariables[key]);
            //}

            // The command line is split on single spaces: the first token is the executable,
            // the remaining tokens are re-joined (each followed by a space) as the arguments.
            string[] args = commandLine.Split(' ');
            string arg = "";
            for (int i = 1; i < args.Length; i++)
            {
                arg += args[i] + " ";
            }

            //
            // Use either FQ path or path relative to job path
            //
            if (Path.IsPathRooted(args[0]))
            {
                startInfo.FileName = args[0];
            }
            else
            {
                startInfo.FileName = Path.Combine(ProcessPathHelper.JobPath, args[0]);
            }

            DryadLogger.LogInformation("StartProcess", "FileName: '{0}'", startInfo.FileName);

            //
            // Add environment variable to vertex host process
            //
            startInfo.Arguments = arg;
            foreach (DictionaryEntry entry in environment)
            {
                string key = entry.Key.ToString();
                // Keys already present in the start info are logged and skipped, not overwritten.
                if (key == null || startInfo.EnvironmentVariables.ContainsKey(key))
                {
                    DryadLogger.LogInformation("StartProcess", "Attempting to add existing key '{0}' with value '{1}'", entry.Key, entry.Value);
                }
                else
                {
                    startInfo.EnvironmentVariables.Add(key, entry.Value.ToString());
                }
            }

            lock (syncRoot)
            {
                //
                // After taking lock, start the vertex host process and set up exited event handler
                //
                if (cancelled)
                {
                    // If we've already been canceled, don't start the process
                    // NOTE: this return leaves the method before the processStartEvent.Set()
                    // call at the end, so the start event is not signalled on this path.
                    DryadLogger.LogInformation("Process start", "Not starting process {0} due to receipt of cancellation", DryadId);
                    return;
                }
                else
                {
                    systemProcess = new Process();
                    systemProcess.StartInfo = startInfo;
                    systemProcess.EnableRaisingEvents = true;
                    systemProcess.Exited += new EventHandler(Process_Exited);
                    Console.WriteLine("Process start - Vertex host process starting");
                    started = systemProcess.Start();
                    Console.WriteLine("Process start - Vertex host process started");
                    if (started)
                    {
                        DryadLogger.LogInformation("Process start", "Vertex host process started");
                        state = ProcessState.Running;
                    }
                    else
                    {
                        DryadLogger.LogError(0, null, "Vertex host process failed to start");
                    }
                }
            }
        }
        else
        {
            DryadLogger.LogError(0, null, "Initialization failed");
        }
    }
    catch (Exception e)
    {
        // Any exception above is logged; 'started' stays false so the failure path below runs.
        DryadLogger.LogError(0, e, "Error starting vertex");
    }

    if (started)
    {
        //
        // Notify Graph Manager that process started if successful
        //
        bool success = ReplyDispatcher.FireStateChange(this.graphManagerReplyUri, this.dryadProcessId, ProcessState.Running);
        if (!success)
        {
            //
            // Graph manager doesn't know we started and we have no way to tell it, so it's
            // best to just fail the vertex service task and let the job manager inform the graph manager
            //
            VertexService.Surrender(new Exception("Unable to communicate with graph manager."));
        }
    }
    else
    {
        //
        // Otherwise, notify GM that process has failed
        //
        lock (syncRoot)
        {
            // If we've already been canceled, we don't need to change state or record the initialization failure
            if (!cancelled)
            {
                state = ProcessState.Completed;
                this.failed = true;
                exitCode = unchecked ((int)Constants.DrError_VertexInitialization); // DryadError_VertexInitialization
            }
        }

        if (failed) // This also means we weren't canceled
        {
            // Notify the Graph Manager that the process failed to start
            Process_Exited(this, null);
        }
    }

    //
    // Make sure process start event is set
    //
    processStartEvent.Set();
}
/// <summary>
/// Adds specified property to property wait list and waits for it.
/// </summary>
/// <param name="blockOnLabel">Property label to wait for; null or empty means there is nothing to wait for.</param>
/// <param name="blockOnVersion">Version of property to wait for</param>
/// <param name="maxBlockTime">Time to wait for property (logged only; the wait currently has no timeout).</param>
/// <returns>
/// True when no label was given, when the property is available with a satisfying version, or
/// when the property exists and the process has completed. False if property was requested
/// but none was returned, or the wait event was disposed while waiting.
/// </returns>
private bool BlockOnProperty(string blockOnLabel, ulong blockOnVersion, long maxBlockTime)
{
    DryadLogger.LogMethodEntry();

    //
    // Return true if no label is provided
    //
    if (String.IsNullOrEmpty(blockOnLabel))
    {
        DryadLogger.LogMethodExit(true);
        return(true);
    }

    DryadLogger.LogInformation("Block on property", "Label {0} Version {1} maxBlockTime {2}", blockOnLabel, blockOnVersion, maxBlockTime);
    ProcessPropertyInfo prop = null;

    // The event to wait on is captured while holding the lock. The wait loop then never
    // reads the wait-event dictionaries outside the lock, where other threads may be
    // adding entries at the same time.
    ManualResetEvent waitEvent = null;

    //
    // If the process already exited, don't bother adding a wait event for
    // this property - if it's not already set it never will be.
    //
    lock (syncRoot)
    {
        if (!exited)
        {
            //
            // Add this label and version to the wait events list if needed
            //
            if (propertyWaitEvents.ContainsKey(blockOnLabel) == false)
            {
                propertyWaitEvents.Add(blockOnLabel, new Dictionary<ulong, ManualResetEvent>());
            }

            var versionWaiters = propertyWaitEvents[blockOnLabel];
            if (versionWaiters.ContainsKey(blockOnVersion) == false)
            {
                versionWaiters.Add(blockOnVersion, new ManualResetEvent(false));
            }

            waitEvent = versionWaiters[blockOnVersion];
        }
        else
        {
            DryadLogger.LogInformation("Block on property", "Process {0} already exited, not adding waiter", this.DryadId);
        }
    }

    // todo: We still may want to implement timeouts to deal with deadlocks in the service / host but it hasn't been an issue yet.
    //if (propertyWaitEvents[blockOnLabel][blockOnVersion].WaitOne(new TimeSpan(maxBlockTime), false))

    //
    // Wait forever (or until process exits or is disposed) for the property to be set or interrupted.
    // The 100 ms timeout lets the loop notice that the process has exited.
    //
    while (waitEvent != null && !exited)
    {
        try
        {
            if (waitEvent.WaitOne(100, false))
            {
                break;
            }
        }
        catch (ObjectDisposedException)
        {
            DryadLogger.LogWarning("Block on property", "Process {0} disposed while waiting for label {1}, version {2}", DryadId, blockOnLabel, blockOnVersion);
            DryadLogger.LogMethodExit(false);
            return(false);
        }
    }

    // Did we get the property, or did the process
    // terminate?
    int index;
    if (TryGetProperty(blockOnLabel, out prop, out index))
    {
        //
        // If a property was successfully returned, return true
        //
        if ((blockOnVersion == 0) || (prop.propertyVersion > blockOnVersion))
        {
            DryadLogger.LogMethodExit(true);
            return(true);
        }

        if (state == ProcessState.Completed)
        {
            DryadLogger.LogInformation("Block on property", "Vertex completed (wait) requested version:{0} returned version:{1} of label {2}", blockOnVersion, prop.propertyVersion, blockOnLabel);
            DryadLogger.LogMethodExit(true);
            return(true);
        }
    }

    //
    // Return false if property was requested but none was found
    //
    DryadLogger.LogMethodExit(false);
    return(false);
}