/// <summary>
/// Adds a request to the pool while holding the pool lock.
/// </summary>
/// <param name="req">The request to add.</param>
public void Add(ScheduleProcessRequest req)
{
    lock (SyncRoot)
    {
        m_processRequestPool.Add(req);
    }
}
/// <summary>
/// Tries to pick a queued request that can run on <paramref name="node"/>.
/// </summary>
/// <remarks>
/// A dispatcher for the node is reserved first; if that fails no search is done.
/// Requests whose process table entry is marked cancelled are skipped. A request
/// that must run on the node wins immediately; otherwise the runnable request with
/// the largest affinity weight is chosen (a zero-weight request is only taken when
/// nothing else has been selected yet). The chosen request is removed from the pool.
/// When nothing is chosen the reserved dispatcher is released again.
/// </remarks>
/// <param name="node">Name of the node to find work for.</param>
/// <param name="req">Receives the selected request, or null when none was found.</param>
/// <returns>true when a request was selected and removed from the pool; otherwise false.</returns>
private bool FindRequestForNode(string node, out ScheduleProcessRequest req)
{
    req = null;
    bool result = false;
    int requestCount = 0;
    Dispatcher dispatcher = null;

    Stopwatch swTotal = new Stopwatch();
    Stopwatch swSearch = new Stopwatch();
    Stopwatch swBlock = new Stopwatch();

    swTotal.Start();
    if (dispatcherPool.TryReserveDispatcher(node, out dispatcher))
    {
        // swBlock measures only the time spent waiting for the request pool lock.
        swBlock.Start();
        lock (requestPool.SyncRoot)
        {
            swBlock.Stop();
            try
            {
                swSearch.Start();
                requestCount = requestPool.Count;
                if (requestCount != 0)
                {
                    ulong maxAffinity = 0;
                    foreach (ScheduleProcessRequest r in requestPool)
                    {
                        // Skip any lingering processes which have been cancelled.
                        if (processTable.ContainsKey(r.Id) && processTable[r.Id].Cancelled)
                        {
                            continue;
                        }

                        if (r.MustRunOnNode(node))
                        {
                            // Hard affinity beats every soft candidate; stop searching.
                            req = r;
                            DryadLogger.LogDebug("Find Request for Node", "process {0} has hard affinity constraint for node {1}", req.Id, node);
                            break;
                        }
                        else if (r.CanRunOnNode(node))
                        {
                            ulong thisAffinity = r.GetAffinityWeightForNode(node);
                            if (thisAffinity == 0 && req == null)
                            {
                                req = r;
                                DryadLogger.LogDebug("Find Request for Node", "Process {0} has 0 affinity constraint for node {1} but no other process has been selected yet", r.Id, node);
                            }
                            else if (thisAffinity > maxAffinity)
                            {
                                maxAffinity = thisAffinity;
                                req = r;
                                DryadLogger.LogDebug("Find Request for Node", "Process {0} with affinity constraint {1} for node {2} larger than previous max", r.Id, thisAffinity, node);
                            }
                        }
                    }
                }
                swSearch.Stop();

                if (req != null)
                {
                    requestPool.Remove(req);
                    DryadLogger.LogDebug("Find Request for Node", "Found request {0} for node {1}", req.Id, node);
                    result = true;
                }
                else
                {
                    DryadLogger.LogDebug("Find Request for Node", "Did not find any requests for node {0}", node);
                }
            }
            finally
            {
                // Give the reservation back whenever no request was handed out, including
                // when the search threw; otherwise the dispatcher would stay reserved.
                // This is done while still holding the request pool lock, exactly where the
                // release happened before, so callers that look for a free node under the
                // same lock see a consistent picture.
                if (!result)
                {
                    dispatcher.Release();
                }
            }
        }
    }
    swTotal.Stop();

    DryadLogger.LogInformation("Find Request for Node", "Searching {0} requests. Block {1} ms. Inner search {2} ms. Total elapsed time {3} ms.", requestCount, swBlock.ElapsedMilliseconds, swSearch.ElapsedMilliseconds, swTotal.ElapsedMilliseconds);
    return result;
}
/// <summary>
/// Tries to reserve a dispatcher on which <paramref name="req"/> can run.
/// </summary>
/// <remarks>
/// With a hard affinity only that node is tried. Otherwise each distinct soft
/// affinity node is tried in list order, and finally every dispatcher in the pool
/// whose node the request can run on.
/// </remarks>
/// <param name="req">The request to place.</param>
/// <param name="dispatcher">Receives the reserved dispatcher when the method returns true.</param>
/// <returns>true when a dispatcher was reserved; otherwise false.</returns>
private bool FindNodeForRequest(ScheduleProcessRequest req, out Dispatcher dispatcher)
{
    dispatcher = null;

    if (req.HardAffinity != null)
    {
        // A hard affinity leaves exactly one candidate node.
        return dispatcherPool.TryReserveDispatcher(req.HardAffinity, out dispatcher);
    }

    // First try soft affinity in decreasing order (assumes Soft Affinity list in req is sorted descending by weight)
    // Keep a set of the nodes we've already tried, because Dryad adds each affinity twice
    // once for the node and once for the "pod".
    // Node names are compared with an ordinal, case-insensitive comparer so the
    // de-duplication does not depend on the current thread culture.
    HashSet<string> attemptedNodes = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    for (int i = 0; i < req.AffinityCount; i++)
    {
        string nodeName = req.AffinityAt(i).Node;

        // Add returns false when the node was already attempted.
        if (!attemptedNodes.Add(nodeName))
        {
            continue;
        }

        if (dispatcherPool.TryReserveDispatcher(nodeName, out dispatcher))
        {
            DryadLogger.LogDebug("Find Node For Request", "process {0} satisfied affinity constraint: node {1}, weight {2}", req.Id, nodeName, req.AffinityAt(i).Weight);
            return true;
        }

        DryadLogger.LogDebug("Find Node For Request", "process {0} did not satisfy affinity constraint: node {1}, weight {2}", req.Id, nodeName, req.AffinityAt(i).Weight);
    }

    // If we get this far and at least one node was attempted, then we failed to satisfy
    // the affinity constraints; log a message so we can more easily detect this situation.
    int count = attemptedNodes.Count;
    if (count > 0)
    {
        DryadLogger.LogInformation("Find Node For Request", "process {0} failed to satisfy any of {1} affinity constraints", req.Id, count);
    }

    // Finally try any available node
    lock (dispatcherPool.SyncRoot)
    {
        foreach (Dispatcher d in dispatcherPool)
        {
            if (req.CanRunOnNode(d.NodeName) && d.Reserve())
            {
                dispatcher = d;
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Builds a scheduling request for a process and either starts it on a reserved
/// dispatcher or queues it in the request pool.
/// </summary>
/// <param name="processId">Internal id of the process; used to index the process table.</param>
/// <param name="commandLine">Command line of the process.</param>
/// <param name="softAffinities">Soft affinities passed through to the request.</param>
/// <param name="hardAffinity">Hard affinity passed through to the request.</param>
/// <param name="environment">Environment for the process; a new dictionary is created when null.</param>
/// <returns>
/// true when the request was queued or asynchronous scheduling was started;
/// false when the dispatcher pool is empty or scheduling could not be started.
/// </returns>
public bool ScheduleProcess(int processId, string commandLine, List<SoftAffinity> softAffinities, string hardAffinity, StringDictionary environment)
{
    processTable[processId].SetIdAndVersion(commandLine);

    DryadLogger.LogInformation(
        "Schedule process",
        "Internal ID {0} corresponds to vertex {1}.{2}",
        processId,
        processTable[processId].GraphManagerId,
        processTable[processId].GraphManagerVersion);
    DryadLogger.LogInformation(
        "Schedule process",
        "Internal ID {0} has a command line of {1}",
        processId,
        commandLine);

    // The two entries below are always written, overwriting any values supplied by the caller.
    environment = environment ?? new StringDictionary();
    environment[Constants.jobManager] = AzureUtils.CurrentHostName;
    environment["CCP_DRYADPROCID"] = processId.ToString(CultureInfo.InvariantCulture);

    ScheduleProcessRequest request = new ScheduleProcessRequest(processId, commandLine, softAffinities, hardAffinity, environment);
    Dispatcher target = null;

    // Hold the request pool lock across the node search and the enqueue, in case a
    // ProcessExit comes in after we've looked for a node but before the request has
    // been added to the request pool.
    lock (requestPool.SyncRoot)
    {
        if (!FindNodeForRequest(request, out target))
        {
            if (dispatcherPool.Count <= 0)
            {
                DryadLogger.LogCritical(0, null, "No available dispatchers");
                return false;
            }

            DryadLogger.LogDebug("Schedule Process", "No nodes available, adding process {0} to request pool", processId);
            requestPool.Add(request);
            return true;
        }
    }

    // A dispatcher was reserved; start the request outside of the lock.
    if (ScheduleProcess(request, target))
    {
        return true;
    }

    processTable[processId].ChangeState(ProcessState.SchedulingFailed);
    target.Release();
    return false;
}
/// <summary>
/// Records the dispatcher on the process table entry for the request and asks the
/// dispatcher to begin scheduling it asynchronously.
/// </summary>
/// <param name="request">The request to schedule.</param>
/// <param name="dispatcher">The dispatcher that will run the request.</param>
/// <returns>true when the dispatcher reported that scheduling began; otherwise false.</returns>
private bool ScheduleProcess(ScheduleProcessRequest request, Dispatcher dispatcher)
{
    // Table lock first, then the per-process lock, before touching the entry.
    lock (processTable.SyncRoot)
    {
        lock (this.processTable[request.Id].SyncRoot)
        {
            processTable[request.Id].Dispatcher = dispatcher;
        }
    }

    bool started = dispatcher.ScheduleProcess(replyUri, request, new AsyncCallback(this.ScheduleProcessCallback));
    if (!started)
    {
        DryadLogger.LogWarning("Schedule Process", "Failed to begin asynchronous scheduling of process {0} on node '{1}'", request.Id, dispatcher.NodeName);
        return false;
    }

    DryadLogger.LogInformation("Schedule Process", "Began asynchronous scheduling of process {0} on node '{1}': '{2}'", request.Id, dispatcher.NodeName, request.CommandLine);
    return true;
}
/// <summary>
/// Removes a request from the pool while holding the pool lock.
/// </summary>
/// <param name="req">The request to remove.</param>
/// <returns>The result reported by the underlying collection's Remove call.</returns>
public bool Remove(ScheduleProcessRequest req)
{
    bool removed;
    lock (SyncRoot)
    {
        removed = m_processRequestPool.Remove(req);
    }

    return removed;
}
/// <summary>
/// Begins an asynchronous ScheduleProcess call on the vertex service client,
/// retrying up to MaxRetries times after timeouts or communication errors.
/// </summary>
/// <param name="replyUri">Reply URI passed to the vertex service.</param>
/// <param name="req">The request to schedule.</param>
/// <param name="cb">Callback passed to the asynchronous call.</param>
/// <returns>
/// true when the asynchronous call was started; false when the dispatcher is
/// faulted, its scheduling attempts are used up, or every attempt failed.
/// </returns>
public bool ScheduleProcess(string replyUri, ScheduleProcessRequest req, AsyncCallback cb)
{
    // Stays true unless a failure is seen that should not fault the dispatcher.
    bool raiseFault = true;

    for (int attempt = 0; attempt < MaxRetries; attempt++)
    {
        try
        {
            // TODO: Why are we taking the lock in this particular case again?
            lock (SyncRoot)
            {
                if (Faulted || m_schedulingAttempts >= MaxRetries)
                {
                    return false;
                }

                m_schedulingAttempts++;

                // Remember the current process so that if the dispatcher faults
                // we know which process to kill.
                m_currentProcess = req;
                m_currentReplyUri = replyUri;
                m_currentAsyncCallback = cb;

                this.m_client.BeginScheduleProcess(replyUri, req.Id, req.CommandLine, req.Environment, cb, (object)this);
                return true;
            }
        }
        catch (FaultException<VertexServiceError> serviceFault)
        {
            DryadLogger.LogWarning("Schedule Process", "Error scheduling process {0} on node {1}: {2}", req.Id, this.m_nodeName, serviceFault.Reason);
            raiseFault = false;
            break;
        }
        catch (TimeoutException timeout)
        {
            DryadLogger.LogWarning("Schedule Process", "Timeout communicating with vertex service scheduling process {0} on node {1}: {2}", req.Id, this.m_nodeName, timeout.ToString());

            // Retry only if the connection could be reopened; otherwise give up
            // with raiseFault still true.
            if (!SafeOpenConnection())
            {
                break;
            }
        }
        catch (CommunicationException commError)
        {
            DryadLogger.LogWarning("Schedule Process", "Error communicating with vertex service scheduling process {0} on node {1}: {2}", req.Id, this.m_nodeName, commError.ToString());

            if (!SafeOpenConnection())
            {
                break;
            }
        }
        catch (Exception unexpected)
        {
            DryadLogger.LogError(0, unexpected, "Error calling ScheduleProcess for process {0} on node {1}", req.Id, m_nodeName);
            raiseFault = false;
            break;
        }
    }

    if (raiseFault)
    {
        RaiseFaultedEvent();
    }

    return false;
}
/// <summary>
/// Tells the vertex service that the Graph Manager is finished with the
/// vertex process identified by <paramref name="processId"/>.
/// </summary>
/// <param name="processId">Process Id of the process to release</param>
public void ReleaseProcess(int processId)
{
    // Stays true unless a failure is seen that should not fault the dispatcher.
    bool raiseFault = true;

    for (int attempt = 0; attempt < MaxRetries; attempt++)
    {
        try
        {
            if (CurrentProcess == processId)
            {
                m_currentProcess = null;
            }

            // ReleaseProcess is one-way
            if (!Faulted)
            {
                this.m_client.ReleaseProcess(processId);
            }

            return;
        }
        catch (TimeoutException timeout)
        {
            DryadLogger.LogWarning("Release Process", "Timeout communicating with vertex service on node {0}: {1}", this.m_nodeName, timeout.ToString());

            // Retry only if the connection could be reopened; otherwise give up
            // with raiseFault still true.
            if (!SafeOpenConnection())
            {
                break;
            }
        }
        catch (CommunicationException commError)
        {
            DryadLogger.LogWarning("Release Process", "Error communicating with vertex service on node {0}: {1}", this.m_nodeName, commError.ToString());

            if (!SafeOpenConnection())
            {
                break;
            }
        }
        catch (Exception unexpected)
        {
            DryadLogger.LogError(0, unexpected, "Error calling ReleaseProcess for node {0}", m_nodeName);
            raiseFault = false;
            break;
        }
    }

    if (raiseFault)
    {
        RaiseFaultedEvent();
    }
}