/// <summary>
/// Posts the given status through <see cref="PostStatusThrow"/>. Any exception
/// raised while posting is caught and handed to <see cref="ReportUnhandledError"/>
/// instead of propagating to the caller.
/// </summary>
/// <param name="nodeStatus">Status object to post.</param>
/// <param name="blockUntilSent">Forwarded unchanged to <see cref="PostStatusThrow"/>.</param>
internal void PostStatus(NodeStatus nodeStatus, bool blockUntilSent)
{
    try
    {
        PostStatusThrow(nodeStatus, blockUntilSent);
    }
    catch (Exception postFailure)
    {
        // Posting failed - route the failure through the unhandled error path
        ReportUnhandledError(postFailure);
    }
}
/// <summary>
/// Restores this object from the stream: the base class state is read first,
/// followed by a one-byte presence marker and, when that marker is non-zero,
/// the serialized node status.
/// </summary>
/// <param name="reader">Reader positioned at the start of this object's data.</param>
internal override void CreateFromStream(BinaryReader reader)
{
    base.CreateFromStream(reader);

    // A zero marker byte means no status was written
    bool statusPresent = reader.ReadByte() != 0;
    nodeStatus = statusPresent ? NodeStatus.CreateFromStream(reader) : null;
}
/// <summary>
/// Builds a <see cref="NodeStatus"/> from the stream. Fields are read in a fixed
/// order: the scalar fields, then the optional array of in-progress target states,
/// then the optional unhandled exception. Each optional item (and each array
/// element) is preceded by a marker byte where zero means "null".
/// </summary>
/// <param name="reader">Reader positioned at the start of a serialized status.</param>
/// <returns>The reconstructed status object.</returns>
internal static NodeStatus CreateFromStream(BinaryReader reader)
{
    NodeStatus result = new NodeStatus(null);

    // Scalar fields - the read order must not change
    result.traversalType = reader.ReadBoolean();
    result.statusTimeStamp = reader.ReadInt64();
    result.requestId = reader.ReadInt32();
    result.isActive = reader.ReadBoolean();
    result.isLaunchInProgress = reader.ReadBoolean();
    result.queueDepth = reader.ReadInt32();
    result.lastTaskActivityTimeStamp = reader.ReadInt64();
    result.lastEngineActivityTimeStamp = reader.ReadInt64();

    // Optional array of in-progress target states
    if (reader.ReadByte() != 0)
    {
        int targetCount = reader.ReadInt32();
        TargetInProgessState[] targetStates = new TargetInProgessState[targetCount];

        for (int index = 0; index < targetCount; index++)
        {
            if (reader.ReadByte() != 0)
            {
                TargetInProgessState entry = new TargetInProgessState();
                entry.CreateFromStream(reader);
                targetStates[index] = entry;
            }
            else
            {
                targetStates[index] = null;
            }
        }

        result.stateOfInProgressTargets = targetStates;
    }
    else
    {
        result.stateOfInProgressTargets = null;
    }

    // Optional unhandled exception.
    // NOTE(review): the exception is materialized by formatter.Deserialize on the raw
    // base stream - confirm the stream only ever carries data from trusted nodes.
    result.unhandledException = reader.ReadByte() != 0
        ? (Exception)formatter.Deserialize(reader.BaseStream)
        : null;

    return result;
}
/// <summary>
/// Records a status reply from a node. The reply is stored in the slot of the
/// current status array that corresponds to the node's position in the node list,
/// the reply counter is incremented and the "status received" event is signalled.
/// </summary>
/// <param name="nodeId">Id of the node that sent the status.</param>
/// <param name="nodeStatus">The status received; must carry a requested (not unrequested) request id.</param>
internal void PostNodeStatus(int nodeId, NodeStatus nodeStatus)
{
    ErrorUtilities.VerifyThrow(nodeStatus.RequestId != NodeStatus.UnrequestedStatus, "Node manager should not receive unrequested status");

    // Work against the array that was current when the reply arrived
    NodeStatus[] statusSnapshot = statusForNodes;

    for (int slot = 0; slot < nodeList.Count; slot++)
    {
        if (nodeList[slot].NodeId != nodeId)
        {
            continue;
        }

        statusSnapshot[slot] = nodeStatus;
        break;
    }

    // The reply is counted even when no matching node was found
    statusReplyCount++;
    statusMessageReceived.Set();
}
/// <summary>
/// Request status from all nodes in the system and wait for the replies.
/// </summary>
/// <param name="responseTimeout">
/// Total time, in milliseconds, to wait for all replies. The budget is shared
/// across all nodes: the time spent waiting for each reply is subtracted from it.
/// </param>
/// <returns>
/// The status array with one slot per entry in the node list. A slot is left null
/// when the corresponding node did not reply before the wait ended.
/// </returns>
internal NodeStatus[] RequestStatusForNodes(int responseTimeout)
{
    int requestId = 0;

    statusForNodes = new NodeStatus[nodeList.Count];
    statusReplyCount = 0;
    statusMessageReceived.Reset();

    // Request status from all registered nodes
    for (int i = 0; i < nodeList.Count; i++)
    {
        nodeList[i].NodeProvider.RequestNodeStatus(nodeList[i].NodeIndex, requestId);
    }

    // Elapsed time is measured in UTC so that a daylight-saving or time zone change
    // in the middle of the wait cannot distort the remaining budget.
    long lastCheckpoint = DateTime.UtcNow.Ticks;

    while (statusReplyCount < nodeList.Count)
    {
        if (!statusMessageReceived.WaitOne(responseTimeout, false))
        {
            // Timed out waiting for the response from the node
            Console.WriteLine("Response time out out exceeded:" + DateTime.Now.Ticks);
            break;
        }

        // We received another reply
        statusMessageReceived.Reset();

        // Read the clock once per iteration so no time is lost between computing the
        // elapsed interval and starting the next one.
        long now = DateTime.UtcNow.Ticks;
        long elapsedTicks = now - lastCheckpoint;
        lastCheckpoint = now;

        // A backwards clock adjustment must not extend the timeout
        if (elapsedTicks < 0)
        {
            elapsedTicks = 0;
        }

        // Calculate the time remaining and only continue if there is time left
        responseTimeout = responseTimeout - (int)new TimeSpan(elapsedTicks).TotalMilliseconds;
        if (responseTimeout <= 0)
        {
            Console.WriteLine("Response time out out exceeded :" + DateTime.Now.Ticks);
            break;
        }
    }

    return statusForNodes;
}
/// <summary>
/// Handles a status request. When the local engine exists the request is forwarded
/// to it as an engine command; otherwise a status is built here from the number of
/// queued build requests and posted straight back through the parent callback.
/// </summary>
/// <param name="requestId">Id of the status request, echoed back in the reply.</param>
internal void RequestStatus(int requestId)
{
    if (localEngine != null)
    {
        // The local engine has been started - let it produce the status
        RequestStatusEngineCommand statusCommand = new RequestStatusEngineCommand(requestId);
        localEngine.PostEngineCommand(statusCommand);
        return;
    }

    // Status was requested before the local engine started, so answer directly.
    // The queue count is read under the same lock that guards the request queue.
    NodeStatus earlyStatus = null;
    lock (buildRequests)
    {
        earlyStatus = new NodeStatus(requestId, true, buildRequests.Count, 0, 0, false);
    }

    parentCallback.PostStatus(nodeId, earlyStatus, false);
}
/// <summary>
/// Reports a failure to the parent node. The exception is wrapped in a status and
/// posted (blocking until sent), it is always dumped to a file, and finally the
/// local engine - if there is one - is shut down.
/// </summary>
/// <param name="originalException">The failure being reported.</param>
/// <exception cref="Exception">
/// Thrown (with <paramref name="originalException"/> as the inner exception) when
/// posting the status to the parent fails. The exception is still dumped to a file
/// in that case, but the local engine is not shut down by this method.
/// </exception>
internal void ReportUnhandledError(Exception originalException)
{
    NodeStatus failureStatus = new NodeStatus(originalException);

    if (Engine.debugMode)
    {
        Console.WriteLine("Node.ReportUnhandledError: " + originalException.Message);
    }

    try
    {
        // Wait for the message to be sent before returning
        PostStatusThrow(failureStatus, true);
    }
    catch (Exception postFailure)
    {
        // Sending the original exception to the parent failed - surface the original
        // exception, with the posting failure described in the message.
        string message = ResourceUtilities.FormatResourceString("FatalErrorOnChildNode", nodeId, postFailure.Message);

        ErrorUtilities.LaunchMsBuildDebuggerOnFatalError();

        throw new Exception(message, originalException);
    }
    finally
    {
        // Make sure the exception is written to a file, so that even if logging or
        // the transfer to the parent goes wrong the message is at least on disk.
        LocalNode.DumpExceptionToFile(originalException);
    }

    if (localEngine != null)
    {
        localEngine.Shutdown();
    }
}
/// <summary>
/// Queues the node status on the high priority command queue. Status is used to
/// report errors and to answer inactivity notices, so it travels on its own queue
/// rather than waiting behind other events. When requested, the call blocks
/// until the queue has drained, the writer thread has exited, or the communication
/// threads are told to exit.
/// </summary>
/// <param name="nodeId">Not used by this implementation.</param>
/// <param name="nodeStatus">Status to send.</param>
/// <param name="blockUntilSent">True to wait for the queued status to be processed.</param>
public void PostStatus(int nodeId, NodeStatus nodeStatus, bool blockUntilSent)
{
    // This must not run on the callback writer thread
    ErrorUtilities.VerifyThrow(Thread.CurrentThread != writerThread, "Should never call this function from the writer thread");

    LocalCallDescriptorForPostStatus statusCall = new LocalCallDescriptorForPostStatus(nodeStatus);
    nodeHiPriCommandQueue.Enqueue(statusCall);

    if (!blockUntilSent)
    {
        return;
    }

    // Block until the posted event has been processed. If the writer thread exited
    // due to an error the shared memory is no longer valid and nothing can be sent,
    // so stop waiting in that case.
    while (!writerThreadHasExited && nodeHiPriCommandQueue.Count > 0)
    {
        nodeHiPriCommandQueue.QueueEmptyEvent.WaitOne(1000, false);

        // Stop waiting if the communication threads are supposed to exit
        bool exitRequested = exitCommunicationThreads.WaitOne(0, false);
        if (exitRequested)
        {
            break;
        }
    }
}
/// <summary>
/// A variation of PostStatus that throws instead of calling ReportUnhandledError
/// if there's a problem. This allows ReportUnhandledError itself to post status
/// without the possibility of a loop.
/// </summary>
/// <param name="nodeStatus">Status to post to the parent callback.</param>
/// <param name="blockUntilSent">Passed through unchanged to the parent callback's PostStatus.</param>
internal void PostStatusThrow(NodeStatus nodeStatus, bool blockUntilSent) { parentCallback.PostStatus(nodeId, nodeStatus, blockUntilSent); }
/// <summary>
/// This method is called to post the status of the node. The status is handed
/// directly to the parent engine via PostNodeStatus.
/// </summary>
/// <param name="nodeId">Id of the node the status belongs to.</param>
/// <param name="nodeStatus">The status being posted.</param>
/// <param name="blockUntilSent">Not used by this implementation.</param>
public void PostStatus(int nodeId, NodeStatus nodeStatus, bool blockUntilSent) { parentEngine.PostNodeStatus(nodeId, nodeStatus); }
/// <summary>
/// This method is called when the parent engine doesn't see activity for a preset time period to
/// determine if the whole system is making forward progress. In order to do that, status is collected
/// from every node in the system. If no node is making forward progress then the graph of all the
/// in-progress targets is analyzed for cycles. If a cycle is found the appropriate node is instructed
/// to break it. If no cause for deadlock can be determined the system is shut down.
/// </summary>
/// <param name="queueCounts">Number of queued items; any value above zero counts as progress.</param>
/// <param name="lastLoopActivity">Time of the last loop activity, in ticks comparable to DateTime.Now.Ticks.</param>
/// <param name="currentTimeout">Current inactivity timeout, compared against elapsed milliseconds.</param>
/// <returns>New inactivity timeout</returns>
internal int DetectDeadlock(int queueCounts, long lastLoopActivity, int currentTimeout)
{
    // Don't try to detect deadlock in single threaded mode or on a child node
    if (parentEngine.Router.ChildMode || parentEngine.Router.SingleThreadedMode)
    {
        return(Timeout.Infinite);
    }

    // Calculate time since last loop activity
    TimeSpan timeSinceLastLoopActivity = new TimeSpan(DateTime.Now.Ticks - lastLoopActivity);

    // If there are items in the queue waiting to be processed or there was loop activity
    // not so long ago - continue
    if (queueCounts > 0 || timeSinceLastLoopActivity.TotalMilliseconds < currentTimeout)
    {
        return(currentTimeout);
    }

    // Without a task execution module there is no task activity to inspect
    if (nodeManager.TaskExecutionModule == null)
    {
        return(currentTimeout);
    }

    // Calculate the time since the last task activity
    TimeSpan timeSinceLastTEMActivity = new TimeSpan(DateTime.Now.Ticks - nodeManager.TaskExecutionModule.LastTaskActivity());

    // If there was task activity within the timeout period just lengthen the timeout;
    // otherwise fall through and check with the individual nodes
    if (timeSinceLastTEMActivity.TotalMilliseconds < currentTimeout)
    {
        // Increase the timeout since tasks are taking a long time
        return(calculateNewLoopTimeout(currentTimeout));
    }

    // Check if we are waiting on an outcome of an operation
    if ((ignoreTimeout - DateTime.Now.Ticks) > 0)
    {
        return(currentTimeout);
    }

    // Time the status request; the duration is reused below when a cycle is broken
    long requestStartTime = DateTime.Now.Ticks;
    NodeStatus[] nodeStatus = nodeManager.RequestStatusForNodes(nodeStatusReplyTimeout);
    long requestDurationTime = DateTime.Now.Ticks - requestStartTime;

    for (int i = 0; i < nodeStatus.Length; i++)
    {
        if (nodeStatus[i] == null)
        {
            // A node failed to respond to the request for status. The only option is to shutdown
            // the build and error out
            LogOrDumpError("FailedToReceiveChildStatus", i + 1, nodeStatusReplyTimeout);

            SystemShutdown();
            return(currentTimeout);
        }
        else if (nodeStatus[i].HasExited)
        {
            // A node has exited prematurely. The only option is to shutdown
            LogOrDumpError("ChildExitedPrematurely", i + 1);

            SystemShutdown();
            return(currentTimeout);
        }
        else if (nodeStatus[i].IsActive)
        {
            // Calculate the time since last node activity
            TimeSpan timeSinceLastNodeTaskActivity = new TimeSpan(nodeStatus[i].TimeSinceLastTaskActivity);
            TimeSpan timeSinceLastNodeLoopActivity = new TimeSpan(nodeStatus[i].TimeSinceLastLoopActivity);

            // Check if there was activity on the node within the timeout
            if (nodeStatus[i].QueueDepth > 0 ||
                timeSinceLastNodeTaskActivity.TotalMilliseconds < currentTimeout ||
                timeSinceLastNodeLoopActivity.TotalMilliseconds < currentTimeout)
            {
                // If the time out has been exceeded while one of the nodes was
                // active lets increase the timeout
                return(calculateNewLoopTimeout(currentTimeout));
            }
        }
        else if (nodeStatus[i].IsLaunchInProgress)
        {
            // If there is a node in process of being launched, only the NodeProvider
            // knows how long that should take so the decision to error out can
            // only be made by the node provider.
            return(currentTimeout);
        }
    }

    // There was no detected activity within the system for the whole time period. Check
    // if there is a cycle in the in progress targets
    TargetCycleDetector cycleDetector = new TargetCycleDetector(parentEngine.LoggingServices, parentEngine.EngineCallback);
    AddTargetStatesToCycleDetector(nodeStatus, cycleDetector);
    NodeStatus localStatus = parentEngine.RequestStatus(0);
    cycleDetector.AddTargetsToGraph(localStatus.StateOfInProgressTargets);

    if (cycleDetector.FindCycles())
    {
        if (Engine.debugMode)
        {
            Console.WriteLine("Breaking cycle between " + cycleDetector.CycleEdgeChild.TargetId.name + " and " +
                              cycleDetector.CycleEdgeParent.TargetId.name);
        }

        // A cycle has been detected - it needs to be broken for the build to continue
        nodeManager.PostCycleNotification(cycleDetector.CycleEdgeChild.TargetId.nodeId,
                                          cycleDetector.CycleEdgeChild,
                                          cycleDetector.CycleEdgeParent);

        // Use the amount of time it took us to receive the NodeStatus and buffer it a little because node status is sent via a faster code path
        ignoreTimeout = DateTime.Now.Ticks + requestDurationTime + (cycleBreakTimeout * TimeSpan.TicksPerMillisecond);
        return(currentTimeout);
    }

    // The system doesn't appear to be making progress. Switch to the largest sampling interval.
    if (currentTimeout != maxLoopTimeout)
    {
        return(maxLoopTimeout);
    }

    // Should make at least two observations before assuming that no forward progress is being made
    if (previousStatus == null || previousLocalStatus == null ||
        nodeStatus.Length != previousStatus.Length)
    {
        previousStatus = nodeStatus;
        previousLocalStatus = localStatus;
        return(currentTimeout);
    }

    // There was some activity between previous and current status checks on the local node
    if (localStatus.LastLoopActivity != previousLocalStatus.LastLoopActivity ||
        localStatus.LastTaskActivity != previousLocalStatus.LastTaskActivity)
    {
        previousStatus = nodeStatus;
        previousLocalStatus = localStatus;
        return(currentTimeout);
    }

    for (int i = 0; i < nodeStatus.Length; i++)
    {
        // There was some activity between previous and current status checks on the child node
        if (nodeStatus[i].LastTaskActivity != previousStatus[i].LastTaskActivity ||
            nodeStatus[i].LastLoopActivity != previousStatus[i].LastLoopActivity)
        {
            previousStatus = nodeStatus;
            previousLocalStatus = localStatus;
            return(currentTimeout);
        }
    }

    // The system is not making forward progress for an unknown reason. The
    // only recourse is to collect as much data as possible and shutdown with
    // an error message

    // UNDONE - using logging and resource string to output the state dump

    GatherNodeInformationForShutdown(nodeStatus, localStatus);
    SystemShutdown();
    return(currentTimeout);
}
/// <summary>
/// Creates a call descriptor of type LocalCallType.PostStatus that carries the given node status.
/// </summary>
/// <param name="nodeStatus">The status stored in this descriptor.</param>
internal LocalCallDescriptorForPostStatus(NodeStatus nodeStatus) : base(LocalCallType.PostStatus) { this.nodeStatus = nodeStatus; }