Example #1
0
 /// <summary>
 /// Adds a set of nodeStatus's to the cycle graph
 /// </summary>
 private void AddTargetStatesToCycleDetector(NodeStatus[] nodeStatus, TargetCycleDetector cycleDetector)
 {
     for (int i = 0; i < nodeStatus.Length; i++)
     {
         cycleDetector.AddTargetsToGraph(nodeStatus[i].StateOfInProgressTargets);
     }
 }
Example #2
0
        /// <summary>
        /// This method is called when the parent engine doesn't see activity for a preset time period to
        /// determine if the whole system is making forward progress. In order to that, status is collected
        /// from every node in the system. If no node is making forward progress then the graph of all the
        /// inprogress targets is analyzed for cycles. If a cycle is found the appropriate node is instructed
        /// to break it. If no cause for deadlock can be determined the system is shutdown.
        /// </summary>
        /// <returns>New inactivity timeout</returns>
        internal int DetectDeadlock(int queueCounts, long lastLoopActivity, int currentTimeout)
        {
            // Don't try to detect deadlock in single threaded mode or on a child node
            if (parentEngine.Router.ChildMode || parentEngine.Router.SingleThreadedMode)
            {
                return(Timeout.Infinite);
            }

            // Calculate time since last loop activity
            TimeSpan timeSinceLastLoopActivity =
                new TimeSpan(DateTime.Now.Ticks - lastLoopActivity);

            // If there are items in the queue waiting to be processed or there was loop activity
            // not so long ago - continue
            if (queueCounts > 0 || timeSinceLastLoopActivity.TotalMilliseconds < currentTimeout)
            {
                return(currentTimeout);
            }

            if (nodeManager.TaskExecutionModule == null)
            {
                return(currentTimeout);
            }

            // Calculate the time since the last task activity
            TimeSpan timeSinceLastTEMActivity =
                new TimeSpan(DateTime.Now.Ticks - nodeManager.TaskExecutionModule.LastTaskActivity());

            // If there was not task activity for the whole time period - check with individual nodes
            // to see if there was activity there
            if (timeSinceLastTEMActivity.TotalMilliseconds < currentTimeout)
            {
                // Increase the timeout since tasks are taking a long time
                return(calculateNewLoopTimeout(currentTimeout));
            }

            // Check if we are waiting on an outcome of an operation
            if ((ignoreTimeout - DateTime.Now.Ticks) > 0)
            {
                return(currentTimeout);
            }

            long requestStartTime = DateTime.Now.Ticks;

            NodeStatus[] nodeStatus          = nodeManager.RequestStatusForNodes(nodeStatusReplyTimeout);
            long         requestDurationTime = DateTime.Now.Ticks - requestStartTime;

            for (int i = 0; i < nodeStatus.Length; i++)
            {
                if (nodeStatus[i] == null)
                {
                    // A node failed to respond to the request for status. The only option is to shutdown
                    // the build and error out
                    LogOrDumpError("FailedToReceiveChildStatus", i + 1, nodeStatusReplyTimeout);

                    SystemShutdown();
                    return(currentTimeout);
                }
                else if (nodeStatus[i].HasExited)
                {
                    // A node has exited prematurely. The only option is to shutdown
                    LogOrDumpError("ChildExitedPrematurely", i + 1);

                    SystemShutdown();
                    return(currentTimeout);
                }
                else if (nodeStatus[i].IsActive)
                {
                    // Calculate the time since last node activity
                    TimeSpan timeSinceLastNodeTaskActivity = new TimeSpan(nodeStatus[i].TimeSinceLastTaskActivity);
                    TimeSpan timeSinceLastNodeLoopActivity = new TimeSpan(nodeStatus[i].TimeSinceLastLoopActivity);

                    // Check if there was activity on the node within the timeout
                    if (nodeStatus[i].QueueDepth > 0 ||
                        timeSinceLastNodeTaskActivity.TotalMilliseconds < currentTimeout ||
                        timeSinceLastNodeLoopActivity.TotalMilliseconds < currentTimeout)
                    {
                        // If the time out has been exceeded while one of the nodes was
                        // active lets increase the timeout
                        return(calculateNewLoopTimeout(currentTimeout));
                    }
                }
                else if (nodeStatus[i].IsLaunchInProgress)
                {
                    // If there is a node in process of being launched, only the NodeProvider
                    // knows how long that should take so the decision to error out can
                    // only be made by the node provider.
                    return(currentTimeout);
                }
            }

            // There was no detected activity within the system for the whole time period. Check
            // if there is a cycle in the in progress targets
            TargetCycleDetector cycleDetector = new TargetCycleDetector(parentEngine.LoggingServices, parentEngine.EngineCallback);

            AddTargetStatesToCycleDetector(nodeStatus, cycleDetector);
            NodeStatus localStatus = parentEngine.RequestStatus(0);

            cycleDetector.AddTargetsToGraph(localStatus.StateOfInProgressTargets);


            if (cycleDetector.FindCycles())
            {
                if (Engine.debugMode)
                {
                    Console.WriteLine("Breaking cycle between " + cycleDetector.CycleEdgeChild.TargetId.name + " and " +
                                      cycleDetector.CycleEdgeParent.TargetId.name);
                }
                // A cycle has been detected - it needs to be broken for the build to continue
                nodeManager.PostCycleNotification(cycleDetector.CycleEdgeChild.TargetId.nodeId,
                                                  cycleDetector.CycleEdgeChild,
                                                  cycleDetector.CycleEdgeParent);
                // Use the amount of time it took us to receive the NodeStatus and buffer it a little because node status is sent via a faster code path
                ignoreTimeout = DateTime.Now.Ticks + requestDurationTime + (cycleBreakTimeout * TimeSpan.TicksPerMillisecond);
                return(currentTimeout);
            }

            // The system doesn't appear to be making progress. Switch to a largest sampling interval.
            if (currentTimeout != maxLoopTimeout)
            {
                return(maxLoopTimeout);
            }

            // Should make at least two observations before assuming that no forward progress is being made
            if (previousStatus == null || previousLocalStatus == null || nodeStatus.Length != previousStatus.Length)
            {
                previousStatus      = nodeStatus;
                previousLocalStatus = localStatus;
                return(currentTimeout);
            }

            // There was some activity between previous and current status checks on the local node
            if (localStatus.LastLoopActivity != previousLocalStatus.LastLoopActivity ||
                localStatus.LastTaskActivity != previousLocalStatus.LastTaskActivity)
            {
                previousStatus      = nodeStatus;
                previousLocalStatus = localStatus;
                return(currentTimeout);
            }

            for (int i = 0; i < nodeStatus.Length; i++)
            {
                // There was some activity between previous and current status checks on the child node
                if (nodeStatus[i].LastTaskActivity != previousStatus[i].LastTaskActivity ||
                    nodeStatus[i].LastLoopActivity != previousStatus[i].LastLoopActivity)
                {
                    previousStatus      = nodeStatus;
                    previousLocalStatus = localStatus;
                    return(currentTimeout);
                }
            }

            // The system is not making forward progress for an unknown reason. The
            // only recourse to is to collect as much data as possible and shutdown with
            // an error message
            // UNDONE - using logging and resource string to output the state dump

            GatherNodeInformationForShutdown(nodeStatus, localStatus);
            SystemShutdown();
            return(currentTimeout);
        }
Example #3
0
        /// <summary>
        /// This method is called when the parent engine doesn't see activity for a preset time period to
        /// determine if the whole system is making forward progress. In order to that, status is collected
        /// from every node in the system. If no node is making forward progress then the graph of all the 
        /// inprogress targets is analyzed for cycles. If a cycle is found the appropriate node is instructed
        /// to break it. If no cause for deadlock can be determined the system is shutdown.
        /// </summary>
        /// <returns>New inactivity timeout</returns>
        internal int DetectDeadlock( int queueCounts, long lastLoopActivity, int currentTimeout)
        {
            // Don't try to detect deadlock in single threaded mode or on a child node
            if (parentEngine.Router.ChildMode || parentEngine.Router.SingleThreadedMode)
            {
                return Timeout.Infinite;
            }

            // Calculate time since last loop activity
            TimeSpan timeSinceLastLoopActivity =
                            new TimeSpan(DateTime.Now.Ticks - lastLoopActivity);

            // If there are items in the queue waiting to be processed or there was loop activity
            // not so long ago - continue
            if (queueCounts > 0 || timeSinceLastLoopActivity.TotalMilliseconds < currentTimeout)
            {
                return currentTimeout;
            }

            if (nodeManager.TaskExecutionModule == null)
            {
                return currentTimeout;
            }

            // Calculate the time since the last task activity
            TimeSpan timeSinceLastTEMActivity =
                            new TimeSpan(DateTime.Now.Ticks - nodeManager.TaskExecutionModule.LastTaskActivity());

            // If there was not task activity for the whole time period - check with individual nodes
            // to see if there was activity there
            if (timeSinceLastTEMActivity.TotalMilliseconds < currentTimeout)
            {
                // Increase the timeout since tasks are taking a long time
                return calculateNewLoopTimeout(currentTimeout);
            }

            // Check if we are waiting on an outcome of an operation
            if ((ignoreTimeout - DateTime.Now.Ticks) > 0)
            {
                return currentTimeout;
            }

            long requestStartTime = DateTime.Now.Ticks;
            NodeStatus[] nodeStatus = nodeManager.RequestStatusForNodes(nodeStatusReplyTimeout);
            long requestDurationTime = DateTime.Now.Ticks - requestStartTime;

            for (int i = 0; i < nodeStatus.Length; i++)
            {
                if (nodeStatus[i] == null)
                {
                    // A node failed to respond to the request for status. The only option is to shutdown
                    // the build and error out
                    LogOrDumpError("FailedToReceiveChildStatus", i + 1, nodeStatusReplyTimeout);

                    SystemShutdown();
                    return currentTimeout;
                }
                else if (nodeStatus[i].HasExited)
                {
                    // A node has exited prematurely. The only option is to shutdown 
                    LogOrDumpError("ChildExitedPrematurely", i + 1);

                    SystemShutdown();
                    return currentTimeout;
                }
                else if (nodeStatus[i].IsActive)
                {
                    // Calculate the time since last node activity
                    TimeSpan timeSinceLastNodeTaskActivity = new TimeSpan(nodeStatus[i].TimeSinceLastTaskActivity);
                    TimeSpan timeSinceLastNodeLoopActivity = new TimeSpan(nodeStatus[i].TimeSinceLastLoopActivity);

                    // Check if there was activity on the node within the timeout
                    if (nodeStatus[i].QueueDepth > 0 ||
                        timeSinceLastNodeTaskActivity.TotalMilliseconds < currentTimeout ||
                        timeSinceLastNodeLoopActivity.TotalMilliseconds < currentTimeout)
                    {
                        // If the time out has been exceeded while one of the nodes was
                        // active lets increase the timeout
                        return calculateNewLoopTimeout(currentTimeout);
                    }
                }
                else if (nodeStatus[i].IsLaunchInProgress)
                {
                    // If there is a node in process of being launched, only the NodeProvider
                    // knows how long that should take so the decision to error out can
                    // only be made by the node provider.
                    return currentTimeout;
                }
            }

            // There was no detected activity within the system for the whole time period. Check
            // if there is a cycle in the in progress targets
            TargetCycleDetector cycleDetector = new TargetCycleDetector(parentEngine.LoggingServices, parentEngine.EngineCallback);
            AddTargetStatesToCycleDetector(nodeStatus, cycleDetector);
            NodeStatus localStatus = parentEngine.RequestStatus(0);
            cycleDetector.AddTargetsToGraph(localStatus.StateOfInProgressTargets);


            if (cycleDetector.FindCycles())
            {
                if (Engine.debugMode)
                {
                    Console.WriteLine("Breaking cycle between " + cycleDetector.CycleEdgeChild.TargetId.name + " and " +
                                  cycleDetector.CycleEdgeParent.TargetId.name);
                }
                // A cycle has been detected - it needs to be broken for the build to continue
                nodeManager.PostCycleNotification(cycleDetector.CycleEdgeChild.TargetId.nodeId,
                                                  cycleDetector.CycleEdgeChild,
                                                  cycleDetector.CycleEdgeParent);
                // Use the amount of time it took us to receive the NodeStatus and buffer it a little because node status is sent via a faster code path
                ignoreTimeout = DateTime.Now.Ticks + requestDurationTime + (cycleBreakTimeout * TimeSpan.TicksPerMillisecond);
                return currentTimeout; 
            }

            // The system doesn't appear to be making progress. Switch to a largest sampling interval.
            if (currentTimeout != maxLoopTimeout)
            {
                return maxLoopTimeout;
            }

            // Should make at least two observations before assuming that no forward progress is being made
            if (previousStatus == null || previousLocalStatus == null || nodeStatus.Length != previousStatus.Length)
            {
                previousStatus = nodeStatus;
                previousLocalStatus = localStatus;
                return currentTimeout;
            }

            // There was some activity between previous and current status checks on the local node
            if (localStatus.LastLoopActivity != previousLocalStatus.LastLoopActivity ||
                localStatus.LastTaskActivity != previousLocalStatus.LastTaskActivity )
            {
                previousStatus = nodeStatus;
                previousLocalStatus = localStatus;
                return currentTimeout;
            }

            for (int i = 0; i < nodeStatus.Length; i++)
            {
                // There was some activity between previous and current status checks on the child node
                if (nodeStatus[i].LastTaskActivity != previousStatus[i].LastTaskActivity ||
                    nodeStatus[i].LastLoopActivity != previousStatus[i].LastLoopActivity)
                {
                    previousStatus = nodeStatus;
                    previousLocalStatus = localStatus;
                    return currentTimeout;
                }
            }

            // The system is not making forward progress for an unknown reason. The
            // only recourse to is to collect as much data as possible and shutdown with
            // an error message
            // UNDONE - using logging and resource string to output the state dump

            GatherNodeInformationForShutdown(nodeStatus, localStatus);
            SystemShutdown();
            return currentTimeout;
        }
Example #4
0
        /// <summary>
        /// Adds a set of nodeStatus's to the cycle graph 
        /// </summary>
        private void AddTargetStatesToCycleDetector(NodeStatus[] nodeStatus, TargetCycleDetector cycleDetector)
        {

            for (int i = 0; i < nodeStatus.Length; i++)
            {
               cycleDetector.AddTargetsToGraph(nodeStatus[i].StateOfInProgressTargets);
            }
        }