/// <summary>
/// Expands the candidate node set according to the force-skip-dependencies mode and,
/// when requested, adds the meta pips affected by the scheduled nodes.
/// </summary>
/// <param name="nodesToSchedule">Candidate nodes. The set is mutated in place and is the instance returned.</param>
/// <param name="transitiveDependencyNodeFilter">Tracker whose visited nodes bound the dependency expansion.</param>
/// <param name="forceSkipDepsMode">Selects between the clean-and-materialized expansion (disabled) and the required-inputs-present expansion.</param>
/// <param name="scheduleMetaPips">If true, meta pips reachable as dependents of the scheduled nodes are added.</param>
/// <param name="mustExecute">Passed to the required-inputs-present expansion; only used when force-skip-dependencies is not disabled.</param>
/// <param name="stats">Passed to the clean-and-materialized expansion; only used when force-skip-dependencies is disabled.</param>
/// <param name="metaPipCount">Overwritten with the number of meta-pip visits only when <paramref name="scheduleMetaPips"/> is true.</param>
/// <returns>The same <paramref name="nodesToSchedule"/> instance after expansion.</returns>
private IEnumerable<NodeId> GetNodesToSchedule(
    HashSet<NodeId> nodesToSchedule,
    VisitationTracker transitiveDependencyNodeFilter,
    ForceSkipDependenciesMode forceSkipDepsMode,
    bool scheduleMetaPips,
    HashSet<NodeId> mustExecute,
    BuildSetCalculatorStats stats,
    ref int metaPipCount)
{
    if (forceSkipDepsMode != ForceSkipDependenciesMode.Disabled)
    {
        Func<NodeId, bool> isProcessNode = candidate => GetPipType(candidate) == PipType.Process;

        int explicitProcessCount;
        int scheduledProcessCount;
        int mustExecuteProcessCount;
        int closureProcessCount;

        using (m_counters.StartStopwatch(PipExecutorCounter.ForceSkipDependenciesScheduleDependenciesUntilInputsPresentDuration))
        {
            explicitProcessCount = nodesToSchedule.Count(isProcessNode);

            // Number of process pips inside the transitive dependency closure of the filtered pips.
            closureProcessCount = m_graph.Nodes.Count(
                candidate => transitiveDependencyNodeFilter.WasVisited(candidate) && isProcessNode(candidate));

            ScheduleDependenciesUntilRequiredInputsPresent(
                nodesToSchedule,
                transitiveDependencyNodeFilter,
                mustExecute,
                forceSkipDepsMode);

            scheduledProcessCount = nodesToSchedule.Count(isProcessNode);
            mustExecuteProcessCount = mustExecute.Count(isProcessNode);
        }

        Logger.Log.DirtyBuildStats(
            m_loggingContext,
            (long)m_counters.GetElapsedTime(PipExecutorCounter.ForceSkipDependenciesScheduleDependenciesUntilInputsPresentDuration).TotalMilliseconds,
            forceSkipDepsMode == ForceSkipDependenciesMode.Module,
            explicitProcessCount,
            scheduledProcessCount,
            mustExecuteProcessCount,
            closureProcessCount - mustExecuteProcessCount);
    }
    else
    {
        ScheduleDependenciesUntilCleanAndMaterialized(nodesToSchedule, transitiveDependencyNodeFilter, stats);
    }

    if (scheduleMetaPips)
    {
        int metaPipVisits = 0;

        using (m_counters.StartStopwatch(PipExecutorCounter.BuildSetCalculatorComputeAffectedMetaPips))
        {
            // Walking the whole graph from every scheduled node is expensive. Instead, the work is split:
            // first collect the meta-pip frontier (meta pips that directly depend on a scheduled node),
            // which can run in parallel, then traverse dependents from that frontier only.
            // Assumption: the dependents of meta pips are always meta pips.
            VisitationTracker metaPipVisitation = new VisitationTracker(m_graph);
            ConcurrentDictionary<NodeId, Unit> frontier = new ConcurrentDictionary<NodeId, Unit>();

            // Phase 1: gather the frontier from the direct dependents of each scheduled node.
            Parallel.ForEach(
                nodesToSchedule,
                scheduled =>
                {
                    foreach (var outgoing in m_graph.GetOutgoingEdges(scheduled))
                    {
                        var dependent = outgoing.OtherNode;
                        if (GetPipType(dependent).IsMetaPip())
                        {
                            frontier.TryAdd(dependent, Unit.Void);
                        }
                    }
                });

            // Phase 2: schedule everything reachable from the frontier through dependent edges.
            m_visitor.VisitTransitiveDependents(
                frontier.Keys,
                metaPipVisitation,
                reached =>
                {
                    nodesToSchedule.Add(reached);
                    metaPipVisits++;
                    return true;
                });
        }

        metaPipCount = metaPipVisits;
    }

    return nodesToSchedule;
}
/// <summary>
/// Starting from the dirty nodes in <paramref name="nodesToSchedule"/>, walks incoming (dependency) edges
/// within the build cone and schedules every dependency that is not clean-and-materialized.
/// Clean-and-materialized dependencies encountered on the way form the frontier, which is also added to the set.
/// </summary>
/// <param name="nodesToSchedule">Candidate set; non-dirty nodes are removed first, then the set is expanded in place.</param>
/// <param name="buildCone">Only nodes visited in this tracker are considered.</param>
/// <param name="stats">Receives the node and process counts of the clean-materialized frontier.</param>
private void ScheduleDependenciesUntilCleanAndMaterialized(
    HashSet<NodeId> nodesToSchedule,
    VisitationTracker buildCone,
    BuildSetCalculatorStats stats)
{
    // Snapshot taken before the non-dirty nodes are filtered out below.
    int initialNodeCount = nodesToSchedule.Count;
    int initialProcessCount = nodesToSchedule.Count(IsProcess);

    int notCleanNodeCount = 0;
    int notCleanProcessCount = 0;
    int collateralNodeCount = 0;
    int collateralProcessCount = 0;

    // TODO: If this method turns out to be the bottleneck, we can make it parallel later.
    var frontier = new HashSet<NodeId>();

    using (m_counters.StartStopwatch(PipExecutorCounter.BuildSetCalculatorScheduleDependenciesUntilCleanAndMaterialized))
    {
        // Keep only the dirty nodes; the traversal is seeded from them.
        // Nodes that are not materialized should have been marked dirty by CalculateDirtyNodes.
        nodesToSchedule.RemoveWhere(node => !IsNodeDirty(node));

        var pending = new Queue<NodeId>(nodesToSchedule);

        // Callback for nodes that become dirty as a side effect of scheduling one of their dependencies.
        Action<NodeId> addNode = dependent =>
        {
            if (!buildCone.WasVisited(dependent))
            {
                return;
            }

            if (GetPipType(dependent).IsMetaPip())
            {
                return;
            }

            if (!nodesToSchedule.Add(dependent))
            {
                // Already scheduled.
                return;
            }

            // In the build cone and newly scheduled: its own dependencies must be examined too.
            pending.Enqueue(dependent);
            ++collateralNodeCount;

            if (IsProcess(dependent))
            {
                ++collateralProcessCount;
            }
        };

        Action<NodeId> scheduleDependencyNode = dependency =>
        {
            if (!buildCone.WasVisited(dependency))
            {
                // Outside the build cone.
                return;
            }

            var dependencyType = GetPipType(dependency);

            // A node counts as clean only when it is marked clean and materialized AND none of its outputs
            // are rewritten. The second condition is conservative, and is needed for correctness in the
            // presence of rewritten files.
            bool cleanAndMaterialized = IsNodeCleanAndMaterialized(dependency) && !IsRewrittenPip(dependency);

            if (cleanAndMaterialized)
            {
                frontier.Add(dependency);
                return;
            }

            if (!nodesToSchedule.Add(dependency))
            {
                // Dirty or not materialized, but already scheduled.
                return;
            }

            // Mark process node dirty, and add its dependents so that the dependencies of those
            // dependents can be added later. This must run before the node itself is enqueued.
            MarkProcessNodeDirtyAndAddItsDependents(dependency, addNode);
            ++notCleanNodeCount;

            if (dependencyType != PipType.HashSourceFile)
            {
                pending.Enqueue(dependency);
            }

            if (IsProcess(dependency))
            {
                ++notCleanProcessCount;
            }
        };

        while (pending.Count > 0)
        {
            NodeId current = pending.Dequeue();

            foreach (Edge incoming in m_graph.GetIncomingEdges(current))
            {
                scheduleDependencyNode(incoming.OtherNode);
            }
        }

        nodesToSchedule.UnionWith(frontier);
    }

    stats.CleanMaterializedNodeFrontierCount = frontier.Count;
    stats.CleanMaterializedProcessFrontierCount = frontier.Count(IsProcess);

    Logger.Log.BuildSetCalculatorScheduleDependenciesUntilCleanAndMaterializedStats(
        m_loggingContext,
        initialNodeCount,
        initialProcessCount,
        notCleanNodeCount,
        notCleanProcessCount,
        collateralNodeCount,
        collateralProcessCount,
        stats.CleanMaterializedNodeFrontierCount,
        stats.CleanMaterializedProcessFrontierCount,
        (int)m_counters.GetElapsedTime(PipExecutorCounter.BuildSetCalculatorScheduleDependenciesUntilCleanAndMaterialized).TotalMilliseconds);
}
/// <summary>
/// Gets nodes to schedule.
/// </summary>
/// <remarks>
/// The computation proceeds in these phases:
/// (1) calculate dirty nodes among the explicitly scheduled ones (returning early with a no-op result if none are dirty);
/// (2) optionally add the transitive non-meta-pip dependents;
/// (3) mark the transitive dependencies of the resulting set in a visitation tracker (the build cone);
/// (4) delegate to the private overload to decide which nodes in the cone are actually scheduled.
/// The number of processes in the build cone is counted exclusively in phase (3), so that each process is counted once.
/// </remarks>
/// <param name="scheduleDependents">If true, then include all transitive dependents of the explicitly scheduled nodes.</param>
/// <param name="explicitlyScheduledNodes">Explicitly scheduled nodes.</param>
/// <param name="forceSkipDepsMode">If not disabled, then skip dependencies. This corresponds to "dirty" build.</param>
/// <param name="scheduleMetaPips">If true, metapips will be scheduled</param>
/// <returns>Nodes to schedule.</returns>
public GetScheduledNodesResult GetNodesToSchedule(
    bool scheduleDependents,
    IEnumerable<NodeId> explicitlyScheduledNodes,
    ForceSkipDependenciesMode forceSkipDepsMode,
    bool scheduleMetaPips)
{
    int explicitlySelectedNodeCount;
    int explicitlySelectedProcessCount;
    int dirtyNodeCount;
    int dirtyProcessCount;
    int nonMaterializedNodeCount;
    int nonMaterializedProcessCount;
    int processesInBuildCone = 0;

    HashSet<NodeId> nodesToSchedule;
    VisitationTracker transitiveDependencyNodeFilter;

    using (m_counters.StartStopwatch(PipExecutorCounter.BuildSetCalculatorComputeBuildCone))
    {
        var visitedNodes = new VisitationTracker(m_graph);
        nodesToSchedule = new HashSet<NodeId>(explicitlyScheduledNodes);
        explicitlySelectedNodeCount = nodesToSchedule.Count;
        explicitlySelectedProcessCount = nodesToSchedule.Count(IsProcess);

        // 1. Calculate dirty nodes.
        //    The filter-passing set may include nodes which are dirty/clean and schedulable/not-schedulable (w.r.t. state).
        //    We want stats on dirty vs. not-dirty, and want to drop anything not schedulable.
        //    This step also marks dirty non-materialized nodes.
        CalculateDirtyNodes(
            nodesToSchedule,
            out dirtyNodeCount,
            out dirtyProcessCount,
            out nonMaterializedNodeCount,
            out nonMaterializedProcessCount);

        if (dirtyNodeCount == 0)
        {
            int duration = (int) m_counters.GetElapsedTime(PipExecutorCounter.BuildSetCalculatorComputeBuildCone).TotalMilliseconds;

            // Build cone is the same as the explicitly selected processes.
            Logger.Log.BuildSetCalculatorProcessStats(
                m_loggingContext,
                m_graph.Nodes.Count(IsProcess),
                explicitlySelectedProcessCount,
                explicitlySelectedProcessCount,
                explicitlySelectedProcessCount,
                0,
                duration);
            Logger.Log.BuildSetCalculatorStats(
                m_loggingContext,
                0,
                0,
                explicitlySelectedNodeCount,
                explicitlySelectedProcessCount,
                nonMaterializedNodeCount,
                nonMaterializedProcessCount,
                duration,
                0,
                0,
                0,
                0);
            return GetScheduledNodesResult.CreateForNoOperationBuild(explicitlySelectedProcessCount);
        }

        // 2. Add transitive dependents of explicitly scheduled nodes (if requested).
        if (scheduleDependents)
        {
            m_visitor.VisitTransitiveDependents(
                nodesToSchedule,
                visitedNodes,
                node =>
                {
                    // Don't schedule dependents that are meta pips. These may artificially connect unrequested
                    // pips since we will later schedule their dependencies. For example, this would cause
                    // everything referenced by a spec file pip to be scheduled as a single unit.
                    if (GetPipType(node).IsMetaPip())
                    {
                        return false;
                    }

                    // Processes are intentionally NOT counted here: every node added to nodesToSchedule is
                    // counted once in step 3 below. Counting here as well would double-count each of them
                    // in processesInBuildCone.
                    nodesToSchedule.Add(node);
                    return true;
                });
        }

        // At this point nodesToSchedule contains
        // (1) all nodes that are explicitly scheduled (explicitlyScheduledNodes), and
        // (2) if scheduleDependents is true, all dependents of (1) transitively.

        // 3. Collect/visit transitive dependencies, but don't put them in nodesToSchedule.
        //    The tracker from step 2 is reused after being reset.
        transitiveDependencyNodeFilter = visitedNodes;
        transitiveDependencyNodeFilter.UnsafeReset();

        // The code below essentially does
        //   m_visitor.VisitTransitiveDependencies(nodesToSchedule, transitiveDependencyNodeFilter, node => true),
        // but in parallel.

        // Seed: mark every node to schedule and count the processes among them (sequential, no interlocking needed).
        foreach (var nodeId in nodesToSchedule)
        {
            if (transitiveDependencyNodeFilter.MarkVisited(nodeId))
            {
                if (IsProcess(nodeId))
                {
                    ++processesInBuildCone;
                }
            }
        }

        // Expand: follow incoming (dependency) edges; the callback is handed to a parallel helper,
        // so the counter is updated with Interlocked.
        ParallelAlgorithms.WhileNotEmpty(
            nodesToSchedule,
            (node, add) =>
            {
                foreach (Edge inEdge in m_graph.GetIncomingEdges(node))
                {
                    if (transitiveDependencyNodeFilter.MarkVisited(inEdge.OtherNode))
                    {
                        add(inEdge.OtherNode);

                        if (IsProcess(inEdge.OtherNode))
                        {
                            Interlocked.Increment(ref processesInBuildCone);
                        }
                    }
                }
            });

        // At this point nodesToSchedule hasn't changed from step 2.
        // But now, transitiveDependencyNodeFilter has marked all nodes in nodesToSchedule, plus
        // their dependencies transitively.
    }

    IEnumerable<NodeId> scheduledNodes;
    var mustExecute = new HashSet<NodeId>();
    var stats = new BuildSetCalculatorStats();
    var metaPipCount = 0;

    using (m_counters.StartStopwatch(PipExecutorCounter.BuildSetCalculatorGetNodesToSchedule))
    {
        scheduledNodes = GetNodesToSchedule(
            nodesToSchedule,
            transitiveDependencyNodeFilter,
            forceSkipDepsMode,
            scheduleMetaPips,
            mustExecute,
            stats,
            ref metaPipCount);
    }

    int buildConeDuration = (int) m_counters.GetElapsedTime(PipExecutorCounter.BuildSetCalculatorComputeBuildCone).TotalMilliseconds;
    int getScheduledNodesDuration = (int) m_counters.GetElapsedTime(PipExecutorCounter.BuildSetCalculatorGetNodesToSchedule).TotalMilliseconds;
    int scheduledProcessCount = scheduledNodes.Count(IsProcess);

    Logger.Log.BuildSetCalculatorProcessStats(
        m_loggingContext,
        m_graph.Nodes.Count(IsProcess),
        explicitlySelectedProcessCount,
        processesInBuildCone,
        (processesInBuildCone - scheduledProcessCount) + stats.CleanMaterializedProcessFrontierCount,
        scheduledProcessCount,
        buildConeDuration + getScheduledNodesDuration);

    Logger.Log.BuildSetCalculatorStats(
        m_loggingContext,
        dirtyNodeCount,
        dirtyProcessCount,
        explicitlySelectedNodeCount,
        explicitlySelectedProcessCount,
        nonMaterializedNodeCount,
        nonMaterializedProcessCount,
        buildConeDuration,
        scheduledNodes.Count(),
        scheduledProcessCount,
        metaPipCount,
        getScheduledNodesDuration);

    // Cache-hit and frontier numbers are only reported for the regular (non force-skip-dependencies) mode.
    int incrementalSchedulingCacheHits = forceSkipDepsMode == ForceSkipDependenciesMode.Disabled
        ? (processesInBuildCone - scheduledProcessCount + stats.CleanMaterializedProcessFrontierCount)
        : 0;

    return new GetScheduledNodesResult(
        scheduledNodes: scheduledNodes,
        mustExecuteNodes: mustExecute,
        incrementalSchedulingCacheHitProcesses: incrementalSchedulingCacheHits,
        cleanMaterializedProcessFrontierCount: forceSkipDepsMode == ForceSkipDependenciesMode.Disabled ? stats.CleanMaterializedProcessFrontierCount : 0);
}