Example #1
0
        /// <summary>
        /// collectAllocatedInfo - Opens the allocation history of a single job and hands the rows to
        /// NodeDuration or CoreDuration, depending on the bNodesOnly switch
        /// </summary>
        /// <param name="job">ISchedulerJob whose allocation history is read</param>
        private static void collectAllocatedInfo(ISchedulerJob job)
        {
            if (bVerbose)
            {
                Console.WriteLine("Entering collectAllocatedInfo:  job {0} project: {1}", job.Id, job.Project);
            }

            // Columns requested for every allocation row.
            // NOTE(review): NodeDuration/CoreDuration index rows through RowIndex, which presumably
            // mirrors the order of these Add calls - confirm before reordering.
            IPropertyIdCollection requestedColumns = new PropertyIdCollection();
            requestedColumns.Add(AllocationProperties.NodeName);
            requestedColumns.Add(AllocationProperties.NodeId);
            requestedColumns.Add(AllocationProperties.CoreId);
            requestedColumns.Add(AllocationProperties.StartTime);
            requestedColumns.Add(AllocationProperties.EndTime);

            // OpenJobAllocationHistory returns information sorted by ascending AllocationProperties.StartTime.
            // The enumerator is disposed as soon as the chosen summarizer has consumed it.
            using (ISchedulerRowEnumerator allocationRows = job.OpenJobAllocationHistoryEnumerator(requestedColumns))
            {
                if (!bNodesOnly)
                {
                    CoreDuration(allocationRows);
                }
                else
                {
                    NodeDuration(allocationRows);
                }
            }
        }
Example #2
0
        /// <summary>
        /// NodeDuration - Collapse the per-core allocation rows of a job into per-node usage intervals,
        /// print them when /detailed is set, and add the job's totals to the run-wide counters
        /// </summary>
        /// <param name="rows">RowSet, allocated Core information from the job, to be resorted into node allocation</param>
        /// <remarks>
        /// A core row whose start lies inside (or touches) the latest interval of its node extends that
        /// interval; otherwise a new interval is started for the node. A node with a core that is still
        /// running is counted up to dtCurrent.
        /// NOTE(review): the merge logic assumes rows arrive ordered by ascending StartTime - confirm
        /// against the enumerator's contract.
        /// Side effects: writes to the console and updates iAllJobThreads and tsAllJobUsage.
        /// </remarks>
        private static void NodeDuration(ISchedulerRowEnumerator rows)
        {
            if (bVerbose)
            {
                Console.WriteLine("Entering NodeDuration");
            }

            TimeSpan tsTotal = new TimeSpan(0);

            List <NodeUse> nodeList = new List <NodeUse>();

            // Latest interval added to nodeList for each node id. This replaces a backwards scan of
            // nodeList per row, so the conversion stays linear in the number of rows.
            Dictionary <int, NodeUse> latestByNode = new Dictionary <int, NodeUse>();

            // Convert core rowset into node list
            foreach (PropertyRow row in rows)
            {
                int      nodeId    = (int)row[(int)RowIndex.NodeId].Value;
                DateTime coreStart = (DateTime)row[(int)RowIndex.StartTime].Value;

                // The EndTime slot only carries AllocationProperties.EndTime once the core has ended.
                // If the core is still running, use the maximum end time so all later rows for the node are swallowed.
                bool     coreEnded = row[(int)RowIndex.EndTime].Id == AllocationProperties.EndTime;
                DateTime coreEnd   = coreEnded ? (DateTime)row[(int)RowIndex.EndTime].Value : DateTime.MaxValue;

                // If this node does not yet exist, or if the current start is beyond the endtime in the list, add a new list item
                NodeUse latest;
                if (!latestByNode.TryGetValue(nodeId, out latest) || latest.EndTime < coreStart)
                {
                    if (bVerbose)
                    {
                        Console.WriteLine("Add item to Node List");
                    }

                    string  nodeName = (string)row[(int)RowIndex.NodeName].Value;
                    NodeUse nu       = new NodeUse(nodeId, nodeName, coreStart, coreEnd);
                    nodeList.Add(nu);
                    latestByNode[nodeId] = nu;

                    if (bVerbose)
                    {
                        Console.WriteLine("Added Node List item for: {0}", nodeName);
                    }
                }
                else if (coreEnd > latest.EndTime)
                {
                    // This core overlaps the node's latest interval and ends later (or is still
                    // running, in which case coreEnd is DateTime.MaxValue): extend the interval.
                    latest.EndTime = coreEnd;
                }
            }
            if (bVerbose)
            {
                Console.WriteLine("Node List created");
            }

            // Add all node duration and display information if appropriate
            foreach (NodeUse nodeUse in nodeList)
            {
                bool stillRunning = (nodeUse.EndTime == DateTime.MaxValue);

                // Show each node only if /detailed was set
                if (bDetailed)
                {
                    Console.Write("{0} {1} Start: {2} End: ",
                                  nodeUse.NodeName,
                                  nodeUse.NodeId,
                                  nodeUse.StartTime);
                    if (!stillRunning)
                    {
                        Console.WriteLine((DateTime)nodeUse.EndTime);
                    }
                    else
                    {
                        Console.WriteLine(CORERUNNING);
                    }
                }
                if (bVerbose)
                {
                    Console.WriteLine("dtCurrent:  {0}", dtCurrent);
                }

                // If the node still has a core running, set the end length to the current time
                if (stillRunning)
                {
                    nodeUse.EndTime = dtCurrent;
                }

                // Add the amount of time spent on using this node
                tsTotal += nodeUse.EndTime - nodeUse.StartTime;
            }
            iAllJobThreads += nodeList.Count;
            tsAllJobUsage  += tsTotal;

            // Round up/down to seconds
            if (tsTotal.TotalSeconds >= iRoundToSecondsMinimum)
            {
                if (tsTotal.Milliseconds >= 500)
                {
                    tsTotal = tsTotal.Add(TimeSpan.FromSeconds(1));
                }

                // Drop the sub-second part on ticks rather than through an int cast of TotalSeconds:
                // an int cannot hold totals above int.MaxValue seconds (about 68 years of usage).
                tsTotal = TimeSpan.FromTicks(tsTotal.Ticks - (tsTotal.Ticks % TimeSpan.TicksPerSecond));
            }

            Console.WriteLine("Total nodes: {0} Total node usage: {1}", nodeList.Count, tsTotal);
        }
Example #3
0
        /// <summary>
        /// CoreDuration - Sum the time each allocated core was in use by the job, print the rows when
        /// /detailed is set, and add the job's totals to the run-wide counters
        /// </summary>
        /// <param name="rows">RowSet, allocated Core information from the job</param>
        /// <remarks>
        /// A core that is still running is counted up to dtCurrent and shown as CORERUNNING.
        /// Side effects: writes to the console and updates iAllJobThreads and tsAllJobUsage.
        /// </remarks>
        private static void CoreDuration(ISchedulerRowEnumerator rows)
        {
            TimeSpan tsTotal       = new TimeSpan(0);
            int      iTotalThreads = 0;

            if (bVerbose)
            {
                Console.WriteLine("Entering CoreDuration");
            }

            foreach (PropertyRow row in rows)
            {
                DateTime dtStart = (DateTime)row[(int)RowIndex.StartTime].Value;

                // The EndTime slot only carries AllocationProperties.EndTime once the core has ended.
                // Track "still running" explicitly instead of comparing the end time with dtCurrent,
                // so a core that really ended at dtCurrent is not reported as running.
                bool     bCoreRunning = (row[(int)RowIndex.EndTime].Id != AllocationProperties.EndTime);
                DateTime dtEnd        = bCoreRunning ? dtCurrent : (DateTime)row[(int)RowIndex.EndTime].Value;

                // Show each core only if /detailed was set
                if (bDetailed)
                {
                    Console.WriteLine("{0} {1}.{2} Start: {3} End: {4}",
                                      row[(int)RowIndex.NodeName].Value,
                                      row[(int)RowIndex.NodeId].Value,
                                      row[(int)RowIndex.CoreId].Value,
                                      dtStart,
                                      (bCoreRunning ? CORERUNNING : dtEnd.ToString()));
                }

                // Add the amount of time spent on using this core
                tsTotal += dtEnd - dtStart;

                // Increment the number of cores opened by the job
                // Note:  The same core can be opened and closed multiple times and each duration will be incremented
                iTotalThreads++;
            }
            iAllJobThreads += iTotalThreads;
            tsAllJobUsage  += tsTotal;

            // Round up/down to seconds
            if (bVerbose)
            {
                Console.WriteLine("Total Seconds:  {0}", tsTotal.TotalSeconds);
            }
            if (tsTotal.TotalSeconds >= iRoundToSecondsMinimum)
            {
                if (tsTotal.Milliseconds >= 500)
                {
                    tsTotal = tsTotal.Add(TimeSpan.FromSeconds(1));
                }

                // Drop the sub-second part on ticks rather than through an int cast of TotalSeconds:
                // summed core time can exceed int.MaxValue seconds (about 68 core-years) on large jobs.
                tsTotal = TimeSpan.FromTicks(tsTotal.Ticks - (tsTotal.Ticks % TimeSpan.TicksPerSecond));
            }

            Console.WriteLine("Total cores: {0} Total core usage: {1}", iTotalThreads, tsTotal.ToString());
        }
Example #4
0
        /// <summary>
        /// Gets the info of the service tasks whose state changed since the last successful query
        /// </summary>
        /// <returns>
        /// A list with one TaskInfo per reported task, or null when no task row matched the query
        /// or when an exception was thrown while querying
        /// </returns>
        /// <remarks>
        /// Only tasks whose ChangeTime lies in (this.lastChangeTime, changeTime] are queried, where
        /// changeTime is DateTime.UtcNow captured on entry.
        /// this.lastChangeTime is advanced to changeTime only when a result list is returned; when the
        /// method returns null it is left untouched, so the next call covers the same window again.
        /// Exceptions are not propagated: they are traced as warnings and reported as a null result.
        /// </remarks>
        private List <TaskInfo> GetTaskInfo()
        {
            DateTime changeTime = DateTime.UtcNow;

            try
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Query task info...");

                // Step 1: Query task allocation history to fetch node id and core id for tasks
                Dictionary <int, TaskAllocationHistoryItem> taskInfoDic = new Dictionary <int, TaskAllocationHistoryItem>();
                PropertyIdCollection allocationPropertyCollection       = new PropertyIdCollection();
                allocationPropertyCollection.AddPropertyId(AllocationProperties.TaskId);
                allocationPropertyCollection.AddPropertyId(AllocationProperties.CoreId);
                allocationPropertyCollection.AddPropertyId(AllocationProperties.NodeName);
                using (ISchedulerRowEnumerator rows = this.schedulerJob.OpenTaskAllocationHistoryEnumerator(allocationPropertyCollection))
                {
                    foreach (PropertyRow row in rows)
                    {
                        // Note: Finished/Failed/Canceled tasks will also be enumerated here.
                        // We are going to add them into the dic regardless of the state
                        // because only running tasks will be queried in the following logic.
                        int objectId = (int)row[AllocationProperties.TaskId].Value;
                        TaskAllocationHistoryItem taskInfo;
                        if (taskInfoDic.TryGetValue(objectId, out taskInfo))
                        {
                            // For each task instance cache the assigned resource with the lowest coreId. This is needed when node or socket allocation is used
                            //   in order to generate the correct port to connect to the service host
                            // NOTE(review): the in-place updates below assume TaskAllocationHistoryItem is a
                            // reference type, so they reach the entry stored in the dictionary - confirm.
                            int coreId = (int)row[AllocationProperties.CoreId].Value;
                            if (taskInfo.FirstCoreId > coreId)
                            {
                                taskInfo.FirstCoreId = coreId;
                            }

                            taskInfo.Capacity++;
                        }
                        else
                        {
                            taskInfo             = new TaskAllocationHistoryItem();
                            taskInfo.Capacity    = 1;
                            taskInfo.FirstCoreId = (int)row[AllocationProperties.CoreId].Value;
                            taskInfo.NodeName    = (string)row[AllocationProperties.NodeName].Value;
                            taskInfoDic.Add(objectId, taskInfo);
                        }
                    }
                }

                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Query task info (got task allocation history).");

                // Step 2: Get task states from scheduler
                // Only tasks whose change time is between lastChangeTime (exclusive) and changeTime (inclusive) will be queried
                // Only task id and state are required; the node name comes from the allocation history gathered in step 1.
                IPropertyIdCollection collection = new PropertyIdCollection();
                collection.AddPropertyId(TaskPropertyIds.Id);
                collection.AddPropertyId(TaskPropertyIds.State);
                FilterCollection fc = new FilterCollection();
                fc.Add(FilterOperator.GreaterThan, TaskPropertyIds.ChangeTime, this.lastChangeTime);
                fc.Add(FilterOperator.LessThanOrEqual, TaskPropertyIds.ChangeTime, changeTime);

                // FIXME: There's performance impact on this query because we look for TaskPropertyIds.Type
                // which requires a table join. Need to have a better way to do so.
                fc.Add(FilterOperator.Equal, TaskPropertyIds.Type, TaskType.Service);

                List <PropertyRow> taskRows = new List <PropertyRow>();

                // Copy the rows out and dispose the enumerator right away, the same way the allocation
                // history enumerator is handled in step 1, instead of leaving it to the finalizer.
                using (ISchedulerRowEnumerator taskEnumerator = this.schedulerJob.OpenTaskEnumerator(collection, fc, null, true))
                {
                    foreach (PropertyRow taskRow in taskEnumerator)
                    {
                        taskRows.Add(taskRow);
                    }
                }

                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose,
                                       "[JobMonitorEntry] GetTaskInfo, got {0} rows.", taskRows.Count);

                if (taskRows.Count == 0)
                {
                    // No service task matched the change-time window (e.g. none dispatched yet).
                    // lastChangeTime is intentionally not advanced on this path.
                    TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning,
                                           "[JobMonitorEntry] Failed to get task property rows.");

                    return(null);
                }

                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Query task info (got task info rows from scheduler).");

                // Refresh the job before reading RequeueCount, which is stamped on every TaskInfo returned
                this.schedulerJob.Refresh();
                int jobRequeueCount = this.schedulerJob.RequeueCount;
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Job requeue count is {0}", jobRequeueCount);

                List <TaskInfo> results = new List <TaskInfo>(taskRows.Count);
                foreach (PropertyRow row in taskRows)
                {
                    int objectId = (int)row[TaskPropertyIds.Id].Value;

                    // Tasks without any allocation history entry are not reported, whatever their state
                    TaskAllocationHistoryItem taskInfo;
                    if (!taskInfoDic.TryGetValue(objectId, out taskInfo))
                    {
                        continue;
                    }

                    TaskState state = (TaskState)row[TaskPropertyIds.State].Value;

                    if (state == TaskState.Running || state == TaskState.Dispatching)
                    {
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Information, "[JobMonitorEntry] Task {0} changed into Running", objectId);

                        string       machineName              = taskInfo.NodeName;
                        NodeLocation location                 = NodeLocation.OnPremise;
                        string       azureServiceName         = null;
                        string       azureLoadBalancerAddress = null;

                        try
                        {
                            this.GetNodeInfo(machineName, out location, out azureServiceName, out azureLoadBalancerAddress);
                        }
                        catch (Exception e)
                        {
                            // If an exception happens when querying node info, skip this task for now.
                            // NOTE(review): lastChangeTime is still advanced at the end of this method, so the
                            // skipped task is only picked up again if its ChangeTime moves - confirm this is intended.
                            TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[JobMonitorEntry] -> Get node info for task {0} throws exception. Exception: {1}", objectId, e);
                            continue;
                        }

                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] ->Get machine name for task {0}: {1}", objectId, machineName);

                        int capacity  = taskInfo.Capacity;
                        int coreindex = taskInfo.FirstCoreId;

                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] ->Get coreid for task {0}: {1}", objectId, coreindex);
                        TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] ->Get AzureLoadBalancerAddress for task {0}: {1}", objectId, azureLoadBalancerAddress);

                        TaskInfo info = new TaskInfo();
                        info.Id       = objectId;
                        info.Capacity = capacity;

                        // The node name goes into MachineVirtualName when running on Azure, MachineName otherwise
                        if (SoaHelper.IsOnAzure())
                        {
                            info.MachineVirtualName = machineName;
                        }
                        else
                        {
                            info.MachineName = machineName;
                        }

                        info.Location                 = NodeLocationConverter.FromHpcNodeLocation(location);
                        info.ProxyServiceName         = azureServiceName;
                        info.AzureLoadBalancerAddress = azureLoadBalancerAddress;
                        info.State           = TaskStateConverter.FromHpcTaskState(state);
                        info.FirstCoreIndex  = coreindex;
                        info.JobRequeueCount = jobRequeueCount;
                        results.Add(info);
                    }
                    else if (state == TaskState.Failed || state == TaskState.Canceled || state == TaskState.Canceling || state == TaskState.Finished || state == TaskState.Finishing)
                    {
                        // Ending/ended tasks are reported with id, state and requeue count only
                        TaskInfo info = new TaskInfo();
                        info.Id              = objectId;
                        info.State           = TaskStateConverter.FromHpcTaskState(state);
                        info.JobRequeueCount = jobRequeueCount;
                        results.Add(info);
                    }
                }

                // The window was processed: the next query starts right after changeTime
                this.lastChangeTime = changeTime;
                return(results);
            }
            catch (Exception ex)
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Warning, "[JobMonitorEntry] Fail when get task info: {0}", ex);
                return(null);
            }
            finally
            {
                TraceHelper.TraceEvent(this.sessionid, TraceEventType.Verbose, "[JobMonitorEntry] Query task info finished.");
            }
        }