예제 #1
0
        // Calls the Batch service to get job metrics. This is done in two parts:
        //
        // 1. List all jobs in the account.
        // 2. For each job, collect metrics for that job (see CollectTaskMetricsAsync).
        //
        // For simplicity, job metrics (step 2) are collected serially.  You could reduce latency
        // by performing the CollectTaskMetricsAsync calls in parallel, but would need to
        // take care to synchronize access to the MetricsBuilder that accumulates the results.
        private async Task<MetricEvent> CollectMetricsAsync()
        {
            // The builder lives outside the try block so that the catch handler can
            // still read the collection start time when it reports a failure.
            var builder = new MetricEvent.Builder();
            builder.CollectionStarted = DateTime.UtcNow;

            try
            {
                // Covers the job listing plus every per-job collection below.
                Stopwatch overallClock = Stopwatch.StartNew();

                // Step 1: list the jobs (id and state only), timing just this call.
                Stopwatch jobListClock = Stopwatch.StartNew();
                var jobQuery = this.batchClient.JobOperations.ListJobs(DetailLevels.IdAndState.AllEntities);
                var allJobs = await jobQuery.ToListAsync(this.runCancel.Token);
                jobListClock.Stop();

                builder.ListJobsLatency = jobListClock.Elapsed;

                // Step 2: gather per-job metrics one job at a time; each call is awaited
                // before the next starts, so the builder is never touched concurrently.
                foreach (var currentJob in allJobs)
                {
                    await CollectTaskMetricsAsync(builder, currentJob);
                }

                overallClock.Stop();
                builder.TotalLatency = overallClock.Elapsed;
                builder.CollectionCompleted = DateTime.UtcNow;

                return builder.Build();
            }
            catch (Exception failure)
            {
                // Any exception is converted into a MetricEvent that carries the start
                // time, the current UTC time and the exception itself.
                return new MetricEvent(builder.CollectionStarted, DateTime.UtcNow, failure);
            }
        }
예제 #2
0
        // Calls the Batch service to get metrics for a single job.  The first time the
        // MetricMonitor sees a job, it creates a TaskStateCache to hold task state information,
        // and queries the states of *all* tasks in the job. Subsequent times, it queries
        // only for tasks whose states have changed since the previous query -- this significantly
        // reduces download volumes for large jobs. In either case, it then updates the
        // cached task states and aggregates them into a TaskStateCounts object.
        private async Task CollectTaskMetricsAsync(MetricEvent.Builder metricsBuilder, CloudJob job)
        {
            // Look the job up with a single TryGetValue call instead of ContainsKey followed
            // by the indexer, which would search the cache twice for the same key. A miss
            // means this is the first time the monitor has seen this job.
            TaskStateCache taskStateCache;
            bool firstTime = !this.jobStateCache.TryGetValue(job.Id, out taskStateCache);

            if (firstTime)
            {
                taskStateCache = new TaskStateCache();
                this.jobStateCache.Add(job.Id, taskStateCache);
            }

            // The first time a job is seen, every task has to be enumerated once to learn its
            // state. This is a relatively slow query.
            // Later calls only ask for tasks whose state changed since the previous query, widened
            // by an allowance for clock skew (approximately 30 seconds for Azure). For example, with
            // a monitoring period of 1 minute the query looks for changes in the last 1 minute 30 seconds.

            // TODO: it would be better to record the time at which the last query was issued and use that,
            // rather than subtracting the monitor interval from the current time
            DateTime since = DateTime.UtcNow - (this.monitorInterval + MaximumClockSkew);
            var tasksToList = firstTime
                ? DetailLevels.IdAndState.AllEntities
                : DetailLevels.IdAndState.OnlyChangedAfter(since);

            // Time only the task listing call; this latency is reported per job.
            var listTasksTimer = Stopwatch.StartNew();
            var tasks = await job.ListTasks(tasksToList).ToListAsync(this.runCancel.Token);
            listTasksTimer.Stop();

            var listTasksLatency = listTasksTimer.Elapsed;

            // Fold the returned states into the per-job cache.
            foreach (var task in tasks)
            {
                // NOTE(review): task.State is read via .Value, so a task returned without a state
                // would throw here -- presumably the id-and-state detail level always populates it.
                taskStateCache.UpdateTaskState(task.Id, task.State.Value);
            }

            // Aggregate the cached states and record them, with the listing latency, under the job id.
            var taskStateCounts = taskStateCache.GetTaskStateCounts();

            metricsBuilder.JobStats.Add(job.Id, new JobMetrics(listTasksLatency, taskStateCounts));
        }
예제 #3
0
        // Calls the Batch service to get job metrics. This is done in two parts:
        //
        // 1. List all jobs in the account.
        // 2. For each job, collect metrics for that job (see CollectTaskMetricsAsync).
        //
        // For simplicity, job metrics (step 2) are collected serially.  You could reduce latency
        // by performing the CollectTaskMetricsAsync calls in parallel, but would need to
        // take care to synchronize access to the MetricsBuilder that accumulates the results.
        private async Task<MetricEvent> CollectMetricsAsync()
        {
            // Record the start time before anything can fail; the error path reuses it.
            MetricEvent.Builder eventBuilder = new MetricEvent.Builder
            {
                CollectionStarted = DateTime.UtcNow,
            };

            try
            {
                // Outer timer: whole collection pass. Inner timer: job listing only.
                Stopwatch wholePassTimer = Stopwatch.StartNew();
                Stopwatch jobListingTimer = Stopwatch.StartNew();

                var jobList = await this.batchClient.JobOperations
                    .ListJobs(DetailLevels.IdAndState.AllEntities)
                    .ToListAsync(this.runCancel.Token);

                jobListingTimer.Stop();
                eventBuilder.ListJobsLatency = jobListingTimer.Elapsed;

                // Jobs are processed serially: each per-job collection finishes
                // before the next one begins.
                foreach (var batchJob in jobList)
                {
                    await this.CollectTaskMetricsAsync(eventBuilder, batchJob);
                }

                wholePassTimer.Stop();

                eventBuilder.TotalLatency = wholePassTimer.Elapsed;
                eventBuilder.CollectionCompleted = DateTime.UtcNow;

                return eventBuilder.Build();
            }
            catch (Exception error)
            {
                // Failures are reported as an event holding the start time, the time
                // of failure (current UTC time) and the exception, rather than thrown.
                return new MetricEvent(eventBuilder.CollectionStarted, DateTime.UtcNow, error);
            }
        }