Example #1
0
        /// <summary>
        /// Stores the heartbeat reported by a compute node in the nodes table and tells the
        /// caller how long to wait before reporting again.
        /// </summary>
        /// <param name="nodeInfo">Node information bound from the request body.</param>
        /// <param name="token">Cancellation token passed to the table operation.</param>
        /// <returns>
        /// <c>HeartbeatIntervalSeconds * 1000</c> when the heartbeat was stored; otherwise
        /// <c>RetryOnFailureSeconds * 1000</c>. Failures are logged, not rethrown.
        /// </returns>
        public async Task <int> ComputeNodeReportedAsync([FromBody] ComputeClusterNodeInformation nodeInfo, CancellationToken token)
        {
            try
            {
                // A missing body or name throws here and is handled by the catch below.
                var nodeName = nodeInfo.Name.ToLowerInvariant();

                this.logger.LogInformation("ComputeNodeReported. NodeName {0}, JobCount {1}", nodeName, nodeInfo.Jobs?.Count);

                var nodeTable = this.utilities.GetNodesTable();

                // The heartbeat is stored under a key derived from the lower-cased node name.
                var jsonTableEntity = new JsonTableEntity(
                    this.utilities.NodesPartitionKey,
                    this.utilities.GetHeartbeatKey(nodeName),
                    nodeInfo);

                var result = await nodeTable.ExecuteAsync(TableOperation.InsertOrReplace(jsonTableEntity), null, null, token);

                // Turn a non-success table status code into an exception so it takes the failure path.
                using (HttpResponseMessage r = new HttpResponseMessage((HttpStatusCode)result.HttpStatusCode))
                {
                    r.EnsureSuccessStatusCode();
                }

                return(this.utilities.Option.HeartbeatIntervalSeconds * 1000);
            }
            catch (Exception ex)
            {
                // nodeInfo may be null (that is one of the failures handled here), so the handler
                // must not dereference it unguarded or it would throw again and lose the retry answer.
                this.logger.LogError(ex, "ComputeNodeReported. NodeName {0}, JobCount {1}", nodeInfo?.Name, nodeInfo?.Jobs?.Count);
            }

            return(this.utilities.Option.RetryOnFailureSeconds * 1000);
        }
Example #2
0
        /// <summary>
        /// Compares the jobs a node reports as running with the job records in the jobs table and
        /// queues a cancel message for every reported job that is missing from the table or is
        /// already canceled, failed or finished there.
        /// </summary>
        /// <param name="nodeInfo">Node report whose <c>Jobs</c> list is checked; a null report or null list is a no-op.</param>
        /// <param name="token">Cancellation token passed to the table and queue operations.</param>
        public async T.Task Sync(ComputeClusterNodeInformation nodeInfo, CancellationToken token)
        {
            // A report without a job list has nothing to reconcile; callers treat Jobs as optional.
            if (nodeInfo?.Jobs == null)
            {
                return;
            }

            var jobsTable = this.Utilities.GetJobsTable();

            foreach (var j in nodeInfo.Jobs)
            {
                // The report does not carry the job type, so look the job up as ClusRun first and
                // fall back to Diagnostics.
                var job = await jobsTable.RetrieveAsync <Job>(this.Utilities.GetJobPartitionKey(JobType.ClusRun, j.JobId), this.Utilities.JobEntryKey, token)
                          ?? await jobsTable.RetrieveAsync <Job>(this.Utilities.GetJobPartitionKey(JobType.Diagnostics, j.JobId), this.Utilities.JobEntryKey, token);

                if (job == null || job.State == JobState.Canceled || job.State == JobState.Failed || job.State == JobState.Finished)
                {
                    this.Logger.Warning("Node {0}, {1} job {2} is reported running, but actually {3} in store.", nodeInfo.Name, job?.Type, j.JobId, job == null ? "null" : job.State.ToString());
                    var q = this.Utilities.GetNodeCancelQueue(this.ServerOptions.HostName);

                    // For a non-existent job the type does not matter; the cancel logic should handle it.
                    // Only this single message per job is queued; per-task cancel messages are not needed here.
                    var cancelMessage = new TaskEventMessage()
                    {
                        JobId = j.JobId, Id = 0, JobType = job?.Type ?? JobType.ClusRun, RequeueCount = 0, EventVerb = "cancel"
                    };

                    await q.AddMessageAsync(
                        new CloudQueueMessage(JsonConvert.SerializeObject(cancelMessage)),
                        null, null, null, null, token);
                }
            }
        }
        /// <summary>
        /// Records a compute node heartbeat in the nodes table, runs the job synchronizer on the
        /// report, and answers with the delay before the next report.
        /// </summary>
        /// <param name="nodeInfo">Node information bound from the request body.</param>
        /// <param name="token">Cancellation token checked up front and passed to the table and sync calls.</param>
        /// <returns>
        /// <c>HeartbeatIntervalSeconds * 1000</c> on success; <c>RetryOnFailureSeconds * 1000</c>
        /// when any step throws (the exception is logged and swallowed).
        /// </returns>
        public async T.Task <int> ComputeNodeReportedAsync([FromBody] ComputeClusterNodeInformation nodeInfo, CancellationToken token)
        {
            try
            {
                token.ThrowIfCancellationRequested();

                var reportedName = nodeInfo?.Name?.ToLowerInvariant();
                this.logger.Information("ComputeNodeReported. NodeName {0}, JobCount {1}", reportedName, nodeInfo?.Jobs?.Count);

                // Upsert the heartbeat under the key derived from the lower-cased node name.
                var table = this.utilities.GetNodesTable();
                var partitionKey = this.utilities.NodesPartitionKey;
                var heartbeatKey = this.utilities.GetHeartbeatKey(reportedName);
                var upsert = await table.InsertOrReplaceAsync(partitionKey, heartbeatKey, nodeInfo, token);

                // Convert a non-success status code into an exception so it lands in the catch below.
                var statusCode = (HttpStatusCode)upsert.HttpStatusCode;
                using (var response = new HttpResponseMessage(statusCode))
                {
                    response.EnsureSuccessStatusCode();
                }

                await this.synchronizer.Sync(nodeInfo, token);

                return this.utilities.Option.HeartbeatIntervalSeconds * 1000;
            }
            catch (Exception ex)
            {
                this.logger.Error(ex, "ComputeNodeReported. NodeName {0}, JobCount {1}", nodeInfo?.Name, nodeInfo?.Jobs?.Count);
                return this.utilities.Option.RetryOnFailureSeconds * 1000;
            }
        }
        /// <summary>
        /// Forwards a Linux compute node report to the scheduler callbacks and returns the
        /// scheduler's answer.
        /// </summary>
        /// <param name="nodeName">Not read by this method; the name is taken from <paramref name="nodeInfo"/>.</param>
        /// <param name="nodeInfo">Node information bound from the request body.</param>
        /// <returns>
        /// The value returned by the scheduler callback, or 5000 when anything throws
        /// (the exception is traced and swallowed).
        /// </returns>
        public int ComputeNodeReported(string nodeName, [FromBody] ComputeClusterNodeInformation nodeInfo)
        {
            try
            {
                var reportArg = new ComputeNodeInfoEventArg(nodeInfo.Name, nodeInfo);

                LinuxCommunicator.Instance.Tracer.TraceInfo(
                    "Linux ComputeNodeReported. NodeName {0}, JobCount {1}",
                    reportArg.NodeName,
                    reportArg.NodeInfo.Jobs.Count);

                return LinuxCommunicator.Instance.SchedulerCallbacks.ComputeNodeReported(reportArg);
            }
            catch (Exception ex)
            {
                LinuxCommunicator.Instance.Tracer.TraceException(ex);
                return 5000;
            }
        }
Example #5
0
        /// <summary>
        /// Builds the details of one node from three records read from the nodes table: its
        /// registration record, its heartbeat record, and the record stored under the
        /// minute-history key.
        /// </summary>
        /// <param name="name">Node name; it is lower-cased before any key is built.</param>
        /// <param name="token">Cancellation token passed to every table operation.</param>
        /// <returns>
        /// An <see cref="OkObjectResult"/> wrapping the node details, or a
        /// <see cref="StatusCodeResult"/> carrying the status code of the first table read that
        /// was not successful.
        /// </returns>
        public async Task <IActionResult> GetAsync(string name, CancellationToken token)
        {
            name = name.ToLowerInvariant();
            var registrationKey = this.utilities.GetRegistrationKey(name);

            var nodes  = this.utilities.GetNodesTable();
            var result = await nodes.ExecuteAsync(TableOperation.Retrieve <JsonTableEntity>(this.utilities.NodesPartitionKey, registrationKey), null, null, token);

            if (!result.IsSuccessfulStatusCode())
            {
                return(new StatusCodeResult(result.HttpStatusCode));
            }

            ComputeClusterRegistrationInformation registerInfo = (result.Result as JsonTableEntity)?.GetObject <ComputeClusterRegistrationInformation>();

            var heartbeatKey = this.utilities.GetHeartbeatKey(name);

            result = await nodes.ExecuteAsync(TableOperation.Retrieve <JsonTableEntity>(this.utilities.NodesPartitionKey, heartbeatKey), null, null, token);

            if (!result.IsSuccessfulStatusCode())
            {
                return(new StatusCodeResult(result.HttpStatusCode));
            }

            var entity = result.Result as JsonTableEntity;
            ComputeClusterNodeInformation nodeInfo = entity?.GetObject <ComputeClusterNodeInformation>();

            var node = new Node()
            {
                NodeRegistrationInfo = registerInfo, Name = name,
            };

            // The node is healthy while its heartbeat record is newer than the allowed number of
            // missed heartbeat intervals. A missing record makes the comparison false.
            if (entity?.Timestamp.AddSeconds(this.utilities.Option.MaxMissedHeartbeats * this.utilities.Option.HeartbeatIntervalSeconds) > DateTimeOffset.UtcNow)
            {
                node.Health          = NodeHealth.OK;

                // The stored report or its job list may be null; count that as zero running jobs
                // instead of failing the whole request.
                node.RunningJobCount = nodeInfo?.Jobs?.Count ?? 0;

                // NOTE(review): hard-coded value, looks like a placeholder - confirm.
                node.EventCount      = 5;
            }
            else
            {
                node.Health = NodeHealth.Error;
            }

            node.State = NodeState.Online;

            var nodeDetails = new NodeDetails()
            {
                NodeInfo = node, Jobs = nodeInfo?.Jobs,
            };

            var metricsKey = this.utilities.GetMinuteHistoryKey();

            result = await nodes.ExecuteAsync(TableOperation.Retrieve <JsonTableEntity>(this.utilities.GetNodePartitionKey(name), metricsKey), null, null, token);

            if (!result.IsSuccessfulStatusCode())
            {
                return(new StatusCodeResult(result.HttpStatusCode));
            }

            var historyEntity = result.Result as JsonTableEntity;

            // Same null-tolerant handling as the other two records: no entity means no history.
            nodeDetails.History = historyEntity?.GetObject <MetricHistory>();

            return(new OkObjectResult(nodeDetails));
        }