/// <summary>
/// Background loop that looks for cluster tasks in the Created state assigned to this node and starts
/// as many of them as the configured concurrency limit allows.
/// </summary>
/// <remarks>
/// The loop never completes normally; it sleeps one second between scans. Only the per-task start-up is
/// wrapped in a try/catch, so an exception thrown while reading the state machine or sending the status
/// update propagates out of this method and ends the scan.
/// </remarks>
private async Task ScanTasks()
{
    while (true)
    {
        var isActiveMember =
            (_nodeStateService.Role == NodeState.Follower || _nodeStateService.Role == NodeState.Leader)
            && _nodeStateService.IsBootstrapped
            && _nodeStateService.InCluster;

        if (isActiveMember)
        {
            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Starting task watch.");

            // Tasks assigned to this node that have not been picked up yet.
            var tasks = _stateMachine.CurrentState
                .GetNodeClusterTasks(new ClusterTaskStatuses[] { ClusterTaskStatuses.Created }, _nodeStateService.Id)
                .ToList();

            // Tasks this node started earlier that are still running count against the limit.
            var currentTasksNo = _nodeTasks.Count(t => !t.Value.Task.IsCompleted);

            // Can be zero or negative when the node is already at (or above) its limit.
            var numberOfTasksToAssign = Math.Min(tasks.Count, _clusterOptions.ConcurrentTasks - currentTasksNo);

            _logger.LogDebug(_nodeStateService.GetNodeLogId() + numberOfTasksToAssign + "tasks to run. || " + currentTasksNo);

            if (numberOfTasksToAssign > 0)
            {
                var tasksToStart = tasks.GetRange(0, numberOfTasksToAssign);

                // Flag the batch as InProgress and wait for the commit before starting any work.
                await _clusterClient.Send(new ExecuteCommands()
                {
                    Commands = new List<BaseCommand>()
                    {
                        new UpdateClusterTasks()
                        {
                            TasksToUpdate = tasksToStart.Select(t => new TaskUpdate()
                            {
                                Status = ClusterTaskStatuses.InProgress,
                                TaskId = t.Id
                            }).ToList()
                        }
                    },
                    WaitForCommits = true
                });

                foreach (var task in tasksToStart)
                {
                    _logger.LogDebug(_nodeStateService.GetNodeLogId() + " is starting task " + task.ToString());
                    try
                    {
                        // The task definition is cloned before it is handed to the runner.
                        var newTask = StartNodeTask(SystemExtension.Clone(task));
                        _nodeTasks.TryAdd(task.Id, new NodeTaskMetadata()
                        {
                            Id = task.Id,
                            Task = Task.Run(() => newTask)
                        });
                    }
                    catch (Exception e)
                    {
                        // Include the cause; without it the failure cannot be diagnosed from the log.
                        _logger.LogCritical(_nodeStateService.GetNodeLogId() + "Failed to fail step " + task.Id + " gracefully."
                            + " Error: " + e.Message + Environment.NewLine + e.StackTrace);
                    }
                }
            }
        }

        await Task.Delay(1000);
    }
}
/// <summary>
/// Appends a write operation to the shard's operation log, applies it locally and replicates it to every
/// other in-sync allocation of the shard.
/// </summary>
/// <param name="data">Payload of the operation; its ShardId and ShardType identify the target shard.</param>
/// <param name="operationType">Kind of operation recorded on the log entry.</param>
/// <param name="operationId">Identifier of the operation; it is also mixed into the shard hash.</param>
/// <param name="transactionDate">Timestamp stored on the operation.</param>
/// <returns>
/// A response carrying the position and hash of the new operation when the repository accepted it,
/// otherwise a response with <c>IsSuccessful = false</c>.
/// </returns>
public async Task<WriteShardDataResponse> WriteShardData(ShardData data, ShardOperationOptions operationType, string operationId, DateTime transactionDate)
{
    var operation = new ShardWriteOperation()
    {
        Data = data,
        Id = operationId,
        Operation = operationType,
        TransactionDate = transactionDate
    };

    ShardWriteOperation previous = await GetOrPopulateOperationCache(operation.Data.ShardId.Value);

    // Positions start at 1; the hash chains the previous operation's hash with this operation's id.
    var previousHash = "";
    if (previous == null)
    {
        operation.Pos = 1;
    }
    else
    {
        operation.Pos = previous.Pos + 1;
        previousHash = previous.ShardHash;
    }
    operation.ShardHash = ObjectUtility.HashStrings(previousHash, operation.Id);

    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "writing new operation " + operationId + " with data " + Environment.NewLine + JsonConvert.SerializeObject(data, Formatting.Indented));

    // Persist the log entry first; nothing else happens if the repository rejects it.
    var persisted = await _shardRepository.AddShardWriteOperationAsync(operation);
    if (!persisted)
    {
        return new WriteShardDataResponse()
        {
            IsSuccessful = false
        };
    }

    ApplyOperationToDatastore(operation);
    var shardMetadata = _stateMachine.GetShard(operation.Data.ShardType, operation.Data.ShardId.Value);
    await _shardRepository.MarkShardWriteOperationAppliedAsync(operation.Id);
    UpdateOperationCache(operation.Data.ShardId.Value, operation);

    // Nodes whose replication failed; collected concurrently by the replication calls below.
    var failedNodes = new ConcurrentBag<Guid>();

    // Sends the operation to one allocation and records the node as failed on any error.
    async Task ReplicateTo(Guid allocation)
    {
        try
        {
            var result = await _clusterClient.Send(allocation, new ReplicateShardWriteOperation()
            {
                Operation = operation
            });

            if (!result.IsSuccessful)
            {
                throw new Exception("Failed to replicate data to shard " + shardMetadata.Id + " to node " + allocation + " for operation " + operation.ToString() + Environment.NewLine + JsonConvert.SerializeObject(operation, Formatting.Indented));
            }

            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Successfully replicated all " + shardMetadata.Id + "shards.");
        }
        catch (TaskCanceledException)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " on node " + allocation + " for operation " + operation.Pos + " as request timed out, marking shard as not insync...");
            failedNodes.Add(allocation);
        }
        catch (Exception e)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " for operation " + operation.Pos + " with error " + e.Message + ", marking shard as not insync..." + Environment.NewLine + e.StackTrace);
            failedNodes.Add(allocation);
        }
    }

    // Every in-sync allocation except this node.
    var replications = new List<Task>();
    foreach (var allocation in shardMetadata.InsyncAllocations)
    {
        if (allocation != _nodeStateService.Id)
        {
            replications.Add(ReplicateTo(allocation));
        }
    }
    await Task.WhenAll(replications);

    if (failedNodes.Any())
    {
        // Move the failed nodes from the in-sync set to the stale set and wait for the commit.
        await _clusterClient.Send(new ExecuteCommands()
        {
            Commands = new List<BaseCommand>()
            {
                new UpdateShardMetadataAllocations()
                {
                    ShardId = data.ShardId.Value,
                    Type = data.ShardType,
                    StaleAllocationsToAdd = failedNodes.ToHashSet(),
                    InsyncAllocationsToRemove = failedNodes.ToHashSet()
                }
            },
            WaitForCommits = true
        });
        _logger.LogInformation(_nodeStateService.GetNodeLogId() + " had stale virtual machines.");
    }

    return new WriteShardDataResponse()
    {
        Pos = operation.Pos,
        ShardHash = operation.ShardHash,
        IsSuccessful = true
    };
}
/// <summary>
/// Dispatches a data-related cluster request to the handler that matches its concrete type.
/// </summary>
/// <typeparam name="TResponse">Response type declared by the request.</typeparam>
/// <param name="request">The request to handle.</param>
/// <returns>
/// The handler's response cast to <typeparamref name="TResponse"/>. Any failure, including an
/// unrecognised request type, is logged and reported as a new response with <c>IsSuccessful = false</c>.
/// </returns>
public async Task<TResponse> Handle<TResponse>(IClusterRequest<TResponse> request) where TResponse : BaseResponse, new()
{
    try
    {
        // Captured on entry; not read anywhere in this method.
        DateTime commandStartTime = DateTime.Now;

        // The checks run in the same order as the request types were originally listed.
        if (request is RequestDataShard dataShardRequest)
        {
            return (TResponse)(object)await RequestDataShardHandler(dataShardRequest);
        }
        if (request is AddShardWriteOperation addOperationRequest)
        {
            return (TResponse)(object)await AddShardWriteOperationHandler(addOperationRequest);
        }
        if (request is RequestCreateIndex createIndexRequest)
        {
            return (TResponse)(object)await RequestCreateIndexHandler(createIndexRequest);
        }
        if (request is AllocateShard allocateShardRequest)
        {
            return (TResponse)(object)await AllocateShardHandler(allocateShardRequest);
        }
        if (request is ReplicateShardWriteOperation replicateRequest)
        {
            return (TResponse)(object)await ReplicateShardWriteOperationHandler(replicateRequest);
        }
        if (request is RequestShardWriteOperations operationsRequest)
        {
            return (TResponse)(object)await RequestShardWriteOperationsHandler(operationsRequest);
        }
        if (request is RequestShardSync shardSyncRequest)
        {
            return (TResponse)(object)await RequestShardSyncHandler(shardSyncRequest);
        }

        // Caught by the general handler below, so callers see a failed response rather than an exception.
        throw new Exception("Request is not implemented");
    }
    catch (TaskCanceledException)
    {
        var timeoutMessage = _nodeStateService.GetNodeLogId() + "Request " + request.RequestName + " timed out...";
        _logger.LogWarning(timeoutMessage);
        return new TResponse()
        {
            IsSuccessful = false
        };
    }
    catch (Exception e)
    {
        var failureMessage = _nodeStateService.GetNodeLogId() + "Failed to handle request " + request.RequestName + " with error " + e.Message + Environment.StackTrace + e.StackTrace;
        _logger.LogError(failureMessage);
        return new TResponse()
        {
            IsSuccessful = false
        };
    }
}
/// <summary>
/// Creates an index of the given type: builds the requested number of shards across all currently
/// contactable nodes, asks each node to allocate its copy, then sends a CreateIndex command to the
/// current leader.
/// </summary>
/// <param name="type">Index/shard type to create.</param>
/// <param name="dataTransferTimeoutMs">Maximum time in milliseconds to wait for at least one contactable node.</param>
/// <param name="numberOfShards">Number of shards to create for the index.</param>
/// <returns>
/// True when the CreateIndex command was reported successful; false when it was not or when any step
/// threw. Exceptions, including the timeout while waiting for nodes, are caught and logged rather than
/// propagated.
/// </returns>
public async Task<bool> CreateIndexAsync(string type, int dataTransferTimeoutMs, int numberOfShards)
{
    try
    {
        var eligbleNodes = _stateMachine.CurrentState.Nodes.Where(n => n.Value.IsContactable).ToDictionary(k => k.Key, v => v.Value);
        var rand = new Random();
        DateTime startTime = DateTime.Now;

        // Wait, re-reading the cluster state once a second, until at least one node is contactable.
        while (eligbleNodes.Count == 0)
        {
            if ((DateTime.Now - startTime).TotalMilliseconds > dataTransferTimeoutMs)
            {
                _logger.LogError("Failed to create indext type " + type + " request timed out...");
                throw new ClusterOperationTimeoutException("Failed to create indext type " + type + " request timed out...");
            }
            _logger.LogWarning(_nodeStateService.GetNodeLogId() + "No eligible nodes found, awaiting eligible nodes.");
            await Task.Delay(1000);
            eligbleNodes = _stateMachine.CurrentState.Nodes.Where(n => n.Value.IsContactable).ToDictionary(k => k.Key, v => v.Value);
        }

        var shards = new List<ShardAllocationMetadata>();
        for (var i = 0; i < numberOfShards; i++)
        {
            // Every eligible node holds an in-sync copy; the primary is picked at random among them.
            var shard = new ShardAllocationMetadata()
            {
                InsyncAllocations = eligbleNodes.Keys.ToHashSet(),
                PrimaryAllocation = eligbleNodes.ElementAt(rand.Next(0, eligbleNodes.Count)).Key,
                Id = Guid.NewGuid(),
                Type = type
            };
            shards.Add(shard);

            foreach (var allocation in shard.InsyncAllocations)
            {
                if (allocation != _nodeStateService.Id)
                {
                    // NOTE(review): the response of the remote allocation is not inspected - confirm that is intended.
                    await _clusterClient.Send(allocation, new AllocateShard()
                    {
                        ShardId = shard.Id,
                        Type = type
                    });
                }
                else
                {
                    AllocateShard(shard.Id, type);
                }
            }
        }

        var result = await _clusterClient.Send(_nodeStateService.CurrentLeader.Value, new ExecuteCommands()
        {
            Commands = new List<CreateIndex>()
            {
                new CreateIndex()
                {
                    Type = type,
                    Shards = shards
                }
            },
            WaitForCommits = true
        });

        // Report success only when the command itself succeeded.
        if (!result.IsSuccessful)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to create index type " + type + ", the create index command was not successful.");
            return false;
        }

        return true;
    }
    catch (Exception e)
    {
        _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Error while assigning primary node " + e.Message + Environment.NewLine + e.StackTrace);
        return false;
    }
}
/// <summary>
/// Entry point for RPCs received by this node: rejects requests while the node is not ready, dispatches
/// the request to the raft or data service by concrete type, and publishes a timing metric.
/// </summary>
/// <typeparam name="TResponse">Response type declared by the request.</typeparam>
/// <param name="request">The incoming request.</param>
/// <returns>
/// The handler's response. A response with <c>IsSuccessful = false</c> is returned when the node is not
/// bootstrapped, when a cluster request arrives while the node is not in the cluster, or when handling
/// throws (including for an unrecognised request type).
/// </returns>
public async Task<TResponse> Handle<TResponse>(IClusterRequest<TResponse> request) where TResponse : BaseResponse, new()
{
    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Detected RPC " + request.GetType().Name + "." + Environment.NewLine + JsonConvert.SerializeObject(request, Formatting.Indented));

    if (!_nodeStateService.IsBootstrapped)
    {
        _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Node is not ready...");
        return new TResponse()
        {
            IsSuccessful = false,
            ErrorMessage = "Node is not ready..."
        };
    }

    if (IsClusterRequest<TResponse>(request) && !_nodeStateService.InCluster)
    {
        _logger.LogWarning(_nodeStateService.GetNodeLogId() + "Reqeuest rejected, node is not apart of cluster...");
        return new TResponse()
        {
            IsSuccessful = false,
            ErrorMessage = "Node is not apart of cluster..."
        };
    }

    DateTime commandStartTime = DateTime.Now;
    try
    {
        TResponse response;
        switch (request)
        {
            // ExecuteCommands and RequestCreateIndex go through HandleIfLeaderOrReroute.
            case ExecuteCommands t1:
                response = await HandleIfLeaderOrReroute(request, async () => (TResponse)(object)await _raftService.Handle(t1));
                break;
            case RequestVote t1:
                response = (TResponse)(object)await _raftService.Handle(t1);
                break;
            case AppendEntry t1:
                response = (TResponse)(object)await _raftService.Handle(t1);
                break;
            case InstallSnapshot t1:
                response = (TResponse)(object)await _raftService.Handle(t1);
                break;
            case RequestCreateIndex t1:
                response = await HandleIfLeaderOrReroute(request, async () => (TResponse)(object)await _dataService.Handle(t1));
                break;
            case AddShardWriteOperation t1:
                response = (TResponse)(object)await _dataService.Handle(t1);
                break;
            case RequestDataShard t1:
                response = (TResponse)(object)await _dataService.Handle(t1);
                break;
            case AllocateShard t1:
                response = (TResponse)(object)await _dataService.Handle(t1);
                break;
            case ReplicateShardWriteOperation t1:
                response = (TResponse)(object)await _dataService.Handle(t1);
                break;
            case RequestShardWriteOperations t1:
                response = (TResponse)(object)await _dataService.Handle(t1);
                break;
            default:
                // Caught below, so callers receive a failed response rather than an exception.
                throw new Exception("Request is not implemented");
        }

        // Best-effort: a metrics problem must not change the outcome of a request that was handled.
        PublishCommandElapsedMetric(request, commandStartTime);

        return response;
    }
    catch (TaskCanceledException)
    {
        _logger.LogWarning(_nodeStateService.GetNodeLogId() + "Request " + request.RequestName + " timed out...");
        return new TResponse()
        {
            IsSuccessful = false
        };
    }
    catch (Exception e)
    {
        _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to handle request " + request.RequestName + " with error " + e.Message + Environment.StackTrace + e.StackTrace);
        return new TResponse()
        {
            IsSuccessful = false
        };
    }
}

/// <summary>
/// Raises MetricGenerated with the elapsed handling time of a request, at most once per
/// MetricsIntervalMs for each request name. Only publishes while this node is the leader, the request
/// has its Metric flag set and the event has subscribers. Never throws.
/// </summary>
private void PublishCommandElapsedMetric<TResponse>(IClusterRequest<TResponse> request, DateTime commandStartTime) where TResponse : BaseResponse, new()
{
    try
    {
        // Snapshot the delegate so an unsubscribe between the check and the call cannot null it out.
        var handler = MetricGenerated;
        if (handler == null || _nodeStateService.Role != NodeState.Leader || !request.Metric)
        {
            return;
        }

        var now = DateTime.Now;

        // The TryAdd/TryUpdate results decide who publishes, so concurrent requests of the same name
        // produce a single metric per interval.
        bool shouldPublish;
        if (lastMetricGenerated.TryAdd(request.RequestName, now))
        {
            shouldPublish = true;
        }
        else
        {
            shouldPublish = lastMetricGenerated.TryGetValue(request.RequestName, out var lastPublished)
                && (now - lastPublished).TotalMilliseconds > _clusterOptions.MetricsIntervalMs
                && lastMetricGenerated.TryUpdate(request.RequestName, now, lastPublished);
        }

        if (!shouldPublish)
        {
            return;
        }

        handler.Invoke(this, new Metric()
        {
            Date = now,
            IntervalMs = 0,
            Type = MetricTypes.ClusterCommandElapsed(request.RequestName),
            Value = (now - commandStartTime).TotalMilliseconds
        });
    }
    catch (Exception e)
    {
        _logger.LogWarning(_nodeStateService.GetNodeLogId() + "Failed to publish metric for request " + request.RequestName + " with error " + e.Message);
    }
}
/// <summary>
/// Fetches an object by id. With a shard id the lookup is served by the local data router; without one,
/// the primary allocation of every shard of the given type is queried concurrently and the first hit
/// is returned.
/// </summary>
/// <param name="objectId">Id of the object to fetch.</param>
/// <param name="type">Shard type whose primary shards are searched.</param>
/// <param name="timeoutMs">Maximum time in milliseconds to wait for a hit or for every shard to respond.</param>
/// <param name="shardId">When non-null, only the local data router is consulted.</param>
/// <returns>The object when a shard reported it, or null once every shard has responded without a hit.</returns>
/// <exception cref="ClusterOperationTimeoutException">
/// Thrown when neither a hit nor a full set of responses arrived within <paramref name="timeoutMs"/>.
/// A shard whose query threw is logged and never counts as responded.
/// </exception>
public async Task<ShardData> GetData(Guid objectId, string type, int timeoutMs, Guid? shardId = null)
{
    var startTime = DateTime.Now;

    if (shardId != null)
    {
        // NOTE(review): shardId is not forwarded to the router here - confirm that is intended.
        return await _dataRouter.GetDataAsync(type, objectId);
    }

    var shards = _stateMachine.GetAllPrimaryShards(type);
    bool foundResult = false;
    ShardData finalObject = null;
    var totalRespondedShards = 0;

    // ToList starts every query immediately; completion is observed through the shared state polled
    // below. Each query catches its own exceptions, so none of these tasks can fault.
    var shardQueries = shards.Select(async shard =>
    {
        try
        {
            bool found;
            ShardData shardObject;

            if (shard.Value != _nodeStateService.Id)
            {
                var result = await _clusterClient.Send(shard.Value, new RequestDataShard()
                {
                    ObjectId = objectId,
                    ShardId = shard.Key,
                    Type = type
                });
                found = result.IsSuccessful;
                shardObject = result.Data;
            }
            else
            {
                // The primary is this node, so read through the local router.
                shardObject = await _dataRouter.GetDataAsync(type, objectId);
                found = shardObject != null;
            }

            if (found)
            {
                // Publish the object before the flag, so the poller never sees the flag without the
                // object. A miss must not overwrite a hit reported by another shard.
                finalObject = shardObject;
                Volatile.Write(ref foundResult, true);
            }

            Interlocked.Increment(ref totalRespondedShards);
        }
        catch (Exception e)
        {
            _logger.LogError(_nodeStateService.GetNodeLogId() + "Error thrown while getting " + e.Message);
        }
    }).ToList();

    // Poll until a shard reports a hit, every shard has responded, or the timeout elapses.
    while (!Volatile.Read(ref foundResult) && Volatile.Read(ref totalRespondedShards) < shards.Count)
    {
        if ((DateTime.Now - startTime).TotalMilliseconds > timeoutMs)
        {
            throw new ClusterOperationTimeoutException("Get data request for object " + objectId + " from shard " + shardId + " timed out.");
        }
        await Task.Delay(10);
    }

    return finalObject;
}