Exemplo n.º 1
0
        /// <summary>
        /// Polls once per second for cluster tasks in the Created status that are assigned to this node
        /// and starts as many of them as the configured concurrency limit allows.
        /// </summary>
        /// <remarks>
        /// The loop has no exit condition. A scan only happens while the node is a follower or leader,
        /// is bootstrapped and is in the cluster. The selected tasks are first marked InProgress through an
        /// UpdateClusterTasks command (waiting for commits); each one is then started locally and tracked
        /// in _nodeTasks.
        /// </remarks>
        private async Task ScanTasks()
        {
            while (true)
            {
                if ((_nodeStateService.Role == NodeState.Follower || _nodeStateService.Role == NodeState.Leader) && _nodeStateService.IsBootstrapped && _nodeStateService.InCluster)
                {
                    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Starting task watch.");
                    //Check tasks assigned to this node
                    var tasks          = _stateMachine.CurrentState.GetNodeClusterTasks(new ClusterTaskStatuses[] { ClusterTaskStatuses.Created }, _nodeStateService.Id).ToList();
                    var currentTasksNo = _nodeTasks.Count(t => !t.Value.Task.IsCompleted);
                    //Never take more than the free slots; this goes negative when the node is over capacity
                    var numberOfTasksToAssign = Math.Min(tasks.Count, _clusterOptions.ConcurrentTasks - currentTasksNo);

                    _logger.LogDebug(_nodeStateService.GetNodeLogId() + numberOfTasksToAssign + " tasks to run. || " + currentTasksNo);
                    if (numberOfTasksToAssign > 0)
                    {
                        //Mark the selected tasks as in progress before starting them locally
                        await _clusterClient.Send(new ExecuteCommands()
                        {
                            Commands = new List <BaseCommand>()
                            {
                                new UpdateClusterTasks()
                                {
                                    TasksToUpdate = tasks.GetRange(0, numberOfTasksToAssign).Select(t => new TaskUpdate()
                                    {
                                        Status = ClusterTaskStatuses.InProgress,
                                        TaskId = t.Id
                                    }).ToList()
                                }
                            },
                            WaitForCommits = true
                        });

                        //Create a thread for each task
                        for (var i = 0; i < numberOfTasksToAssign; i++)
                        {
                            var task = tasks[i];
                            _logger.LogDebug(_nodeStateService.GetNodeLogId() + " is starting task " + task.ToString());
                            try
                            {
                                //The task is cloned before being handed to StartNodeTask
                                var newTask = StartNodeTask(SystemExtension.Clone(task));
                                _nodeTasks.TryAdd(task.Id, new NodeTaskMetadata()
                                {
                                    Id   = task.Id,
                                    Task = Task.Run(() => newTask)
                                });
                            }
                            catch (Exception e)
                            {
                                //Keep scanning the remaining tasks, but record why this one could not be started
                                _logger.LogCritical(_nodeStateService.GetNodeLogId() + "Failed to start task " + task.Id + " with error " + e.Message + Environment.NewLine + e.StackTrace);
                            }
                        }
                    }
                }
                await Task.Delay(1000);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Records a write operation for a shard, applies it to the local datastore and replicates it
        /// to every other in-sync allocation of that shard.
        /// </summary>
        /// <param name="data">Payload being written; its ShardId and ShardType identify the target shard.</param>
        /// <param name="operationType">Kind of operation stored on the write operation.</param>
        /// <param name="operationId">Identifier of the operation; it is also folded into the shard hash.</param>
        /// <param name="transactionDate">Transaction date stored on the write operation.</param>
        /// <returns>
        /// A response with IsSuccessful set to true plus the operation's position and shard hash, or a
        /// response with IsSuccessful set to false when the repository did not add the operation.
        /// </returns>
        public async Task <WriteShardDataResponse> WriteShardData(ShardData data, ShardOperationOptions operationType, string operationId, DateTime transactionDate)
        {
            var operation = new ShardWriteOperation()
            {
                Data            = data,
                Id              = operationId,
                Operation       = operationType,
                TransactionDate = transactionDate
            };

            ShardWriteOperation previousOperation = await GetOrPopulateOperationCache(operation.Data.ShardId.Value);

            // Chain the new operation onto the previous one: next position and a hash seeded by the prior hash.
            string previousHash;
            if (previousOperation == null)
            {
                // Positions start at 1 and the hash chain starts from the empty string.
                operation.Pos = 1;
                previousHash  = "";
            }
            else
            {
                operation.Pos = previousOperation.Pos + 1;
                previousHash  = previousOperation.ShardHash;
            }

            operation.ShardHash = ObjectUtility.HashStrings(previousHash, operation.Id);
            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "writing new operation " + operationId + " with data " + Environment.NewLine + JsonConvert.SerializeObject(data, Formatting.Indented));

            // Persist the operation first; nothing else happens if the repository rejects it.
            var wasAdded = await _shardRepository.AddShardWriteOperationAsync(operation);
            if (!wasAdded)
            {
                return new WriteShardDataResponse()
                {
                    IsSuccessful = false
                };
            }

            ApplyOperationToDatastore(operation);
            var shardMetadata = _stateMachine.GetShard(operation.Data.ShardType, operation.Data.ShardId.Value);

            // Flag the operation as applied, then refresh the cached last operation for the shard.
            await _shardRepository.MarkShardWriteOperationAppliedAsync(operation.Id);
            UpdateOperationCache(operation.Data.ShardId.Value, operation);

            // Collects every allocation that failed to take the replicated operation.
            var failedAllocations = new ConcurrentBag <Guid>();

            // Replicate to all in-sync allocations except this node.
            var replications = shardMetadata.InsyncAllocations
                               .Where(id => id != _nodeStateService.Id)
                               .Select(async allocation =>
            {
                try
                {
                    var replicationRequest = new ReplicateShardWriteOperation()
                    {
                        Operation = operation
                    };
                    var result = await _clusterClient.Send(allocation, replicationRequest);

                    if (!result.IsSuccessful)
                    {
                        // Routed through the generic handler below so the allocation is marked as failed.
                        throw new Exception("Failed to replicate data to shard " + shardMetadata.Id + " to node " + allocation + " for operation " + operation.ToString() + Environment.NewLine + JsonConvert.SerializeObject(operation, Formatting.Indented));
                    }

                    _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Successfully replicated all " + shardMetadata.Id + "shards.");
                }
                catch (TaskCanceledException)
                {
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " on node " + allocation + " for operation " + operation.Pos + " as request timed out, marking shard as not insync...");
                    failedAllocations.Add(allocation);
                }
                catch (Exception e)
                {
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to replicate shard " + shardMetadata.Id + " for operation " + operation.Pos + " with error " + e.Message + ", marking shard as not insync..." + Environment.NewLine + e.StackTrace);
                    failedAllocations.Add(allocation);
                }
            });

            await Task.WhenAll(replications);

            if (failedAllocations.Any())
            {
                // Move the failed allocations from the in-sync set to the stale set in the shard metadata.
                var demoteAllocations = new UpdateShardMetadataAllocations()
                {
                    ShardId = data.ShardId.Value,
                    Type    = data.ShardType,
                    StaleAllocationsToAdd     = failedAllocations.ToHashSet(),
                    InsyncAllocationsToRemove = failedAllocations.ToHashSet()
                };

                await _clusterClient.Send(new ExecuteCommands()
                {
                    Commands = new List <BaseCommand>()
                    {
                        demoteAllocations
                    },
                    WaitForCommits = true
                });

                _logger.LogInformation(_nodeStateService.GetNodeLogId() + " had stale virtual machines.");
            }

            return new WriteShardDataResponse()
            {
                Pos = operation.Pos,
                ShardHash = operation.ShardHash,
                IsSuccessful = true
            };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Dispatches a data-related cluster request to the handler for its concrete type.
        /// </summary>
        /// <typeparam name="TResponse">Response type associated with the request.</typeparam>
        /// <param name="request">Request to handle; the first matching type below selects the handler.</param>
        /// <returns>
        /// The handler's response cast to <typeparamref name="TResponse"/>. Any exception, including the one
        /// raised for an unsupported request type, is logged and turned into a new
        /// <typeparamref name="TResponse"/> with IsSuccessful set to false.
        /// </returns>
        public async Task <TResponse> Handle <TResponse>(IClusterRequest <TResponse> request) where TResponse : BaseResponse, new()
        {
            try
            {
                // The checks run in a fixed order; the first type that matches wins.
                if (request is RequestDataShard dataShardRequest)
                {
                    return (TResponse)(object) await RequestDataShardHandler(dataShardRequest);
                }
                else if (request is AddShardWriteOperation addWriteRequest)
                {
                    return (TResponse)(object) await AddShardWriteOperationHandler(addWriteRequest);
                }
                else if (request is RequestCreateIndex createIndexRequest)
                {
                    return (TResponse)(object) await RequestCreateIndexHandler(createIndexRequest);
                }
                else if (request is AllocateShard allocateRequest)
                {
                    return (TResponse)(object) await AllocateShardHandler(allocateRequest);
                }
                else if (request is ReplicateShardWriteOperation replicateRequest)
                {
                    return (TResponse)(object) await ReplicateShardWriteOperationHandler(replicateRequest);
                }
                else if (request is RequestShardWriteOperations writeOperationsRequest)
                {
                    return (TResponse)(object) await RequestShardWriteOperationsHandler(writeOperationsRequest);
                }
                else if (request is RequestShardSync shardSyncRequest)
                {
                    return (TResponse)(object) await RequestShardSyncHandler(shardSyncRequest);
                }

                // Thrown inside the try on purpose: it is reported by the generic handler below.
                throw new Exception("Request is not implemented");
            }
            catch (TaskCanceledException)
            {
                _logger.LogWarning(_nodeStateService.GetNodeLogId() + "Request " + request.RequestName + " timed out...");
                var timedOut = new TResponse()
                {
                    IsSuccessful = false
                };
                return timedOut;
            }
            catch (Exception e)
            {
                _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to handle request " + request.RequestName + " with error " + e.Message + Environment.StackTrace + e.StackTrace);
                var failed = new TResponse()
                {
                    IsSuccessful = false
                };
                return failed;
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates an index for the given type: allocates the requested number of shards on every
        /// contactable node and then sends a CreateIndex command to the current leader.
        /// </summary>
        /// <param name="type">Type the index is created for.</param>
        /// <param name="dataTransferTimeoutMs">Maximum time, in milliseconds, to wait for at least one contactable node.</param>
        /// <param name="numberOfShards">Number of shards to allocate for the index.</param>
        /// <returns>
        /// true when the leader reports the CreateIndex command as successful; false when it does not, or when
        /// any exception is raised along the way (including the timeout while waiting for contactable nodes).
        /// </returns>
        public async Task <bool> CreateIndexAsync(string type, int dataTransferTimeoutMs, int numberOfShards)
        {
            try
            {
                //This is for the primary copy
                var      eligbleNodes = _stateMachine.CurrentState.Nodes.Where(n => n.Value.IsContactable).ToDictionary(k => k.Key, v => v.Value);
                var      rand         = new Random();
                DateTime startTime    = DateTime.Now;

                //Wait, up to the timeout, until at least one node is contactable
                while (eligbleNodes.Count == 0)
                {
                    if ((DateTime.Now - startTime).TotalMilliseconds > dataTransferTimeoutMs)
                    {
                        _logger.LogError("Failed to create indext type " + type + " request timed out...");
                        throw new ClusterOperationTimeoutException("Failed to create indext type " + type + " request timed out...");
                    }
                    _logger.LogWarning(_nodeStateService.GetNodeLogId() + "No eligible nodes found, awaiting eligible nodes.");
                    await Task.Delay(1000);

                    eligbleNodes = _stateMachine.CurrentState.Nodes.Where(n => n.Value.IsContactable).ToDictionary(k => k.Key, v => v.Value);
                }

                List <ShardAllocationMetadata> Shards = new List <ShardAllocationMetadata>();

                for (var i = 0; i < numberOfShards; i++)
                {
                    //Every eligible node holds a copy; the primary is picked at random among them
                    var shard = new ShardAllocationMetadata()
                    {
                        InsyncAllocations = eligbleNodes.Keys.ToHashSet(),
                        PrimaryAllocation = eligbleNodes.ElementAt(rand.Next(0, eligbleNodes.Count)).Key,
                        Id   = Guid.NewGuid(),
                        Type = type
                    };
                    Shards.Add(shard);

                    foreach (var allocationI in shard.InsyncAllocations)
                    {
                        if (allocationI != _nodeStateService.Id)
                        {
                            await _clusterClient.Send(allocationI, new AllocateShard()
                            {
                                ShardId = shard.Id,
                                Type    = type
                            });
                        }
                        else
                        {
                            //This node allocates its own copy directly instead of sending itself a request
                            AllocateShard(shard.Id, type);
                        }
                    }
                }

                var result = await _clusterClient.Send(_nodeStateService.CurrentLeader.Value, new ExecuteCommands()
                {
                    Commands = new List <BaseCommand>()
                    {
                        new CreateIndex()
                        {
                            Type   = type,
                            Shards = Shards
                        }
                    },
                    WaitForCommits = true
                });

                //Only report success when the leader reports the command as successful
                if (!result.IsSuccessful)
                {
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to create index type " + type + ", the create index command was not successful.");
                    return(false);
                }

                return(true);
            }
            catch (Exception e)
            {
                _logger.LogError(_nodeStateService.GetNodeLogId() + "Error while assigning primary node " + e.Message + Environment.NewLine + e.StackTrace);
                return(false);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Entry point for incoming RPCs: rejects requests the node cannot serve yet, routes the request to
        /// the raft or data service by its concrete type, and reports an elapsed-time metric.
        /// </summary>
        /// <typeparam name="TResponse">Response type associated with the request.</typeparam>
        /// <param name="request">Request to handle.</param>
        /// <returns>
        /// The service's response cast to <typeparamref name="TResponse"/>. A new
        /// <typeparamref name="TResponse"/> with IsSuccessful set to false is returned when the node is not
        /// bootstrapped, when a cluster request arrives while the node is not in the cluster, or when
        /// handling raises any exception (including an unsupported request type).
        /// </returns>
        public async Task <TResponse> Handle <TResponse>(IClusterRequest <TResponse> request) where TResponse : BaseResponse, new()
        {
            _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Detected RPC " + request.GetType().Name + "." + Environment.NewLine + JsonConvert.SerializeObject(request, Formatting.Indented));

            // Guard: nothing is served until the node has bootstrapped.
            if (!_nodeStateService.IsBootstrapped)
            {
                _logger.LogDebug(_nodeStateService.GetNodeLogId() + "Node is not ready...");

                var notReady = new TResponse()
                {
                    IsSuccessful = false,
                    ErrorMessage = "Node is not ready..."
                };
                return notReady;
            }

            // Guard: cluster requests are refused while this node is outside the cluster.
            if (IsClusterRequest <TResponse>(request) && !_nodeStateService.InCluster)
            {
                _logger.LogWarning(_nodeStateService.GetNodeLogId() + "Reqeuest rejected, node is not apart of cluster...");
                var notInCluster = new TResponse()
                {
                    IsSuccessful = false,
                    ErrorMessage = "Node is not apart of cluster..."
                };
                return notInCluster;
            }

            DateTime commandStartTime = DateTime.Now;

            try
            {
                TResponse response;
                switch (request)
                {
                // Raft requests. ExecuteCommands goes through HandleIfLeaderOrReroute.
                case ExecuteCommands executeCommands:
                    response = await HandleIfLeaderOrReroute(request, async() => (TResponse)(object)await _raftService.Handle(executeCommands));
                    break;
                case RequestVote requestVote:
                    response = (TResponse)(object)await _raftService.Handle(requestVote);
                    break;
                case AppendEntry appendEntry:
                    response = (TResponse)(object)await _raftService.Handle(appendEntry);
                    break;
                case InstallSnapshot installSnapshot:
                    response = (TResponse)(object)await _raftService.Handle(installSnapshot);
                    break;

                // Data requests. RequestCreateIndex goes through HandleIfLeaderOrReroute.
                case RequestCreateIndex createIndex:
                    response = await HandleIfLeaderOrReroute(request, async() => (TResponse)(object)await _dataService.Handle(createIndex));
                    break;
                case AddShardWriteOperation addWriteOperation:
                    response = (TResponse)(object)await _dataService.Handle(addWriteOperation);
                    break;
                case RequestDataShard requestDataShard:
                    response = (TResponse)(object)await _dataService.Handle(requestDataShard);
                    break;
                case AllocateShard allocateShard:
                    response = (TResponse)(object)await _dataService.Handle(allocateShard);
                    break;
                case ReplicateShardWriteOperation replicateWriteOperation:
                    response = (TResponse)(object)await _dataService.Handle(replicateWriteOperation);
                    break;
                case RequestShardWriteOperations requestWriteOperations:
                    response = (TResponse)(object)await _dataService.Handle(requestWriteOperations);
                    break;

                default:
                    throw new Exception("Request is not implemented");
                }

                // Metrics are only produced on the leader, for requests flagged as metric-producing,
                // and only when something is subscribed to MetricGenerated.
                var metricsEnabled = MetricGenerated != null && _nodeStateService.Role == NodeState.Leader && request.Metric;
                if (metricsEnabled)
                {
                    var emitMetric = false;

                    if (!lastMetricGenerated.ContainsKey(request.RequestName))
                    {
                        // First metric for this request name: record the time and emit.
                        lastMetricGenerated.TryAdd(request.RequestName, DateTime.Now);
                        emitMetric = true;
                    }
                    else if ((DateTime.Now - lastMetricGenerated[request.RequestName]).TotalMilliseconds > _clusterOptions.MetricsIntervalMs)
                    {
                        // The configured interval has passed since the last metric for this request name.
                        lastMetricGenerated.TryUpdate(request.RequestName, DateTime.Now, lastMetricGenerated[request.RequestName]);
                        emitMetric = true;
                    }

                    if (emitMetric)
                    {
                        MetricGenerated.Invoke(this, new Metric()
                        {
                            Date       = DateTime.Now,
                            IntervalMs = 0,
                            Type       = MetricTypes.ClusterCommandElapsed(request.RequestName),
                            Value      = (DateTime.Now - commandStartTime).TotalMilliseconds
                        });
                    }
                }

                return response;
            }
            catch (TaskCanceledException)
            {
                _logger.LogWarning(_nodeStateService.GetNodeLogId() + "Request " + request.RequestName + " timed out...");
                var timedOut = new TResponse()
                {
                    IsSuccessful = false
                };
                return timedOut;
            }
            catch (Exception e)
            {
                _logger.LogError(_nodeStateService.GetNodeLogId() + "Failed to handle request " + request.RequestName + " with error " + e.Message + Environment.StackTrace + e.StackTrace);
                var failed = new TResponse()
                {
                    IsSuccessful = false
                };
                return failed;
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Looks up an object by id. When no shard id is supplied, every primary shard of the type is queried
        /// concurrently (remote shards through the cluster client, the local shard through the data router)
        /// and the first hit is returned.
        /// </summary>
        /// <param name="objectId">Id of the object to fetch.</param>
        /// <param name="type">Type the object belongs to.</param>
        /// <param name="timeoutMs">Maximum time, in milliseconds, to wait for the shard queries.</param>
        /// <param name="shardId">When supplied, the lookup goes straight to the local data router.</param>
        /// <returns>The object that was found, or null when every shard responded without a hit.</returns>
        /// <exception cref="ClusterOperationTimeoutException">
        /// Thrown when no hit was recorded and not every shard responded within <paramref name="timeoutMs"/>.
        /// </exception>
        public async Task <ShardData> GetData(Guid objectId, string type, int timeoutMs, Guid?shardId = null)
        {
            if (shardId != null)
            {
                return(await _dataRouter.GetDataAsync(type, objectId));
            }

            //UTC so the elapsed-time check is not affected by local clock adjustments
            var startTime = DateTime.UtcNow;
            var shards    = _stateMachine.GetAllPrimaryShards(type);

            bool      foundResult          = false;
            ShardData finalObject          = null;
            var       totalRespondedShards = 0;

            //Materializing the query starts one lookup per shard; they are deliberately not awaited
            //because completion is observed through the shared flag and counter polled below
            var lookups = shards.Select(async shard =>
            {
                try
                {
                    if (shard.Value != _nodeStateService.Id)
                    {
                        var result = await _clusterClient.Send(shard.Value, new RequestDataShard()
                        {
                            ObjectId = objectId,
                            ShardId  = shard.Key, //Set the shard
                            Type     = type
                        });

                        if (result.IsSuccessful)
                        {
                            //Publish the data before raising the flag so the polling loop never sees the flag without the data
                            finalObject = result.Data;
                            Volatile.Write(ref foundResult, true);
                        }
                    }
                    else
                    {
                        var localObject = await _dataRouter.GetDataAsync(type, objectId);

                        //Only record a hit; a local miss must not overwrite a hit reported by another shard
                        if (localObject != null)
                        {
                            finalObject = localObject;
                            Volatile.Write(ref foundResult, true);
                        }
                    }

                    Interlocked.Increment(ref totalRespondedShards);
                }
                catch (Exception e)
                {
                    //NOTE(review): a failed lookup is not counted as responded, so unless another shard
                    //reports a hit the call ends with the timeout exception rather than returning null
                    _logger.LogError(_nodeStateService.GetNodeLogId() + "Error thrown while getting " + e.Message);
                }
            }).ToList();

            while (!Volatile.Read(ref foundResult) && Volatile.Read(ref totalRespondedShards) < shards.Count)
            {
                if ((DateTime.UtcNow - startTime).TotalMilliseconds > timeoutMs)
                {
                    throw new ClusterOperationTimeoutException("Get data request for object " + objectId + " from shard " + shardId + " timed out.");
                }
                await Task.Delay(10);
            }

            return(finalObject);
        }