Beispiel #1
0
        /// <summary>
        /// Background loop that keeps shard allocations up to date.
        /// </summary>
        /// <remarks>
        /// The loop never exits on its own. On every pass where this node is the leader and
        /// part of the cluster it inspects each shard and
        /// <list type="bullet">
        /// <item><description>registers new allocation candidates as stale allocations and queues a resync for them,</description></item>
        /// <item><description>queues a resync for stale allocations whose node is still known, and removes the ones whose node is gone,</description></item>
        /// <item><description>demotes in-sync allocations that are no longer contactable (re-electing the primary if needed) and queues a resync for them.</description></item>
        /// </list>
        /// All resulting commands are sent in one <c>ExecuteCommands</c> request. Any exception
        /// is logged and the loop retries after a short back-off.
        /// </remarks>
        /// <returns>A task that only completes if the loop is torn down by an unhandled failure.</returns>
        public async Task AllocateShards()
        {
            const int LeaderPollDelayMs   = 3000;
            const int IdlePollDelayMs     = 5000;
            // Back-off after a failed pass so a persistent error cannot turn this into a busy loop.
            const int ErrorBackoffDelayMs = 3000;

            while (true)
            {
                try
                {
                    if (_nodeStateService.Role == NodeState.Leader && _nodeStateService.InCluster)
                    {
                        _logger.LogDebug("Allocating shards...");
                        var updates   = new List <BaseCommand>();
                        var newTasks  = new List <BaseTask>();
                        var allShards = _stateMachine.GetShards();

                        foreach (var shard in allShards)
                        {
                            // Queues a resync of this shard onto the given node unless a task with
                            // the same unique running id is already in flight.
                            void QueueResyncIfNotRunning(Guid nodeId)
                            {
                                var taskId = ResyncShard.GetTaskUniqueId(shard.Id, nodeId);
                                if (_stateMachine.GetRunningTask(taskId) == null)
                                {
                                    newTasks.Add(new ResyncShard()
                                    {
                                        Id              = Guid.NewGuid(),
                                        ShardId         = shard.Id,
                                        NodeId          = nodeId,
                                        Type            = shard.Type,
                                        UniqueRunningId = taskId,
                                        CreatedOn       = DateTime.UtcNow
                                    });
                                }
                            }

                            //Scan for new allocations first
                            // Materialised once: the candidates are counted, projected and iterated below.
                            var newAllocations = Allocator.GetAllocationCandidates(shard.Id, shard.Type).ToList();
                            if (newAllocations.Count > 0)
                            {
                                _logger.LogInformation("Found new allocations for shard " + shard.Id);
                                updates.Add(new UpdateShardMetadataAllocations()
                                {
                                    StaleAllocationsToAdd = newAllocations.Select(na => na.NodeId).ToHashSet <Guid>(),
                                    ShardId = shard.Id,
                                    Type    = shard.Type
                                });

                                foreach (var candidate in newAllocations)
                                {
                                    QueueResyncIfNotRunning(candidate.NodeId);
                                }
                            }

                            var staleAllocationsToRemove = new List <Guid>();
                            //Scan for stale Allocations
                            foreach (var staleAllocation in shard.StaleAllocations)
                            {
                                //If the node is just stale then try resync it
                                if (_stateMachine.GetNode(staleAllocation) != null)
                                {
                                    _logger.LogInformation("Found stale allocation " + staleAllocation + " for shard " + shard.Id);
                                    QueueResyncIfNotRunning(staleAllocation);
                                }
                                else
                                {
                                    // The node is no longer known to the state machine, so drop its allocation.
                                    staleAllocationsToRemove.Add(staleAllocation);
                                }
                            }

                            if (staleAllocationsToRemove.Count > 0)
                            {
                                updates.Add(new UpdateShardMetadataAllocations()
                                {
                                    ShardId = shard.Id,
                                    Type    = shard.Type,
                                    StaleAllocationsToRemove = staleAllocationsToRemove.ToHashSet()
                                });
                            }

                            //If there are new stale allocations
                            // Both partitions are materialised so that IsNodeContactable is evaluated exactly once
                            // per allocation and every decision below is based on the same snapshot.
                            var stillInsync      = shard.InsyncAllocations.Where(insync => _stateMachine.IsNodeContactable(insync)).ToList();
                            var staleAllocations = shard.InsyncAllocations.Where(ia => !stillInsync.Contains(ia)).ToList();
                            if (staleAllocations.Count > 0)
                            {
                                if (stillInsync.Count > 0)
                                {
                                    updates.Add(new UpdateShardMetadataAllocations()
                                    {
                                        ShardId                   = shard.Id,
                                        Type                      = shard.Type,
                                        // Keep the current primary if it is still in sync, otherwise promote the first in-sync node.
                                        PrimaryAllocation         = stillInsync.Contains(shard.PrimaryAllocation) ? shard.PrimaryAllocation : stillInsync[0],
                                        StaleAllocationsToAdd     = staleAllocations.ToHashSet(),
                                        InsyncAllocationsToRemove = staleAllocations.ToHashSet()
                                    });

                                    //Scan for primary allocations or in-sync allocations becoming unavailable
                                    foreach (var lostAllocation in staleAllocations)
                                    {
                                        _logger.LogInformation("Found stale allocation " + lostAllocation + " for shard " + shard.Id);
                                        QueueResyncIfNotRunning(lostAllocation);
                                    }
                                }
                                else
                                {
                                    _logger.LogError("Shard " + shard.Id + " has no primaries available, shard is unavailable...");
                                }
                            }

                            //Get the latest position of the shard
                            // NOTE(review): the response is not used anywhere in this method; the request is kept
                            // so behaviour is unchanged (a failure here still aborts the pass) - confirm whether
                            // it is still needed.
                            await _clusterClient.Send(new RequestShardWriteOperations()
                            {
                                From    = 0,
                                To      = 0,
                                ShardId = shard.Id,
                                Type    = shard.Type
                            });
                        }

                        if (newTasks.Count > 0)
                        {
                            updates.Add(new UpdateClusterTasks()
                            {
                                TasksToAdd = newTasks
                            });
                        }

                        if (updates.Count > 0)
                        {
                            await _clusterClient.Send(new ExecuteCommands()
                            {
                                Commands       = updates,
                                WaitForCommits = true
                            });
                        }

                        await Task.Delay(LeaderPollDelayMs);
                    }
                    else
                    {
                        await Task.Delay(IdlePollDelayMs);
                    }
                }
                catch (Exception e)
                {
                    _logger.LogError("Failed to allocate shards with error " + e.Message + Environment.NewLine + e.StackTrace);
                    // Without this pause a persistent failure would make the loop spin and flood the log.
                    await Task.Delay(ErrorBackoffDelayMs);
                }
            }
        }