示例#1
0
        /// <summary>
        /// Decides whether the coordinator has to compute a new resource assignment or
        /// whether the assignments currently stored in the resources znode can be kept.
        /// </summary>
        /// <param name="clients">Clients znode whose <c>ClientPaths</c> list the active clients.</param>
        /// <param name="resources">Resources znode holding the resources and their current assignments.</param>
        /// <returns>
        /// <c>true</c> when a rebalancing is required; <c>false</c> when the existing assignments are still usable.
        /// </returns>
        private bool IsRebalancingRequired(ClientsZnode clients, ResourcesZnode resources)
        {
            // if this is the first rebalancing as coordinator or the last one was not successful then rebalancing is required
            if (this.store.GetAssignmentStatus() == AssignmentStatus.NoAssignmentYet
                || !lastRebalancingResult.HasValue
                || lastRebalancingResult.Value != RebalancingResult.Complete)
            {
                return true;
            }

            // any change to resources requires a rebalancing
            if (resources.HasResourceChange())
            {
                return true;
            }

            // given a client was either added or removed

            // if there are fewer clients than resources then we require a rebalancing
            if (clients.ClientPaths.Count < resources.Resources.Count)
            {
                return true;
            }

            // given we have an equal or greater number of clients than resources

            // if an existing client is currently assigned more than one resource we require a rebalancing
            if (resources.ResourceAssignments.Assignments.GroupBy(x => x.ClientId).Any(x => x.Count() > 1))
            {
                return true;
            }

            // given all existing assignments are one client to one resource

            // if any client for the existing assignments is no longer around then we require a rebalancing.
            // A set gives constant-time membership checks instead of scanning a list once per assignment.
            var activeClientIds = new HashSet<string>(clients.ClientPaths.Select(GetClientId), StringComparer.Ordinal);

            // otherwise no rebalancing is required
            return resources.ResourceAssignments.Assignments
                            .Any(assignment => !activeClientIds.Contains(assignment.ClientId));
        }
示例#2
0
        /// <summary>
        /// Stops the follower's current activity, reads the resources znode, and starts the
        /// follower again with the resources assigned to this client.
        /// </summary>
        /// <param name="rebalancingToken">
        /// Token passed to the start delay and the start actions; it is also checked once
        /// before the start actions are invoked.
        /// </param>
        /// <returns>
        /// <see cref="RebalancingResult.Cancelled"/> when cancellation was requested before the start actions ran;
        /// otherwise <see cref="RebalancingResult.Complete"/>.
        /// </returns>
        private async Task<RebalancingResult> ProcessStatusChangeAsync(CancellationToken rebalancingToken)
        {
            // the stop actions always run first
            await store.InvokeOnStopActionsAsync(clientId, "Follower");

            ResourcesZnode resourcesZnode = await zooKeeperService.GetResourcesAsync(null, null);

            // collect the resources whose assignment names this client
            var ownResources = new List<string>();
            foreach (var assignment in resourcesZnode.ResourceAssignments.Assignments)
            {
                if (assignment.ClientId.Equals(clientId))
                {
                    ownResources.Add(assignment.Resource);
                }
            }

            bool delayConfigured = onStartDelay.Ticks > 0;
            if (delayConfigured)
            {
                logger.Info(clientId, $"Follower - Delaying on start for {(int)onStartDelay.TotalMilliseconds}ms");
                await WaitFor(onStartDelay, rebalancingToken);
            }

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            await store.InvokeOnStartActionsAsync(clientId, "Follower", ownResources, rebalancingToken, followerToken);

            return RebalancingResult.Complete;
        }
示例#3
0
        /// <summary>
        /// Performs the ZooKeeper calls needed to take over as coordinator: increments and watches
        /// the epoch, watches the nodes, and records the current version of the resources znode.
        /// </summary>
        /// <param name="currentEpoch">Epoch value passed to the increment-and-watch call.</param>
        /// <returns>
        /// <see cref="BecomeCoordinatorResult.StaleEpoch"/> when a <c>ZkStaleVersionException</c> is caught,
        /// <see cref="BecomeCoordinatorResult.Error"/> when a <c>ZkInvalidOperationException</c> is caught,
        /// otherwise <see cref="BecomeCoordinatorResult.Ok"/> after a rebalancing event has been queued.
        /// </returns>
        public async Task<BecomeCoordinatorResult> BecomeCoordinatorAsync(int currentEpoch)
        {
            try
            {
                // clear the flag before the calls below pass this instance to the ZooKeeper service
                ignoreWatches = false;

                await zooKeeperService.IncrementAndWatchEpochAsync(currentEpoch, this);
                await zooKeeperService.WatchNodesAsync(this);

                // remember the znode version so later reads can be compared against it
                ResourcesZnode resourcesZnode = await zooKeeperService.GetResourcesAsync(this, null);
                resourcesVersion = resourcesZnode.Version;
            }
            catch (ZkStaleVersionException staleVersionError)
            {
                logger.Error(clientId, "Could not become coordinator as a stale version number was used", staleVersionError);
                return BecomeCoordinatorResult.StaleEpoch;
            }
            catch (ZkInvalidOperationException invalidOperationError)
            {
                logger.Error(clientId, "Could not become coordinator as an invalid ZooKeeper operation occurred", invalidOperationError);
                return BecomeCoordinatorResult.Error;
            }

            // all calls succeeded: queue a rebalancing
            events.Add(CoordinatorEvent.RebalancingTriggered);

            return BecomeCoordinatorResult.Ok;
        }
        /// <summary>
        /// Distributes the resources across the clients in round-robin order, writes the assignments
        /// to the resources znode, sets the status to <c>ResourcesGranted</c>, and then starts the
        /// coordinator's own share of the resources.
        /// </summary>
        /// <param name="rebalancingToken">Token checked between the steps of the phase.</param>
        /// <param name="resources">Resources znode; its assignments are replaced and written back.</param>
        /// <param name="clients">Clients znode whose <c>ClientPaths</c> receive the resources.</param>
        /// <returns>
        /// <see cref="RebalancingResult.Cancelled"/> when cancellation is observed at any checkpoint;
        /// otherwise <see cref="RebalancingResult.Complete"/>.
        /// </returns>
        private async Task<RebalancingResult> AssignResourcesPhaseAsync(CancellationToken rebalancingToken,
                                                                        ResourcesZnode resources,
                                                                        ClientsZnode clients)
        {
            logger.Info(clientId, "Coordinator - Assign resources to clients");

            var pending = new Queue<string>(resources.Resources);
            var plan = new List<ResourceAssignment>();
            var nextClient = 0;

            // hand out one resource per client, wrapping back to the first client at the end of the list
            while (pending.Count > 0)
            {
                var ownerId = GetClientId(clients.ClientPaths[nextClient]);
                var resource = pending.Dequeue();
                plan.Add(new ResourceAssignment { ClientId = ownerId, Resource = resource });

                nextClient = nextClient + 1 >= clients.ClientPaths.Count ? 0 : nextClient + 1;
            }

            // write assignments back to resources znode
            resources.ResourceAssignments.Assignments = plan;
            resourcesVersion = await zooKeeperService.SetResourcesAsync(resources);

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            // publish the new status and keep the returned version
            status.RebalancingStatus = RebalancingStatus.ResourcesGranted;
            status.Version = await zooKeeperService.SetStatus(status);

            if (onStartDelay.Ticks > 0)
            {
                logger.Info(clientId, $"Coordinator - Delaying on start for {(int)onStartDelay.TotalMilliseconds}ms");
                await WaitFor(onStartDelay, rebalancingToken);
            }

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            // the coordinator is a client too: start the resources assigned to it
            var ownResources = (from assignment in plan
                                where assignment.ClientId == clientId
                                select assignment.Resource).ToList();

            await store.InvokeOnStartActionsAsync(clientId, "Coordinator", ownResources, rebalancingToken, coordinatorToken);

            return rebalancingToken.IsCancellationRequested
                ? RebalancingResult.Cancelled
                : RebalancingResult.Complete;
        }
示例#5
0
        /// <summary>
        /// Reads the resources znode and queues a <c>FollowerEvent.RebalancingTriggered</c> event
        /// when at least one resource is assigned to this client.
        /// </summary>
        private async Task CheckForRebalancingAsync()
        {
            ResourcesZnode resourcesZnode = await zooKeeperService.GetResourcesAsync(null, null);

            // resources whose assignment names this client
            List<string> ownResources =
                (from assignment in resourcesZnode.ResourceAssignments.Assignments
                 where assignment.ClientId.Equals(clientId)
                 select assignment.Resource).ToList();

            if (ownResources.Count > 0)
            {
                events.Add(FollowerEvent.RebalancingTriggered);
            }
        }
示例#6
0
        /// <summary>
        /// Runs one coordinator rebalancing: reads the active clients and the resources znode,
        /// skips the work when <c>IsRebalancingRequired</c> says nothing has to change, otherwise
        /// assigns the resources round-robin, writes the assignments back, stops the coordinator's
        /// own activity and starts it again with its new share of the resources.
        /// </summary>
        /// <param name="rebalancingToken">Token checked between the steps of the rebalancing.</param>
        /// <returns>
        /// <see cref="RebalancingResult.Cancelled"/> when cancellation is observed at any checkpoint;
        /// <see cref="RebalancingResult.Complete"/> when the rebalancing finished or was not required.
        /// </returns>
        /// <exception cref="ZkStaleVersionException">
        /// The version of the resources znode differs from the version this coordinator last recorded.
        /// </exception>
        private async Task<RebalancingResult> RebalanceAsync(CancellationToken rebalancingToken)
        {
            logger.Info(clientId, "Coordinator - Get clients and resources list");
            ClientsZnode clients = await zooKeeperService.GetActiveClientsAsync();

            ResourcesZnode resources = await zooKeeperService.GetResourcesAsync(null, null);

            if (resources.Version != resourcesVersion)
            {
                throw new ZkStaleVersionException(
                          "Resources znode version does not match expected value, indicates another client has been made coordinator and is executing a rebalancing.");
            }

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            // if no resources were changed and there are more clients than resources then check
            // to see if rebalancing is necessary. If existing assignments are still valid then
            // a new client or the loss of a client with no assignments need not trigger a rebalancing
            if (!IsRebalancingRequired(clients, resources))
            {
                logger.Info(clientId,
                            "Coordinator - No rebalancing required. No resource change. No change to existing clients. More clients than resources.");
                return RebalancingResult.Complete;
            }

            logger.Info(clientId,
                        $"Coordinator - Assign resources ({string.Join(",", resources.Resources)}) to clients ({string.Join(",", clients.ClientPaths.Select(GetClientId))})");
            Queue<string> resourcesToAssign = new(resources.Resources);
            List<ResourceAssignment> resourceAssignments = new();
            int clientIndex = 0;

            // round-robin: one resource per client, wrapping back to the first client at the end of the list
            while (resourcesToAssign.Any())
            {
                resourceAssignments.Add(new ResourceAssignment
                {
                    ClientId = GetClientId(clients.ClientPaths[clientIndex]), Resource = resourcesToAssign.Dequeue()
                });

                clientIndex++;
                if (clientIndex >= clients.ClientPaths.Count)
                {
                    clientIndex = 0;
                }
            }

            // write assignments back to resources znode
            resources.ResourceAssignments.Assignments = resourceAssignments;
            resourcesVersion = await zooKeeperService.SetResourcesAsync(resources);

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            // stop the coordinator's own activity before it is started with the new assignments
            await store.InvokeOnStopActionsAsync(clientId, "Coordinator");

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            if (onStartDelay.Ticks > 0)
            {
                logger.Info(clientId, $"Coordinator - Delaying on start for {(int)onStartDelay.TotalMilliseconds}ms");
                await WaitFor(onStartDelay, rebalancingToken);
            }

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            // the coordinator is a client too: start the resources assigned to it
            List<string> leaderAssignments = resourceAssignments
                                             .Where(x => x.ClientId == clientId)
                                             .Select(x => x.Resource)
                                             .ToList();
            await store.InvokeOnStartActionsAsync(clientId, "Coordinator", leaderAssignments, rebalancingToken,
                                                  coordinatorToken);

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            return RebalancingResult.Complete;
        }
示例#7
0
        /// <summary>
        /// Reads (and re-watches) the status znode and reacts to its rebalancing status:
        /// stops activity and marks this follower as stopped on <c>StopActivity</c>, or starts the
        /// resources assigned to this client and marks it as started on <c>ResourcesGranted</c>.
        /// </summary>
        /// <param name="rebalancingToken">Token checked between the steps of the handling.</param>
        /// <returns>
        /// <see cref="RebalancingResult.Cancelled"/> when cancellation is observed at any checkpoint;
        /// otherwise <see cref="RebalancingResult.Complete"/>, including for unsupported statuses.
        /// </returns>
        private async Task<RebalancingResult> ProcessStatusChangeAsync(CancellationToken rebalancingToken)
        {
            StatusZnode currentStatus = await zooKeeperService.WatchStatusAsync(this);

            // a version mismatch is only logged; processing continues with the status just read
            if (currentStatus.Version != statusVersion)
            {
                logger.Warn(clientId, "Follower - The status has changed between the notification and response");
            }

            if (rebalancingToken.IsCancellationRequested)
            {
                return RebalancingResult.Cancelled;
            }

            switch (currentStatus.RebalancingStatus)
            {
                case RebalancingStatus.StopActivity:
                {
                    logger.Info(clientId, "Follower - Status change received - stop activity");
                    await store.InvokeOnStopActionsAsync(clientId, "Follower");

                    if (rebalancingToken.IsCancellationRequested)
                    {
                        return RebalancingResult.Cancelled;
                    }

                    await zooKeeperService.SetFollowerAsStopped(clientId);

                    logger.Info(clientId, "Follower - Created follower stopped node");
                    break;
                }

                case RebalancingStatus.ResourcesGranted:
                {
                    logger.Info(clientId, "Follower - Status change received - resources granted");
                    ResourcesZnode resourcesZnode = await zooKeeperService.GetResourcesAsync(null, null);

                    // resources whose assignment names this client
                    List<string> grantedResources =
                        (from assignment in resourcesZnode.ResourceAssignments.Assignments
                         where assignment.ClientId.Equals(clientId)
                         select assignment.Resource).ToList();

                    logger.Info(clientId, $"Follower - {grantedResources.Count} resources granted");

                    // a grant received while already started: stop first, then start again
                    if (store.IsInStartedState())
                    {
                        logger.Warn(clientId,
                                    "Follower - The resources granted status change has been received while already in the started state. Stopped all activity first");
                        await store.InvokeOnStopActionsAsync(clientId, "Follower");
                    }

                    if (onStartDelay.Ticks > 0)
                    {
                        logger.Info(clientId, $"Follower - Delaying on start for {(int)onStartDelay.TotalMilliseconds}ms");
                        await WaitFor(onStartDelay, rebalancingToken);
                    }

                    if (rebalancingToken.IsCancellationRequested)
                    {
                        return RebalancingResult.Cancelled;
                    }

                    await store.InvokeOnStartActionsAsync(clientId, "Follower", grantedResources, rebalancingToken,
                                                          followerToken);

                    if (rebalancingToken.IsCancellationRequested)
                    {
                        return RebalancingResult.Cancelled;
                    }

                    await zooKeeperService.SetFollowerAsStarted(clientId);

                    logger.Info(clientId, "Follower - Removed follower stopped node");
                    break;
                }

                case RebalancingStatus.StartConfirmed:
                    logger.Info(clientId, "Follower - All followers confirm started"); // no longer used
                    break;

                default:
                    logger.Error(clientId, "Follower - Non-supported status received - ignoring");
                    break;
            }

            return RebalancingResult.Complete;
        }
示例#8
0
        /// <summary>
        /// Stop phase of a coordinator rebalancing: reads the active clients and resources, decides
        /// whether a rebalancing is needed, sets the status to <c>StopActivity</c>, stops the
        /// coordinator's own activity, and then polls until every in-scope follower is reported as stopped.
        /// </summary>
        /// <param name="rebalancingToken">Token checked between the steps and on every polling iteration.</param>
        /// <returns>
        /// A <c>StopPhaseResult</c> carrying <c>RebalancingResult.Cancelled</c> when cancellation is observed
        /// or a follower disappears mid-phase, <c>RebalancingResult.NotRequired</c> when
        /// <c>IsRebalancingRequired</c> returns false, or <c>RebalancingResult.Complete</c> together with the
        /// resources znode, clients znode and follower ids read at the start of the phase.
        /// </returns>
        /// <exception cref="ZkStaleVersionException">
        /// The version of the resources znode differs from the version this coordinator last recorded.
        /// </exception>
        private async Task <StopPhaseResult> StopActivityPhaseAsync(CancellationToken rebalancingToken)
        {
            logger.Info(clientId, "Coordinator - Get active clients and resources");
            ClientsZnode clients = await zooKeeperService.GetActiveClientsAsync();

            // followers in scope = every active client except this coordinator
            List <string>  followerIds = clients.ClientPaths.Select(GetClientId).Where(x => x != clientId).ToList();
            ResourcesZnode resources   = await zooKeeperService.GetResourcesAsync(null, null);

            logger.Info(clientId,
                        $"Coordinator - {followerIds.Count} followers in scope and {resources.Resources.Count} resources in scope");
            logger.Info(clientId,
                        $"Coordinator - Assign resources ({string.Join(",", resources.Resources)}) to clients ({string.Join(",", clients.ClientPaths.Select(GetClientId))})");

            // a version mismatch is treated as another coordinator having taken over
            if (resources.Version != resourcesVersion)
            {
                throw new ZkStaleVersionException(
                          "Resources znode version does not match expected value, indicates another client has been made coordinator and is executing a rebalancing.");
            }

            if (rebalancingToken.IsCancellationRequested)
            {
                return(new StopPhaseResult(RebalancingResult.Cancelled));
            }

            // if no resources were changed and there are more clients than resources then check
            // to see if rebalancing is necessary. If existing assignments are still valid then
            // a new client or the loss of a client with no assignments need not trigger a rebalancing
            if (!IsRebalancingRequired(clients, resources))
            {
                logger.Info(clientId,
                            "Coordinator - No rebalancing required. No resource change. No change to existing assigned clients. More clients than resources.");
                return(new StopPhaseResult(RebalancingResult.NotRequired));
            }

            // publish the StopActivity status and keep the version returned by the write
            logger.Info(clientId, "Coordinator - Command followers to stop");
            status.RebalancingStatus = RebalancingStatus.StopActivity;
            status.Version           = await zooKeeperService.SetStatus(status);

            if (rebalancingToken.IsCancellationRequested)
            {
                return(new StopPhaseResult(RebalancingResult.Cancelled));
            }

            // the coordinator stops its own activity as well
            await store.InvokeOnStopActionsAsync(clientId, "Coordinator");

            // wait for confirmation that all followers have stopped or until cancellation is requested
            while (!rebalancingToken.IsCancellationRequested)
            {
                List <string> stopped = await zooKeeperService.GetStoppedAsync();

                if (AreClientsStopped(followerIds, stopped))
                {
                    logger.Info(clientId, $"Coordinator - All {stopped.Count} in scope followers have stopped");
                    break;
                }

                // check that a client hasn't died mid-rebalancing, if so, trigger a new rebalancing and abort this one.
                // else wait and check again
                ClientsZnode latestClients = await zooKeeperService.GetActiveClientsAsync();

                // NOTE(review): followerIds holds client ids while ClientPaths holds paths; presumably
                // GetMissing accounts for that difference - confirm against its implementation
                List <string> missingClients = GetMissing(followerIds, latestClients.ClientPaths);
                if (missingClients.Any())
                {
                    logger.Info(clientId,
                                $"Coordinator - {missingClients.Count} followers have disappeared. Missing: {string.Join(",", missingClients)}. Triggering new rebalancing.");
                    events.Add(CoordinatorEvent.RebalancingTriggered);
                    return(new StopPhaseResult(RebalancingResult.Cancelled));
                }

                List <string> pendingClientIds = GetMissing(followerIds, stopped);
                logger.Info(clientId,
                            $"Coordinator - waiting for followers to stop: {string.Join(",", pendingClientIds)}");
                // NOTE(review): this wait is not given rebalancingToken, unlike the WaitFor(delay, token)
                // calls used for the on-start delay - confirm whether cancellation should interrupt it
                await WaitFor(TimeSpan.FromSeconds(2)); // try again in 2s
            }

            // the loop also exits on cancellation, so distinguish that from "all followers stopped"
            if (rebalancingToken.IsCancellationRequested)
            {
                return(new StopPhaseResult(RebalancingResult.Cancelled));
            }

            // pass the snapshots read at the start of the phase back to the caller
            StopPhaseResult phaseResult = new(RebalancingResult.Complete)
            {
                ResourcesZnode = resources, ClientsZnode = clients, FollowerIds = followerIds
            };

            return(phaseResult);
        }