Exemple #1
0
        /// <summary>
        /// Toggles the client: stops it when <c>Started</c> is true, otherwise starts it.
        /// </summary>
        /// <param name="logger">Receives the progress messages and is handed to <c>StartAsync</c>.</param>
        public async Task PerformActionAsync(IRebalancerLogger logger)
        {
            const string source = "TEST RUNNER";

            if (!Started)
            {
                logger.Info(source, "Starting client");
                await StartAsync(logger);

                logger.Info(source, "Started client");
                return;
            }

            logger.Info(source, "Stopping client");

            // The monitor is told about the removal before the client is actually stopped.
            Monitor.RegisterRemoveClient(Id);
            await StopAsync();

            logger.Info(source, "Stopped client");
        }
Exemple #2
0
        /// <summary>
        /// Allocates the next client id, registers it with the monitor and creates a fresh
        /// <c>RebalancerClient</c> with assignment, unassignment and abort handlers attached.
        /// </summary>
        /// <param name="logger">Logger used by the abort handler.</param>
        private void CreateNewClient(IRebalancerLogger logger)
        {
            // Blocks the calling thread for up to maxDelay: a random fraction of it when
            // randomiseTimes is set, the full duration otherwise. Non-positive delays are skipped.
            void Pause(TimeSpan maxDelay)
            {
                if (maxDelay <= TimeSpan.Zero)
                {
                    return;
                }

                if (!randomiseTimes)
                {
                    Thread.Sleep(maxDelay);
                    return;
                }

                double scaledMilliseconds = maxDelay.TotalMilliseconds * rand.NextDouble();
                Thread.Sleep((int)scaledMilliseconds);
            }

            Id = $"Client{ClientNumber++}";
            Monitor.RegisterAddClient(Id);
            Client = new RebalancerClient();

            // On assignment: remember the granted resources, claim each one, then wait.
            Client.OnAssignment += (_, assignment) =>
            {
                Resources = assignment.Resources;
                foreach (string granted in assignment.Resources)
                {
                    Monitor.ClaimResource(granted, Id);
                }

                Pause(onStartTime);
            };

            // On unassignment: release everything currently held, forget it, then wait.
            Client.OnUnassignment += (_, unassignment) =>
            {
                foreach (string held in Resources)
                {
                    Monitor.ReleaseResource(held, Id);
                }

                Resources.Clear();

                Pause(onStopTime);
            };

            Client.OnAborted += (_, abort) => logger.Info("CLIENT", $"CLIENT ABORTED: {abort.AbortReason}");
        }
Exemple #3
0
        /// <summary>
        /// Runs one rebalancing round as coordinator: asks the given clients to stop, runs the local
        /// stop actions, polls until every live client reports <c>ClientStatus.Waiting</c>, splits
        /// the resources between the coordinator and the live clients, asks them to start and then
        /// runs the local start actions.
        /// </summary>
        /// <param name="coordinatorClientId">Id of this (coordinator) client; also used as the log source.</param>
        /// <param name="clientEvent">Supplies the fencing token and the coordinator token that is flagged on a fencing violation.</param>
        /// <param name="clients">Clients that are asked to stop their activity.</param>
        /// <param name="resources">Resources to distribute.</param>
        /// <param name="onChangeActions">Local stop/start callbacks of the coordinator client.</param>
        /// <param name="token">Cancels the round; checked while waiting and before starting clients.</param>
        /// <remarks>
        /// The method returns early, without assigning anything, on a fencing token violation, on a
        /// client service error, or when <paramref name="token"/> is cancelled.
        /// </remarks>
        private async Task TriggerRebalancingAsync(Guid coordinatorClientId,
                                                   ClientEvent clientEvent,
                                                   List <Client> clients,
                                                   List <string> resources,
                                                   OnChangeActions onChangeActions,
                                                   CancellationToken token)
        {
            string coordinatorId = coordinatorClientId.ToString();

            logger.Info(coordinatorId, "---------- Rebalancing triggered -----------");

            // request stop of all clients
            logger.Info(coordinatorId, "COORDINATOR: Requested stop");
            if (clients.Any())
            {
                ModifyClientResult result = await clientService.StopActivityAsync(clientEvent.FencingToken, clients);

                if (result == ModifyClientResult.FencingTokenViolation)
                {
                    clientEvent.CoordinatorToken.FencingTokenViolation = true;
                    return;
                }

                if (result == ModifyClientResult.Error)
                {
                    logger.Error(coordinatorId, "COORDINATOR: Rebalancing error");
                    return;
                }
            }

            // stop all resource activity in local coordinator client
            foreach (Action onStopAction in onChangeActions.OnStopActions)
            {
                onStopAction.Invoke();
            }

            // wait for all live clients to confirm stopped
            bool          allClientsWaiting = false;
            List <Client> clientsNow        = null;

            while (!allClientsWaiting && !token.IsCancellationRequested)
            {
                await WaitFor(TimeSpan.FromSeconds(5), token);

                clientsNow = await GetLiveClientsAsync(clientEvent, coordinatorClientId);

                // All() is true for an empty sequence, so "no live clients" also counts as confirmed.
                allClientsWaiting = clientsNow.All(x => x.ClientStatus == ClientStatus.Waiting);
            }

            // The loop ends only when every client is waiting or the token was cancelled. Bail out on
            // cancellation BEFORE logging, so "Stop confirmed" is never reported for an aborted wait.
            if (token.IsCancellationRequested)
            {
                return;
            }

            logger.Info(coordinatorId, "COORDINATOR: Stop confirmed");

            // From here on all live clients are known to be waiting and clientsNow is non-null,
            // because the loop body ran at least once.

            // assign resources first to coordinator then to other live clients
            Queue <string>            resourcesToAssign   = new(resources);
            List <ClientStartRequest> clientStartRequests = new();
            int remainingClients = clientsNow.Count + 1; // live clients plus the coordinator itself

            // Moves this client's share (at least one resource, while any remain) from the queue
            // into the request. The share is recomputed per client from what is still unassigned.
            void AssignShare(ClientStartRequest startRequest)
            {
                int resourcesPerClient = Math.Max(1, resourcesToAssign.Count / remainingClients);

                while (startRequest.AssignedResources.Count < resourcesPerClient && resourcesToAssign.Any())
                {
                    startRequest.AssignedResources.Add(resourcesToAssign.Dequeue());
                }

                clientStartRequests.Add(startRequest);
                remainingClients--;
            }

            ClientStartRequest coordinatorRequest = new() { ClientId = coordinatorClientId };
            AssignShare(coordinatorRequest);

            foreach (Client client in clientsNow)
            {
                AssignShare(new ClientStartRequest { ClientId = client.ClientId });
            }

            if (token.IsCancellationRequested)
            {
                return;
            }

            logger.Info(coordinatorId, "COORDINATOR: Resources assigned");
            ModifyClientResult startResult =
                await clientService.StartActivityAsync(clientEvent.FencingToken, clientStartRequests);

            if (startResult == ModifyClientResult.FencingTokenViolation)
            {
                clientEvent.CoordinatorToken.FencingTokenViolation = true;
                return;
            }

            if (startResult == ModifyClientResult.Error)
            {
                logger.Error(coordinatorId, "COORDINATOR: Rebalancing error");
                return;
            }

            // start the coordinator's own share locally
            store.SetResources(new SetResourcesRequest
            {
                AssignmentStatus = AssignmentStatus.ResourcesAssigned,
                Resources        = coordinatorRequest.AssignedResources
            });
            foreach (Action <IList <string> > onStartAction in onChangeActions.OnStartActions)
            {
                onStartAction.Invoke(coordinatorRequest.AssignedResources);
            }

            logger.Debug(coordinatorId, "COORDINATOR: Local client started");

            // remember the membership and resource set this round was computed for
            List <Guid> clientIds = clientsNow.Select(x => x.ClientId).ToList();
            clientIds.Add(coordinatorClientId);
            this.clients   = clientIds;
            this.resources = resources;
            logger.Info(coordinatorId, "---------- Activity Started -----------");
        }
Exemple #4
0
        /// <summary>
        /// Coordinator event loop. Roughly once per second it checks for a disconnection that has
        /// outlasted the session timeout and takes at most one queued coordinator event, until the
        /// coordinator token is cancelled or an event ends the loop.
        /// </summary>
        /// <returns>The reason the loop ended.</returns>
        public async Task <CoordinatorExitReason> StartEventLoopAsync()
        {
            Stopwatch sinceLastRebalance = new();

            while (!coordinatorToken.IsCancellationRequested)
            {
                bool disconnectedTooLong = disconnectedTimer.IsRunning && disconnectedTimer.Elapsed > sessionTimeout;
                if (disconnectedTooLong)
                {
                    zooKeeperService.SessionExpired();
                    await CleanUpAsync();

                    return CoordinatorExitReason.SessionExpired;
                }

                if (events.TryTake(out CoordinatorEvent nextEvent))
                {
                    if (nextEvent != CoordinatorEvent.RebalancingTriggered)
                    {
                        // Every event other than a rebalancing request ends the loop after clean-up;
                        // only session expiry additionally notifies the ZooKeeper service first.
                        if (nextEvent == CoordinatorEvent.SessionExpired)
                        {
                            zooKeeperService.SessionExpired();
                        }

                        await CleanUpAsync();

                        return nextEvent switch
                        {
                            CoordinatorEvent.SessionExpired             => CoordinatorExitReason.SessionExpired,
                            CoordinatorEvent.NoLongerCoordinator        => CoordinatorExitReason.NoLongerCoordinator,
                            CoordinatorEvent.PotentialInconsistentState => CoordinatorExitReason.PotentialInconsistentState,
                            CoordinatorEvent.FatalError                 => CoordinatorExitReason.FatalError,
                            _                                           => CoordinatorExitReason.PotentialInconsistentState
                        };
                    }

                    if (events.Any())
                    {
                        // Drop this rebalancing request: any other queued event takes precedence,
                        // and if only rebalancing requests are queued the last one will be handled.
                    }
                    else if (sinceLastRebalance.IsRunning && sinceLastRebalance.Elapsed <= minimumRebalancingInterval)
                    {
                        // Too soon after the previous rebalancing: put the request back for later.
                        events.Add(CoordinatorEvent.RebalancingTriggered);
                    }
                    else
                    {
                        await CancelRebalancingIfInProgressAsync();

                        sinceLastRebalance.Restart();
                        logger.Info(clientId, "Coordinator - Rebalancing triggered");
                        rebalancingTask = Task.Run(async() => await TriggerRebalancing(rebalancingCts.Token));
                    }
                }

                await WaitFor(TimeSpan.FromSeconds(1));
            }

            if (!coordinatorToken.IsCancellationRequested)
            {
                // The loop only ends on cancellation, so reaching this means a correctness bug.
                return CoordinatorExitReason.PotentialInconsistentState;
            }

            await CancelRebalancingIfInProgressAsync();

            await zooKeeperService.CloseSessionAsync();

            return CoordinatorExitReason.Cancelled;
        }
Exemple #5
0
        /// <summary>
        /// Performs one follower step: sends a keep-alive and reacts to the coordinator status it
        /// returns. On <c>StopActivity</c> an active follower runs its stop actions and moves to
        /// <c>Waiting</c>; on <c>ResourcesGranted</c> a waiting follower stores its assignment,
        /// moves to <c>Active</c> and runs its start actions. Any other combination is only logged
        /// or ignored.
        /// </summary>
        /// <param name="followerClientId">Id of this follower; also used as the log source.</param>
        /// <param name="clientEvent">Not read by this method.</param>
        /// <param name="onChangeActions">Stop/start callbacks to invoke.</param>
        /// <param name="token">Checked once, just before the follower is marked active.</param>
        public async Task ExecuteFollowerRoleAsync(Guid followerClientId,
                                                   ClientEvent clientEvent,
                                                   OnChangeActions onChangeActions,
                                                   CancellationToken token)
        {
            Client self       = await clientService.KeepAliveAsync(followerClientId);
            string followerId = followerClientId.ToString();

            logger.Debug(followerId,
                         $"FOLLOWER : Keep Alive sent. Coordinator: {self.CoordinatorStatus} Client: {self.ClientStatus}");

            if (self.CoordinatorStatus == CoordinatorStatus.StopActivity)
            {
                if (self.ClientStatus != ClientStatus.Active)
                {
                    logger.Debug(followerId, $"FOLLOWER : State= {self.ClientStatus}");
                    return;
                }

                logger.Info(followerId, "-------------- Stopping activity ---------------");
                logger.Debug(followerId, "FOLLOWER : Invoking on stop actions");
                foreach (Action stopAction in onChangeActions.OnStopActions)
                {
                    stopAction.Invoke();
                }

                // Local state is cleared before the new status is reported.
                SetResourcesRequest cleared = new SetResourcesRequest
                {
                    AssignmentStatus = AssignmentStatus.AssignmentInProgress,
                    Resources        = new List <string>()
                };
                store.SetResources(cleared);
                await clientService.SetClientStatusAsync(followerClientId, ClientStatus.Waiting);

                logger.Info(followerId, $"FOLLOWER : State= {self.ClientStatus} -> WAITING");
                return;
            }

            if (self.CoordinatorStatus != CoordinatorStatus.ResourcesGranted)
            {
                return;
            }

            if (self.ClientStatus != ClientStatus.Waiting)
            {
                logger.Debug(followerId, $"FOLLOWER : State= {self.ClientStatus}");
                return;
            }

            // Record either the granted resources or an explicit "nothing assigned" state.
            SetResourcesRequest assignment = self.AssignedResources.Any()
                ? new SetResourcesRequest
                {
                    AssignmentStatus = AssignmentStatus.ResourcesAssigned,
                    Resources        = self.AssignedResources
                }
                : new SetResourcesRequest
                {
                    AssignmentStatus = AssignmentStatus.NoResourcesAssigned,
                    Resources        = new List <string>()
                };
            store.SetResources(assignment);

            if (token.IsCancellationRequested)
            {
                return;
            }

            await clientService.SetClientStatusAsync(followerClientId, ClientStatus.Active);

            string grantMessage = self.AssignedResources.Any()
                ? $"FOLLOWER : Granted resources={string.Join(",", self.AssignedResources)}"
                : "FOLLOWER : No resources available to be assigned.";
            logger.Info(followerId, grantMessage);

            foreach (Action <IList <string> > startAction in onChangeActions.OnStartActions)
            {
                if (self.AssignedResources.Any())
                {
                    startAction.Invoke(self.AssignedResources);
                }
                else
                {
                    startAction.Invoke(new List <string>());
                }
            }

            logger.Info(followerId, $"FOLLOWER : State={self.ClientStatus} -> ACTIVE");
            logger.Info(followerId, "-------------- Activity started ---------------");
        }
Exemple #6
0
        /// <summary>
        /// Sets the two watches a follower relies on: one on the sibling node at
        /// <c>watchSiblingPath</c> and one on the resources data. Watch handling is re-enabled
        /// (<c>ignoreWatches = false</c>) before the watches are set.
        /// </summary>
        /// <returns>
        /// <c>Ok</c> when both watches were set, <c>WatchSiblingGone</c> when a
        /// <c>ZkNoEphemeralNodeWatchException</c> was thrown, <c>Error</c> for any other exception.
        /// </returns>
        public async Task <BecomeFollowerResult> BecomeFollowerAsync()
        {
            try
            {
                ignoreWatches = false;
                await zooKeeperService.WatchSiblingNodeAsync(watchSiblingPath, this);

                logger.Info(clientId, $"Follower - Set a watch on sibling node {watchSiblingPath}");

                await zooKeeperService.WatchResourcesDataAsync(this);

                logger.Info(clientId, "Follower - Set a watch on resources node");
            }
            catch (ZkNoEphemeralNodeWatchException)
            {
                logger.Info(clientId, "Follower - Could not set a watch on the sibling node as it has gone");
                return(BecomeFollowerResult.WatchSiblingGone);
            }
            catch (Exception e)
            {
                // Pass the client id first, like the other logger calls in this method, so the
                // message is not recorded in the client-id position. The exception (type, message
                // and stack trace) is embedded in the text.
                // NOTE(review): if the logger offers an overload taking (clientId, text, exception),
                // prefer it here — confirm against the logger interface.
                logger.Error(clientId, $"Follower - Could not become a follower due to an error: {e}");
                return(BecomeFollowerResult.Error);
            }

            return(BecomeFollowerResult.Ok);
        }
Exemple #7
0
        /// <summary>
        /// Creates the ephemeral <c>barrier</c> node under the given resource, retrying until the
        /// node is created or is found to be already owned by this client.
        /// </summary>
        /// <param name="resource">Name of the resource node the barrier is placed under.</param>
        /// <param name="waitToken">
        /// Checked only while waiting for another owner to release the barrier; cancelling it aborts
        /// the wait.
        /// </param>
        /// <param name="logger">Receives a message each time the barrier is held by someone else.</param>
        /// <exception cref="ZkOperationCancelledException">
        /// <paramref name="waitToken"/> was cancelled while waiting for the barrier to be released.
        /// </exception>
        /// <exception cref="ZkInvalidOperationException">
        /// The resource node does not exist, or the create call failed with an unexpected error.
        /// </exception>
        /// <exception cref="ZkSessionExpiredException">The ZooKeeper session has expired.</exception>
        public async Task TryPutResourceBarrierAsync(string resource, CancellationToken waitToken,
                                                     IRebalancerLogger logger)
        {
            string actionToPerform = $"try put resource barrier on {resource}";
            bool   succeeded       = false;

            while (!succeeded)
            {
                // presumably returns once the client is connected again — TODO confirm in BlockUntilConnected
                await BlockUntilConnected(actionToPerform);

                try
                {
                    await zookeeper.createAsync(
                        $"{resourcesPath}/{resource}/barrier",
                        Encoding.UTF8.GetBytes(clientId),
                        ZooDefs.Ids.OPEN_ACL_UNSAFE,
                        CreateMode.EPHEMERAL);

                    succeeded = true;
                }
                catch (KeeperException.NodeExistsException)
                {
                    // A barrier already exists: it is fine if it is ours, otherwise wait and retry.
                    (bool exists, string owner) = await GetResourceBarrierOwnerAsync(resource);

                    if (exists && owner.Equals(clientId))
                    {
                        succeeded = true;
                    }
                    else
                    {
                        logger.Info(clientId, $"Waiting for {owner} to release its barrier on {resource}");
                        // wait for two seconds (20 x 100 ms), will retry in next iteration;
                        // the short slices keep cancellation responsive
                        for (int i = 0; i < 20; i++)
                        {
                            await WaitFor(TimeSpan.FromMilliseconds(100));

                            if (waitToken.IsCancellationRequested)
                            {
                                throw new ZkOperationCancelledException(
                                          $"Could not {actionToPerform} as the operation was cancelled.");
                            }
                        }
                    }
                }
                catch (KeeperException.NoNodeException e)
                {
                    throw new ZkInvalidOperationException(
                              $"Could not {actionToPerform} as the resource node does not exist.", e);
                }
                catch (KeeperException.ConnectionLossException)
                {
                    // do nothing, the next iteration will try again
                }
                catch (KeeperException.SessionExpiredException e)
                {
                    throw new ZkSessionExpiredException($"Could not {actionToPerform} as the session has expired: ", e);
                }
                catch (Exception e)
                {
                    throw new ZkInvalidOperationException($"Could not {actionToPerform} due to an unexpected error", e);
                }
            }
        }