/// <summary>
/// Toggles the client: stops it when <c>Started</c> is true, otherwise starts it.
/// </summary>
/// <param name="logger">Logger that receives the progress messages and is passed to <c>StartAsync</c>.</param>
public async Task PerformActionAsync(IRebalancerLogger logger)
{
    const string source = "TEST RUNNER";

    if (!Started)
    {
        logger.Info(source, "Starting client");
        await StartAsync(logger);
        logger.Info(source, "Started client");
        return;
    }

    logger.Info(source, "Stopping client");

    // The monitor is told about the removal before the client is actually stopped.
    Monitor.RegisterRemoveClient(Id);
    await StopAsync();
    logger.Info(source, "Stopped client");
}
/// <summary>
/// Assigns the next sequential client id, registers it with the monitor and creates a new
/// <c>RebalancerClient</c> with assignment, unassignment and abort handlers attached.
/// </summary>
/// <param name="logger">Logger used to report the abort reason when the client is aborted.</param>
private void CreateNewClient(IRebalancerLogger logger)
{
    // Blocks the calling thread for the given duration, or for a random fraction of it
    // when randomiseTimes is set. Non-positive durations are skipped entirely.
    void Pause(TimeSpan duration)
    {
        if (duration <= TimeSpan.Zero)
        {
            return;
        }

        if (!randomiseTimes)
        {
            Thread.Sleep(duration);
            return;
        }

        double scaledMilliseconds = duration.TotalMilliseconds * rand.NextDouble();
        Thread.Sleep((int)scaledMilliseconds);
    }

    Id = $"Client{ClientNumber}";
    ClientNumber++;
    Monitor.RegisterAddClient(Id);

    Client = new RebalancerClient();

    Client.OnAssignment += (sender, e) =>
    {
        // Remember what was assigned and claim each resource with the monitor.
        Resources = e.Resources;
        foreach (string assigned in e.Resources)
        {
            Monitor.ClaimResource(assigned, Id);
        }

        Pause(onStartTime);
    };

    Client.OnUnassignment += (sender, e) =>
    {
        // Release everything recorded by the last assignment, then forget it.
        foreach (string held in Resources)
        {
            Monitor.ReleaseResource(held, Id);
        }

        Resources.Clear();

        Pause(onStopTime);
    };

    Client.OnAborted += (sender, e) =>
    {
        logger.Info("CLIENT", $"CLIENT ABORTED: {e.AbortReason}");
    };
}
/// <summary>
/// Runs one rebalancing round as coordinator: asks the given clients to stop, runs the local
/// stop actions, polls until every live client reports <c>ClientStatus.Waiting</c>, splits
/// the resources between the coordinator and the live clients, then starts activity.
/// </summary>
/// <param name="coordinatorClientId">Id of the coordinator; it receives the first share of resources.</param>
/// <param name="clientEvent">Supplies the fencing token and the coordinator token used to flag a fencing violation.</param>
/// <param name="clients">Clients that are asked to stop their activity.</param>
/// <param name="resources">Resources to distribute.</param>
/// <param name="onChangeActions">Local stop/start callbacks of the coordinator client.</param>
/// <param name="token">Cancellation token checked while waiting and before assignments are published.</param>
private async Task TriggerRebalancingAsync(Guid coordinatorClientId,
    ClientEvent clientEvent,
    List<Client> clients,
    List<string> resources,
    OnChangeActions onChangeActions,
    CancellationToken token)
{
    string coordinatorId = coordinatorClientId.ToString();
    logger.Info(coordinatorId, "---------- Rebalancing triggered -----------");

    // Request stop of all clients.
    logger.Info(coordinatorId, "COORDINATOR: Requested stop");
    if (clients.Any())
    {
        ModifyClientResult stopResult = await clientService.StopActivityAsync(clientEvent.FencingToken, clients);
        if (stopResult == ModifyClientResult.FencingTokenViolation)
        {
            clientEvent.CoordinatorToken.FencingTokenViolation = true;
            return;
        }

        if (stopResult == ModifyClientResult.Error)
        {
            logger.Error(coordinatorId, "COORDINATOR: Rebalancing error");
            return;
        }
    }

    // Stop all resource activity in the local coordinator client.
    foreach (Action stopAction in onChangeActions.OnStopActions)
    {
        stopAction();
    }

    // Wait for all live clients to confirm they have stopped.
    bool everyoneWaiting = false;
    List<Client> liveClients = null;
    while (!everyoneWaiting && !token.IsCancellationRequested)
    {
        await WaitFor(TimeSpan.FromSeconds(5), token);
        liveClients = await GetLiveClientsAsync(clientEvent, coordinatorClientId);
        everyoneWaiting = !liveClients.Any()
            || liveClients.All(x => x.ClientStatus == ClientStatus.Waiting);
    }

    logger.Info(coordinatorId, "COORDINATOR: Stop confirmed");

    if (token.IsCancellationRequested)
    {
        return;
    }

    if (!everyoneWaiting)
    {
        // log it
        logger.Info(coordinatorId, "!!!");
        return;
    }

    // Assign resources first to the coordinator, then to the other live clients.
    Queue<string> pending = new(resources);

    // Builds the start request for one recipient, giving it an even share (at least one)
    // of whatever is still pending, based on how many recipients are left to serve.
    ClientStartRequest Allocate(Guid recipientId, int recipientsLeft)
    {
        int quota = Math.Max(1, pending.Count / recipientsLeft);
        ClientStartRequest allocation = new() { ClientId = recipientId };
        while (allocation.AssignedResources.Count < quota && pending.Count > 0)
        {
            allocation.AssignedResources.Add(pending.Dequeue());
        }

        return allocation;
    }

    int unserved = liveClients.Count + 1;
    ClientStartRequest coordinatorRequest = Allocate(coordinatorClientId, unserved);
    unserved--;

    List<ClientStartRequest> startRequests = new() { coordinatorRequest };
    foreach (Client follower in liveClients)
    {
        startRequests.Add(Allocate(follower.ClientId, unserved));
        unserved--;
    }

    if (token.IsCancellationRequested)
    {
        return;
    }

    logger.Info(coordinatorId, "COORDINATOR: Resources assigned");

    ModifyClientResult startResult = await clientService.StartActivityAsync(clientEvent.FencingToken, startRequests);
    if (startResult == ModifyClientResult.FencingTokenViolation)
    {
        clientEvent.CoordinatorToken.FencingTokenViolation = true;
        return;
    }

    if (startResult == ModifyClientResult.Error)
    {
        logger.Error(coordinatorId, "COORDINATOR: Rebalancing error");
        return;
    }

    // Publish the coordinator's own share locally and run its start actions.
    store.SetResources(new SetResourcesRequest
    {
        AssignmentStatus = AssignmentStatus.ResourcesAssigned,
        Resources = coordinatorRequest.AssignedResources
    });

    foreach (Action<IList<string>> startAction in onChangeActions.OnStartActions)
    {
        startAction(coordinatorRequest.AssignedResources);
    }

    logger.Debug(coordinatorId, "COORDINATOR: Local client started");

    // Remember the participants and resources of this round (live clients first, coordinator last).
    List<Guid> participantIds = liveClients.Select(x => x.ClientId).ToList();
    participantIds.Add(coordinatorClientId);
    this.clients = participantIds;
    this.resources = resources;

    logger.Info(coordinatorId, "---------- Activity Started -----------");
}
/// <summary>
/// Runs the coordinator event loop until the coordinator token is cancelled or an event
/// (or a disconnection longer than <c>sessionTimeout</c>) forces the coordinator to exit.
/// </summary>
/// <returns>The reason the loop ended.</returns>
public async Task<CoordinatorExitReason> StartEventLoopAsync()
{
    Stopwatch sinceLastRebalance = new();

    while (!coordinatorToken.IsCancellationRequested)
    {
        // Being disconnected for longer than the session timeout is treated as an expired session.
        bool disconnectedTooLong = disconnectedTimer.IsRunning && disconnectedTimer.Elapsed > sessionTimeout;
        if (disconnectedTooLong)
        {
            zooKeeperService.SessionExpired();
            await CleanUpAsync();
            return CoordinatorExitReason.SessionExpired;
        }

        if (events.TryTake(out CoordinatorEvent nextEvent))
        {
            if (nextEvent != CoordinatorEvent.RebalancingTriggered)
            {
                // Every event other than a rebalancing trigger ends the loop after clean-up.
                if (nextEvent == CoordinatorEvent.SessionExpired)
                {
                    zooKeeperService.SessionExpired();
                }

                await CleanUpAsync();

                return nextEvent switch
                {
                    CoordinatorEvent.SessionExpired => CoordinatorExitReason.SessionExpired,
                    CoordinatorEvent.NoLongerCoordinator => CoordinatorExitReason.NoLongerCoordinator,
                    CoordinatorEvent.FatalError => CoordinatorExitReason.FatalError,
                    // PotentialInconsistentState and any unrecognised event.
                    _ => CoordinatorExitReason.PotentialInconsistentState
                };
            }

            // When other events are queued, this trigger is skipped: all other events take
            // precedence over rebalancing, and if only rebalancing events are queued then
            // just the last one gets processed.
            if (!events.Any())
            {
                bool intervalElapsed = !sinceLastRebalance.IsRunning
                    || sinceLastRebalance.Elapsed > minimumRebalancingInterval;

                if (intervalElapsed)
                {
                    await CancelRebalancingIfInProgressAsync();
                    sinceLastRebalance.Reset();
                    sinceLastRebalance.Start();
                    logger.Info(clientId, "Coordinator - Rebalancing triggered");
                    rebalancingTask = Task.Run(async () => await TriggerRebalancing(rebalancingCts.Token));
                }
                else
                {
                    // Not enough time has passed since the last rebalancing, so re-add the event.
                    events.Add(CoordinatorEvent.RebalancingTriggered);
                }
            }
        }

        await WaitFor(TimeSpan.FromSeconds(1));
    }

    if (!coordinatorToken.IsCancellationRequested)
    {
        // If this happens then we have a correctness bug.
        return CoordinatorExitReason.PotentialInconsistentState;
    }

    await CancelRebalancingIfInProgressAsync();
    await zooKeeperService.CloseSessionAsync();
    return CoordinatorExitReason.Cancelled;
}
/// <summary>
/// Performs one follower iteration: sends a keep-alive, then reacts to the coordinator status
/// it returns, either stopping local activity (<c>StopActivity</c>) or starting it with the
/// granted resources (<c>ResourcesGranted</c>).
/// </summary>
/// <param name="followerClientId">Id of this follower client.</param>
/// <param name="clientEvent">Not read by this method.</param>
/// <param name="onChangeActions">Local stop/start callbacks to invoke.</param>
/// <param name="token">Checked once, just before the client is marked active.</param>
public async Task ExecuteFollowerRoleAsync(Guid followerClientId,
    ClientEvent clientEvent,
    OnChangeActions onChangeActions,
    CancellationToken token)
{
    Client self = await clientService.KeepAliveAsync(followerClientId);
    string followerId = followerClientId.ToString();
    logger.Debug(followerId,
        $"FOLLOWER : Keep Alive sent. Coordinator: {self.CoordinatorStatus} Client: {self.ClientStatus}");

    if (self.CoordinatorStatus == CoordinatorStatus.StopActivity)
    {
        if (self.ClientStatus != ClientStatus.Active)
        {
            logger.Debug(followerId, $"FOLLOWER : State= {self.ClientStatus}");
            return;
        }

        logger.Info(followerId, "-------------- Stopping activity ---------------");
        logger.Debug(followerId, "FOLLOWER : Invoking on stop actions");
        foreach (Action onStop in onChangeActions.OnStopActions)
        {
            onStop();
        }

        // Locally record that an assignment is in progress with no resources held.
        store.SetResources(new SetResourcesRequest
        {
            AssignmentStatus = AssignmentStatus.AssignmentInProgress,
            Resources = new List<string>()
        });
        await clientService.SetClientStatusAsync(followerClientId, ClientStatus.Waiting);
        logger.Info(followerId, $"FOLLOWER : State= {self.ClientStatus} -> WAITING");
        return;
    }

    if (self.CoordinatorStatus != CoordinatorStatus.ResourcesGranted)
    {
        return;
    }

    if (self.ClientStatus != ClientStatus.Waiting)
    {
        logger.Debug(followerId, $"FOLLOWER : State= {self.ClientStatus}");
        return;
    }

    // Record the granted resources locally (or the fact that none were granted).
    SetResourcesRequest grant = self.AssignedResources.Any()
        ? new SetResourcesRequest
        {
            AssignmentStatus = AssignmentStatus.ResourcesAssigned,
            Resources = self.AssignedResources
        }
        : new SetResourcesRequest
        {
            AssignmentStatus = AssignmentStatus.NoResourcesAssigned,
            Resources = new List<string>()
        };
    store.SetResources(grant);

    if (token.IsCancellationRequested)
    {
        return;
    }

    await clientService.SetClientStatusAsync(followerClientId, ClientStatus.Active);

    string grantMessage = self.AssignedResources.Any()
        ? $"FOLLOWER : Granted resources={string.Join(",", self.AssignedResources)}"
        : "FOLLOWER : No resources available to be assigned.";
    logger.Info(followerId, grantMessage);

    foreach (Action<IList<string>> onStart in onChangeActions.OnStartActions)
    {
        onStart(self.AssignedResources.Any() ? self.AssignedResources : new List<string>());
    }

    logger.Info(followerId, $"FOLLOWER : State={self.ClientStatus} -> ACTIVE");
    logger.Info(followerId, "-------------- Activity started ---------------");
}
/// <summary>
/// Switches this client into the follower role by re-enabling watch handling and setting
/// watches on the sibling node and on the resources node.
/// </summary>
/// <returns>
/// <c>Ok</c> when both watches were set; <c>WatchSiblingGone</c> when a
/// <c>ZkNoEphemeralNodeWatchException</c> is thrown; <c>Error</c> for any other exception.
/// </returns>
public async Task<BecomeFollowerResult> BecomeFollowerAsync()
{
    try
    {
        ignoreWatches = false;

        await zooKeeperService.WatchSiblingNodeAsync(watchSiblingPath, this);
        logger.Info(clientId, $"Follower - Set a watch on sibling node {watchSiblingPath}");

        await zooKeeperService.WatchResourcesDataAsync(this);
        logger.Info(clientId, "Follower - Set a watch on resources node");

        return BecomeFollowerResult.Ok;
    }
    catch (ZkNoEphemeralNodeWatchException)
    {
        logger.Info(clientId, "Follower - Could not set a watch on the sibling node as it has gone");
        return BecomeFollowerResult.WatchSiblingGone;
    }
    catch (Exception e)
    {
        // Pass the client id first, like every other logger call here; previously the message
        // text was passed in the client-id position and the client id was never logged.
        // NOTE(review): if the logger has an overload taking (clientId, message, exception),
        // prefer it over embedding the exception text in the message.
        logger.Error(clientId, $"Follower - Could not become a follower due to an error: {e}");
        return BecomeFollowerResult.Error;
    }
}
/// <summary>
/// Repeatedly tries to create an ephemeral barrier node for <paramref name="resource"/> at
/// <c>{resourcesPath}/{resource}/barrier</c>, with this client's id as its data, until the
/// node is created or is found to be already owned by this client.
/// </summary>
/// <param name="resource">Name of the resource to put a barrier on.</param>
/// <param name="waitToken">Checked while waiting for another owner to release its barrier.</param>
/// <param name="logger">Logger that receives the "waiting for owner" messages.</param>
/// <exception cref="ZkOperationCancelledException">
/// <paramref name="waitToken"/> was cancelled while waiting for another owner's barrier.
/// </exception>
/// <exception cref="ZkInvalidOperationException">
/// The resource node does not exist, or an unexpected error occurred while creating the barrier.
/// </exception>
/// <exception cref="ZkSessionExpiredException">The ZooKeeper session has expired.</exception>
public async Task TryPutResourceBarrierAsync(string resource,
    CancellationToken waitToken,
    IRebalancerLogger logger)
{
    string actionToPerform = $"try put resource barrier on {resource}";
    bool succeeded = false;

    while (!succeeded)
    {
        await BlockUntilConnected(actionToPerform);

        try
        {
            await zookeeper.createAsync(
                $"{resourcesPath}/{resource}/barrier",
                Encoding.UTF8.GetBytes(clientId),
                ZooDefs.Ids.OPEN_ACL_UNSAFE,
                CreateMode.EPHEMERAL);
            succeeded = true;
        }
        catch (KeeperException.NodeExistsException)
        {
            (bool exists, string owner) = await GetResourceBarrierOwnerAsync(resource);

            // A barrier that already belongs to this client counts as success.
            if (exists && string.Equals(owner, clientId, StringComparison.Ordinal))
            {
                succeeded = true;
            }
            else
            {
                logger.Info(clientId, $"Waiting for {owner} to release its barrier on {resource}");

                // Wait for two seconds in 100 ms slices so that cancellation is noticed
                // quickly; the create is retried in the next iteration of the outer loop.
                for (int i = 0; i < 20; i++)
                {
                    await WaitFor(TimeSpan.FromMilliseconds(100));
                    if (waitToken.IsCancellationRequested)
                    {
                        throw new ZkOperationCancelledException(
                            $"Could not {actionToPerform} as the operation was cancelled.");
                    }
                }
            }
        }
        catch (KeeperException.NoNodeException e)
        {
            throw new ZkInvalidOperationException(
                $"Could not {actionToPerform} as the resource node does not exist.", e);
        }
        catch (KeeperException.ConnectionLossException)
        {
            // Do nothing; the next iteration will try again.
        }
        catch (KeeperException.SessionExpiredException e)
        {
            throw new ZkSessionExpiredException($"Could not {actionToPerform} as the session has expired: ", e);
        }
        catch (Exception e)
        {
            throw new ZkInvalidOperationException($"Could not {actionToPerform} due to an unexpected error", e);
        }
    }
}