Ejemplo n.º 1
0
        /// <summary>
        /// Performs the ZooKeeper calls needed to take the coordinator role: clears the
        /// <c>ignoreWatches</c> flag, increments and watches the epoch, watches the nodes, and
        /// records the version of the resources znode in <c>resourcesVersion</c>.
        /// </summary>
        /// <param name="currentEpoch">Epoch value passed to <c>IncrementAndWatchEpochAsync</c>.</param>
        /// <returns>
        /// <see cref="BecomeCoordinatorResult.Ok"/> when every call succeeds (a
        /// <see cref="CoordinatorEvent.RebalancingTriggered"/> event is queued first);
        /// <see cref="BecomeCoordinatorResult.StaleEpoch"/> when a <see cref="ZkStaleVersionException"/> is caught;
        /// <see cref="BecomeCoordinatorResult.Error"/> when a <see cref="ZkInvalidOperationException"/> is caught.
        /// Any other exception propagates to the caller.
        /// </returns>
        public async Task<BecomeCoordinatorResult> BecomeCoordinatorAsync(int currentEpoch)
        {
            try
            {
                ignoreWatches = false;

                // This instance is handed to each call as the watcher argument.
                await zooKeeperService.IncrementAndWatchEpochAsync(currentEpoch, this);
                await zooKeeperService.WatchNodesAsync(this);

                ResourcesZnode resourcesZnode = await zooKeeperService.GetResourcesAsync(this, null);
                resourcesVersion = resourcesZnode.Version;
            }
            catch (ZkStaleVersionException staleVersion)
            {
                logger.Error(clientId, "Could not become coordinator as a stale version number was used", staleVersion);
                return BecomeCoordinatorResult.StaleEpoch;
            }
            catch (ZkInvalidOperationException invalidOperation)
            {
                logger.Error(clientId, "Could not become coordinator as an invalid ZooKeeper operation occurred", invalidOperation);
                return BecomeCoordinatorResult.Error;
            }

            // Only reached when every ZooKeeper call above completed without throwing.
            events.Add(CoordinatorEvent.RebalancingTriggered);
            return BecomeCoordinatorResult.Ok;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Performs the ZooKeeper calls needed to take the follower role: clears the
        /// <c>ignoreWatches</c> flag, then sets a watch on the sibling node at
        /// <c>watchSiblingPath</c> and a watch on the resources data.
        /// </summary>
        /// <returns>
        /// <see cref="BecomeFollowerResult.Ok"/> when both watches are set;
        /// <see cref="BecomeFollowerResult.WatchSiblingGone"/> when a
        /// <see cref="ZkNoEphemeralNodeWatchException"/> is caught;
        /// <see cref="BecomeFollowerResult.Error"/> for any other exception (the method never throws).
        /// </returns>
        public async Task<BecomeFollowerResult> BecomeFollowerAsync()
        {
            try
            {
                ignoreWatches = false;
                await zooKeeperService.WatchSiblingNodeAsync(watchSiblingPath, this);

                logger.Info(clientId, $"Follower - Set a watch on sibling node {watchSiblingPath}");

                await zooKeeperService.WatchResourcesDataAsync(this);

                logger.Info(clientId, "Follower - Set a watch on resources node");
            }
            catch (ZkNoEphemeralNodeWatchException)
            {
                logger.Info(clientId, "Follower - Could not set a watch on the sibling node as it has gone");
                return BecomeFollowerResult.WatchSiblingGone;
            }
            catch (Exception e)
            {
                // Pass clientId like every other log call here so the error is attributed to this
                // client and the message text is not taken for the client id.
                logger.Error(clientId, "Follower - Could not become a follower due to an error", e);
                return BecomeFollowerResult.Error;
            }

            return BecomeFollowerResult.Ok;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs one rebalancing round as coordinator: asks the given clients to stop, runs the local
        /// stop actions, polls every 5 seconds until all live clients report
        /// <see cref="ClientStatus.Waiting"/>, then splits <paramref name="resources"/> between the
        /// coordinator and the live clients and starts them.
        /// </summary>
        /// <param name="coordinatorClientId">Id of the local coordinator client; used for logging and as the first assignee.</param>
        /// <param name="clientEvent">Supplies the fencing token; its coordinator token is flagged on a fencing token violation.</param>
        /// <param name="clients">Clients that receive the stop request; no request is sent when the list is empty.</param>
        /// <param name="resources">Resources to distribute, handed out in list order.</param>
        /// <param name="onChangeActions">Local stop and start callbacks invoked around the reassignment.</param>
        /// <param name="token">Ends the wait loop and aborts the round before resources are assigned or started.</param>
        private async Task TriggerRebalancingAsync(Guid coordinatorClientId,
                                                   ClientEvent clientEvent,
                                                   List<Client> clients,
                                                   List<string> resources,
                                                   OnChangeActions onChangeActions,
                                                   CancellationToken token)
        {
            string coordinatorLogId = coordinatorClientId.ToString();
            logger.Info(coordinatorLogId, "---------- Rebalancing triggered -----------");

            // request stop of all clients
            logger.Info(coordinatorLogId, "COORDINATOR: Requested stop");
            if (clients.Any())
            {
                ModifyClientResult stopResult = await clientService.StopActivityAsync(clientEvent.FencingToken, clients);

                if (stopResult == ModifyClientResult.FencingTokenViolation)
                {
                    clientEvent.CoordinatorToken.FencingTokenViolation = true;
                    return;
                }

                if (stopResult == ModifyClientResult.Error)
                {
                    logger.Error(coordinatorLogId, "COORDINATOR: Rebalancing error");
                    return;
                }
            }

            // stop all resource activity in local coordinator client
            foreach (Action onStopAction in onChangeActions.OnStopActions)
            {
                onStopAction.Invoke();
            }

            // wait for all live clients to confirm stopped
            bool allClientsWaiting = false;
            List<Client> clientsNow = null;

            while (!allClientsWaiting && !token.IsCancellationRequested)
            {
                await WaitFor(TimeSpan.FromSeconds(5), token);

                clientsNow = await GetLiveClientsAsync(clientEvent, coordinatorClientId);

                // All() is true for an empty list, so having no other live clients also ends the wait.
                allClientsWaiting = clientsNow.All(x => x.ClientStatus == ClientStatus.Waiting);
            }

            // Check cancellation before reporting confirmation: the loop also exits on cancellation,
            // in which case the stop was never confirmed. Past this point the loop can only have
            // ended because every live client is waiting, so clientsNow is populated.
            if (token.IsCancellationRequested)
            {
                return;
            }

            logger.Info(coordinatorLogId, "COORDINATOR: Stop confirmed");

            // assign resources first to coordinator then to other live clients
            Queue<string> unassigned = new(resources);
            int clientsStillToServe = clientsNow.Count + 1;

            ClientStartRequest coordinatorRequest =
                BuildStartRequest(coordinatorClientId, unassigned, clientsStillToServe);
            clientsStillToServe--;

            List<ClientStartRequest> clientStartRequests = new() { coordinatorRequest };
            foreach (Client client in clientsNow)
            {
                clientStartRequests.Add(BuildStartRequest(client.ClientId, unassigned, clientsStillToServe));
                clientsStillToServe--;
            }

            if (token.IsCancellationRequested)
            {
                return;
            }

            logger.Info(coordinatorLogId, "COORDINATOR: Resources assigned");
            ModifyClientResult startResult =
                await clientService.StartActivityAsync(clientEvent.FencingToken, clientStartRequests);

            if (startResult == ModifyClientResult.FencingTokenViolation)
            {
                clientEvent.CoordinatorToken.FencingTokenViolation = true;
                return;
            }

            if (startResult == ModifyClientResult.Error)
            {
                logger.Error(coordinatorLogId, "COORDINATOR: Rebalancing error");
                return;
            }

            // start the coordinator's own share locally
            store.SetResources(new SetResourcesRequest
            {
                AssignmentStatus = AssignmentStatus.ResourcesAssigned,
                Resources        = coordinatorRequest.AssignedResources
            });
            foreach (Action<IList<string>> onStartAction in onChangeActions.OnStartActions)
            {
                onStartAction.Invoke(coordinatorRequest.AssignedResources);
            }

            logger.Debug(coordinatorLogId, "COORDINATOR: Local client started");

            // remember the membership and resource list this assignment was computed from
            List<Guid> clientIds = clientsNow.Select(x => x.ClientId).ToList();
            clientIds.Add(coordinatorClientId);
            this.clients   = clientIds;
            this.resources = resources;
            logger.Info(coordinatorLogId, "---------- Activity Started -----------");
        }

        /// <summary>
        /// Builds the start request for one client by dequeuing its share of the unassigned resources.
        /// </summary>
        /// <param name="clientId">Client the request is for.</param>
        /// <param name="unassigned">Resources not yet handed out; the client's share is removed from it.</param>
        /// <param name="clientsStillToServe">Number of clients, this one included, that still need a share; must be at least 1.</param>
        /// <returns>A request holding <c>unassigned.Count / clientsStillToServe</c> resources (at least one while any remain).</returns>
        private static ClientStartRequest BuildStartRequest(Guid clientId, Queue<string> unassigned, int clientsStillToServe)
        {
            // Recomputing the share from what is left means the last client takes every remaining resource.
            int share = Math.Max(1, unassigned.Count / clientsStillToServe);

            ClientStartRequest request = new() { ClientId = clientId };
            while (request.AssignedResources.Count < share && unassigned.Count > 0)
            {
                request.AssignedResources.Add(unassigned.Dequeue());
            }

            return request;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Tries to acquire the coordinator lease for a resource group inside one serializable
        /// transaction: locks the group's row with an UPDATE, reads it back together with the
        /// database's UTC time, and grants the lease when the row has no coordinator or the last
        /// renewal is older than the row's lease expiry.
        /// </summary>
        /// <param name="acquireLeaseRequest">Supplies the requesting client id and the resource group name.</param>
        /// <returns>
        /// <see cref="LeaseResult.Granted"/> with an incremented fencing token when the lease is taken;
        /// <see cref="LeaseResult.Denied"/> when another coordinator holds an unexpired lease;
        /// <see cref="LeaseResult.NoLease"/> (with fixed 1 minute / 25 second periods) when the resource group row does not exist;
        /// <see cref="LeaseResult.TransientError"/> or <see cref="LeaseResult.Error"/> when an exception is caught,
        /// in which case the transaction is rolled back and the exception is attached to the response.
        /// </returns>
        public async Task<LeaseResponse> TryAcquireLeaseAsync(AcquireLeaseRequest acquireLeaseRequest)
        {
            // The transaction and command are disposable; dispose them before the connection is released.
            using (SqlConnection conn = await ConnectionHelper.GetOpenConnectionAsync(connectionString))
            using (SqlTransaction transaction = conn.BeginTransaction(IsolationLevel.Serializable))
            using (SqlCommand command = conn.CreateCommand())
            {
                command.Transaction = transaction;

                try
                {
                    // obtain lock on the record blocking other nodes until the transaction is committed
                    command.CommandText =
                        "UPDATE [RBR].[ResourceGroups] SET LockedByClient = @ClientId WHERE ResourceGroup = @ResourceGroup";
                    command.Parameters.AddWithValue("@ClientId", acquireLeaseRequest.ClientId);
                    command.Parameters.Add("@ResourceGroup", SqlDbType.VarChar, 100).Value =
                        acquireLeaseRequest.ResourceGroup;
                    await command.ExecuteNonQueryAsync();

                    // get the resource group (TODO, use OUTPUT on UPDATE query instead of another query)
                    command.Parameters.Clear();
                    command.CommandText = @"SELECT [ResourceGroup]
      ,[CoordinatorId]
      ,[LastCoordinatorRenewal]
      ,[CoordinatorServer]
      ,[LockedByClient]
      ,[FencingToken]
      ,[LeaseExpirySeconds]
      ,[HeartbeatSeconds]
	  ,GETUTCDATE() AS [TimeNow]
FROM [RBR].[ResourceGroups]
WHERE ResourceGroup = @ResourceGroup";
                    command.Parameters.Add("@ResourceGroup", SqlDbType.VarChar, 100).Value =
                        acquireLeaseRequest.ResourceGroup;

                    ResourceGroup rg = null;
                    using (SqlDataReader reader = await command.ExecuteReaderAsync())
                    {
                        if (await reader.ReadAsync())
                        {
                            rg = new ResourceGroup
                            {
                                Name                   = acquireLeaseRequest.ResourceGroup,
                                CoordinatorId          = GetGuidFromNullableGuid(reader, "CoordinatorId"),
                                CoordinatorServer      = GetStringFromNullableGuid(reader, "CoordinatorServer"),
                                LastCoordinatorRenewal = GetDateTimeFromNullable(reader, "LastCoordinatorRenewal"),
                                TimeNow                = (DateTime)reader["TimeNow"],
                                LockedByClientId       = GetGuidFromNullableGuid(reader, "LockedByClient"),
                                FencingToken           = (int)reader["FencingToken"],
                                LeaseExpirySeconds     = (int)reader["LeaseExpirySeconds"],
                                HeartbeatSeconds       = (int)reader["HeartbeatSeconds"]
                            };
                        }
                    }

                    if (rg == null)
                    {
                        // No row for this resource group, so the UPDATE above matched nothing.
                        // End the transaction explicitly instead of leaving it open until the
                        // connection is disposed.
                        transaction.Rollback();

                        return new LeaseResponse
                        {
                            Result = LeaseResult.NoLease,
                            Lease = new Lease
                            {
                                ExpiryPeriod = TimeSpan.FromMinutes(1),
                                HeartbeatPeriod = TimeSpan.FromSeconds(25)
                            }
                        };
                    }

                    // determine the response, if the CoordinatorId is empty or expired then grant, else deny
                    LeaseResponse response = new() { Lease = new Lease() };
                    if (rg.CoordinatorId == Guid.Empty ||
                        (rg.TimeNow - rg.LastCoordinatorRenewal).TotalSeconds > rg.LeaseExpirySeconds)
                    {
                        response.Lease.ResourceGroup   = acquireLeaseRequest.ResourceGroup;
                        response.Lease.ClientId        = acquireLeaseRequest.ClientId;
                        response.Lease.ExpiryPeriod    = TimeSpan.FromSeconds(rg.LeaseExpirySeconds);
                        response.Lease.HeartbeatPeriod = TimeSpan.FromSeconds(rg.HeartbeatSeconds);
                        // every granted lease carries a fencing token one higher than the stored one
                        response.Lease.FencingToken    = ++rg.FencingToken;
                        response.Result = LeaseResult.Granted;

                        command.Parameters.Clear();
                        command.CommandText = @"UPDATE [RBR].[ResourceGroups]
   SET [CoordinatorId] = @ClientId
      ,[LastCoordinatorRenewal] = GETUTCDATE()
      ,[CoordinatorServer] = @Server
      ,[FencingToken] = @FencingToken
 WHERE ResourceGroup = @ResourceGroup";
                        command.Parameters.AddWithValue("@ClientId", acquireLeaseRequest.ClientId);
                        command.Parameters.AddWithValue("@FencingToken", response.Lease.FencingToken);
                        command.Parameters.Add("@Server", SqlDbType.NVarChar, 500).Value       = Environment.MachineName;
                        command.Parameters.Add("@ResourceGroup", SqlDbType.VarChar, 100).Value =
                            acquireLeaseRequest.ResourceGroup;
                        await command.ExecuteNonQueryAsync();
                    }
                    else
                    {
                        response.Lease.ExpiryPeriod    = TimeSpan.FromSeconds(rg.LeaseExpirySeconds);
                        response.Lease.HeartbeatPeriod = TimeSpan.FromSeconds(rg.HeartbeatSeconds);
                        response.Result = LeaseResult.Denied;
                    }

                    transaction.Commit();

                    return response;
                }
                catch (Exception ex)
                {
                    try
                    {
                        logger.Error("Rolling back lease acquisition: ", ex);
                        transaction.Rollback();
                    }
                    catch (Exception rex)
                    {
                        // A failed rollback is logged only; the original failure is what the caller sees.
                        logger.Error("Rollback of lease acquisition failed: ", rex);
                    }

                    return new LeaseResponse
                    {
                        Result =
                            TransientErrorDetector.IsTransient(ex) ? LeaseResult.TransientError : LeaseResult.Error,
                        Message = "Lease acquisition failure",
                        Exception = ex
                    };
                }
            }
        }