/// <summary>
/// Forwards a topology update command to the cluster engine, but only while
/// this node is still the one the engine reports as leader.
/// </summary>
/// <param name="cmd">The topology update command to submit.</param>
/// <returns>
/// The task produced by <c>_engine.PutAsync</c>, carrying an index and a result object.
/// </returns>
/// <exception cref="NotLeadingException">
/// Thrown synchronously (before any task is created) when the engine's leader tag
/// differs from this server's node tag.
/// </exception>
private Task<(long Index, object Result)> UpdateTopology(UpdateTopologyCommand cmd)
{
    var leadershipLost = _engine.LeaderTag != _server.NodeTag;
    if (leadershipLost)
    {
        throw new NotLeadingException("This node is no longer the leader, so we abort updating the database topology");
    }

    return _engine.PutAsync(cmd);
}
/// <summary>
/// Registers a new <c>UpdateTopologyCommand</c> for execution unless an update
/// is already pending, in which case the call is a no-op.
/// </summary>
/// <remarks>
/// <c>pendingUpdating</c> acts as a "one update in flight" marker: it is set to 1
/// here and decremented by the handler attached to the command's <c>Completed</c> event.
/// </remarks>
public void UpdateTopology()
{
    // Claim the pending slot with a single atomic 0 -> 1 transition. Reading the
    // counter first and incrementing it in a separate step would allow two
    // concurrent callers to both observe 0 and both register a command.
    if (Interlocked.CompareExchange(ref pendingUpdating, 1, 0) != 0)
    {
        return;
    }

    var command = new UpdateTopologyCommand(master, this);

    // Release the slot once the command signals completion.
    command.Completed += () => Interlocked.Decrement(ref pendingUpdating);

    executer.RegisterForExecution(command);
}
/// <summary>
/// Walks every database known to the state machine, decides (via
/// <c>UpdateDatabaseTopology</c>) whether its topology needs to change, and then
/// submits the resulting topology updates and database deletions.
/// </summary>
/// <param name="newStats">Latest node status reports; passed through to <c>UpdateDatabaseTopology</c>.</param>
/// <param name="prevStats">Previous node status reports; passed through to <c>UpdateDatabaseTopology</c>.</param>
/// <returns>A task that completes once all collected updates and deletions were awaited.</returns>
/// <remarks>
/// Returns immediately when the engine has no current leader. Decisions are gathered
/// inside a read transaction; the commands are only sent after that transaction is closed.
/// </remarks>
private async Task AnalyzeLatestStats(
    Dictionary<string, ClusterNodeStatusReport> newStats,
    Dictionary<string, ClusterNodeStatusReport> prevStats)
{
    var currentLeader = _engine.CurrentLeader;
    if (currentLeader == null)
    {
        return;
    }

    using (_contextPool.AllocateOperationContext(out TransactionOperationContext context))
    {
        var pendingUpdates = new List<(UpdateTopologyCommand Update, string Reason)>();

        // Stays null unless UpdateDatabaseTopology assigns it through the ref parameter.
        List<DeleteDatabaseCommand> deletions = null;

        using (context.OpenReadTransaction())
        {
            var clusterTopology = _server.GetClusterTopology(context);

            foreach (var dbName in _engine.StateMachine.GetDatabaseNames(context))
            {
                var record = _engine.StateMachine.ReadDatabase(context, dbName, out long recordEtag);
                if (record == null)
                {
                    if (_logger.IsInfoEnabled)
                    {
                        _logger.Info($"Can't analyze the stats of database the {dbName}, because the database record is null.");
                    }

                    continue;
                }

                // A record without a topology stamp is treated as stamped with -1 everywhere.
                var stamp = record.Topology?.Stamp ?? new LeaderStamp
                {
                    Index = -1,
                    LeadersTicks = -1,
                    Term = -1
                };

                // Case 1: the topology was stamped in an earlier term and the current
                // leadership is younger than the stabilization time.
                var leaderChangedRecently =
                    _term > stamp.Term &&
                    currentLeader.LeaderShipDuration < _stabilizationTime;

                // Case 2: the topology was stamped in this term, less than the
                // stabilization time ago (measured in leadership duration).
                var stampedRecently =
                    _term == stamp.Term &&
                    (currentLeader.LeaderShipDuration - stamp.LeadersTicks < _stabilizationTime);

                if (leaderChangedRecently || stampedRecently)
                {
                    if (_logger.IsInfoEnabled)
                    {
                        _logger.Info($"We give more time for the '{dbName}' stats to become stable, so we skip analyzing it for now.");
                    }

                    continue;
                }

                var reason = UpdateDatabaseTopology(dbName, record, clusterTopology, newStats, prevStats, ref deletions);
                if (reason == null)
                {
                    // Nothing to change for this database.
                    continue;
                }

                AddToDecisionLog(dbName, reason);

                var updateCommand = new UpdateTopologyCommand(dbName)
                {
                    Topology = record.Topology,
                    RaftCommandIndex = recordEtag
                };
                pendingUpdates.Add((updateCommand, reason));
            }
        }

        foreach (var (update, reason) in pendingUpdates)
        {
            try
            {
                await UpdateTopology(update);

                var title = $"Topology of database '{update.DatabaseName}' was changed";
                var alert = AlertRaised.Create(
                    update.DatabaseName,
                    title,
                    reason,
                    AlertType.DatabaseTopologyWarning,
                    NotificationSeverity.Warning);

                NotificationCenter.Add(alert);
            }
            catch (ConcurrencyException)
            {
                // Somewhat expected: when someone else modified the database in the
                // meantime we leave it untouched and let the next round re-run the logic.
            }
        }

        if (deletions == null)
        {
            return;
        }

        foreach (var deletion in deletions)
        {
            AddToDecisionLog(deletion.DatabaseName, $"We reached the replication factor on '{deletion.DatabaseName}', so we try to remove promotables/rehabs from: {string.Join(", ", deletion.FromNodes)}");
            await Delete(deletion);
        }
    }
}