Exemple #1
0
        /// <summary>
        /// Queues one <c>DeleteDatabaseCommand</c> that targets every promotable and rehab node of the
        /// database, but only when the member count has reached the replication factor and there is
        /// at least one promotable or rehab node.
        /// </summary>
        /// <param name="dbName">Database name placed on the command.</param>
        /// <param name="topology">Topology whose members, promotables and rehabs are inspected.</param>
        /// <param name="deletions">List that receives the command; it is allocated here when null.</param>
        private void RemoveOtherNodesIfNeeded(string dbName, DatabaseTopology topology, ref List <DeleteDatabaseCommand> deletions)
        {
            // Nothing to do while we are below the replication factor, or when there is nobody to remove.
            var hasRedundantNodes = topology.Promotables.Count > 0 || topology.Rehabs.Count > 0;
            if (topology.Members.Count < topology.ReplicationFactor || hasRedundantNodes == false)
            {
                return;
            }

            if (_logger.IsOperationsEnabled)
            {
                _logger.Operations("We reached the replication factor, so we remove all other rehab/promotable nodes.");
            }

            // Promotables first, then rehabs - same order as the concatenation.
            var redundantNodes = topology.Promotables.Concat(topology.Rehabs).ToArray();

            var command = new DeleteDatabaseCommand
            {
                ErrorOnDatabaseDoesNotExists = false,
                DatabaseName                 = dbName,
                FromNodes                    = redundantNodes,
                HardDelete                   = _hardDeleteOnReplacement,
                UpdateReplicationFactor      = false
            };

            // The caller may pass a null list; create it lazily on the first command.
            deletions = deletions ?? new List <DeleteDatabaseCommand>();
            deletions.Add(command);
        }
Exemple #2
0
        /// <summary>
        /// Once the member count has reached the replication factor, queues a single
        /// <c>DeleteDatabaseCommand</c> for the promotable/rehab nodes that have a mentor with a
        /// database report in <paramref name="current"/> and are not already listed in
        /// <c>record.DeletionInProgress</c>.
        /// </summary>
        /// <param name="dbName">Database name placed on the command and used to look up the mentor's report.</param>
        /// <param name="record">Database record supplying the topology and the deletions in progress.</param>
        /// <param name="clusterTopology">Passed through to <c>TryGetMentorNode</c>.</param>
        /// <param name="current">Latest status reports keyed by node tag.</param>
        /// <param name="deletions">List that receives the command; it is allocated here when null.</param>
        private void RemoveOtherNodesIfNeeded(string dbName, DatabaseRecord record, ClusterTopology clusterTopology,
                                              Dictionary <string, ClusterNodeStatusReport> current, ref List <DeleteDatabaseCommand> deletions)
        {
            var topology = record.Topology;

            // Bail out while below the replication factor, or when no promotable/rehab exists.
            var hasCandidates = topology.Promotables.Count > 0 || topology.Rehabs.Count > 0;
            if (topology.Members.Count < topology.ReplicationFactor || hasCandidates == false)
            {
                return;
            }

            if (_logger.IsOperationsEnabled)
            {
                _logger.Operations("We reached the replication factor, so we try to remove redundant nodes.");
            }

            var redundantNodes     = new List <string>();
            var changeVectorByNode = new Dictionary <string, string>();

            foreach (var candidate in topology.Promotables.Concat(topology.Rehabs))
            {
                // A candidate is only removed when its mentor's report for this database is available,
                // because the mentor's change vector is attached to the command.
                if (TryGetMentorNode(dbName, topology, clusterTopology, candidate, out var mentorNode) == false)
                {
                    continue;
                }

                if (current.TryGetValue(mentorNode, out var mentorStats) == false)
                {
                    continue;
                }

                if (mentorStats.Report.TryGetValue(dbName, out var mentorDbReport) == false)
                {
                    continue;
                }

                // Skip nodes whose deletion is already under way.
                if (record.DeletionInProgress?.ContainsKey(candidate) == true)
                {
                    continue;
                }

                redundantNodes.Add(candidate);
                changeVectorByNode.Add(candidate, mentorDbReport.DatabaseChangeVector);
            }

            if (redundantNodes.Count == 0)
            {
                return;
            }

            var command = new DeleteDatabaseCommand
            {
                ErrorOnDatabaseDoesNotExists = false,
                DatabaseName                 = dbName,
                FromNodes                    = redundantNodes.ToArray(),
                HardDelete                   = _hardDeleteOnReplacement,
                UpdateReplicationFactor      = false,
                MentorChangeVector           = changeVectorByNode
            };

            // The caller may pass a null list; create it lazily on the first command.
            deletions = deletions ?? new List <DeleteDatabaseCommand>();
            deletions.Add(command);
        }
Exemple #3
0
 /// <summary>
 /// Submits the delete command through <c>_engine.PutAsync</c>, but only while this node's tag
 /// matches the engine's current leader tag.
 /// </summary>
 /// <param name="cmd">The delete command to submit.</param>
 /// <returns>The task returned by <c>_engine.PutAsync</c>.</returns>
 /// <exception cref="NotLeadingException">Thrown when this node is not the leader.</exception>
 private Task <(long Index, object Result)> Delete(DeleteDatabaseCommand cmd)
 {
     var stillLeading = _engine.LeaderTag == _server.NodeTag;
     if (stillLeading)
     {
         return _engine.PutAsync(cmd);
     }

     throw new NotLeadingException("This node is no longer the leader, so we abort the deletion command");
 }
Exemple #4
0
        /// <summary>
        /// A command built with only a connection (no database assigned) must report
        /// <c>false</c> from <c>Verify(false)</c>.
        /// </summary>
        public void DeleteDatabaseCommand_Verify_1()
        {
            // Arrange
            IIrbisConnection connection = GetConnectionMock().Object;
            var sut = new DeleteDatabaseCommand(connection);

            // Act
            var verified = sut.Verify(false);

            // Assert
            Assert.IsFalse(verified);
        }
Exemple #5
0
        /// <summary>
        /// Calls <c>CreateQuery</c> on a command that has no database assigned.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method contains no assertion; presumably an expected-exception
        /// attribute belongs above it - confirm against the original test class.
        /// </remarks>
        public void DeleteDatabaseCommand_CreateQuery_1()
        {
            IIrbisConnection connection = GetConnectionMock().Object;
            var sut = new DeleteDatabaseCommand(connection);

            sut.CreateQuery();
        }
Exemple #6
0
        /// <summary>
        /// The constructor must expose the very same connection instance through
        /// the <c>Connection</c> property.
        /// </summary>
        public void DeleteDatabaseCommand_Construciton_1()
        {
            // Arrange
            IIrbisConnection expected = GetConnectionMock().Object;

            // Act
            var sut = new DeleteDatabaseCommand(expected);

            // Assert
            Assert.AreSame(expected, sut.Connection);
        }
Exemple #7
0
        /// <summary>
        /// With a database name assigned, <c>CreateQuery</c> must return a non-null query.
        /// </summary>
        public void DeleteDatabaseCommand_CreateQuery_2()
        {
            // Arrange
            IIrbisConnection connection = GetConnectionMock().Object;
            var sut = new DeleteDatabaseCommand(connection);
            sut.Database = "IBIS2";

            // Act
            ClientQuery created = sut.CreateQuery();

            // Assert
            Assert.IsNotNull(created);
        }
        /// <summary>
        /// Builds a delete command from the entity's schema, table name and type, and appends it to
        /// <c>database.Commands</c>.
        /// </summary>
        /// <param name="database">CodeDom database whose command list receives the new command.</param>
        /// <param name="entity">Entity supplying schema, table name, namespace and type name.</param>
        private void RenderDelete(CodeDom.Database database, Entity entity)
        {
            var command = new DeleteDatabaseCommand();

            command.Schema         = entity.Schema;
            command.TableName      = entity.TableName;
            command.AutoDeleteType = true;

            // The command's type reference points at the entity's own namespace and name.
            command.Type = new CodeDom.TypeReference
            {
                Namespace = entity.Namespace,
                Type      = entity.Name
            };

            database.Commands.Add(command);
        }
Exemple #9
0
        /// <summary>
        /// Feeds a canned server response (standard header followed by a return code of 0) through the
        /// testing socket and checks that <c>Execute</c> reports that same return code.
        /// </summary>
        public void DeleteDatabaseCommand_ExecuteRequest_1()
        {
            // Arrange
            int expectedCode = 0;
            IIrbisConnection connection = GetConnectionMock().Object;
            var sut = new DeleteDatabaseCommand(connection);
            sut.Database = "IBIS2";

            ResponseBuilder canned = new ResponseBuilder()
                                     .StandardHeader(CommandCode.DeleteDatabase, 123, 456)
                                     .NewLine()
                                     .Append(expectedCode)
                                     .NewLine();

            // The mocked connection's socket is cast to TestingSocket so the reply can be preloaded.
            var testingSocket = (TestingSocket)connection.Socket;
            testingSocket.Response = canned.Encode();

            // Act
            ClientQuery    request = sut.CreateQuery();
            ServerResponse reply   = sut.Execute(request);

            // Assert
            Assert.AreEqual(expectedCode, reply.ReturnCode);
        }
Exemple #10
0
        /// <summary>
        /// Inspects the status reports for every member, promotable and rehab node of a database and
        /// applies at most one topology change per call (demote to rehab, promote, replace, or mark as
        /// not responding), returning as soon as a change has been made.
        /// </summary>
        /// <param name="dbName">Name of the database being examined.</param>
        /// <param name="record">Database record; its <c>Topology</c> is mutated in place.</param>
        /// <param name="clusterTopology">Cluster topology passed through to the health/mentor/fit-node helpers.</param>
        /// <param name="current">Latest status reports keyed by node tag.</param>
        /// <param name="previous">Status reports from the preceding round, keyed by node tag.</param>
        /// <param name="deletions">Receives any delete commands produced; allocated here when null.</param>
        /// <returns>
        /// A human-readable reason describing the change (or the accumulated status-update reasons), or
        /// <c>null</c> when the record is disabled or nothing changed.
        /// NOTE(review): presumably the caller persists the topology when the result is non-null - confirm.
        /// </returns>
        private string UpdateDatabaseTopology(string dbName, DatabaseRecord record, ClusterTopology clusterTopology,
                                              Dictionary <string, ClusterNodeStatusReport> current,
                                              Dictionary <string, ClusterNodeStatusReport> previous,
                                              ref List <DeleteDatabaseCommand> deletions)
        {
            // Disabled databases are not supervised at all.
            if (record.Disabled)
            {
                return(null);
            }

            var topology       = record.Topology;
            var hasLivingNodes = false;

            // ---- Phase 1: members ----
            foreach (var member in topology.Members)
            {
                var status = None;
                if (current.TryGetValue(member, out var nodeStats) == false)
                {
                    // there isn't much we can do here, except for log it.
                    if (previous.TryGetValue(member, out _))
                    {
                        // if we found this node in the previous report, we will ignore it this time and wait for the next report.
                        continue;
                    }

                    var msg =
                        $"The member node {member} was not found in both current and previous reports of the cluster observer. " +
                        $"If this error continue to raise, check the latency between the cluster nodes.";
                    if (_logger.IsInfoEnabled)
                    {
                        _logger.Info(msg);
                    }
                    RaiseNodeNotFoundAlert(msg, member);
                    continue;
                }
                if (nodeStats.Status == ClusterNodeStatusReport.ReportStatus.Ok &&
                    nodeStats.Report.TryGetValue(dbName, out var dbStats))
                {
                    status = dbStats.Status;
                    // Loaded, Loading and Unloaded all count as "alive" for this database.
                    if (status == Loaded ||
                        status == Loading ||
                        status == Unloaded)
                    {
                        hasLivingNodes = true;

                        // A healthy member that still carries a promotable status entry gets it cleared.
                        if (topology.PromotablesStatus.TryGetValue(member, out var _))
                        {
                            topology.DemotionReasons.Remove(member);
                            topology.PromotablesStatus.Remove(member);
                            return($"Node {member} is online");
                        }
                        continue;
                    }
                }

                // Grace period of _moveToRehabTime milliseconds since the last successful update before
                // the node is moved to rehab (the original comment says one minute; the configured
                // value is not visible here).
                if (DateTime.UtcNow.AddMilliseconds(-_moveToRehabTime) < current[member]?.LastSuccessfulUpdateDateTime)
                {
                    continue;
                }

                if (TryMoveToRehab(dbName, topology, current, member))
                {
                    return($"Node {member} is currently not responding (with status: {status}) and moved to rehab");
                }

                // database distribution is off and the node is down
                // (only reported once: skipped when the node is already marked NotResponding)
                if (topology.DynamicNodesDistribution == false && (
                        topology.PromotablesStatus.TryGetValue(member, out var currentStatus) == false ||
                        currentStatus != DatabasePromotionStatus.NotResponding))
                {
                    topology.DemotionReasons[member]   = "Not responding";
                    topology.PromotablesStatus[member] = DatabasePromotionStatus.NotResponding;
                    return($"Node {member} is currently not responding with the status '{status}'");
                }
            }

            // ---- Phase 2: no member is alive - try to rescue the database from a healthy rehab ----
            if (hasLivingNodes == false)
            {
                var recoverable = new List <string>();
                foreach (var rehab in topology.Rehabs)
                {
                    if (FailedDatabaseInstanceOrNode(clusterTopology, rehab, dbName, current) == DatabaseHealth.Good)
                    {
                        recoverable.Add(rehab);
                    }
                }

                if (recoverable.Count > 0)
                {
                    // Promote the rehab chosen by FindMostUpToDateNode straight to member.
                    var node = FindMostUpToDateNode(recoverable, dbName, current);
                    topology.Rehabs.Remove(node);
                    topology.Members.Add(node);

                    RaiseNoLivingNodesAlert($"None of '{dbName}' database nodes are responding to the supervisor, promoting {node} from rehab to avoid making the database completely unreachable.", dbName);
                    return($"None of '{dbName}' nodes are responding, promoting {node} from rehab");
                }

                if (topology.Members.Count == 0 && record.DeletionInProgress?.Count > 0)
                {
                    return(null); // We delete the whole database.
                }

                RaiseNoLivingNodesAlert($"None of '{dbName}' database nodes are responding to the supervisor, the database is unreachable.", dbName);
            }

            // Reasons collected from TryPromote calls that did not promote but want a status update.
            var shouldUpdateTopologyStatus = false;
            var updateTopologyStatusReason = new StringBuilder();

            // ---- Phase 3: promotables ----
            foreach (var promotable in topology.Promotables)
            {
                if (FailedDatabaseInstanceOrNode(clusterTopology, promotable, dbName, current) == DatabaseHealth.Bad)
                {
                    // database distribution is off and the node is down
                    if (topology.DynamicNodesDistribution == false)
                    {
                        if (topology.PromotablesStatus.TryGetValue(promotable, out var currentStatus) == false ||
                            currentStatus != DatabasePromotionStatus.NotResponding)
                        {
                            topology.DemotionReasons[promotable]   = "Not responding";
                            topology.PromotablesStatus[promotable] = DatabasePromotionStatus.NotResponding;
                            return($"Node {promotable} is currently not responding");
                        }
                        continue;
                    }

                    // No replacement node could be found: just mark the promotable as not responding.
                    if (TryFindFitNode(promotable, dbName, topology, clusterTopology, current, out var node) == false)
                    {
                        if (topology.PromotablesStatus.TryGetValue(promotable, out var currentStatus) == false ||
                            currentStatus != DatabasePromotionStatus.NotResponding)
                        {
                            topology.DemotionReasons[promotable]   = "Not responding";
                            topology.PromotablesStatus[promotable] = DatabasePromotionStatus.NotResponding;
                            return($"Node {promotable} is currently not responding");
                        }
                        continue;
                    }

                    // Replacement is skipped when the license manager does not allow dynamic distribution.
                    if (_server.LicenseManager.CanDynamicallyDistributeNodes(out _) == false)
                    {
                        continue;
                    }

                    // replace the bad promotable otherwise we will continue to add more and more nodes.
                    topology.Promotables.Add(node);
                    topology.DemotionReasons[node]   = $"Just replaced the promotable node {promotable}";
                    topology.PromotablesStatus[node] = DatabasePromotionStatus.WaitingForFirstPromotion;
                    var deletionCmd = new DeleteDatabaseCommand
                    {
                        ErrorOnDatabaseDoesNotExists = false,
                        DatabaseName            = dbName,
                        FromNodes               = new[] { promotable },
                        HardDelete              = _hardDeleteOnReplacement,
                        UpdateReplicationFactor = false
                    };

                    if (deletions == null)
                    {
                        deletions = new List <DeleteDatabaseCommand>();
                    }
                    deletions.Add(deletionCmd);
                    return($"The promotable {promotable} is not responsive, replace it with a node {node}");
                }

                if (TryGetMentorNode(dbName, topology, clusterTopology, promotable, out var mentorNode) == false)
                {
                    continue;
                }

                var tryPromote = TryPromote(dbName, topology, current, previous, mentorNode, promotable);
                if (tryPromote.Promote)
                {
                    // The collection is modified while being enumerated, which is safe only because
                    // we return immediately afterwards.
                    topology.Promotables.Remove(promotable);
                    topology.Members.Add(promotable);
                    topology.PredefinedMentors.Remove(promotable);
                    RemoveOtherNodesIfNeeded(dbName, record, clusterTopology, current, ref deletions);
                    return($"Promoting node {promotable} to member");
                }
                if (tryPromote.UpdateTopologyReason != null)
                {
                    shouldUpdateTopologyStatus = true;
                    updateTopologyStatusReason.AppendLine(tryPromote.UpdateTopologyReason);
                }
            }

            var goodMembers   = GetNumberOfRespondingNodes(clusterTopology, dbName, topology, current);
            var pendingDelete = GetPendingDeleteNodes(record);

            // ---- Phase 4: rehabs ----
            foreach (var rehab in topology.Rehabs)
            {
                var health = FailedDatabaseInstanceOrNode(clusterTopology, rehab, dbName, current);
                switch (health)
                {
                case DatabaseHealth.Bad:
                    if (topology.DynamicNodesDistribution == false)
                    {
                        continue;
                    }

                    // Below the replication factor: add a fresh promotable to stand in for this rehab.
                    if (goodMembers < topology.ReplicationFactor &&
                        TryFindFitNode(rehab, dbName, topology, clusterTopology, current, out var node))
                    {
                        if (_server.LicenseManager.CanDynamicallyDistributeNodes(out _) == false)
                        {
                            continue;
                        }

                        topology.Promotables.Add(node);
                        topology.DemotionReasons[node]   = $"Maintain the replication factor and create new replica instead of node {rehab}";
                        topology.PromotablesStatus[node] = DatabasePromotionStatus.WaitingForFirstPromotion;
                        return($"The rehab node {rehab} was too long in rehabilitation, create node {node} to replace it");
                    }

                    if (topology.PromotablesStatus.TryGetValue(rehab, out var status) == false || status != DatabasePromotionStatus.NotResponding)
                    {
                        // was already online, but now we lost the connection again
                        if (TryMoveToRehab(dbName, topology, current, rehab))
                        {
                            return($"Node {rehab} is currently not responding");
                        }
                    }

                    break;

                case DatabaseHealth.Good:

                    if (pendingDelete.Contains(rehab) && topology.PromotablesStatus.ContainsKey(rehab) == false)
                    {
                        // already tried to promote, so we just ignore and continue
                        continue;
                    }

                    if (TryGetMentorNode(dbName, topology, clusterTopology, rehab, out var mentorNode) == false)
                    {
                        continue;
                    }

                    var tryPromote = TryPromote(dbName, topology, current, previous, mentorNode, rehab);
                    if (tryPromote.Promote)
                    {
                        if (_logger.IsOperationsEnabled)
                        {
                            _logger.Operations($"The database {dbName} on {rehab} is reachable and up to date, so we promote it back to member.");
                        }

                        // Rehabs is modified during enumeration; safe only because of the return below.
                        topology.Members.Add(rehab);
                        topology.Rehabs.Remove(rehab);
                        RemoveOtherNodesIfNeeded(dbName, record, clusterTopology, current, ref deletions);
                        return($"Node {rehab} was recovered from rehabilitation and promoted back to member");
                    }
                    if (tryPromote.UpdateTopologyReason != null)
                    {
                        shouldUpdateTopologyStatus = true;
                        updateTopologyStatusReason.AppendLine(tryPromote.UpdateTopologyReason);
                    }
                    break;
                }
            }
            // No promotion happened in this round; still give redundant nodes a chance to be removed.
            RemoveOtherNodesIfNeeded(dbName, record, clusterTopology, current, ref deletions);

            if (shouldUpdateTopologyStatus)
            {
                return(updateTopologyStatusReason.ToString());
            }

            return(null);
        }
Exemple #11
0
        /// <summary>
        /// Inspects the status reports for every member, promotable and rehab node of a database and
        /// applies at most one topology change per call (demote to rehab, promote, replace, or mark as
        /// not responding), returning as soon as a change has been made.
        /// </summary>
        /// <param name="dbName">Name of the database being examined.</param>
        /// <param name="record">Database record; its <c>Topology</c> is mutated in place.</param>
        /// <param name="clusterTopology">Cluster topology passed through to the health/mentor/fit-node helpers.</param>
        /// <param name="current">Latest status reports keyed by node tag.</param>
        /// <param name="previous">Status reports from the preceding round; only forwarded to <c>TryPromote</c> here.</param>
        /// <param name="deletions">Receives any delete commands produced; allocated here when null.</param>
        /// <returns>
        /// A human-readable reason describing the change (or the accumulated status-update reasons), or
        /// <c>null</c> when nothing changed.
        /// NOTE(review): presumably the caller persists the topology when the result is non-null - confirm.
        /// </returns>
        private string UpdateDatabaseTopology(string dbName, DatabaseRecord record, ClusterTopology clusterTopology,
                                              Dictionary <string, ClusterNodeStatusReport> current,
                                              Dictionary <string, ClusterNodeStatusReport> previous,
                                              ref List <DeleteDatabaseCommand> deletions)
        {
            var topology       = record.Topology;
            var hasLivingNodes = false;

            // ---- Phase 1: members ----
            foreach (var member in topology.Members)
            {
                // A member is alive only when its node report is Ok and the database status is Loaded.
                if (current.TryGetValue(member, out var nodeStats) &&
                    nodeStats.Status == ClusterNodeStatusReport.ReportStatus.Ok &&
                    nodeStats.Report.TryGetValue(dbName, out var dbStats) &&
                    dbStats.Status == Loaded)
                {
                    hasLivingNodes = true;

                    // A healthy member that still carries a promotable status entry gets it cleared.
                    if (topology.PromotablesStatus.TryGetValue(member, out var _))
                    {
                        topology.DemotionReasons.Remove(member);
                        topology.PromotablesStatus.Remove(member);
                        return($"Node {member} is online");
                    }

                    continue;
                }

                if (TryMoveToRehab(dbName, topology, current, member))
                {
                    return($"Node {member} is currently not responding and moved to rehab");
                }

                // node distribution is off and the node is down
                // (only reported once: skipped when the node is already marked NotResponding)
                if (topology.DynamicNodesDistribution == false && (
                        topology.PromotablesStatus.TryGetValue(member, out var currentStatus) == false ||
                        currentStatus != DatabasePromotionStatus.NotResponding))
                {
                    topology.DemotionReasons[member]   = "Not responding";
                    topology.PromotablesStatus[member] = DatabasePromotionStatus.NotResponding;
                    return($"Node {member} is currently not responding");
                }
            }

            // ---- Phase 2: no member is alive - try to rescue the database from a healthy rehab ----
            if (hasLivingNodes == false)
            {
                var recoverable = new List <string>();
                foreach (var rehab in topology.Rehabs)
                {
                    if (FailedDatabaseInstanceOrNode(clusterTopology, rehab, dbName, current) == DatabaseHealth.Good)
                    {
                        recoverable.Add(rehab);
                    }
                }
                if (recoverable.Count > 0)
                {
                    // Promote the rehab chosen by FindMostUpToDateNode straight to member.
                    var node = FindMostUpToDateNode(recoverable, dbName, current);
                    topology.Rehabs.Remove(node);
                    topology.Members.Add(node);
                    RaiseNoLivingNodesAlert($"It appears that all nodes of the {dbName} database are not responding to the supervisor, promoting {node} from rehab to avoid making the database completely unreachable");
                    return($"All nodes are not responding, promoting {node} from rehab");
                }
                RaiseNoLivingNodesAlert($"It appears that all nodes of the {dbName} database are not responding to the supervisor, the database is not reachable");
            }

            // Reasons collected from TryPromote calls that did not promote but want a status update.
            var shouldUpdateTopologyStatus = false;
            var updateTopologyStatusReason = new StringBuilder();

            // ---- Phase 3: promotables ----
            foreach (var promotable in topology.Promotables)
            {
                if (FailedDatabaseInstanceOrNode(clusterTopology, promotable, dbName, current) == DatabaseHealth.Bad)
                {
                    // node distribution is off and the node is down
                    if (topology.DynamicNodesDistribution == false)
                    {
                        if (topology.PromotablesStatus.TryGetValue(promotable, out var currentStatus) == false ||
                            currentStatus != DatabasePromotionStatus.NotResponding)
                        {
                            topology.DemotionReasons[promotable]   = "Not responding";
                            topology.PromotablesStatus[promotable] = DatabasePromotionStatus.NotResponding;
                            return($"Node {promotable} is currently not responding");
                        }
                        continue;
                    }

                    // No replacement node could be found: just mark the promotable as not responding.
                    if (TryFindFitNode(promotable, dbName, topology, clusterTopology, current, out var node) == false)
                    {
                        if (topology.PromotablesStatus.TryGetValue(promotable, out var currentStatus) == false ||
                            currentStatus != DatabasePromotionStatus.NotResponding)
                        {
                            topology.DemotionReasons[promotable]   = "Not responding";
                            topology.PromotablesStatus[promotable] = DatabasePromotionStatus.NotResponding;
                            return($"Node {promotable} is currently not responding");
                        }
                        continue;
                    }

                    // Replacement is skipped when the license manager does not allow dynamic distribution.
                    if (_server.LicenseManager.CanDynamicallyDistributeNodes() == false)
                    {
                        continue;
                    }

                    // replace the bad promotable otherwise we will continue to add more and more nodes.
                    topology.Promotables.Add(node);
                    topology.DemotionReasons[node]   = $"Just replaced the promotable node {promotable}";
                    topology.PromotablesStatus[node] = DatabasePromotionStatus.WaitingForFirstPromotion;
                    var deletionCmd = new DeleteDatabaseCommand
                    {
                        ErrorOnDatabaseDoesNotExists = false,
                        DatabaseName            = dbName,
                        FromNodes               = new[] { promotable },
                        HardDelete              = _hardDeleteOnReplacement,
                        UpdateReplicationFactor = false
                    };

                    if (deletions == null)
                    {
                        deletions = new List <DeleteDatabaseCommand>();
                    }
                    deletions.Add(deletionCmd);
                    return($"The promotable {promotable} is not responsive, replace it with a node {node}");
                }

                if (TryGetMentorNode(dbName, topology, clusterTopology, promotable, out var mentorNode) == false)
                {
                    continue;
                }

                var tryPromote = TryPromote(dbName, topology, current, previous, mentorNode, promotable);
                if (tryPromote.Promote)
                {
                    // The collection is modified while being enumerated, which is safe only because
                    // we return immediately afterwards.
                    topology.Promotables.Remove(promotable);
                    topology.Members.Add(promotable);
                    topology.PredefinedMentors.Remove(promotable);
                    RemoveOtherNodesIfNeeded(dbName, record, clusterTopology, current, ref deletions);
                    return($"Promoting node {promotable} to member");
                }
                if (tryPromote.UpdateTopologyReason != null)
                {
                    shouldUpdateTopologyStatus = true;
                    updateTopologyStatusReason.AppendLine(tryPromote.UpdateTopologyReason);
                }
            }

            var goodMembers   = GetNumberOfRespondingNodes(clusterTopology, dbName, topology, current);
            var pendingDelete = GetPendingDeleteNodes(record);

            // ---- Phase 4: rehabs ----
            foreach (var rehab in topology.Rehabs)
            {
                var health = FailedDatabaseInstanceOrNode(clusterTopology, rehab, dbName, current);
                switch (health)
                {
                case DatabaseHealth.Bad:
                    if (topology.DynamicNodesDistribution == false)
                    {
                        continue;
                    }

                    // Below the replication factor: add a fresh promotable to stand in for this rehab.
                    if (goodMembers < topology.ReplicationFactor &&
                        TryFindFitNode(rehab, dbName, topology, clusterTopology, current, out var node))
                    {
                        if (_server.LicenseManager.CanDynamicallyDistributeNodes() == false)
                        {
                            continue;
                        }

                        topology.Promotables.Add(node);
                        topology.DemotionReasons[node]   = $"Maintain the replication factor and create new replica instead of node {rehab}";
                        topology.PromotablesStatus[node] = DatabasePromotionStatus.WaitingForFirstPromotion;
                        return($"The rehab node {rehab} was too long in rehabilitation, create node {node} to replace it");
                    }

                    if (topology.PromotablesStatus.TryGetValue(rehab, out var status) == false || status != DatabasePromotionStatus.NotResponding)
                    {
                        // was already online, but now we lost the connection again
                        if (TryMoveToRehab(dbName, topology, current, rehab))
                        {
                            return($"Node {rehab} is currently not responding");
                        }
                    }

                    break;

                case DatabaseHealth.Good:

                    if (pendingDelete.Contains(rehab) && topology.PromotablesStatus.ContainsKey(rehab) == false)
                    {
                        // already tried to promote, so we just ignore and continue
                        continue;
                    }

                    if (TryGetMentorNode(dbName, topology, clusterTopology, rehab, out var mentorNode) == false)
                    {
                        continue;
                    }

                    var tryPromote = TryPromote(dbName, topology, current, previous, mentorNode, rehab);
                    if (tryPromote.Promote)
                    {
                        // Reaching this with a pending delete means the rehab still has a PromotablesStatus
                        // entry (otherwise the guard above would have skipped it); it is reported but not promoted.
                        if (pendingDelete.Contains(rehab))
                        {
                            var msg = $"Node {rehab} was recovered from rehabilitation, but was already replaced by an other node, so it's waiting for deletion.";
                            if (_logger.IsOperationsEnabled)
                            {
                                _logger.Operations(msg);
                            }
                            return(msg);
                        }

                        if (_logger.IsOperationsEnabled)
                        {
                            _logger.Operations($"The database {dbName} on {rehab} is reachable and update, so we promote it back to member.");
                        }

                        // Rehabs is modified during enumeration; safe only because of the return below.
                        topology.Members.Add(rehab);
                        topology.Rehabs.Remove(rehab);
                        RemoveOtherNodesIfNeeded(dbName, record, clusterTopology, current, ref deletions);
                        return($"Node {rehab} was recovered from rehabilitation and promoted back to member");
                    }
                    if (tryPromote.UpdateTopologyReason != null)
                    {
                        shouldUpdateTopologyStatus = true;
                        updateTopologyStatusReason.AppendLine(tryPromote.UpdateTopologyReason);
                    }
                    break;
                }
            }
            // No promotion happened in this round; still give redundant nodes a chance to be removed.
            RemoveOtherNodesIfNeeded(dbName, record, clusterTopology, current, ref deletions);

            if (shouldUpdateTopologyStatus)
            {
                return(updateTopologyStatusReason.ToString());
            }

            return(null);
        }
Exemple #12
0
        /// <summary>
        /// Builds a delete command for the named database and runs it through <c>CouchProxy</c>;
        /// the returned result is discarded.
        /// </summary>
        /// <param name="name">Name of the database to delete.</param>
        public void DeleteDatabase(string name)
        {
            CouchProxy.Execute <CommandDefaultResult>(new DeleteDatabaseCommand(name));
        }