Beispiel #1
0
        /// <summary>
        /// Reconciles the outgoing internal replication connections with the destinations
        /// computed from <paramref name="newRecord"/>: connections to destinations that disappeared
        /// are dropped, connections to newly added destinations are started, and
        /// <c>_internalDestinations</c> is replaced with the new destination set.
        /// </summary>
        /// <param name="newRecord">The updated database record whose topology drives the destinations.</param>
        /// <param name="instancesToDispose">Passed through to <c>DropOutgoingConnections</c> for the removed destinations.</param>
        private void HandleInternalReplication(DatabaseRecord newRecord, ref List<OutgoingReplicationHandler> instancesToDispose)
        {
            var clusterTopology = GetClusterTopology();

            // Topology may be null (hence the ?.), in which case newInternalDestinations is null as well.
            // NOTE(review): FindChanges receives that possibly-null value too; confirm it tolerates null.
            var newInternalDestinations = newRecord.Topology?.GetDestinations(_server.NodeTag, Database.Name, clusterTopology, _server.Engine.CurrentState);
            var internalConnections = DatabaseTopology.FindChanges(_internalDestinations, newInternalDestinations);

            if (internalConnections.RemovedDestiantions.Count > 0)
            {
                var removed = internalConnections.RemovedDestiantions.Select(r => new InternalReplication
                {
                    NodeTag = clusterTopology.TryGetNodeTagByUrl(r).NodeTag,
                    Url = r,
                    Database = Database.Name
                });

                DropOutgoingConnections(removed, ref instancesToDispose);
            }

            if (internalConnections.AddedDestinations.Count > 0)
            {
                var added = internalConnections.AddedDestinations.Select(r => new InternalReplication
                {
                    NodeTag = clusterTopology.TryGetNodeTagByUrl(r).NodeTag,
                    Url = r,
                    Database = Database.Name
                });

                StartOutgoingConnections(added.ToList());
            }

            _internalDestinations.Clear();

            // AddRange(null) throws ArgumentNullException; with no topology there is simply
            // nothing to remember, so leave the list empty instead of failing after the Clear().
            if (newInternalDestinations != null)
            {
                _internalDestinations.AddRange(newInternalDestinations);
            }
        }
        /// <summary>
        /// Starts a <c>WaitForValueAsync</c> call per node of <paramref name="topology"/>, each evaluating
        /// <paramref name="func"/> against that node's <c>ServerStore</c>, and checks that every one of them
        /// ended up with <paramref name="expected"/>.
        /// </summary>
        /// <param name="topology">Its <c>AllNodes</c> tags select the servers to query.</param>
        /// <param name="func">Value extractor executed against each server store.</param>
        /// <param name="expected">The value every node must report.</param>
        /// <param name="timeout">Forwarded to <c>WaitForValueAsync</c>.</param>
        /// <returns><paramref name="expected"/> when all nodes reported it.</returns>
        /// <exception cref="Exception">Some node reported a different value; the message groups node tags by value.</exception>
        protected async Task<T> WaitForValueOnGroupAsync<T>(DatabaseTopology topology, Func<ServerStore, T> func, T expected, int timeout = 15000)
        {
            // Resolve every store up front so an unknown or duplicated tag fails before any wait is started.
            var stores = topology.AllNodes
                .Select(tag => Servers.Single(s => s.ServerStore.NodeTag == tag).ServerStore)
                .ToList();

            var waitsByTag = new Dictionary<string, Task<T>>();
            foreach (var store in stores)
            {
                waitsByTag.Add(store.NodeTag, WaitForValueAsync(() => func(store), expected, timeout));
            }

            var results = await Task.WhenAll(waitsByTag.Values);
            if (results.All(value => value?.Equals(expected) ?? false))
            {
                return expected;
            }

            // All tasks are completed at this point, so reading Result does not block.
            var tagsByValue = waitsByTag.ToLookup(pair => pair.Value.Result, pair => pair.Key);
            var otherValues = string.Concat(
                tagsByValue.Select(group =>
                    $"\n the value {group.Key} appears on " + string.Concat(group.Select(tag => tag + ", "))));

            throw new Exception($"Not all node in the group have the expected value of {expected}. {otherValues}");
        }
Beispiel #3
0
        /// <summary>
        /// Once the database group has at least <c>ReplicationFactor</c> members, queues a
        /// <c>DeleteDatabaseCommand</c> that removes the database from all promotable and rehab nodes.
        /// </summary>
        /// <param name="dbName">Name of the database the command targets.</param>
        /// <param name="topology">Topology whose members, promotables and rehabs are inspected.</param>
        /// <param name="deletions">Receives the new command; the list is created when it is null.</param>
        private void RemoveOtherNodesIfNeeded(string dbName, DatabaseTopology topology, ref List<DeleteDatabaseCommand> deletions)
        {
            var belowReplicationFactor = topology.Members.Count < topology.ReplicationFactor;
            if (belowReplicationFactor || (topology.Promotables.Count == 0 && topology.Rehabs.Count == 0))
            {
                // Either still short on members, or there is nothing extra to remove.
                return;
            }

            if (_logger.IsOperationsEnabled)
            {
                _logger.Operations("We reached the replication factor, so we remove all other rehab/promotable nodes.");
            }

            var surplusNodes = topology.Promotables.Concat(topology.Rehabs).ToArray();
            var command = new DeleteDatabaseCommand
            {
                ErrorOnDatabaseDoesNotExists = false,
                DatabaseName = dbName,
                FromNodes = surplusNodes,
                HardDelete = _hardDeleteOnReplacement,
                UpdateReplicationFactor = false
            };

            (deletions ?? (deletions = new List<DeleteDatabaseCommand>())).Add(command);
        }
Beispiel #4
0
        /// <summary>
        /// Builds the status entry for <paramref name="nodeTag"/> and stores it in
        /// <paramref name="nodesTopology"/>. The promotion status and demotion reason recorded in
        /// <paramref name="topology"/> are used when present; a node that looks <c>Ok</c> there but is
        /// reported as not connected in <paramref name="nodeStatuses"/> is marked <c>NotResponding</c>.
        /// </summary>
        private static void SetNodeStatus(
            DatabaseTopology topology,
            string nodeTag,
            NodesTopology nodesTopology,
            Dictionary<string, NodeStatus> nodeStatuses)
        {
            var entry = new DatabaseGroupNodeStatus
            {
                // Defaults to Ok when the topology holds no promotion status for the node.
                LastStatus = topology.PromotablesStatus.TryGetValue(nodeTag, out var recordedStatus)
                    ? recordedStatus
                    : DatabasePromotionStatus.Ok
            };

            if (topology.DemotionReasons.TryGetValue(nodeTag, out var demotionReason))
            {
                entry.LastError = demotionReason;
            }

            var looksHealthy = entry.LastStatus == DatabasePromotionStatus.Ok;
            if (looksHealthy &&
                nodeStatuses.TryGetValue(nodeTag, out var serverSide) &&
                serverSide.Connected == false)
            {
                // The server-level report overrides an otherwise healthy database status.
                entry.LastError = serverSide.ErrorDetails;
                entry.LastStatus = DatabasePromotionStatus.NotResponding;
            }

            nodesTopology.Status[nodeTag] = entry;
        }
Beispiel #5
0
        /// <summary>
        /// Looks for a cluster node that does not yet hold the database and is not in bad health,
        /// so the database on <paramref name="badNode"/> can be reassigned to it. Among nodes that have
        /// a report in <paramref name="current"/>, the one with the fewest entries in its report wins;
        /// a node without a report is only picked while no other candidate has been chosen.
        /// </summary>
        /// <param name="bestNode">The chosen node tag, or null when none was found.</param>
        /// <returns>True when a node was chosen; false when the group has no members or no node fits.</returns>
        private bool TryFindFitNode(string badNode, string db, DatabaseTopology topology, ClusterTopology clusterTopology,
                                    Dictionary<string, ClusterNodeStatusReport> current, out string bestNode)
        {
            bestNode = null;

            if (topology.Members.Count == 0) // no one can be used as mentor
            {
                return false;
            }

            var occupied = new HashSet<string>(topology.AllNodes);
            var fewestDatabases = int.MaxValue;

            foreach (var candidate in clusterTopology.AllNodes.Keys)
            {
                var unusable = occupied.Contains(candidate) ||
                               FailedDatabaseInstanceOrNode(clusterTopology, candidate, db, current) == DatabaseHealth.Bad;
                if (unusable)
                {
                    continue;
                }

                if (current.TryGetValue(candidate, out var candidateReport))
                {
                    if (candidateReport.Report.Count < fewestDatabases)
                    {
                        fewestDatabases = candidateReport.Report.Count;
                        bestNode = candidate;
                    }
                }
                else if (bestNode == null)
                {
                    // No report for this node: accept it only as a fallback.
                    bestNode = candidate;
                }
            }

            var found = bestNode != null;
            if (_logger.IsOperationsEnabled)
            {
                _logger.Operations(found
                    ? $"The database '{db}' on {badNode} has not responded for a long time, so we reassign it to {bestNode}."
                    : $"The database '{db}' on {badNode} has not responded for a long time, but there is no free node to reassign it.");
            }

            return found;
        }
Beispiel #6
0
        /// <summary>
        /// Moves <paramref name="member"/> from the members list to the rehab list unless the latest
        /// cluster stats contain an <c>Ok</c> node report with a non-faulted report for
        /// <paramref name="dbName"/>. The reason is recorded in <c>DemotionReasons</c> and the node's
        /// promotion status is set to <c>NotResponding</c>.
        /// </summary>
        /// <returns>False when the node is healthy and nothing changed; true otherwise.</returns>
        private bool TryMoveToRehab(string dbName, DatabaseTopology topology, Dictionary<string, ClusterNodeStatusReport> current, string member)
        {
            DatabaseStatusReport dbStats = null;

            var nodeReportedOk = current.TryGetValue(member, out var nodeStats) &&
                                 nodeStats.Status == ClusterNodeStatusReport.ReportStatus.Ok;
            if (nodeReportedOk &&
                nodeStats.Report.TryGetValue(dbName, out dbStats) &&
                dbStats.Status != Faulted)
            {
                return false;
            }

            string reason;
            if (nodeStats == null)
            {
                reason = "In rehabilitation because it had no status report in the latest cluster stats";
            }
            else if (nodeStats.Status != ClusterNodeStatusReport.ReportStatus.Ok)
            {
                reason = $"In rehabilitation because the last report status was \"{nodeStats.Status}\"";
            }
            else if (nodeStats.Report.TryGetValue(dbName, out var reported) && reported.Status == Faulted)
            {
                reason = "In rehabilitation because the DatabaseStatus for this node is Faulted";
            }
            else
            {
                reason = "In rehabilitation because the node is reachable but had no report about the database";
            }

            // Append whatever error details the node and database reports carry.
            if (nodeStats?.Error != null)
            {
                reason += $". {nodeStats.Error}";
            }

            if (dbStats?.Error != null)
            {
                reason += $". {dbStats.Error}";
            }

            var alreadyInRehab = topology.Rehabs.Contains(member);
            if (!alreadyInRehab)
            {
                topology.Members.Remove(member);
                topology.Rehabs.Add(member);
            }

            topology.DemotionReasons[member] = reason;
            topology.PromotablesStatus[member] = DatabasePromotionStatus.NotResponding;

            if (_logger.IsOperationsEnabled)
            {
                _logger.Operations(reason);
            }

            return true;
        }
 /// <summary>
 /// Opens document stores (topology updates disabled) against every server whose node tag is in
 /// <c>topology.Members</c> and delegates to <c>WaitForDocumentInClusterAsyncInternal</c>.
 /// </summary>
 /// <param name="topology">Only its <c>Members</c> are consulted.</param>
 /// <param name="db">Database name set on each created <c>ServerNode</c>.</param>
 /// <param name="docId">Document id forwarded to the internal wait.</param>
 /// <param name="predicate">Predicate forwarded to the internal wait.</param>
 /// <param name="timeout">Timeout forwarded to the internal wait.</param>
 /// <param name="certificate">Optional certificate used when creating the stores.</param>
 protected async Task<bool> WaitForDocumentInClusterAsync<T>(DatabaseTopology topology, string db, string docId, Func<T, bool> predicate, TimeSpan timeout, X509Certificate2 certificate = null)
 {
     var memberTags = topology.Members;

     // Deferred query, exactly like the chained Where/Select form: evaluated by GetDocumentStores.
     var memberNodes =
         from server in Servers
         where memberTags.Contains(server.ServerStore.NodeTag)
         select new ServerNode
         {
             Url = server.WebUrl,
             Database = db
         };

     var memberStores = GetDocumentStores(memberNodes, disableTopologyUpdates: true, certificate: certificate);
     return await WaitForDocumentInClusterAsyncInternal(docId, predicate, timeout, memberStores);
 }
Beispiel #8
0
        /// <summary>
        /// Lazily yields one <c>OngoingTaskBackup</c> per periodic backup configured on
        /// <paramref name="databaseRecord"/>. Yields nothing when <paramref name="dbTopology"/> is null
        /// or the record has no periodic backups.
        /// </summary>
        /// <remarks>
        /// The database instance is obtained by blocking on <c>TryGetOrCreateResourceStore(...).Result</c>
        /// when the enumeration reaches that point.
        /// </remarks>
        private static IEnumerable<OngoingTask> CollectBackupTasks(
            DatabaseRecord databaseRecord,
            DatabaseTopology dbTopology,
            ClusterTopology clusterTopology,
            ServerStore store)
        {
            if (dbTopology == null ||
                databaseRecord.PeriodicBackups == null ||
                databaseRecord.PeriodicBackups.Count == 0)
            {
                yield break;
            }

            var database = store.DatabasesLandlord.TryGetOrCreateResourceStore(databaseRecord.DatabaseName).Result;

            foreach (var configuration in databaseRecord.PeriodicBackups)
            {
                var responsibleTag = dbTopology.WhoseTaskIsIt(configuration, store.IsPassive());
                var destinations = GetBackupDestinations(configuration);

                var runner = database.PeriodicBackupRunner;
                var status = runner.GetBackupStatus(configuration.TaskId);
                var next = runner.GetNextBackupDetails(databaseRecord, configuration, status);

                var responsibleNode = new NodeId
                {
                    NodeTag = responsibleTag,
                    NodeUrl = clusterTopology.GetUrlFromTag(responsibleTag)
                };

                yield return new OngoingTaskBackup
                {
                    TaskId = configuration.TaskId,
                    BackupType = configuration.BackupType,
                    TaskName = configuration.Name,
                    TaskState = configuration.Disabled ? OngoingTaskState.Disabled : OngoingTaskState.Enabled,
                    LastFullBackup = status.LastFullBackup,
                    LastIncrementalBackup = status.LastIncrementalBackup,
                    NextBackup = next,
                    ResponsibleNode = responsibleNode,
                    BackupDestinations = destinations
                };
            }
        }
        /// <summary>
        /// Creates a 5-node cluster with a 3-node database group (A, B, C) using dynamic node
        /// distribution, disposes <c>Servers[1]</c> and <c>Servers[2]</c>, and then expects the
        /// document to be available on A, D and E with the members count back at 3.
        /// </summary>
        public async Task RedistributeDatabaseOnMultiFailure()
        {
            DebuggerAttachedTimeout.DisableLongTimespan = true;

            int clusterSize = 5;
            int dbGroupSize = 3;
            var databaseName = GetDatabaseName();
            var leader = await CreateRaftClusterAndGetLeader(clusterSize, false, 0);

            Func<User, bool> isKarmel = u => u.Name == "Karmel";

            using (var store = new DocumentStore
            {
                Urls = new[] { leader.WebUrl },
                Database = databaseName
            }.Initialize())
            {
                var record = new DatabaseRecord(databaseName)
                {
                    Topology = new DatabaseTopology
                    {
                        DynamicNodesDistribution = true,
                        Members = { "A", "B", "C" }
                    }
                };

                var createResult = await store.Maintenance.Server.SendAsync(new CreateDatabaseOperation(record, dbGroupSize));
                Assert.Equal(dbGroupSize, createResult.Topology.Members.Count);
                await WaitForRaftIndexToBeAppliedInCluster(createResult.RaftCommandIndex, TimeSpan.FromSeconds(10));

                using (var session = store.OpenAsyncSession())
                {
                    await session.StoreAsync(new User { Name = "Karmel" }, "users/1");
                    await session.SaveChangesAsync();
                }

                var replicatedToGroup = await WaitForDocumentInClusterAsync<User>(record.Topology, databaseName, "users/1", isKarmel, TimeSpan.FromSeconds(5));
                Assert.True(replicatedToGroup);

                DisposeServerAndWaitForFinishOfDisposal(Servers[1]);
                DisposeServerAndWaitForFinishOfDisposal(Servers[2]);

                // the db should move to D & E
                var expectedTopology = new DatabaseTopology
                {
                    Members = { "A", "D", "E" }
                };
                var replicatedToNewGroup = await WaitForDocumentInClusterAsync<User>(expectedTopology, databaseName, "users/1", isKarmel, TimeSpan.FromSeconds(60));
                Assert.True(replicatedToNewGroup);

                var members = await WaitForValueAsync(async () => await GetMembersCount(store, databaseName), 3, 30_000);
                Assert.Equal(3, members);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Asks <paramref name="topology"/> which node owns a <c>PromotableTask</c> built for
        /// <paramref name="promotable"/> and returns that node as the mentor.
        /// </summary>
        /// <param name="mentorNode">The tag returned by <c>WhoseTaskIsIt</c>; may be null.</param>
        /// <returns>True when a mentor node was resolved.</returns>
        private bool TryGetMentorNode(string dbName, DatabaseTopology topology, ClusterTopology clusterTopology, string promotable, out string mentorNode)
        {
            var promotableUrl = clusterTopology.GetUrlFromTag(promotable);
            var promotableTask = new PromotableTask(promotable, promotableUrl, dbName);

            mentorNode = topology.WhoseTaskIsIt(promotableTask, _server.IsPassive());

            // A null result means: we are in passive mode and were kicked out of the cluster.
            return mentorNode != null;
        }
Beispiel #11
0
        /// <summary>
        /// Determines whether the periodic backup described by <paramref name="configuration"/> is
        /// disabled, handled by this node, or handled by another node.
        /// </summary>
        /// <param name="topology">Database topology used to resolve the responsible node.</param>
        /// <param name="configuration">The periodic backup configuration to evaluate.</param>
        /// <param name="skipErrorLog">When true, no alert is raised for a task without enabled destinations.</param>
        /// <returns>
        /// <c>Disabled</c> when the task is disabled, has no backup destination, or no responsible node
        /// could be resolved; otherwise <c>ActiveByCurrentNode</c> or <c>ActiveByOtherNode</c>.
        /// </returns>
        private TaskStatus GetTaskStatus(
            DatabaseTopology topology,
            PeriodicBackupConfiguration configuration,
            bool skipErrorLog = false)
        {
            if (configuration.Disabled)
            {
                return TaskStatus.Disabled;
            }

            if (configuration.HasBackup() == false)
            {
                if (!skipErrorLog)
                {
                    var alert = AlertRaised.Create(
                        _database.Name,
                        "Periodic Backup",
                        $"All backup destinations are disabled for backup task id: {configuration.TaskId}",
                        AlertType.PeriodicBackup,
                        NotificationSeverity.Info);
                    _database.NotificationCenter.Add(alert);
                }

                return TaskStatus.Disabled;
            }

            var currentBackupStatus = GetBackupStatus(configuration.TaskId);
            var responsibleNode = _database.WhoseTaskIsIt(topology, configuration, currentBackupStatus, keepTaskOnOriginalMemberNode: true);

            if (responsibleNode == null)
            {
                return TaskStatus.Disabled;
            }

            if (responsibleNode == _serverStore.NodeTag)
            {
                return TaskStatus.ActiveByCurrentNode;
            }

            if (_logger.IsInfoEnabled)
            {
                _logger.Info($"Backup job is skipped at {SystemTime.UtcNow}, because it is managed by '{responsibleNode}' node and not the current node ({_serverStore.NodeTag})");
            }

            return TaskStatus.ActiveByOtherNode;
        }
Beispiel #12
0
        /// <summary>
        /// Stores a status entry for <paramref name="nodeTag"/> in <paramref name="nodesTopology"/>,
        /// using the promotion status (default <c>Ok</c>) and the demotion reason recorded in
        /// <paramref name="topology"/>, when present.
        /// </summary>
        private static void SetNodeStatus(DatabaseTopology topology, string nodeTag, NodesTopology nodesTopology)
        {
            var entry = new DbGroupNodeStatus
            {
                LastStatus = topology.PromotablesStatus.TryGetValue(nodeTag, out var recordedStatus)
                    ? recordedStatus
                    : DatabasePromotionStatus.Ok
            };

            if (topology.DemotionReasons.TryGetValue(nodeTag, out var demotionReason))
            {
                entry.LastError = demotionReason;
            }

            nodesTopology.Status[nodeTag] = entry;
        }
Beispiel #13
0
        /// <summary>
        /// Resolves the mentor for <paramref name="promotable"/> by asking <paramref name="topology"/>
        /// which node owns a <c>PromotableTask</c> built for it. A predefined mentor from
        /// <c>topology.PredefinedMentors</c>, if any, is passed into the task.
        /// </summary>
        /// <param name="mentorNode">The tag returned by <c>WhoseTaskIsIt</c>; may be null.</param>
        /// <returns>True when a mentor node was resolved.</returns>
        private bool TryGetMentorNode(string dbName, DatabaseTopology topology, ClusterTopology clusterTopology, string promotable, out string mentorNode)
        {
            var promotableUrl = clusterTopology.GetUrlFromTag(promotable);

            // predefinedMentor stays at its default when nothing is registered for this promotable.
            topology.PredefinedMentors.TryGetValue(promotable, out var predefinedMentor);
            var promotableTask = new PromotableTask(promotable, promotableUrl, dbName, predefinedMentor);

            mentorNode = topology.WhoseTaskIsIt(_server.Engine.CurrentState, promotableTask, null);

            // A null result means: we are in passive mode and were kicked out of the cluster.
            return mentorNode != null;
        }
Beispiel #14
0
        /// <summary>
        /// Generating a static topology of the requested size.
        /// </summary>
        /// <param name="options">Contains replication factor.</param>
        /// <param name="mainServer">The main server for which we generate the database, must be contained in the topology.</param>
        /// <returns></returns>
        private DatabaseTopology GenerateStaticTopology(Options options, RavenServer mainServer)
        {
            var topology = new DatabaseTopology();
            var mainTag = mainServer.ServerStore.NodeTag;

            // The main server is always the first member.
            topology.Members.Add(mainTag);

            var serverTags = Servers.Where(s => s != mainServer).Select(s => s.ServerStore.NodeTag).ToList();
            var additionalMembers = options.ReplicationFactor - 1;

            // Without this check the loop below indexes into an empty list and fails with an
            // ArgumentOutOfRangeException that says nothing about the actual problem. The exception
            // type is kept, only the message is made meaningful.
            if (additionalMembers > serverTags.Count)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(options),
                    $"Replication factor {options.ReplicationFactor} requires {additionalMembers} servers besides '{mainTag}', but only {serverTags.Count} are available.");
            }

            // Pick the remaining members at random, without repetition.
            var rand = new Random();
            for (var i = 0; i < additionalMembers; i++)
            {
                var position = rand.Next(0, serverTags.Count);
                topology.Members.Add(serverTags[position]);
                serverTags.RemoveAt(position);
            }

            return topology;
        }
Beispiel #15
0
        /// <summary>
        /// Creates a delegate that yields <paramref name="nodeTag"/> only when highly available tasks
        /// are not in use and the node is still listed in <c>topology.Members</c>; otherwise it yields null.
        /// </summary>
        /// <remarks>The membership check runs each time the returned delegate is invoked, not when it is created.</remarks>
        public static Func<string> GetLastResponsibleNode(
            bool hasHighlyAvailableTasks,
            DatabaseTopology topology,
            string nodeTag)
        {
            return () =>
            {
                var keepOnNode = hasHighlyAvailableTasks == false && topology.Members.Contains(nodeTag);
                return keepOnNode ? nodeTag : null;
            };
        }
        /// <summary>
        /// Produces the order in which the members of <paramref name="topology"/> are chosen for a
        /// pull replication task, by repeatedly asking <c>WhoseTaskIsIt</c> and removing the chosen node.
        /// </summary>
        /// <param name="topology">
        /// The database topology. Its <c>Members</c> list is consumed by this method: every chosen node
        /// is removed from it.
        /// </param>
        /// <param name="databaseGroupId">Database group id placed on the dummy task.</param>
        /// <param name="pullReplication">Supplies the mentor node for the dummy task.</param>
        /// <returns>The chosen node tags, in selection order.</returns>
        private List<string> GetResponsibleNodes(DatabaseTopology topology, string databaseGroupId, PullReplicationDefinition pullReplication)
        {
            var list = new List<string>();
            // we distribute connections to have load balancing when many sinks are connected.
            // this is the hub cluster, so we make the decision which node will do the pull replication only once and only here,
            // for that we create a dummy IDatabaseTask.
            var mentorNodeTask = new PullNodeTask
            {
                Mentor = pullReplication.MentorNode,
                DatabaseGroupId = databaseGroupId
            };

            while (topology.Members.Count > 0)
            {
                var next = topology.WhoseTaskIsIt(ServerStore.CurrentRachisState, mentorNodeTask, null);
                if (next == null)
                {
                    // No node could be resolved. Members would never shrink, so looping on
                    // would spin forever while filling the list with nulls.
                    break;
                }

                list.Add(next);

                if (topology.Members.Remove(next) == false)
                {
                    // The chosen node is not one of the members, so no progress is possible.
                    break;
                }
            }

            return list;
        }
Beispiel #17
0
        /// <summary>
        /// Builds the <c>OngoingTaskReplication</c> description for an external replication task:
        /// the responsible node (when one can be resolved), the task state, and, only when this node is
        /// the responsible one, the destination URL and connection status reported by the replication loader.
        /// </summary>
        private OngoingTaskReplication GetExternalReplicationInfo(DatabaseTopology dbTopology, ClusterTopology clusterTopology,
                                                                  ExternalReplication watcher)
        {
            var tag = dbTopology.WhoseTaskIsIt(watcher, ServerStore.Engine.CurrentState);

            var responsibleNode = tag == null
                ? null
                : new NodeId
                {
                    NodeTag = tag,
                    NodeUrl = clusterTopology.GetUrlFromTag(tag)
                };

            // Assume the task runs elsewhere; overwritten below when this node owns it.
            (string Url, OngoingTaskConnectionStatus Status) connection = (null, OngoingTaskConnectionStatus.NotOnThisNode);
            if (tag == ServerStore.NodeTag)
            {
                connection = Database.ReplicationLoader.GetExternalReplicationDestination(watcher.TaskId);
            }

            return new OngoingTaskReplication
            {
                TaskId = watcher.TaskId,
                TaskName = watcher.Name,
                ResponsibleNode = responsibleNode,
                DestinationDatabase = watcher.Database,
                TaskState = watcher.Disabled ? OngoingTaskState.Disabled : OngoingTaskState.Enabled,
                DestinationUrl = connection.Url,
                TaskConnectionStatus = connection.Status,
            };
        }
Beispiel #18
0
        /// <summary>
        /// Resolves the node responsible for <paramref name="configuration"/> by delegating to
        /// <c>DatabaseTopology.WhoseTaskIsIt</c>, supplying the license manager's last responsible node
        /// as a lazily evaluated callback.
        /// </summary>
        /// <param name="databaseTopology">Topology used for the resolution.</param>
        /// <param name="configuration">The task being resolved.</param>
        /// <param name="taskStatus">Current task status; its <c>NodeTag</c> is the fallback result.</param>
        /// <param name="useLastResponsibleNodeIfNoAvailableNodes">
        /// When true and no node was resolved, <c>taskStatus.NodeTag</c> is returned instead of null.
        /// </param>
        /// <returns>The responsible node tag, or null when none was resolved and the fallback is off.</returns>
        public string WhoseTaskIsIt(
            DatabaseTopology databaseTopology,
            IDatabaseTask configuration,
            IDatabaseTaskStatus taskStatus,
            bool useLastResponsibleNodeIfNoAvailableNodes = false)
        {
            Func<string> lastResponsibleNode = () => ServerStore.LicenseManager.GetLastResponsibleNodeForTask(
                taskStatus,
                databaseTopology,
                configuration,
                NotificationCenter);

            var resolved = databaseTopology.WhoseTaskIsIt(
                ServerStore.Engine.CurrentState,
                configuration,
                getLastReponsibleNode: lastResponsibleNode);

            if (resolved != null || useLastResponsibleNodeIfNoAvailableNodes == false)
            {
                return resolved;
            }

            return taskStatus.NodeTag;
        }
Beispiel #19
0
        /// <summary>
        /// Decides whether <paramref name="promotable"/> may become a full member of the database group,
        /// based on the previous and current cluster reports of the promotable and its mentor.
        /// </summary>
        /// <param name="dbName">The database being evaluated.</param>
        /// <param name="topology">Database topology; its <c>PromotablesStatus</c> and <c>DemotionReasons</c> may be updated.</param>
        /// <param name="current">Latest cluster node reports, keyed by node tag.</param>
        /// <param name="previous">Preceding cluster node reports, keyed by node tag.</param>
        /// <param name="mentorNode">Node tag of the mentor.</param>
        /// <param name="promotable">Node tag of the promotion candidate.</param>
        /// <returns>
        /// <c>Promote</c> is true only when the mentor's sent etag caught up and <c>CheckIndexProgress</c>
        /// succeeded. <c>UpdateTopologyReason</c> is non-null when the node is promoted or when this call
        /// changed the recorded status or reason for the promotable; it is null when nothing changed or
        /// the needed reports are missing.
        /// </returns>
        private (bool Promote, string UpdateTopologyReason) TryPromote(string dbName, DatabaseTopology topology, Dictionary<string, ClusterNodeStatusReport> current, Dictionary<string, ClusterNodeStatusReport> previous, string mentorNode, string promotable)
        {
            // Reports for this database from both nodes, in both samples, are required.
            if (previous.TryGetValue(mentorNode, out var mentorPrevClusterStats) == false ||
                mentorPrevClusterStats.Report.TryGetValue(dbName, out var mentorPrevDbStats) == false)
            {
                return (false, null);
            }

            if (previous.TryGetValue(promotable, out var promotablePrevClusterStats) == false ||
                promotablePrevClusterStats.Report.TryGetValue(dbName, out var promotablePrevDbStats) == false)
            {
                return (false, null);
            }

            if (current.TryGetValue(mentorNode, out var mentorCurrClusterStats) == false ||
                mentorCurrClusterStats.Report.TryGetValue(dbName, out var mentorCurrDbStats) == false)
            {
                return (false, null);
            }

            if (current.TryGetValue(promotable, out var promotableClusterStats) == false ||
                promotableClusterStats.Report.TryGetValue(dbName, out var promotableDbStats) == false)
            {
                return (false, null);
            }

            // The group is already at its replication factor.
            if (topology.Members.Count == topology.ReplicationFactor)
            {
                return (false, null);
            }

            var mentorsEtag = mentorPrevDbStats.LastEtag;

            if (mentorCurrDbStats.LastSentEtag.TryGetValue(promotable, out var lastSentEtag) == false)
            {
                return (false, null);
            }

            var timeDiff = mentorCurrClusterStats.LastSuccessfulUpdateDateTime - mentorPrevClusterStats.LastSuccessfulUpdateDateTime;

            if (lastSentEtag < mentorsEtag || timeDiff > 3 * SupervisorSamplePeriod)
            {
                var msg = $"The database '{dbName}' on {promotable} not ready to be promoted, because the mentor hasn't sent all of the documents yet." + Environment.NewLine +
                          $"Last sent Etag: {lastSentEtag:#,#;;0}" + Environment.NewLine +
                          $"Mentor's Etag: {mentorsEtag:#,#;;0}";
                if (_logger.IsInfoEnabled)
                {
                    _logger.Info(msg);
                }

                // TryGetValue instead of the indexer: a promotable with no recorded reason yet
                // must not blow up with KeyNotFoundException; it gets its first reason recorded.
                if (topology.DemotionReasons.TryGetValue(promotable, out var recordedReason) == false ||
                    msg.Equals(recordedReason) == false)
                {
                    topology.DemotionReasons[promotable] = msg;
                    topology.PromotablesStatus[promotable] = DatabasePromotionStatus.ChangeVectorNotMerged;
                    return (false, msg);
                }

                // Same reason as last time: nothing to update.
                return (false, null);
            }

            var indexesCatchedUp = CheckIndexProgress(promotablePrevDbStats.LastEtag, promotablePrevDbStats.LastIndexStats, promotableDbStats.LastIndexStats,
                                                      mentorCurrDbStats.LastIndexStats);

            if (indexesCatchedUp)
            {
                if (_logger.IsOperationsEnabled)
                {
                    _logger.Operations($"We try to promoted the database '{dbName}' on {promotable} to be a full member");
                }

                topology.PromotablesStatus.Remove(promotable);
                topology.DemotionReasons.Remove(promotable);

                return (true, $"Node {promotable} is up-to-date so promoting it to be member");
            }

            if (_logger.IsInfoEnabled)
            {
                _logger.Info($"The database '{dbName}' on {promotable} is not ready to be promoted, because the indexes are not up-to-date." + Environment.NewLine);
            }

            // Report a topology change only when the status actually flips to IndexNotUpToDate.
            if (topology.PromotablesStatus.TryGetValue(promotable, out var currentStatus) == false ||
                currentStatus != DatabasePromotionStatus.IndexNotUpToDate)
            {
                topology.PromotablesStatus[promotable] = DatabasePromotionStatus.IndexNotUpToDate;
                return (false, $"Node {promotable} not ready to be a member, because the indexes are not up-to-date");
            }

            return (false, null);
        }
Beispiel #20
0
        /// <summary>
        /// Lazily yields the external replication task info for each watcher in
        /// <paramref name="watchers"/>; yields nothing when <paramref name="dbTopology"/> is null.
        /// </summary>
        private IEnumerable<OngoingTask> CollectExternalReplicationTasks(List<ExternalReplication> watchers, DatabaseTopology dbTopology, ClusterTopology clusterTopology)
        {
            if (dbTopology != null)
            {
                foreach (var watcher in watchers)
                {
                    yield return GetExternalReplicationInfo(dbTopology, clusterTopology, watcher);
                }
            }
        }
            // Records, for every node in stateGroup, why it cannot run the subscription: the node's
            // state (stateName), whether it is the subscription's mentor, and the demotion reason
            // from the topology when one exists.
            static void FillNodesAvailabilityReportForState(SubscriptionGeneralDataAndStats subscription, DatabaseTopology topology, Dictionary<string, string> databaseTopologyAvailabilityExplenation, List<string> stateGroup, string stateName)
            {
                foreach (var node in stateGroup)
                {
                    var explanation = subscription.MentorNode == node
                        ? $"Although this node is a mentor, it's state is {stateName} and can't run the subscription"
                        : $"Node's state is {stateName}, can't run subscription";

                    if (topology.DemotionReasons.TryGetValue(node, out var demotionReason))
                    {
                        explanation = $"{explanation}. Reason:{demotionReason}";
                    }

                    databaseTopologyAvailabilityExplenation[node] = explanation;
                }
            }
        public async Task DontRemoveNodeWhileItHasNotReplicatedDocs()
        {
            var databaseName = "DontRemoveNodeWhileItHasNotReplicatedDocs" + Guid.NewGuid();
            var leader       = await CreateRaftClusterAndGetLeader(3, shouldRunInMemory : false);

            using (var leaderStore = new DocumentStore
            {
                Urls = new[] { leader.WebUrl },
                Database = databaseName,
            })
            {
                leaderStore.Initialize();
                var topology = new DatabaseTopology
                {
                    Members = new List <string>
                    {
                        "B",
                        "C"
                    },
                    DynamicNodesDistribution = true
                };
                var(index, dbGroupNodes) = await CreateDatabaseInCluster(new DatabaseRecord
                {
                    DatabaseName = databaseName,
                    Topology     = topology
                }, 2, leader.WebUrl);
                await WaitForRaftIndexToBeAppliedInCluster(index, TimeSpan.FromSeconds(30));

                using (var session = leaderStore.OpenSession())
                {
                    session.Store(new User(), "users/1");
                    session.SaveChanges();
                }
                var dbToplogy = (await leaderStore.Maintenance.Server.SendAsync(new GetDatabaseRecordOperation(databaseName))).Topology;
                Assert.Equal(2, dbToplogy.AllNodes.Count());
                Assert.Equal(0, dbToplogy.Promotables.Count);
                Assert.True(await WaitForDocumentInClusterAsync <User>(topology, databaseName, "users/1", null, TimeSpan.FromSeconds(30)));

                var serverA  = Servers.Single(s => s.ServerStore.NodeTag == "A");
                var urlsA    = new[] { serverA.WebUrl };
                var dataDirA = serverA.Configuration.Core.DataDirectory.FullPath.Split('/').Last();
                DisposeServerAndWaitForFinishOfDisposal(serverA);

                var serverB  = Servers.Single(s => s.ServerStore.NodeTag == "B");
                var urlsB    = new[] { serverB.WebUrl };
                var dataDirB = serverB.Configuration.Core.DataDirectory.FullPath.Split('/').Last();
                DisposeServerAndWaitForFinishOfDisposal(serverB);

                // write doc only to C
                using (var session = leaderStore.OpenSession())
                {
                    session.Store(new User(), "users/2");
                    session.SaveChanges();
                }

                var serverC  = Servers.Single(s => s.ServerStore.NodeTag == "C");
                var urlsC    = new[] { serverC.WebUrl };
                var dataDirC = serverC.Configuration.Core.DataDirectory.FullPath.Split('/').Last();
                DisposeServerAndWaitForFinishOfDisposal(serverC);

                Servers[0] = GetNewServer(new Dictionary <string, string> {
                    { RavenConfiguration.GetKey(x => x.Core.ServerUrls), urlsA[0] }
                }, runInMemory: false, deletePrevious: false, partialPath: dataDirA);
                Servers[1] = GetNewServer(new Dictionary <string, string> {
                    { RavenConfiguration.GetKey(x => x.Core.ServerUrls), urlsB[0] }
                }, runInMemory: false, deletePrevious: false, partialPath: dataDirB);
                await Task.Delay(TimeSpan.FromSeconds(10));

                Assert.Equal(2, await WaitForValueAsync(async() => await GetMembersCount(leaderStore, databaseName), 2));
                Assert.Equal(1, await WaitForValueAsync(async() => await GetRehabCount(leaderStore, databaseName), 1));

                using (var session = leaderStore.OpenSession())
                {
                    session.Store(new User(), "users/3");
                    session.SaveChanges();
                }
                Assert.True(await WaitForDocumentInClusterAsync <User>(new DatabaseTopology
                {
                    Members = new List <string> {
                        "A", "B"
                    }
                }, databaseName, "users/3", null, TimeSpan.FromSeconds(10)));

                Servers[2] = GetNewServer(new Dictionary <string, string> {
                    { RavenConfiguration.GetKey(x => x.Core.ServerUrls), urlsC[0] }
                }, runInMemory: false, deletePrevious: false, partialPath: dataDirC);
                Assert.Equal(2, await WaitForValueAsync(async() => await GetMembersCount(leaderStore, databaseName), 2));
                Assert.Equal(0, await WaitForValueAsync(async() => await GetRehabCount(leaderStore, databaseName), 0, 30_000));
                Assert.True(await WaitForDocumentInClusterAsync <User>(dbToplogy, databaseName, "users/3", null, TimeSpan.FromSeconds(10)));

                dbToplogy = (await leaderStore.Maintenance.Server.SendAsync(new GetDatabaseRecordOperation(databaseName))).Topology;
                Assert.Equal(2, dbToplogy.AllNodes.Count());
                Assert.Equal(2, dbToplogy.Members.Count);
                Assert.Equal(0, dbToplogy.Rehabs.Count);

                Assert.True(await WaitForDocumentInClusterAsync <User>(dbToplogy, databaseName, "users/1", null, TimeSpan.FromSeconds(10)));
                Assert.True(await WaitForDocumentInClusterAsync <User>(dbToplogy, databaseName, "users/3", null, TimeSpan.FromSeconds(10)));
                Assert.True(await WaitForDocumentInClusterAsync <User>(dbToplogy, databaseName, "users/2", null, TimeSpan.FromSeconds(30)));

                dbToplogy = (await leaderStore.Maintenance.Server.SendAsync(new GetDatabaseRecordOperation(databaseName))).Topology;
                Assert.Equal(2, dbToplogy.AllNodes.Count());
                Assert.Equal(2, dbToplogy.Members.Count);
                Assert.Equal(0, dbToplogy.Rehabs.Count);
            }
        }
        /// <summary>
        /// Integration test: a Changes API subscription on document "users/1" must keep delivering
        /// notifications while the server the store is currently talking to is disposed, twice in a row.
        /// After each step the number of received notifications is expected to grow by exactly one.
        /// </summary>
        public async Task ChangesApiFailOver()
        {
            var db       = "Test";
            var topology = new DatabaseTopology
            {
                DynamicNodesDistribution = true
            };
            // Cluster timing settings are overridden with small values.
            // NOTE(review): presumably so that node failures are detected and handled quickly — confirm units of each setting.
            var leader = await CreateRaftClusterAndGetLeader(3, customSettings : new Dictionary <string, string>()
            {
                [RavenConfiguration.GetKey(x => x.Cluster.AddReplicaTimeout)]    = "1",
                [RavenConfiguration.GetKey(x => x.Cluster.MoveToRehabGraceTime)] = "0",
                [RavenConfiguration.GetKey(x => x.Cluster.StabilizationTime)]    = "1",
                [RavenConfiguration.GetKey(x => x.Cluster.ElectionTimeout)]      = "50"
            });

            // NOTE(review): the literal 2 is presumably the replication factor — confirm against CreateDatabaseInCluster.
            await CreateDatabaseInCluster(new DatabaseRecord
            {
                DatabaseName = db,
                Topology     = topology
            }, 2, leader.WebUrl);

            using (var store = new DocumentStore
            {
                Database = db,
                Urls = new[] { leader.WebUrl }
            }.Initialize())
            {
                // Every change notification for "users/1" is appended to this collection;
                // its Count is what the assertions below observe.
                var list           = new BlockingCollection <DocumentChange>();
                var taskObservable = store.Changes();
                await taskObservable.EnsureConnectedNow();

                var observableWithTask = taskObservable.ForDocument("users/1");
                observableWithTask.Subscribe(list.Add);
                await observableWithTask.EnsureSubscribedNow();

                // Step 1: write while all servers are up -> expect the first notification.
                using (var session = store.OpenSession())
                {
                    session.Store(new User(), "users/1");
                    session.SaveChanges();
                }

                WaitForDocument(store, "users/1");

                var value = WaitForValue(() => list.Count, 1);
                Assert.Equal(1, value);

                // Step 2: dispose the server the request executor currently points at.
                // NOTE(review): DisposeCurrentServer is defined elsewhere; it presumably fills
                // toDispose/workingServer through the ref parameters — confirm.
                var         currentUrl    = store.GetRequestExecutor().Url;
                RavenServer toDispose     = null;
                RavenServer workingServer = null;

                DisposeCurrentServer(currentUrl, ref toDispose, ref workingServer);

                await taskObservable.EnsureConnectedNow();

                // NOTE(review): meaning of the trailing (1, 2) arguments is not visible here — confirm.
                WaitForTopologyStabilization(db, workingServer, 1, 2);

                // Same document is written again (async session this time) -> expect the second notification.
                using (var session = store.OpenAsyncSession())
                {
                    await session.StoreAsync(new User(), "users/1");

                    await session.SaveChangesAsync();
                }
                value = WaitForValue(() => list.Count, 2);
                Assert.Equal(2, value);

                // Step 3: dispose whichever server the store switched to and repeat the check.
                currentUrl = store.GetRequestExecutor().Url;
                DisposeCurrentServer(currentUrl, ref toDispose, ref workingServer);

                await taskObservable.EnsureConnectedNow();

                WaitForTopologyStabilization(db, workingServer, 2, 1);

                using (var session = store.OpenSession())
                {
                    session.Store(new User(), "users/1");
                    session.SaveChanges();
                }
                value = WaitForValue(() => list.Count, 3);
                Assert.Equal(3, value);
            }
        }
Beispiel #24
0
        /// <summary>
        /// Lazily yields one ongoing-task descriptor for every Raven ETL and then every SQL ETL
        /// configured in <paramref name="databaseRecord"/>. Yields nothing when
        /// <paramref name="dbTopology"/> is null.
        /// </summary>
        /// <param name="databaseRecord">Record holding the ETL configurations and their connection strings.</param>
        /// <param name="dbTopology">Database topology used to resolve the node responsible for each task.</param>
        /// <param name="clusterTopology">Cluster topology used to translate a node tag into its URL.</param>
        /// <param name="store">Server store; its passive state is forwarded to the responsibility lookup.</param>
        /// <exception cref="InvalidOperationException">
        /// Raised during enumeration when an ETL refers to a connection string name that the record does not contain.
        /// </exception>
        private static IEnumerable <OngoingTask> CollectEtlTasks(DatabaseRecord databaseRecord, DatabaseTopology dbTopology, ClusterTopology clusterTopology, ServerStore store)
        {
            if (dbTopology == null)
            {
                yield break;
            }

            if (databaseRecord.RavenEtls != null)
            {
                foreach (var ravenEtl in databaseRecord.RavenEtls)
                {
                    var responsibleTag = dbTopology.WhoseTaskIsIt(ravenEtl, store.IsPassive());

                    // Disabled wins over everything; otherwise a single disabled transform makes the task partial.
                    var state = ravenEtl.Disabled || ravenEtl.Transforms.All(t => t.Disabled)
                        ? OngoingTaskState.Disabled
                        : ravenEtl.Transforms.Any(t => t.Disabled)
                            ? OngoingTaskState.PartiallyEnabled
                            : OngoingTaskState.Enabled;

                    if (databaseRecord.RavenConnectionStrings.TryGetValue(ravenEtl.ConnectionStringName, out var ravenConnection) == false)
                    {
                        throw new InvalidOperationException(
                                  $"Could not find connection string named '{ravenEtl.ConnectionStringName}' in the database record for '{ravenEtl.Name}' ETL");
                    }

                    var responsibleNode = new NodeId
                    {
                        NodeTag = responsibleTag,
                        NodeUrl = clusterTopology.GetUrlFromTag(responsibleTag)
                    };

                    yield return new OngoingTaskRavenEtl
                    {
                        TaskId = ravenEtl.TaskId,
                        TaskName = ravenEtl.Name,
                        // TODO arek TaskConnectionStatus =
                        TaskState = state,
                        ResponsibleNode = responsibleNode,
                        DestinationUrl = ravenConnection.Url,
                        DestinationDatabase = ravenConnection.Database
                    };
                }
            }

            if (databaseRecord.SqlEtls != null)
            {
                foreach (var sqlEtl in databaseRecord.SqlEtls)
                {
                    var responsibleTag = dbTopology.WhoseTaskIsIt(sqlEtl, store.IsPassive());

                    // Same state rules as for Raven ETL above.
                    var state = sqlEtl.Disabled || sqlEtl.Transforms.All(t => t.Disabled)
                        ? OngoingTaskState.Disabled
                        : sqlEtl.Transforms.Any(t => t.Disabled)
                            ? OngoingTaskState.PartiallyEnabled
                            : OngoingTaskState.Enabled;

                    if (databaseRecord.SqlConnectionStrings.TryGetValue(sqlEtl.ConnectionStringName, out var sqlConnection) == false)
                    {
                        throw new InvalidOperationException(
                                  $"Could not find connection string named '{sqlEtl.ConnectionStringName}' in the database record for '{sqlEtl.Name}' ETL");
                    }

                    // The destination is derived from the raw connection string rather than stored separately.
                    var (destinationDatabase, destinationServer) =
                        SqlConnectionStringParser.GetDatabaseAndServerFromConnectionString(sqlEtl.FactoryName, sqlConnection.ConnectionString);

                    var responsibleNode = new NodeId
                    {
                        NodeTag = responsibleTag,
                        NodeUrl = clusterTopology.GetUrlFromTag(responsibleTag)
                    };

                    yield return new OngoingTaskSqlEtl
                    {
                        TaskId = sqlEtl.TaskId,
                        TaskName = sqlEtl.Name,
                        // TODO arek TaskConnectionStatus =
                        TaskState = state,
                        ResponsibleNode = responsibleNode,
                        DestinationServer = destinationServer,
                        DestinationDatabase = destinationDatabase
                    };
                }
            }
        }
Beispiel #25
0
        /// <summary>
        /// Decides whether <paramref name="promotable"/> can be promoted to a full member of the
        /// database group, based on the previous and current cluster status reports.
        /// </summary>
        /// <param name="dbName">Name of the database being evaluated.</param>
        /// <param name="topology">
        /// Database topology; its <c>DemotionReasons</c> and <c>PromotablesStatus</c> entries for the
        /// promotable are updated or removed as a side effect.
        /// </param>
        /// <param name="current">Latest status report per node tag.</param>
        /// <param name="previous">Previous status report per node tag.</param>
        /// <param name="mentorNode">Tag of the node mentoring the promotable.</param>
        /// <param name="promotable">Tag of the node that is a promotion candidate.</param>
        /// <returns>
        /// <c>Promote</c> is true when the node should be promoted. <c>UpdateTopologyReason</c> is non-null
        /// only when the topology was modified and should be persisted; it is null when nothing changed
        /// (including the case where any of the required reports is missing).
        /// </returns>
        private (bool Promote, string UpdateTopologyReason) TryPromote(string dbName, DatabaseTopology topology, Dictionary <string, ClusterNodeStatusReport> current, Dictionary <string, ClusterNodeStatusReport> previous, string mentorNode, string promotable)
        {
            // All four reports (mentor/promotable x previous/current) are required; bail out quietly otherwise.
            if (previous.TryGetValue(mentorNode, out var mentorPrevClusterStats) == false ||
                mentorPrevClusterStats.Report.TryGetValue(dbName, out var mentorPrevDbStats) == false)
            {
                return(false, null);
            }

            if (previous.TryGetValue(promotable, out var promotablePrevClusterStats) == false ||
                promotablePrevClusterStats.Report.TryGetValue(dbName, out var promotablePrevDbStats) == false)
            {
                return(false, null);
            }

            if (current.TryGetValue(mentorNode, out var mentorCurrClusterStats) == false ||
                mentorCurrClusterStats.Report.TryGetValue(dbName, out var mentorCurrDbStats) == false)
            {
                return(false, null);
            }

            if (current.TryGetValue(promotable, out var promotableClusterStats) == false ||
                promotableClusterStats.Report.TryGetValue(dbName, out var promotableDbStats) == false)
            {
                return(false, null);
            }

            var mentorsEtag = mentorPrevDbStats.LastEtag;

            // The mentor must have sent at least everything it had at the time of the previous report.
            if (mentorCurrDbStats.LastSentEtag.TryGetValue(promotable, out var lastSentEtag))
            {
                if (lastSentEtag < mentorsEtag)
                {
                    var msg = $"The database {dbName} on {promotable} not ready to be promoted, because the mentor hasn't sent all his documents.\n" +
                              $"Last sent Etag: {lastSentEtag}, Mentor's Etag: {mentorsEtag}";
                    if (_logger.IsInfoEnabled)
                    {
                        _logger.Info(msg);
                    }

                    // A promotable may have no demotion reason recorded yet; the indexer would throw
                    // KeyNotFoundException in that case, so treat a missing entry as "reason changed".
                    if (topology.DemotionReasons.TryGetValue(promotable, out var previousReason) == false ||
                        msg.Equals(previousReason) == false)
                    {
                        topology.DemotionReasons[promotable]   = msg;
                        topology.PromotablesStatus[promotable] = DatabasePromotionStatus.ChangeVectorNotMerged;
                        return(false, msg);
                    }
                    return(false, null);
                }
            }

            var indexesCatchedUp = CheckIndexProgress(promotablePrevDbStats.LastEtag, promotablePrevDbStats.LastIndexStats, promotableDbStats.LastIndexStats);

            if (indexesCatchedUp)
            {
                if (_logger.IsOperationsEnabled)
                {
                    _logger.Operations($"We try to promoted the database {dbName} on {promotable} to be a full member");
                }
                topology.PromotablesStatus.Remove(promotable);
                topology.DemotionReasons.Remove(promotable);

                return(true, $"Node {promotable} is up-to-date so promoting it to be member");
            }
            if (_logger.IsInfoEnabled)
            {
                _logger.Info($"The database {dbName} on {promotable} not ready to be promoted, because the indexes are not up-to-date.\n");
            }

            // Only report a topology change the first time the status flips to IndexNotUpToDate.
            if (topology.PromotablesStatus.TryGetValue(promotable, out var currentStatus) == false ||
                currentStatus != DatabasePromotionStatus.IndexNotUpToDate)
            {
                topology.PromotablesStatus[promotable] = DatabasePromotionStatus.IndexNotUpToDate;
                return(false, $"Node {promotable} not ready to be a member, because the indexes are not up-to-date");
            }
            return(false, null);
        }
Beispiel #26
0
        /// <summary>
        /// Lazily yields a list-view descriptor for every Raven ETL and then every SQL ETL configured
        /// in <paramref name="databaseRecord"/>. Yields nothing when <paramref name="dbTopology"/> is null.
        /// </summary>
        /// <param name="databaseRecord">Record holding the ETL configurations and their connection strings.</param>
        /// <param name="dbTopology">Database topology used to resolve the node responsible for each task.</param>
        /// <param name="clusterTopology">Cluster topology used to translate a node tag into its URL.</param>
        /// <exception cref="InvalidOperationException">
        /// Raised during enumeration when an ETL refers to a connection string name that the record does not contain.
        /// </exception>
        private IEnumerable <OngoingTask> CollectEtlTasks(DatabaseRecord databaseRecord, DatabaseTopology dbTopology, ClusterTopology clusterTopology)
        {
            if (dbTopology == null)
            {
                yield break;
            }

            if (databaseRecord.RavenEtls != null)
            {
                foreach (var ravenEtl in databaseRecord.RavenEtls)
                {
                    var responsibleTag = dbTopology.WhoseTaskIsIt(ravenEtl, ServerStore.Engine.CurrentState);
                    var state          = GetEtlTaskState(ravenEtl);

                    if (databaseRecord.RavenConnectionStrings.TryGetValue(ravenEtl.ConnectionStringName, out var ravenConnection) == false)
                    {
                        throw new InvalidOperationException(
                                  $"Could not find connection string named '{ravenEtl.ConnectionStringName}' in the database record for '{ravenEtl.Name}' ETL");
                    }

                    string destinationUrl = null;
                    string error = null;
                    var connectionStatus = OngoingTaskConnectionStatus.None;

                    if (responsibleTag != ServerStore.NodeTag)
                    {
                        // Another node runs this task, so there is no local process to inspect.
                        connectionStatus = OngoingTaskConnectionStatus.NotOnThisNode;
                    }
                    else
                    {
                        // This node is responsible: look for the running Raven ETL process with the same name.
                        foreach (var process in Database.EtlLoader.Processes)
                        {
                            if (process is RavenEtl runningEtl && runningEtl.Name == ravenEtl.Name)
                            {
                                destinationUrl   = runningEtl.Url;
                                connectionStatus = OngoingTaskConnectionStatus.Active;
                                break;
                            }
                        }

                        // Status still None means no matching process was found.
                        if (connectionStatus == OngoingTaskConnectionStatus.None)
                        {
                            error = $"The raven etl process'{ravenEtl.Name}' was not found.";
                        }
                    }

                    var responsibleNode = new NodeId
                    {
                        NodeTag = responsibleTag,
                        NodeUrl = clusterTopology.GetUrlFromTag(responsibleTag)
                    };

                    yield return new OngoingTaskRavenEtlListView()
                    {
                        TaskId = ravenEtl.TaskId,
                        TaskName = ravenEtl.Name,
                        // TODO arek TaskConnectionStatus =
                        TaskState = state,
                        ResponsibleNode = responsibleNode,
                        DestinationUrl = destinationUrl,
                        TaskConnectionStatus = connectionStatus,
                        DestinationDatabase = ravenConnection.Database,
                        ConnectionStringName = ravenEtl.ConnectionStringName,
                        Error = error
                    };
                }
            }

            if (databaseRecord.SqlEtls != null)
            {
                foreach (var sqlEtl in databaseRecord.SqlEtls)
                {
                    var responsibleTag = dbTopology.WhoseTaskIsIt(sqlEtl, ServerStore.Engine.CurrentState);
                    var state          = GetEtlTaskState(sqlEtl);

                    if (databaseRecord.SqlConnectionStrings.TryGetValue(sqlEtl.ConnectionStringName, out var sqlConnection) == false)
                    {
                        throw new InvalidOperationException(
                                  $"Could not find connection string named '{sqlEtl.ConnectionStringName}' in the database record for '{sqlEtl.Name}' ETL");
                    }

                    // The destination is derived from the raw connection string.
                    var (destinationDatabase, destinationServer) =
                        SqlConnectionStringParser.GetDatabaseAndServerFromConnectionString(sqlEtl.FactoryName, sqlConnection.ConnectionString);

                    var responsibleNode = new NodeId
                    {
                        NodeTag = responsibleTag,
                        NodeUrl = clusterTopology.GetUrlFromTag(responsibleTag)
                    };

                    yield return new OngoingTaskSqlEtlListView()
                    {
                        TaskId = sqlEtl.TaskId,
                        TaskName = sqlEtl.Name,
                        // TODO arek TaskConnectionStatus =
                        TaskState = state,
                        ResponsibleNode = responsibleNode,
                        DestinationServer = destinationServer,
                        DestinationDatabase = destinationDatabase,
                        ConnectionStringName = sqlEtl.ConnectionStringName
                    };
                }
            }
        }
Beispiel #27
0
        /// <summary>
        /// Counts the nodes of the database group that are considered responding: every member is
        /// counted unconditionally, while promotables and rehabs are counted only when their health
        /// check does not come back as <c>DatabaseHealth.Bad</c>.
        /// </summary>
        /// <param name="clusterTopology">Cluster topology forwarded to the health check.</param>
        /// <param name="dbName">Name of the database being evaluated.</param>
        /// <param name="topology">Database topology supplying the members, promotables and rehabs.</param>
        /// <param name="current">Latest status report per node tag, forwarded to the health check.</param>
        /// <returns>The number of responding nodes.</returns>
        private int GetNumberOfRespondingNodes(ClusterTopology clusterTopology, string dbName, DatabaseTopology topology, Dictionary <string, ClusterNodeStatusReport> current)
        {
            // Shared predicate for the two non-member groups.
            bool IsResponding(string nodeTag)
            {
                return FailedDatabaseInstanceOrNode(clusterTopology, nodeTag, dbName, current) != DatabaseHealth.Bad;
            }

            var responding = topology.Members.Count;

            foreach (var promotableTag in topology.Promotables)
            {
                responding += IsResponding(promotableTag) ? 1 : 0;
            }

            foreach (var rehabTag in topology.Rehabs)
            {
                responding += IsResponding(rehabTag) ? 1 : 0;
            }

            return responding;
        }
Beispiel #28
0
        /// <summary>
        /// Moves <paramref name="member"/> into the rehab list of <paramref name="topology"/> unless the
        /// latest report shows the node is OK and its instance of the database is not faulted.
        /// </summary>
        /// <param name="dbName">Name of the database being evaluated.</param>
        /// <param name="topology">
        /// Database topology; on demotion its Members/Rehabs lists, DemotionReasons and PromotablesStatus are updated.
        /// </param>
        /// <param name="current">Latest status report per node tag.</param>
        /// <param name="member">Tag of the member node being checked.</param>
        /// <returns>False when the node is healthy and nothing was changed; true when the node was marked as rehab.</returns>
        private bool TryMoveToRehab(string dbName, DatabaseTopology topology, Dictionary <string, ClusterNodeStatusReport> current, string member)
        {
            DatabaseStatusReport dbStats = null;

            // Healthy = a report exists, the node status is Ok, and the database is reported and not faulted.
            var isHealthy = current.TryGetValue(member, out var nodeStats) &&
                            nodeStats.Status == ClusterNodeStatusReport.ReportStatus.Ok &&
                            nodeStats.Report.TryGetValue(dbName, out dbStats) &&
                            dbStats.Status != Faulted;
            if (isHealthy)
            {
                return false;
            }

            // Pick the most specific explanation for the demotion.
            string reason;
            if (nodeStats == null)
            {
                reason = "Node in rehabilitation due to no status report in the latest cluster stats";
            }
            else if (nodeStats.Status == ClusterNodeStatusReport.ReportStatus.Timeout)
            {
                reason = $"Node in rehabilitation due to timeout reached trying to get stats from node.{Environment.NewLine}";
            }
            else if (nodeStats.Status != ClusterNodeStatusReport.ReportStatus.Ok)
            {
                reason = $"Node in rehabilitation due to last report status being '{nodeStats.Status}'.{Environment.NewLine}";
            }
            else if (nodeStats.Report.TryGetValue(dbName, out var reportedDb) && reportedDb.Status == Faulted)
            {
                reason = $"In rehabilitation because the DatabaseStatus for this node is {nameof(Faulted)}.{Environment.NewLine}";
            }
            else
            {
                reason = $"In rehabilitation because the node is reachable but had no report about the database.{Environment.NewLine}";
            }

            // Append whatever error details the reports carry.
            if (nodeStats?.Error != null)
            {
                reason += $". {nodeStats.Error}";
            }
            if (dbStats?.Error != null)
            {
                reason += $". {dbStats.Error}";
            }

            var alreadyInRehab = topology.Rehabs.Contains(member);
            if (!alreadyInRehab)
            {
                topology.Members.Remove(member);
                topology.Rehabs.Add(member);
            }

            // Reason and status are refreshed even when the node was already in rehab.
            topology.DemotionReasons[member]   = reason;
            topology.PromotablesStatus[member] = DatabasePromotionStatus.NotResponding;

            if (_logger.IsOperationsEnabled)
            {
                _logger.Operations($"Node {member} of database '{dbName}': {reason}");
            }

            return true;
        }
Beispiel #29
0
        /// <summary>
        /// Builds a cluster transaction command from the parsed batch commands, sorting each one into
        /// either the database commands or the cluster (compare-exchange) commands.
        /// </summary>
        /// <param name="databaseName">Name of the target database.</param>
        /// <param name="identityPartsSeparator">Separator forwarded to the per-command validation.</param>
        /// <param name="topology">
        /// Topology supplying the database record id and cluster transaction id; a fresh GUID-based
        /// id is generated for whichever of the two is null.
        /// </param>
        /// <param name="commandParsedCommands">Parsed commands to include in the transaction.</param>
        /// <param name="options">Cluster transaction options.</param>
        /// <param name="uniqueRequestId">Request id passed to the base constructor.</param>
        /// <exception cref="RachisApplyException">A command has a type other than PUT, DELETE, CompareExchangePUT or CompareExchangeDELETE.</exception>
        public ClusterTransactionCommand(string databaseName, char identityPartsSeparator, DatabaseTopology topology,
                                         ArraySegment <BatchRequestParser.CommandData> commandParsedCommands,
                                         ClusterTransactionOptions options, string uniqueRequestId) : base(uniqueRequestId)
        {
            DatabaseName         = databaseName;
            DatabaseRecordId     = topology.DatabaseTopologyIdBase64 ?? Guid.NewGuid().ToBase64Unpadded();
            ClusterTransactionId = topology.ClusterTransactionIdBase64 ?? Guid.NewGuid().ToBase64Unpadded();
            Options = options;
            CommandCreationTicks = SystemTime.UtcNow.Ticks;

            foreach (var parsed in commandParsedCommands)
            {
                // Every command is converted and validated before its type is inspected.
                var converted = ClusterTransactionDataCommand.FromCommandData(parsed);
                ClusterCommandValidation(converted, identityPartsSeparator);

                var type = parsed.Type;
                if (type == CommandType.PUT || type == CommandType.DELETE)
                {
                    DatabaseCommands.Add(converted);
                }
                else if (type == CommandType.CompareExchangePUT || type == CommandType.CompareExchangeDELETE)
                {
                    ClusterCommands.Add(converted);
                }
                else
                {
                    throw new RachisApplyException($"The type '{type}' is not supported in '{nameof(ClusterTransactionCommand)}.'");
                }
            }

            DatabaseCommandsCount = DatabaseCommands.Count;
        }
Beispiel #30
0
        /// <summary>
        /// Produces the members, promotables and rehabs of <paramref name="topology"/> rearranged to
        /// follow the sequence given in <paramref name="order"/>. The topology itself is not modified.
        /// </summary>
        /// <param name="topology">Topology whose node lists are to be reordered.</param>
        /// <param name="order">Desired node order; must contain exactly the nodes of the topology.</param>
        /// <returns>The three reordered lists as a tuple.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="order"/> differs in size from the topology, misses one of its nodes, or
        /// contains a node that is in none of the three lists.
        /// </exception>
        public static (List <string> Members, List <string> Promotables, List <string> Rehabs) Reorder(DatabaseTopology topology, List <string> order)
        {
            var matchesTopology = topology.Count == order.Count &&
                                  topology.AllNodes.All(order.Contains);
            if (matchesTopology == false)
            {
                throw new ArgumentException("The reordered list doesn't correspond to the existing nodes of the database group.");
            }

            var members     = new List<string>();
            var promotables = new List<string>();
            var rehabs      = new List<string>();

            foreach (var tag in order)
            {
                // Pick the output list matching the group the node currently belongs to.
                List<string> target;
                if (topology.Members.Contains(tag))
                {
                    target = members;
                }
                else if (topology.Promotables.Contains(tag))
                {
                    target = promotables;
                }
                else if (topology.Rehabs.Contains(tag))
                {
                    target = rehabs;
                }
                else
                {
                    throw new ArgumentException($"Can't find node {tag} in the topology");
                }

                target.Add(tag);
            }

            return (members, promotables, rehabs);
        }