/// <summary>
/// Refreshes the replication information.
/// Expert use only.
/// </summary>
public async Task RefreshReplicationInformationAsync(RavenFileSystemClient serverClient)
{
    var serverHash = ServerHash.GetServerHash(serverClient.ServerUrl);

    try
    {
        var result = await serverClient.Config.GetConfig(SynchronizationConstants.RavenSynchronizationDestinations);

        if (result == null)
        {
            LastReplicationUpdate = SystemTime.UtcNow; // checked and not found
        }
        else
        {
            var urls = result.GetValues("url");
            replicationDestinations = urls == null ? new List<string>() : urls.ToList();
        }
    }
    catch (Exception e)
    {
        log.ErrorException("Could not contact master for new replication information", e);
        replicationDestinations = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash).ToList();
        LastReplicationUpdate = SystemTime.UtcNow;
        return;
    }

    failureCounts[serverClient.ServerUrl] = new FailureCounter(); // we just hit the master, so we can reset its failure count

    ReplicationInformerLocalCache.TrySavingReplicationInformationToLocalCache(serverHash, replicationDestinations);
    UpdateReplicationInformationFromDocument(replicationDestinations);

    LastReplicationUpdate = SystemTime.UtcNow;
}
//TODO: when counter replication is refactored (simplified) this parameter should be removed; for now it's a constraint of the interface
public void RefreshReplicationInformation()
{
    JsonDocument document;
    var serverHash = ServerHash.GetServerHash(counterStore.Url);

    try
    {
        var replicationFetchTask = counterStore.GetReplicationsAsync();
        replicationFetchTask.Wait();

        if (replicationFetchTask.Status != TaskStatus.Faulted)
        {
            failureCounters.ResetFailureCount(counterStore.Url);
        }

        document = new JsonDocument
        {
            DataAsJson = RavenJObject.FromObject(replicationFetchTask.Result)
        };
    }
    catch (Exception e)
    {
        log.ErrorException("Could not contact master for fetching replication information. Something is wrong.", e);
        document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);

        if (document == null || document.DataAsJson == null)
        {
            throw;
        }
    }

    ReplicationInformerLocalCache.TrySavingReplicationInformationToLocalCache(serverHash, document);
    UpdateReplicationInformationFromDocument(document);
}
public override void ClearReplicationInformationLocalCache(IAsyncFilesCommands client)
{
    var serverClient = (IAsyncFilesCommandsImpl)client;
    var urlForFilename = serverClient.UrlFor();
    var serverHash = ServerHash.GetServerHash(urlForFilename);
    ReplicationInformerLocalCache.ClearReplicationInformationFromLocalCache(serverHash);
}
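Most of the snippets in this listing share the same shape: hash the server URL, ask the server for the current destinations, fall back to the local cache when the server cannot be reached, and re-save the cache after a successful fetch. Below is a minimal sketch of that pattern using the cache and hashing APIs shown above; the helper name and the fetchFromServer delegate are illustrative only, not part of the client API.

// Minimal sketch of the shared fetch-then-cache pattern (helper name and delegate are hypothetical).
private JsonDocument TryRefreshDestinations(string serverUrl, Func<JsonDocument> fetchFromServer)
{
    var serverHash = ServerHash.GetServerHash(serverUrl);

    JsonDocument document;
    try
    {
        // ask the server first; a successful round trip is also the point where the
        // examples above reset their failure counters
        document = fetchFromServer();
    }
    catch (Exception)
    {
        // server unreachable: fall back to whatever was cached on a previous successful refresh
        return ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
    }

    if (document != null)
    {
        // keep the local cache current so the next failover has something to work with
        ReplicationInformerLocalCache.TrySavingReplicationInformationToLocalCache(serverHash, document);
    }

    return document;
}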
/// <summary>
/// Initializes this instance.
/// </summary>
/// <returns></returns>
public IDocumentStore Initialize(bool ensureDatabaseExists)
{
    if (initialized)
    {
        return this;
    }

    AssertValidConfiguration();

    jsonRequestFactory = new HttpJsonRequestFactory(MaxNumberOfCachedRequests, HttpMessageHandlerFactory, Conventions.AcceptGzipContent, Conventions.AuthenticationScheme);

    try
    {
        SecurityExtensions.InitializeSecurity(Conventions, jsonRequestFactory, Url);

        InitializeInternal();

        if (Conventions.DocumentKeyGenerator == null) // don't overwrite what the user is doing
        {
            var generator = new MultiDatabaseHiLoGenerator(32);
            Conventions.DocumentKeyGenerator = (dbName, databaseCommands, entity) => generator.GenerateDocumentKey(dbName, databaseCommands, Conventions, entity);
        }

        if (Conventions.AsyncDocumentKeyGenerator == null && asyncDatabaseCommandsGenerator != null)
        {
            var generator = new AsyncMultiDatabaseHiLoKeyGenerator(32);
            Conventions.AsyncDocumentKeyGenerator = (dbName, commands, entity) => generator.GenerateDocumentKeyAsync(dbName, commands, Conventions, entity);
        }

        initialized = true;

#if !(MONO || DNXCORE50)
        RecoverPendingTransactions();
#endif

        if (ensureDatabaseExists &&
            string.IsNullOrEmpty(DefaultDatabase) == false &&
            DefaultDatabase.Equals(Constants.SystemDatabase) == false) // the system database exists anyway
        {
            // If we have an indication that the database is part of a replication cluster we don't want to create it,
            // because we want the client to fail over to a different database instead.
            var serverHash = ServerHash.GetServerHash(DatabaseCommands.Url);
            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            var replicationDocument = document?.DataAsJson.JsonDeserialization<ReplicationDocumentWithClusterInformation>();
            if (replicationDocument == null)
            {
                DatabaseCommands.ForSystemDatabase().GlobalAdmin.EnsureDatabaseExists(DefaultDatabase, true);
            }
        }
    }
    catch (Exception)
    {
        Dispose();
        throw;
    }

    return this;
}
public override Task UpdateReplicationInformationIfNeeded(IAsyncFilesCommands commands)
{
    var serverClient = (IAsyncFilesCommandsImpl)commands;

    if (conventions.FailoverBehavior == FailoverBehavior.FailImmediately)
    {
        return new CompletedTask();
    }

    if (lastReplicationUpdate.AddMinutes(5) > SystemTime.UtcNow)
    {
        return new CompletedTask();
    }

    lock (replicationLock)
    {
        if (firstTime)
        {
            var serverHash = ServerHash.GetServerHash(serverClient.ServerUrl);
            var destinations = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            if (destinations != null)
            {
                UpdateReplicationInformationFromDocument(destinations);
            }
        }

        firstTime = false;

        if (lastReplicationUpdate.AddMinutes(5) > SystemTime.UtcNow)
        {
            return new CompletedTask();
        }

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(() => RefreshReplicationInformation(serverClient))
            .ContinueWith(task =>
            {
                if (task.Exception != null)
                {
                    log.ErrorException("Failed to refresh replication information", task.Exception);
                }

                refreshReplicationInformationTask = null;
            });
    }
}
private Task UpdateReplicationInformationIfNeededInternal(IAsyncFilesCommands commands)
{
    if (Conventions.FailoverBehavior == FailoverBehavior.FailImmediately)
    {
        return new CompletedTask();
    }

    if (LastReplicationUpdate.AddMinutes(5) > SystemTime.UtcNow)
    {
        return new CompletedTask();
    }

    var serverClient = (IAsyncFilesCommandsImpl)commands;

    lock (ReplicationLock)
    {
        if (FirstTime)
        {
            var serverHash = ServerHash.GetServerHash(serverClient.UrlFor());
            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            if (IsInvalidDestinationsDocument(document) == false)
            {
                UpdateReplicationInformationFromDocument(document);
            }
        }

        FirstTime = false;

        if (LastReplicationUpdate.AddMinutes(5) > SystemTime.UtcNow)
        {
            return new CompletedTask();
        }

        var taskCopy = RefreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        return RefreshReplicationInformationTask = Task.Factory.StartNew(() => RefreshReplicationInformation(commands))
            .ContinueWith(task =>
            {
                if (task.Exception != null)
                {
                    log.ErrorException("Failed to refresh replication information", task.Exception);
                }

                RefreshReplicationInformationTask = null;
            });
    }
}
public Task UpdateReplicationInformationIfNeeded(RavenFileSystemClient serverClient)
{
    if (Conventions.FailoverBehavior == FailoverBehavior.FailImmediately)
    {
        return new CompletedTask();
    }

    if (LastReplicationUpdate.AddMinutes(5) > SystemTime.UtcNow)
    {
        return new CompletedTask();
    }

    lock (replicationLock)
    {
        if (firstTime)
        {
            var serverHash = ServerHash.GetServerHash(serverClient.ServerUrl);
            var destinations = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            if (destinations != null)
            {
                UpdateReplicationInformationFromDocument(destinations);
            }
        }

        firstTime = false;

        if (LastReplicationUpdate.AddMinutes(5) > SystemTime.UtcNow)
        {
            return new CompletedTask();
        }

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        return refreshReplicationInformationTask = RefreshReplicationInformationAsync(serverClient)
            .ContinueWith(task =>
            {
                if (task.Exception != null)
                {
                    log.ErrorException("Failed to refresh replication information", task.Exception);
                }

                refreshReplicationInformationTask = null;
            });
    }
}
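The UpdateReplicationInformationIfNeeded variants above all implement the same double-checked, single-flight refresh: a cheap interval check outside the lock, a re-check inside it, reuse of an already running refresh task, and a continuation that clears the shared task field. A distilled sketch of that pattern follows; the field and method names are hypothetical, while SystemTime and CompletedTask are the types used in the snippets above.

// Distilled sketch of the single-flight refresh pattern (field/method names are illustrative).
private readonly object refreshLock = new object();
private Task refreshTask;
private DateTime lastRefresh;

public Task RefreshIfNeeded(Func<Task> refresh)
{
    if (lastRefresh.AddMinutes(5) > SystemTime.UtcNow)
    {
        return new CompletedTask(); // cheap check outside the lock
    }

    lock (refreshLock)
    {
        if (lastRefresh.AddMinutes(5) > SystemTime.UtcNow)
        {
            return new CompletedTask(); // somebody refreshed while we waited for the lock
        }

        var inFlight = refreshTask;
        if (inFlight != null)
        {
            return inFlight; // join the refresh that is already running
        }

        return refreshTask = refresh()
            .ContinueWith(t =>
            {
                lastRefresh = SystemTime.UtcNow;
                refreshTask = null; // allow the next caller to start a fresh refresh
            });
    }
}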
/// <summary>
/// Refreshes the replication information.
/// Expert use only.
/// </summary>
public override void RefreshReplicationInformation(IAsyncFilesCommands commands)
{
    lock (this)
    {
        var serverClient = (IAsyncFilesCommandsImpl)commands;

        string urlForFilename = serverClient.UrlFor();
        var serverHash = ServerHash.GetServerHash(urlForFilename);

        JsonDocument document = null;
        try
        {
            var config = serverClient.Configuration.GetKeyAsync<RavenJObject>(SynchronizationConstants.RavenSynchronizationDestinations).Result;
            failureCounts[urlForFilename] = new FailureCounter(); // we just hit the master, so we can reset its failure count

            if (config != null)
            {
                var destinationsArray = config.Value<RavenJArray>("Destinations");
                if (destinationsArray != null)
                {
                    document = new JsonDocument
                    {
                        DataAsJson = new RavenJObject { { "Destinations", destinationsArray } }
                    };
                }
            }
        }
        catch (Exception e)
        {
            log.ErrorException("Could not contact master for new replication information", e);
            document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
        }

        if (document == null)
        {
            lastReplicationUpdate = SystemTime.UtcNow; // checked and not found
            return;
        }

        ReplicationInformerLocalCache.TrySavingReplicationInformationToLocalCache(serverHash, document);
        UpdateReplicationInformationFromDocument(document);

        lastReplicationUpdate = SystemTime.UtcNow;
    }
}
private async Task UpdateTopology()
{
    JsonOperationContext context;
    using (ContextPool.AllocateOperationContext(out context))
    {
        var node = _topology.LeaderNode;
        var serverHash = ServerHash.GetServerHash(node.Url, node.Database);

        if (_firstTimeTryLoadFromTopologyCache)
        {
            _firstTimeTryLoadFromTopologyCache = false;

            var cachedTopology = TopologyLocalCache.TryLoadTopologyFromLocalCache(serverHash, context);
            if (cachedTopology != null && cachedTopology.Etag > 0)
            {
                _topology = cachedTopology;
                // we have a cached topology, but we need to verify it is up to date; we'll check in
                // 1 second, and let the rest of the system start
                _updateTopologyTimer.Change(TimeSpan.FromSeconds(1), Timeout.InfiniteTimeSpan);
                return;
            }
        }

        var command = new GetTopologyCommand();
        try
        {
            await ExecuteAsync(new ChoosenNode { Node = node }, context, command);
            if (_topology.Etag != command.Result.Etag)
            {
                _topology = command.Result;
                TopologyLocalCache.TrySavingTopologyToLocalCache(serverHash, _topology, context);
            }
        }
        catch (Exception ex)
        {
            if (Logger.IsInfoEnabled)
            {
                Logger.Info("Failed to update topology", ex);
            }
        }
        finally
        {
            _updateTopologyTimer.Change(TimeSpan.FromMinutes(5), Timeout.InfiniteTimeSpan);
        }
    }
}
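UpdateTopology above only re-arms _updateTopologyTimer; it assumes a one-shot System.Threading.Timer created elsewhere. A plausible initialization, assumed rather than taken from the source, might look like this:

// Assumed wiring for the timer used above: one-shot (period = Infinite), re-armed inside
// UpdateTopology itself (1 second after serving a cached topology, 5 minutes after a regular refresh).
// The returned Task is deliberately discarded (fire-and-forget); errors are logged inside UpdateTopology.
_updateTopologyTimer = new Timer(_ => UpdateTopology(), null, TimeSpan.FromSeconds(1), Timeout.InfiniteTimeSpan);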
public Task UpdateReplicationInformationIfNeededAsync()
{
    if (CountersConventions.FailoverBehavior == FailoverBehavior.FailImmediately)
    {
        return new CompletedTask();
    }

    var updateInterval = TimeSpan.FromMilliseconds(MaxIntervalBetweenUpdatesInMilliseconds);
    if (lastReplicationUpdate.AddMinutes(updateInterval.TotalMinutes) > SystemTime.UtcNow && firstTime == false)
    {
        return new CompletedTask();
    }

    lock (updateReplicationInformationSyncObj)
    {
        if (!firstTime) // first time the local cache is obviously empty
        {
            var serverHash = ServerHash.GetServerHash(counterStore.Url);
            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            if (IsInvalidDestinationsDocument(document) == false)
            {
                UpdateReplicationInformationFromDocument(document);
            }
        }

        firstTime = false;

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(RefreshReplicationInformation)
            .ContinueWith(task =>
            {
                if (task.Exception != null)
                {
                    log.ErrorException("Failed to refresh replication information", task.Exception);
                }

                lastReplicationUpdate = SystemTime.UtcNow;
                refreshReplicationInformationTask = null;
            });
    }
}
/// <summary>
/// Refreshes the replication information.
/// Expert use only.
/// </summary>
public override void RefreshReplicationInformation(IAsyncFilesCommands commands)
{
    lock (this)
    {
        var serverClient = (IAsyncFilesCommandsImpl)commands;

        var urlForFilename = serverClient.UrlFor();
        var serverHash = ServerHash.GetServerHash(urlForFilename);

        JsonDocument document = null;
        try
        {
            var destinations = serverClient.Synchronization.GetDestinationsAsync().Result;
            FailureCounters.FailureCounts[urlForFilename] = new FailureCounter(); // we just hit the master, so we can reset its failure count

            if (destinations != null)
            {
                document = new JsonDocument
                {
                    DataAsJson = new RavenJObject
                    {
                        { "Destinations", RavenJToken.FromObject(destinations) }
                    }
                };
            }
        }
        catch (Exception e)
        {
            Log.ErrorException("Could not contact master for new replication information", e);
            document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
        }

        if (document == null)
        {
            lastReplicationUpdate = SystemTime.UtcNow;
            ReplicationDestinations.Clear(); // clear destinations that could be retrieved from local storage
            return;
        }

        if (IsInvalidDestinationsDocument(document) == false)
        {
            ReplicationInformerLocalCache.TrySavingReplicationInformationToLocalCache(serverHash, document);
            UpdateReplicationInformationFromDocument(document);
        }

        lastReplicationUpdate = SystemTime.UtcNow;
    }
}
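IsInvalidDestinationsDocument is referenced in several of these snippets but not shown. Given how the documents are built above (a RavenJObject carrying a "Destinations" entry), a plausible implementation, offered only as an assumption, might be:

// Plausible sketch of the validation helper used above (assumed, not taken from the source):
// a document is unusable if it is missing entirely or carries no "Destinations" entry.
private static bool IsInvalidDestinationsDocument(JsonDocument document)
{
    return document == null ||
           document.DataAsJson == null ||
           document.DataAsJson.ContainsKey("Destinations") == false ||
           document.DataAsJson["Destinations"] == null;
}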
public List<DocumentStore> CreateRaftCluster(int numberOfNodes, string activeBundles = null, Action<DocumentStore> configureStore = null, [CallerMemberName] string databaseName = null, bool inMemory = true, bool fiddler = false)
{
    if (configureStore == null)
    {
        configureStore = defaultConfigureStore;
    }

    var nodes = Enumerable.Range(0, numberOfNodes)
        .Select(x => GetNewServer(GetPort(), activeBundles: activeBundles, databaseName: databaseName, runInMemory: inMemory, configureConfig: configuration =>
        {
            configuration.Cluster.ElectionTimeout *= 10;
            configuration.Cluster.HeartbeatTimeout *= 10;
        }))
        .ToList();

    var allNodesFinishedJoining = new ManualResetEventSlim();
    var random = new Random();
    var leader = nodes[random.Next(0, numberOfNodes - 1)];

    leader.Options.ClusterManager.Value.InitializeTopology(forceCandidateState: true);
    Assert.True(leader.Options.ClusterManager.Value.Engine.WaitForLeader(), "Leader was not elected by himself in time");

    leader.Options.ClusterManager.Value.Engine.TopologyChanged += command =>
    {
        if (command.Requested.AllNodeNames.All(command.Requested.IsVoter))
        {
            allNodesFinishedJoining.Set();
        }
    };

    for (var i = 0; i < numberOfNodes; i++)
    {
        var n = nodes[i];
        if (n == leader)
        {
            continue;
        }

        Assert.True(leader.Options.ClusterManager.Value.Engine.AddToClusterAsync(new NodeConnectionInfo
        {
            Name = RaftHelper.GetNodeName(n.SystemDatabase.TransactionalStorage.Id),
            Uri = RaftHelper.GetNodeUrl(n.SystemDatabase.Configuration.ServerUrl)
        }).Wait(3000), "Failed to add node to cluster");
    }

    if (numberOfNodes == 1)
    {
        allNodesFinishedJoining.Set();
    }

    Assert.True(allNodesFinishedJoining.Wait(10000 * numberOfNodes), "Not all nodes become voters. " + leader.Options.ClusterManager.Value.Engine.CurrentTopology);
    Assert.True(leader.Options.ClusterManager.Value.Engine.WaitForLeader(), "Wait for leader timedout");

    WaitForClusterToBecomeNonStale(nodes);

    foreach (var node in nodes)
    {
        var url = node.SystemDatabase.ServerUrl.ForDatabase(databaseName);
        var serverHash = ServerHash.GetServerHash(url);
        ReplicationInformerLocalCache.ClearClusterNodesInformationLocalCache(serverHash);
        ReplicationInformerLocalCache.ClearReplicationInformationFromLocalCache(serverHash);
    }

    var documentStores = nodes
        .Select(node => NewRemoteDocumentStore(ravenDbServer: node, fiddler: fiddler, activeBundles: activeBundles, configureStore: configureStore, databaseName: databaseName))
        .ToList();

    foreach (var documentStore in documentStores)
    {
        ((ClusterAwareRequestExecuter)((ServerClient)documentStore.DatabaseCommands).RequestExecuter).WaitForLeaderTimeout = TimeSpan.FromSeconds(30);
    }

    return documentStores;
}
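A hypothetical test using this helper might look like the following; the test name, document shape, and assertion flow are illustrative only, while CreateRaftCluster and the standard DocumentStore session API come from the snippet above.

// Illustrative usage of the cluster helper in an xUnit test (test body is an assumption).
[Fact]
public void CanCreateClusterAndStoreDocument()
{
    // spins up three servers, elects a leader, clears the local topology caches,
    // and returns one DocumentStore per node
    var stores = CreateRaftCluster(numberOfNodes: 3);

    using (var session = stores[0].OpenSession())
    {
        session.Store(new { Name = "test" }, "items/1");
        session.SaveChanges();
    }
}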
private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        if (firstTime)
        {
            firstTime = false;

            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            var nodes = GetNodes(primaryNode, document?.DataAsJson.JsonDeserialization<ReplicationDocumentWithClusterInformation>());
            if (nodes != null)
            {
                Nodes = nodes;
                var newLeaderNode = GetLeaderNode(Nodes);
                if (newLeaderNode != null)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetched topology from cache, Leader is {LeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                    }
                    SetLeaderNodeToKnownLeader(newLeaderNode);
                    return new CompletedTask();
                }

                if (Log.IsDebugEnabled)
                {
                    Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                }
                SetLeaderNodeToNull();
            }
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(() =>
        {
            var tryFailoverServers = false;
            var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
            for (;;)
            {
                // taking a snapshot so we can tell if the value changed while we fetch the topology
                var prevLeader = LeaderNode;
                var nodes = NodeUrls.ToHashSet();

                if (tryFailoverServers == false)
                {
                    if (nodes.Count == 0)
                    {
                        nodes.Add(primaryNode);
                    }
                }
                else
                {
                    nodes.Add(primaryNode); // always check the primary node during a failover check

                    foreach (var failoverServer in FailoverServers)
                    {
                        var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (node != null)
                        {
                            nodes.Add(node);
                        }
                    }

                    triedFailoverServers = true;
                }

                var replicationDocuments = nodes
                    .Select(operationMetadata => new
                    {
                        Node = operationMetadata,
                        Task = getReplicationDestinationsTask(operationMetadata)
                    })
                    .ToArray();

                var tasks = replicationDocuments
                    .Select(x => (Task)x.Task)
                    .ToArray();

                var tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                if (Log.IsDebugEnabled && tasksCompleted == false)
                {
                    Log.Debug($"During fetch topology {tasks.Count(t => t.IsCompleted)} servers have responded out of {tasks.Length}");
                }

                replicationDocuments.ForEach(x =>
                {
                    if (x.Task.IsCompleted && x.Task.Result != null)
                    {
                        FailureCounters.ResetFailureCount(x.Node.Url);
                    }
                });

                var newestTopology = replicationDocuments
                    .Where(x => x.Task.IsCompleted && x.Task.Result != null)
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        var index = x.Task.Result.ClusterCommitIndex;
                        return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                    })
                    .FirstOrDefault();

                if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                {
                    tryFailoverServers = true;
                }

                if (newestTopology == null && triedFailoverServers)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetching topology resulted with no topology, tried failover servers, setting leader node to primary node ({primaryNode}).");
                    }

                    // if the leader node is not null, it means that somebody updated it and we don't want to overwrite it with the primary.
                    // we raise the leader changed event although we don't have a real leader, because some tests don't wait for a leader but for any node
                    //Todo: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                    if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                    {
                        return;
                    }

                    if (Nodes.Count == 0)
                    {
                        Nodes = new List<OperationMetadata> { primaryNode };
                    }

                    return;
                }

                if (newestTopology != null)
                {
                    var replicationDocument = newestTopology.Task.Result;
                    var node = newestTopology.Node;

                    if (UpdateTopology(serverClient, node, replicationDocument, serverHash, prevLeader))
                    {
                        return;
                    }
                }

                Thread.Sleep(500);
            }
        }).ContinueWith(t =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });
    }
}
private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        if (firstTime)
        {
            firstTime = false;

            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            var nodes = GetNodes(primaryNode, document?.DataAsJson.JsonDeserialization<ReplicationDocumentWithClusterInformation>());
            if (nodes != null)
            {
                Nodes = nodes;
                var newLeaderNode = GetLeaderNode(Nodes);
                if (newLeaderNode != null)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetched topology from cache, Leader is {LeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                    }
                    SetLeaderNodeToKnownLeader(newLeaderNode);
                    return new CompletedTask();
                }

                if (Log.IsDebugEnabled)
                {
                    Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                }
                SetLeaderNodeToNull();
            }
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(async () =>
        {
            var tryFailoverServers = false;
            var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
            for (;;)
            {
                // taking a snapshot so we can tell if the value changed while we fetch the topology
                var prevLeader = LeaderNode;
                var nodes = NodeUrls.ToHashSet();

                if (tryFailoverServers == false)
                {
                    if (nodes.Count == 0)
                    {
                        nodes.Add(primaryNode);
                    }
                }
                else
                {
                    nodes.Add(primaryNode); // always check the primary node during a failover check

                    foreach (var failoverServer in FailoverServers)
                    {
                        var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (node != null)
                        {
                            nodes.Add(node);
                        }
                    }

                    triedFailoverServers = true;
                }

                var replicationDocuments = nodes
                    .Select(operationMetadata => new
                    {
                        Node = operationMetadata,
                        Task = getReplicationDestinationsTask(operationMetadata)
                    })
                    .ToArray();

                var tasks = replicationDocuments
                    .Select(x => (Task)x.Task)
                    .ToArray();

                var tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                if (Log.IsDebugEnabled && tasksCompleted == false)
                {
                    Log.Debug($"During fetch topology {tasks.Count(t => t.IsCompleted)} servers have responded out of {tasks.Length}");
                }

                replicationDocuments.ForEach(x =>
                {
                    if (x.Task.IsCompleted && x.Task.Result != null)
                    {
                        FailureCounters.ResetFailureCount(x.Node.Url);
                    }
                });

                var newestTopologies = replicationDocuments
                    .Where(x => x.Task.IsCompleted && x.Task.Result != null)
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        var index = x.Task.Result.ClusterCommitIndex;
                        return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                    })
                    .ToList();

                var newestTopology = newestTopologies.FirstOrDefault();

                var hasLeaderCount = replicationDocuments
                    .Count(x => x.Task.IsCompleted && x.Task.Result != null && x.Task.Result.HasLeader);

                if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                {
                    tryFailoverServers = true;
                }

                if (newestTopology == null && triedFailoverServers)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetching topology resulted with no topology, tried failover servers, setting leader node to primary node ({primaryNode}).");
                    }

                    // if the leader node is not null, it means that somebody updated it and we don't want to overwrite it with the primary.
                    // we raise the leader changed event although we don't have a real leader, because some tests don't wait for a leader but for any node
                    //Todo: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                    if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                    {
                        return;
                    }

                    if (Nodes.Count == 0)
                    {
                        Nodes = new List<OperationMetadata> { primaryNode };
                    }

                    return;
                }

                if (Log.IsDebugEnabled)
                {
                    foreach (var x in replicationDocuments)
                    {
                        Log.Debug($"Topology fetched from {x.Node.Url}");
                        Log.Debug($"{JsonConvert.SerializeObject(x.Task?.Result)}");
                    }
                }

                var majorityOfNodesAgreeThereIsLeader = Nodes.Count == 1 || hasLeaderCount > (newestTopology?.Task.Result.Destinations.Count + 1) / 2;

                if (newestTopology != null && majorityOfNodesAgreeThereIsLeader)
                {
                    var replicationDocument = newestTopology.Task.Result;
                    var node = newestTopology.Node;

                    if (newestTopologies.Count > 1 && node.Url.Equals(serverClient.Url) == false)
                    {
                        // we got the replication document from a node other than the primary url,
                        // so we need to add that node's url to the destinations
                        // (we know it exists since we have a majority of nodes that agree on the leader)
                        // and remove the primary url destination from the destinations
                        var sourceNode = node;
                        var destination = replicationDocument.Destinations
                            .FirstOrDefault(x => DestinationUrl(x.Url, x.Database).Equals(serverClient.Url, StringComparison.OrdinalIgnoreCase));
                        if (destination != null)
                        {
                            replicationDocument.Destinations.Remove(destination);
                            // we need to update the cluster information of the primary url for this node
                            replicationDocument.ClusterInformation = destination.ClusterInformation;
                            node = ConvertReplicationDestinationToOperationMetadata(destination, destination.ClusterInformation);
                        }

                        destination = destination ?? replicationDocument.Destinations.FirstOrDefault();
                        if (destination != null)
                        {
                            var database = destination.Database;
                            var networkCredentials = sourceNode.Credentials?.Credentials as NetworkCredential;
                            replicationDocument.Destinations.Add(new ReplicationDestination.ReplicationDestinationWithClusterInformation
                            {
                                Url = sourceNode.Url,
                                Database = database,
                                ApiKey = sourceNode.Credentials?.ApiKey,
                                Username = networkCredentials?.UserName,
                                Password = networkCredentials?.Password,
                                Domain = networkCredentials?.Domain,
                                ClusterInformation = sourceNode.ClusterInformation
                            });
                        }
                    }

                    if (UpdateTopology(serverClient, node, replicationDocument, serverHash, prevLeader))
                    {
                        return;
                    }
                }

                await Task.Delay(3000).ConfigureAwait(false);
            }
        }).ContinueWith(t =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });
    }
}
private Task UpdateReplicationInformationForCluster(OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
            return taskCopy;

        if (firstTime)
        {
            firstTime = false;

            var nodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(serverHash);
            if (nodes != null)
            {
                Nodes = nodes;
                LeaderNode = GetLeaderNode(Nodes);

                if (LeaderNode != null)
                    return new CompletedTask();
            }
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(() =>
        {
            var tryFailoverServers = false;
            var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
            for (;;)
            {
                var nodes = NodeUrls.ToHashSet();

                if (tryFailoverServers == false)
                {
                    if (nodes.Count == 0)
                        nodes.Add(primaryNode);
                }
                else
                {
                    nodes.Add(primaryNode); // always check primary node during failover check

                    foreach (var failoverServer in FailoverServers)
                    {
                        var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (node != null)
                            nodes.Add(node);
                    }

                    triedFailoverServers = true;
                }

                var replicationDocuments = nodes
                    .Select(operationMetadata => new
                    {
                        Node = operationMetadata,
                        Task = getReplicationDestinationsTask(operationMetadata)
                    })
                    .ToArray();

                var tasks = replicationDocuments
                    .Select(x => x.Task)
                    .ToArray();

                Task.WaitAll(tasks);

                replicationDocuments.ForEach(x =>
                {
                    if (x.Task.Result == null)
                        return;

                    FailureCounters.ResetFailureCount(x.Node.Url);
                });

                var newestTopology = replicationDocuments
                    .Where(x => x.Task.Result != null)
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        var index = x.Task.Result.ClusterCommitIndex;
                        return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                    })
                    .FirstOrDefault();

                if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                    tryFailoverServers = true;

                if (newestTopology == null && triedFailoverServers)
                {
                    LeaderNode = primaryNode;
                    Nodes = new List<OperationMetadata> { primaryNode };
                    return;
                }

                if (newestTopology != null)
                {
                    Nodes = GetNodes(newestTopology.Node, newestTopology.Task.Result);
                    LeaderNode = newestTopology.Task.Result.ClusterInformation.IsLeader
                        ? Nodes.FirstOrDefault(n => n.Url == newestTopology.Node.Url)
                        : null;

                    ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(serverHash, Nodes);

                    if (LeaderNode != null)
                        return;
                }

                Thread.Sleep(500);
            }
        }).ContinueWith(t =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });
    }
}