/// <summary>
/// Refreshes the cluster topology (<c>Nodes</c>) and the known leader node.
/// </summary>
/// <remarks>
/// The method works in three stages, all under <c>lock (this)</c>:
/// 1. If a refresh is already in flight, that task is returned as-is.
/// 2. On the first call only, the topology is loaded from the local cache; when the cached
///    topology contains a leader, the leader is set and a completed task is returned.
/// 3. Otherwise a background task is started that repeatedly asks the known nodes (and, once
///    they yielded nothing, the primary node plus the failover servers) for their replication
///    document, picks the newest one (highest term, then highest commit index with leaders
///    preferred) and hands it to <c>UpdateTopology</c>. The loop ends when
///    <c>UpdateTopology</c> returns true or when no topology could be fetched even from the
///    failover servers, in which case the primary node is used as a fallback.
/// When the background task finishes, <c>lastUpdate</c> is stamped and
/// <c>refreshReplicationInformationTask</c> is cleared.
/// </remarks>
/// <param name="serverClient">Client that is passed through to <c>UpdateTopology</c>.</param>
/// <param name="primaryNode">Node used for the cache key, as the initial node to query and as the fallback leader.</param>
/// <param name="getReplicationDestinationsTask">Fetches the replication document from a single node; a null result is treated as "no topology".</param>
/// <returns>The task representing the (possibly already running or already completed) refresh.</returns>
private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    // NOTE(review): locking on `this` is generally discouraged, but other members of the class
    // may synchronize on the same monitor, so it is left untouched here.
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        if (firstTime)
        {
            firstTime = false;
            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            var nodes = GetNodes(primaryNode, document?.DataAsJson.JsonDeserialization<ReplicationDocumentWithClusterInformation>());
            if (nodes != null)
            {
                Nodes = nodes;
                var newLeaderNode = GetLeaderNode(Nodes);
                if (newLeaderNode != null)
                {
                    if (Log.IsDebugEnabled)
                    {
                        // Log the leader we just resolved; LeaderNode has not been updated yet at this point.
                        Log.Debug($"Fetched topology from cache, Leader is {newLeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                    }

                    SetLeaderNodeToKnownLeader(newLeaderNode);
                    return new CompletedTask();
                }

                if (Log.IsDebugEnabled)
                {
                    Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                }

                SetLeaderNodeToNull();
            }
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(() =>
        {
            var tryFailoverServers = false;
            // With no failover servers configured there is nothing left to try after the first round.
            var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;

            for (;;)
            {
                // Taking a snapshot so we can tell if the value changed while we fetch the topology.
                var prevLeader = LeaderNode;

                var nodes = NodeUrls.ToHashSet();
                if (tryFailoverServers == false)
                {
                    if (nodes.Count == 0)
                    {
                        nodes.Add(primaryNode);
                    }
                }
                else
                {
                    nodes.Add(primaryNode); // always check primary node during failover check
                    foreach (var failoverServer in FailoverServers)
                    {
                        var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (node != null)
                        {
                            nodes.Add(node);
                        }
                    }

                    triedFailoverServers = true;
                }

                var replicationDocuments = nodes
                    .Select(operationMetadata => new
                    {
                        Node = operationMetadata,
                        Task = getReplicationDestinationsTask(operationMetadata)
                    })
                    .ToArray();

                var tasks = replicationDocuments
                    .Select(x => (Task)x.Task)
                    .ToArray();

                bool tasksCompleted;
                try
                {
                    tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                }
                catch (AggregateException)
                {
                    // WaitAll throws once every task has finished and at least one of them faulted or was
                    // canceled. A failed fetch is handled like a node that did not answer, so the refresh
                    // loop keeps going instead of dying with an exception nobody observes.
                    tasksCompleted = true;
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"During fetch topology {tasks.Count(t => t.IsFaulted || t.IsCanceled)} servers have failed out of {tasks.Length}");
                    }
                }

                if (Log.IsDebugEnabled && tasksCompleted == false)
                {
                    Log.Debug($"During fetch topology {tasks.Count(t=>t.IsCompleted)} servers have responded out of {tasks.Length}");
                }

                // Only tasks that ran to completion have a readable Result; IsCompleted is also true for
                // faulted/canceled tasks, whose Result getter throws.
                var responses = replicationDocuments
                    .Where(x => x.Task.Status == TaskStatus.RanToCompletion && x.Task.Result != null)
                    .ToArray();

                foreach (var response in responses)
                {
                    FailureCounters.ResetFailureCount(response.Node.Url);
                }

                var newestTopology = responses
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        // At an equal commit index the node that reports itself as leader wins.
                        var index = x.Task.Result.ClusterCommitIndex;
                        return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                    })
                    .FirstOrDefault();

                if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                {
                    tryFailoverServers = true;
                }

                if (newestTopology == null && triedFailoverServers)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetching topology resulted with no topology, tried failoever servers, setting leader node to primary node ({primaryNode}).");
                    }

                    // If the leader node is not null this means that somebody updated it; we don't want to
                    // overwrite it with the primary.
                    // The leader-changed event is raised although we don't have a real leader, because some
                    // tests don't wait for a leader but actually for any node.
                    // TODO: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                    if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                    {
                        return;
                    }

                    if (Nodes.Count == 0)
                    {
                        Nodes = new List<OperationMetadata> { primaryNode };
                    }

                    return;
                }

                if (newestTopology != null)
                {
                    var replicationDocument = newestTopology.Task.Result;
                    var node = newestTopology.Node;
                    if (UpdateTopology(serverClient, node, replicationDocument, serverHash, prevLeader))
                    {
                        return;
                    }
                }

                // Back off before asking the nodes again.
                Thread.Sleep(500);
            }
        }).ContinueWith(t =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });
    }
}
/// <summary>
/// Refreshes the cluster node list (<c>Nodes</c>) and <c>LeaderNode</c>.
/// </summary>
/// <remarks>
/// An already running refresh is reused. On the very first call the node list is read from the
/// local cache and, if it contains a leader, no network round trip is made. Otherwise a
/// background task polls the known nodes (falling back to the primary node plus the failover
/// servers) every 500 ms until a topology with a leader is found, or until nothing could be
/// fetched even from the failover servers, in which case the primary node becomes the leader.
/// </remarks>
/// <param name="primaryNode">Node used for the cache key, as the default node to query and as the fallback leader.</param>
/// <param name="getReplicationDestinationsTask">Fetches the replication document of one node; a null result means "no topology".</param>
/// <returns>The task that represents the refresh.</returns>
private Task UpdateReplicationInformationForCluster(OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    lock (this)
    {
        var hash = ServerHash.GetServerHash(primaryNode.Url);

        // Reuse a refresh that is already in flight.
        var inFlight = refreshReplicationInformationTask;
        if (inFlight != null)
        {
            return inFlight;
        }

        if (firstTime)
        {
            firstTime = false;

            var cachedNodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(hash);
            if (cachedNodes != null)
            {
                Nodes = cachedNodes;
                LeaderNode = GetLeaderNode(Nodes);
                if (LeaderNode != null)
                {
                    return new CompletedTask();
                }
            }
        }

        var refresh = Task.Factory.StartNew(() =>
        {
            var includeFailoverServers = false;
            // Without configured failover servers there is nothing more to try after the first round.
            var failoverServersQueried = FailoverServers == null || FailoverServers.Length == 0;

            while (true)
            {
                var candidates = NodeUrls.ToHashSet();
                if (includeFailoverServers)
                {
                    candidates.Add(primaryNode); // always check primary node during failover check
                    foreach (var failoverServer in FailoverServers)
                    {
                        var failoverNode = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (failoverNode != null)
                        {
                            candidates.Add(failoverNode);
                        }
                    }

                    failoverServersQueried = true;
                }
                else if (candidates.Count == 0)
                {
                    candidates.Add(primaryNode);
                }

                // Start one fetch per candidate, then wait for all of them.
                var fetches = candidates
                    .Select(candidate => new
                    {
                        Node = candidate,
                        Task = getReplicationDestinationsTask(candidate)
                    })
                    .ToArray();

                var pending = fetches
                    .Select(x => x.Task)
                    .ToArray();
                Task.WaitAll(pending);

                foreach (var entry in fetches)
                {
                    if (entry.Task.Result != null)
                    {
                        FailureCounters.ResetFailureCount(entry.Node.Url);
                    }
                }

                // Newest topology: highest term first, then highest commit index (leader wins a tie).
                var newestTopology = fetches
                    .Where(x => x.Task.Result != null)
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        var commitIndex = x.Task.Result.ClusterCommitIndex;
                        if (x.Task.Result.ClusterInformation.IsLeader)
                        {
                            return commitIndex + 1;
                        }

                        return commitIndex;
                    })
                    .FirstOrDefault();

                if (newestTopology == null)
                {
                    if (FailoverServers != null && FailoverServers.Length > 0 && includeFailoverServers == false)
                    {
                        includeFailoverServers = true;
                    }

                    if (failoverServersQueried)
                    {
                        // Nothing answered, not even the failover servers: fall back to the primary node.
                        LeaderNode = primaryNode;
                        Nodes = new List<OperationMetadata> { primaryNode };
                        return;
                    }
                }
                else
                {
                    Nodes = GetNodes(newestTopology.Node, newestTopology.Task.Result);

                    if (newestTopology.Task.Result.ClusterInformation.IsLeader)
                    {
                        LeaderNode = Nodes.FirstOrDefault(n => n.Url == newestTopology.Node.Url);
                    }
                    else
                    {
                        LeaderNode = null;
                    }

                    ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(hash, Nodes);

                    if (LeaderNode != null)
                    {
                        return;
                    }
                }

                Thread.Sleep(500);
            }
        }).ContinueWith(finished =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });

        refreshReplicationInformationTask = refresh;
        return refresh;
    }
}
/// <summary>
/// Refreshes the cluster topology (<c>Nodes</c>) and the known leader node.
/// </summary>
/// <remarks>
/// The method works in three stages, all under <c>lock (this)</c>:
/// 1. If a refresh is already in flight, that task is returned as-is.
/// 2. On the first call only, the topology is loaded from the local cache; when the cached
///    topology contains a leader, the leader is set and a completed task is returned.
/// 3. Otherwise a background task is started that repeatedly asks the known nodes (and, once
///    they yielded nothing, the primary node plus the failover servers) for their replication
///    document. The newest document (highest term, then highest commit index with leaders
///    preferred) is handed to <c>UpdateTopology</c>, but only when a majority of the nodes
///    report that there is a leader. The loop ends when <c>UpdateTopology</c> returns true or
///    when no topology could be fetched even from the failover servers, in which case the
///    primary node is used as a fallback. Between rounds the loop waits 3 seconds.
/// When the background loop finishes, <c>lastUpdate</c> is stamped and
/// <c>refreshReplicationInformationTask</c> is cleared.
/// </remarks>
/// <param name="serverClient">Client whose <c>Url</c> identifies the primary url; also passed through to <c>UpdateTopology</c>.</param>
/// <param name="primaryNode">Node used for the cache key, as the initial node to query and as the fallback leader.</param>
/// <param name="getReplicationDestinationsTask">Fetches the replication document from a single node; a null result is treated as "no topology".</param>
/// <returns>The task representing the (possibly already running or already completed) refresh.</returns>
private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    // NOTE(review): locking on `this` is generally discouraged, but other members of the class
    // may synchronize on the same monitor, so it is left untouched here.
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        if (firstTime)
        {
            firstTime = false;
            var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
            var nodes = GetNodes(primaryNode, document?.DataAsJson.JsonDeserialization<ReplicationDocumentWithClusterInformation>());
            if (nodes != null)
            {
                Nodes = nodes;
                var newLeaderNode = GetLeaderNode(Nodes);
                if (newLeaderNode != null)
                {
                    if (Log.IsDebugEnabled)
                    {
                        // Log the leader we just resolved; LeaderNode has not been updated yet at this point.
                        Log.Debug($"Fetched topology from cache, Leader is {newLeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                    }

                    SetLeaderNodeToKnownLeader(newLeaderNode);
                    return new CompletedTask();
                }

                if (Log.IsDebugEnabled)
                {
                    Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                }

                SetLeaderNodeToNull();
            }
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(async () =>
        {
            var tryFailoverServers = false;
            // With no failover servers configured there is nothing left to try after the first round.
            var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;

            for (;;)
            {
                // Taking a snapshot so we can tell if the value changed while we fetch the topology.
                var prevLeader = LeaderNode;

                var nodes = NodeUrls.ToHashSet();
                if (tryFailoverServers == false)
                {
                    if (nodes.Count == 0)
                    {
                        nodes.Add(primaryNode);
                    }
                }
                else
                {
                    nodes.Add(primaryNode); // always check primary node during failover check
                    foreach (var failoverServer in FailoverServers)
                    {
                        var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (node != null)
                        {
                            nodes.Add(node);
                        }
                    }

                    triedFailoverServers = true;
                }

                var replicationDocuments = nodes
                    .Select(operationMetadata => new
                    {
                        Node = operationMetadata,
                        Task = getReplicationDestinationsTask(operationMetadata),
                    })
                    .ToArray();

                var tasks = replicationDocuments
                    .Select(x => (Task)x.Task)
                    .ToArray();

                bool tasksCompleted;
                try
                {
                    tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                }
                catch (AggregateException)
                {
                    // WaitAll throws once every task has finished and at least one of them faulted or was
                    // canceled. A failed fetch is handled like a node that did not answer, so the refresh
                    // loop keeps going instead of dying on the exception.
                    tasksCompleted = true;
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"During fetch topology {tasks.Count(t => t.IsFaulted || t.IsCanceled)} servers have failed out of {tasks.Length}");
                    }
                }

                if (Log.IsDebugEnabled && tasksCompleted == false)
                {
                    Log.Debug($"During fetch topology {tasks.Count(t => t.IsCompleted)} servers have responded out of {tasks.Length}");
                }

                // Only tasks that ran to completion have a readable Result; IsCompleted is also true for
                // faulted/canceled tasks, whose Result getter throws. The snapshot is taken once so that
                // every decision below is based on the same set of answers.
                var responses = replicationDocuments
                    .Where(x => x.Task.Status == TaskStatus.RanToCompletion && x.Task.Result != null)
                    .ToArray();

                foreach (var response in responses)
                {
                    FailureCounters.ResetFailureCount(response.Node.Url);
                }

                var newestTopologies = responses
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        // At an equal commit index the node that reports itself as leader wins.
                        var index = x.Task.Result.ClusterCommitIndex;
                        return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                    })
                    .ToList();

                var newestTopology = newestTopologies.FirstOrDefault();

                var hasLeaderCount = responses.Count(x => x.Task.Result.HasLeader);

                if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                {
                    tryFailoverServers = true;
                }

                if (newestTopology == null && triedFailoverServers)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetching topology resulted with no topology, tried failoever servers, setting leader node to primary node ({primaryNode}).");
                    }

                    // If the leader node is not null this means that somebody updated it; we don't want to
                    // overwrite it with the primary.
                    // The leader-changed event is raised although we don't have a real leader, because some
                    // tests don't wait for a leader but actually for any node.
                    // TODO: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                    if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                    {
                        return;
                    }

                    if (Nodes.Count == 0)
                    {
                        Nodes = new List<OperationMetadata> { primaryNode };
                    }

                    return;
                }

                if (Log.IsDebugEnabled)
                {
                    foreach (var fetched in replicationDocuments)
                    {
                        Log.Debug($"Topology fetched from {fetched.Node.Url}");

                        // Reading Result of a task that has not finished would block past the topology
                        // timeout (and throw for a faulted one), so only finished fetches are dumped.
                        var fetchedDocument = fetched.Task != null && fetched.Task.Status == TaskStatus.RanToCompletion
                            ? fetched.Task.Result
                            : null;
                        Log.Debug($"{JsonConvert.SerializeObject(fetchedDocument)}");
                    }
                }

                // A single known node trivially agrees with itself; otherwise more than half of the nodes
                // listed in the newest topology (its destinations plus the answering node) must report a leader.
                var majorityOfNodesAgreeThereIsLeader = Nodes.Count == 1 ||
                                                        hasLeaderCount > (newestTopology?.Task.Result.Destinations.Count + 1) / 2;

                if (newestTopology != null && majorityOfNodesAgreeThereIsLeader)
                {
                    var replicationDocument = newestTopology.Task.Result;
                    var node = newestTopology.Node;

                    if (newestTopologies.Count > 1 && node.Url.Equals(serverClient.Url) == false)
                    {
                        // We got the replication document not from the primary url:
                        // need to add the node url destination to the destinations
                        // (we know it exists since we have majority of nodes that agree on the leader)
                        // and remove the primary url destination from the destinations.
                        var sourceNode = node;

                        var destination = replicationDocument.Destinations
                            .FirstOrDefault(x => DestinationUrl(x.Url, x.Database).Equals(serverClient.Url, StringComparison.OrdinalIgnoreCase));
                        if (destination != null)
                        {
                            replicationDocument.Destinations.Remove(destination);

                            // We need to update the cluster information of the primary url for this node.
                            replicationDocument.ClusterInformation = destination.ClusterInformation;
                            node = ConvertReplicationDestinationToOperationMetadata(destination, destination.ClusterInformation);
                        }

                        // Any destination will do as the source of the database name for the new entry.
                        destination = destination ?? replicationDocument.Destinations.FirstOrDefault();
                        if (destination != null)
                        {
                            var database = destination.Database;
                            var networkCredentials = sourceNode.Credentials?.Credentials as NetworkCredential;
                            replicationDocument.Destinations.Add(new ReplicationDestination.ReplicationDestinationWithClusterInformation
                            {
                                Url = sourceNode.Url,
                                Database = database,
                                ApiKey = sourceNode.Credentials?.ApiKey,
                                Username = networkCredentials?.UserName,
                                Password = networkCredentials?.Password,
                                Domain = networkCredentials?.Domain,
                                ClusterInformation = sourceNode.ClusterInformation
                            });
                        }
                    }

                    if (UpdateTopology(serverClient, node, replicationDocument, serverHash, prevLeader))
                    {
                        return;
                    }
                }

                // Back off before asking the nodes again.
                await Task.Delay(3000).ConfigureAwait(false);
            }
        })
        // StartNew with an async lambda yields Task<Task>; without Unwrap the continuation below would run
        // at the first await, clearing refreshReplicationInformationTask while the loop is still running.
        .Unwrap()
        .ContinueWith(t =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });
    }
}
/// <summary>
/// Returns the pieces of <c>NodeUrls</c> obtained by splitting it on commas.
/// </summary>
/// <returns>The comma-separated parts of <c>NodeUrls</c>, in their original order.</returns>
public string[] GetClusterUrls()
{
    const char separator = ',';
    var clusterUrls = NodeUrls.Split(separator);
    return clusterUrls;
}