/// <summary>
/// Refreshes <c>Nodes</c> and <c>LeaderNode</c> by asking the known nodes for their
/// replication/cluster document and adopting the newest topology reported.
/// </summary>
/// <param name="primaryNode">
/// Node whose URL is hashed to key the local topology cache. It is queried when no other
/// node URL is known, always queried once failover servers are included, and used as the
/// leader when no topology could be fetched and the failover servers have been tried.
/// </param>
/// <param name="getReplicationDestinationsTask">
/// Invoked once per candidate node on every polling round; a <c>null</c> result is treated
/// as "this node returned no topology".
/// </param>
/// <returns>
/// The refresh task already in flight if there is one; a new <c>CompletedTask</c> when this
/// is the first call and the local cache yields a leader; otherwise the continuation of a
/// newly started background polling task.
/// </returns>
private Task UpdateReplicationInformationForCluster(OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        // A refresh is already running: every caller gets that same task.
        var pendingRefresh = refreshReplicationInformationTask;
        if (pendingRefresh != null)
        {
            return pendingRefresh;
        }

        // Only the very first call consults the locally cached topology.
        if (firstTime)
        {
            firstTime = false;
            var cachedNodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(serverHash);
            if (cachedNodes != null)
            {
                Nodes = cachedNodes;
                LeaderNode = GetLeaderNode(Nodes);
                if (LeaderNode != null)
                {
                    return new CompletedTask();
                }
            }
        }

        var pollingTask = Task.Factory.StartNew(() =>
        {
            var includeFailoverServers = false;
            // With no failover servers configured there is nothing left to try.
            var failoverServersTried = FailoverServers == null || FailoverServers.Length == 0;

            while (true)
            {
                var candidates = NodeUrls.ToHashSet();
                if (includeFailoverServers)
                {
                    candidates.Add(primaryNode); // always check primary node during failover check
                    foreach (var failoverServer in FailoverServers)
                    {
                        var failoverNode = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (failoverNode != null)
                        {
                            candidates.Add(failoverNode);
                        }
                    }

                    failoverServersTried = true;
                }
                else if (candidates.Count == 0)
                {
                    candidates.Add(primaryNode);
                }

                // Start one topology request per candidate, then block until all have finished.
                var lookups = candidates
                    .Select(candidate => new
                    {
                        Node = candidate,
                        Task = getReplicationDestinationsTask(candidate)
                    })
                    .ToArray();

                var pendingLookups = lookups
                    .Select(lookup => lookup.Task)
                    .ToArray();

                Task.WaitAll(pendingLookups);

                // Every node that produced a document gets its failure counter reset.
                foreach (var lookup in lookups)
                {
                    if (lookup.Task.Result != null)
                    {
                        FailureCounters.ResetFailureCount(lookup.Node.Url);
                    }
                }

                // Highest term wins; ties are broken by commit index, where a node that
                // reports itself as leader is ranked one index higher.
                var newest = lookups
                    .Where(lookup => lookup.Task.Result != null)
                    .OrderByDescending(lookup => lookup.Task.Result.Term)
                    .ThenByDescending(lookup =>
                    {
                        var commitIndex = lookup.Task.Result.ClusterCommitIndex;
                        if (lookup.Task.Result.ClusterInformation.IsLeader)
                        {
                            return commitIndex + 1;
                        }

                        return commitIndex;
                    })
                    .FirstOrDefault();

                if (newest == null)
                {
                    // Nothing answered: include the failover servers on the next round.
                    if (FailoverServers != null && FailoverServers.Length > 0 && includeFailoverServers == false)
                    {
                        includeFailoverServers = true;
                    }

                    // Failover servers were already tried (or none exist): fall back to the primary node.
                    if (failoverServersTried)
                    {
                        LeaderNode = primaryNode;
                        Nodes = new List<OperationMetadata> { primaryNode };
                        return;
                    }
                }
                else
                {
                    var topology = newest.Task.Result;
                    Nodes = GetNodes(newest.Node, topology);

                    if (topology.ClusterInformation.IsLeader)
                    {
                        LeaderNode = Nodes.FirstOrDefault(n => n.Url == newest.Node.Url);
                    }
                    else
                    {
                        LeaderNode = null;
                    }

                    ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(serverHash, Nodes);

                    if (LeaderNode != null)
                    {
                        return;
                    }
                }

                // No leader yet: wait before polling again.
                Thread.Sleep(500);
            }
        });

        // Once polling ends, stamp the update time and clear the in-flight marker.
        var refresh = pollingTask.ContinueWith(t =>
        {
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });

        refreshReplicationInformationTask = refresh;
        return refresh;
    }
}
/// <summary>
/// Refreshes the cluster topology (<c>Nodes</c> and the leader node) by asking the known
/// nodes for their replication/cluster document, adopting the newest topology reported and
/// applying any client configuration that comes with it to <paramref name="serverClient"/>.
/// </summary>
/// <param name="serverClient">
/// Client whose <c>convention</c> supplies the failover behavior when the server leaves it
/// unset, and which is updated from the client configuration of the chosen topology.
/// </param>
/// <param name="primaryNode">
/// Node whose URL is hashed to key the local topology cache. It is queried when no other
/// node URL is known, always queried once failover servers are included, and offered as the
/// leader when no topology could be fetched and the failover servers have been tried.
/// </param>
/// <param name="getReplicationDestinationsTask">
/// Invoked once per candidate node on every polling round; a <c>null</c> result is treated
/// as "this node returned no topology".
/// </param>
/// <returns>
/// The refresh task already in flight if there is one; a new <c>CompletedTask</c> when this
/// is the first call and the local cache yields a leader; otherwise the continuation of a
/// newly started background polling task.
/// </returns>
private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
{
    lock (this)
    {
        var serverHash = ServerHash.GetServerHash(primaryNode.Url);

        // A refresh is already running: every caller gets that same task.
        var taskCopy = refreshReplicationInformationTask;
        if (taskCopy != null)
        {
            return taskCopy;
        }

        // Only the very first call consults the locally cached topology.
        if (firstTime)
        {
            firstTime = false;
            var nodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(serverHash);
            if (nodes != null)
            {
                Nodes = nodes;
                var newLeaderNode = GetLeaderNode(Nodes);
                if (newLeaderNode != null)
                {
                    if (Log.IsDebugEnabled)
                    {
                        // Report the leader found in the cache; LeaderNode has not been
                        // updated yet at this point and would show the previous value.
                        Log.Debug($"Fetched topology from cache, Leader is {newLeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                    }

                    SetLeaderNodeToKnownLeader(newLeaderNode);
                    return new CompletedTask();
                }

                if (Log.IsDebugEnabled)
                {
                    Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                }

                SetLeaderNodeToNull();
            }
        }

        return refreshReplicationInformationTask = Task.Factory.StartNew(() =>
        {
            var tryFailoverServers = false;
            // With no failover servers configured there is nothing left to try.
            var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;

            for (;;)
            {
                // Taking a snapshot so we can tell if the value changed while we fetch the topology.
                var prevLeader = LeaderNode;
                var nodes = NodeUrls.ToHashSet();

                if (tryFailoverServers == false)
                {
                    if (nodes.Count == 0)
                    {
                        nodes.Add(primaryNode);
                    }
                }
                else
                {
                    nodes.Add(primaryNode); // always check primary node during failover check
                    foreach (var failoverServer in FailoverServers)
                    {
                        var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                        if (node != null)
                        {
                            nodes.Add(node);
                        }
                    }

                    triedFailoverServers = true;
                }

                // Start one topology request per candidate node.
                var replicationDocuments = nodes
                    .Select(operationMetadata => new
                    {
                        Node = operationMetadata,
                        Task = getReplicationDestinationsTask(operationMetadata)
                    })
                    .ToArray();

                var tasks = replicationDocuments
                    .Select(x => (Task)x.Task)
                    .ToArray();

                // Wait for the requests, but no longer than the configured timeout;
                // requests still pending afterwards are ignored for this round.
                var tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                if (Log.IsDebugEnabled && tasksCompleted == false)
                {
                    Log.Debug($"During fetch topology {tasks.Count(t=>t.IsCompleted)} servers have responded out of {tasks.Length}");
                }

                // NOTE(review): Task.IsCompleted is also true for faulted/canceled tasks, in which
                // case reading Result rethrows - confirm the supplied tasks never fault.
                replicationDocuments.ForEach(x =>
                {
                    if (x.Task.IsCompleted && x.Task.Result != null)
                    {
                        FailureCounters.ResetFailureCount(x.Node.Url);
                    }
                });

                // Highest term wins; ties are broken by commit index, where a node that
                // reports itself as leader is ranked one index higher.
                var newestTopology = replicationDocuments
                    .Where(x => x.Task.IsCompleted && x.Task.Result != null)
                    .OrderByDescending(x => x.Task.Result.Term)
                    .ThenByDescending(x =>
                    {
                        var index = x.Task.Result.ClusterCommitIndex;
                        return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                    })
                    .FirstOrDefault();

                // Nothing answered: include the failover servers on the next round.
                if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                {
                    tryFailoverServers = true;
                }

                if (newestTopology == null && triedFailoverServers)
                {
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Fetching topology resulted with no topology, tried failoever servers, setting leader node to primary node ({primaryNode}).");
                    }

                    // If the leader node is not null this means that somebody updated it, and we don't want to overwrite it with the primary.
                    // We raise the leader-changed event although we don't have a real leader, because some tests don't wait for a leader but for any node.
                    //Todo: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                    if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                    {
                        return;
                    }

                    if (Nodes.Count == 0)
                    {
                        Nodes = new List<OperationMetadata> { primaryNode };
                    }

                    return;
                }

                if (newestTopology != null)
                {
                    Nodes = GetNodes(newestTopology.Node, newestTopology.Task.Result);
                    var newLeader = newestTopology.Task.Result.ClusterInformation.IsLeader
                        ? Nodes.FirstOrDefault(n => n.Url == newestTopology.Node.Url)
                        : null;

                    ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(serverHash, Nodes);

                    if (newestTopology.Task.Result.ClientConfiguration != null)
                    {
                        if (newestTopology.Task.Result.ClientConfiguration.FailoverBehavior == null)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Server side failoever configuration is set to let client decide, client decided on {serverClient.convention.FailoverBehavior}. ");
                            }

                            // The server left the choice open: keep the client's own failover behavior.
                            newestTopology.Task.Result.ClientConfiguration.FailoverBehavior = serverClient.convention.FailoverBehavior;
                        }
                        else if (Log.IsDebugEnabled)
                        {
                            Log.Debug($"Server enforced failoever behavior {newestTopology.Task.Result.ClientConfiguration.FailoverBehavior}. ");
                        }

                        serverClient.convention.UpdateFrom(newestTopology.Task.Result.ClientConfiguration);
                    }

                    if (newLeader != null)
                    {
                        SetLeaderNodeToKnownLeader(newLeader);
                        return;
                    }

                    // Here we try to set the leader node to null, but we might fail since it was changed meanwhile.
                    // We just need to make sure that the leader node is not null and then we can stop searching.
                    if (SetLeaderNodeToNullIfPrevIsTheSame(prevLeader) == false && LeaderNode != null)
                    {
                        return;
                    }
                }

                // No leader yet: wait before polling again.
                Thread.Sleep(500);
            }
        }).ContinueWith(t =>
        {
            // Once polling ends, stamp the update time and clear the in-flight marker.
            lastUpdate = SystemTime.UtcNow;
            refreshReplicationInformationTask = null;
        });
    }
}