/// <summary>
/// Refreshes the cluster topology (<c>Nodes</c> and <c>LeaderNode</c>) reachable through
/// <paramref name="primaryNode"/>.
/// </summary>
/// <remarks>
/// Runs under <c>lock (this)</c>. If a refresh is already in flight, that task is returned.
/// On the very first call the locally cached node list is tried first; when it contains a
/// leader a completed task is returned immediately. Otherwise a background task is started
/// that queries the known nodes every 500 ms until a leader is found, or until nothing
/// answers even after the failover servers were included, in which case the primary node is
/// used as the only node and as the leader.
/// </remarks>
/// <param name="primaryNode">Node used when no other nodes are known and as the last-resort leader.</param>
/// <param name="getReplicationDestinationsTask">
/// Callback that fetches the replication document of a single node; a <c>null</c> result is
/// treated as "no answer from this node".
/// </param>
/// <returns>The task representing the (possibly already running) refresh.</returns>
private Task UpdateReplicationInformationForCluster(OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
        {
            lock (this)
            {
                var hash = ServerHash.GetServerHash(primaryNode.Url);

                // Piggy-back on a refresh that is already running.
                var pendingRefresh = refreshReplicationInformationTask;
                if (pendingRefresh != null)
                {
                    return pendingRefresh;
                }

                if (firstTime)
                {
                    firstTime = false;

                    // First call only: try the topology persisted in the local cache.
                    var cachedNodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(hash);
                    if (cachedNodes != null)
                    {
                        Nodes = cachedNodes;
                        LeaderNode = GetLeaderNode(Nodes);

                        if (LeaderNode != null)
                        {
                            return new CompletedTask();
                        }
                    }
                }

                var refresh = Task.Factory.StartNew(() =>
                {
                    var includeFailoverServers = false;
                    // With no failover servers configured there is nothing left to try after the first round.
                    var failoverServersTried = FailoverServers == null || FailoverServers.Length == 0;

                    while (true)
                    {
                        var candidates = NodeUrls.ToHashSet();

                        if (includeFailoverServers)
                        {
                            // always check primary node during failover check
                            candidates.Add(primaryNode);

                            foreach (var failoverServer in FailoverServers)
                            {
                                var failoverNode = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                                if (failoverNode != null)
                                {
                                    candidates.Add(failoverNode);
                                }
                            }

                            failoverServersTried = true;
                        }
                        else if (candidates.Count == 0)
                        {
                            candidates.Add(primaryNode);
                        }

                        // Start one fetch per candidate, then wait for all of them.
                        var fetches = candidates
                            .Select(candidate => new
                            {
                                Node = candidate,
                                Task = getReplicationDestinationsTask(candidate)
                            })
                            .ToArray();

                        Task.WaitAll(fetches.Select(fetch => fetch.Task).ToArray());

                        // Every node that answered gets its failure counter reset.
                        fetches.ForEach(fetch =>
                        {
                            if (fetch.Task.Result != null)
                            {
                                FailureCounters.ResetFailureCount(fetch.Node.Url);
                            }
                        });

                        // Highest term wins; within a term the highest commit index wins,
                        // with a +1 bonus for the node that reports itself as leader.
                        var newest = fetches
                            .Where(fetch => fetch.Task.Result != null)
                            .OrderByDescending(fetch => fetch.Task.Result.Term)
                            .ThenByDescending(fetch =>
                            {
                                var document = fetch.Task.Result;
                                return document.ClusterInformation.IsLeader
                                    ? document.ClusterCommitIndex + 1
                                    : document.ClusterCommitIndex;
                            })
                            .FirstOrDefault();

                        if (newest == null)
                        {
                            // Nobody answered: include the failover servers in the next round.
                            if (FailoverServers != null && FailoverServers.Length > 0 && includeFailoverServers == false)
                            {
                                includeFailoverServers = true;
                            }

                            // Nobody answered even with failover servers: fall back to the primary node.
                            if (failoverServersTried)
                            {
                                LeaderNode = primaryNode;
                                Nodes = new List<OperationMetadata>
                                {
                                    primaryNode
                                };
                                return;
                            }
                        }
                        else
                        {
                            var topology = newest.Task.Result;

                            Nodes = GetNodes(newest.Node, topology);

                            OperationMetadata leader = null;
                            if (topology.ClusterInformation.IsLeader)
                            {
                                leader = Nodes.FirstOrDefault(n => n.Url == newest.Node.Url);
                            }
                            LeaderNode = leader;

                            ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(hash, Nodes);

                            if (LeaderNode != null)
                            {
                                return;
                            }
                        }

                        // No leader yet; wait a little before asking again.
                        Thread.Sleep(500);
                    }
                }).ContinueWith(t =>
                {
                    // Record the refresh time and allow the next refresh to start.
                    lastUpdate = SystemTime.UtcNow;
                    refreshReplicationInformationTask = null;
                });

                refreshReplicationInformationTask = refresh;
                return refresh;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Refreshes the cluster topology (<c>Nodes</c> and the leader node) reachable through
        /// <paramref name="primaryNode"/> and applies any server-provided client configuration
        /// to <paramref name="serverClient"/>.
        /// </summary>
        /// <remarks>
        /// Runs under <c>lock (this)</c>. If a refresh is already in flight, that task is returned.
        /// On the very first call the locally cached node list is tried first; when it contains a
        /// leader a completed task is returned immediately. Otherwise a background task is started
        /// that queries the known nodes (waiting at most <c>ReplicationDestinationsTopologyTimeout</c>
        /// per round) every 500 ms until a leader is known, or until nothing answers even after the
        /// failover servers were included.
        /// </remarks>
        /// <param name="serverClient">
        /// Client whose <c>convention</c> supplies the failover behavior when the server leaves it
        /// unset, and which is updated from the server's client configuration.
        /// </param>
        /// <param name="primaryNode">Node used when no other nodes are known and as the last-resort leader.</param>
        /// <param name="getReplicationDestinationsTask">
        /// Callback that fetches the replication document of a single node; a <c>null</c> result is
        /// treated as "no answer from this node".
        /// </param>
        /// <returns>The task representing the (possibly already running) refresh.</returns>
        private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func <OperationMetadata, Task <ReplicationDocumentWithClusterInformation> > getReplicationDestinationsTask)
        {
            lock (this)
            {
                var serverHash = ServerHash.GetServerHash(primaryNode.Url);

                // Piggy-back on a refresh that is already running.
                var taskCopy = refreshReplicationInformationTask;
                if (taskCopy != null)
                {
                    return taskCopy;
                }

                if (firstTime)
                {
                    firstTime = false;

                    // First call only: try the topology persisted in the local cache.
                    var nodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(serverHash);
                    if (nodes != null)
                    {
                        Nodes = nodes;
                        var newLeaderNode = GetLeaderNode(Nodes);
                        if (newLeaderNode != null)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                // Log the leader read from the cache; LeaderNode has not been updated yet at this point.
                                Log.Debug($"Fetched topology from cache, Leader is {newLeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                            }
                            SetLeaderNodeToKnownLeader(newLeaderNode);
                            return new CompletedTask();
                        }
                        if (Log.IsDebugEnabled)
                        {
                            Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                        }
                        SetLeaderNodeToNull();
                    }
                }

                return refreshReplicationInformationTask = Task.Factory.StartNew(() =>
                {
                    var tryFailoverServers = false;
                    // With no failover servers configured there is nothing left to try after the first round.
                    var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
                    for (;;)
                    {
                        // Taking a snapshot so we can tell if the value changed while we fetch the topology.
                        var prevLeader = LeaderNode;
                        var nodes = NodeUrls.ToHashSet();

                        if (tryFailoverServers == false)
                        {
                            if (nodes.Count == 0)
                            {
                                nodes.Add(primaryNode);
                            }
                        }
                        else
                        {
                            nodes.Add(primaryNode); // always check primary node during failover check

                            foreach (var failoverServer in FailoverServers)
                            {
                                var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                                if (node != null)
                                {
                                    nodes.Add(node);
                                }
                            }

                            triedFailoverServers = true;
                        }

                        // Start one fetch per candidate node.
                        var replicationDocuments = nodes
                            .Select(operationMetadata => new
                            {
                                Node = operationMetadata,
                                Task = getReplicationDestinationsTask(operationMetadata)
                            })
                            .ToArray();

                        var tasks = replicationDocuments
                            .Select(x => (Task)x.Task)
                            .ToArray();

                        // Wait for all fetches, but no longer than the configured timeout.
                        var tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                        if (Log.IsDebugEnabled && tasksCompleted == false)
                        {
                            Log.Debug($"During fetch topology {tasks.Count(t=>t.IsCompleted)} servers have responded out of {tasks.Length}");
                        }

                        // NOTE(review): IsCompleted is also true for faulted/cancelled tasks, in which case
                        // reading Result throws. Presumably getReplicationDestinationsTask never faults
                        // (returns null instead) - confirm against its implementation.
                        replicationDocuments.ForEach(x =>
                        {
                            if (x.Task.IsCompleted && x.Task.Result != null)
                            {
                                FailureCounters.ResetFailureCount(x.Node.Url);
                            }
                        });

                        // Highest term wins; within a term the highest commit index wins,
                        // with a +1 bonus for the node that reports itself as leader.
                        var newestTopology = replicationDocuments
                            .Where(x => x.Task.IsCompleted && x.Task.Result != null)
                            .OrderByDescending(x => x.Task.Result.Term)
                            .ThenByDescending(x =>
                            {
                                var index = x.Task.Result.ClusterCommitIndex;
                                return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                            })
                            .FirstOrDefault();

                        // Nobody answered: include the failover servers in the next round.
                        if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                        {
                            tryFailoverServers = true;
                        }

                        if (newestTopology == null && triedFailoverServers)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Fetching topology resulted with no topology, tried failoever servers, setting leader node to primary node ({primaryNode}).");
                            }
                            // If the leader node is not null this means that somebody updated it; we don't want to overwrite it with the primary.
                            // I'm raising the leader changed event although we don't have a real leader because some tests don't wait for a leader but actually any node.
                            //Todo: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                            if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                            {
                                return;
                            }

                            // Make sure at least the primary node is known.
                            if (Nodes.Count == 0)
                            {
                                Nodes = new List<OperationMetadata>
                                {
                                    primaryNode
                                };
                            }
                            return;
                        }

                        if (newestTopology != null)
                        {
                            var topology = newestTopology.Task.Result;

                            Nodes = GetNodes(newestTopology.Node, topology);
                            var newLeader = topology.ClusterInformation.IsLeader
                                ? Nodes.FirstOrDefault(n => n.Url == newestTopology.Node.Url)
                                : null;

                            ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(serverHash, Nodes);

                            if (topology.ClientConfiguration != null)
                            {
                                if (topology.ClientConfiguration.FailoverBehavior == null)
                                {
                                    // The server did not dictate a failover behavior; keep the client's own setting.
                                    if (Log.IsDebugEnabled)
                                    {
                                        Log.Debug($"Server side failoever configuration is set to let client decide, client decided on {serverClient.convention.FailoverBehavior}. ");
                                    }
                                    topology.ClientConfiguration.FailoverBehavior = serverClient.convention.FailoverBehavior;
                                }
                                else if (Log.IsDebugEnabled)
                                {
                                    Log.Debug($"Server enforced failoever behavior {topology.ClientConfiguration.FailoverBehavior}. ");
                                }
                                serverClient.convention.UpdateFrom(topology.ClientConfiguration);
                            }

                            if (newLeader != null)
                            {
                                SetLeaderNodeToKnownLeader(newLeader);
                                return;
                            }

                            // Here we try to set the leader node to null but we might fail since it was changed.
                            // We just need to make sure that the leader node is not null and we can stop searching.
                            if (SetLeaderNodeToNullIfPrevIsTheSame(prevLeader) == false && LeaderNode != null)
                            {
                                return;
                            }
                        }

                        // No leader yet; wait a little before asking again.
                        Thread.Sleep(500);
                    }
                }).ContinueWith(t =>
                {
                    // Record the refresh time and allow the next refresh to start.
                    lastUpdate = SystemTime.UtcNow;
                    refreshReplicationInformationTask = null;
                });
            }
        }