コード例 #1
0
        private async Task<AsyncOperationResult<T>> TryClusterOperationAsync<T>(OperationMetadata node, Func<OperationMetadata, Task<T>> operation, bool avoidThrowing, CancellationToken token)
        {
            Debug.Assert(node != null);

            token.ThrowIfCancellationRequested();
            var shouldRetry = false;

            var operationResult = new AsyncOperationResult<T>();
            try
            {
                operationResult.Result = await operation(node).ConfigureAwait(false);
                operationResult.Success = true;
            }
            catch (Exception e)
            {
                bool wasTimeout;
                if (HttpConnectionHelper.IsServerDown(e, out wasTimeout))
                {
                    shouldRetry = true;
                    operationResult.WasTimeout = wasTimeout;
                }
                else
                {
                    var ae = e as AggregateException;
                    ErrorResponseException errorResponseException;
                    if (ae != null)
                        errorResponseException = ae.ExtractSingleInnerException() as ErrorResponseException;
                    else
                        errorResponseException = e as ErrorResponseException;

                    if (errorResponseException != null && (errorResponseException.StatusCode == HttpStatusCode.Redirect || errorResponseException.StatusCode == HttpStatusCode.ExpectationFailed))
                        shouldRetry = true;
                }

                if (shouldRetry == false && avoidThrowing == false)
                    throw;
            }

            if (operationResult.Success)
                FailureCounters.ResetFailureCount(node.Url);

            return operationResult;
        }
コード例 #2
0
        private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func <OperationMetadata, Task <ReplicationDocumentWithClusterInformation> > getReplicationDestinationsTask)
        {
            lock (this)
            {
                var serverHash = ServerHash.GetServerHash(primaryNode.Url);

                var taskCopy = refreshReplicationInformationTask;
                if (taskCopy != null)
                {
                    return(taskCopy);
                }

                if (firstTime)
                {
                    firstTime = false;
                    var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
                    var nodes    = GetNodes(primaryNode, document?.DataAsJson.JsonDeserialization <ReplicationDocumentWithClusterInformation>());
                    if (nodes != null)
                    {
                        Nodes = nodes;
                        var newLeaderNode = GetLeaderNode(Nodes);
                        if (newLeaderNode != null)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Fetched topology from cache, Leader is {LeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                            }
                            SetLeaderNodeToKnownLeader(newLeaderNode);
                            return(new CompletedTask());
                        }
                        if (Log.IsDebugEnabled)
                        {
                            Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                        }
                        SetLeaderNodeToNull();
                    }
                }

                return(refreshReplicationInformationTask = Task.Factory.StartNew(() =>
                {
                    var tryFailoverServers = false;
                    var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
                    for (;;)
                    {
                        //taking a snapshot so we could tell if the value changed while we fetch the topology
                        var prevLeader = LeaderNode;
                        var nodes = NodeUrls.ToHashSet();

                        if (tryFailoverServers == false)
                        {
                            if (nodes.Count == 0)
                            {
                                nodes.Add(primaryNode);
                            }
                        }
                        else
                        {
                            nodes.Add(primaryNode); // always check primary node during failover check

                            foreach (var failoverServer in FailoverServers)
                            {
                                var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                                if (node != null)
                                {
                                    nodes.Add(node);
                                }
                            }

                            triedFailoverServers = true;
                        }

                        var replicationDocuments = nodes
                                                   .Select(operationMetadata => new
                        {
                            Node = operationMetadata,
                            Task = getReplicationDestinationsTask(operationMetadata)
                        })
                                                   .ToArray();

                        var tasks = replicationDocuments
                                    .Select(x => (Task)x.Task)
                                    .ToArray();

                        var tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                        if (Log.IsDebugEnabled && tasksCompleted == false)
                        {
                            Log.Debug($"During fetch topology {tasks.Count(t=>t.IsCompleted)} servers have responded out of {tasks.Length}");
                        }
                        replicationDocuments.ForEach(x =>
                        {
                            if (x.Task.IsCompleted && x.Task.Result != null)
                            {
                                FailureCounters.ResetFailureCount(x.Node.Url);
                            }
                        });

                        var newestTopology = replicationDocuments
                                             .Where(x => x.Task.IsCompleted && x.Task.Result != null)
                                             .OrderByDescending(x => x.Task.Result.Term)
                                             .ThenByDescending(x =>
                        {
                            var index = x.Task.Result.ClusterCommitIndex;
                            return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                        })
                                             .FirstOrDefault();


                        if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                        {
                            tryFailoverServers = true;
                        }

                        if (newestTopology == null && triedFailoverServers)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Fetching topology resulted with no topology, tried failoever servers, setting leader node to primary node ({primaryNode}).");
                            }
                            //if the leader Node is not null this means that somebody updated it, we don't want to overwrite it with the primary.
                            // i'm rasing the leader changed event although we don't have a real leader because some tests don't wait for leader but actually any node
                            //Todo: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                            if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                            {
                                return;
                            }

                            if (Nodes.Count == 0)
                            {
                                Nodes = new List <OperationMetadata>
                                {
                                    primaryNode
                                }
                            }
                            ;
                            return;
                        }

                        if (newestTopology != null)
                        {
                            var replicationDocument = newestTopology.Task.Result;
                            var node = newestTopology.Node;
                            if (UpdateTopology(serverClient, node, replicationDocument, serverHash, prevLeader))
                            {
                                return;
                            }
                        }

                        Thread.Sleep(500);
                    }
                }).ContinueWith(t =>
                {
                    lastUpdate = SystemTime.UtcNow;
                    refreshReplicationInformationTask = null;
                }));
            }
        }
コード例 #3
0
        private async Task <AsyncOperationResult <T> > TryClusterOperationAsync <T>(OperationMetadata node, Func <OperationMetadata, IRequestTimeMetric, Task <T> > operation, bool avoidThrowing, CancellationToken token)
        {
            Debug.Assert(node != null);

            token.ThrowIfCancellationRequested();
            var shouldRetry = false;

            var operationResult = new AsyncOperationResult <T>();

            try
            {
                operationResult.Result = await operation(node, null).ConfigureAwait(false);

                operationResult.Success = true;
            }
            catch (Exception e)
            {
                bool wasTimeout;
                if (HttpConnectionHelper.IsServerDown(e, out wasTimeout))
                {
                    shouldRetry = true;
                    operationResult.WasTimeout = wasTimeout;
                    if (Log.IsDebugEnabled)
                    {
                        Log.Debug($"Operation failed because server {node.Url} is down.");
                    }
                }
                else
                {
                    var ae = e as AggregateException;
                    ErrorResponseException errorResponseException;
                    if (ae != null)
                    {
                        errorResponseException = ae.ExtractSingleInnerException() as ErrorResponseException;
                    }
                    else
                    {
                        errorResponseException = e as ErrorResponseException;
                    }

                    if (errorResponseException != null)
                    {
                        if (errorResponseException.StatusCode == HttpStatusCode.Redirect)
                        {
                            IEnumerable <string> values;
                            if (errorResponseException.Response.Headers.TryGetValues("Raven-Leader-Redirect", out values) == false &&
                                values.Contains("true") == false)
                            {
                                throw new InvalidOperationException("Got 302 Redirect, but without Raven-Leader-Redirect: true header, maybe there is a proxy in the middle", e);
                            }
                            var redirectUrl   = errorResponseException.Response.Headers.Location.ToString();
                            var newLeaderNode = Nodes.FirstOrDefault(n => n.Url.Equals(redirectUrl)) ?? new OperationMetadata(redirectUrl, node.Credentials, node.ClusterInformation);
                            SetLeaderNodeToKnownLeader(newLeaderNode);
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Redirecting to {redirectUrl} because {node.Url} responded with 302-redirect.");
                            }
                            return(await TryClusterOperationAsync(newLeaderNode, operation, avoidThrowing, token).ConfigureAwait(false));
                        }

                        if (errorResponseException.StatusCode == HttpStatusCode.ExpectationFailed)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Operation failed with status code {HttpStatusCode.ExpectationFailed}, will retry.");
                            }
                            shouldRetry = true;
                        }
                    }
                }

                if (shouldRetry == false && avoidThrowing == false)
                {
                    throw;
                }

                operationResult.Error = e;
            }

            if (operationResult.Success)
            {
                FailureCounters.ResetFailureCount(node.Url);
            }

            return(operationResult);
        }
コード例 #4
0
        private Task UpdateReplicationInformationForCluster(AsyncServerClient serverClient, OperationMetadata primaryNode, Func <OperationMetadata, Task <ReplicationDocumentWithClusterInformation> > getReplicationDestinationsTask)
        {
            lock (this)
            {
                var serverHash = ServerHash.GetServerHash(primaryNode.Url);

                var taskCopy = refreshReplicationInformationTask;
                if (taskCopy != null)
                {
                    return(taskCopy);
                }

                if (firstTime)
                {
                    firstTime = false;
                    var document = ReplicationInformerLocalCache.TryLoadReplicationInformationFromLocalCache(serverHash);
                    var nodes    = GetNodes(primaryNode, document?.DataAsJson.JsonDeserialization <ReplicationDocumentWithClusterInformation>());
                    if (nodes != null)
                    {
                        Nodes = nodes;
                        var newLeaderNode = GetLeaderNode(Nodes);
                        if (newLeaderNode != null)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Fetched topology from cache, Leader is {LeaderNode}\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                            }
                            SetLeaderNodeToKnownLeader(newLeaderNode);
                            return(new CompletedTask());
                        }
                        if (Log.IsDebugEnabled)
                        {
                            Log.Debug($"Fetched topology from cache, no leader found.\n Nodes:" + string.Join(",", Nodes.Select(n => n.Url)));
                        }
                        SetLeaderNodeToNull();
                    }
                }

                return(refreshReplicationInformationTask = Task.Factory.StartNew(async() =>
                {
                    var tryFailoverServers = false;
                    var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
                    for (; ;)
                    {
                        //taking a snapshot so we could tell if the value changed while we fetch the topology
                        var prevLeader = LeaderNode;
                        var nodes = NodeUrls.ToHashSet();

                        if (tryFailoverServers == false)
                        {
                            if (nodes.Count == 0)
                            {
                                nodes.Add(primaryNode);
                            }
                        }
                        else
                        {
                            nodes.Add(primaryNode); // always check primary node during failover check

                            foreach (var failoverServer in FailoverServers)
                            {
                                var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                                if (node != null)
                                {
                                    nodes.Add(node);
                                }
                            }

                            triedFailoverServers = true;
                        }

                        var replicationDocuments = nodes
                                                   .Select(operationMetadata => new
                        {
                            Node = operationMetadata,
                            Task = getReplicationDestinationsTask(operationMetadata),
                        })
                                                   .ToArray();

                        var tasks = replicationDocuments
                                    .Select(x => (Task)x.Task)
                                    .ToArray();

                        var tasksCompleted = Task.WaitAll(tasks, ReplicationDestinationsTopologyTimeout);
                        if (Log.IsDebugEnabled && tasksCompleted == false)
                        {
                            Log.Debug($"During fetch topology {tasks.Count(t => t.IsCompleted)} servers have responded out of {tasks.Length}");
                        }
                        replicationDocuments.ForEach(x =>
                        {
                            if (x.Task.IsCompleted && x.Task.Result != null)
                            {
                                FailureCounters.ResetFailureCount(x.Node.Url);
                            }
                        });

                        var newestTopologies = replicationDocuments
                                               .Where(x => x.Task.IsCompleted && x.Task.Result != null)
                                               .OrderByDescending(x => x.Task.Result.Term)
                                               .ThenByDescending(x =>
                        {
                            var index = x.Task.Result.ClusterCommitIndex;
                            return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                        }).ToList();

                        var newestTopology = newestTopologies.FirstOrDefault();

                        var hasLeaderCount = replicationDocuments
                                             .Count(x => x.Task.IsCompleted && x.Task.Result != null && x.Task.Result.HasLeader);

                        if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                        {
                            tryFailoverServers = true;
                        }

                        if (newestTopology == null && triedFailoverServers)
                        {
                            if (Log.IsDebugEnabled)
                            {
                                Log.Debug($"Fetching topology resulted with no topology, tried failoever servers, setting leader node to primary node ({primaryNode}).");
                            }
                            //if the leader Node is not null this means that somebody updated it, we don't want to overwrite it with the primary.
                            // i'm raising the leader changed event although we don't have a real leader because some tests don't wait for leader but actually any node
                            //Todo: change back to: if (SetLeaderNodeIfLeaderIsNull(primaryNode, false) == false)
                            if (SetLeaderNodeIfLeaderIsNull(primaryNode) == false)
                            {
                                return;
                            }

                            if (Nodes.Count == 0)
                            {
                                Nodes = new List <OperationMetadata>
                                {
                                    primaryNode
                                }
                            }
                            ;
                            return;
                        }
                        if (Log.IsDebugEnabled)
                        {
                            foreach (var x in replicationDocuments)
                            {
                                Log.Debug($"Topology fetched from {x.Node.Url}");
                                Log.Debug($"{JsonConvert.SerializeObject(x.Task?.Result)}");
                            }
                        }
                        var majorityOfNodesAgreeThereIsLeader = Nodes.Count == 1 || hasLeaderCount > (newestTopology?.Task.Result.Destinations.Count + 1) / 2;
                        if (newestTopology != null && majorityOfNodesAgreeThereIsLeader)
                        {
                            var replicationDocument = newestTopology.Task.Result;
                            var node = newestTopology.Node;

                            if (newestTopologies.Count > 1 && node.Url.Equals(serverClient.Url) == false)
                            {
                                // we got the replication document not from the primary url
                                // need to add the node url destination to the destinations
                                // (we know it exists since we have majority of nodes that agree on the leader)
                                // and remove the primary url destination from the destinations

                                var sourceNode = node;
                                var destination = replicationDocument.Destinations
                                                  .FirstOrDefault(x => DestinationUrl(x.Url, x.Database).Equals(serverClient.Url, StringComparison.OrdinalIgnoreCase));
                                if (destination != null)
                                {
                                    replicationDocument.Destinations.Remove(destination);
                                    // we need to update the cluster information of the primary url for this node
                                    replicationDocument.ClusterInformation = destination.ClusterInformation;
                                    node = ConvertReplicationDestinationToOperationMetadata(destination, destination.ClusterInformation);
                                }

                                destination = destination ?? replicationDocument.Destinations.FirstOrDefault();
                                if (destination != null)
                                {
                                    var database = destination.Database;
                                    var networkCredentials = sourceNode.Credentials?.Credentials as NetworkCredential;
                                    replicationDocument.Destinations.Add(new ReplicationDestination.ReplicationDestinationWithClusterInformation
                                    {
                                        Url = sourceNode.Url,
                                        Database = database,
                                        ApiKey = sourceNode.Credentials?.ApiKey,
                                        Username = networkCredentials?.UserName,
                                        Password = networkCredentials?.Password,
                                        Domain = networkCredentials?.Domain,
                                        ClusterInformation = sourceNode.ClusterInformation
                                    });
                                }
                            }

                            if (UpdateTopology(serverClient, node, replicationDocument, serverHash, prevLeader))
                            {
                                return;
                            }
                        }
                        await Task.Delay(3000).ConfigureAwait(false);
                    }
                }).ContinueWith(t =>
                {
                    lastUpdate = SystemTime.UtcNow;
                    refreshReplicationInformationTask = null;
                }));
            }
        }
コード例 #5
0
        private Task UpdateReplicationInformationForCluster(OperationMetadata primaryNode, Func<OperationMetadata, Task<ReplicationDocumentWithClusterInformation>> getReplicationDestinationsTask)
        {
            lock (this)
            {
                var serverHash = ServerHash.GetServerHash(primaryNode.Url);

                var taskCopy = refreshReplicationInformationTask;
                if (taskCopy != null)
                    return taskCopy;

                if (firstTime)
                {
                    firstTime = false;

                    var nodes = ReplicationInformerLocalCache.TryLoadClusterNodesFromLocalCache(serverHash);
                    if (nodes != null)
                    {
                        Nodes = nodes;
                        LeaderNode = GetLeaderNode(Nodes);

                        if (LeaderNode != null)
                            return new CompletedTask();
                    }
                }

                return refreshReplicationInformationTask = Task.Factory.StartNew(() =>
                {
                    var tryFailoverServers = false;
                    var triedFailoverServers = FailoverServers == null || FailoverServers.Length == 0;
                    for (; ; )
                    {
                        var nodes = NodeUrls.ToHashSet();

                        if (tryFailoverServers == false)
                        {
                            if (nodes.Count == 0)
                                nodes.Add(primaryNode);
                        }
                        else
                        {
                            nodes.Add(primaryNode); // always check primary node during failover check

                            foreach (var failoverServer in FailoverServers)
                            {
                                var node = ConvertReplicationDestinationToOperationMetadata(failoverServer, ClusterInformation.NotInCluster);
                                if (node != null)
                                    nodes.Add(node);
                            }

                            triedFailoverServers = true;
                        }

                        var replicationDocuments = nodes
                            .Select(operationMetadata => new
                            {
                                Node = operationMetadata,
                                Task = getReplicationDestinationsTask(operationMetadata)
                            })
                            .ToArray();

                        var tasks = replicationDocuments
                            .Select(x => x.Task)
                            .ToArray();

                        Task.WaitAll(tasks);

                        replicationDocuments.ForEach(x =>
                        {
                            if (x.Task.Result == null)
                                return;

                            FailureCounters.ResetFailureCount(x.Node.Url);
                        });

                        var newestTopology = replicationDocuments
                            .Where(x => x.Task.Result != null)
                            .OrderByDescending(x => x.Task.Result.Term)
                            .ThenByDescending(x =>
                             {
                                var index = x.Task.Result.ClusterCommitIndex;
                                return x.Task.Result.ClusterInformation.IsLeader ? index + 1 : index;
                             })
                            .FirstOrDefault();

                        if (newestTopology == null && FailoverServers != null && FailoverServers.Length > 0 && tryFailoverServers == false)
                            tryFailoverServers = true;

                        if (newestTopology == null && triedFailoverServers)
                        {
                            LeaderNode = primaryNode;
                            Nodes = new List<OperationMetadata>
                            {
                                primaryNode
                            };
                            return;
                        }

                        if (newestTopology != null)
                        {
                            Nodes = GetNodes(newestTopology.Node, newestTopology.Task.Result);
                            LeaderNode = newestTopology.Task.Result.ClusterInformation.IsLeader ?
                                Nodes.FirstOrDefault(n => n.Url == newestTopology.Node.Url) : null;

                            ReplicationInformerLocalCache.TrySavingClusterNodesToLocalCache(serverHash, Nodes);

                            if (LeaderNode != null)
                                return;
                        }

                        Thread.Sleep(500);
                    }
                }).ContinueWith(t =>
                {
                    lastUpdate = SystemTime.UtcNow;
                    refreshReplicationInformationTask = null;
                });
            }
        }