Exemple #1
0
        private void ReplicateToDestination()
        {
            try
            {
                AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiate);
                NativeMemory.EnsureRegistered();
                if (_log.IsInfoEnabled)
                {
                    _log.Info($"Will replicate to {Destination.FromString()} via {_connectionInfo.Url}");
                }

                using (_parent._server.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                    using (context.OpenReadTransaction())
                    {
                        var record = _parent.LoadDatabaseRecord();
                        if (record == null)
                        {
                            throw new InvalidOperationException($"The database record for {_parent.Database.Name} does not exist?!");
                        }

                        if (record.Encrypted && Destination.Url.StartsWith("https:", StringComparison.OrdinalIgnoreCase) == false)
                        {
                            throw new InvalidOperationException(
                                      $"{record.DatabaseName} is encrypted, and require HTTPS for replication, but had endpoint with url {Destination.Url} to database {Destination.Database}");
                        }
                    }

                var task = TcpUtils.ConnectSocketAsync(_connectionInfo, _parent._server.Engine.TcpConnectionTimeout, _log);
                task.Wait(CancellationToken);
                using (Interlocked.Exchange(ref _tcpClient, task.Result))
                {
                    var wrapSsl = TcpUtils.WrapStreamWithSslAsync(_tcpClient, _connectionInfo, _parent._server.Server.Certificate.Certificate, _parent._server.Engine.TcpConnectionTimeout);
                    wrapSsl.Wait(CancellationToken);

                    using (_stream = wrapSsl.Result) // note that _stream is being disposed by the interruptible read
                        using (_interruptibleRead = new InterruptibleRead(_database.DocumentsStorage.ContextPool, _stream))
                            using (_buffer = JsonOperationContext.ManagedPinnedBuffer.LongLivedInstance())
                            {
                                var documentSender = new ReplicationDocumentSender(_stream, this, _log);

                                WriteHeaderToRemotePeer();
                                //handle initial response to last etag and staff
                                try
                                {
                                    var response = HandleServerResponse(getFullResponse: true);
                                    switch (response.ReplyType)
                                    {
                                    //The first time we start replication we need to register the destination current CV
                                    case ReplicationMessageReply.ReplyType.Ok:
                                        LastAcceptedChangeVector = response.Reply.DatabaseChangeVector;
                                        break;

                                    case ReplicationMessageReply.ReplyType.Error:
                                        var exception = new InvalidOperationException(response.Reply.Exception);
                                        if (response.Reply.Exception.Contains(nameof(DatabaseDoesNotExistException)) ||
                                            response.Reply.Exception.Contains(nameof(DatabaseNotRelevantException)))
                                        {
                                            AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, "Database does not exist");
                                            DatabaseDoesNotExistException.ThrowWithMessageAndException(Destination.Database, response.Reply.Message, exception);
                                        }

                                        AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, $"Got error: {response.Reply.Exception}");
                                        throw exception;
                                    }
                                }
                                catch (DatabaseDoesNotExistException e)
                                {
                                    var msg = $"Failed to parse initial server replication response, because there is no database named {_database.Name} " +
                                              "on the other end. ";
                                    if (_external)
                                    {
                                        msg += "In order for the replication to work, a database with the same name needs to be created at the destination";
                                    }

                                    var young = (DateTime.UtcNow - _startedAt).TotalSeconds < 30;
                                    if (young)
                                    {
                                        msg += "This can happen if the other node wasn't yet notified about being assigned this database and should be resolved shortly.";
                                    }
                                    if (_log.IsInfoEnabled)
                                    {
                                        _log.Info(msg, e);
                                    }

                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, msg);

                                    // won't add an alert on young connections
                                    // because it may take a few seconds for the other side to be notified by
                                    // the cluster that it has this db.
                                    if (young == false)
                                    {
                                        AddAlertOnFailureToReachOtherSide(msg, e);
                                    }

                                    throw;
                                }
                                catch (OperationCanceledException e)
                                {
                                    const string msg = "Got operation canceled notification while opening outgoing replication channel. " +
                                                       "Aborting and closing the channel.";
                                    if (_log.IsInfoEnabled)
                                    {
                                        _log.Info(msg, e);
                                    }
                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, msg);
                                    throw;
                                }
                                catch (Exception e)
                                {
                                    var msg = $"{OutgoingReplicationThreadName} got an unexpected exception during initial handshake";
                                    if (_log.IsInfoEnabled)
                                    {
                                        _log.Info(msg, e);
                                    }

                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, msg);
                                    AddAlertOnFailureToReachOtherSide(msg, e);

                                    throw;
                                }

                                DateTime nextReplicateAt = default(DateTime);

                                while (_cts.IsCancellationRequested == false)
                                {
                                    while (_database.Time.GetUtcNow() > nextReplicateAt)
                                    {
                                        if (_parent.DebugWaitAndRunReplicationOnce != null)
                                        {
                                            _parent.DebugWaitAndRunReplicationOnce.Wait(_cts.Token);
                                            _parent.DebugWaitAndRunReplicationOnce.Reset();
                                        }

                                        var sp    = Stopwatch.StartNew();
                                        var stats = _lastStats = new OutgoingReplicationStatsAggregator(_parent.GetNextReplicationStatsId(), _lastStats);
                                        AddReplicationPerformance(stats);
                                        AddReplicationPulse(ReplicationPulseDirection.OutgoingBegin);

                                        try
                                        {
                                            using (var scope = stats.CreateScope())
                                            {
                                                try
                                                {
                                                    if (Destination is InternalReplication dest)
                                                    {
                                                        _parent.EnsureNotDeleted(dest.NodeTag);
                                                    }
                                                    var didWork = documentSender.ExecuteReplicationOnce(scope, ref nextReplicateAt);
                                                    if (didWork == false)
                                                    {
                                                        break;
                                                    }

                                                    if (Destination is ExternalReplication externalReplication)
                                                    {
                                                        var taskId = externalReplication.TaskId;
                                                        UpdateExternalReplicationInfo(taskId);
                                                    }

                                                    DocumentsSend?.Invoke(this);

                                                    if (sp.ElapsedMilliseconds > 60 * 1000)
                                                    {
                                                        _waitForChanges.Set();
                                                        break;
                                                    }
                                                }
                                                catch (OperationCanceledException)
                                                {
                                                    // cancellation is not an actual error,
                                                    // it is a "notification" that we need to cancel current operation

                                                    const string msg = "Operation was canceled.";
                                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingError, msg);

                                                    throw;
                                                }
                                                catch (Exception e)
                                                {
                                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingError, e.Message);

                                                    scope.AddError(e);
                                                    throw;
                                                }
                                            }
                                        }
                                        finally
                                        {
                                            stats.Complete();
                                            AddReplicationPulse(ReplicationPulseDirection.OutgoingEnd);
                                        }
                                    }

                                    //if this returns false, this means either timeout or canceled token is activated
                                    while (WaitForChanges(_parent.MinimalHeartbeatInterval, _cts.Token) == false)
                                    {
                                        //If we got cancelled we need to break right away
                                        if (_cts.IsCancellationRequested)
                                        {
                                            break;
                                        }

                                        // open tx
                                        // read current change vector compare to last sent
                                        // if okay, send cv
                                        using (_database.DocumentsStorage.ContextPool.AllocateOperationContext(out DocumentsOperationContext ctx))
                                            using (var tx = ctx.OpenReadTransaction())
                                            {
                                                var etag = DocumentsStorage.ReadLastEtag(tx.InnerTransaction);
                                                if (etag == _lastSentDocumentEtag)
                                                {
                                                    SendHeartbeat(DocumentsStorage.GetDatabaseChangeVector(ctx));
                                                    _parent.CompleteDeletionIfNeeded();
                                                }
                                                else if (nextReplicateAt > DateTime.UtcNow)
                                                {
                                                    SendHeartbeat(null);
                                                }
                                                else
                                                {
                                                    //Send a heartbeat first so we will get an updated CV of the destination
                                                    var currentChangeVector = DocumentsStorage.GetDatabaseChangeVector(ctx);
                                                    SendHeartbeat(null);
                                                    //If our previous CV is already merged to the destination wait a bit more
                                                    if (ChangeVectorUtils.GetConflictStatus(LastAcceptedChangeVector, currentChangeVector) ==
                                                        ConflictStatus.AlreadyMerged)
                                                    {
                                                        continue;
                                                    }

                                                    // we have updates that we need to send to the other side
                                                    // let's do that..
                                                    // this can happen if we got replication from another node
                                                    // that we need to send to it. Note that we typically
                                                    // will wait for the other node to send the data directly to
                                                    // our destination, but if it doesn't, we'll step in.
                                                    // In this case, we try to limit congestion in the network and
                                                    // only send updates that we have gotten from someone else after
                                                    // a certain time, to let the other side tell us that it already
                                                    // got it. Note that this is merely an optimization to reduce network
                                                    // traffic. It is fine to have the same data come from different sources.
                                                    break;
                                                }
                                            }
                                    }
                                    _waitForChanges.Reset();
                                }
                            }
                }
            }
            catch (AggregateException e)
            {
                if (e.InnerExceptions.Count == 1)
                {
                    if (e.InnerException is OperationCanceledException oce)
                    {
                        HandleOperationCancelException(oce);
                    }
                    if (e.InnerException is IOException ioe)
                    {
                        HandleIOException(ioe);
                    }
                }

                HandleException(e);
            }
            catch (OperationCanceledException e)
            {
                HandleOperationCancelException(e);
            }
            catch (IOException e)
            {
                HandleIOException(e);
            }
            catch (Exception e)
            {
                HandleException(e);
            }

            void HandleOperationCancelException(OperationCanceledException e)
            {
                if (_log.IsInfoEnabled)
                {
                    _log.Info($"Operation canceled on replication thread ({FromToString}). " +
                              $"This is not necessary due to an issue. Stopped the thread.");
                }
                if (_cts.IsCancellationRequested == false)
                {
                    Failed?.Invoke(this, e);
                }
            }

            void HandleIOException(IOException e)
            {
                if (_log.IsInfoEnabled)
                {
                    if (e.InnerException is SocketException)
                    {
                        _log.Info($"SocketException was thrown from the connection to remote node ({FromToString}). " +
                                  $"This might mean that the remote node is done or there is a network issue.", e);
                    }
                    else
                    {
                        _log.Info($"IOException was thrown from the connection to remote node ({FromToString}).", e);
                    }
                }
                Failed?.Invoke(this, e);
            }

            void HandleException(Exception e)
            {
                if (_log.IsInfoEnabled)
                {
                    _log.Info($"Unexpected exception occurred on replication thread ({FromToString}). " +
                              $"Replication stopped (will be retried later).", e);
                }
                Failed?.Invoke(this, e);
            }
        }
Exemple #2
0
        private void ReplicateToDestination()
        {
            try
            {
                var connectionInfo = GetTcpInfo();

                using (_tcpClient = new TcpClient())
                {
                    ConnectSocket(connectionInfo, _tcpClient);

                    using (_database.DocumentsStorage.ContextPool.AllocateOperationContext(out _documentsContext))
                        using (_database.ConfigurationStorage.ContextPool.AllocateOperationContext(out _configurationContext))
                            using (var stream = _tcpClient.GetStream())
                            {
                                var documentSender            = new ReplicationDocumentSender(stream, this, _log);
                                var indexAndTransformerSender = new ReplicationIndexTransformerSender(stream, this, _log);

                                using (_writer = new BlittableJsonTextWriter(_documentsContext, stream))
                                    using (_parser = _documentsContext.ParseMultiFrom(stream))
                                    {
                                        //send initial connection information
                                        _documentsContext.Write(_writer, new DynamicJsonValue
                                        {
                                            [nameof(TcpConnectionHeaderMessage.DatabaseName)] = _destination.Database,
                                            [nameof(TcpConnectionHeaderMessage.Operation)]    = TcpConnectionHeaderMessage.OperationTypes.Replication.ToString(),
                                        });

                                        //start request/response for fetching last etag
                                        _documentsContext.Write(_writer, new DynamicJsonValue
                                        {
                                            ["Type"]               = "GetLastEtag",
                                            ["SourceDatabaseId"]   = _database.DbId.ToString(),
                                            ["SourceDatabaseName"] = _database.Name,
                                            ["SourceUrl"]          = _database.Configuration.Core.ServerUrl,
                                            ["MachineName"]        = Environment.MachineName,
                                        });
                                        _writer.Flush();

                                        //handle initial response to last etag and staff
                                        try
                                        {
                                            using (_configurationContext.OpenReadTransaction())
                                                using (_documentsContext.OpenReadTransaction())
                                                {
                                                    var response = HandleServerResponse();
                                                    if (response.Item1 == ReplicationMessageReply.ReplyType.Error)
                                                    {
                                                        if (response.Item2.Contains("DatabaseDoesNotExistsException"))
                                                        {
                                                            throw new DatabaseDoesNotExistsException();
                                                        }

                                                        throw new InvalidOperationException(response.Item2);
                                                    }
                                                }
                                        }
                                        catch (DatabaseDoesNotExistsException e)
                                        {
                                            var msg = $"Failed to parse initial server replication response, because there is no database named {_database.Name} on the other end. " +
                                                      "In order for the replication to work, a database with the same name needs to be created at the destination";
                                            if (_log.IsInfoEnabled)
                                            {
                                                _log.Info(msg, e);
                                            }

                                            using (var txw = _configurationContext.OpenWriteTransaction())
                                            {
                                                _database.Alerts.AddAlert(new Alert
                                                {
                                                    Key       = FromToString,
                                                    Type      = AlertType.Replication,
                                                    Message   = msg,
                                                    CreatedAt = DateTime.UtcNow,
                                                    Severity  = AlertSeverity.Warning
                                                }, _configurationContext, txw);
                                                txw.Commit();
                                            }

                                            throw;
                                        }
                                        catch (Exception e)
                                        {
                                            var msg =
                                                $"Failed to parse initial server response. This is definitely not supposed to happen. Exception thrown: {e}";
                                            if (_log.IsInfoEnabled)
                                            {
                                                _log.Info(msg, e);
                                            }

                                            using (var txw = _configurationContext.OpenWriteTransaction())
                                            {
                                                _database.Alerts.AddAlert(new Alert
                                                {
                                                    Key       = FromToString,
                                                    Type      = AlertType.Replication,
                                                    Message   = msg,
                                                    CreatedAt = DateTime.UtcNow,
                                                    Severity  = AlertSeverity.Error
                                                }, _configurationContext, txw);
                                                txw.Commit();
                                            }

                                            throw;
                                        }

                                        while (_cts.IsCancellationRequested == false)
                                        {
                                            _documentsContext.ResetAndRenew();
                                            long currentEtag;

                                            Debug.Assert(_database.IndexMetadataPersistence.IsInitialized);

                                            using (_configurationContext.OpenReadTransaction())
                                                currentEtag = _database.IndexMetadataPersistence.ReadLastEtag(_configurationContext.Transaction.InnerTransaction);


                                            if (_destination.SkipIndexReplication == false &&
                                                currentEtag != indexAndTransformerSender.LastEtag)
                                            {
                                                indexAndTransformerSender.ExecuteReplicationOnce();
                                            }

                                            var sp = Stopwatch.StartNew();
                                            while (documentSender.ExecuteReplicationOnce())
                                            {
                                                if (sp.ElapsedMilliseconds > 60 * 1000)
                                                {
                                                    _waitForChanges.Set();
                                                    break;
                                                }
                                            }

                                            //if this returns false, this means either timeout or canceled token is activated
                                            while (WaitForChanges(_minimalHeartbeatInterval, _cts.Token) == false)
                                            {
                                                _configurationContext.ResetAndRenew();
                                                _documentsContext.ResetAndRenew();
                                                using (_documentsContext.OpenReadTransaction())
                                                    using (_configurationContext.OpenReadTransaction())
                                                    {
                                                        SendHeartbeat();
                                                    }
                                            }
                                            _waitForChanges.Reset();
                                        }
                                    }
                            }
                }
            }
            catch (OperationCanceledException)
            {
                if (_log.IsInfoEnabled)
                {
                    _log.Info($"Operation canceled on replication thread ({FromToString}). Stopped the thread.");
                }
            }
            catch (IOException e)
            {
                if (_log.IsInfoEnabled)
                {
                    if (e.InnerException is SocketException)
                    {
                        _log.Info(
                            $"SocketException was thrown from the connection to remote node ({FromToString}). This might mean that the remote node is done or there is a network issue.",
                            e);
                    }
                    else
                    {
                        _log.Info($"IOException was thrown from the connection to remote node ({FromToString}).", e);
                    }
                }
            }
            catch (Exception e)
            {
                if (_log.IsInfoEnabled)
                {
                    _log.Info($"Unexpected exception occured on replication thread ({FromToString}). Replication stopped (will be retried later).", e);
                }
                Failed?.Invoke(this, e);
            }
        }