예제 #1
0
        /// <summary>
        /// Copies the last known-good time of <paramref name="dbName"/> from the previous
        /// successful report into <c>LastGoodDatabaseStatus</c>.
        /// Stores <see cref="DateTime.MinValue"/> when there is no previous report or when it
        /// holds no entry for the database.
        /// </summary>
        /// <param name="lastSuccessfulReport">Previous successful report; may be null.</param>
        /// <param name="dbName">Database whose last good time is carried over.</param>
        private void SetLastDbGoodTime(ClusterNodeStatusReport lastSuccessfulReport, string dbName)
        {
            var carriedOver = DateTime.MinValue;

            if (lastSuccessfulReport != null &&
                lastSuccessfulReport.LastGoodDatabaseStatus.TryGetValue(dbName, out var previousGood))
            {
                carriedOver = previousGood;
            }

            LastGoodDatabaseStatus[dbName] = carriedOver;
        }
        /// <summary>
        /// Creates a node status report and derives, per database, the last time it was seen
        /// in a good state.
        /// </summary>
        /// <param name="report">Per-database status reports, keyed by database name.</param>
        /// <param name="reportStatus">Overall status of this report.</param>
        /// <param name="error">Error attached to the report, if any.</param>
        /// <param name="updateDateTime">Time this report was produced.</param>
        /// <param name="lastSuccessfulReport">Previous successful report; may be null.</param>
        public ClusterNodeStatusReport(
            Dictionary<string, DatabaseStatusReport> report,
            ReportStatus reportStatus,
            Exception error,
            DateTime updateDateTime,
            ClusterNodeStatusReport lastSuccessfulReport)
        {
            Report = report;
            Status = reportStatus;
            Error = error;
            UpdateDateTime = updateDateTime;
            LastSuccessfulUpdateDateTime = lastSuccessfulReport?.UpdateDateTime ?? DateTime.MinValue;
            LastGoodDatabaseStatus = new Dictionary<string, DateTime>();

            // the node-level status is the same for every database, so evaluate it once
            var nodeIsOk = reportStatus == ReportStatus.Ok;

            foreach (var entry in report)
            {
                var name = entry.Key;
                var status = entry.Value.Status;
                var databaseIsGood = nodeIsOk &&
                                     (status == DatabaseStatus.Loaded || status == DatabaseStatus.NoChange);

                if (databaseIsGood == false)
                {
                    // not good right now: keep whatever the previous successful report knew
                    SetLastDbGoodTime(lastSuccessfulReport, name);
                    continue;
                }

                LastGoodDatabaseStatus[name] = updateDateTime;
            }
        }
            /// <summary>
            /// Timeout callback. Unless cancellation was requested, logs the timeout and publishes
            /// an empty report with <c>Timeout</c> status through <c>ReceivedReport</c>.
            /// </summary>
            private void OnTimeout()
            {
                if (_token.IsCancellationRequested == false)
                {
                    // expected timeout
                    if (_log.IsInfoEnabled)
                    {
                        _log.Info("Timeout occurred while collecting info report.");
                    }

                    var noDatabases = new Dictionary<string, DatabaseStatusReport>();

                    ReceivedReport = new ClusterNodeStatusReport(
                        noDatabases,
                        ClusterNodeStatusReport.ReportStatus.Timeout,
                        null,
                        DateTime.UtcNow,
                        _lastSuccessfulReceivedReport);
                }
            }
예제 #4
0
        /// <summary>
        /// Creates a node status report and derives, per database, the last time it was seen
        /// in a good state.
        /// </summary>
        /// <param name="serverReport">Server-level report of the node; dereferenced, so it must not be null.</param>
        /// <param name="report">Per-database status reports, keyed by database name.</param>
        /// <param name="reportStatus">Overall status of this report.</param>
        /// <param name="error">Error attached to the report, if any.</param>
        /// <param name="updateDateTime">Time this report was produced.</param>
        /// <param name="lastSuccessfulReport">Previous successful report; may be null.</param>
        public ClusterNodeStatusReport(
            ServerReport serverReport,
            Dictionary <string, DatabaseStatusReport> report,
            ReportStatus reportStatus,
            Exception error,
            DateTime updateDateTime,
            ClusterNodeStatusReport lastSuccessfulReport)
        {
            ServerReport   = serverReport;
            Report         = report;
            Status         = reportStatus;
            Error          = error;
            UpdateDateTime = updateDateTime;

            // Every flag is nullable and has to be tested on its own. Chaining them with '??'
            // only yields the first non-null flag, so an explicit 'false' in an earlier flag
            // would hide a 'true' in a later one.
            var underResourcePressure = ServerReport.OutOfCpuCredits == true ||
                                        ServerReport.EarlyOutOfMemory == true ||
                                        ServerReport.HighDirtyMemory == true;

            if (underResourcePressure)
            {
                // we don't want to give any grace time if the node is out of credits, early out of memory or high dirty memory
                LastSuccessfulUpdateDateTime = DateTime.MinValue;
            }
            else
            {
                LastSuccessfulUpdateDateTime = lastSuccessfulReport?.UpdateDateTime ?? DateTime.MinValue;
            }

            LastGoodDatabaseStatus = new Dictionary <string, DateTime>();
            foreach (var dbReport in report)
            {
                var dbName   = dbReport.Key;
                var dbStatus = dbReport.Value.Status;

                if (reportStatus == ReportStatus.Ok &&
                    (dbStatus == DatabaseStatus.Loaded || dbStatus == DatabaseStatus.NoChange))
                {
                    LastGoodDatabaseStatus[dbName] = updateDateTime;
                }
                else
                {
                    // not good right now: keep whatever the previous successful report knew
                    SetLastDbGoodTime(lastSuccessfulReport, dbName);
                }
            }
        }
예제 #5
0
            /// <summary>
            /// Replaces every database entry of <paramref name="nodeReport"/> whose status is
            /// <c>NoChange</c> with the entry from the last received report, after refreshing
            /// that entry's <c>LastSentEtag</c> and <c>UpTime</c> from the new report.
            /// </summary>
            /// <param name="nodeReport">The freshly built report; its <c>Report</c> dictionary is modified in place.</param>
            /// <param name="unchangedReports">
            /// Scratch list supplied by the caller; the <c>NoChange</c> reports found are appended to it.
            /// </param>
            /// <exception cref="InvalidOperationException">
            /// There are <c>NoChange</c> databases but the last received report is not <c>Ok</c>,
            /// or one of those databases is missing from it.
            /// </exception>
            private void UpdateNodeReportIfNeeded(ClusterNodeStatusReport nodeReport, List <DatabaseStatusReport> unchangedReports)
            {
                // collect first; the entries of nodeReport.Report are replaced only after
                // this enumeration has finished
                foreach (var dbReport in nodeReport.Report)
                {
                    if (dbReport.Value.Status == DatabaseStatus.NoChange)
                    {
                        _parent.ForTestingPurposes?.NoChangeFoundAction(this);

                        unchangedReports.Add(dbReport.Value);
                    }
                }

                if (unchangedReports.Count == 0)
                {
                    return;
                }

                // we take the last received and not the last successful.
                // we don't want to reuse by mistake a successful report when we receive an 'unchanged' error.
                var lastReport = ReceivedReport;

                if (lastReport.Status != ClusterNodeStatusReport.ReportStatus.Ok)
                {
                    throw new InvalidOperationException(
                              $"We have databases with '{DatabaseStatus.NoChange}' status, but our last report from this node is '{lastReport.Status}'");
                }

                foreach (var dbReport in unchangedReports)
                {
                    var dbName = dbReport.Name;
                    if (lastReport.Report.TryGetValue(dbName, out var previous) == false)
                    {
                        // report the database name, not the report object itself
                        throw new InvalidOperationException(
                                  $"We got '{DatabaseStatus.NoChange}' for the database '{dbName}', but it is missing in the last good report");
                    }

                    previous.LastSentEtag     = dbReport.LastSentEtag;
                    previous.UpTime           = dbReport.UpTime;
                    nodeReport.Report[dbName] = previous;
                }
            }
예제 #6
0
            /// <summary>
            /// Replaces every database entry of <paramref name="nodeReport"/> whose status is
            /// <c>NoChange</c> with the entry from the last received report, after refreshing
            /// that entry's <c>LastSentEtag</c> and <c>UpTime</c> from the new report.
            /// Does nothing when the last received report is not <c>Ok</c>; databases missing
            /// from the last received report are left untouched.
            /// </summary>
            /// <param name="nodeReport">The freshly built report; its <c>Report</c> dictionary is modified in place.</param>
            /// <param name="unchangedReports">
            /// Scratch list supplied by the caller; the <c>NoChange</c> reports found are appended to it.
            /// </param>
            private void UpdateNodeReportIfNeeded(ClusterNodeStatusReport nodeReport, List<DatabaseStatusReport> unchangedReports)
            {
                // We use the last received report, not the last successful one:
                // a successful report must not be reused by mistake when we receive an 'unchanged' error.
                var lastReport = ReceivedReport;
                if (lastReport.Status != ClusterNodeStatusReport.ReportStatus.Ok)
                {
                    return;
                }

                // collect first; the entries of nodeReport.Report are replaced only after
                // this enumeration has finished
                foreach (var entry in nodeReport.Report)
                {
                    var candidate = entry.Value;
                    if (candidate.Status != DatabaseStatus.NoChange)
                    {
                        continue;
                    }

                    unchangedReports.Add(candidate);
                }

                if (unchangedReports.Count == 0)
                {
                    return;
                }

                foreach (var unchanged in unchangedReports)
                {
                    var dbName = unchanged.Name;
                    if (lastReport.Report.TryGetValue(dbName, out var previous))
                    {
                        previous.LastSentEtag = unchanged.LastSentEtag;
                        previous.UpTime = unchanged.UpTime;
                        nodeReport.Report[dbName] = previous;
                    }

                    // otherwise: a new db. It shouldn't really happen, but there is not much
                    // we can do about it, so it is shown to the user as is.
                }
            }
예제 #7
0
            /// <summary>
            /// Runs until <c>_token</c> is cancelled: resolves the TCP info of the node at <c>Url</c>,
            /// connects to it and keeps reading status updates from the connection, publishing each
            /// one through <c>ReceivedReport</c>.
            /// A read that does not complete within the configured receive timeout publishes a
            /// <c>Timeout</c> report; any exception publishes an <c>Error</c> report. In both cases
            /// the loop waits for the configured on-error delay and then reconnects.
            /// </summary>
            private async Task ListenToMaintenanceWorker()
            {
                bool needToWait               = false;
                var  onErrorDelayTime         = _parent.Config.OnErrorDelayTime.AsTimeSpan;
                var  receiveFromWorkerTimeout = _parent.Config.ReceiveFromWorkerTimeout.AsTimeSpan;

                TcpConnectionInfo tcpConnection = null;

                try
                {
                    tcpConnection = await ReplicationUtils.GetTcpInfoAsync(Url, null, "Supervisor",
                                                                           _parent._server.RavenServer.ClusterCertificateHolder?.Certificate);
                }
                catch (Exception e)
                {
                    // best effort: tcpConnection stays null and the loop below retries after the delay
                    if (_log.IsInfoEnabled)
                    {
                        _log.Info($"ClusterMaintenanceSupervisor() => Failed to add to cluster node key = {ClusterTag}", e);
                    }
                }

                while (_token.IsCancellationRequested == false)
                {
                    // linked source so that a pending read can be cancelled on timeout without cancelling _token
                    var internalTaskCancellationToken = CancellationTokenSource.CreateLinkedTokenSource(_token);
                    try
                    {
                        if (needToWait)
                        {
                            needToWait = false; // avoid tight loop if there was timeout / error
                            await TimeoutManager.WaitFor(onErrorDelayTime, _token);

                            // same null-tolerant certificate lookup as the initial call above
                            tcpConnection = await ReplicationUtils.GetTcpInfoAsync(Url, null, "Supervisor",
                                                                                   _parent._server.RavenServer.ClusterCertificateHolder?.Certificate);
                        }

                        if (tcpConnection == null)
                        {
                            needToWait = true;
                            continue;
                        }

                        var (tcpClient, connection) = await ConnectToClientNodeAsync(tcpConnection, _parent._server.Engine.TcpConnectionTimeout);

                        using (tcpClient)
                        using (_cts.Token.Register(tcpClient.Dispose))
                        using (connection)
                        {
                            while (_token.IsCancellationRequested == false)
                            {
                                using (_contextPool.AllocateOperationContext(out JsonOperationContext context))
                                {
                                    // Convert to a Task exactly once and reuse it: the value returned by
                                    // ReadForMemoryAsync must not be consumed both through AsTask() and a direct await.
                                    var readResponseTask = context.ReadForMemoryAsync(connection, _readStatusUpdateDebugString, internalTaskCancellationToken.Token).AsTask();
                                    var timeout          = TimeoutManager.WaitFor(receiveFromWorkerTimeout, _token);

                                    if (await Task.WhenAny(readResponseTask, timeout) == timeout)
                                    {
                                        if (_log.IsInfoEnabled)
                                        {
                                            _log.Info($"Timeout occurred while collecting info from {ClusterTag}");
                                        }
                                        ReceivedReport = new ClusterNodeStatusReport(new Dictionary <string, DatabaseStatusReport>(),
                                                                                     ClusterNodeStatusReport.ReportStatus.Timeout,
                                                                                     null,
                                                                                     DateTime.UtcNow,
                                                                                     _lastSuccessfulReceivedReport);
                                        needToWait = true;
                                        internalTaskCancellationToken.Cancel();
                                        break;
                                    }

                                    using (var statusUpdateJson = await readResponseTask)
                                    {
                                        // every property of the update is deserialized as one database report,
                                        // keyed by the property name
                                        var report = new Dictionary <string, DatabaseStatusReport>();
                                        foreach (var property in statusUpdateJson.GetPropertyNames())
                                        {
                                            var value = (BlittableJsonReaderObject)statusUpdateJson[property];
                                            report.Add(property, JsonDeserializationServer.DatabaseStatusReport(value));
                                        }

                                        ReceivedReport = new ClusterNodeStatusReport(
                                            report,
                                            ClusterNodeStatusReport.ReportStatus.Ok,
                                            null,
                                            DateTime.UtcNow,
                                            _lastSuccessfulReceivedReport);
                                        _lastSuccessfulReceivedReport = ReceivedReport;
                                    }
                                }
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        if (_log.IsInfoEnabled)
                        {
                            _log.Info($"Exception was thrown while collecting info from {ClusterTag}", e);
                        }
                        ReceivedReport = new ClusterNodeStatusReport(new Dictionary <string, DatabaseStatusReport>(),
                                                                     ClusterNodeStatusReport.ReportStatus.Error,
                                                                     e,
                                                                     DateTime.UtcNow,
                                                                     _lastSuccessfulReceivedReport);
                        needToWait = true;
                    }
                    finally
                    {
                        internalTaskCancellationToken.Dispose();
                    }
                }
            }
            /// <summary>
            /// Blocking loop that runs until <c>_token</c> is cancelled: resolves the TCP info of the
            /// node at <c>Url</c>, connects to it and keeps reading reports from the stream,
            /// publishing each one through <c>ReceivedReport</c>.
            /// Failures publish an <c>Error</c> report and trigger a reconnect after the configured
            /// on-error delay. Once cancellation has been requested the method returns without
            /// logging or publishing anything.
            /// </summary>
            private void ListenToMaintenanceWorker()
            {
                var needToWait               = false;
                var firstIteration           = true;
                var onErrorDelayTime         = _parent.Config.OnErrorDelayTime.AsTimeSpan;
                var receiveFromWorkerTimeout = _parent.Config.ReceiveFromWorkerTimeout.AsTimeSpan;
                var tcpTimeout               = _parent.Config.TcpConnectionTimeout.AsTimeSpan;

                if (tcpTimeout < receiveFromWorkerTimeout)
                {
                    if (_log.IsInfoEnabled)
                    {
                        _log.Info(
                            $"Warning: TCP timeout is lower than the receive from worker timeout ({tcpTimeout} < {receiveFromWorkerTimeout}), " +
                            "this could affect the cluster observer's decisions.");
                    }
                }

                // null on the very first pass, which sends the loop through the 'needToWait' branch
                // once (without a delay, because firstIteration is still true) to resolve it
                TcpConnectionInfo tcpConnection = null;

                while (_token.IsCancellationRequested == false)
                {
                    try
                    {
                        if (needToWait)
                        {
                            needToWait = false; // avoid tight loop if there was timeout / error
                            if (firstIteration == false)
                            {
                                _token.WaitHandle.WaitOne(onErrorDelayTime);
                            }
                            firstIteration = false;

                            // bound the TCP info lookup by tcpTimeout, while still honoring _token
                            using (var timeout = new CancellationTokenSource(tcpTimeout))
                            using (var combined = CancellationTokenSource.CreateLinkedTokenSource(_token, timeout.Token))
                            {
                                tcpConnection = ReplicationUtils.GetTcpInfo(Url, null, "Supervisor", _parent._server.Server.Certificate.Certificate, combined.Token);
                            }
                        }

                        if (tcpConnection == null)
                        {
                            needToWait = true;
                            continue;
                        }

                        var connection = ConnectToClientNode(tcpConnection, _parent._server.Engine.TcpConnectionTimeout);
                        var tcpClient  = connection.TcpClient;
                        var stream     = connection.Stream;
                        using (tcpClient)
                        using (_cts.Token.Register(tcpClient.Dispose))
                        using (_contextPool.AllocateOperationContext(out JsonOperationContext context))
                        using (var timeoutEvent = new TimeoutEvent(receiveFromWorkerTimeout, $"Timeout event for: {_name}", singleShot: false))
                        {
                            timeoutEvent.Start(OnTimeout);
                            while (_token.IsCancellationRequested == false)
                            {
                                BlittableJsonReaderObject rawReport;
                                try
                                {
                                    // even if there is a timeout event, we will keep waiting on the same connection until the TCP timeout occurs.
                                    rawReport = context.ReadForMemory(stream, _readStatusUpdateDebugString);
                                    timeoutEvent.Defer(_parent._leaderClusterTag);
                                }
                                catch (Exception e)
                                {
                                    if (_token.IsCancellationRequested)
                                    {
                                        return;
                                    }

                                    if (_log.IsInfoEnabled)
                                    {
                                        _log.Info("Exception occurred while reading the report from the connection", e);
                                    }

                                    ReceivedReport = new ClusterNodeStatusReport(new Dictionary <string, DatabaseStatusReport>(),
                                                                                 ClusterNodeStatusReport.ReportStatus.Error,
                                                                                 e,
                                                                                 DateTime.UtcNow,
                                                                                 _lastSuccessfulReceivedReport);

                                    // leave the read loop so the outer loop reconnects after the delay
                                    needToWait = true;
                                    break;
                                }

                                var report = BuildReport(rawReport);
                                timeoutEvent.Defer(_parent._leaderClusterTag);

                                ReceivedReport = _lastSuccessfulReceivedReport = report;
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        if (_token.IsCancellationRequested)
                        {
                            // shutting down: same as the read path above, don't log
                            // and don't publish a spurious error report
                            return;
                        }

                        if (_log.IsInfoEnabled)
                        {
                            _log.Info($"Exception was thrown while collecting info from {ClusterTag}", e);
                        }

                        ReceivedReport = new ClusterNodeStatusReport(new Dictionary <string, DatabaseStatusReport>(),
                                                                     ClusterNodeStatusReport.ReportStatus.Error,
                                                                     e,
                                                                     DateTime.UtcNow,
                                                                     _lastSuccessfulReceivedReport);

                        needToWait = true;
                    }
                }
            }
예제 #9
0
            /// <summary>
            /// Blocking loop that runs until <c>_token</c> is cancelled: resolves the TCP info of the
            /// node at <c>Url</c>, connects to it and keeps reading reports from the stream,
            /// publishing each one through <c>ReceivedReport</c>.
            /// Failures publish an <c>Error</c> report and make the outer loop reconnect after the
            /// configured on-error delay. Once cancellation has been requested the method returns
            /// without logging or publishing anything.
            /// </summary>
            private void ListenToMaintenanceWorker()
            {
                var firstIteration           = true;
                var onErrorDelayTime         = _parent.Config.OnErrorDelayTime.AsTimeSpan;
                var receiveFromWorkerTimeout = _parent.Config.ReceiveFromWorkerTimeout.AsTimeSpan;
                var tcpTimeout = _parent.Config.TcpConnectionTimeout.AsTimeSpan;

                // only logged, the configured values are used as they are
                if (tcpTimeout < receiveFromWorkerTimeout)
                {
                    if (_log.IsInfoEnabled)
                    {
                        _log.Info(
                            $"Warning: TCP timeout is lower than the receive from worker timeout ({tcpTimeout} < {receiveFromWorkerTimeout}), " +
                            "this could affect the cluster observer's decisions.");
                    }
                }

                // every pass of this loop is one connection attempt
                while (_token.IsCancellationRequested == false)
                {
                    try
                    {
                        if (firstIteration == false)
                        {
                            // avoid tight loop if there was timeout / error
                            _token.WaitHandle.WaitOne(onErrorDelayTime);
                            if (_token.IsCancellationRequested)
                            {
                                return;
                            }
                        }
                        firstIteration = false;

                        // bound the TCP info lookup by tcpTimeout, while still honoring _token
                        TcpConnectionInfo tcpConnection = null;
                        using (var timeout = new CancellationTokenSource(tcpTimeout))
                            using (var combined = CancellationTokenSource.CreateLinkedTokenSource(_token, timeout.Token))
                            {
                                tcpConnection = ReplicationUtils.GetTcpInfo(Url, null, "Supervisor", _parent._server.Server.Certificate.Certificate, combined.Token);
                                if (tcpConnection == null)
                                {
                                    // nothing to connect to; the next pass waits onErrorDelayTime and retries
                                    continue;
                                }
                            }

                        var connection = ConnectToClientNode(tcpConnection, _parent._server.Engine.TcpConnectionTimeout);
                        var tcpClient  = connection.TcpClient;
                        var stream     = connection.Stream;
                        // NOTE(review): registering tcpClient.Dispose on _cts presumably unblocks the
                        // blocking read below when the supervisor is stopped - confirm.
                        // Two contexts are allocated: one for parsing (reset for every report) and one
                        // that only provides the read buffer for the lifetime of the connection.
                        using (tcpClient)
                            using (_cts.Token.Register(tcpClient.Dispose))
                                using (_contextPool.AllocateOperationContext(out JsonOperationContext contextForParsing))
                                    using (_contextPool.AllocateOperationContext(out JsonOperationContext contextForBuffer))
                                        using (contextForBuffer.GetMemoryBuffer(out var readBuffer))
                                            using (var timeoutEvent = new TimeoutEvent(receiveFromWorkerTimeout, $"Timeout event for: {_name}", singleShot: false))
                                            {
                                                timeoutEvent.Start(OnTimeout);
                                                // scratch list handed to UpdateNodeReportIfNeeded and cleared after every report
                                                var unchangedReports = new List <DatabaseStatusReport>();

                                                // every pass of this loop reads and publishes one report
                                                while (_token.IsCancellationRequested == false)
                                                {
                                                    // NOTE(review): Reset + Renew presumably release what the previous
                                                    // report allocated in this context - confirm.
                                                    contextForParsing.Reset();
                                                    contextForParsing.Renew();
                                                    BlittableJsonReaderObject rawReport;
                                                    try
                                                    {
                                                        // even if there is a timeout event, we will keep waiting on the same connection until the TCP timeout occurs.

                                                        rawReport = contextForParsing.Sync.ParseToMemory(stream, _readStatusUpdateDebugString, BlittableJsonDocumentBuilder.UsageMode.None, readBuffer);
                                                        // NOTE(review): Defer presumably postpones the next OnTimeout callback - confirm.
                                                        timeoutEvent.Defer(_parent._leaderClusterTag);
                                                    }
                                                    catch (Exception e)
                                                    {
                                                        // shutting down: leave without logging or publishing a report
                                                        if (_token.IsCancellationRequested)
                                                        {
                                                            return;
                                                        }

                                                        if (_log.IsInfoEnabled)
                                                        {
                                                            _log.Info("Exception occurred while reading the report from the connection", e);
                                                        }

                                                        ReceivedReport = new ClusterNodeStatusReport(new ServerReport(), new Dictionary <string, DatabaseStatusReport>(),
                                                                                                     ClusterNodeStatusReport.ReportStatus.Error,
                                                                                                     e,
                                                                                                     DateTime.UtcNow,
                                                                                                     _lastSuccessfulReceivedReport);

                                                        // leave the read loop; the outer loop reconnects after the delay
                                                        break;
                                                    }

                                                    _parent.ForTestingPurposes?.BeforeReportBuildAction(this);

                                                    var nodeReport = BuildReport(rawReport, connection.SupportedFeatures);
                                                    timeoutEvent.Defer(_parent._leaderClusterTag);


                                                    // runs before ReceivedReport is overwritten below, so it still
                                                    // sees the previously received report
                                                    UpdateNodeReportIfNeeded(nodeReport, unchangedReports);
                                                    unchangedReports.Clear();

                                                    ReceivedReport = _lastSuccessfulReceivedReport = nodeReport;
                                                    _parent.ForTestingPurposes?.AfterSettingReportAction(this);
                                                }
                                            }
                    }
                    catch (Exception e)
                    {
                        // shutting down: leave without logging or publishing a report
                        if (_token.IsCancellationRequested)
                        {
                            return;
                        }

                        if (_log.IsInfoEnabled)
                        {
                            _log.Info($"Exception was thrown while collecting info from {ClusterTag}", e);
                        }

                        ReceivedReport = new ClusterNodeStatusReport(new ServerReport(), new Dictionary <string, DatabaseStatusReport>(),
                                                                     ClusterNodeStatusReport.ReportStatus.Error,
                                                                     e,
                                                                     DateTime.UtcNow,
                                                                     _lastSuccessfulReceivedReport);
                    }
                }
            }