private void MonitorMemoryUsage()
{
    NativeMemory.EnsureRegistered();
    var memoryAvailableHandles = new WaitHandle[] { _simulatedLowMemory, _shutdownRequested };
    var timeout = 5 * 1000;
    try
    {
        while (true)
        {
            try
            {
                var result = WaitHandle.WaitAny(memoryAvailableHandles, timeout);
                switch (result)
                {
                    case WaitHandle.WaitTimeout:
                        timeout = CheckMemoryStatus(_lowMemoryMonitor);
                        break;
                    case 0:
                        SimulateLowMemory();
                        // on early OOM, just run the cleaners once; CheckMemoryStatus will run
                        // again in 1,000 ms and will return the system to normal
                        timeout = 1000;
                        break;
                    case 1: // shutdown requested
                        return;
                    default:
                        return;
                }
            }
            catch (OutOfMemoryException)
            {
                try
                {
                    if (_logger.IsInfoEnabled)
                    {
                        _logger.Info("Out of memory error in the low memory notification thread, " +
                                     "will wait 5 seconds before trying to check memory status again. " +
                                     "The system is likely running out of memory");
                    }
                }
                catch
                {
                    // can't even log, nothing we can do here
                }

                if (_shutdownRequested.WaitOne(5000))
                    return; // shutdown requested
            }
        }
    }
    catch (Exception e)
    {
        if (_logger.IsOperationsEnabled)
        {
            _logger.Operations("Catastrophic failure in low memory notification", e);
        }
    }
}
private void RunBackupThread(PeriodicBackup periodicBackup, BackupTask backupTask, Action <IOperationProgress> onProgress, TaskCompletionSource <IOperationResult> tcs) { try { Thread.CurrentThread.Priority = ThreadPriority.BelowNormal; NativeMemory.EnsureRegistered(); using (_database.PreventFromUnloading()) { tcs.SetResult(backupTask.RunPeriodicBackup(onProgress)); } } catch (OperationCanceledException) { tcs.SetCanceled(); } catch (Exception e) { if (_logger.IsOperationsEnabled) { _logger.Operations($"Failed to run the backup thread: '{periodicBackup.Configuration.Name}'", e); } tcs.SetException(e); } finally { ScheduleNextBackup(periodicBackup); } }
private void RunBackupThread(PeriodicBackup periodicBackup, BackupTask backupTask, Action <IOperationProgress> onProgress, TaskCompletionSource <IOperationResult> tcs) { try { Thread.CurrentThread.Priority = ThreadPriority.BelowNormal; NativeMemory.EnsureRegistered(); using (_database.PreventFromUnloading()) { tcs.SetResult(backupTask.RunPeriodicBackup(onProgress)); } } catch (OperationCanceledException) { tcs.SetCanceled(); } catch (Exception e) { if (_logger.IsOperationsEnabled) { _logger.Operations($"Failed to run the backup thread: '{periodicBackup.Configuration.Name}'", e); } tcs.SetException(e); } finally { try { _serverStore.ConcurrentBackupsSemaphore.Release(); periodicBackup.RunningTask = null; periodicBackup.RunningBackupTaskId = null; periodicBackup.CancelToken = null; periodicBackup.RunningBackupStatus = null; if (periodicBackup.HasScheduledBackup() && _cancellationToken.IsCancellationRequested == false) { var newBackupTimer = GetTimer(periodicBackup.Configuration, periodicBackup.BackupStatus); periodicBackup.UpdateTimer(newBackupTimer, discardIfDisabled: true); } } catch (Exception e) { var msg = $"Failed to schedule next backup for backup thread: '{periodicBackup.Configuration.Name}'"; if (_logger.IsOperationsEnabled) { _logger.Operations(msg, e); } _database.NotificationCenter.Add(AlertRaised.Create( _database.Name, "Couldn't schedule next backup.", msg, AlertType.PeriodicBackup, NotificationSeverity.Warning, details: new ExceptionDetails(e))); } } }
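// A minimal sketch of the pattern used by RunBackupThread above: a dedicated thread is
// bridged to async callers through a TaskCompletionSource, so exactly one of SetResult,
// SetCanceled, or SetException is observed by whoever awaits the task. The names
// RunOnDedicatedThread and doWork are illustrative, not part of the codebase.
private static Task<int> RunOnDedicatedThread(Func<int> doWork)
{
    var tcs = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);
    var thread = new Thread(() =>
    {
        try
        {
            tcs.SetResult(doWork()); // success path
        }
        catch (OperationCanceledException)
        {
            tcs.SetCanceled();       // cooperative cancellation, not a failure
        }
        catch (Exception e)
        {
            tcs.SetException(e);     // surface the error to the awaiter
        }
    })
    {
        IsBackground = true,
        Priority = ThreadPriority.BelowNormal
    };
    thread.Start();
    return tcs.Task;
}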
private void VoronEnvironmentFlushing()
{
    NativeMemory.EnsureRegistered();
    // We want this to always run, even if we dispose / create new storage env, this is
    // static for the lifetime of the process, and environments will register / unregister
    // from it as needed
    try
    {
        var avoidDuplicates = new HashSet<StorageEnvironment>();
        while (true)
        {
            avoidDuplicates.Clear();
            var maybeNeedSync = _maybeNeedToSync.Count;
            var millisecondsTimeout = 15000 - maybeNeedSync;
            if (millisecondsTimeout <= 0 ||
                _flushWriterEvent.Wait(millisecondsTimeout) == false)
            {
                if (_maybeNeedToSync.Count == 0)
                    continue;

                if (_log.IsInfoEnabled)
                {
                    _log.Info($"Starting desired sync with {_maybeNeedToSync.Count:#,#} items to sync after {millisecondsTimeout:#,#} ms with no activity");
                }

                // sync if no flushing occurred for a while, or if there has been a LOT of
                // writes that we would like to sync (the base wait is 15 seconds, and it
                // gets shorter as the pending-sync backlog grows)
                SyncDesiredEnvironments(avoidDuplicates);
                continue;
            }

            _flushWriterEvent.Reset();

            FlushEnvironments(avoidDuplicates);

            SyncRequiredEnvironments(avoidDuplicates);
        }
    }
    catch (Exception e)
    {
        if (_log.IsOperationsEnabled)
        {
            _log.Operations("Catastrophic failure in Voron environment flushing", e);
        }

        // wait for the message to be flushed to the logs
        Thread.Sleep(5000);

        // Note that we intentionally don't try to recover here. If this code throws an
        // exception that bubbles up to here, we WANT the process to die, since we can't
        // recover from the flusher thread dying.
        throw;
    }
    // ReSharper disable once FunctionNeverReturns
}
private void MergeOperationThreadProc()
{
    NativeMemory.EnsureRegistered();
    try
    {
        while (_runTransactions)
        {
            if (_operations.IsEmpty)
            {
                using (var generalMeter = GeneralWaitPerformanceMetrics.MeterPerformanceRate())
                {
                    generalMeter.IncrementCounter(1);
                    _waitHandle.Wait(_shutdown);
                }
                _waitHandle.Reset();
            }

            MergeTransactionsOnce();
        }
    }
    catch (OperationCanceledException)
    {
        // clean shutdown, nothing to do
    }
    catch (Exception e)
    {
        if (_log.IsOperationsEnabled)
        {
            _log.Operations(
                "Serious failure in transaction merging thread, the database must be restarted!",
                e);
        }

        Interlocked.Exchange(ref _edi, ExceptionDispatchInfo.Capture(e));

        // cautionary: we make sure that anything that is waiting on the
        // queue is notified about this catastrophic error, and we wait
        // just a bit more to verify that nothing racy can still get there
        for (int i = 0; i < 3; i++)
        {
            while (_operations.TryDequeue(out MergedTransactionCommand result))
            {
                result.Exception = e;
                NotifyOnThreadPool(result);
            }

            try
            {
                _waitHandle.Wait(50, _shutdown);
                _waitHandle.Reset();
            }
            catch (OperationCanceledException)
            {
                break;
            }
        }
    }
}
private unsafe void VoronPrefetcher()
{
    NativeMemory.EnsureRegistered();
    // We want this to always run, even if we dispose / create new storage env, this is
    // static for the lifetime of the process, and environments will register / unregister
    // from it as needed

    // We are already zeroing 512 bytes and with the default behavior can get 16MB prefetches
    // per sys call, so that is quite enough. We expect that we'll only rarely need to do more
    // than 32 at a go, anyway
    const int StackSpace = 32;

    var toPrefetch = stackalloc PrefetchRanges[StackSpace];
    try
    {
        while (true)
        {
            ref PrefetchRanges element0 = ref toPrefetch[0];
            CommandQueue.TryTake(out element0, Timeout.InfiniteTimeSpan);

            int prefetchIdx = 1;
            while (CommandQueue.Count != 0)
            {
                // Prepare the segment information.
                ref PrefetchRanges memoryToPrefetch = ref toPrefetch[prefetchIdx];
                CommandQueue.TryTake(out memoryToPrefetch, 0);

                prefetchIdx++;
                if (prefetchIdx >= StackSpace)
                {
                    // We don't have enough space, so we send the batch to the kernel.
                    // We explicitly ignore the return code here; this is an optimization only
                    rvn_prefetch_ranges(toPrefetch, StackSpace, out _);
                    prefetchIdx = 0;
                }
            }

            if (prefetchIdx != 0)
            {
                // we explicitly ignore the return code here; this is an optimization only
                rvn_prefetch_ranges(toPrefetch, prefetchIdx, out _);
            }
        }
    }
    catch (Exception e)
    {
        // the original snippet is truncated here; a minimal completion, assuming the same
        // catastrophic-failure logging used by the other long-running Voron threads
        if (_log.IsOperationsEnabled)
        {
            _log.Operations("Catastrophic failure in Voron prefetcher", e);
        }
    }
}
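// A safe-code sketch of the batching idea in VoronPrefetcher above: accumulate requests
// into a fixed-size buffer and hand them off in a single call whenever the buffer fills,
// then flush the remainder once the queue runs dry. The flushBatch delegate stands in
// for the native rvn_prefetch_ranges call and is illustrative only.
private static void DrainInBatches<T>(BlockingCollection<T> queue, T[] buffer, Action<T[], int> flushBatch)
{
    buffer[0] = queue.Take();           // block until the first item arrives
    var idx = 1;
    while (queue.TryTake(out var item)) // then opportunistically drain without blocking
    {
        buffer[idx++] = item;
        if (idx == buffer.Length)
        {
            flushBatch(buffer, idx);    // full batch: one call covers the whole buffer
            idx = 0;
        }
    }

    if (idx != 0)
        flushBatch(buffer, idx);        // flush whatever is left over
}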
private void Run()
{
    NativeMemory.EnsureRegistered();

    int tries = 0;
    while (true)
    {
        (WaitCallback callback, object state) result;
        while (_actions.TryDequeue(out result))
        {
            try
            {
                result.callback(result.state);
            }
            catch
            {
                // there is nothing that we _can_ do here that would be right,
                // and there is no meaningful error handling. Ignoring this because
                // callers are expected to do their own exception catching
            }
        }

        // PERF: Entering a kernel lock is very costly, even though ManualResetEventSlim
        // will try to avoid it by spin-waiting first. This is a hack that lets us amortize
        // very high-frequency events a bit. Handling this properly requires infrastructure
        // changes. https://issues.hibernatingrhinos.com/issue/RavenDB-8126
        if (tries < 5)
        {
            // Yield execution quantum. If we are in a high-frequency event we will be able to avoid the kernel lock.
            Thread.Sleep(0);
            tries++;
        }
        else
        {
            _event.WaitHandle.WaitOne();
            _event.Reset();

            // Nothing we can do here, just block.
            tries = 0;
        }
    }
}
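// A condensed sketch of the amortization trick in Run above, assuming a generic work
// queue: yield the time slice a few times before paying for a kernel-level wait, so
// bursts of high-frequency producers are usually caught without ever blocking. The
// names ConsumeLoop, queue, and signal are illustrative.
private static void ConsumeLoop(ConcurrentQueue<Action> queue, ManualResetEventSlim signal)
{
    var tries = 0;
    while (true)
    {
        while (queue.TryDequeue(out var action))
            action();

        if (tries < 5)
        {
            Thread.Sleep(0); // yield the quantum; much cheaper than a kernel wait
            tries++;
        }
        else
        {
            signal.Wait();   // give up and block until a producer signals
            signal.Reset();
            tries = 0;
        }
    }
}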
private void ReplicateToDestination()
{
    try
    {
        AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiate);
        NativeMemory.EnsureRegistered();
        if (_log.IsInfoEnabled)
            _log.Info($"Will replicate to {Destination.FromString()} via {_connectionInfo.Url}");

        using (_parent._server.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
        using (context.OpenReadTransaction())
        {
            var record = _parent.LoadDatabaseRecord();
            if (record == null)
                throw new InvalidOperationException($"The database record for {_parent.Database.Name} does not exist?!");

            if (record.Encrypted && Destination.Url.StartsWith("https:", StringComparison.OrdinalIgnoreCase) == false)
                throw new InvalidOperationException(
                    $"{record.DatabaseName} is encrypted and requires HTTPS for replication, " +
                    $"but the endpoint with url {Destination.Url} to database {Destination.Database} does not use it");
        }

        var task = TcpUtils.ConnectSocketAsync(_connectionInfo, _parent._server.Engine.TcpConnectionTimeout, _log);
        task.Wait(CancellationToken);
        using (Interlocked.Exchange(ref _tcpClient, task.Result))
        {
            var wrapSsl = TcpUtils.WrapStreamWithSslAsync(_tcpClient, _connectionInfo,
                _parent._server.Server.Certificate.Certificate,
                _parent._server.Engine.TcpConnectionTimeout);
            wrapSsl.Wait(CancellationToken);

            using (_stream = wrapSsl.Result) // note that _stream is being disposed by the interruptible read
            using (_interruptibleRead = new InterruptibleRead(_database.DocumentsStorage.ContextPool, _stream))
            using (_buffer = JsonOperationContext.ManagedPinnedBuffer.LongLivedInstance())
            {
                var documentSender = new ReplicationDocumentSender(_stream, this, _log);

                WriteHeaderToRemotePeer();
                // handle the initial response to the last etag and related state
                try
                {
                    var response = HandleServerResponse(getFullResponse: true);
                    switch (response.ReplyType)
                    {
                        // The first time we start replication we need to register the destination's current CV
                        case ReplicationMessageReply.ReplyType.Ok:
                            LastAcceptedChangeVector = response.Reply.DatabaseChangeVector;
                            break;
                        case ReplicationMessageReply.ReplyType.Error:
                            var exception = new InvalidOperationException(response.Reply.Exception);
                            if (response.Reply.Exception.Contains(nameof(DatabaseDoesNotExistException)) ||
                                response.Reply.Exception.Contains(nameof(DatabaseNotRelevantException)))
                            {
                                AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, "Database does not exist");
                                DatabaseDoesNotExistException.ThrowWithMessageAndException(Destination.Database, response.Reply.Message, exception);
                            }

                            AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, $"Got error: {response.Reply.Exception}");
                            throw exception;
                    }
                }
                catch (DatabaseDoesNotExistException e)
                {
                    var msg = $"Failed to parse initial server replication response, because there is no database named {_database.Name} " +
                              "on the other end. ";
                    if (_external)
                        msg += "In order for the replication to work, a database with the same name needs to be created at the destination. ";

                    var young = (DateTime.UtcNow - _startedAt).TotalSeconds < 30;
                    if (young)
                        msg += "This can happen if the other node wasn't yet notified about being assigned this database and should be resolved shortly.";

                    if (_log.IsInfoEnabled)
                        _log.Info(msg, e);

                    AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, msg);

                    // won't add an alert on young connections,
                    // because it may take a few seconds for the other side to be notified by
                    // the cluster that it has this db.
                    if (young == false)
                        AddAlertOnFailureToReachOtherSide(msg, e);

                    throw;
                }
                catch (OperationCanceledException e)
                {
                    const string msg = "Got operation canceled notification while opening outgoing replication channel. " +
                                       "Aborting and closing the channel.";
                    if (_log.IsInfoEnabled)
                        _log.Info(msg, e);

                    AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, msg);
                    throw;
                }
                catch (Exception e)
                {
                    var msg = $"{OutgoingReplicationThreadName} got an unexpected exception during initial handshake";
                    if (_log.IsInfoEnabled)
                        _log.Info(msg, e);

                    AddReplicationPulse(ReplicationPulseDirection.OutgoingInitiateError, msg);
                    AddAlertOnFailureToReachOtherSide(msg, e);
                    throw;
                }

                DateTime nextReplicateAt = default(DateTime);

                while (_cts.IsCancellationRequested == false)
                {
                    while (_database.Time.GetUtcNow() > nextReplicateAt)
                    {
                        if (_parent.DebugWaitAndRunReplicationOnce != null)
                        {
                            _parent.DebugWaitAndRunReplicationOnce.Wait(_cts.Token);
                            _parent.DebugWaitAndRunReplicationOnce.Reset();
                        }

                        var sp = Stopwatch.StartNew();
                        var stats = _lastStats = new OutgoingReplicationStatsAggregator(_parent.GetNextReplicationStatsId(), _lastStats);
                        AddReplicationPerformance(stats);
                        AddReplicationPulse(ReplicationPulseDirection.OutgoingBegin);

                        try
                        {
                            using (var scope = stats.CreateScope())
                            {
                                try
                                {
                                    if (Destination is InternalReplication dest)
                                    {
                                        _parent.EnsureNotDeleted(dest.NodeTag);
                                    }

                                    var didWork = documentSender.ExecuteReplicationOnce(scope, ref nextReplicateAt);
                                    if (didWork == false)
                                        break;

                                    if (Destination is ExternalReplication externalReplication)
                                    {
                                        var taskId = externalReplication.TaskId;
                                        UpdateExternalReplicationInfo(taskId);
                                    }

                                    DocumentsSend?.Invoke(this);

                                    if (sp.ElapsedMilliseconds > 60 * 1000)
                                    {
                                        _waitForChanges.Set();
                                        break;
                                    }
                                }
                                catch (OperationCanceledException)
                                {
                                    // cancellation is not an actual error,
                                    // it is a "notification" that we need to cancel the current operation
                                    const string msg = "Operation was canceled.";
                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingError, msg);
                                    throw;
                                }
                                catch (Exception e)
                                {
                                    AddReplicationPulse(ReplicationPulseDirection.OutgoingError, e.Message);
                                    scope.AddError(e);
                                    throw;
                                }
                            }
                        }
                        finally
                        {
                            stats.Complete();
                            AddReplicationPulse(ReplicationPulseDirection.OutgoingEnd);
                        }
                    }

                    // if this returns false, it means that either the timeout expired or the cancellation token was activated
                    while (WaitForChanges(_parent.MinimalHeartbeatInterval, _cts.Token) == false)
                    {
                        // if we got canceled we need to break right away
                        if (_cts.IsCancellationRequested)
                            break;

                        // open tx
                        // read the current change vector and compare it to the last sent one
                        // if okay, send the cv
                        using (_database.DocumentsStorage.ContextPool.AllocateOperationContext(out DocumentsOperationContext ctx))
                        using (var tx = ctx.OpenReadTransaction())
                        {
                            var etag = DocumentsStorage.ReadLastEtag(tx.InnerTransaction);
                            if (etag == _lastSentDocumentEtag)
                            {
                                SendHeartbeat(DocumentsStorage.GetDatabaseChangeVector(ctx));
                                _parent.CompleteDeletionIfNeeded();
                            }
                            else if (nextReplicateAt > DateTime.UtcNow)
                            {
                                SendHeartbeat(null);
                            }
                            else
                            {
                                // Send a heartbeat first so we will get an updated CV of the destination
                                var currentChangeVector = DocumentsStorage.GetDatabaseChangeVector(ctx);
                                SendHeartbeat(null);

                                // If our previous CV is already merged to the destination, wait a bit more
                                if (ChangeVectorUtils.GetConflictStatus(LastAcceptedChangeVector, currentChangeVector) ==
                                    ConflictStatus.AlreadyMerged)
                                {
                                    continue;
                                }

                                // we have updates that we need to send to the other side,
                                // so let's do that..
                                // this can happen if we got replication from another node
                                // that we need to send to it. Note that we typically
                                // will wait for the other node to send the data directly to
                                // our destination, but if it doesn't, we'll step in.
                                // In this case, we try to limit congestion in the network and
                                // only send updates that we have gotten from someone else after
                                // a certain time, to let the other side tell us that it already
                                // got it. Note that this is merely an optimization to reduce network
                                // traffic. It is fine to have the same data come from different sources.
                                break;
                            }
                        }
                    }

                    _waitForChanges.Reset();
                }
            }
        }
    }
    catch (AggregateException e)
    {
        if (e.InnerExceptions.Count == 1 && e.InnerException is OperationCanceledException oce)
        {
            HandleOperationCancelException(oce);
        }
        else if (e.InnerExceptions.Count == 1 && e.InnerException is IOException ioe)
        {
            HandleIOException(ioe);
        }
        else
        {
            HandleException(e);
        }
    }
    catch (OperationCanceledException e)
    {
        HandleOperationCancelException(e);
    }
    catch (IOException e)
    {
        HandleIOException(e);
    }
    catch (Exception e)
    {
        HandleException(e);
    }

    void HandleOperationCancelException(OperationCanceledException e)
    {
        if (_log.IsInfoEnabled)
        {
            _log.Info($"Operation canceled on replication thread ({FromToString}). " +
                      "This is not necessarily due to an issue. Stopping the thread.");
        }
        if (_cts.IsCancellationRequested == false)
        {
            Failed?.Invoke(this, e);
        }
    }

    void HandleIOException(IOException e)
    {
        if (_log.IsInfoEnabled)
        {
            if (e.InnerException is SocketException)
                _log.Info($"SocketException was thrown from the connection to remote node ({FromToString}). " +
                          "This might mean that the remote node is down or that there is a network issue.", e);
            else
                _log.Info($"IOException was thrown from the connection to remote node ({FromToString}).", e);
        }
        Failed?.Invoke(this, e);
    }

    void HandleException(Exception e)
    {
        if (_log.IsInfoEnabled)
        {
            _log.Info($"Unexpected exception occurred on replication thread ({FromToString}). " +
                      "Replication stopped (will be retried later).", e);
        }
        Failed?.Invoke(this, e);
    }
}
private void MonitorMemoryUsage()
{
    NativeMemory.EnsureRegistered();
    int clearInactiveHandlersCounter = 0;
    var memoryAvailableHandles = new WaitHandle[] { _simulatedLowMemory, _shutdownRequested };
    var paranoidModeHandles = new WaitHandle[] { _simulatedLowMemory, _shutdownRequested, _warnAllocation };
    var timeout = 5 * 1000;
    while (true)
    {
        long totalUnmanagedAllocations = 0;
        var handles = SelectWaitMode(paranoidModeHandles, memoryAvailableHandles);
        switch (WaitHandle.WaitAny(handles, timeout))
        {
            case WaitHandle.WaitTimeout:
                if (++clearInactiveHandlersCounter > 60) // 60 iterations of a 5-second WaitAny == 5 minutes
                {
                    clearInactiveHandlersCounter = 0;
                    ClearInactiveHandlers();
                }

                foreach (var stats in NativeMemory.ThreadAllocations.Values)
                {
                    if (stats.ThreadInstance.IsAlive == false)
                        continue;

                    totalUnmanagedAllocations += stats.TotalAllocated;
                }

                var memInfo = MemoryInformation.GetMemoryInfo();
                var currentProcessMemoryMappedShared = GetCurrentProcessMemoryMappedShared();
                var availableMem = (memInfo.AvailableMemory + currentProcessMemoryMappedShared).GetValue(SizeUnit.Bytes);

                if (availableMem < _lowMemoryThreshold &&
                    // at all times, we want 2% of physical memory or 1 GB, whichever is lower
                    memInfo.AvailableMemory < Size.Min((memInfo.TotalPhysicalMemory / 50), new Size(1, SizeUnit.Gigabytes)))
                {
                    if (LowMemoryState == false)
                    {
                        if (_logger.IsInfoEnabled)
                            _logger.Info("Low memory detected, will try to reduce memory usage...");

                        AddLowMemEvent(LowMemReason.LowMemOnTimeoutChk, availableMem, totalUnmanagedAllocations,
                            memInfo.TotalPhysicalMemory.GetValue(SizeUnit.Bytes));
                    }
                    LowMemoryState = true;
                    clearInactiveHandlersCounter = 0;
                    RunLowMemoryHandlers(true);
                    timeout = 500;
                }
                else
                {
                    if (LowMemoryState)
                    {
                        if (_logger.IsInfoEnabled)
                            _logger.Info("Back to normal memory usage detected");

                        AddLowMemEvent(LowMemReason.BackToNormal, availableMem, totalUnmanagedAllocations,
                            memInfo.TotalPhysicalMemory.GetValue(SizeUnit.Bytes));
                    }
                    LowMemoryState = false;
                    RunLowMemoryHandlers(false);
                    timeout = availableMem < _lowMemoryThreshold * 2 ? 1000 : 5000;
                }
                break;

            case 0:
                _simulatedLowMemory.Reset();
                LowMemoryState = !LowMemoryState;
                var memInfoForLog = MemoryInformation.GetMemoryInfo();
                var availableMemForLog = memInfoForLog.AvailableMemory.GetValue(SizeUnit.Bytes);
                AddLowMemEvent(LowMemoryState ? LowMemReason.LowMemStateSimulation : LowMemReason.BackToNormalSimulation,
                    availableMemForLog, totalUnmanagedAllocations,
                    memInfoForLog.TotalPhysicalMemory.GetValue(SizeUnit.Bytes));
                if (_logger.IsInfoEnabled)
                    _logger.Info("Simulating : " + (LowMemoryState ? "Low memory event" : "Back to normal memory usage"));

                RunLowMemoryHandlers(LowMemoryState);
                break;

            case 2: // check allocations
                _warnAllocation.Reset();
                goto case WaitHandle.WaitTimeout;

            case 1: // shutdown requested
                return;

            default:
                return;
        }
    }
}
private void MonitorMemoryUsage()
{
    NativeMemory.EnsureRegistered();
    int clearInactiveHandlersCounter = 0;
    var memoryAvailableHandles = new WaitHandle[] { _simulatedLowMemory, _shutdownRequested };
    var paranoidModeHandles = new WaitHandle[] { _simulatedLowMemory, _shutdownRequested, _warnAllocation };
    var timeout = 5 * 1000;
    while (true)
    {
        // reset per iteration; otherwise the total would keep accumulating across checks
        long totalUnmanagedAllocations = 0;
        var handles = MemoryInformation.GetMemoryInfo().AvailableMemory.GetValue(SizeUnit.Bytes) < _lowMemoryThreshold * 2
            ? paranoidModeHandles
            : memoryAvailableHandles;
        switch (WaitHandle.WaitAny(handles, timeout))
        {
            case WaitHandle.WaitTimeout:
                if (++clearInactiveHandlersCounter > 60) // 60 iterations of a 5-second WaitAny == 5 minutes
                {
                    clearInactiveHandlersCounter = 0;
                    ClearInactiveHandlers();
                }

                foreach (var stats in NativeMemory.ThreadAllocations.Values)
                {
                    if (stats.ThreadInstance.IsAlive == false)
                        continue;

                    totalUnmanagedAllocations += stats.TotalAllocated;
                }

                var memInfo = MemoryInformation.GetMemoryInfo();
                var availableMem = memInfo.AvailableMemory.GetValue(SizeUnit.Bytes);
                if (availableMem < _lowMemoryThreshold &&
                    totalUnmanagedAllocations > memInfo.TotalPhysicalMemory.GetValue(SizeUnit.Bytes) * _physicalRatioForLowMemDetection)
                {
                    if (LowMemoryState == false && _logger.IsInfoEnabled)
                        _logger.Info("Low memory detected, will try to reduce memory usage...");

                    LowMemoryState = true;
                    clearInactiveHandlersCounter = 0;
                    RunLowMemoryHandlers(true);
                    timeout = 500;
                }
                else
                {
                    if (LowMemoryState && _logger.IsInfoEnabled)
                        _logger.Info("Back to normal memory usage detected");

                    LowMemoryState = false;
                    RunLowMemoryHandlers(false);
                    timeout = availableMem < _lowMemoryThreshold * 2 ? 1000 : 5000;
                }
                break;

            case 0:
                _simulatedLowMemory.Reset();
                LowMemoryState = !LowMemoryState;
                if (_logger.IsInfoEnabled)
                    _logger.Info("Simulating : " + (LowMemoryState ? "Low memory event" : "Back to normal memory usage"));

                RunLowMemoryHandlers(LowMemoryState);
                break;

            case 2: // check allocations
                _warnAllocation.Reset();
                goto case WaitHandle.WaitTimeout;

            case 1: // shutdown requested
                return;

            default:
                return;
        }
    }
}
private void BackgroundLogger()
{
    NativeMemory.EnsureRegistered();
    try
    {
        Interlocked.Increment(ref _generation);
        var threadStates = new List<WeakReference<LocalThreadWriterState>>();
        var threadStatesToRemove = new FastStack<WeakReference<LocalThreadWriterState>>();
        while (_keepLogging)
        {
            try
            {
                const int maxFileSize = 1024 * 1024 * 256;
                using (var currentFile = GetNewStream(maxFileSize))
                {
                    var sizeWritten = 0;
                    var foundEntry = true;

                    while (sizeWritten < maxFileSize)
                    {
                        if (foundEntry == false)
                        {
                            if (_keepLogging == false)
                                return;

                            _hasEntries.Wait();
                            if (_keepLogging == false)
                                return;

                            _hasEntries.Reset();
                        }

                        foundEntry = false;
                        foreach (var threadStateRef in threadStates)
                        {
                            if (threadStateRef.TryGetTarget(out LocalThreadWriterState threadState) == false)
                            {
                                threadStatesToRemove.Push(threadStateRef);
                                continue;
                            }

                            for (var i = 0; i < 16; i++)
                            {
                                if (threadState.Full.Dequeue(out WebSocketMessageEntry item) == false)
                                    break;

                                foundEntry = true;
                                sizeWritten += ActualWriteToLogTargets(item, currentFile);
                                threadState.Free.Enqueue(item);
                            }
                        }

                        while (threadStatesToRemove.TryPop(out var ts))
                            threadStates.Remove(ts);

                        if (_newThreadStates.IsEmpty)
                            continue;

                        while (_newThreadStates.TryDequeue(out WeakReference<LocalThreadWriterState> result))
                            threadStates.Add(result);

                        _hasEntries.Set(); // we need to start writing logs again from the new thread states
                    }
                }
            }
            catch (OutOfMemoryException)
            {
                Console.Error.WriteLine("ERROR! Out of memory exception while trying to log, will avoid logging for the next 5 seconds");

                var timeLeft = 5000;
                var start = Stopwatch.GetTimestamp();
                while (timeLeft > 0 && _hasEntries.Wait(timeLeft))
                {
                    _hasEntries.Reset();

                    foreach (var threadStateRef in threadStates)
                        DiscardThreadLogState(threadStateRef);

                    foreach (var newThreadState in _newThreadStates)
                        DiscardThreadLogState(newThreadState);

                    // Stopwatch ticks convert to milliseconds via ticks * 1000 / Frequency;
                    // keep discarding entries until the full 5 seconds are up
                    var elapsedMs = (Stopwatch.GetTimestamp() - start) * 1000 / Stopwatch.Frequency;
                    timeLeft = 5000 - (int)elapsedMs;
                }
            }
        }
    }
    catch (Exception e)
    {
        var msg = $"FATAL ERROR trying to log!{Environment.NewLine}{e}";
        Console.Error.WriteLine(msg);
    }
}
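// The back-off in the OutOfMemoryException handler above needs wall-clock milliseconds
// from raw Stopwatch ticks. A minimal helper showing the conversion used in the fix
// (ticks * 1000 / Stopwatch.Frequency); the name ElapsedMilliseconds is illustrative.
private static int ElapsedMilliseconds(long startTimestamp)
{
    return (int)((Stopwatch.GetTimestamp() - startTimestamp) * 1000 / Stopwatch.Frequency);
}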
private void MonitorMemoryUsage()
{
    SmapsReader smapsReader = PlatformDetails.RunningOnLinux
        ? new SmapsReader(new[] { new byte[SmapsReader.BufferSize], new byte[SmapsReader.BufferSize] })
        : null;
    NativeMemory.EnsureRegistered();
    var memoryAvailableHandles = new WaitHandle[] { _simulatedLowMemory, _shutdownRequested };
    var timeout = 5 * 1000;
    try
    {
        while (true)
        {
            try
            {
                var result = WaitHandle.WaitAny(memoryAvailableHandles, timeout);
                switch (result)
                {
                    case WaitHandle.WaitTimeout:
                        timeout = CheckMemoryStatus(smapsReader);
                        break;
                    case 0:
                        SimulateLowMemory();
                        break;
                    case 1: // shutdown requested
                        return;
                    default:
                        return;
                }
            }
            catch (OutOfMemoryException)
            {
                try
                {
                    if (_logger.IsInfoEnabled)
                    {
                        _logger.Info("Out of memory error in the low memory notification thread, " +
                                     "will wait 5 seconds before trying to check memory status again. " +
                                     "The system is likely running out of memory");
                    }
                }
                catch
                {
                    // can't even log, nothing we can do here
                }

                if (_shutdownRequested.WaitOne(5000))
                    return; // shutdown requested
            }
        }
    }
    catch (Exception e)
    {
        if (_logger.IsOperationsEnabled)
        {
            _logger.Operations("Catastrophic failure in low memory notification", e);
        }
    }
}
private void BackgroundLogger()
{
    NativeMemory.EnsureRegistered();
    try
    {
        Interlocked.Increment(ref _generation);
        var threadStates = new List<WeakReference<LocalThreadWriterState>>();
        var threadStatesToRemove = new FastStack<WeakReference<LocalThreadWriterState>>();
        while (_keepLogging)
        {
            const int maxFileSize = 1024 * 1024 * 256;
            using (var currentFile = GetNewStream(maxFileSize))
            {
                var sizeWritten = 0;
                var foundEntry = true;

                while (sizeWritten < maxFileSize)
                {
                    if (foundEntry == false)
                    {
                        if (_keepLogging == false)
                            return;

                        _hasEntries.Wait();
                        if (_keepLogging == false)
                            return;

                        _hasEntries.Reset();
                    }

                    foundEntry = false;
                    foreach (var threadStateRef in threadStates)
                    {
                        if (threadStateRef.TryGetTarget(out LocalThreadWriterState threadState) == false)
                        {
                            threadStatesToRemove.Push(threadStateRef);
                            continue;
                        }

                        for (var i = 0; i < 16; i++)
                        {
                            if (threadState.Full.Dequeue(out WebSocketMessageEntry item) == false)
                                break;

                            foundEntry = true;
                            sizeWritten += ActualWriteToLogTargets(item, currentFile);
                            threadState.Free.Enqueue(item);
                        }
                    }

                    while (threadStatesToRemove.TryPop(out var ts))
                        threadStates.Remove(ts);

                    if (_newThreadStates.IsEmpty)
                        continue;

                    while (_newThreadStates.TryDequeue(out WeakReference<LocalThreadWriterState> result))
                        threadStates.Add(result);
                }
            }
        }
    }
    catch (Exception e)
    {
        var msg = $"FATAL ERROR trying to log!{Environment.NewLine}{e}";
        Console.Error.WriteLine(msg); // TODO: Log to event viewer in Windows and sys trace in Linux?
    }
}
private void BackgroundLogger()
{
    NativeMemory.EnsureRegistered();
    try
    {
        Interlocked.Increment(ref _generation);
        var threadStates = new List<WeakReference<LocalThreadWriterState>>();
        var threadStatesToRemove = new FastStack<WeakReference<LocalThreadWriterState>>();
        while (_keepLogging)
        {
            try
            {
                var maxFileSize = MaxFileSizeInBytes;
                if (TryGetNewStreamAndApplyRetentionPolicies(maxFileSize, out var currentFile) == false)
                {
                    if (_keepLogging == false)
                        return;

                    _hasEntries.Wait(1000);
                    continue;
                }

                using (currentFile)
                {
                    _readyToCompress.Set();

                    var sizeWritten = 0;
                    var foundEntry = true;

                    while (sizeWritten < maxFileSize)
                    {
                        if (foundEntry == false)
                        {
                            if (_keepLogging == false)
                                return;

                            // we don't want an fsync here, we just want to send it to the OS
                            currentFile.Flush(flushToDisk: false);
                            if (_hasEntries.IsSet == false)
                            {
                                // about to go to sleep, so we can check whether we need to update
                                // the time offset or create a new file for today's logs
                                UpdateLocalDateTimeOffset();

                                if (DateTime.Today != _today)
                                {
                                    // roll over to a new file so its name carries today's date
                                    break;
                                }
                            }

                            _hasEntries.Wait();
                            if (_keepLogging == false)
                                return;

                            _hasEntries.Reset();
                        }

                        foundEntry = false;
                        foreach (var threadStateRef in threadStates)
                        {
                            if (threadStateRef.TryGetTarget(out LocalThreadWriterState threadState) == false)
                            {
                                threadStatesToRemove.Push(threadStateRef);
                                continue;
                            }

                            for (var i = 0; i < 16; i++)
                            {
                                if (threadState.Full.Dequeue(out WebSocketMessageEntry item) == false)
                                    break;

                                foundEntry = true;
                                sizeWritten += ActualWriteToLogTargets(item, currentFile);
                                threadState.Free.Enqueue(item);
                            }
                        }

                        while (threadStatesToRemove.TryPop(out var ts))
                            threadStates.Remove(ts);

                        if (_newThreadStates.IsEmpty)
                            continue;

                        while (_newThreadStates.TryDequeue(out WeakReference<LocalThreadWriterState> result))
                            threadStates.Add(result);

                        _hasEntries.Set(); // we need to start writing logs again from the new thread states
                    }
                }
            }
            catch (OutOfMemoryException)
            {
                Console.Error.WriteLine("ERROR! Out of memory exception while trying to log, will avoid logging for the next 5 seconds");

                var timeLeft = 5000;
                var start = Stopwatch.GetTimestamp();
                while (timeLeft > 0 && _hasEntries.Wait(timeLeft))
                {
                    _hasEntries.Reset();

                    foreach (var threadStateRef in threadStates)
                        DiscardThreadLogState(threadStateRef);

                    foreach (var newThreadState in _newThreadStates)
                        DiscardThreadLogState(newThreadState);

                    // Stopwatch ticks convert to milliseconds via ticks * 1000 / Frequency;
                    // keep discarding entries until the full 5 seconds are up
                    var elapsedMs = (Stopwatch.GetTimestamp() - start) * 1000 / Stopwatch.Frequency;
                    timeLeft = 5000 - (int)elapsedMs;
                }
            }
            catch (Exception e)
            {
                var msg = $"FATAL ERROR trying to log!{Environment.NewLine}{e}";
                Console.Error.WriteLine(msg);
            }
        }
    }
    finally
    {
        _readyToCompress.Set();
        if (_compressLoggingThread?.Join(1000) == false)
            _tokenSource.Cancel();
    }
}
private void BackgroundLogger()
{
    NativeMemory.EnsureRegistered();
    try
    {
        Interlocked.Increment(ref _generation);
        var threadStatesToRemove = new FastStack<WeakReference<LocalThreadWriterState>>();
        while (_keepLogging)
        {
            try
            {
                var maxFileSize = MaxFileSizeInBytes;
                if (TryGetNewStreamAndApplyRetentionPolicies(maxFileSize, out var currentFile) == false)
                {
                    if (_keepLogging == false)
                        return;

                    _hasEntries.Wait(1000);
                    continue;
                }

                using (currentFile)
                {
                    _readyToCompress.Set();

                    var sizeWritten = 0;
                    var foundEntry = true;

                    while (sizeWritten < maxFileSize)
                    {
                        if (foundEntry == false)
                        {
                            if (_keepLogging == false)
                                return;

                            // we don't want an fsync here, we just want to send it to the OS
                            currentFile.Flush(flushToDisk: false);
                            if (_hasEntries.IsSet == false)
                            {
                                // about to go to sleep, so we can check whether we need to update
                                // the time offset or create a new file for today's logs
                                UpdateLocalDateTimeOffset();

                                if (DateTime.Today != _today)
                                {
                                    // roll over to a new file so its name carries today's date
                                    break;
                                }
                            }

                            _hasEntries.Wait();
                            if (_keepLogging == false)
                                return;

                            _hasEntries.Reset();
                        }

                        foundEntry = false;
                        for (var index = 0; index < _activePoolMessageEntries.Length; index++)
                        {
                            var messages = _activePoolMessageEntries[index];
                            for (var limit = 0; limit < 16; limit++)
                            {
                                if (messages.TryDequeue(out LogMessageEntry item) == false)
                                    break;

                                foundEntry = true;
                                sizeWritten += ActualWriteToLogTargets(item, currentFile);

                                Debug.Assert(item.Data != null);
                                _freePooledMessageEntries[index].Enqueue(item, 128);
                            }
                        }
                    }
                }
            }
            catch (OutOfMemoryException)
            {
                Console.Error.WriteLine("Out of memory exception while trying to log, will avoid logging for the next 5 seconds");
                DisableLogsFor(TimeSpan.FromSeconds(5));
            }
            catch (Exception e)
            {
                var msg = e is IOException i && IsOutOfDiskSpaceException(i)
                    ? "Couldn't create a new log file because the disk is out of space! " +
                      "Disabling the logs for 30 seconds"
                    : "FATAL ERROR trying to log!";

                Console.Error.WriteLine($"{msg}{Environment.NewLine}{e}");
                DisableLogsFor(TimeSpan.FromSeconds(30));
            }
        }
    }
    finally
    {
        _readyToCompress.Set();
        if (_compressLoggingThread?.Join(1000) == false)
            _tokenSource.Cancel();
    }
}
private void CreateUploadTaskIfNeeded<S, T>(S settings, Action<S, FileStream, Progress> uploadToServer, T uploadStatus, string targetName)
    where S : BackupSettings
    where T : CloudUploadStatus
{
    if (PeriodicBackupConfiguration.CanBackupUsing(settings) == false)
        return;

    Debug.Assert(uploadStatus != null);

    var localUploadStatus = uploadStatus;
    var thread = PoolOfThreads.GlobalRavenThreadPool.LongRunning(_ =>
    {
        try
        {
            Thread.CurrentThread.Priority = ThreadPriority.BelowNormal;
            NativeMemory.EnsureRegistered();

            using (localUploadStatus.UpdateStats(_isFullBackup))
            using (var fileStream = File.OpenRead(_settings.BackupPath))
            {
                var uploadProgress = localUploadStatus.UploadProgress;
                try
                {
                    localUploadStatus.Skipped = false;
                    uploadProgress.ChangeState(UploadState.PendingUpload);
                    uploadProgress.SetTotal(fileStream.Length);

                    AddInfo($"Starting the upload of backup file to {targetName}.");

                    var bytesPutsPerSec = new MeterMetric();

                    long lastUploadedInBytes = 0;
                    var totalToUpload = new Size(uploadProgress.TotalInBytes, SizeUnit.Bytes).ToString();
                    var sw = Stopwatch.StartNew();
                    var progress = new Progress(uploadProgress)
                    {
                        OnUploadProgress = () =>
                        {
                            if (sw.ElapsedMilliseconds <= 1000)
                                return;

                            var totalUploadedInBytes = uploadProgress.UploadedInBytes;
                            bytesPutsPerSec.MarkSingleThreaded(totalUploadedInBytes - lastUploadedInBytes);
                            lastUploadedInBytes = totalUploadedInBytes;
                            var uploaded = new Size(totalUploadedInBytes, SizeUnit.Bytes);
                            uploadProgress.BytesPutsPerSec = bytesPutsPerSec.MeanRate;
                            AddInfo($"Uploaded: {uploaded} / {totalToUpload}");
                            sw.Restart();
                        }
                    };

                    uploadToServer(settings, fileStream, progress);

                    AddInfo($"Total uploaded: {totalToUpload}, took: {MsToHumanReadableString(uploadProgress.UploadTimeInMs)}");
                }
                finally
                {
                    uploadProgress.ChangeState(UploadState.Done);
                }
            }
        }
        catch (Exception e)
        {
            var extracted = e.ExtractSingleInnerException();
            var error = $"Failed to upload the backup file to {targetName}.";
            Exception exception = null;
            if (extracted is OperationCanceledException)
            {
                // shutting down or HttpClient timeout
                exception = TaskCancelToken.Token.IsCancellationRequested ? extracted : new TimeoutException(error, e);
            }

            localUploadStatus.Exception = (exception ?? e).ToString();
            _exceptions.Add(exception ?? new InvalidOperationException(error, e));
        }
    }, null, $"Upload backup file of database '{_settings.DatabaseName}' to {targetName} (task: '{_settings.TaskName}')");

    _threads.Add(thread);
}
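// A minimal sketch of the throttled progress reporting used in CreateUploadTaskIfNeeded
// above: only surface an update when at least a second has passed since the last one, so
// a tight upload loop doesn't flood the notification channel. CreateThrottledReporter
// and the report delegate are illustrative, not part of the codebase.
private static Action<long> CreateThrottledReporter(Action<long> report)
{
    var sw = Stopwatch.StartNew();
    return uploadedBytes =>
    {
        if (sw.ElapsedMilliseconds <= 1000)
            return;            // too soon since the last report, skip this update

        report(uploadedBytes); // e.g. log "Uploaded: X / Y" or update a metric
        sw.Restart();
    };
}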
private void MergeOperationThreadProc()
{
    try
    {
        Thread.CurrentThread.Priority = ThreadPriority.AboveNormal;
    }
    catch (Exception e)
    {
        if (_log.IsInfoEnabled)
        {
            _log.Info("Unable to elevate the transaction merger thread for " + _parent.Name, e);
        }
    }

    var oomTimer = new Stopwatch(); // this is allocated here to avoid an OOM when using it

    while (true) // this is actually only executed once, except if we are trying to recover from OOM errors
    {
        NativeMemory.EnsureRegistered();
        try
        {
            while (_runTransactions)
            {
                _recording.State?.Prepare(ref _recording.State);

                if (_operations.IsEmpty)
                {
                    if (_parent.IsEncrypted)
                    {
                        using (_parent.DocumentsStorage.ContextPool.AllocateOperationContext(out DocumentsOperationContext ctx))
                        {
                            using (ctx.OpenWriteTransaction())
                            {
                                ctx.Environment.Options.Encryption.JournalCompressionBufferHandler
                                    .ZeroCompressionBuffer(ctx.Transaction.InnerTransaction.LowLevelTransaction);
                            }
                        }
                    }

                    using (var generalMeter = GeneralWaitPerformanceMetrics.MeterPerformanceRate())
                    {
                        generalMeter.IncrementCounter(1);
                        _waitHandle.Wait(_shutdown);
                    }
                    _waitHandle.Reset();
                }

                MergeTransactionsOnce();
            }
            return;
        }
        catch (OperationCanceledException)
        {
            // clean shutdown, nothing to do
        }
        catch (Exception e) when (e is EarlyOutOfMemoryException || e is OutOfMemoryException)
        {
            // this catch block is meant to handle potentially transient errors;
            // in particular, an OOM error is something that we want to recover from.
            // we'll handle this by throwing out all the pending transactions,
            // waiting for 3 seconds and then resuming normal operations
            if (_log.IsOperationsEnabled)
            {
                try
                {
                    _log.Operations(
                        "OutOfMemoryException happened in the transaction merger, will abort all transactions for the next 3 seconds and then resume operations",
                        e);
                }
                catch
                {
                    // under these conditions, this may throw, but we don't care, we wanna survive (cue music)
                }
            }

            ClearQueueWithException(e);
            oomTimer.Restart();
            while (_runTransactions)
            {
                try
                {
                    var timeSpan = TimeSpan.FromSeconds(3) - oomTimer.Elapsed;
                    if (timeSpan <= TimeSpan.Zero || _waitHandle.Wait(timeSpan, _shutdown) == false)
                        break;
                }
                catch (ObjectDisposedException)
                {
                    return;
                }
                catch (OperationCanceledException)
                {
                    return;
                }

                ClearQueueWithException(e);
            }
            oomTimer.Stop();
            // and now we return to the top of the loop and restart
        }
        catch (Exception e)
        {
            if (_log.IsOperationsEnabled)
            {
                _log.Operations(
                    "Serious failure in transaction merging thread, the database must be restarted!",
                    e);
            }

            Interlocked.Exchange(ref _edi, ExceptionDispatchInfo.Capture(e));

            // cautionary: we make sure that anything that is waiting on the
            // queue is notified about this catastrophic error, and we wait
            // just a bit more to verify that nothing racy can still get there
            while (_runTransactions)
            {
                ClearQueueWithException(e);
                try
                {
                    _waitHandle.Wait(_shutdown);
                    _waitHandle.Reset();
                }
                catch (ObjectDisposedException)
                {
                    return;
                }
                catch (OperationCanceledException)
                {
                    return;
                }
            }
        }
    }

    void ClearQueueWithException(Exception e)
    {
        while (_operations.TryDequeue(out MergedTransactionCommand result))
        {
            result.Exception = e;
            NotifyOnThreadPool(result);
        }
    }
}