/// <summary>
/// Handles the actions to take when the monitor has to "wake up"
/// </summary>
/// <param name="activationReason">The <see cref="MonitorActivationReason"/> that caused the invocation</param>
/// <param name="monitorState">The current <see cref="MonitorState"/>. Will be modified upon return</param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation</param>
/// <returns>A <see cref="Task"/> representing the running operation</returns>
async Task HandlerMonitorWakeup(MonitorActivationReason activationReason, MonitorState monitorState, CancellationToken cancellationToken)
{
    logger.LogDebug("Monitor activation. Reason: {0}", activationReason);

    // Returns true (and schedules a monitor restart) if the inactive server can't be used immediately.
    bool FullRestartDeadInactive()
    {
        if (monitorState.RebootingInactiveServer || monitorState.InactiveServerCritFail)
        {
            logger.LogInformation("Inactive server is {0}! Restarting monitor...", monitorState.InactiveServerCritFail ? "critically failed" : "still rebooting");
            monitorState.NextAction = MonitorAction.Restart; // will dispose server
            return true;
        }

        return false;
    }

    // Tries to move the inactive server onto the primary (public) port and swap the active/inactive roles.
    // Does not handle closing the active server's port.
    // Returns false (and schedules a monitor restart) if the port change failed.
    async Task<bool> MakeInactiveActive()
    {
        logger.LogDebug("Setting inactive server to port {0}...", ActiveLaunchParameters.PrimaryPort.Value);
        var result = await monitorState.InactiveServer.SetPort(ActiveLaunchParameters.PrimaryPort.Value, cancellationToken).ConfigureAwait(false);

        if (!result)
        {
            logger.LogWarning("Failed to activate inactive server! Restarting monitor...");
            monitorState.NextAction = MonitorAction.Restart; // will dispose server
            return false;
        }

        // inactive server should always be using active launch parameters
        LastLaunchParameters = ActiveLaunchParameters;

        var tmp = monitorState.ActiveServer;
        monitorState.ActiveServer = monitorState.InactiveServer;
        monitorState.InactiveServer = tmp;
        AlphaIsActive = !AlphaIsActive;
        return true;
    }

    // Tries to launch the inactive server with the latest dmb, falling back to the active server's dmb on failure.
    // Returns true unless the fallback dmb had to be used (also true when both attempts fail, in which case
    // InactiveServerCritFail is set). Requires a lock on <see cref="semaphore"/>.
    async Task<bool> RestartInactiveServer()
    {
        logger.LogInformation("Rebooting inactive server...");

        // NOTE(review): newDmb is not released in this method if the launch below throws - confirm whether it needs disposing.
        var newDmb = dmbFactory.LockNextDmb(1);
        bool usedMostRecentDmb;
        try
        {
            monitorState.InactiveServer = await sessionControllerFactory.LaunchNew(ActiveLaunchParameters, newDmb, null, false, !monitorState.ActiveServer.IsPrimary, false, cancellationToken).ConfigureAwait(false);
            usedMostRecentDmb = true;
        }
        catch (OperationCanceledException)
        {
            throw;
        }
        catch (Exception e)
        {
            logger.LogError("Error occurred while recreating server! Attempting backup strategy of running DMB of running server! Exception: {0}", e.ToString());

            // This should never happen, but something (perhaps a database error while handling the newest dmb) went wrong.
            // Either way, try to start the inactive server using the active server's dmb as a backup.
            try
            {
                var dmbBackup = await dmbFactory.FromCompileJob(monitorState.ActiveServer.Dmb.CompileJob, cancellationToken).ConfigureAwait(false);

                if (dmbBackup == null)
                {
                    // Give up: if THAT compile job can't be loaded, the active server is likely to fail soon too (or already has).
                    throw new JobException("Creating backup DMB provider failed!");
                }

                monitorState.InactiveServer = await sessionControllerFactory.LaunchNew(ActiveLaunchParameters, dmbBackup, null, false, !monitorState.ActiveServer.IsPrimary, false, cancellationToken).ConfigureAwait(false);
                usedMostRecentDmb = false;

                // The message has a {0} placeholder, so it must be formatted before sending;
                // it is filled with the message of the exception that caused the fallback.
                await chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Staging newest DMB on inactive server failed: {0} Falling back to previous dmb...", e.Message), cancellationToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                throw;
            }
            catch (Exception e2)
            {
                // Both the primary and the backup launch failed
                logger.LogError("Backup strategy failed! Monitor will restart when active server reboots! Exception: {0}", e2.ToString());
                monitorState.InactiveServerCritFail = true;
                await chat.SendWatchdogMessage("Attempted reboot of inactive server failed. Watchdog will reset when active server fails or exits", cancellationToken).ConfigureAwait(false);
                return true; // we didn't use the old dmb
            }
        }

        logger.LogInformation("Successfully relaunched inactive server!");
        monitorState.RebootingInactiveServer = true;
        return usedMostRecentDmb;
    }

    // Disposes the current inactive server, relaunches it and sets the next monitor action
    // to Break (breakAfter == true) or Continue (breakAfter == false).
    async Task UpdateAndRestartInactiveServer(bool breakAfter)
    {
        // replace the notification tcs here so that the next loop will read a fresh one
        activeParametersUpdated = new TaskCompletionSource<object>();
        monitorState.InactiveServer.Dispose(); // kill or recycle it

        var expectedAction = breakAfter ? MonitorAction.Break : MonitorAction.Continue;
        monitorState.NextAction = expectedAction;

        var usedLatestDmb = await RestartInactiveServer().ConfigureAwait(false);

        // Only adjust state if the relaunch did not change the planned action
        if (monitorState.NextAction == expectedAction)
        {
            monitorState.ActiveServer.ClosePortOnReboot = false;
            if (monitorState.InactiveServerHasStagedDmb && !usedLatestDmb)
            {
                monitorState.InactiveServerHasStagedDmb = false; // don't try to load it again though
            }
        }
    }

    string ExitWord(ISessionController controller) => controller.TerminationWasRequested ? "exited" : "crashed";

    // reason handling
    switch (activationReason)
    {
        case MonitorActivationReason.ActiveServerCrashed:
            if (monitorState.ActiveServer.RebootState == Components.Watchdog.RebootState.Shutdown)
            {
                await chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Active server {0}! Exiting due to graceful termination request...", ExitWord(monitorState.ActiveServer)), cancellationToken).ConfigureAwait(false);
                monitorState.NextAction = MonitorAction.Exit;
                break;
            }

            if (FullRestartDeadInactive())
            {
                await chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Active server {0}! Inactive server unable to online!", ExitWord(monitorState.ActiveServer)), cancellationToken).ConfigureAwait(false);
                break;
            }

            await chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Active server {0}! Onlining inactive server...", ExitWord(monitorState.ActiveServer)), cancellationToken).ConfigureAwait(false);
            if (!await MakeInactiveActive().ConfigureAwait(false))
            {
                break;
            }

            await UpdateAndRestartInactiveServer(true).ConfigureAwait(false);
            break;

        case MonitorActivationReason.InactiveServerCrashed:
            await chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Inactive server {0}! Rebooting...", ExitWord(monitorState.InactiveServer)), cancellationToken).ConfigureAwait(false);
            await UpdateAndRestartInactiveServer(false).ConfigureAwait(false);
            break;

        case MonitorActivationReason.ActiveServerRebooted:
            if (FullRestartDeadInactive())
            {
                break;
            }

            // what matters here is the RebootState
            bool restartOnceSwapped = false;
            var rebootState = monitorState.ActiveServer.RebootState;
            monitorState.ActiveServer.ResetRebootState(); // the DMAPI has already done this internally

            switch (rebootState)
            {
                case Components.Watchdog.RebootState.Normal:
                    break;
                case Components.Watchdog.RebootState.Restart:
                    restartOnceSwapped = true;
                    break;
                case Components.Watchdog.RebootState.Shutdown:
                    await chat.SendWatchdogMessage("Active server rebooted! Exiting due to graceful termination request...", cancellationToken).ConfigureAwait(false);
                    DisposeAndNullControllers();
                    monitorState.NextAction = MonitorAction.Exit;
                    return;
            }

            var sameCompileJob = monitorState.InactiveServer.Dmb.CompileJob.Id == monitorState.ActiveServer.Dmb.CompileJob.Id;
            if (sameCompileJob && monitorState.InactiveServerHasStagedDmb)
            {
                // both servers up to date
                monitorState.InactiveServerHasStagedDmb = false;
            }

            if (!sameCompileJob || ActiveLaunchParameters != LastLaunchParameters)
            {
                // need a new launch in ActiveServer
                restartOnceSwapped = true;
            }

            if (restartOnceSwapped && !monitorState.ActiveServer.ClosePortOnReboot)
            {
                // we need to manually restart active server
                // it won't listen to us right now so just kill it
                monitorState.ActiveServer.Dispose();
            }

            if ((!restartOnceSwapped && !monitorState.ActiveServer.ClosePortOnReboot) || !await MakeInactiveActive().ConfigureAwait(false))
            {
                break;
            }

            // The roles have been swapped at this point
            monitorState.ActiveServer.ClosePortOnReboot = true;

            if (!restartOnceSwapped)
            {
                monitorState.InactiveServer.ClosePortOnReboot = false;

                // try to reopen inactive server on the private port so it's not pinging all the time
                // failing that, just reboot it
                restartOnceSwapped = !await monitorState.InactiveServer.SetPort(ActiveLaunchParameters.SecondaryPort.Value, cancellationToken).ConfigureAwait(false);
            }

            if (restartOnceSwapped)
            {
                // restart needed, for one reason or another
                // break because, worst case, the active server is still booting
                await UpdateAndRestartInactiveServer(true).ConfigureAwait(false);
            }
            else
            {
                monitorState.InactiveServer.ClosePortOnReboot = false;
                monitorState.NextAction = MonitorAction.Break;
            }

            break;

        case MonitorActivationReason.InactiveServerRebooted:
            monitorState.RebootingInactiveServer = true;
            monitorState.InactiveServer.ResetRebootState();
            monitorState.ActiveServer.ClosePortOnReboot = false;
            monitorState.NextAction = MonitorAction.Continue;
            break;

        case MonitorActivationReason.InactiveServerStartupComplete:
            // simplest case: the inactive server is ready for use again
            monitorState.RebootingInactiveServer = false;
            monitorState.ActiveServer.ClosePortOnReboot = true;
            monitorState.NextAction = MonitorAction.Continue;
            break;

        case MonitorActivationReason.NewDmbAvailable:
            monitorState.InactiveServerHasStagedDmb = true;

            // NOTE(review): the original comment says the next case "does same thing", but this passes
            // breakAfter == true while the next case passes false - confirm this is intended.
            await UpdateAndRestartInactiveServer(true).ConfigureAwait(false);
            break;

        case MonitorActivationReason.ActiveLaunchParametersUpdated:
            await UpdateAndRestartInactiveServer(false).ConfigureAwait(false);
            break;
    }
}
/// <inheritdoc />
public async Task<ReattachInformation> Load(CancellationToken cancellationToken)
{
    // Both values are filled in by the database callback below
    Models.ReattachInformation primaryInfo = null;
    TimeSpan? requestTimeout = null;

    await databaseContextFactory.UseContext(async (db) =>
    {
        // Fetch every stored reattach entry for this instance, compile job included
        var storedInfos = await db
            .ReattachInformations
            .AsQueryable()
            .Where(x => x.CompileJob.Job.Instance.Id == metadata.Id)
            .Include(x => x.CompileJob)
            .ToListAsync(cancellationToken)
            .ConfigureAwait(false);

        primaryInfo = storedInfos.FirstOrDefault();
        if (primaryInfo == default)
        {
            return;
        }

        var timeoutMilliseconds = await db
            .Instances
            .AsQueryable()
            .Where(x => x.Id == metadata.Id)
            .Select(x => x.DreamDaemonSettings.TopicRequestTimeout)
            .FirstOrDefaultAsync(cancellationToken)
            .ConfigureAwait(false);

        if (timeoutMilliseconds == default)
        {
            logger.LogCritical("Missing TopicRequestTimeout!");
            return;
        }

        requestTimeout = TimeSpan.FromMilliseconds(timeoutMilliseconds.Value);

        // The first entry is the one that gets returned; processes of any further entries are terminated.
        // Every entry, the first included, is removed from the database.
        for (var index = 0; index < storedInfos.Count; ++index)
        {
            var storedInfo = storedInfos[index];
            if (index > 0)
            {
                logger.LogWarning("Killing PID {0} associated with extra reattach information...", storedInfo.ProcessId);
                try
                {
                    using var extraProcess = processExecutor.GetProcess(storedInfo.ProcessId);
                    extraProcess.Terminate();
                    await extraProcess.Lifetime.ConfigureAwait(false);
                }
                catch (Exception ex)
                {
                    // Best effort only, a failed kill must not abort the load
                    logger.LogWarning(ex, "Failed to kill process!");
                }
            }

            db.ReattachInformations.Remove(storedInfo);
        }

        await db.Save(cancellationToken).ConfigureAwait(false);
    }).ConfigureAwait(false);

    // The timeout is only set once an entry was found and the timeout setting was read
    if (!requestTimeout.HasValue)
    {
        logger.LogDebug("Reattach information not found!");
        return null;
    }

    var dmbProvider = await dmbFactory.FromCompileJob(primaryInfo.CompileJob, cancellationToken).ConfigureAwait(false);
    if (dmbProvider == null)
    {
        logger.LogError("Unable to reattach! Could not load .dmb!");
        return null;
    }

    var reattachInformation = new ReattachInformation(
        primaryInfo,
        dmbProvider,
        requestTimeout.Value);
    logger.LogDebug("Reattach information loaded: {0}", reattachInformation);
    return reattachInformation;
}