/// <inheritdoc /> protected override async Task InitControllers( Task chatTask, ReattachInformation reattachInfo, CancellationToken cancellationToken) { // don't need a new dmb if reattaching var reattachInProgress = reattachInfo != null; var dmbToUse = reattachInProgress ? null : DmbFactory.LockNextDmb(1); // if this try catches something, both servers are killed try { // start the alpha server task, either by launch a new process or attaching to an existing one // The tasks returned are mainly for writing interop files to the directories among other things and should generally never fail // The tasks pertaining to server startup times are in the ISessionControllers Task <ISessionController> serverLaunchTask; if (!reattachInProgress) { dmbToUse = await PrepServerForLaunch(dmbToUse, cancellationToken).ConfigureAwait(false); serverLaunchTask = SessionControllerFactory.LaunchNew( dmbToUse, null, ActiveLaunchParameters, false, cancellationToken); } else { serverLaunchTask = SessionControllerFactory.Reattach(reattachInfo, cancellationToken); } // retrieve the session controller Server = await serverLaunchTask.ConfigureAwait(false); // failed reattaches will return null Server?.SetHighPriority(); // possiblity of null servers due to failed reattaches if (Server == null) { await ReattachFailure( chatTask, cancellationToken) .ConfigureAwait(false); return; } await CheckLaunchResult(Server, "Server", cancellationToken).ConfigureAwait(false); Server.EnableCustomChatCommands(); } catch { // kill the controllers bool serverWasActive = Server != null; // DCT: Operation must always run await DisposeAndNullControllers(default).ConfigureAwait(false);
/// <inheritdoc />
protected sealed override async Task InitControllers(Action callBeforeRecurse, Task chatTask, DualReattachInformation reattachInfo, CancellationToken cancellationToken)
{
    // Start by treating Alpha as the session to reattach and Bravo as the one to kill,
    // each falling back to the other entry when its own is missing
    var reattachTarget = reattachInfo?.Alpha ?? reattachInfo?.Bravo;
    var killTarget = reattachInfo?.Bravo ?? reattachInfo?.Alpha;

    // both picks landed on the same entry (or there is no reattach information at all): nothing is left over to kill
    if (killTarget == reattachTarget)
    {
        killTarget = null;
    }

    // the reattach information says Alpha was not the active one, so the two roles trade places
    if (reattachInfo?.AlphaIsActive == false)
    {
        var previousReattachTarget = reattachTarget;
        reattachTarget = killTarget;
        killTarget = previousReattachTarget;
    }

    // a dmb only has to be locked when a brand new server is going to be launched
    var reattaching = reattachTarget != null;
    var dmbToUse = reattaching ? null : DmbFactory.LockNextDmb(1);

    // anything thrown inside the try below results in the controllers being killed
    var leftoverWasKilled = false;
    try
    {
        // The launch tasks mostly cover writing interop files and similar setup and should rarely fail
        // Startup timing itself is tracked by the ISessionController
        Task<ISessionController> serverLaunchTask;
        if (reattaching)
        {
            serverLaunchTask = SessionControllerFactory.Reattach(reattachTarget, cancellationToken);
        }
        else
        {
            dmbToUse = await PrepServerForLaunch(dmbToUse, cancellationToken).ConfigureAwait(false);
            serverLaunchTask = SessionControllerFactory.LaunchNew(
                dmbToUse,
                null,
                ActiveLaunchParameters,
                true,
                true,
                false,
                cancellationToken);
        }

        // the leftover session is reattached purely so that it can be disposed
        var hasLeftover = killTarget != null;
        Task<ISessionController> leftoverReattachTask = hasLeftover
            ? SessionControllerFactory.Reattach(killTarget, cancellationToken)
            : Task.FromResult<ISessionController>(null);

        // a failed reattach yields a null controller
        Server = await serverLaunchTask.ConfigureAwait(false);
        Server?.SetHighPriority();

        var leftoverController = await leftoverReattachTask.ConfigureAwait(false);
        leftoverController?.Dispose();
        leftoverWasKilled = leftoverController != null;

        if (Server == null)
        {
            // the reattach failed: report it, including whether a leftover session could not be killed
            callBeforeRecurse();
            var leftoverSurvived = hasLeftover && !leftoverWasKilled;
            await NotifyOfFailedReattach(leftoverSurvived, cancellationToken).ConfigureAwait(false);
            return;
        }

        await CheckLaunchResult(Server, "Server", cancellationToken).ConfigureAwait(false);
        Server.EnableCustomChatCommands();
    }
    catch
    {
        // remember whether a controller existed before tearing everything down
        var serverWasActive = Server != null;
        DisposeAndNullControllers();

        // no controller ever took control of the freshly locked dmb
        if (!serverWasActive && dmbToUse != null)
        {
            dmbToUse.Dispose();
        }

        // the leftover session was never reattached and disposed, so release its dmb here
        if (!leftoverWasKilled && killTarget != null)
        {
            killTarget.Dmb.Dispose();
        }

        throw;
    }
}
#pragma warning restore CA1502

/// <inheritdoc />
#pragma warning disable CA1502 // TODO: Decomplexify
protected override async Task InitControllers(Action callBeforeRecurse, Task chatTask, WatchdogReattachInformation reattachInfo, CancellationToken cancellationToken)
{
    Debug.Assert(alphaServer == null && bravoServer == null, "Entered LaunchNoLock with one or more of the servers not being null!");

    // don't need a new dmb if reattaching to both servers
    var doesntNeedNewDmb = reattachInfo?.Alpha != null && reattachInfo?.Bravo != null;

    // two locks are taken on the dmb, one per server
    var dmbToUse = doesntNeedNewDmb ? null : DmbFactory.LockNextDmb(2);

    // if this try catches something, both servers are killed
    try
    {
        // start the alpha server task, either by launching a new process or attaching to an existing one
        // The tasks returned are mainly for writing interop files to the directories among other things and should generally never fail
        // The tasks pertaining to server startup times are in the ISessionControllers
        Task<ISessionController> alphaServerTask;
        if (!doesntNeedNewDmb)
        {
            alphaServerTask = SessionControllerFactory.LaunchNew(ActiveLaunchParameters, dmbToUse, null, true, true, false, cancellationToken);
        }
        else
        {
            alphaServerTask = SessionControllerFactory.Reattach(reattachInfo.Alpha, cancellationToken);
        }

        // retrieve the session controller
        var startTime = DateTimeOffset.Now;
        alphaServer = await alphaServerTask.ConfigureAwait(false);

        // failed reattaches will return null
        alphaServer?.SetHighPriority();

        // extra delay for total ordering
        var now = DateTimeOffset.Now;
        var delay = now - startTime;

        // definitely not if reattaching though
        if (reattachInfo == null && delay.TotalSeconds < AlphaBravoStartupSeperationInterval)
        {
            await AsyncDelayer.Delay(startTime.AddSeconds(AlphaBravoStartupSeperationInterval) - now, cancellationToken).ConfigureAwait(false);
        }

        // now bring bravo up
        if (!doesntNeedNewDmb)
        {
            bravoServer = await SessionControllerFactory.LaunchNew(ActiveLaunchParameters, dmbToUse, null, false, false, false, cancellationToken).ConfigureAwait(false);
        }
        else
        {
            bravoServer = await SessionControllerFactory.Reattach(reattachInfo.Bravo, cancellationToken).ConfigureAwait(false);
        }

        // failed reattaches will return null
        bravoServer?.SetHighPriority();

        // Set when the "inactive reattach failed" chat message is sent so that it can be awaited below.
        // chatTask is a by-value parameter, reassigning it alone would leave the send unobserved
        Task inactiveFailureChatTask = null;

        // possibility of null servers due to failed reattaches
        if (alphaServer == null || bravoServer == null)
        {
            await chatTask.ConfigureAwait(false);

            var bothServersDead = alphaServer == null && bravoServer == null;
            if (bothServersDead
                || (alphaServer == null && reattachInfo.AlphaIsActive)
                || (bravoServer == null && !reattachInfo.AlphaIsActive))
            {
                // we lost the active server, just restart entirely
                DisposeAndNullControllers();

                const string FailReattachMessage = "Unable to properly reattach to active server! Restarting...";
                Logger.LogWarning(FailReattachMessage);
                Logger.LogDebug(bothServersDead ? "Also could not reattach to inactive server!" : "Inactive server was reattached successfully!");

                chatTask = Chat.SendWatchdogMessage(FailReattachMessage, cancellationToken);
                callBeforeRecurse();
                await LaunchImplNoLock(true, false, null, cancellationToken).ConfigureAwait(false);
                await chatTask.ConfigureAwait(false);
                return;
            }

            // we still have the active server but the other one is dead to us, hand it off to the monitor to restart
            const string InactiveReattachFailureMessage = "Unable to reattach to inactive server. Leaving for monitor to reboot...";
            inactiveFailureChatTask = Chat.SendWatchdogMessage(InactiveReattachFailureMessage, cancellationToken);
            Logger.LogWarning(InactiveReattachFailureMessage);

            // stand in a dead session for the server that could not be reattached
            if (reattachInfo.AlphaIsActive)
            {
                bravoServer = SessionControllerFactory.CreateDeadSession(reattachInfo.Bravo.Dmb);
            }
            else
            {
                alphaServer = SessionControllerFactory.CreateDeadSession(reattachInfo.Alpha.Dmb);
            }
        }

        var alphaLrt = CheckLaunchResult(alphaServer, "Alpha", cancellationToken);
        var bravoLrt = CheckLaunchResult(bravoServer, "Bravo", cancellationToken);

        // this task completes when both launch result checks have finished
        var allTask = Task.WhenAll(alphaLrt, bravoLrt);
        await allTask.ConfigureAwait(false);

        // observe the outcome of the chat notification the same way the full restart path above does
        if (inactiveFailureChatTask != null)
        {
            await inactiveFailureChatTask.ConfigureAwait(false);
        }

        // both servers are now running, alpha is the active server (unless reattach), huzzah
        alphaIsActive = reattachInfo?.AlphaIsActive ?? true;

        var activeServer = AlphaIsActive ? alphaServer : bravoServer;
        activeServer.EnableCustomChatCommands();
        activeServer.ClosePortOnReboot = true;
    }
    catch
    {
        if (dmbToUse != null)
        {
            // we locked 2 dmbs. Alpha is always launched before bravo on this path,
            // so alpha can only be missing when bravo is missing too
            if (bravoServer == null)
            {
                // bravo didn't get control of his
                dmbToUse.Dispose();
                if (alphaServer == null)
                {
                    // alpha didn't get control of his
                    dmbToUse.Dispose();
                }
            }
        }
        else if (doesntNeedNewDmb)
        {
            // we have reattachInfo. Either reattach can fail independently of the other,
            // so each dmb has to be checked on its own
            if (bravoServer == null)
            {
                // bravo didn't get control of his
                reattachInfo.Bravo?.Dmb.Dispose();
            }

            if (alphaServer == null)
            {
                // alpha didn't get control of his
                reattachInfo.Alpha?.Dmb.Dispose();
            }
        }

        // kill the controllers
        DisposeAndNullControllers();
        throw;
    }
}
/// <summary>
/// Handles the actions to take when the monitor has to "wake up"
/// </summary>
/// <param name="activationReason">The <see cref="MonitorActivationReason"/> that caused the invocation</param>
/// <param name="monitorState">The current <see cref="MonitorState"/>. Will be modified upon return</param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation</param>
/// <returns>A <see cref="Task"/> representing the running operation</returns>
#pragma warning disable CA1502 // TODO: Decomplexify
async Task HandlerMonitorWakeup(MonitorActivationReason activationReason, MonitorState monitorState, CancellationToken cancellationToken)
{
    Logger.LogDebug("Monitor activation. Reason: {0}", activationReason);

    // this is where the bulk of the watchdog handling code lives and is fraught with local functions, sorry not sorry
    // I'll do my best to walk you through it

    // returns true if the inactive server can't be used immediately
    // also sets monitor to restart if the above holds
    bool FullRestartDeadInactive()
    {
        if (monitorState.RebootingInactiveServer || monitorState.InactiveServerCritFail)
        {
            Logger.LogInformation("Inactive server is {0}! Restarting monitor...", monitorState.InactiveServerCritFail ? "critically failed" : "still rebooting");
            monitorState.NextAction = MonitorAction.Restart; // will dispose server
            return true;
        }

        return false;
    }

    // tries to set inactive server's port to the public game port
    // doesn't handle closing active server's port
    // returns true on success and swaps inactiveserver and activeserver also sets LastLaunchParameters to ActiveLaunchParameters
    // on failure, sets monitor to restart
    async Task<bool> MakeInactiveActive()
    {
        Logger.LogDebug("Setting inactive server to port {0}...", ActiveLaunchParameters.PrimaryPort.Value);
        var result = await monitorState.InactiveServer.SetPort(ActiveLaunchParameters.PrimaryPort.Value, cancellationToken).ConfigureAwait(false);

        if (!result)
        {
            Logger.LogWarning("Failed to activate inactive server! Restarting monitor...");
            monitorState.NextAction = MonitorAction.Restart; // will dispose server
            return false;
        }

        // inactive server should always be using active launch parameters
        LastLaunchParameters = ActiveLaunchParameters;

        var tmp = monitorState.ActiveServer;
        monitorState.ActiveServer = monitorState.InactiveServer;
        monitorState.InactiveServer = tmp;
        alphaIsActive = !AlphaIsActive;
        monitorState.ActiveServer.EnableCustomChatCommands();
        return true;
    }

    // Kills and tries to launch inactive server with the latest dmb
    // falls back to the active server's dmb on failure
    // Sets InactiveServerCritFail if that fails as well
    // On success, flags the inactive server as rebooting. NextAction is set from breakAfter before anything is launched
    async Task UpdateAndRestartInactiveServer(bool breakAfter)
    {
        ActiveParametersUpdated = new TaskCompletionSource<object>();
        monitorState.InactiveServer.Dispose(); // kill or recycle it
        var desiredNextAction = breakAfter ? MonitorAction.Break : MonitorAction.Continue;
        monitorState.NextAction = desiredNextAction;

        Logger.LogInformation("Rebooting inactive server...");
        var newDmb = DmbFactory.LockNextDmb(1);
        try
        {
            try
            {
                monitorState.InactiveServer = await SessionControllerFactory.LaunchNew(ActiveLaunchParameters, newDmb, null, false, !monitorState.ActiveServer.IsPrimary, false, cancellationToken).ConfigureAwait(false);
            }
            catch
            {
                // no controller was produced, so nothing took control of the lock: release it here
                // (same handling InitControllers applies when a launch fails)
                newDmb?.Dispose();
                throw;
            }

            monitorState.InactiveServer.SetHighPriority();
        }
        catch (OperationCanceledException)
        {
            throw;
        }
        catch (Exception e)
        {
            Logger.LogError("Error occurred while recreating server! Attempting backup strategy of running DMB of running server! Exception: {0}", e.ToString());

            // ahh jeez, what do we do here?
            // this is our fault, so it should never happen but
            // idk maybe a database error while handling the newest dmb?
            // either way try to start it using the active server's dmb as a backup
            try
            {
                var dmbBackup = await DmbFactory.FromCompileJob(monitorState.ActiveServer.Dmb.CompileJob, cancellationToken).ConfigureAwait(false);

                if (dmbBackup == null)
                {
                    // just give up, if THAT compile job is failing then the ActiveServer is gonna crash soon too or already has
                    throw new JobException("Creating backup DMB provider failed!");
                }

                try
                {
                    monitorState.InactiveServer = await SessionControllerFactory.LaunchNew(ActiveLaunchParameters, dmbBackup, null, false, !monitorState.ActiveServer.IsPrimary, false, cancellationToken).ConfigureAwait(false);
                }
                catch
                {
                    // the backup launch produced no controller either, release the backup dmb
                    dmbBackup.Dispose();
                    throw;
                }

                monitorState.InactiveServer.SetHighPriority();

                // the placeholder is filled with the message of the exception that made the primary launch fail
                await Chat.SendWatchdogMessage(
                    String.Format(CultureInfo.InvariantCulture, "Staging newest DMB on inactive server failed: {0} Falling back to previous dmb...", e.Message),
                    cancellationToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                throw;
            }
            catch (Exception e2)
            {
                // both strategies failed, flag the inactive server as critically failed and leave it to the monitor
                Logger.LogError("Backup strategy failed! Monitor will restart when active server reboots! Exception: {0}", e2.ToString());
                monitorState.InactiveServerCritFail = true;
                await Chat.SendWatchdogMessage("Attempted reboot of inactive server failed. Watchdog will reset when active server fails or exits", cancellationToken).ConfigureAwait(false);
                return;
            }
        }

        Logger.LogInformation("Successfully relaunched inactive server!");
        monitorState.RebootingInactiveServer = true;
    }

    // word used in chat messages depending on whether the termination was requested
    string ExitWord(ISessionController controller) => controller.TerminationWasRequested ? "exited" : "crashed";

    // reason handling
    switch (activationReason)
    {
        case MonitorActivationReason.ActiveServerCrashed:
            if (monitorState.ActiveServer.RebootState == Watchdog.RebootState.Shutdown)
            {
                // the time for graceful shutdown is now
                await Chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Active server {0}! Exiting due to graceful termination request...", ExitWord(monitorState.ActiveServer)), cancellationToken).ConfigureAwait(false);
                DisposeAndNullControllers();
                monitorState.NextAction = MonitorAction.Exit;
                break;
            }

            if (FullRestartDeadInactive())
            {
                // tell chat about it and go ahead
                await Chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Active server {0}! Inactive server unable to online!", ExitWord(monitorState.ActiveServer)), cancellationToken).ConfigureAwait(false);

                // we've already been set to restart
                break;
            }

            // tell chat about it
            await Chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Active server {0}! Onlining inactive server...", ExitWord(monitorState.ActiveServer)), cancellationToken).ConfigureAwait(false);

            // try to activate the inactive server
            if (!await MakeInactiveActive().ConfigureAwait(false))
            {
                break; // failing that, we've already been set to restart
            }

            // bring up another inactive server
            await UpdateAndRestartInactiveServer(true).ConfigureAwait(false);
            break;
        case MonitorActivationReason.InactiveServerCrashed:
            // just announce and try to bring it back
            await Chat.SendWatchdogMessage(String.Format(CultureInfo.InvariantCulture, "Inactive server {0}! Rebooting...", ExitWord(monitorState.InactiveServer)), cancellationToken).ConfigureAwait(false);
            await UpdateAndRestartInactiveServer(false).ConfigureAwait(false);
            break;
        case MonitorActivationReason.ActiveServerRebooted:
            // ideal goal: active server just closed its port
            // tell inactive server to open its port and that's now the active server
            var rebootState = monitorState.ActiveServer.RebootState;
            monitorState.ActiveServer.ResetRebootState(); // the DMAPI has already done this internally

            if (FullRestartDeadInactive() && rebootState != Watchdog.RebootState.Shutdown)
            {
                break; // full restart if the inactive server is not usable
            }

            // what matters here is the RebootState
            var restartOnceSwapped = false;

            switch (rebootState)
            {
                case Watchdog.RebootState.Normal:
                    // life as normal
                    break;
                case Watchdog.RebootState.Restart:
                    // reboot the current active server once the inactive one activates
                    restartOnceSwapped = true;
                    break;
                case Watchdog.RebootState.Shutdown:
                    // graceful shutdown time
                    await Chat.SendWatchdogMessage("Active server rebooted! Exiting due to graceful termination request...", cancellationToken).ConfigureAwait(false);
                    DisposeAndNullControllers();
                    monitorState.NextAction = MonitorAction.Exit;
                    return;
                default:
                    throw new InvalidOperationException($"Invalid reboot state: {rebootState}");
            }

            // are both servers now running the same CompileJob?
            var sameCompileJob = monitorState.InactiveServer.Dmb.CompileJob.Id == monitorState.ActiveServer.Dmb.CompileJob.Id;

            if (!sameCompileJob || ActiveLaunchParameters != LastLaunchParameters)
            {
                restartOnceSwapped = true; // need a new launch to update either settings or compile job
            }

            if (restartOnceSwapped)
            {
                /*
                 * we need to manually restart active server
                 * just kill it here, easier that way
                 */
                monitorState.ActiveServer.Dispose();
            }

            var activeServerStillHasPortOpen = !restartOnceSwapped && !monitorState.ActiveServer.ClosePortOnReboot;

            if (activeServerStillHasPortOpen)
            {
                /* we didn't want active server to swap for some reason and it still has its port open
                 * just continue as normal
                 */
                break;
            }

            if (!await MakeInactiveActive().ConfigureAwait(false))
            {
                break; // monitor will restart
            }

            // servers now swapped
            // enable this now if inactive server is not still valid
            monitorState.ActiveServer.ClosePortOnReboot = restartOnceSwapped;

            if (!restartOnceSwapped)
            {
                /*
                 * now try to reopen it on the private port
                 * failing that, just reboot it
                 */
                restartOnceSwapped = !await monitorState.InactiveServer.SetPort(ActiveLaunchParameters.SecondaryPort.Value, cancellationToken).ConfigureAwait(false);
            }

            // break either way because any issues past this point would be solved by the reboot
            if (restartOnceSwapped)
            {
                // for one reason or another, update and reboot
                await UpdateAndRestartInactiveServer(true).ConfigureAwait(false);
            }
            else
            {
                // only skip checking inactive server rebooted, it's guaranteed InactiveServerStartup complete wouldn't fire this iteration
                monitorState.NextAction = MonitorAction.Skip;
            }

            break;
        case MonitorActivationReason.InactiveServerRebooted:
            // just don't let the active server close its port if the inactive server isn't ready
            monitorState.RebootingInactiveServer = true;
            monitorState.InactiveServer.ResetRebootState();
            monitorState.ActiveServer.ClosePortOnReboot = false;
            monitorState.NextAction = MonitorAction.Continue;
            break;
        case MonitorActivationReason.InactiveServerStartupComplete:
            // opposite of above case
            monitorState.RebootingInactiveServer = false;
            monitorState.ActiveServer.ClosePortOnReboot = true;
            monitorState.NextAction = MonitorAction.Continue;
            break;
        case MonitorActivationReason.NewDmbAvailable:
        case MonitorActivationReason.ActiveLaunchParametersUpdated:
            // just reload the inactive server and wait for a swap to apply the changes
            await UpdateAndRestartInactiveServer(true).ConfigureAwait(false);
            break;
        default:
            Trace.Assert(false, String.Format(CultureInfo.InvariantCulture, "Invalid monitor activation reason: {0}!", activationReason));
            break;
    }
}
/// <inheritdoc /> #pragma warning disable CA1502 // TODO: Decomplexify protected override async Task HandleMonitorWakeup(MonitorActivationReason activationReason, MonitorState monitorState, CancellationToken cancellationToken) { Logger.LogDebug("Monitor activation. Reason: {0}", activationReason); // this is where the bulk of the watchdog handling code lives and is fraught with lambdas, sorry not sorry // I'll do my best to walk you through it // returns true if the inactive server can't be used immediately // also sets monitor to restart if the above holds bool FullRestartDeadInactive() { if (monitorState.RebootingInactiveServer || monitorState.InactiveServerCritFail) { Logger.LogInformation("Inactive server is {0}! Restarting monitor...", monitorState.InactiveServerCritFail ? "critically failed" : "still rebooting"); monitorState.NextAction = MonitorAction.Restart; // will dispose server return(true); } return(false); } // trys to set inactive server's port to the public game port // doesn't handle closing active server's port // returns true on success and swaps inactiveserver and activeserver also sets LastLaunchParameters to ActiveLaunchParameters // on failure, sets monitor to restart async Task <bool> MakeInactiveActive() { Logger.LogDebug("Setting inactive server to port {0}...", ActiveLaunchParameters.PrimaryPort.Value); var result = await monitorState.InactiveServer.SetPort(ActiveLaunchParameters.PrimaryPort.Value, cancellationToken).ConfigureAwait(false); if (!result) { Logger.LogWarning("Failed to activate inactive server! 
Restarting monitor..."); monitorState.NextAction = MonitorAction.Restart; // will dispose server return(false); } // inactive server should always be using active launch parameters LastLaunchParameters = ActiveLaunchParameters; var tmp = monitorState.ActiveServer; monitorState.ActiveServer = monitorState.InactiveServer; monitorState.InactiveServer = tmp; alphaIsActive = !AlphaIsActive; monitorState.ActiveServer.EnableCustomChatCommands(); return(true); } // Kills and tries to launch inactive server with the latest dmb // falls back to current dmb on failure // Sets critfail on inactive server failing that // returns false if the backup dmb was used successfully, true otherwise async Task UpdateAndRestartInactiveServer(bool breakAfter) { ActiveParametersUpdated = new TaskCompletionSource <object>(); monitorState.InactiveServer.Dispose(); // kill or recycle it var desiredNextAction = breakAfter ? MonitorAction.Break : MonitorAction.Continue; monitorState.NextAction = desiredNextAction; Logger.LogInformation("Rebooting inactive server..."); var newDmb = DmbFactory.LockNextDmb(1); try { monitorState.InactiveServer = await SessionControllerFactory.LaunchNew( newDmb, null, ActiveLaunchParameters, false, !monitorState.ActiveServer.IsPrimary, false, cancellationToken) .ConfigureAwait(false); monitorState.InactiveServer.SetHighPriority(); } catch (OperationCanceledException) { throw; } catch (Exception e) { Logger.LogError("Error occurred while recreating server! Attempting backup strategy of running DMB of running server! Exception: {0}", e.ToString()); // ahh jeez, what do we do here? // this is our fault, so it should never happen but // idk maybe a database error while handling the newest dmb? // either way try to start it using the active server's dmb as a backup try { var dmbBackup = await DmbFactory.FromCompileJob(monitorState.ActiveServer.Dmb.CompileJob, cancellationToken).ConfigureAwait(false); if (dmbBackup == null) // NANI!? 
{ throw new InvalidOperationException("Watchdog double crit-fail!"); // just give up, if THAT compile job is failing then the ActiveServer is gonna crash soon too or already has } monitorState.InactiveServer = await SessionControllerFactory.LaunchNew( dmbBackup, null, ActiveLaunchParameters, false, !monitorState.ActiveServer.IsPrimary, false, cancellationToken) .ConfigureAwait(false); monitorState.InactiveServer.SetHighPriority(); await Chat.SendWatchdogMessage( "Staging newest DMB on inactive server failed: {0} Falling back to previous dmb...", false, cancellationToken).ConfigureAwait(false); } catch (OperationCanceledException) { throw; } catch (Exception e2) { // fuuuuucckkk Logger.LogError("Backup strategy failed! Monitor will restart when active server reboots! Exception: {0}", e2.ToString()); monitorState.InactiveServerCritFail = true; await Chat.SendWatchdogMessage( "Attempted reboot of inactive server failed. Watchdog will reset when active server fails or exits", false, cancellationToken).ConfigureAwait(false); return; } } Logger.LogInformation("Successfully relaunched inactive server!"); monitorState.RebootingInactiveServer = true; }