/// <summary>
/// Records that a store kill has been triggered and, when this node belongs to a DAG
/// whose current PAM is reported as publicly up, notifies that PAM with the
/// <c>StoreServiceUnexpectedlyStopped</c> event code.
/// </summary>
private static void ReportKillStarted()
{
    // The flag is raised unconditionally, before any configuration check.
    AmStoreServiceMonitor.s_killWasTriggered = true;

    AmConfig amConfig = AmSystemManager.Instance.Config;
    if (amConfig.IsUnknown || amConfig.IsStandalone)
    {
        // No PAM is looked up for an unknown or standalone configuration.
        return;
    }

    AmServerName pam = amConfig.DagConfig.CurrentPAM;
    if (!amConfig.DagConfig.IsNodePubliclyUp(pam))
    {
        return;
    }

    AmTrace.Diagnostic("Reporting to PAM ({0}) that store process is being killed.", new object[] { pam });
    AmStoreServiceMonitor.ReportStoreStatus(pam, AmSystemEventCode.StoreServiceUnexpectedlyStopped, AmServerName.LocalComputerName);
}
/// <summary>
/// Verifies that a move still makes sense for this database and throws when it does not.
/// Returns normally only when every check passes.
/// </summary>
/// <param name="activeServer">Server currently recorded as hosting the active copy.</param>
/// <param name="moveFromServer">Server the move was initiated from; the origin-related checks are skipped when null or empty.</param>
/// <param name="actionCode">Action code whose <c>Reason</c> selects the timeout-failure check.</param>
/// <exception cref="AmDbMoveOperationNotSupportedException">The database replication type is not <c>Remote</c>, or the DAG configuration lists fewer than two member servers.</exception>
/// <exception cref="AmDbMoveOperationNoLongerApplicableException"><paramref name="moveFromServer"/> is set and differs from <paramref name="activeServer"/>.</exception>
/// <exception cref="AmDbMoveOperationOnTimeoutFailureCancelled">The reason is <c>TimeoutFailure</c> but the database is reported as mounted on <paramref name="moveFromServer"/>.</exception>
private void CheckIfMoveApplicableForDatabase(AmServerName activeServer, AmServerName moveFromServer, AmDbActionCode actionCode)
{
    if (base.Database.ReplicationType != ReplicationType.Remote)
    {
        AmTrace.Debug("Move ignored for database {0} since it is not replicated.", new object[] { base.DatabaseName });
        throw new AmDbMoveOperationNotSupportedException(base.DatabaseName);
    }

    bool originSpecified = !AmServerName.IsNullOrEmpty(moveFromServer);

    if (originSpecified && !AmServerName.IsEqual(activeServer, moveFromServer))
    {
        AmTrace.Diagnostic(
            "Move ignored for database {0} since the master server is different from the server that originally initiated the failover. (master={1}, movedInitiatedFrom={2})",
            new object[] { base.DatabaseName, activeServer, moveFromServer });
        throw new AmDbMoveOperationNoLongerApplicableException(base.DatabaseName, moveFromServer.NetbiosName, activeServer.NetbiosName);
    }

    // The mount state is only queried for timeout-triggered moves that name an origin server.
    if (originSpecified
        && actionCode.Reason == AmDbActionReason.TimeoutFailure
        && AmStoreHelper.IsMounted(moveFromServer, base.DatabaseGuid))
    {
        throw new AmDbMoveOperationOnTimeoutFailureCancelled(base.DatabaseName, moveFromServer.NetbiosName);
    }

    if (base.Config.DagConfig.MemberServers.Length < 2)
    {
        AmTrace.Diagnostic(
            "Move ignored for database {0} since according to active manager there is just one server in the DAG)",
            new object[] { base.DatabaseName });
        throw new AmDbMoveOperationNotSupportedException(base.DatabaseName);
    }
}
/// <summary>
/// Decides which exception (if any) represents the outcome of the action, updates the AD
/// properties accordingly, and then hands that exception to
/// <c>AmHelper.ThrowDbActionWrapperExceptionIfNecessary</c>.
/// </summary>
/// <param name="initialSourceServer">Passed through to <c>UpdateAdProperties</c>.</param>
/// <param name="targetServer">Not read by this method.</param>
/// <param name="bcs">Best copy selector; its <c>LastException</c> and <c>BestCopySelectionType</c> are consulted.</param>
/// <param name="lastException">Exception from the last server attempt, or null.</param>
/// <param name="countServersTried">Number of servers on which the action was tried.</param>
/// <param name="natSkippedServersCount">Number of servers counted as skipped (the trace text attributes this to recent unsuccessful attempts).</param>
private void CheckActionResultsAndUpdateAdProperties(AmServerName initialSourceServer, AmServerName targetServer, IBestCopySelector bcs, Exception lastException, int countServersTried, int natSkippedServersCount)
{
    Exception selectorFailure = bcs.LastException;
    Exception outcome = lastException;

    if (countServersTried == 0)
    {
        // Nothing was tried, so whatever the selector recorded is the outcome.
        outcome = selectorFailure;
    }
    else if (lastException == null && natSkippedServersCount == countServersTried)
    {
        // Every tried server was counted as skipped and no error was captured.
        AmTrace.Diagnostic(
            "{0} for database {1} was not attempted in any of the servers in the DAG since all the servers in the DAG recently unsuccessful in performing the failover action.",
            new object[] { base.ActionCode, base.DatabaseName });
        outcome = new AmDbOperationAttempedTooSoonException(base.DatabaseName);
    }
    else if (bcs.BestCopySelectionType == AmBcsType.BestCopySelection && lastException != null && selectorFailure != null)
    {
        // With plain best-copy selection, the selector's own exception replaces the attempt's.
        outcome = selectorFailure;
    }

    bool succeeded = outcome == null;
    this.UpdateAdProperties(succeeded, initialSourceServer);
    AmHelper.ThrowDbActionWrapperExceptionIfNecessary(outcome);
}
/// <summary>
/// Brings down the process of the "MSExchangeIS" service. Unless the
/// <c>KillStoreInsteadOfWatsonOnTimeout</c> registry parameter is set, the service is first sent
/// control code 130 and given time to acknowledge it and exit on its own (the trace text
/// describes this as taking a Watson dump). If the process is still running afterwards it is
/// force-killed, and if it survives even that, a bugcheck is requested unless the
/// <c>StoreKillBugcheckDisabledTime</c> registry value lies in the future.
/// </summary>
/// <returns>
/// The exception reported by <c>ServiceOperations.GetServiceProcess</c> or
/// <c>ServiceOperations.RunOperation</c>, or null when neither reported one.
/// </returns>
internal static Exception TryCrashingStoreGracefully()
{
    Exception ex = null;
    Process storeProcess = null;
    EventWaitHandle crashControlAckEvent = null;
    try
    {
        ReplayCrimsonEvents.InitiatingGracefulStoreCrash.Log();
        storeProcess = ServiceOperations.GetServiceProcess("MSExchangeIS", out ex);
        if (ex == null)
        {
            // NOTE(review): storeProcess is dereferenced below without a null check; this relies on
            // GetServiceProcess reporting an exception whenever it has no process to return - confirm.
            ex = ServiceOperations.RunOperation(delegate(object sender, EventArgs args)
            {
                if (!RegistryParameters.KillStoreInsteadOfWatsonOnTimeout)
                {
                    // Named event on which the acknowledgement of the control code is awaited.
                    crashControlAckEvent = new EventWaitHandle(false, EventResetMode.ManualReset, "Global\\17B584B2-A9E0-45CF-87CB-7774112D6CB9");

                    // The control code is sent from a pool thread so that the wait below is
                    // bounded by the ack timeout rather than by ControlService itself.
                    ThreadPool.QueueUserWorkItem(delegate(object state)
                    {
                        Exception controlException = ServiceOperations.ControlService("MSExchangeIS", 130);
                        if (controlException != null)
                        {
                            AmTrace.Debug("ControlService() failed with {0}", new object[] { controlException.Message });
                        }
                    });
                }
                else
                {
                    AmTrace.Diagnostic("Killing store instead of taking a Watson dump due to registry override.", new object[0]);
                }

                if (crashControlAckEvent != null)
                {
                    if (crashControlAckEvent.WaitOne(RegistryParameters.StoreCrashControlCodeAckTimeoutInMSec))
                    {
                        AmStoreServiceMonitor.ReportKillStarted();
                        Stopwatch stopwatch = Stopwatch.StartNew();
                        if (!storeProcess.WaitForExit(RegistryParameters.StoreWatsonDumpTimeoutInMSec))
                        {
                            AmTrace.Diagnostic("Store process did not finish taking dump in {0} msecs", new object[] { RegistryParameters.StoreWatsonDumpTimeoutInMSec });
                        }
                        else
                        {
                            AmTrace.Diagnostic("Store process finished taking dump in {0} msecs", new object[] { stopwatch.Elapsed.TotalMilliseconds });
                        }
                    }
                    else
                    {
                        AmTrace.Diagnostic("Store failed to acknowledge that it received the crash control code in {0} msecs.", new object[] { RegistryParameters.StoreCrashControlCodeAckTimeoutInMSec });
                        AmStoreServiceMonitor.ReportKillStarted();
                    }
                }
                else
                {
                    // Registry override path: no graceful attempt was made.
                    AmStoreServiceMonitor.ReportKillStarted();
                }

                if (!storeProcess.HasExited)
                {
                    if (crashControlAckEvent != null)
                    {
                        AmTrace.Diagnostic("Store process is still running even after the graceful attempt. Force killing it.", new object[0]);
                    }

                    storeProcess.Kill();
                    TimeSpan killTimeout = TimeSpan.FromMilliseconds((double)RegistryParameters.StoreKillBugcheckTimeoutInMSec);
                    if (!storeProcess.WaitForExit(RegistryParameters.StoreKillBugcheckTimeoutInMSec))
                    {
                        ExDateTime bugcheckDisabledUntil = RegistryParameters.StoreKillBugcheckDisabledTime;
                        string reason = string.Format("Store process is still running {0} secs after attempt to force kill it.", killTimeout.TotalSeconds);
                        if (bugcheckDisabledUntil > ExDateTime.UtcNow)
                        {
                            // Bugcheck is suppressed by the registry value: log it and give up.
                            AmTrace.Debug("Store bugcheck has been disabled by regkey '{0}' until '{1}'.", new object[] { "StoreKillBugcheckDisabledTime", bugcheckDisabledUntil });
                            ReplayCrimsonEvents.StoreBugCheckDisabledUntilTime.LogPeriodic<string, string, ExDateTime>(Environment.MachineName, DiagCore.DefaultEventSuppressionInterval, reason, "StoreKillBugcheckDisabledTime", bugcheckDisabledUntil);
                            return;
                        }

                        AmTrace.Debug("Attempting to bugcheck the system. Reason: {0}", new object[] { reason });
                        BugcheckHelper.TriggerBugcheckIfRequired(DateTime.UtcNow, reason);
                        return;
                    }
                    else
                    {
                        AmTrace.Diagnostic("Store process has been forcefully killed.", new object[0]);
                    }
                }
            });
        }
    }
    finally
    {
        ReplayCrimsonEvents.FinishedGracefulStoreCrash.Log<string>((ex != null) ? ex.Message : "<none>");
        if (crashControlAckEvent != null)
        {
            crashControlAckEvent.Close();
        }

        if (storeProcess != null)
        {
            storeProcess.Dispose();
        }
    }

    return ex;
}
/// <summary>
/// Moves the active copy of this database. Candidate servers come from a best copy selector;
/// the current active copy is dismounted once, then a mount is attempted on each candidate in
/// turn until one succeeds, a role change or mount timeout is hit, or the selector runs out of
/// candidates. The outcome is then passed to <c>CheckActionResultsAndUpdateAdProperties</c>.
/// </summary>
/// <param name="storeMountFlags">Forwarded to <c>AttemptMountOnServer</c>.</param>
/// <param name="dismountFlags">Used for the dismount and forwarded to <c>AttemptMountOnServer</c>.</param>
/// <param name="mountDialoverride">Forwarded to the selector construction and to each mount attempt.</param>
/// <param name="fromServer">Server the move was initiated from; checked by <c>CheckIfMoveApplicableForDatabase</c>.</param>
/// <param name="targetServer">Forwarded to the selector construction and to the final result check.</param>
/// <param name="tryOtherHealthyServers">Forwarded to the selector construction and to each mount attempt.</param>
/// <param name="skipValidationChecks">Skip flags; when bit value 4 is on, mounts use <c>AmMountFlags.MoveWithSkipHealth</c>.</param>
/// <param name="componentName">Forwarded to the selector construction.</param>
/// <param name="moveStatus">Receives the initial and final database state plus one sub-status per mount attempt.</param>
/// <exception cref="AmDatabaseNeverMountedException">The database state has no entry.</exception>
protected override void MoveInternal(MountFlags storeMountFlags, UnmountFlags dismountFlags, DatabaseMountDialOverride mountDialoverride, AmServerName fromServer, AmServerName targetServer, bool tryOtherHealthyServers, AmBcsSkipFlags skipValidationChecks, string componentName, ref AmDbOperationDetailedStatus moveStatus)
{
    // The value is not used below; the read is kept exactly as in the original code.
    AmDbNodeAttemptTable nodeAttemptTable = AmSystemManager.Instance.DbNodeAttemptTable;

    IBestCopySelector selector = null;
    Exception lastError = null;
    bool dismountAttempted = false;
    int serversTried = 0;
    int natSkippedServersCount = 0;

    if (!base.State.IsEntryExist)
    {
        base.DbTrace.Error("Database was never mounted. Move is applicable only if it was mounted at least once", new object[0]);
        throw new AmDatabaseNeverMountedException();
    }

    if (base.State.IsAdminDismounted)
    {
        AmTrace.Diagnostic("Moving a dismounted database {0}. The database will be moved, but won't be mounted", new object[] { base.DatabaseName });
    }

    moveStatus.InitialDbState = base.State.Copy();

    // Snapshot of the active server before anything is changed.
    AmServerName originalActiveServer = base.State.ActiveServer;
    AmAcllReturnStatus acllStatus = null;

    // Stays true only when the loop ends without a success, role change or mount timeout.
    bool reconcileMountStatus = true;

    try
    {
        this.CheckIfMoveApplicableForDatabase(base.State.ActiveServer, fromServer, base.ActionCode);
        selector = this.ConstructBestCopySelector(tryOtherHealthyServers, skipValidationChecks, null, originalActiveServer, targetServer, mountDialoverride, componentName);

        if (base.ActionCode.IsAutomaticShutdownSwitchover)
        {
            // For an automatic shutdown switchover the dismount happens before any candidate is chosen.
            base.AttemptDismount(base.State.ActiveServer, dismountFlags, true, out lastError);
            dismountAttempted = true;
        }

        AmServerName candidate = selector.FindNextBestCopy();
        while (candidate != null)
        {
            serversTried++;

            if (!dismountAttempted)
            {
                base.AttemptDismount(originalActiveServer, dismountFlags, true, out lastError);
                dismountAttempted = true;
            }

            AmMountFlags mountFlags = BitMasker.IsOn((int)skipValidationChecks, 4)
                ? AmMountFlags.MoveWithSkipHealth
                : AmMountFlags.None;

            this.AttemptMountOnServer(
                candidate,
                base.State.ActiveServer,
                storeMountFlags,
                mountFlags,
                dismountFlags,
                mountDialoverride,
                skipValidationChecks,
                tryOtherHealthyServers,
                ref natSkippedServersCount,
                ref acllStatus,
                out lastError);

            base.DbTrace.Debug("AttemptMountOnServer returned AcllStatus: {0}", new object[] { acllStatus });
            moveStatus.AddSubstatus(new AmDbOperationSubStatus(candidate, acllStatus, lastError));

            if (lastError == null)
            {
                // Mounted successfully on this candidate.
                reconcileMountStatus = false;
                break;
            }

            selector.ErrorLogger.ReportServerFailure(candidate, "CopyHasBeenTriedCheck", lastError.Message);

            // A role change or a mount timeout ends the search; no further candidate is tried.
            if (lastError is AmRoleChangedWhileOperationIsInProgressException || lastError is AmMountTimeoutException)
            {
                reconcileMountStatus = false;
                break;
            }

            candidate = selector.FindNextBestCopy();
            base.CurrentAttemptNumber++;
        }
    }
    finally
    {
        // The final state is recorded even when an exception escapes the attempt loop.
        moveStatus.FinalDbState = base.State.Copy();
    }

    if (reconcileMountStatus)
    {
        MountStatus storeReported = AmStoreHelper.GetStoreDatabaseMountStatus(base.State.ActiveServer, base.Database.Guid);
        if (storeReported != base.State.MountStatus)
        {
            ReplayCrimsonEvents.MismatchErrorAfterMove.Log<string, Guid, AmServerName, MountStatus, MountStatus>(base.Database.Name, base.Database.Guid, base.State.ActiveServer, base.State.MountStatus, storeReported);

            // Only a store-reported dismount is written back to the persisted state.
            if (storeReported == MountStatus.Dismounted)
            {
                base.State.MountStatus = MountStatus.Dismounted;
                base.WriteState();
            }
        }
    }

    this.CheckActionResultsAndUpdateAdProperties(originalActiveServer, targetServer, selector, lastError, serversTried, natSkippedServersCount);
}