/// <summary>
/// Probes whether the cluster backing the given DAG can be opened.
/// </summary>
/// <param name="dag">The database availability group whose cluster is opened.</param>
/// <param name="output">Receives the diagnostic log messages.</param>
/// <returns>
/// <c>true</c> when the cluster and its core group were opened and the node count and
/// group owner were logged; <c>false</c> when <c>AmCoreGroupRegNotFound</c> was thrown.
/// Any other exception propagates to the caller.
/// </returns>
internal static bool IsClusterUp(DatabaseAvailabilityGroup dag, HaTaskOutputHelper output)
{
    try
    {
        using (AmCluster cluster = AmCluster.OpenDagClus(dag))
        using (IAmClusterGroup coreGroup = cluster.FindCoreClusterGroup())
        {
            // Count the nodes first, then read the owner, matching the order in which the
            // log arguments are evaluated.
            int nodeCount = cluster.EnumerateNodeNames().Count<AmServerName>();
            string groupOwner = coreGroup.OwnerNode.NetbiosName;
            output.AppendLogMessage("Cluster is up, there are {0} nodes in the cluster, group owner:{1}", new object[]
            {
                nodeCount,
                groupOwner
            });
            return true;
        }
    }
    catch (AmCoreGroupRegNotFound notFound)
    {
        output.AppendLogMessage("Cluster is down, got AmCoreGroupRegNotFound:{0} when trying to open the cluster", new object[]
        {
            notFound.ToString()
        });
        return false;
    }
}
/// <summary>
/// Attempts to bring the "clussvc" service on the given node to the Running state.
/// </summary>
/// <remarks>
/// If the service is reported as StopPending, waits for it to reach Stopped first. If it is
/// then reported as Stopped, starts it and waits for Running. A wait that times out is
/// reported through <c>output.WriteErrorSimple</c> as a <c>FailedToStartClusSvcException</c>
/// carrying the last status that could be read. Any other reported state is left untouched.
/// </remarks>
/// <param name="serverName">Node whose cluster service is controlled (addressed by FQDN).</param>
/// <param name="output">Receives log, progress and error output.</param>
internal static void TryStartClussvcOnNode(AmServerName serverName, HaTaskOutputHelper output)
{
    using (ServiceController clusterService = new ServiceController("clussvc", serverName.Fqdn))
    {
        string nodeName = serverName.NetbiosName;
        try
        {
            ServiceControllerStatus current = DatabaseAvailabilityGroupAction.GetServiceControllerStatus(clusterService, nodeName, output);
            if (current == ServiceControllerStatus.StopPending)
            {
                output.AppendLogMessage("Service is in stop pending, wait for it to be stopped", new object[0]);
                clusterService.WaitForStatus(ServiceControllerStatus.Stopped, DatabaseAvailabilityGroupAction.ServiceTimeout);
            }

            // Re-read the status: the wait above may have changed it.
            current = DatabaseAvailabilityGroupAction.GetServiceControllerStatus(clusterService, nodeName, output);
            if (current == ServiceControllerStatus.Stopped)
            {
                clusterService.Start();
                output.WriteProgressSimple(Strings.WaitingForClusterServiceToStart(nodeName));
                clusterService.WaitForStatus(ServiceControllerStatus.Running, DatabaseAvailabilityGroupAction.ServiceTimeout);
            }
        }
        catch (System.ServiceProcess.TimeoutException)
        {
            ServiceControllerStatus lastSeen = DatabaseAvailabilityGroupAction.GetServiceControllerStatus(clusterService, nodeName, output);
            output.WriteErrorSimple(new FailedToStartClusSvcException(nodeName, lastSeen.ToString()));
        }
    }
}
/// <summary>
/// Reads <c>sc.Status</c>, reporting a failure to read it through <paramref name="output"/>.
/// </summary>
/// <param name="sc">The service controller to query.</param>
/// <param name="serverName">Server name used only in the log and error messages.</param>
/// <param name="output">Receives the log message and the error.</param>
/// <returns>
/// The service status. If reading it throws <c>Win32Exception</c> or
/// <c>InvalidOperationException</c>, the failure is logged and reported as a
/// <c>FailedToGetServiceStatusForNodeException</c>; should <c>WriteErrorSimple</c> return
/// control, <c>ServiceControllerStatus.StopPending</c> is returned as a fallback.
/// </returns>
internal static ServiceControllerStatus GetServiceControllerStatus(ServiceController sc, string serverName, HaTaskOutputHelper output)
{
    string failureMessage;
    try
    {
        return sc.Status;
    }
    catch (InvalidOperationException invalidOperation)
    {
        failureMessage = invalidOperation.Message;
    }
    catch (Win32Exception win32Failure)
    {
        failureMessage = win32Failure.Message;
    }

    // Both failure kinds are reported identically.
    output.AppendLogMessage("Failed to get status for server: {0} error {1}", new object[]
    {
        serverName,
        failureMessage
    });
    output.WriteErrorSimple(new FailedToGetServiceStatusForNodeException(serverName, failureMessage));
    return ServiceControllerStatus.StopPending;
}
/// <summary>
/// Evicts <paramref name="serverName"/> from the cluster via <paramref name="pamServer"/>,
/// pauses for <c>DatabaseAvailabilityGroupAction.WaitBetweenOps</c>, then adds the node back.
/// </summary>
/// <remarks>
/// The remote verbose log of each RPC is written to <paramref name="output"/>. Exceptions
/// from either RPC propagate to the caller after the available verbose log has been written.
/// </remarks>
/// <param name="pamServer">Server that the evict and add RPCs are issued against.</param>
/// <param name="serverName">Node to evict and re-join.</param>
/// <param name="output">Receives log output.</param>
internal static void JoinForceCleanupNode(AmServerName pamServer, AmServerName serverName, HaTaskOutputHelper output)
{
    string verboseLog = null;
    output.AppendLogMessage("{0} is probably cleaned up, try to evict it and join it back", new object[]
    {
        serverName.NetbiosName
    });
    try
    {
        ReplayRpcClientWrapper.RunEvictNodeFromCluster(pamServer, serverName, out verboseLog);
        DagTaskHelper.LogRemoteVerboseLog(output, pamServer.Fqdn, verboseLog);

        // The eviction log has been written. Clear it so the finally block cannot write it
        // a second time if the sleep or the add below throws before producing a new log.
        verboseLog = null;

        output.AppendLogMessage("Sleep one minute before we issue add", new object[0]);
        Thread.Sleep(DatabaseAvailabilityGroupAction.WaitBetweenOps);
        output.AppendLogMessage("joining {0}", new object[]
        {
            serverName.NetbiosName
        });
        ReplayRpcClientWrapper.RunAddNodeToCluster(pamServer, serverName, out verboseLog);
    }
    finally
    {
        // Writes the add log on success, or whatever log the failing RPC managed to return.
        DagTaskHelper.LogRemoteVerboseLog(output, pamServer.Fqdn, verboseLog);
    }
}
/// <summary>
/// Adds <paramref name="serverName"/> to the cluster via <paramref name="pamServer"/>,
/// retrying once after <c>DatabaseAvailabilityGroupAction.WaitBetweenOps</c> when the first
/// attempt fails with a <c>DagTaskServerException</c> carrying a recognized inner exception.
/// </summary>
/// <remarks>
/// The remote verbose log of each attempt is written to <paramref name="output"/>. A
/// <c>DagTaskServerException</c> without a recognized inner exception is rethrown, as is
/// any failure of the retry.
/// </remarks>
/// <param name="pamServer">Server that the add RPC is issued against.</param>
/// <param name="serverName">Node to join.</param>
/// <param name="output">Receives log output.</param>
internal static void JoinOneNode(AmServerName pamServer, AmServerName serverName, HaTaskOutputHelper output)
{
    string verboseLog = null;
    try
    {
        output.AppendLogMessage("joining {0}", new object[]
        {
            serverName.NetbiosName
        });
        ReplayRpcClientWrapper.RunAddNodeToCluster(pamServer, serverName, out verboseLog);
    }
    catch (DagTaskServerException ex)
    {
        // NOTE(review): both lookups below are textually identical, so the second is
        // redundant as written. They presumably targeted two different inner exception
        // types whose generic arguments are missing here - confirm against the intended types.
        Exception ex2;
        if (!ex.TryGetInnerExceptionOfType(out ex2) && !ex.TryGetInnerExceptionOfType(out ex2))
        {
            throw;
        }

        DagTaskHelper.LogRemoteVerboseLog(output, pamServer.Fqdn, verboseLog);

        // The first attempt's log has been written. Clear it so the finally block cannot
        // write it a second time if the sleep or the retry throws before producing a new log.
        verboseLog = null;

        output.AppendLogMessage("{0} is probably just starting up, retry add after 1 minute", new object[]
        {
            serverName.NetbiosName
        });
        Thread.Sleep(DatabaseAvailabilityGroupAction.WaitBetweenOps);
        ReplayRpcClientWrapper.RunAddNodeToCluster(pamServer, serverName, out verboseLog);
    }
    finally
    {
        DagTaskHelper.LogRemoteVerboseLog(output, pamServer.Fqdn, verboseLog);
    }
}
/// <summary>
/// Runs the force-cleanup RPC for one node on a background task and waits up to
/// <paramref name="maxTimeToWait"/> for it to finish.
/// </summary>
/// <remarks>
/// A <c>LocalizedException</c> thrown by the RPC is captured and reported as a warning.
/// If the wait times out, a warning is written and the method returns without cancelling
/// or further waiting for the background task.
/// </remarks>
/// <param name="dagName">DAG name used in warning messages.</param>
/// <param name="nodeToForceCleanup">Server to clean up.</param>
/// <param name="maxTimeToWait">Maximum time to wait for the RPC to complete.</param>
/// <param name="output">Receives log and warning output.</param>
/// <returns>
/// <c>true</c> only if the RPC completed in time and no error message was captured;
/// otherwise <c>false</c>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dagName"/>, <paramref name="nodeToForceCleanup"/> or
/// <paramref name="output"/> is null.
/// </exception>
internal static bool ForceCleanupOneNodeLocally(string dagName, Server nodeToForceCleanup, TimeSpan maxTimeToWait, HaTaskOutputHelper output)
{
    if (dagName == null)
    {
        throw new ArgumentNullException("dagName");
    }
    if (nodeToForceCleanup == null)
    {
        throw new ArgumentNullException("nodeToForceCleanup");
    }
    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    string remoteLog = string.Empty;
    AmServerName targetNode = new AmServerName(nodeToForceCleanup);
    output.AppendLogMessage("Attempting to run cluster node <LocalNodeName> /forcecleanup on {0}...", new object[]
    {
        targetNode.NetbiosName
    });

    // Both 'failure' and 'remoteLog' are written by the background task.
    string failure = null;
    System.Threading.Tasks.Task cleanupTask = System.Threading.Tasks.Task.Factory.StartNew(() =>
    {
        try
        {
            ReplayRpcClientWrapper.RunForceCleanupNode(targetNode, out remoteLog);
        }
        catch (LocalizedException rpcFailure)
        {
            failure = rpcFailure.Message;
        }
    });

    bool finishedInTime = cleanupTask.Wait(maxTimeToWait);
    if (!finishedInTime)
    {
        failure = string.Format("The operation didn't complete in {0} seconds", maxTimeToWait.TotalSeconds);
        output.WriteWarning(Strings.FailedToForceCleanupNode(nodeToForceCleanup.Name, dagName, failure));
        return false;
    }

    DagTaskHelper.LogRemoteVerboseLog(output, targetNode.Fqdn, remoteLog);
    if (string.IsNullOrEmpty(failure))
    {
        return true;
    }

    output.WriteWarning(Strings.FailedToForceCleanupNode(nodeToForceCleanup.Name, dagName, failure));
    return false;
}
/// <summary>
/// Runs <c>ForceCleanupOneNodeLocally</c> for every server in
/// <paramref name="shouldStopServers"/> and reports the servers for which it returned false.
/// </summary>
/// <param name="dag">DAG whose name is used in messages.</param>
/// <param name="shouldStopServers">Servers to force-clean; enumerated exactly once.</param>
/// <param name="maxTimeToWaitForOneNode">Per-node wait limit passed to the cleanup call.</param>
/// <param name="output">Receives progress and error output.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="dag"/>, <paramref name="shouldStopServers"/> or
/// <paramref name="output"/> is null.
/// </exception>
internal static void ForceCleanupStoppedNodes(DatabaseAvailabilityGroup dag, IEnumerable<Server> shouldStopServers, TimeSpan maxTimeToWaitForOneNode, HaTaskOutputHelper output)
{
    if (dag == null)
    {
        throw new ArgumentNullException("dag");
    }
    if (shouldStopServers == null)
    {
        throw new ArgumentNullException("shouldStopServers");
    }
    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    // Materialize once: the sequence is needed both for its count and for the loop, and
    // enumerating a lazy source twice could repeat work or yield two different snapshots.
    List<Server> servers = shouldStopServers.ToList();
    int serverCount = servers.Count;
    List<string> failedServers = new List<string>(1);

    foreach (Server server in servers)
    {
        // The division stays inside the loop so it is never evaluated when serverCount is 0.
        // NOTE(review): this force-cleanup path reports Strings.ProgressEvictNode while
        // EvictStoppedNodes reports Strings.ProgressForceCleanupNode - the two look swapped;
        // confirm against the string resources.
        output.WriteProgressIncrementalSimple(Strings.ProgressEvictNode(server.Name), 20 / serverCount);
        if (!DatabaseAvailabilityGroupAction.ForceCleanupOneNodeLocally(dag.Name, server, maxTimeToWaitForOneNode, output))
        {
            failedServers.Add(server.Name);
        }
    }

    if (failedServers.Count != 0)
    {
        output.WriteErrorSimple(new StopDagFailedException(string.Join(",", failedServers.ToArray<string>()), dag.Name));
    }
}
/// <summary>
/// Evicts from the DAG's cluster every server in <paramref name="stoppedServers"/> that is
/// still listed as a cluster node, issuing the eviction RPC to the core group's owner node.
/// </summary>
/// <remarks>
/// The cluster topology is dumped first; a <c>ClusterException</c> from the dump is logged
/// and ignored. Eviction failures are written as warnings and collected, and one
/// <c>FailedToEvictNodeException</c> naming all failed servers (with the last error message)
/// is reported at the end.
/// </remarks>
/// <param name="dag">DAG whose cluster is opened.</param>
/// <param name="stoppedServers">Servers to evict; enumerated exactly once.</param>
/// <param name="output">Receives log, progress, warning and error output.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="dag"/>, <paramref name="stoppedServers"/> or
/// <paramref name="output"/> is null.
/// </exception>
internal static void EvictStoppedNodes(DatabaseAvailabilityGroup dag, IEnumerable<Server> stoppedServers, HaTaskOutputHelper output)
{
    if (dag == null)
    {
        throw new ArgumentNullException("dag");
    }
    if (stoppedServers == null)
    {
        throw new ArgumentNullException("stoppedServers");
    }
    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    string error = null;
    List<string> failedServers = new List<string>(1);

    using (AmCluster amCluster = AmCluster.OpenDagClus(dag))
    {
        using (IAmClusterGroup amClusterGroup = amCluster.FindCoreClusterGroup())
        {
            output.AppendLogMessage("EvictStoppedNodes has been called. Dumping current cluster state.", new object[0]);
            try
            {
                using (DumpClusterTopology dumpClusterTopology = new DumpClusterTopology(amCluster, output))
                {
                    dumpClusterTopology.Dump();
                }
            }
            catch (ClusterException ex)
            {
                output.AppendLogMessage("DumpClusterTopology( {0} ) failed with exception = {1}. This is OK.", new object[]
                {
                    dag.Name,
                    ex.Message
                });
                output.AppendLogMessage("Ignoring previous error, as it is acceptable if the cluster does not exist yet.", new object[0]);
            }

            IEnumerable<AmServerName> clusterNodes = amCluster.EnumerateNodeNames();
            AmServerName ownerNode = amClusterGroup.OwnerNode;

            // Materialize once: the sequence is needed both for its count and for the loop,
            // and enumerating a lazy source twice could repeat work or yield two snapshots.
            List<Server> servers = stoppedServers.ToList();
            int serverCount = servers.Count;

            foreach (Server server in servers)
            {
                AmServerName amServerName = new AmServerName(server);
                if (clusterNodes.Contains(amServerName))
                {
                    output.AppendLogMessage("Server '{0}' is still a node in the cluster, and will have to be evicted.", new object[]
                    {
                        amServerName.NetbiosName
                    });

                    // Scoped to this iteration so that a failed RPC cannot cause the previous
                    // server's verbose log to be written again under this server.
                    string verboseLog = null;
                    try
                    {
                        try
                        {
                            // The division is only reached when serverCount is at least 1.
                            // NOTE(review): this eviction path reports
                            // Strings.ProgressForceCleanupNode while ForceCleanupStoppedNodes
                            // reports Strings.ProgressEvictNode - the two look swapped;
                            // confirm against the string resources.
                            output.WriteProgressIncrementalSimple(Strings.ProgressForceCleanupNode(server.Name), 20 / serverCount);
                            output.AppendLogMessage("Running the eviction operation by issuing an RPC to the replay service on '{0}'...", new object[]
                            {
                                ownerNode.Fqdn
                            });
                            ReplayRpcClientWrapper.RunEvictNodeFromCluster(ownerNode, amServerName, out verboseLog);
                        }
                        finally
                        {
                            DagTaskHelper.LogRemoteVerboseLog(output, ownerNode.Fqdn, verboseLog);
                        }
                    }
                    catch (DagTaskOperationFailedException ex2)
                    {
                        output.AppendLogMessage("An exception was thrown! ex={0}", new object[]
                        {
                            ex2.Message
                        });

                        // NOTE(review): the two lookups below are textually identical, so the
                        // 'else if' branch is unreachable as written. They presumably targeted
                        // two different inner exception types whose generic arguments are
                        // missing here - confirm against the intended types.
                        Exception ex3;
                        if (ex2.TryGetInnerExceptionOfType(out ex3))
                        {
                            output.AppendLogMessage("Ignore it. It was AmClusterEvictWithoutCleanupException, which is acceptable. It could be completed with cluster node /forcecleanp, but that isn't necessary.", new object[0]);
                        }
                        else if (ex2.TryGetInnerExceptionOfType(out ex3))
                        {
                            output.AppendLogMessage("That exception is fine. It means that the server has already been evicted from the cluster.", new object[0]);
                        }
                        else
                        {
                            error = ex2.Message;
                            output.WriteWarning(Strings.FailedToEvictNode(server.Name, dag.Name, error));
                            failedServers.Add(server.Name);
                        }
                    }
                    catch (LocalizedException ex4)
                    {
                        error = ex4.Message;
                        output.WriteWarning(Strings.FailedToEvictNode(server.Name, dag.Name, error));
                        failedServers.Add(server.Name);
                    }
                }
                else
                {
                    output.AppendLogMessage("Server '{0}' is not in the cluster anymore. It must have already been evicted.", new object[]
                    {
                        amServerName.NetbiosName
                    });
                }
            }
        }
    }

    if (failedServers.Count != 0)
    {
        // 'error' holds only the message of the last failure encountered.
        output.WriteErrorSimple(new FailedToEvictNodeException(string.Join(",", failedServers.ToArray<string>()), dag.Name, error));
    }
}
/// <summary>
/// Collects the databases of every server in <paramref name="stoppedServers"/> and hands
/// them to a <c>ClusterBatchLastLogGenDeleter</c> to delete their time stamps.
/// </summary>
/// <remarks>
/// When no databases are found, this is logged and the deletion is skipped.
/// </remarks>
/// <param name="cluster">Cluster passed to the batch deleter.</param>
/// <param name="stoppedServers">Servers whose databases are collected.</param>
/// <param name="output">Receives log output.</param>
private void DeleteLastLogGenerationTimeStamps(AmCluster cluster, IEnumerable<Server> stoppedServers, HaTaskOutputHelper output)
{
    output.AppendLogMessage("Deleting last log generation time stamps of all databases on stopped servers...", new object[0]);

    List<Database> affectedDatabases = new List<Database>(100);
    foreach (Server stoppedServer in stoppedServers)
    {
        output.AppendLogMessage("Finding all databases on stopped server '{0}'...", new object[]
        {
            stoppedServer.Fqdn
        });

        // NOTE(review): the constructed AmServerName is discarded. It is kept because the
        // constructor is not visible here and may validate or throw - confirm whether this
        // call can be removed.
        new AmServerName(stoppedServer);
        affectedDatabases.AddRange(stoppedServer.GetDatabases());
    }

    if (affectedDatabases.Count != 0)
    {
        output.AppendLogMessage("Found {0} databases on the stopped servers.", new object[]
        {
            affectedDatabases.Count
        });
        new ClusterBatchLastLogGenDeleter(cluster, affectedDatabases, output).DeleteTimeStamps();
        output.AppendLogMessage("Finished deleting last log generation time stamps on stopped servers.", new object[0]);
    }
    else
    {
        output.AppendLogMessage("No databases were found on the stopped servers. Skipping time stamp deletion.", new object[0]);
    }
}