/// <summary>
/// Resolves a <see cref="ReplicaSelector"/> to one concrete replica of one concrete partition.
/// </summary>
/// <param name="fabricClient">Client whose query manager is used to list the replicas of the partition.</param>
/// <param name="replicaSelector">Selector describing the replica to pick; must not be null.</param>
/// <param name="requestTimeout">Timeout forwarded to the partition lookup and to each replica-list query.</param>
/// <param name="operationTimeout">Timeout forwarded to the partition lookup and to the retry helper that wraps the replica-list query.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <returns>
/// A tuple holding the <see cref="SelectedReplica"/> (replica id plus selected partition) and the chosen <see cref="Replica"/>.
/// </returns>
public static async Task<Tuple<SelectedReplica, Replica>> GetSelectedReplicaAsync(
    FabricClient fabricClient,
    ReplicaSelector replicaSelector,
    TimeSpan requestTimeout,
    TimeSpan operationTimeout,
    CancellationToken cancellationToken)
{
    ThrowIf.Null(replicaSelector, "ReplicaSelector");

    // The partition has to be resolved first: the replica query below is keyed by its id.
    SelectedPartition partition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
        fabricClient,
        replicaSelector.PartitionSelector,
        requestTimeout,
        operationTimeout,
        cancellationToken).ConfigureAwait(false);
    Guid targetPartitionId = partition.PartitionId;

    ServiceReplicaList replicas = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => fabricClient.QueryManager.GetReplicaListAsync(
            targetPartitionId,
            0,
            requestTimeout,
            cancellationToken),
        operationTimeout,
        cancellationToken).ConfigureAwait(false);

    var candidates = replicas.ToArray();
    Replica chosen = replicaSelector.GetSelectedReplica(candidates, new Random(), true /*skip invalid replicas*/);
    var selected = new SelectedReplica(chosen.Id, partition);

    return new Tuple<SelectedReplica, Replica>(selected, chosen);
}
/// <summary>
/// Asks the fault manager to restart a node, identified through the primary replica
/// of a randomly selected partition of the given service.
/// </summary>
/// <param name="serviceName">Uri of the service in the format fabric:/[application]/[service name]</param>
/// <returns>Task that completes when the restart call, issued with <see cref="CompletionMode.Verify"/>, completes.</returns>
public async Task RestartNodeAsync(Uri serviceName)
{
    ReplicaSelector primaryOfRandomPartition =
        ReplicaSelector.PrimaryOf(PartitionSelector.RandomOf(serviceName));

    await _client.FaultManager
        .RestartNodeAsync(primaryOfRandomPartition, CompletionMode.Verify)
        .ConfigureAwait(false);
}
/// <summary>
/// Creates a restart-deployed-code-package action for the given application and replica selector.
/// The completion mode is initialised to <see cref="CompletionMode.Verify"/>.
/// </summary>
/// <param name="applicationName">Value stored in <c>ApplicationName</c>.</param>
/// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
public RestartDeployedCodePackageAction(Uri applicationName, ReplicaSelector replicaSelector)
{
    this.ApplicationName = applicationName;
    this.ReplicaSelector = replicaSelector;
    this.CompletionMode = CompletionMode.Verify;
}
/// <summary>
/// Resolves the replica described by <paramref name="replicaSelector"/> and restarts it
/// through the service manager, using the retry helper.
/// </summary>
/// <param name="fabricClient">Client used for the replica lookup and for the restart call.</param>
/// <param name="replicaSelector">Selector describing the replica to restart; must not be null.</param>
/// <param name="completionMode">Not read by this method.</param>
/// <param name="requestTimeout">Timeout forwarded to the lookup and to each restart request.</param>
/// <param name="operationTimeout">Timeout forwarded to the lookup and to the retry helper that wraps the restart.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <returns>A <see cref="RestartReplicaResult"/> wrapping the selected replica.</returns>
/// <exception cref="InvalidOperationException">Either element of the lookup result is null.</exception>
public static async Task<RestartReplicaResult> RestartReplicaAsync(
    FabricClient fabricClient,
    ReplicaSelector replicaSelector,
    CompletionMode completionMode,
    TimeSpan requestTimeout,
    TimeSpan operationTimeout,
    CancellationToken cancellationToken)
{
    // NOTE(review): this helper is constructed but never read below; both calls receive
    // operationTimeout directly. Kept because its constructor may validate the timeout.
    System.Fabric.Common.TimeoutHelper helper = new System.Fabric.Common.TimeoutHelper(operationTimeout);

    System.Fabric.Common.ThrowIf.Null(replicaSelector, "ReplicaSelector");

    Tuple<SelectedReplica, Replica> selection = await FaultAnalysisServiceUtility.GetSelectedReplicaAsync(
        fabricClient,
        replicaSelector,
        requestTimeout,
        operationTimeout,
        cancellationToken).ConfigureAwait(false);

    SelectedReplica selectedReplica = selection.Item1;
    if (selectedReplica == null)
    {
        throw new InvalidOperationException("replicaStateActionResult cannot be null");
    }

    Guid partitionId = selectedReplica.SelectedPartition.PartitionId;

    Replica replica = selection.Item2;
    if (replica == null)
    {
        throw new InvalidOperationException("replicaStateResult cannot be null");
    }

    string nodeName = replica.NodeName;
    long replicaId = replica.Id;

    // An empty partition id or a zero replica id means the lookup did not yield a usable target.
    ThrowIf.IsTrue(partitionId == Guid.Empty, "PartitionID");
    ThrowIf.IsTrue(replicaId == 0, "ReplicaID");

    await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => fabricClient.ServiceManager.RestartReplicaAsync(
            nodeName,
            partitionId,
            replicaId,
            requestTimeout,
            cancellationToken),
        FabricClientRetryErrors.RestartReplicaErrors.Value,
        operationTimeout,
        cancellationToken).ConfigureAwait(false);

    return new RestartReplicaResult(selectedReplica);
}
/// <summary>
/// Builds a selector for the primary replica of a random partition of
/// <c>GlobalContext.ServiceName</c> and reports success.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
public async Task<bool> RestartGetSnowConditionsService()
{
    PartitionSelector anyPartition = PartitionSelector.RandomOf(GlobalContext.ServiceName);

    // NOTE(review): the selector is built but no restart call is issued with it,
    // and the method contains no await.
    ReplicaSelector primarySelector = ReplicaSelector.PrimaryOf(anyPartition);

    return true;
}
/// <summary>
/// Forwards the restart-deployed-code-package command to the cluster connection.
/// </summary>
/// <param name="clusterConnection">Connection that performs the restart.</param>
/// <param name="applicationName">Application name passed to the connection.</param>
/// <param name="replicaSelector">Replica selector passed to the connection.</param>
/// <returns>The task returned by the cluster connection.</returns>
internal override Task<RestartDeployedCodePackageResult> InvokeCommandAsync(
    IClusterConnection clusterConnection,
    Uri applicationName,
    ReplicaSelector replicaSelector)
{
    // Read in the same order in which the original argument list evaluated them.
    var timeoutSec = this.TimeoutSec;
    var completionMode = this.CommandCompletionMode ?? CompletionMode.Verify; // Verify when no mode was supplied
    var cancellationToken = this.GetCancellationToken();

    return clusterConnection.RestartDeployedCodePackageAsync(
        replicaSelector,
        applicationName,
        timeoutSec,
        completionMode,
        cancellationToken);
}
/// <summary>
/// Builds a <see cref="RemoveReplicaAction"/> that targets the exact replica named by
/// the service uri, partition id and replica id of <paramref name="ragAction"/>.
/// </summary>
/// <param name="ragAction">Source of the service uri, partition id and replica id.</param>
/// <returns>The remove-replica action for that replica.</returns>
private FabricTestAction GetRemoveReplicaAction(ReplicaStateTransitionAction ragAction)
{
    Uri serviceUri = ragAction.ServiceUri;
    Guid partitionId = ragAction.PartitionId;
    long replicaId = ragAction.ReplicaId;

    // Select the specific replica: partition by id, then replica by id.
    // (The previous version also formatted a report string that was never used.)
    ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(
        PartitionSelector.PartitionIdOf(serviceUri, partitionId),
        replicaId);

    RemoveReplicaAction removeReplicaAction = new RemoveReplicaAction(replicaSelector);
    return removeReplicaAction;
}
/// <summary>
/// Restarts the replica described by <paramref name="replicaSelector"/> through the cluster connection.
/// </summary>
/// <param name="clusterConnection">
/// Connection that performs the restart.
/// </param>
/// <param name="replicaSelector">
/// Selector describing the replica to restart.
/// </param>
/// <param name="completionMode">
/// Completion mode forwarded to the cluster connection.
/// </param>
/// <param name="cancellationToken">
/// Token forwarded to the cluster connection.
/// </param>
/// <returns>
/// A task producing the result of the restart call.
/// </returns>
internal override async Task<ReplicaResult> InvokeCommandAsync(
    IClusterConnection clusterConnection,
    ReplicaSelector replicaSelector,
    CompletionMode completionMode,
    CancellationToken cancellationToken)
{
    // Await the call instead of blocking on .Result: blocking held a thread for the whole
    // operation and surfaced failures wrapped in an AggregateException.
    return await clusterConnection.RestartReplicaAsync(
        replicaSelector,
        this.TimeoutSec,
        completionMode,
        cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Runs the replica-level faults against the replica described by <paramref name="replicaSelector"/>:
/// remove replica, restart replica (only when the service has persisted state) and restart code package.
/// Each fault goes through <c>InvokeAndValidateFaultAsync</c>.
/// </summary>
/// <param name="replicaSelector">Selector describing the replica to fault.</param>
/// <param name="replicaRole">Role label inserted into the fault descriptions.</param>
/// <param name="hasPersistedState">When false, the restart-replica fault is skipped.</param>
/// <param name="token">Token forwarded to every fault call.</param>
/// <returns>Task that completes after all applicable faults have run.</returns>
private async Task TestReplicaFaultsAsync(
    ReplicaSelector replicaSelector,
    string replicaRole,
    bool hasPersistedState,
    CancellationToken token)
{
    string removeDescription = StringHelper.Format("Removing replica state for {0}", replicaRole);
    await this.InvokeAndValidateFaultAsync(
        removeDescription,
        () => this.FabricClient.FaultManager.RemoveReplicaAsync(
            replicaSelector,
            CompletionMode.Verify,
            false,
            this.failoverTestScenarioParameters.OperationTimeout,
            token),
        token);

    if (hasPersistedState)
    {
        string restartDescription = StringHelper.Format("Restarting replica state for {0}", replicaRole);
        await this.InvokeAndValidateFaultAsync(
            restartDescription,
            () => this.FabricClient.FaultManager.RestartReplicaAsync(
                replicaSelector,
                CompletionMode.Verify,
                this.failoverTestScenarioParameters.OperationTimeout,
                token),
            token);
    }

    string codePackageDescription = StringHelper.Format("Restarting code package for {0} replica", replicaRole);
    await this.InvokeAndValidateFaultAsync(
        codePackageDescription,
        () => this.FabricClient.FaultManager.RestartDeployedCodePackageAsync(
            this.serviceDescription.ApplicationName,
            replicaSelector,
            CompletionMode.Verify,
            this.failoverTestScenarioParameters.OperationTimeout,
            token),
        token);
}
/// <summary>
/// Finds the primary replica of the only partition of the only service of the application
/// whose type name is <c>DefaultApplicationTypeName</c>, and requests its removal.
/// </summary>
/// <remarks>
/// Each lookup uses <c>Single</c>, so an <see cref="InvalidOperationException"/> is thrown
/// when there is not exactly one matching application, service, partition or primary replica.
/// </remarks>
private void KillPrimaryReplica()
{
    // Kill the primary
    Application application = _fabricClient.QueryManager.GetApplicationListAsync()
        .Result.Single(a => a.ApplicationTypeName == DefaultApplicationTypeName);

    Service service = _fabricClient.QueryManager.GetServiceListAsync(application.ApplicationName).Result.Single();

    Partition partition = _fabricClient.QueryManager.GetPartitionListAsync(service.ServiceName).Result.Single();

    // OfType skips replicas that are not stateful. The previous "as" cast yielded null for them,
    // which made the role predicate throw a NullReferenceException.
    StatefulServiceReplica primaryReplica =
        _fabricClient.QueryManager.GetReplicaListAsync(partition.PartitionInformation.Id)
            .Result.OfType<StatefulServiceReplica>()
            .Single(statefulServiceReplica => statefulServiceReplica.ReplicaRole == ReplicaRole.Primary);

    LogHelper.Log("Killing the primary replica at node {0}", primaryReplica.NodeName);

    ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(
        PartitionSelector.PartitionIdOf(service.ServiceName, partition.PartitionInformation.Id),
        primaryReplica.Id);

    // NOTE(review): the returned task is not observed, so this method can return before the
    // removal request completes and a failure of the request goes unnoticed. Left unchanged
    // because callers may rely on the non-blocking behaviour; confirm.
    _fabricClient.FaultManager.RemoveReplicaAsync(replicaSelector, CompletionMode.DoNotVerify, false);
}
/// <summary>
/// Builds the replica selector expected for a given partition case / replica case combination.
/// </summary>
/// <param name="partitionCase">Partition case used to obtain the expected partition selector.</param>
/// <param name="replicaCase">Replica case that decides which kind of replica selector is built.</param>
/// <returns>The matching selector, or <c>null</c> when <paramref name="replicaCase"/> is not one of the handled cases.</returns>
public static ReplicaSelector GetExpectedReplicaSelector(ParitionSelectorTestHelper.PartitionCase partitionCase, ReplicaCase replicaCase)
{
    // Resolved up front for every case, including the unhandled ones.
    PartitionSelector partitionSelector = ParitionSelectorTestHelper.GetExpectedPartitionSelector(partitionCase);

    switch (replicaCase)
    {
        case ReplicaCase.ReplicaPrimary:
            return ReplicaSelector.PrimaryOf(partitionSelector);

        case ReplicaCase.ReplicaRandomSecondary:
            return ReplicaSelector.RandomSecondaryOf(partitionSelector);

        case ReplicaCase.ReplicaId:
            return ReplicaSelector.ReplicaIdOf(partitionSelector, replicaInstance.Value);

        case ReplicaCase.ReplicaId_NoValue:
            return ReplicaSelector.ReplicaIdOf(partitionSelector, 0);

        case ReplicaCase.ReplicaRandom:
            return ReplicaSelector.RandomOf(partitionSelector);

        default:
            return null;
    }
}
/// <summary>
/// This API supports the Service Fabric platform and is not meant to be called from your code.
/// Runs the failover scenario: reads the service description, validates the service, resolves one
/// partition from the configured partition selector, then repeatedly induces faults against that
/// partition until the configured time to run has elapsed or cancellation is requested.
/// </summary>
/// <param name="token">This API supports the Service Fabric platform and is not meant to be called from your code. The token is forwarded to every asynchronous call and checked in the loop condition.</param>
/// <returns>Task that completes when the fault loop exits.</returns>
protected override async Task OnExecuteAsync(CancellationToken token)
{
    // Read the service description once; it decides which faults apply (stateful vs. stateless).
    this.serviceDescription = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
            this.failoverTestScenarioParameters.PartitionSelector.ServiceName,
            this.failoverTestScenarioParameters.RequestTimeout,
            token),
        this.failoverTestScenarioParameters.OperationTimeout,
        token).ConfigureAwait(false);

    // Stays false for services that are not stateful.
    bool hasPersistedState = false;
    if (this.serviceDescription.IsStateful())
    {
        StatefulServiceDescription statefulDescription = this.serviceDescription as StatefulServiceDescription;
        ReleaseAssert.AssertIf(statefulDescription == null, "Stateful service description is not WinFabricStatefulServiceDescription");
        hasPersistedState = statefulDescription.HasPersistedState;
    }

    Log.WriteInfo(TraceType, "Validating Service health and availability");
    await this.FabricClient.TestManager.ValidateServiceAsync(
        this.failoverTestScenarioParameters.PartitionSelector.ServiceName,
        this.failoverTestScenarioParameters.MaxServiceStabilizationTimeout,
        token);

    Log.WriteInfo(TraceType, "Getting Selected Partition");
    var getPartitionStateAction = new GetSelectedPartitionStateAction(this.failoverTestScenarioParameters.PartitionSelector)
    {
        RequestTimeout = this.failoverTestScenarioParameters.RequestTimeout,
        ActionTimeout = this.failoverTestScenarioParameters.OperationTimeout
    };

    await this.TestContext.ActionExecutor.RunAsync(getPartitionStateAction, token);
    Guid selectedPartitionId = getPartitionStateAction.Result.PartitionId;

    Log.WriteInfo(TraceType, "Running test for partition {0}", selectedPartitionId);
    this.ReportProgress("Selected partition {0} for testing failover", selectedPartitionId);

    // Pin every fault below to the partition resolved above by selecting it by id.
    PartitionSelector selectedPartition = PartitionSelector.PartitionIdOf(this.failoverTestScenarioParameters.PartitionSelector.ServiceName, selectedPartitionId);

    // Keep cycling through the faults until the time budget is used up or cancellation is requested.
    while (this.failoverTestScenarioParameters.TimeToRun - this.GetElapsedTime() > TimeSpan.Zero && !token.IsCancellationRequested)
    {
        // Replica-level faults.
        if (this.serviceDescription.IsStateful())
        {
            ReplicaSelector primaryReplicaSelector = ReplicaSelector.PrimaryOf(selectedPartition);
            ReplicaSelector secondaryReplicaSelector = ReplicaSelector.RandomSecondaryOf(selectedPartition);

            // Make Primary go through RemoveReplica, RestartReplica and RestartCodePackage
            await this.TestReplicaFaultsAsync(primaryReplicaSelector, "Primary", hasPersistedState, token);

            // Make Secondary go through RemoveReplica, RestartReplica and RestartCodePackage
            await this.TestReplicaFaultsAsync(secondaryReplicaSelector, "Secondary", hasPersistedState, token);
        }
        else
        {
            ReplicaSelector randomInstanceSelector = ReplicaSelector.RandomOf(selectedPartition);

            // Make Stateless Instance go through RemoveReplica, RestartReplica and RestartCodePackage
            await this.TestReplicaFaultsAsync(randomInstanceSelector, "Stateless Instance", hasPersistedState, token);
        }

        // Partition-level faults.
        if (this.serviceDescription.IsStateful())
        {
            // Restart all secondary replicas and make sure the replica set recovers
            await this.InvokeAndValidateFaultAsync(
                "Restarting all the secondary replicas",
                () =>
                {
                    // Warning 618 (use of an obsolete member) is suppressed for this call.
#pragma warning disable 618
                    return(this.FabricClient.TestManager.RestartPartitionAsync(
                        selectedPartition,
                        RestartPartitionMode.OnlyActiveSecondaries,
                        this.failoverTestScenarioParameters.OperationTimeout,
                        token));
#pragma warning restore 618
                },
                token);

            // Restart all replicas if service is persisted
            if (hasPersistedState)
            {
                await this.InvokeAndValidateFaultAsync(
                    "Restarting all replicas including Primary",
                    () =>
                    {
#pragma warning disable 618
                        return(this.FabricClient.TestManager.RestartPartitionAsync(
                            selectedPartition,
                            RestartPartitionMode.AllReplicasOrInstances,
                            this.failoverTestScenarioParameters.OperationTimeout,
                            token));
#pragma warning restore 618
                    },
                    token);
            }

            // Induce a primary move; the empty string is passed as the first argument.
            await this.InvokeAndValidateFaultAsync(
                "Move Primary to a different node",
                () =>
                {
                    return(this.FabricClient.FaultManager.MovePrimaryAsync(
                        string.Empty,
                        selectedPartition,
                        true,
                        this.failoverTestScenarioParameters.OperationTimeout,
                        token));
                },
                token);

            // Induce a secondary move; empty strings are passed as the first two arguments.
            await this.InvokeAndValidateFaultAsync(
                "Move Secondary to a different node",
                () =>
                {
                    return(this.FabricClient.FaultManager.MoveSecondaryAsync(
                        string.Empty,
                        string.Empty,
                        selectedPartition,
                        true,
                        this.failoverTestScenarioParameters.OperationTimeout,
                        token));
                },
                token);
        }
        else
        {
            // Restart all stateless instances
            await this.InvokeAndValidateFaultAsync(
                "Restarting all stateless instances for partition",
                () =>
                {
#pragma warning disable 618
                    return(this.FabricClient.TestManager.RestartPartitionAsync(
                        selectedPartition,
                        RestartPartitionMode.AllReplicasOrInstances,
                        this.failoverTestScenarioParameters.OperationTimeout,
                        token));
#pragma warning restore 618
                },
                token);
        }
    }
}
/// <summary>
/// Builds a <see cref="ReplicaSelector"/> from a parameter-set name. The name is searched for
/// partition markers ("PartitionId", "PartitionSingleton", "PartitionNamed", "PartitionUniformedInt")
/// and replica markers ("ReplicaPrimary", "ReplicaRandomSecondary", "ReplicaId"). A name without
/// "Partition" selects a random partition; a name without "Replica" selects a random replica.
/// </summary>
/// <param name="partitionSetName">Parameter-set name that is searched for the markers.</param>
/// <param name="partitionId">Partition id used when the name contains "PartitionId".</param>
/// <param name="serviceName">Service name used by every partition selector.</param>
/// <param name="partitionKey">Partition key; parsed as a long for "PartitionUniformedInt".</param>
/// <param name="replicaOrInstanceId">Replica id used for "ReplicaId"; 0 is used when it is null.</param>
/// <returns>The replica selector that matches the name.</returns>
/// <exception cref="ArgumentException">
/// The key is not a valid long for "PartitionUniformedInt", or no partition / replica selector could be derived from the name.
/// </exception>
internal static ReplicaSelector GetReplicaSelector(string partitionSetName, Guid partitionId, Uri serviceName, string partitionKey, long? replicaOrInstanceId)
{
    // Partition part. The first matching marker wins, so "PartitionId" is tested first.
    PartitionSelector partitionSelector = null;
    if (partitionSetName.Contains("PartitionId"))
    {
        partitionSelector = PartitionSelector.PartitionIdOf(serviceName, partitionId);
    }
    else if (partitionSetName.Contains("PartitionSingleton"))
    {
        partitionSelector = PartitionSelector.SingletonOf(serviceName);
    }
    else if (partitionSetName.Contains("PartitionNamed"))
    {
        partitionSelector = PartitionSelector.PartitionKeyOf(serviceName, partitionKey);
    }
    else if (partitionSetName.Contains("PartitionUniformedInt"))
    {
        long numericKey;
        if (!long.TryParse(partitionKey, out numericKey))
        {
            throw new ArgumentException(StringResources.Error_InvalidPartitionKey);
        }

        partitionSelector = PartitionSelector.PartitionKeyOf(serviceName, numericKey);
    }
    else if (!partitionSetName.Contains("Partition"))
    {
        partitionSelector = PartitionSelector.RandomOf(serviceName);
    }

    if (partitionSelector == null)
    {
        throw new ArgumentException(StringResources.Error_CouldNotParsePartitionSelector);
    }

    // Replica part, same first-match rule.
    ReplicaSelector replicaSelector = null;
    if (partitionSetName.Contains("ReplicaPrimary"))
    {
        replicaSelector = ReplicaSelector.PrimaryOf(partitionSelector);
    }
    else if (partitionSetName.Contains("ReplicaRandomSecondary"))
    {
        replicaSelector = ReplicaSelector.RandomSecondaryOf(partitionSelector);
    }
    else if (partitionSetName.Contains("ReplicaId"))
    {
        replicaSelector = ReplicaSelector.ReplicaIdOf(partitionSelector, replicaOrInstanceId ?? 0);
    }
    else if (!partitionSetName.Contains("Replica"))
    {
        replicaSelector = ReplicaSelector.RandomOf(partitionSelector);
    }

    if (replicaSelector == null)
    {
        throw new ArgumentException(StringResources.Error_CouldNotParseReplicaSelector);
    }

    return replicaSelector;
}
/// <summary>
/// Invokes the restart-deployed-code-package command against the cluster; implemented by derived types.
/// </summary>
/// <param name="clusterConnection">Cluster connection the command is issued through.</param>
/// <param name="uri">Uri argument of the command. NOTE(review): presumably the application name; confirm against the overrides.</param>
/// <param name="replicaSelector">Replica selector argument of the command.</param>
/// <returns>A task producing the <see cref="RestartDeployedCodePackageResult"/>.</returns>
internal abstract Task <RestartDeployedCodePackageResult> InvokeCommandAsync(
    IClusterConnection clusterConnection,
    Uri uri,
    ReplicaSelector replicaSelector);
/// <summary>
/// Initializes the request with a replica selector and an explicit completion mode.
/// </summary>
/// <param name="fabricClient">Client passed to the base constructor.</param>
/// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
/// <param name="completionMode">Value stored in <c>CompletionMode</c>.</param>
/// <param name="timeout">Timeout passed to the base constructor.</param>
protected NodeControlRequest(
    IFabricClient fabricClient,
    ReplicaSelector replicaSelector,
    CompletionMode completionMode,
    TimeSpan timeout)
    : base(fabricClient, timeout)
{
    this.ReplicaSelector = replicaSelector;
    this.CompletionMode = completionMode;
}
/// <summary>
/// Initializes the request with a replica selector and no explicit completion mode;
/// <see cref="CompletionMode.Invalid"/> is passed to the four-parameter constructor.
/// </summary>
/// <param name="fabricClient">Client forwarded to the four-parameter constructor.</param>
/// <param name="replicaSelector">Replica selector forwarded to the four-parameter constructor, which stores it.</param>
/// <param name="timeout">Timeout forwarded to the four-parameter constructor.</param>
protected NodeControlRequest(IFabricClient fabricClient, ReplicaSelector replicaSelector, TimeSpan timeout)
    : this(fabricClient, replicaSelector, CompletionMode.Invalid, timeout)
{
    // Intentionally empty: the chained constructor already assigns ReplicaSelector,
    // so the second assignment that used to be here was redundant.
}
/// <summary>
/// Fills the placeholders of an expected-output template: "###ReplicaID###" is replaced with
/// the string form of <c>replicaInstance</c>, "###ReplicaSelector###" with the string form of
/// <paramref name="replicaSelector"/>.
/// </summary>
/// <param name="output">Template text containing the placeholders.</param>
/// <param name="replicaSelector">Selector whose <c>ToString()</c> replaces "###ReplicaSelector###".</param>
/// <returns>The template with both placeholders substituted.</returns>
public static string FormatOutput(string output, ReplicaSelector replicaSelector)
{
    string replicaIdText = replicaInstance.ToString();
    string withReplicaId = output.Replace("###ReplicaID###", replicaIdText);

    string selectorText = replicaSelector.ToString();
    return withReplicaId.Replace("###ReplicaSelector###", selectorText);
}
/// <summary>
/// Creates a restart-node action for the given replica selector.
/// The completion mode is initialised to <see cref="CompletionMode.Verify"/>.
/// </summary>
/// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
/// <param name="createFabricDump">Value stored in <c>CreateFabricDump</c>.</param>
public RestartNodeAction(ReplicaSelector replicaSelector, bool createFabricDump)
{
    this.ReplicaSelector = replicaSelector;
    this.CreateFabricDump = createFabricDump;
    this.CompletionMode = CompletionMode.Verify;
}
/// <summary>
/// Creates an action that carries the given replica selector.
/// </summary>
/// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
public GetSelectedReplicaStateAction(ReplicaSelector replicaSelector)
{
    this.ReplicaSelector = replicaSelector;
}
/// <summary>
/// Creates a restart-node request. The client, selector, completion mode and timeout are handed
/// to the base constructor; only the dump flag is stored by this type.
/// </summary>
/// <param name="fabricClient">Client passed to the base constructor.</param>
/// <param name="replicaSelector">Replica selector passed to the base constructor.</param>
/// <param name="createFabricDump">Value stored in the <c>createFabricDump</c> field.</param>
/// <param name="completionMode">Completion mode passed to the base constructor.</param>
/// <param name="timeout">Timeout passed to the base constructor.</param>
public RestartNodeRequest(
    IFabricClient fabricClient,
    ReplicaSelector replicaSelector,
    bool createFabricDump,
    CompletionMode completionMode,
    TimeSpan timeout)
    : base(fabricClient, replicaSelector, completionMode, timeout)
{
    this.createFabricDump = createFabricDump;
}
/// <summary>
/// Creates a remove-replica action for the given replica selector. The completion mode is
/// initialised to <see cref="CompletionMode.Verify"/> and <c>ForceRemove</c> to <c>false</c>.
/// </summary>
/// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
public RemoveReplicaAction(ReplicaSelector replicaSelector)
{
    this.ReplicaSelector = replicaSelector;
    this.CompletionMode = CompletionMode.Verify;
    this.ForceRemove = false;
}
/// <summary>
/// Invokes the replica command against the cluster; implemented by derived types.
/// </summary>
/// <param name="clusterConnection">
/// Cluster connection the command is issued through.
/// </param>
/// <param name="replicaSelector">
/// Selector describing the replica the command targets.
/// </param>
/// <param name="completionMode">
/// Completion mode of the command.
/// </param>
/// <param name="cancellationToken">
/// Cancellation token for the command.
/// </param>
/// <returns>
/// A <see cref="Task"/> producing the <see cref="ReplicaResult"/> of the command.
/// </returns>
internal abstract Task <ReplicaResult> InvokeCommandAsync(
    IClusterConnection clusterConnection,
    ReplicaSelector replicaSelector,
    CompletionMode completionMode,
    CancellationToken cancellationToken);
/// <summary>
/// Induces quorum loss on one partition of a stateful, persisted service. It blocks
/// "StatefulServiceReopen" on the nodes of the chosen replicas through unreliable-transport
/// behaviors, restarts those replicas, waits for the requested duration, asserts that the
/// partition reports quorum loss, and finally removes the behaviors again.
/// </summary>
/// <param name="testContext">Context providing the fabric client and the action executor.</param>
/// <param name="action">The quorum-loss action: partition selector, mode, duration and timeouts. Its <c>Result</c> is set on success.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <returns>Task that completes once the behaviors have been removed and the result has been recorded.</returns>
/// <exception cref="InvalidOperationException">The service is not stateful, or is stateful without persisted state.</exception>
protected override async Task ExecuteActionAsync(FabricTestContext testContext, InvokeQuorumLossAction action, CancellationToken cancellationToken)
{
    ThrowIf.Null(action.PartitionSelector, "PartitionSelector");

    // One budget for the whole action; every step below receives what is left of it.
    var helper = new TimeoutHelper(action.ActionTimeout);

    // Get info about the service so its kind and persistence can be checked.
    ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => testContext.FabricClient.ServiceManager.GetServiceDescriptionAsync(
            action.PartitionSelector.ServiceName,
            action.RequestTimeout,
            cancellationToken),
        helper.GetRemainingTime(),
        cancellationToken).ConfigureAwait(false);

    if (result.Kind != ServiceDescriptionKind.Stateful)
    {
        throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "QuorumLoss", "Stateful", action.PartitionSelector.ServiceName, "Stateless"));
    }

    StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;
    ReleaseAssert.AssertIf(statefulServiceDescription == null, "Service is not a stateful service");

    if (!statefulServiceDescription.HasPersistedState)
    {
        throw new InvalidOperationException(StringHelper.Format(StringResources.Error_InvalidServiceTypeTestability, "QuorumLoss", "Stateful Persistent", action.PartitionSelector.ServiceName, "Stateful In-Memory Only"));
    }

    // Figure out which partition to select.
    var getPartitionStateAction = new GetSelectedPartitionStateAction(action.PartitionSelector)
    {
        RequestTimeout = action.RequestTimeout,
        ActionTimeout = helper.GetRemainingTime()
    };

    await testContext.ActionExecutor.RunAsync(getPartitionStateAction, cancellationToken);
    Guid partitionId = getPartitionStateAction.Result.PartitionId;

    // Get data about the replicas in that partition.
    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => testContext.FabricClient.QueryManager.GetReplicaListAsync(
            partitionId,
            0,
            action.RequestTimeout,
            cancellationToken),
        helper.GetRemainingTime(),
        cancellationToken).ConfigureAwait(false);

    // removeUTRequestList records every (nodeName, behaviorName) pair for which an add was requested.
    // removeUTTaskDictionary records the removal attempts that were actually started.
    var removeUTRequestList = new List<Tuple<string, string>>();
    Dictionary<Tuple<string, string>, Task> removeUTTaskDictionary = new Dictionary<Tuple<string, string>, Task>();

    try
    {
        var stableReplicas = replicasResult.Where(r => r.ReplicaStatus == ServiceReplicaStatus.Ready).ToArray();

        var stableReplicasToRemove = new List<StatefulServiceReplica>();
        long replicasToRestartWithoutPrimary =
            action.QuorumLossMode == QuorumLossMode.AllReplicas
                ? stableReplicas.Length - 1
                : FabricCluster.GetWriteQuorumSize(replicasResult.Count);

        foreach (var replica in stableReplicas)
        {
            StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
            ReleaseAssert.AssertIf(statefulReplica == null, "Service Replica is not of stateful type even though service is stateful");

            // Each non-primary consumes one unit of the budget; the primary is always selected.
            if (statefulReplica.ReplicaRole != ReplicaRole.Primary)
            {
                replicasToRestartWithoutPrimary--;
            }

            if (replicasToRestartWithoutPrimary >= 0 || statefulReplica.ReplicaRole == ReplicaRole.Primary)
            {
                stableReplicasToRemove.Add(statefulReplica);
            }
        }

        // For the selected replicas, block reopen so that a restarted (NOT removed) replica does not come back up.
        var utTaskList = new List<Task>();
        foreach (var statefulReplica in stableReplicasToRemove)
        {
            string nodeName = statefulReplica.NodeName;
            UnreliableTransportBehavior behavior = new UnreliableTransportBehavior("*", "StatefulServiceReopen");
            behavior.AddFilterForPartitionId(partitionId);
            string behaviorName = "BlockStatefulServiceReopen_" + nodeName;

            removeUTRequestList.Add(new Tuple<string, string>(nodeName, behaviorName));
            utTaskList.Add(
                FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                    () => testContext.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                        nodeName,
                        behaviorName,
                        behavior,
                        action.RequestTimeout,
                        cancellationToken),
                    helper.GetRemainingTime(),
                    cancellationToken));
        }

        await Task.WhenAll(utTaskList).ConfigureAwait(false);

        // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
        await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken);

        var restartReplicaTaskList = new List<Task>();
        foreach (var statefulReplica in stableReplicasToRemove)
        {
            ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(action.PartitionSelector.ServiceName, partitionId), statefulReplica.Id);

            var restartReplicaAction = new RestartReplicaAction(replicaSelector)
            {
                CompletionMode = CompletionMode.DoNotVerify,
                RequestTimeout = action.RequestTimeout,
                ActionTimeout = helper.GetRemainingTime()
            };

            restartReplicaTaskList.Add(testContext.ActionExecutor.RunAsync(restartReplicaAction, cancellationToken));
        }

        await Task.WhenAll(restartReplicaTaskList).ConfigureAwait(false);

        // Keep the partition in quorum loss for the requested duration.
        await AsyncWaiter.WaitAsync(action.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

        // Validate that the partition reports quorum loss.
        ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => testContext.FabricClient.QueryManager.GetPartitionListAsync(
                action.PartitionSelector.ServiceName,
                null,
                action.RequestTimeout,
                cancellationToken),
            FabricClientRetryErrors.GetPartitionListFabricErrors.Value,
            helper.GetRemainingTime(),
            cancellationToken).ConfigureAwait(false);

        foreach (StatefulServicePartition partition in partitionsResult)
        {
            if (partition.PartitionInformation.Id == partitionId)
            {
                ReleaseAssert.AssertIf(partition.PartitionStatus != ServicePartitionStatus.InQuorumLoss, "Partition failed to be in Quorum Loss.");
                break;
            }
        }

        foreach (var removeUTParams in removeUTRequestList)
        {
            var currentParams = removeUTParams;
            Task task = FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                    currentParams.Item1, /*nodeName*/
                    currentParams.Item2, /*behaviorName*/
                    action.RequestTimeout,
                    cancellationToken),
                FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                helper.GetRemainingTime(),
                cancellationToken);

            removeUTTaskDictionary[currentParams] = task;
        }

        await Task.WhenAll(removeUTTaskDictionary.Values).ConfigureAwait(false);

        // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
        await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken);
    }
    finally
    {
        // Cleanup walks the list of REQUESTED behaviors, not the dictionary of attempted removals.
        // The dictionary stays empty when a failure happens before the removal loop above is
        // reached, and iterating it then left the behaviors installed on the nodes.
        var removeUTTaskList = new List<Task>();

        foreach (var removeUTRequest in removeUTRequestList)
        {
            var currentRemoveUTRequest = removeUTRequest;

            Task attemptedRemoval;
            bool alreadyRemoved =
                removeUTTaskDictionary.TryGetValue(currentRemoveUTRequest, out attemptedRemoval)
                && attemptedRemoval != null
                && !attemptedRemoval.IsFaulted;

            if (!alreadyRemoved)
            {
                removeUTTaskList.Add(
                    FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                        () => testContext.FabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                            currentRemoveUTRequest.Item1, /*nodeName*/
                            currentRemoveUTRequest.Item2, /*behaviorName*/
                            action.RequestTimeout,
                            cancellationToken),
                        FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
                        helper.GetRemainingTime(),
                        cancellationToken));
            }
        }

        // NOTE(review): these two blocking waits are kept as written. Awaiting inside a finally
        // block needs C# 6, and nothing in this method shows that language level is available.
        Task.WhenAll(removeUTTaskList).Wait(cancellationToken);

        // TODO: Wait for some time so that the removal of this unreliable transport behavior can be read from the files.
        // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
        Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).GetAwaiter().GetResult();
    }

    action.Result = new InvokeQuorumLossResult(getPartitionStateAction.Result);
    this.ResultTraceString = StringHelper.Format("InvokeQuorumLossAction succeeded for {0} with QuorumLossMode = {1}", partitionId, action.QuorumLossMode);
}
/// <summary>
/// Creates a restart-replica action for the given replica selector.
/// The completion mode is initialised to <see cref="CompletionMode.Verify"/>.
/// </summary>
/// <param name="replicaSelector">Value stored in <c>ReplicaSelector</c>.</param>
public RestartReplicaAction(ReplicaSelector replicaSelector)
{
    this.ReplicaSelector = replicaSelector;
    this.CompletionMode = CompletionMode.Verify;
}
/// <summary>
/// Performs the quorum-loss step. It applies a "StatefulServiceReopen" unreliable-transport behavior
/// for every (node name, behavior name) pair recorded in the state, restarts the recorded target
/// replicas, waits for the recorded quorum-loss duration, polls the partition status until it reports
/// quorum loss or the retries run out, and then removes the behaviors.
/// </summary>
/// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
/// <param name="serviceInternalFaultInfo">Passed through to <c>ActionTest.PerformInternalServiceFaultIfRequested</c>.</param>
/// <returns>The state, with <c>StepStateNames.CompletedSuccessfully</c> pushed onto its progress stack.</returns>
/// <exception cref="FabricTransientException">The partition did not report quorum loss within the retry count.</exception>
public override async Task <ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
{
    InvokeQuorumLossState state = Convert(this.State);

    // Everything this step acts on comes from the state's Info: partition, behaviors and replicas.
    Guid partitionId = state.Info.PartitionId;
    List <Tuple <string, string> > unreliableTransportInfo = state.Info.UnreliableTransportInfo;
    List <long> targetReplicas = state.Info.ReplicaIds;

    var unreliableTransportTaskList = new List <Task>();
    // NOTE(review): this list is created but never read or written afterwards in this method.
    List <Tuple <string, string> > unreliableTransportInfoList = new List <Tuple <string, string> >();

    // Apply one behavior per recorded (nodeName, behaviorName) pair; the adds run concurrently.
    foreach (Tuple <string, string> ut in unreliableTransportInfo)
    {
        string nodeName = ut.Item1;
        string behaviorName = ut.Item2;

        System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "StatefulServiceReopen");
        behavior.AddFilterForPartitionId(partitionId);

        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying '{1}'", this.State.OperationId, behaviorName);

        unreliableTransportTaskList.Add(FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                nodeName,
                behaviorName,
                behavior,
                this.RequestTimeout,
                cancellationToken),
            this.OperationTimeout,
            cancellationToken));
    }

    await Task.WhenAll(unreliableTransportTaskList).ConfigureAwait(false);

    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

    // Restart every target replica without verification; the restarts run concurrently.
    List <Task> tasks = new List <Task>();
    foreach (long replicaId in targetReplicas)
    {
        ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(this.partitionSelector.ServiceName, partitionId), replicaId);

        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - faulting replica with id={1}", this.State.OperationId, replicaId);
        Task task = FaultAnalysisServiceUtility.RestartReplicaAsync(this.FabricClient, replicaSelector, CompletionMode.DoNotVerify, this.RequestTimeout, this.OperationTimeout, cancellationToken);
        tasks.Add(task);
    }

    await Task.WhenAll(tasks).ConfigureAwait(false);

    // NOTE(review): presumably a test hook that injects a fault on request; confirm against ActionTest.
    ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - keeping partition in quorum loss for '{1}'", this.State.OperationId, state.Info.QuorumLossDuration);
    await Task.Delay(state.Info.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

    // NOTE(review): timeoutHelper is constructed but never read afterwards in this method.
    TimeoutHelper timeoutHelper = new TimeoutHelper(this.OperationTimeout);

    bool conditionSatisfied = false;

    int quorumLossCheckRetries = FASConstants.QuorumLossCheckRetryCount;

    // Poll the partition status until it reports quorum loss or the retries are used up.
    do
    {
        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - checking PartitionStatus", this.State.OperationId);
        ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => this.FabricClient.QueryManager.GetPartitionListAsync(
                this.partitionSelector.ServiceName,
                null,
                this.RequestTimeout,
                cancellationToken),
            this.OperationTimeout,
            cancellationToken).ConfigureAwait(false);

        foreach (StatefulServicePartition partition in partitionsResult)
        {
            if (partition.PartitionInformation.Id == partitionId)
            {
                if (partition.PartitionStatus == ServicePartitionStatus.InQuorumLoss)
                {
                    conditionSatisfied = true;
                    break;
                }
            }
        }

        // The break above only leaves the foreach, so this wait also runs after a successful check.
        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
    }while (!conditionSatisfied && quorumLossCheckRetries-- > 0);

    if (!conditionSatisfied)
    {
        string error = string.Format(CultureInfo.InvariantCulture, "{0} - Service could not induce quorum loss for service '{1}', partition '{2}'. Please retry", this.State.OperationId, this.partitionSelector.ServiceName, partitionId);
        TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, error);

        throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
    }

    // Remove the behaviors applied at the start of this step.
    await QuorumLossStepsFactory.RemoveUTAsync(this.FabricClient, this.State, this.RequestTimeout, this.OperationTimeout, cancellationToken);

    state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

    return(state);
}
/// <summary>
/// Resolves the replica described by <paramref name="replicaSelector"/> by running a
/// <see cref="GetSelectedReplicaStateAction"/> on the action executor of the underlying system fabric client.
/// </summary>
/// <param name="client">Testability client from which the system fabric client is obtained.</param>
/// <param name="replicaSelector">Selector describing the replica to resolve.</param>
/// <returns>The second item of the action's result, i.e. the resolved <see cref="Replica"/>.</returns>
public static async Task<Replica> GetReplicaAsync(this IFabricTestabilityClient client, ReplicaSelector replicaSelector)
{
    var systemClient = GetSystemFabricClient(client);
    var selectReplica = new GetSelectedReplicaStateAction(replicaSelector);

    var executor = systemClient.FabricClient.TestManager.TestContext.ActionExecutor;
    await executor.RunAsync(selectReplica);

    return selectReplica.Result.Item2;
}