/// <summary>
/// Creates the step object that matches <paramref name="stateName"/> for an invoke-quorum-loss command.
/// </summary>
/// <param name="stateName">The step the command is currently at.</param>
/// <param name="fabricClient">Client handed to the created step.</param>
/// <param name="actionState">Command state; must be an <see cref="InvokeQuorumLossState"/>.</param>
/// <param name="action">Supplies the partition selector and quorum loss mode for the step.</param>
/// <param name="requestTimeout">Request timeout handed to the created step.</param>
/// <param name="operationTimeout">Operation timeout handed to the created step.</param>
/// <param name="cancellationToken">Not used by this method.</param>
/// <returns>
/// The step for <see cref="StepStateNames.LookingUpState"/> or <see cref="StepStateNames.PerformingActions"/>.
/// For any other state name an error is traced and <c>ReleaseAssert.Failfast</c> is invoked; null is
/// returned should that call return.
/// </returns>
/// <exception cref="InvalidCastException">Thrown by <c>Convert</c> when the state is of the wrong type.</exception>
public static StepBase GetStep(
    StepStateNames stateName,
    FabricClient fabricClient,
    ActionStateBase actionState,
    InvokeQuorumLossAction action,
    TimeSpan requestTimeout,
    TimeSpan operationTimeout,
    CancellationToken cancellationToken)
{
    // Validate the state type up front, before looking at the requested step.
    InvokeQuorumLossState quorumLossState = Convert(actionState);

    switch (stateName)
    {
        case StepStateNames.LookingUpState:
            return new QuorumLossStepsFactory.LookingUpState(
                fabricClient,
                quorumLossState,
                requestTimeout,
                operationTimeout,
                action.PartitionSelector,
                action.QuorumLossMode);

        case StepStateNames.PerformingActions:
            return new QuorumLossStepsFactory.PerformingActions(
                fabricClient,
                quorumLossState,
                requestTimeout,
                operationTimeout,
                action.PartitionSelector);

        case StepStateNames.CompletedSuccessfully:
            // The command is already done, so nobody should be asking for another step.
            TestabilityTrace.TraceSource.WriteError(
                StepBase.TraceType,
                "{0} - GetStep() should not have been called when the state nme is CompletedSuccessfully",
                actionState.OperationId);
            ReleaseAssert.Failfast("GetStep() should not have been called when the state nme is CompletedSuccessfully");
            return null;

        default:
            string unexpectedStateMessage = string.Format(
                CultureInfo.InvariantCulture,
                "{0} - Unexpected state name={1}",
                actionState.OperationId,
                stateName);
            TestabilityTrace.TraceSource.WriteError(StepBase.TraceType, "{0}", unexpectedStateMessage);
            ReleaseAssert.Failfast(unexpectedStateMessage);
            return null;
    }
}
/// <summary>
/// Downcasts a generic action state to <see cref="InvokeQuorumLossState"/>.
/// </summary>
/// <param name="actionState">The state object to convert.</param>
/// <returns>The same object, typed as <see cref="InvokeQuorumLossState"/>.</returns>
/// <exception cref="InvalidCastException">
/// <paramref name="actionState"/> is null or is not an <see cref="InvokeQuorumLossState"/>.
/// </exception>
public static InvokeQuorumLossState Convert(ActionStateBase actionState)
{
    InvokeQuorumLossState converted = actionState as InvokeQuorumLossState;
    if (converted != null)
    {
        return converted;
    }

    throw new InvalidCastException("State object could not be converted");
}
/// <summary>
/// Looks up the state of an invoke-quorum-loss command and converts it into a progress object.
/// </summary>
/// <param name="operationId">Identifier of the command whose progress is requested.</param>
/// <param name="timeout">Timeout passed to the message processor lookup.</param>
/// <param name="cancellationToken">Token passed to the message processor lookup.</param>
/// <returns>The progress (state plus selected partition and any rollback-causing error).</returns>
/// <exception cref="InvalidCastException">
/// The state stored for <paramref name="operationId"/> is not an <see cref="InvokeQuorumLossState"/>.
/// </exception>
/// <remarks>
/// Any exception is traced as a warning and offered to
/// <c>FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable</c> before being rethrown.
/// </remarks>
public async Task<PartitionQuorumLossProgress> GetInvokeQuorumLossProgressAsync(
    Guid operationId,
    TimeSpan timeout,
    CancellationToken cancellationToken)
{
    this.ThrowIfNotReady();

    PartitionQuorumLossProgress progress = null;

    try
    {
        ActionStateBase actionState = await this.MessageProcessor.ProcessGetProgressAsync(operationId, timeout, cancellationToken);

        // NOTE(review): the peeked value is never read. The call is retained so that whatever
        // Peek() does on an unexpected (e.g. empty) progress collection still happens here.
        actionState.StateProgress.Peek();

        TestCommandProgressState state = FaultAnalysisServiceUtility.ConvertState(actionState, TraceType);

        // The operation id may belong to a different command type; fail with a descriptive
        // exception instead of dereferencing a null cast result.
        InvokeQuorumLossState invokeQuorumLossState = actionState as InvokeQuorumLossState;
        if (invokeQuorumLossState == null)
        {
            throw new InvalidCastException("State object could not be converted");
        }

        var selectedPartition = new SelectedPartition
        {
            ServiceName = invokeQuorumLossState.Info.PartitionSelector.ServiceName,
            PartitionId = invokeQuorumLossState.Info.PartitionId
        };

        PartitionQuorumLossResult result = new PartitionQuorumLossResult(selectedPartition, actionState.ErrorCausingRollback);

        progress = new PartitionQuorumLossProgress(state, result);

        TestabilityTrace.TraceSource.WriteInfo(
            TraceType,
            "{0} - {1} progress - {2}, Exception - {3}",
            operationId,
            ActionType.InvokeQuorumLoss,
            progress.Result != null ? progress.Result.SelectedPartition.ToString() : FASConstants.UnavailableMessage,
            (progress.Result != null && progress.Result.Exception != null) ? progress.Result.Exception.ToString() : FASConstants.UnavailableMessage);
    }
    catch (Exception e)
    {
        TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception: {1}", operationId, e.ToString());
        FaultAnalysisServiceUtility.ThrowTransientExceptionIfRetryable(e);

        // Not classified as retryable above: surface the original exception unchanged.
        throw;
    }

    return progress;
}
// Use this method signature for now until the actual client interface is decided

/// <summary>
/// Validates the quorum loss mode, persists a new invoke-quorum-loss command and queues it.
/// </summary>
/// <param name="operationId">Identifier of the new command.</param>
/// <param name="partitionSelector">Selects the target partition.</param>
/// <param name="quorumLossMode">Quorum loss mode; checked by <c>ThrowIfQuorumLossModeInvalid</c>.</param>
/// <param name="quorumLossDuration">Duration stored in the command state.</param>
/// <param name="timeout">Timeout for persisting the new command.</param>
/// <param name="serviceInternalFaultInfo">Internal fault information stored in the command state.</param>
/// <remarks>Exceptions from persisting or queueing are traced as warnings and rethrown.</remarks>
public async Task ProcessQuorumLossCommandAsync(
    Guid operationId,
    PartitionSelector partitionSelector,
    QuorumLossMode quorumLossMode,
    TimeSpan quorumLossDuration,
    TimeSpan timeout,
    ServiceInternalFaultInfo serviceInternalFaultInfo)
{
    ThrowIfQuorumLossModeInvalid(quorumLossMode);

    var newCommandState = new InvokeQuorumLossState(
        operationId,
        serviceInternalFaultInfo,
        partitionSelector,
        quorumLossMode,
        quorumLossDuration);

    try
    {
        // Once this await completes, the intent has been persisted.
        await this.actionStore.InitializeNewActionAsync(newCommandState, timeout);

        this.Enqueue(newCommandState);
    }
    catch (Exception failure)
    {
        TestabilityTrace.TraceSource.WriteWarning(TraceType, "{0} - Exception {1}", operationId, failure);
        throw;
    }
}
/// <summary>
/// Deserializes a command state from its binary form.
/// </summary>
/// <param name="bytes">Serialized state; the leading bytes identify the command type.</param>
/// <returns>
/// The deserialized state, or null when the command type is not one of the types handled here.
/// </returns>
private ActionStateBase ReadData(byte[] bytes)
{
    using (BinaryReader reader = new BinaryReader(new MemoryStream(bytes)))
    {
        // The first 4 bytes are the command type
        ActionType commandType = ActionStateBase.ReadCommandType(reader);

        switch (commandType)
        {
            case ActionType.InvokeDataLoss:
                return InvokeDataLossState.FromBytes(reader);

            case ActionType.InvokeQuorumLoss:
                return InvokeQuorumLossState.FromBytes(reader);

            case ActionType.RestartPartition:
                return RestartPartitionState.FromBytes(reader);

            case ActionType.TestStuck:
                return StuckState.FromBytes(reader);

            case ActionType.TestRetryStep:
                return TestRetryStepState.FromBytes(reader);

            // Start and stop node commands share one state type; the command type is passed along.
            case ActionType.StartNode:
            case ActionType.StopNode:
                return NodeCommandState.FromBytes(reader, commandType);

            default:
                return null;
        }
    }
}
/// <summary>
/// Removes every unreliable transport behavior recorded in the quorum loss state.
/// </summary>
/// <param name="fabricClient">Client used to issue the remove calls.</param>
/// <param name="state">Command state; must be an <see cref="InvokeQuorumLossState"/>.</param>
/// <param name="requestTimeout">Timeout for each individual remove request.</param>
/// <param name="operationTimeout">Overall timeout given to the retry helper for each removal.</param>
/// <param name="cancellationToken">Token passed to the remove calls, the retry helper and the final delay.</param>
/// <remarks>
/// Does nothing when no unreliable transport information has been recorded. Otherwise all removals are
/// started, awaited together, and followed by a fixed 5 second delay.
/// </remarks>
/// <exception cref="InvalidCastException">Thrown by <c>Convert</c> when the state is of the wrong type.</exception>
public static async Task RemoveUTAsync(
    FabricClient fabricClient,
    ActionStateBase state,
    TimeSpan requestTimeout,
    TimeSpan operationTimeout,
    CancellationToken cancellationToken)
{
    InvokeQuorumLossState invokeQuorumLossState = Convert(state);

    List<Tuple<string, string>> unreliableTransportInfo = invokeQuorumLossState.Info.UnreliableTransportInfo;
    if (unreliableTransportInfo == null)
    {
        // Nothing was recorded, so there is nothing to clean up.
        return;
    }

    List<Task> tasks = new List<Task>();

    foreach (Tuple<string, string> info in unreliableTransportInfo)
    {
        // Removal only needs the node name and behavior name; no behavior object is required.
        string nodeName = info.Item1;
        string behaviorName = info.Item2;

        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - Cleaning up behavior={1}", state.OperationId, behaviorName);

        Task task = FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => fabricClient.TestManager.RemoveUnreliableTransportBehaviorAsync(
                nodeName,
                behaviorName,
                requestTimeout,
                cancellationToken),
            FabricClientRetryErrors.RemoveUnreliableTransportBehaviorErrors.Value,
            operationTimeout,
            cancellationToken);

        tasks.Add(task);
    }

    await Task.WhenAll(tasks).ConfigureAwait(false);

    // TODO: Wait for some time so that the unreliable transport behavior can be read from the files.
    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Performs the quorum loss: applies the recorded unreliable transport behaviors, restarts the target
/// replicas, keeps the partition in that condition for the configured duration, checks that the partition
/// reports quorum loss, and finally removes the behaviors again.
/// </summary>
/// <param name="cancellationToken">Token passed to every fabric call and delay.</param>
/// <param name="serviceInternalFaultInfo">
/// Passed to <c>ActionTest.PerformInternalServiceFaultIfRequested</c> after the replicas were restarted.
/// </param>
/// <returns>The state, with <see cref="StepStateNames.CompletedSuccessfully"/> pushed onto its progress.</returns>
/// <exception cref="FabricTransientException">
/// The partition was never observed in quorum loss within the configured number of checks.
/// </exception>
public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
{
    InvokeQuorumLossState state = Convert(this.State);

    // Inputs recorded in the state before this step runs.
    Guid partitionId = state.Info.PartitionId;
    List<Tuple<string, string>> unreliableTransportInfo = state.Info.UnreliableTransportInfo;
    List<long> targetReplicas = state.Info.ReplicaIds;

    // Apply one unreliable transport behavior per (node name, behavior name) pair, in parallel.
    var unreliableTransportTaskList = new List<Task>();
    foreach (Tuple<string, string> ut in unreliableTransportInfo)
    {
        string nodeName = ut.Item1;
        string behaviorName = ut.Item2;

        System.Fabric.Common.UnreliableTransportBehavior behavior = new System.Fabric.Common.UnreliableTransportBehavior("*", "StatefulServiceReopen");
        behavior.AddFilterForPartitionId(partitionId);

        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - applying '{1}'", this.State.OperationId, behaviorName);

        unreliableTransportTaskList.Add(FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => this.FabricClient.TestManager.AddUnreliableTransportBehaviorAsync(
                nodeName,
                behaviorName,
                behavior,
                this.RequestTimeout,
                cancellationToken),
            this.OperationTimeout,
            cancellationToken));
    }

    await Task.WhenAll(unreliableTransportTaskList).ConfigureAwait(false);

    // Bug#2271465 - Unreliable transport through API should return only once the behavior has been successfully applied
    await Task.Delay(TimeSpan.FromSeconds(5.0), cancellationToken).ConfigureAwait(false);

    // Restart every target replica, in parallel and without verifying completion.
    List<Task> tasks = new List<Task>();
    foreach (long replicaId in targetReplicas)
    {
        ReplicaSelector replicaSelector = ReplicaSelector.ReplicaIdOf(PartitionSelector.PartitionIdOf(this.partitionSelector.ServiceName, partitionId), replicaId);

        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - faulting replica with id={1}", this.State.OperationId, replicaId);
        Task task = FaultAnalysisServiceUtility.RestartReplicaAsync(
            this.FabricClient,
            replicaSelector,
            CompletionMode.DoNotVerify,
            this.RequestTimeout,
            this.OperationTimeout,
            cancellationToken);
        tasks.Add(task);
    }

    await Task.WhenAll(tasks).ConfigureAwait(false);

    ActionTest.PerformInternalServiceFaultIfRequested(this.State.OperationId, serviceInternalFaultInfo, this.State, cancellationToken, true);

    TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - keeping partition in quorum loss for '{1}'", this.State.OperationId, state.Info.QuorumLossDuration);
    await Task.Delay(state.Info.QuorumLossDuration, cancellationToken).ConfigureAwait(false);

    // Poll the partition status: one initial check plus up to QuorumLossCheckRetryCount retries.
    bool conditionSatisfied = false;
    int quorumLossCheckRetries = FASConstants.QuorumLossCheckRetryCount;

    do
    {
        TestabilityTrace.TraceSource.WriteInfo(StepBase.TraceType, "{0} - checking PartitionStatus", this.State.OperationId);
        ServicePartitionList partitionsResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
            () => this.FabricClient.QueryManager.GetPartitionListAsync(
                this.partitionSelector.ServiceName,
                null,
                this.RequestTimeout,
                cancellationToken),
            this.OperationTimeout,
            cancellationToken).ConfigureAwait(false);

        foreach (StatefulServicePartition partition in partitionsResult)
        {
            if (partition.PartitionInformation.Id == partitionId)
            {
                if (partition.PartitionStatus == ServicePartitionStatus.InQuorumLoss)
                {
                    conditionSatisfied = true;
                    break;
                }
            }
        }

        // NOTE(review): this wait also runs after quorum loss has already been observed (and after the
        // last failed check), so completion is delayed by one poll interval. Kept to preserve timing.
        await AsyncWaiter.WaitAsync(TimeSpan.FromSeconds(5), cancellationToken).ConfigureAwait(false);
    }
    while (!conditionSatisfied && quorumLossCheckRetries-- > 0);

    if (!conditionSatisfied)
    {
        string error = string.Format(
            CultureInfo.InvariantCulture,
            "{0} - Service could not induce quorum loss for service '{1}', partition '{2}'. Please retry",
            this.State.OperationId,
            this.partitionSelector.ServiceName,
            partitionId);

        // Pass the message as an argument, never as the format string: it embeds the service name.
        TestabilityTrace.TraceSource.WriteWarning(StepBase.TraceType, "{0}", error);

        throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
    }

    await QuorumLossStepsFactory.RemoveUTAsync(this.FabricClient, this.State, this.RequestTimeout, this.OperationTimeout, cancellationToken).ConfigureAwait(false);

    state.StateProgress.Push(StepStateNames.CompletedSuccessfully);

    return state;
}
/// <summary>
/// Creates the step that performs the quorum loss actions.
/// </summary>
/// <param name="fabricClient">Forwarded to the base step.</param>
/// <param name="state">Forwarded to the base step.</param>
/// <param name="requestTimeout">Forwarded to the base step.</param>
/// <param name="operationTimeout">Forwarded to the base step.</param>
/// <param name="partitionSelector">Selector of the target partition; stored on this step.</param>
public PerformingActions(
    FabricClient fabricClient,
    InvokeQuorumLossState state,
    TimeSpan requestTimeout,
    TimeSpan operationTimeout,
    PartitionSelector partitionSelector)
    : base(fabricClient, state, requestTimeout, operationTimeout)
{
    this.partitionSelector = partitionSelector;
}
/// <summary>
/// Validates the target service, resolves the target partition and its replicas, and records in the state
/// which replicas to fault and which unreliable transport behavior name to use on each of their nodes.
/// </summary>
/// <param name="cancellationToken">Token passed to every fabric call.</param>
/// <param name="serviceInternalFaultInfo">Not used by this step.</param>
/// <returns>
/// The state, with <see cref="StepStateNames.PerformingActions"/> pushed onto its progress and the
/// partition id, replica ids and unreliable transport info filled in.
/// </returns>
/// <exception cref="FabricInvalidForStatelessServicesException">The service is not stateful.</exception>
/// <exception cref="FabricOnlyValidForStatefulPersistentServicesException">The service has no persisted state.</exception>
/// <exception cref="FabricTransientException">No target replicas could be determined.</exception>
public override async Task<ActionStateBase> RunAsync(CancellationToken cancellationToken, ServiceInternalFaultInfo serviceInternalFaultInfo)
{
    InvokeQuorumLossState state = Convert(this.State);

    // Get the service description so its kind and persistence can be checked.
    ServiceDescription result = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => this.FabricClient.ServiceManager.GetServiceDescriptionAsync(
            this.partitionSelector.ServiceName,
            this.RequestTimeout,
            cancellationToken),
        this.OperationTimeout,
        cancellationToken).ConfigureAwait(false);

    if (result.Kind != ServiceDescriptionKind.Stateful)
    {
        // The message in the first arg is only for debugging, it is not returned to the user.
        throw new FabricInvalidForStatelessServicesException("FabricInvalidForStatelessServicesException", FabricErrorCode.InvalidForStatelessServices);
    }

    StatefulServiceDescription statefulServiceDescription = result as StatefulServiceDescription;
    ReleaseAssert.AssertIf(statefulServiceDescription == null, string.Format(CultureInfo.InvariantCulture, "{0} - Service is not a stateful service", this.State.OperationId));

    if (!statefulServiceDescription.HasPersistedState)
    {
        // The message in the first arg is only for debugging, it is not returned to the user.
        throw new FabricOnlyValidForStatefulPersistentServicesException("This is only valid for stateful persistent services", FabricErrorCode.OnlyValidForStatefulPersistentServices);
    }

    SelectedPartition targetPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
        this.FabricClient,
        this.partitionSelector,
        this.RequestTimeout,
        this.OperationTimeout,
        cancellationToken).ConfigureAwait(false);

    Guid partitionId = targetPartition.PartitionId;

    // Get data about the replicas in that partition.
    ServiceReplicaList replicasResult = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
        () => this.FabricClient.QueryManager.GetReplicaListAsync(
            partitionId,
            0,
            this.RequestTimeout,
            cancellationToken),
        this.OperationTimeout,
        cancellationToken).ConfigureAwait(false);

    List<StatefulServiceReplica> tempReplicas = new List<StatefulServiceReplica>();
    foreach (var replica in replicasResult)
    {
        StatefulServiceReplica statefulReplica = replica as StatefulServiceReplica;
        ReleaseAssert.AssertIf(statefulReplica == null, "Expected stateful replica");
        tempReplicas.Add(statefulReplica);
    }

    // Choose the replicas to fault according to the requested mode.
    List<StatefulServiceReplica> targetReplicas = null;
    if (this.quorumLossMode == QuorumLossMode.AllReplicas)
    {
        targetReplicas = tempReplicas.Where(r => r.ReplicaRole == ReplicaRole.Primary || r.ReplicaRole == ReplicaRole.ActiveSecondary).ToList();
    }
    else if (this.quorumLossMode == QuorumLossMode.QuorumReplicas)
    {
        targetReplicas = FaultAnalysisServiceUtility.GetReplicasForPartialLoss(state.OperationId, tempReplicas);
    }
    else
    {
        throw FaultAnalysisServiceUtility.CreateException(StepBase.TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_INVALIDARG, Strings.StringResources.Error_UnsupportedQuorumLossMode);
    }

    if (targetReplicas == null)
    {
        // This will cause the command to rollback and retry
        throw new FabricTransientException("The operation could not be performed, please retry", FabricErrorCode.NotReady);
    }

    // Record one (node name, behavior name) pair per target replica. Only the names are stored here;
    // no behavior object needs to be built in this step.
    List<Tuple<string, string>> unreliableTransportInfoList = new List<Tuple<string, string>>();
    foreach (var replica in targetReplicas)
    {
        string nodeName = replica.NodeName;
        string behaviorName = this.CreateBehaviorName(nodeName);

        unreliableTransportInfoList.Add(new Tuple<string, string>(nodeName, behaviorName));
    }

    state.StateProgress.Push(StepStateNames.PerformingActions);

    state.Info.PartitionId = partitionId;
    state.Info.ReplicaIds = targetReplicas.Select(r => r.Id).ToList();
    state.Info.UnreliableTransportInfo = unreliableTransportInfoList;

    return state;
}
/// <summary>
/// Creates the step that looks up the information needed to perform the quorum loss.
/// </summary>
/// <param name="fabricClient">Forwarded to the base step.</param>
/// <param name="state">Forwarded to the base step.</param>
/// <param name="requestTimeout">Forwarded to the base step.</param>
/// <param name="operationTimeout">Forwarded to the base step.</param>
/// <param name="partitionSelector">Selector of the target partition; stored on this step.</param>
/// <param name="quorumLossMode">Requested quorum loss mode; stored on this step.</param>
public LookingUpState(
    FabricClient fabricClient,
    InvokeQuorumLossState state,
    TimeSpan requestTimeout,
    TimeSpan operationTimeout,
    PartitionSelector partitionSelector,
    QuorumLossMode quorumLossMode)
    : base(fabricClient, state, requestTimeout, operationTimeout)
{
    this.quorumLossMode = quorumLossMode;
    this.partitionSelector = partitionSelector;
}
/// <summary>
/// Builds the executable action for a command from its type and state.
/// </summary>
/// <param name="actionType">The command type; selects which action class is created.</param>
/// <param name="actionStateBase">The command state; expected to be the state type matching <paramref name="actionType"/>.</param>
/// <returns>The constructed action, or null (after tracing) when the action type is not recognised.</returns>
/// <remarks>
/// For every recognised type, a state that is still at <see cref="StepStateNames.IntentSaved"/> is first
/// moved to <see cref="StepStateNames.LookingUpState"/> and persisted. For node commands the stopped node
/// table is attached to the state before that happens.
/// </remarks>
private async Task<FabricTestAction> ConstructActionAsync(ActionType actionType, ActionStateBase actionStateBase)
{
    FabricTestAction action = null;

    if (actionType == ActionType.InvokeDataLoss)
    {
        InvokeDataLossState actionState = actionStateBase as InvokeDataLossState;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        action = new InvokeDataLossAction(
            this.stateManager,
            this.Partition,
            actionState,
            actionState.Info.PartitionSelector,
            actionState.Info.DataLossMode,
            this.dataLossCheckWaitDurationInSeconds,
            this.dataLossCheckPollIntervalInSeconds,
            this.replicaDropWaitDurationInSeconds,
            this.requestTimeout,
            this.operationTimeout);
    }
    else if (actionType == ActionType.InvokeQuorumLoss)
    {
        InvokeQuorumLossState actionState = actionStateBase as InvokeQuorumLossState;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        // This is the case for resuming an action after a failover
        action = new InvokeQuorumLossAction(
            this.stateManager,
            this.Partition,
            actionState,
            actionState.Info.PartitionSelector,
            actionState.Info.QuorumLossMode,
            actionState.Info.QuorumLossDuration,
            this.requestTimeout,
            this.operationTimeout);
    }
    else if (actionType == ActionType.RestartPartition)
    {
        RestartPartitionState actionState = actionStateBase as RestartPartitionState;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        // This is the case for resuming an action after a failover
        action = new RestartPartitionAction(
            this.stateManager,
            this.Partition,
            actionState,
            actionState.Info.PartitionSelector,
            actionState.Info.RestartPartitionMode,
            this.requestTimeout,
            this.operationTimeout);
    }
    else if (actionType == ActionType.TestStuck)
    {
        StuckState actionState = actionStateBase as StuckState;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        action = new StuckAction(this.stateManager, this.Partition, actionState, this.requestTimeout, this.operationTimeout);
    }
    else if (actionType == ActionType.TestRetryStep)
    {
        TestRetryStepState actionState = actionStateBase as TestRetryStepState;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        action = new TestRetryStepAction(this.stateManager, this.Partition, actionState, this.requestTimeout, this.operationTimeout);
    }
    else if (actionType == ActionType.StartNode)
    {
        NodeCommandState actionState = actionStateBase as NodeCommandState;

        // Attach the table before the state is (possibly) persisted below.
        actionState.StoppedNodeTable = this.stoppedNodeTable;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        action = new StartNodeFromFASAction(this.stateManager, this.Partition, actionState, this.stoppedNodeTable, this.requestTimeout, this.operationTimeout);
    }
    else if (actionType == ActionType.StopNode)
    {
        NodeCommandState actionState = actionStateBase as NodeCommandState;

        // Attach the table before the state is (possibly) persisted below.
        actionState.StoppedNodeTable = this.stoppedNodeTable;
        await this.PushLookingUpStateIfIntentSavedAsync(actionState);

        action = new StopNodeFromFASAction(this.stateManager, this.Partition, actionState, this.stoppedNodeTable, this.requestTimeout, this.operationTimeout);
    }
    else
    {
        TestabilityTrace.TraceSource.WriteInfo(TraceType, "Unknown actionType");
    }

    return action;
}

/// <summary>
/// If the state's current step is <see cref="StepStateNames.IntentSaved"/>, pushes
/// <see cref="StepStateNames.LookingUpState"/>, persists the state and traces the update; otherwise does nothing.
/// </summary>
/// <param name="actionState">The command state to advance.</param>
private async Task PushLookingUpStateIfIntentSavedAsync(ActionStateBase actionState)
{
    StepStateNames currentState = actionState.StateProgress.Peek();
    if (currentState != StepStateNames.IntentSaved)
    {
        return;
    }

    actionState.StateProgress.Push(StepStateNames.LookingUpState);
    await this.actionStore.UpdateActionStateAsync(actionState);
    TestabilityTrace.TraceSource.WriteInfo(TraceType, "action state has been updated");
}