// Select a quorum of P and S that are not Down or Dropped
        /// <summary>
        /// Builds the list of replicas to target for a partial loss: the primary plus a number of
        /// further replicas equal to half (integer division) of the eligible replica count.
        /// A replica is eligible when it passes both the primary-or-secondary check and the up check.
        /// </summary>
        /// <param name="operationId">Identifier written into every trace line emitted here.</param>
        /// <param name="replicaList">Replicas to choose from; enumerated in order.</param>
        /// <returns>
        /// A list whose first entry is the primary, followed by the leading eligible replicas that
        /// remain once the primary is removed; <c>null</c> when no eligible primary exists.
        /// </returns>
        internal static List <StatefulServiceReplica> GetReplicasForPartialLoss(Guid operationId, List <StatefulServiceReplica> replicaList)
        {
            List<StatefulServiceReplica> candidates = new List<StatefulServiceReplica>();

            foreach (StatefulServiceReplica current in replicaList)
            {
                bool eligible = FaultAnalysisServiceUtility.IsPrimaryOrSecondary(current) && FaultAnalysisServiceUtility.IsReplicaUp(current);
                if (!eligible)
                {
                    continue;
                }

                TestabilityTrace.TraceSource.WriteInfo(TraceType, "DEBUG {0} temp adding {1},{2},{3}", operationId, current.Id, current.ReplicaRole, current.ReplicaStatus);
                candidates.Add(current);
            }

            // Computed while the primary is still in the candidate list, so the primary counts toward the total.
            int additionalCount = candidates.Count / 2;

            StatefulServiceReplica primaryReplica = candidates.FirstOrDefault(r => r.ReplicaRole == ReplicaRole.Primary);
            if (primaryReplica == null)
            {
                return null;
            }

            List<StatefulServiceReplica> selected = new List<StatefulServiceReplica>(additionalCount + 1);

            TestabilityTrace.TraceSource.WriteInfo(TraceType, "DEBUG {0} target adding primary {1},{2},{3}", operationId, primaryReplica.Id, primaryReplica.ReplicaRole, primaryReplica.ReplicaStatus);
            selected.Add(primaryReplica);

            // Drop the primary so the leading entries of the candidate list are the ones added next.
            candidates.Remove(primaryReplica);

            foreach (StatefulServiceReplica extra in candidates.Take(additionalCount))
            {
                TestabilityTrace.TraceSource.WriteInfo(TraceType, "DEBUG {0} target adding {1},{2},{3}", operationId, extra.Id, extra.ReplicaRole, extra.ReplicaStatus);
                selected.Add(extra);
            }

            return selected;
        }
        /// <summary>
        /// Resolves the partition described by the selector, queries that partition's replica list,
        /// and asks the selector to pick one replica from it.
        /// </summary>
        /// <param name="fabricClient">Client whose QueryManager is used for the replica list query.</param>
        /// <param name="replicaSelector">Selector supplying the partition selector and the replica choice; checked with ThrowIf.Null.</param>
        /// <param name="requestTimeout">Timeout passed to each individual request.</param>
        /// <param name="operationTimeout">Timeout passed to the partition lookup and to the retry helper.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>
        /// A tuple holding a SelectedReplica built from the chosen replica's id and the selected
        /// partition, together with the chosen Replica itself.
        /// </returns>
        public static async Task <Tuple <SelectedReplica, Replica> > GetSelectedReplicaAsync(
            FabricClient fabricClient,
            ReplicaSelector replicaSelector,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            ThrowIf.Null(replicaSelector, "ReplicaSelector");

            SelectedPartition resolvedPartition = await FaultAnalysisServiceUtility.GetSelectedPartitionStateAsync(
                fabricClient,
                replicaSelector.PartitionSelector,
                requestTimeout,
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            // Read the id before building the query lambda so the lambda captures a plain Guid.
            Guid targetPartitionId = resolvedPartition.PartitionId;

            ServiceReplicaList queriedReplicas = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => fabricClient.QueryManager.GetReplicaListAsync(
                    targetPartitionId,
                    0,
                    requestTimeout,
                    cancellationToken),
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            Replica chosenReplica = replicaSelector.GetSelectedReplica(
                queriedReplicas.ToArray(),
                new Random(),
                true /*skip invalid replicas*/);

            SelectedReplica selection = new SelectedReplica(chosenReplica.Id, resolvedPartition);

            return Tuple.Create(selection, chosenReplica);
        }
        /// <summary>
        /// Selects a replica through GetSelectedReplicaAsync and then issues a restart for it
        /// via the client's ServiceManager, wrapped in the retry helper.
        /// </summary>
        /// <param name="fabricClient">Client used for both the selection and the restart call.</param>
        /// <param name="replicaSelector">Selector identifying the replica; checked with ThrowIf.Null.</param>
        /// <param name="completionMode">Not read inside this method.</param>
        /// <param name="requestTimeout">Timeout passed to each individual request.</param>
        /// <param name="operationTimeout">Timeout passed to the selection call and to the retry helper.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>A RestartReplicaResult wrapping the SelectedReplica that was restarted.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when either element of the selection tuple is null.
        /// </exception>
        public static async Task <RestartReplicaResult> RestartReplicaAsync(
            FabricClient fabricClient,
            ReplicaSelector replicaSelector,
            CompletionMode completionMode,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            // Constructed first, ahead of argument validation; the instance is not read again in this method.
            System.Fabric.Common.TimeoutHelper timeoutHelper = new System.Fabric.Common.TimeoutHelper(operationTimeout);

            System.Fabric.Common.ThrowIf.Null(replicaSelector, "ReplicaSelector");

            Tuple<SelectedReplica, Replica> selection = await FaultAnalysisServiceUtility.GetSelectedReplicaAsync(
                fabricClient,
                replicaSelector,
                requestTimeout,
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            SelectedReplica chosen = selection.Item1;
            if (chosen == null)
            {
                throw new InvalidOperationException("replicaStateActionResult cannot be null");
            }

            Guid partitionId = chosen.SelectedPartition.PartitionId;

            Replica replicaState = selection.Item2;
            if (replicaState == null)
            {
                throw new InvalidOperationException("replicaStateResult cannot be null");
            }

            string nodeName = replicaState.NodeName;
            long replicaId = replicaState.Id;

            // Reject default-valued identifiers before sending the restart.
            ThrowIf.IsTrue(partitionId == Guid.Empty, "PartitionID");
            ThrowIf.IsTrue(replicaId == 0, "ReplicaID");

            await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => fabricClient.ServiceManager.RestartReplicaAsync(
                    nodeName,
                    partitionId,
                    replicaId,
                    requestTimeout,
                    cancellationToken),
                FabricClientRetryErrors.RestartReplicaErrors.Value,
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            return new RestartReplicaResult(chosen);
        }
 /// <summary>
 /// Throws an exception built with the E_ABORT error code when <paramref name="e"/> is an
 /// OperationCanceledException, TransactionFaultedException, FabricNotReadableException or
 /// FabricNotPrimaryException. For any other input the method returns without doing anything.
 /// </summary>
 /// <param name="e">Exception to classify. It is not passed along to the exception that is thrown.</param>
 public static void ThrowTransientExceptionIfRetryable(Exception e)
 {
     bool retryable =
         e is OperationCanceledException
         || e is TransactionFaultedException
         || e is FabricNotReadableException
         || e is FabricNotPrimaryException;

     if (!retryable)
     {
         return;
     }

     throw FaultAnalysisServiceUtility.CreateException(TraceType, Interop.NativeTypes.FABRIC_ERROR_CODE.E_ABORT, "Operation cancelled");
 }
 /// <summary>
 /// Runs SetStoppedNodeStateInnerAsync through RunAndReportFaultOnRepeatedFailure, passing this
 /// method's name as the caller label.
 /// </summary>
 /// <param name="operationId">Identifier forwarded to both the wrapper and the inner call.</param>
 /// <param name="partition">Partition handed to the wrapper.</param>
 /// <param name="stateManager">State manager forwarded to the inner call.</param>
 /// <param name="stoppedNodeTable">Dictionary forwarded to the inner call.</param>
 /// <param name="nodeName">Node name forwarded to the inner call.</param>
 /// <param name="setStopped">Flag forwarded to the inner call.</param>
 /// <param name="cancellationToken">Token forwarded to both the wrapper and the inner call.</param>
 /// <returns>The task produced by the wrapper.</returns>
 public static Task SetStoppedNodeStateAsync(Guid operationId, IStatefulServicePartition partition, IReliableStateManager stateManager, IReliableDictionary <string, bool> stoppedNodeTable, string nodeName, bool setStopped, CancellationToken cancellationToken)
 {
     // The literal 3 is presumably the retry count (numRetries) of the wrapper; confirm against the overload that binds here.
     Task pending = FaultAnalysisServiceUtility.RunAndReportFaultOnRepeatedFailure(
         operationId,
         () => SetStoppedNodeStateInnerAsync(operationId, stateManager, stoppedNodeTable, nodeName, setStopped, cancellationToken),
         partition,
         "FaultAnalysisServiceUtility.SetStoppedNodeStateAsync",
         3,
         cancellationToken);

     return pending;
 }
 /// <summary>
 /// Overload for actions that produce no result. The action is wrapped in an async delegate that
 /// awaits it and then returns a placeholder object, and that delegate is passed to another
 /// RunAndReportFaultOnRepeatedFailure overload along with the remaining arguments.
 /// </summary>
 /// <param name="operationId">Identifier forwarded unchanged.</param>
 /// <param name="action">Work to run; invoked inside the wrapping delegate.</param>
 /// <param name="partition">Partition forwarded unchanged.</param>
 /// <param name="caller">Caller label forwarded unchanged.</param>
 /// <param name="numRetries">Retry count forwarded unchanged.</param>
 /// <param name="cancellationToken">Token forwarded unchanged.</param>
 /// <returns>The task produced by the overload that receives the wrapping delegate.</returns>
 public static Task RunAndReportFaultOnRepeatedFailure(Guid operationId, Func <Task> action, IStatefulServicePartition partition, string caller, int numRetries, CancellationToken cancellationToken)
 {
     Task wrapped = FaultAnalysisServiceUtility.RunAndReportFaultOnRepeatedFailure(
         operationId,
         async () =>
         {
             await action();

             // Placeholder result; the returned value carries no information.
             return new object();
         },
         partition,
         caller,
         numRetries,
         cancellationToken);

     return wrapped;
 }
        /// <summary>
        /// Queries the node list for <paramref name="nodeName"/> and returns the first entry when
        /// its status is usable. When the query returns nothing, or the first entry's status is
        /// Invalid, Unknown or Removed, the stopped-node state for the name is set to false and a
        /// FatalException wrapping a node-not-found exception is thrown.
        /// </summary>
        /// <param name="operationId">Identifier forwarded to SetStoppedNodeStateAsync.</param>
        /// <param name="fc">Client whose TestManager performs the node list query.</param>
        /// <param name="nodeName">Name of the node to look up.</param>
        /// <param name="partition">Partition forwarded to SetStoppedNodeStateAsync.</param>
        /// <param name="stateManager">State manager forwarded to SetStoppedNodeStateAsync.</param>
        /// <param name="stoppedNodeTable">Dictionary forwarded to SetStoppedNodeStateAsync.</param>
        /// <param name="requestTimeout">Timeout passed to the individual query request.</param>
        /// <param name="operationTimeout">Timeout passed to the retry helper.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous call.</param>
        /// <returns>The first node in the query result.</returns>
        /// <exception cref="FatalException">Thrown when no usable node entry is found.</exception>
        public static async Task <Node> GetNodeInfoAsync(
            Guid operationId,
            FabricClient fc,
            string nodeName,
            IStatefulServicePartition partition,
            IReliableStateManager stateManager,
            IReliableDictionary <string, bool> stoppedNodeTable,
            TimeSpan requestTimeout,
            TimeSpan operationTimeout,
            CancellationToken cancellationToken)
        {
            var matches = await FabricClientRetryHelper.ExecuteFabricActionWithRetryAsync(
                () => fc.TestManager.GetNodeListInternalAsync(
                    nodeName,
                    NodeStatusFilter.All,
                    null,
                    true,
                    requestTimeout,
                    cancellationToken),
                operationTimeout,
                cancellationToken).ConfigureAwait(false);

            bool nodeIsUsable = false;
            if (matches.Count != 0)
            {
                var status = matches[0].NodeStatus;
                nodeIsUsable = status != NodeStatus.Invalid
                    && status != NodeStatus.Unknown
                    && status != NodeStatus.Removed;
            }

            if (nodeIsUsable)
            {
                return matches[0];
            }

            // The stopped flag for this name is reset to false before the command is failed.
            await FaultAnalysisServiceUtility.SetStoppedNodeStateAsync(
                operationId,
                partition,
                stateManager,
                stoppedNodeTable,
                nodeName,
                false,
                cancellationToken).ConfigureAwait(false);

            // this is fatal, fail the command
            string notFoundMessage = string.Format(CultureInfo.InvariantCulture, "Node {0} not found", nodeName);
            Exception nodeNotFound = FaultAnalysisServiceUtility.CreateException(
                TraceType,
                NativeTypes.FABRIC_ERROR_CODE.FABRIC_E_NODE_NOT_FOUND,
                notFoundMessage,
                FabricErrorCode.NodeNotFound);

            throw new FatalException("fatal", nodeNotFound);
        }