/// <summary>
/// Flags every replica in <paramref name="partitionEntity"/> as unsafe to fault, together with
/// the code package entity the owning application returns for that replica and the cluster node
/// entity whose name matches the replica's node name. Either of those two is skipped when the
/// lookup yields <c>null</c>.
/// </summary>
/// <param name="partitionEntity">Partition whose replica list is walked.</param>
private void MarkPartitionAsUnsafe(PartitionEntity partitionEntity)
{
    foreach (ReplicaEntity replica in partitionEntity.ReplicaList)
    {
        // Resolved per iteration (not hoisted) so that an empty replica list touches no parent entities.
        var owningApplication = partitionEntity.ParentServiceEntity.ParentApplicationEntity;

        var replicaCodePackage = owningApplication.GetCodePackagEntityForReplica(replica);
        if (replicaCodePackage != null)
        {
            replicaCodePackage.MarkCodePackageAsUnsafeToFault();
        }

        var replicaNode = owningApplication.ClusterSnapshot.Nodes.FindMatchingNodeEntity(replica.Replica.NodeName);
        if (replicaNode != null)
        {
            replicaNode.MarkNodeAsUnsafeToFault();
        }

        // The replica itself is always flagged, whether or not its code package / node were found.
        replica.MarkReplicaAsUnsafeToFault();
    }
}
/// <summary>
/// Creates a replica entity for <paramref name="replica"/> that belongs to
/// <paramref name="partitionEntity"/>. The entity's flags start out as
/// <see cref="ClusterEntityFlags.Excluded"/>.
/// </summary>
/// <param name="replica">Replica stored in <c>Replica</c>.</param>
/// <param name="partitionEntity">Partition entity stored in <c>ParentPartitionEntity</c>.</param>
public ReplicaEntity(Replica replica, PartitionEntity partitionEntity)
{
    Replica = replica;
    ParentPartitionEntity = partitionEntity;

    // Initial flag value; nothing in this constructor changes it afterwards.
    ReplicaFlags = ClusterEntityFlags.Excluded;
}
/// <summary>
/// Makes three passes over this snapshot and flags entities as unsafe to fault.
/// </summary>
/// <remarks>
/// <para>
/// Pass 1 (code packages): a code package and its node are flagged when the package's aggregated
/// health is not <c>Ok</c>; otherwise they are flagged for each deployed partition whose fault
/// tolerance is zero or less or whose health state is not <c>Ok</c>. A code package with no
/// matching node entity is reported through <c>ChaosUtility.ThrowOrAssertIfTrue</c>.
/// </para>
/// <para>
/// Pass 2 (partitions): a partition is handed to <c>MarkPartitionAsUnsafe</c> when its fault
/// tolerance is zero or less, or when the partition, its service or its application is not <c>Ok</c>.
/// </para>
/// <para>
/// Pass 3 (nodes): every node whose own health state is not <c>Ok</c> is flagged.
/// </para>
/// </remarks>
public void MarkAllUnsafeEntities()
{
    TestabilityTrace.TraceSource.WriteNoise(TraceType, "Inside of MarkAllUnsafeEntities ...");

    // Pass 1: code packages (materialized up front, as before, so marking works on a fixed set).
    foreach (CodePackageEntity codePackage in this.GetAllCodePackages().ToArray())
    {
        var hostNode = this.Nodes.FirstOrDefault(candidate => candidate.CurrentNodeInfo.NodeName == codePackage.NodeName);

        ChaosUtility.ThrowOrAssertIfTrue(
            ChaosConstants.MarkReplicaAsInTransition_NodeFaulted_TelemetryId,
            hostNode == null,
            string.Format(
                "Node entity {0} not found for code package {1}:{2}:{3}.",
                codePackage.NodeName,
                codePackage.ParentApplicationEntity.Application.ApplicationName,
                codePackage.CodePackageResult.ServiceManifestName,
                codePackage.CodePackageResult.CodePackageName));

        if (codePackage.Health.AggregatedHealthState == HealthState.Ok)
        {
            // TODO: RDBug 7635808 : Test and see if DeployedPartition
            // loop in the Chaos engine mark unsafe can be eliminated
            //
            foreach (var hostedPartition in codePackage.DeployedPartitions)
            {
                bool hostedPartitionIsUnsafe =
                    hostedPartition.GetPartitionFaultTolerance() <= 0
                    || hostedPartition.Partition.HealthState != HealthState.Ok;

                if (hostedPartitionIsUnsafe)
                {
                    codePackage.MarkCodePackageAsUnsafeToFault();
                    hostNode.MarkNodeAsUnsafeToFault();
                }
            }
        }
        else
        {
            // The package itself is unhealthy: flag it and its node without looking at partitions.
            codePackage.MarkCodePackageAsUnsafeToFault();
            hostNode.MarkNodeAsUnsafeToFault();
        }
    }

    // Pass 2: partitions. An alternative would be to visit every replica and check its health
    // along with its ancestors' health, but a partition's health state always reflects the worst
    // health state among its replicas, so walking the partitions is enough.
    //
    foreach (PartitionEntity partition in this.GetAllPartitions(null, null, !this.ShouldFaultSystem).ToArray())
    {
        // Kept as a single short-circuiting expression so later operands are only read when needed.
        bool partitionIsUnsafe =
            partition.GetPartitionFaultTolerance() <= 0
            || partition.Partition.HealthState != HealthState.Ok
            || partition.ParentServiceEntity.Service.HealthState != HealthState.Ok
            || partition.ParentServiceEntity.ParentApplicationEntity.Application.HealthState != HealthState.Ok;

        if (partitionIsUnsafe)
        {
            this.MarkPartitionAsUnsafe(partition);
        }
    }

    // Pass 3: nodes that are themselves unhealthy.
    foreach (var clusterNode in this.Nodes)
    {
        if (clusterNode.CurrentNodeInfo.HealthState != HealthState.Ok)
        {
            clusterNode.MarkNodeAsUnsafeToFault();
        }
    }
}