/// <summary>
/// Checks whether node-down traces, or failing that node-up traces, exist for the given event
/// and, if so, records <see cref="PrimaryMoveReason.NodeDown"/> as the reason for the primary move.
/// </summary>
/// <param name="primaryMoveAnalysisEvent">
/// Event under analysis; its reason and correlated trace records are updated when traces are found.
/// </param>
/// <returns>
/// <c>true</c> when node-down or node-up traces were found; <c>false</c> when neither kind exists.
/// </returns>
private async Task<bool> AnalyzeNodeDownAsync(PrimaryMoveAnalysisEvent primaryMoveAnalysisEvent)
{
    IEnumerable<NodeDownTraceRecord> downRecords = await this.primaryMoveAnalysisEventStoreReader
        .GetNodeDownTraceRecordsAsync(primaryMoveAnalysisEvent)
        .ConfigureAwait(false);

    if (downRecords.Any())
    {
        primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.NodeDown;
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(downRecords);
        return true;
    }

    // No node-down traces: node-up traces are accepted as well and reported under the same reason.
    IEnumerable<NodeUpTraceRecord> upRecords = await this.primaryMoveAnalysisEventStoreReader
        .GetNodeUpTraceRecordsAsync(primaryMoveAnalysisEvent)
        .ConfigureAwait(false);

    if (upRecords.Any())
    {
        primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.NodeDown;
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(upRecords);
        return true;
    }

    this.Logger.LogWarning(
        "No node up or node down traces found with duration {0}, will try other failover possibilities.",
        primaryMoveAnalysisEvent.GetDuration());
    return false;
}
/// <summary>
/// Looks up application-host-terminated traces for the given event and attaches them to it as
/// correlated trace records. Logs a warning and leaves the event untouched when none are found.
/// </summary>
/// <param name="primaryMoveAnalysisEvent">Event under analysis.</param>
/// <returns>A task that completes when the lookup has finished.</returns>
private async Task AnalyzeAppHostDownAsync(PrimaryMoveAnalysisEvent primaryMoveAnalysisEvent)
{
    IEnumerable<ApplicationHostTerminatedTraceRecord> terminatedRecords = await this.primaryMoveAnalysisQueryStoreReader
        .GetApplicationHostTerminatedTraceRecordsAsync(primaryMoveAnalysisEvent)
        .ConfigureAwait(false);

    if (terminatedRecords.Any())
    {
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(terminatedRecords);
        return;
    }

    this.Logger.LogWarning(
        "No application host down traces found with the reason activity id {0} and duration {1}, cannot perform further analysis.",
        primaryMoveAnalysisEvent.ReasonActivityId,
        primaryMoveAnalysisEvent.GetDuration());
}
/// <summary>
/// Determines which kind of report-fault caused the primary move. Three trace sources are
/// queried in order (API report fault, begin report fault, report fault); the first one that
/// yields records decides the reason and supplies the correlated trace records.
/// </summary>
/// <param name="primaryMoveAnalysisEvent">
/// Event under analysis; its reason and correlated trace records are updated when traces are found.
/// </param>
/// <returns>A task that completes when the lookup has finished.</returns>
private async Task AnalyzeReportFaultAsync(PrimaryMoveAnalysisEvent primaryMoveAnalysisEvent)
{
    // RAP api report fault.
    IEnumerable<ApiReportFaultTraceRecord> serviceFaultRecords = await this.primaryMoveAnalysisQueryStoreReader
        .GetApiReportFaultTraceRecordsAsync(primaryMoveAnalysisEvent)
        .ConfigureAwait(false);

    if (serviceFaultRecords.Any())
    {
        primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.ServiceApiReportFault;
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(serviceFaultRecords);
        return;
    }

    // Client api report fault: try the begin-report-fault traces first.
    var beginFaultRecords = await this.primaryMoveAnalysisQueryStoreReader
        .GetBeginReportFaultTraceRecordsAsync(primaryMoveAnalysisEvent)
        .ConfigureAwait(false);

    if (beginFaultRecords.Any())
    {
        primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.ClientApiReportFault;
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(beginFaultRecords);
        return;
    }

    // Still a client api report fault if only the report-fault traces are present.
    var reportFaultRecords = await this.primaryMoveAnalysisQueryStoreReader
        .GetReportFaultTraceRecordsAsync(primaryMoveAnalysisEvent)
        .ConfigureAwait(false);

    if (reportFaultRecords.Any())
    {
        primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.ClientApiReportFault;
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(reportFaultRecords);
        return;
    }

    this.Logger.LogWarning(
        "No report fault traces found with activity id {0} and duration {1}, cannot perform further analysis.",
        primaryMoveAnalysisEvent.ReasonActivityId,
        primaryMoveAnalysisEvent.GetDuration());
}
/// <inheritdoc />
public override async Task<Continuation> DoAnalysisAsync(AnalysisContainer analysis)
{
    // The analysis runs as a resumable sequence of stages: every call handles the stage currently
    // recorded on the container, records the stage to run next and returns a continuation.
    //
    //   NotStarted  -> capture previous/current primary context        -> Checkpoint1
    //   Checkpoint1 -> branch on reconfiguration type                   -> Checkpoint2 (Failover) / Checkpoint3 (SwapPrimary)
    //   Checkpoint2 -> node down analysis                               -> Finished / Checkpoint4
    //   Checkpoint3 -> CRM operation analysis                           -> Finished
    //   Checkpoint4 -> branch on replica state change reason activity   -> Checkpoint5 / Checkpoint6 / Finished
    //   Checkpoint5 -> application host down analysis                   -> Finished
    //   Checkpoint6 -> report fault analysis                            -> Finished
    var stage = analysis.GetProgressedTill();

    // Fail with a descriptive error instead of a NullReferenceException further down when the
    // container does not carry a primary move event.
    PrimaryMoveAnalysisEvent primaryMoveAnalysisEvent = analysis.AnalysisEvent as PrimaryMoveAnalysisEvent;
    if (primaryMoveAnalysisEvent == null)
    {
        throw new InvalidOperationException("AnalysisEvent is not a PrimaryMoveAnalysisEvent, cannot perform PrimaryMoveAnalysis.");
    }

    if (stage == ProgressTracker.NotStarted)
    {
        primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.Unknown;

        var reconfigRecord = primaryMoveAnalysisEvent.TriggerReconfigurationCompletedTraceRecord;

        primaryMoveAnalysisEvent.PreviousPrimaryContext = await this.primaryReplicaContextStore
            .GetPrimaryReplicaContextAsync(reconfigRecord.PartitionId)
            .ConfigureAwait(false);

        if (primaryMoveAnalysisEvent.PreviousPrimaryContext == null)
        {
            // NOTE(review): the current context is not saved on this path - confirm the store is
            // seeded elsewhere, otherwise later analyses of this partition end up here as well.
            this.Logger.LogWarning("PreviousPrimaryContext is null, cannot perform PrimaryMoveAnalysis.");
            analysis.SetProgressedTill(ProgressTracker.Finished);
            return Continuation.Done;
        }

        // A constructor call never yields null, so no null check is needed on the new context.
        primaryMoveAnalysisEvent.CurrentPrimaryContext = new PrimaryReplicaContext(
            reconfigRecord.PartitionId,
            reconfigRecord.NodeName,
            reconfigRecord.NodeInstanceId,
            reconfigRecord.TimeStamp.Ticks);

        // CurrentPrimaryContext becomes the PreviousPrimaryContext for the next analysis
        await this.primaryReplicaContextStore
            .SavePrimaryReplicaContextAsync(primaryMoveAnalysisEvent.CurrentPrimaryContext)
            .ConfigureAwait(false);

        analysis.SetProgressedTill(ProgressTracker.Checkpoint1);
        return Continuation.ResumeImmediately;
    }

    if (stage == ProgressTracker.Checkpoint1)
    {
        var reconfigType = primaryMoveAnalysisEvent.TriggerReconfigurationCompletedTraceRecord.ReconfigType;

        if (reconfigType == ReconfigurationType.Failover)
        {
            primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.Failover;
            analysis.SetProgressedTill(ProgressTracker.Checkpoint2);
            return Continuation.ResumeImmediately;
        }

        if (reconfigType == ReconfigurationType.SwapPrimary)
        {
            primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.SwapPrimary;
            analysis.SetProgressedTill(ProgressTracker.Checkpoint3);
            return Continuation.ResumeImmediately;
        }

        // The stage itself is valid; it is the reconfiguration type that cannot be analyzed.
        throw new InvalidOperationException(
            string.Format(
                CultureInfo.InvariantCulture,
                "Reconfiguration type {0} is not supported by PrimaryMoveAnalysis (progress stage {1})",
                reconfigType,
                stage));
    }

    if (stage == ProgressTracker.Checkpoint2)
    {
        bool dueToNodeDown = await this.AnalyzeNodeDownAsync(primaryMoveAnalysisEvent).ConfigureAwait(false);
        if (!dueToNodeDown)
        {
            // Not explained by node traces: continue with the replica state change analysis.
            analysis.SetProgressedTill(ProgressTracker.Checkpoint4);
            return Continuation.ResumeImmediately;
        }

        analysis.SetProgressedTill(ProgressTracker.Finished);
        primaryMoveAnalysisEvent.AnalysisEndTimeStamp = DateTime.UtcNow;
        return Continuation.Done;
    }

    if (stage == ProgressTracker.Checkpoint3)
    {
        await this.AnalyzeCRMOperationAsync(primaryMoveAnalysisEvent).ConfigureAwait(false);

        analysis.SetProgressedTill(ProgressTracker.Finished);
        primaryMoveAnalysisEvent.AnalysisEndTimeStamp = DateTime.UtcNow;
        return Continuation.Done;
    }

    if (stage == ProgressTracker.Checkpoint4)
    {
        var replicaStateChangeTraceRecordList = await this.primaryMoveAnalysisQueryStoreReader
            .GetReplicaStateChangeTraceRecordsAsync(primaryMoveAnalysisEvent)
            .ConfigureAwait(false);

        if (replicaStateChangeTraceRecordList == null || !replicaStateChangeTraceRecordList.Any())
        {
            this.Logger.LogWarning(
                "No replica closing traces found with duration {0}, cannot perform further analysis.",
                primaryMoveAnalysisEvent.GetDuration());
            analysis.SetProgressedTill(ProgressTracker.Finished);
            return Continuation.Done;
        }

        // Read the first record once so every decision below is based on the same record.
        var firstRecord = replicaStateChangeTraceRecordList.First();
        var reasonActivityType = firstRecord.ReasonActivityType;

        primaryMoveAnalysisEvent.ReasonActivityId = firstRecord.ReasonActivityId;
        primaryMoveAnalysisEvent.ReasonActivityType = reasonActivityType;
        primaryMoveAnalysisEvent.AddCorrelatedTraceRecordRange(replicaStateChangeTraceRecordList);

        if (reasonActivityType == ActivityType.ServicePackageEvent)
        {
            primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.ApplicationHostDown;
            analysis.SetProgressedTill(ProgressTracker.Checkpoint5);
            return Continuation.ResumeImmediately;
        }

        if (reasonActivityType == ActivityType.ClientReportFaultEvent
            || reasonActivityType == ActivityType.ServiceReportFaultEvent)
        {
            // TODO: Break report fault analysis into two separate analyses because ReplicaStateChange already shows which one of the two happened
            primaryMoveAnalysisEvent.Reason = PrimaryMoveReason.ClientApiReportFault;
            analysis.SetProgressedTill(ProgressTracker.Checkpoint6);
            return Continuation.ResumeImmediately;
        }

        // The stage itself is valid; it is the reason activity type that cannot be analyzed.
        throw new InvalidOperationException(
            string.Format(
                CultureInfo.InvariantCulture,
                "Reason activity type {0} is not supported by PrimaryMoveAnalysis (progress stage {1})",
                reasonActivityType,
                stage));
    }

    if (stage == ProgressTracker.Checkpoint5)
    {
        await this.AnalyzeAppHostDownAsync(primaryMoveAnalysisEvent).ConfigureAwait(false);

        analysis.SetProgressedTill(ProgressTracker.Finished);
        primaryMoveAnalysisEvent.AnalysisEndTimeStamp = DateTime.UtcNow;
        return Continuation.Done;
    }

    if (stage == ProgressTracker.Checkpoint6)
    {
        await this.AnalyzeReportFaultAsync(primaryMoveAnalysisEvent).ConfigureAwait(false);

        analysis.SetProgressedTill(ProgressTracker.Finished);
        primaryMoveAnalysisEvent.AnalysisEndTimeStamp = DateTime.UtcNow;
        return Continuation.Done;
    }

    // Any other stage (including Finished) is not something this analysis can resume from.
    throw new InvalidOperationException(
        string.Format(CultureInfo.InvariantCulture, "Progress Stage {0} not Valid", stage));
}