/// <summary>
/// Aggregates one complete round of per-cluster responses for a single grain into an outcome.
/// </summary>
/// <param name="responsePromises">One pending response per cluster in the multi-cluster network; every element must already be a started (non-null) task.</param>
/// <param name="grainId">The grain whose ownership status is being determined.</param>
/// <param name="logger">Logger handed to the response tracker.</param>
/// <returns>
/// The tracker's task. Per the tracker's contract it completes as soon as enough responses have
/// arrived to decide the outcome, even if some clusters have not answered yet.
/// </returns>
/// <exception cref="ArgumentException">At least one element of <paramref name="responsePromises"/> is null.</exception>
public static Task<GlobalSingleInstanceResponseOutcome> GetOutcomeAsync(Task<RemoteClusterActivationResponse>[] responsePromises, GrainId grainId, ILogger logger)
{
    // Every slot must hold a real task before the tracker starts observing them.
    if (!responsePromises.All(promise => promise != null))
    {
        throw new ArgumentException("All response promises should have been initiated", nameof(responsePromises));
    }

    return new GlobalSingleInstanceResponseTracker(responsePromises, grainId, logger).Task;
}
/// <summary>
/// Sends one activation request for the grain of <paramref name="address"/> to each listed remote
/// cluster and returns the aggregated outcome of that round.
/// </summary>
/// <param name="address">The activation whose grain is being queried.</param>
/// <param name="remoteClusters">The clusters to contact, one request each, in list order.</param>
/// <returns>The task produced by <c>GlobalSingleInstanceResponseTracker.GetOutcomeAsync</c> for the issued requests.</returns>
public Task<GlobalSingleInstanceResponseOutcome> SendRequestRound(ActivationAddress address, List<string> remoteClusters)
{
    // One pending response slot per remote cluster.
    var pending = new Task<RemoteClusterActivationResponse>[remoteClusters.Count];

    // Fire all requests without awaiting; the tracker observes them collectively.
    int slot = 0;
    while (slot < pending.Length)
    {
        pending[slot] = SendRequest(address.Grain, remoteClusters[slot]);
        slot++;
    }

    return GlobalSingleInstanceResponseTracker.GetOutcomeAsync(pending, address.Grain, logger);
}
/// <summary>
/// Runs one batched retry round of the ownership protocol for the given grains.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Keep only grains whose directory entry is Doubtful and that can be moved to RequestedOwnership.
/// 2. Send the resulting grain list to a gateway of every remote cluster and wait for all replies;
///    a failed call, or a null / too-short reply, is recorded as a Faulted response for every grain.
/// 3. For each grain, aggregate the per-cluster responses and either record the local activation as a
///    loser (remote owner), promote it to Owned, or move it back to Doubtful and track it for a later retry.
/// 4. Ask the catalog on each affected silo to delete the loser activations (fire and forget).
/// </remarks>
/// <param name="remoteClusters">The remote clusters to query.</param>
/// <param name="grains">Candidate grains; only those currently Doubtful take part.</param>
private async Task RunBatchedActivationRequests(List<string> remoteClusters, List<GrainId> grains)
{
    var addresses = new List<ActivationAddress>();

    foreach (var grain in grains)
    {
        // retrieve activation
        ActivationAddress address;
        int version;
        var mcstate = router.DirectoryPartition.TryGetActivation(grain, out address, out version);

        // work on the doubtful ones only, and only if we win the transition into requested_ownership
        if (mcstate == GrainDirectoryEntryStatus.Doubtful
            && router.DirectoryPartition.UpdateClusterRegistrationStatus(grain, address.Activation, GrainDirectoryEntryStatus.RequestedOwnership, GrainDirectoryEntryStatus.Doubtful))
        {
            addresses.Add(address);
        }
    }

    if (addresses.Count == 0)
    {
        return;
    }

    // The same grain list is sent to every cluster, so build it once instead of once per cluster.
    var grainIds = addresses.Select(a => a.Grain).ToArray();

    var batchResponses = new List<RemoteClusterActivationResponse[]>();

    var tasks = remoteClusters.Select(async remotecluster =>
    {
        RemoteClusterActivationResponse[] batch;

        // find gateway and send batched request
        try
        {
            var clusterGatewayAddress = this.multiClusterOracle.GetRandomClusterGateway(remotecluster);
            var clusterGrainDir = this.grainFactory.GetSystemTarget<IClusterGrainDirectory>(Constants.ClusterDirectoryServiceId, clusterGatewayAddress);
            batch = await clusterGrainDir.ProcessActivationRequestBatch(grainIds, this.config.ClusterId);

            // Responses are matched to grains by position below. A missing or short array would
            // throw there and leave the grains stuck in RequestedOwnership, so treat it as a fault.
            if (batch == null || batch.Length < grainIds.Length)
            {
                batch = CreateFaultedBatchResponse(
                    new InvalidOperationException("Malformed batch response from cluster " + remotecluster + ": expected " + grainIds.Length + " entries"),
                    grainIds.Length);
            }
        }
        catch (Exception e)
        {
            batch = CreateFaultedBatchResponse(e, grainIds.Length);
        }

        batchResponses.Add(batch);
    }).ToList();

    // wait for all the responses to arrive or fail
    await Task.WhenAll(tasks);

    if (logger.IsVerbose)
    {
        foreach (var br in batchResponses)
        {
            LogBatchResponseSummary(br);
        }
    }

    // process each address
    var losersBySilo = new Dictionary<SiloAddress, List<ActivationAddress>>();

    for (int i = 0; i < addresses.Count; i++)
    {
        var address = addresses[i];

        // Gather this grain's response from every cluster; sized by what was actually collected.
        var responses = new RemoteClusterActivationResponse[batchResponses.Count];
        for (int j = 0; j < batchResponses.Count; j++)
        {
            responses[j] = batchResponses[j][i];
        }

        // response processor
        var outcomeDetails = GlobalSingleInstanceResponseTracker.GetOutcome(responses, address.Grain, logger);
        var outcome = outcomeDetails.State;

        if (logger.IsVerbose2)
        {
            logger.Verbose2("GSIP:M {0} Result={1}", address.Grain, outcomeDetails);
        }

        switch (outcome)
        {
            case OutcomeState.RemoteOwner:
            case OutcomeState.RemoteOwnerLikely:
            {
                // record activations that lost and need to be deactivated
                List<ActivationAddress> losers;
                if (!losersBySilo.TryGetValue(address.Silo, out losers))
                {
                    losersBySilo[address.Silo] = losers = new List<ActivationAddress>();
                }

                losers.Add(address);
                router.DirectoryPartition.CacheOrUpdateRemoteClusterRegistration(address.Grain, address.Activation, outcomeDetails.RemoteOwnerAddress.Address);
                continue;
            }

            case OutcomeState.Succeed:
            {
                var ok = router.DirectoryPartition.UpdateClusterRegistrationStatus(address.Grain, address.Activation, GrainDirectoryEntryStatus.Owned, GrainDirectoryEntryStatus.RequestedOwnership);
                if (ok)
                {
                    continue;
                }

                // the entry changed underneath us; fall through to the recovery path below
                break;
            }

            case OutcomeState.Inconclusive:
            {
                break;
            }
        }

        // we were not successful, reread state to determine what is going on
        // NOTE(review): this overwrites 'address' with whatever the directory returns; if the entry
        // can disappear in the meantime the uses below would see null - confirm TryGetActivation's contract.
        int version;
        var mcstatus = router.DirectoryPartition.TryGetActivation(address.Grain, out address, out version);

        // in each case, go back to DOUBTFUL
        if (mcstatus == GrainDirectoryEntryStatus.RequestedOwnership)
        {
            // we failed because of inconclusive answers
            var success = router.DirectoryPartition.UpdateClusterRegistrationStatus(address.Grain, address.Activation, GrainDirectoryEntryStatus.Doubtful, GrainDirectoryEntryStatus.RequestedOwnership);
            if (!success)
            {
                ProtocolError(address, "unable to transition from REQUESTED_OWNERSHIP to DOUBTFUL");
            }
        }
        else if (mcstatus == GrainDirectoryEntryStatus.RaceLoser)
        {
            // we failed because an external request moved us to RACE_LOSER
            var success = router.DirectoryPartition.UpdateClusterRegistrationStatus(address.Grain, address.Activation, GrainDirectoryEntryStatus.Doubtful, GrainDirectoryEntryStatus.RaceLoser);
            if (!success)
            {
                ProtocolError(address, "unable to transition from RACE_LOSER to DOUBTFUL");
            }
        }
        else
        {
            ProtocolError(address, "unhandled protocol state");
        }

        TrackDoubtfulGrain(address.Grain);
    }

    // remove loser activations
    foreach (var kvp in losersBySilo)
    {
        var catalog = this.grainFactory.GetSystemTarget<ICatalog>(Constants.CatalogId, kvp.Key);
        catalog.DeleteActivations(kvp.Value).Ignore();
    }
}

/// <summary>Builds a batch response of <paramref name="count"/> entries that all carry the same Faulted response.</summary>
private static RemoteClusterActivationResponse[] CreateFaultedBatchResponse(Exception exception, int count)
{
    var faulted = new RemoteClusterActivationResponse(ActivationResponseStatus.Faulted) { ResponseException = exception };
    return Enumerable.Repeat(faulted, count).ToArray();
}

/// <summary>Logs, at verbose level, how many entries of one cluster's batch response fall into each status bucket.</summary>
private void LogBatchResponseSummary(RemoteClusterActivationResponse[] batchResponse)
{
    int pass = 0;
    int failed = 0;
    int failedA = 0;
    int failedOwned = 0;
    int faulted = 0;

    foreach (var response in batchResponse)
    {
        switch (response.ResponseStatus)
        {
            case ActivationResponseStatus.Pass:
                pass++;
                break;

            case ActivationResponseStatus.Failed:
                if (response.Owned)
                {
                    failedOwned++;
                }
                else if (response.ExistingActivationAddress.Address == null)
                {
                    failed++;
                }
                else
                {
                    failedA++;
                }

                break;

            default:
                // every other status is counted as faulted
                faulted++;
                break;
        }
    }

    // NOTE(review): the colons after {2} and {3} look misplaced; message text left unchanged on purpose.
    logger.Verbose("GSIP:M batchresponse PASS:{0} FAILED:{1} FAILED(a){2}: FAILED(o){3}: FAULTED:{4}", pass, failed, failedA, failedOwned, faulted);
}
/// <summary>
/// Aggregates one complete round of per-cluster responses for a single grain into an outcome.
/// </summary>
/// <param name="responsePromises">Promises for the responses for a particular grain from all of the clusters in the multi-cluster network; none may be null.</param>
/// <param name="grainId">The ID of the grain whose owner status is being determined.</param>
/// <param name="logger">Logger handed to the response tracker.</param>
/// <returns>
/// The tracker's task. Per the tracker's contract it completes as soon as enough responses have
/// arrived to make a determination, even if not all clusters have responded yet.
/// </returns>
/// <exception cref="ArgumentException">At least one element of <paramref name="responsePromises"/> is null.</exception>
public static Task<GlobalSingleInstanceResponseOutcome> GetOutcomeAsync(Task<RemoteClusterActivationResponse>[] responsePromises, GrainId grainId, Logger logger)
{
    // A null slot means a request was never started, which the tracker cannot observe.
    bool anyMissing = responsePromises.Any(promise => promise == null);
    if (anyMissing)
    {
        throw new ArgumentException("All response promises should have been initiated", nameof(responsePromises));
    }

    var tracker = new GlobalSingleInstanceResponseTracker(responsePromises, grainId, logger);
    return tracker.Task;
}