/// <summary>
/// Runs one retry round of the ownership protocol for a batch of grains: every grain whose
/// directory entry is currently Doubtful is moved to RequestedOwnership, a single batched
/// activation request is sent to each remote cluster, and the per-grain responses are used to
/// decide whether the local activation becomes Owned, loses to a remote owner, or goes back to Doubtful.
/// </summary>
/// <param name="remoteClusters">Ids of the remote clusters to query.</param>
/// <param name="grains">Candidate grains; entries that are not Doubtful are skipped.</param>
/// <returns>
/// A task that completes once all responses have been processed. Deactivation of losing
/// activations is started but not awaited.
/// </returns>
private async Task RunBatchedActivationRequests(List<string> remoteClusters, List<GrainId> grains)
{
    var addresses = new List<ActivationAddress>();

    foreach (var grain in grains)
    {
        // retrieve activation
        ActivationAddress address;
        int version;
        var mcstate = router.DirectoryPartition.TryGetActivation(grain, out address, out version);

        // work on the doubtful ones only
        if (mcstate == GrainDirectoryEntryStatus.Doubtful)
        {
            // try to start retry by moving into requested_ownership state
            if (router.DirectoryPartition.UpdateClusterRegistrationStatus(grain, address.Activation, GrainDirectoryEntryStatus.RequestedOwnership, GrainDirectoryEntryStatus.Doubtful))
            {
                addresses.Add(address);
            }
        }
    }

    if (addresses.Count == 0)
    {
        return;
    }

    // The request payload is the same for every cluster, so build it once.
    var requestedGrains = addresses.Select(a => a.Grain).ToArray();

    // Builds a batch in which every grain is answered with the same Faulted response.
    Func<Exception, RemoteClusterActivationResponse[]> faultedBatch = cause =>
        Enumerable.Repeat<RemoteClusterActivationResponse>(
            new RemoteClusterActivationResponse(ActivationResponseStatus.Faulted) { ResponseException = cause },
            addresses.Count).ToArray();

    var tasks = remoteClusters.Select(async remotecluster =>
    {
        // find gateway
        var gossiporacle = Silo.CurrentSilo.LocalMultiClusterOracle;

        // send batched request
        try
        {
            var clusterGatewayAddress = gossiporacle.GetRandomClusterGateway(remotecluster);
            var clusterGrainDir = InsideRuntimeClient.Current.InternalGrainFactory.GetSystemTarget<IClusterGrainDirectory>(Constants.ClusterDirectoryServiceId, clusterGatewayAddress);
            var batch = await clusterGrainDir.ProcessActivationRequestBatch(requestedGrains, Silo.CurrentSilo.ClusterId);

            // The processing loop below indexes every batch by grain position and the response
            // tracker rejects null entries, so a malformed batch is treated like a faulted cluster
            // instead of aborting the whole round half-way through.
            if (batch == null || batch.Length != addresses.Count || batch.Any(entry => entry == null))
            {
                return faultedBatch(new InvalidOperationException(
                    "Malformed activation response batch received from cluster " + remotecluster));
            }

            return batch;
        }
        catch (Exception ex)
        {
            return faultedBatch(ex);
        }
    }).ToList();

    // wait for all the responses to arrive or fail; yields exactly one batch per remote cluster
    var batchResponses = await Task.WhenAll(tasks);

    if (logger.IsVerbose)
    {
        foreach (var br in batchResponses)
        {
            int pass = 0, failed = 0, failedA = 0, failedOwned = 0, faulted = 0;

            foreach (var entry in br)
            {
                switch (entry.ResponseStatus)
                {
                    case ActivationResponseStatus.Pass:
                        pass++;
                        break;

                    case ActivationResponseStatus.Failed:
                        if (entry.Owned)
                        {
                            failedOwned++;
                        }
                        else if (entry.ExistingActivationAddress.Address == null)
                        {
                            failed++;
                        }
                        else
                        {
                            failedA++;
                        }
                        break;

                    default:
                        faulted++;
                        break;
                }
            }

            logger.Verbose("GSIP:M batchresponse PASS:{0} FAILED:{1} FAILED(a):{2} FAILED(o):{3} FAULTED:{4}",
                pass, failed, failedA, failedOwned, faulted);
        }
    }

    // process each address
    var loser_activations_per_silo = new Dictionary<SiloAddress, List<ActivationAddress>>();

    for (int i = 0; i < addresses.Count; i++)
    {
        var address = addresses[i];

        // Keep the grain id separately: the entry is re-read further down and that read
        // may not hand back an address to take the grain id from.
        var grainId = address.Grain;

        // array that holds the responses (one per remote cluster) for this grain
        var responses = new RemoteClusterActivationResponse[batchResponses.Length];
        for (int j = 0; j < batchResponses.Length; j++)
        {
            responses[j] = batchResponses[j][i];
        }

        // response processor
        var outcomeDetails = GlobalSingleInstanceResponseTracker.GetOutcome(responses, grainId, logger);
        var outcome = outcomeDetails.State;

        if (logger.IsVerbose2)
        {
            logger.Verbose2("GSIP:M {0} Result={1}", grainId, outcomeDetails);
        }

        switch (outcome)
        {
            case OutcomeState.RemoteOwner:
            case OutcomeState.RemoteOwnerLikely:
                {
                    // record activations that lost and need to be deactivated
                    List<ActivationAddress> losers;
                    if (!loser_activations_per_silo.TryGetValue(address.Silo, out losers))
                    {
                        loser_activations_per_silo[address.Silo] = losers = new List<ActivationAddress>();
                    }
                    losers.Add(address);

                    router.DirectoryPartition.CacheOrUpdateRemoteClusterRegistration(grainId, address.Activation, outcomeDetails.RemoteOwnerAddress.Address);
                    continue;
                }

            case OutcomeState.Succeed:
                {
                    var ok = router.DirectoryPartition.UpdateClusterRegistrationStatus(grainId, address.Activation, GrainDirectoryEntryStatus.Owned, GrainDirectoryEntryStatus.RequestedOwnership);
                    if (ok)
                    {
                        continue;
                    }
                    break;
                }

            case OutcomeState.Inconclusive:
                break;
        }

        // we were not successful, reread state to determine what is going on
        ActivationAddress reread;
        int version;
        var mcstatus = router.DirectoryPartition.TryGetActivation(grainId, out reread, out version);

        // NOTE(review): assumes TryGetActivation can leave the out address null when the entry
        // is gone - confirm. Falling back to the address we started with keeps the error report
        // and the doubtful-tracking below from dereferencing null.
        var current = reread ?? address;

        // in each case, go back to DOUBTFUL
        if (mcstatus == GrainDirectoryEntryStatus.RequestedOwnership)
        {
            // we failed because of inconclusive answers
            var success = router.DirectoryPartition.UpdateClusterRegistrationStatus(grainId, current.Activation, GrainDirectoryEntryStatus.Doubtful, GrainDirectoryEntryStatus.RequestedOwnership);
            if (!success)
            {
                ProtocolError(current, "unable to transition from REQUESTED_OWNERSHIP to DOUBTFUL");
            }
        }
        else if (mcstatus == GrainDirectoryEntryStatus.RaceLoser)
        {
            // we failed because an external request moved us to RACE_LOSER
            var success = router.DirectoryPartition.UpdateClusterRegistrationStatus(grainId, current.Activation, GrainDirectoryEntryStatus.Doubtful, GrainDirectoryEntryStatus.RaceLoser);
            if (!success)
            {
                ProtocolError(current, "unable to transition from RACE_LOSER to DOUBTFUL");
            }
        }
        else
        {
            ProtocolError(current, "unhandled protocol state");
        }

        TrackDoubtfulGrain(grainId);
    }

    // remove loser activations (fire-and-forget, one call per silo)
    foreach (var kvp in loser_activations_per_silo)
    {
        var catalog = InsideRuntimeClient.Current.InternalGrainFactory.GetSystemTarget<ICatalog>(Constants.CatalogId, kvp.Key);
        catalog.DeleteActivations(kvp.Value).Ignore();
    }
}
/// <summary>
/// Computes the response to an activation request from another cluster, based on the status of
/// the local directory entry for the grain. This function will be called only on the Owner silo.
/// </summary>
/// <param name="grain">The grain the requesting cluster asks about.</param>
/// <param name="requestClusterId">Id of the cluster that sent the request.</param>
/// <returns>
/// Pass when there is no local entry that blocks the requester, Failed (carrying the existing
/// activation address and version) when the local entry wins, or Faulted (carrying the exception)
/// when processing threw.
/// </returns>
private RemoteClusterActivationResponse ProcessRequestLocal(GrainId grain, string requestClusterId)
{
    RemoteClusterActivationResponse response;

    // Loop instead of recursing: each lost status-update race simply re-reads the entry and
    // evaluates it again, without growing the call stack.
    while (true)
    {
        try
        {
            ActivationAddress address;
            int version;
            GrainDirectoryEntryStatus existingActivationStatus = router.DirectoryPartition.TryGetActivation(grain, out address, out version);

            // Return appropriate protocol response, given current mc status
            switch (existingActivationStatus)
            {
                case GrainDirectoryEntryStatus.Invalid:
                case GrainDirectoryEntryStatus.Cached:
                case GrainDirectoryEntryStatus.RaceLoser:
                    response = RemoteClusterActivationResponse.Pass;
                    break;

                case GrainDirectoryEntryStatus.Owned:
                    response = new RemoteClusterActivationResponse(ActivationResponseStatus.Failed)
                    {
                        ExistingActivationAddress = new AddressAndTag()
                        {
                            Address = address,
                            VersionTag = version
                        },
                        ClusterId = clusterId,
                        Owned = true
                    };
                    break;

                case GrainDirectoryEntryStatus.RequestedOwnership:
                case GrainDirectoryEntryStatus.Doubtful:
                    // Neither side owns the grain yet: the precedence function decides who wins.
                    var iWin = MultiClusterUtils.ActivationPrecedenceFunc(grain, clusterId, requestClusterId);
                    if (iWin)
                    {
                        response = new RemoteClusterActivationResponse(ActivationResponseStatus.Failed)
                        {
                            ExistingActivationAddress = new AddressAndTag()
                            {
                                Address = address,
                                VersionTag = version
                            },
                            ClusterId = clusterId,
                            Owned = false
                        };
                    }
                    else
                    {
                        response = RemoteClusterActivationResponse.Pass;

                        // update own activation status to race loser.
                        if (existingActivationStatus == GrainDirectoryEntryStatus.RequestedOwnership)
                        {
                            if (logger.IsVerbose2)
                            {
                                logger.Verbose2("GSIP:Rsp {0} Origin={1} RaceLoser", grain.ToString(), requestClusterId);
                            }

                            var success = router.DirectoryPartition.UpdateClusterRegistrationStatus(grain, address.Activation, GrainDirectoryEntryStatus.RaceLoser, GrainDirectoryEntryStatus.RequestedOwnership);
                            if (!success)
                            {
                                // there was a race. retry.
                                if (logger.IsVerbose2)
                                {
                                    logger.Verbose2("GSIP:Rsp {0} Origin={1} Retry", grain.ToString(), requestClusterId);
                                }
                                continue;
                            }
                        }
                    }
                    break;

                default:
                    throw new InvalidOperationException("Invalid MultiClusterStatus value");
            }
        }
        catch (Exception ex)
        {
            // The exception is not logged separately here; it is handed back to the requester
            // inside a Faulted response.
            response = new RemoteClusterActivationResponse(ActivationResponseStatus.Faulted)
            {
                ResponseException = ex
            };
        }

        break;
    }

    if (logger.IsVerbose)
    {
        logger.Verbose("GSIP:Rsp {0} Origin={1} Result={2}", grain.ToString(), requestClusterId, response);
    }

    return response;
}
/// <summary>
/// Aggregates one complete round of responses, collected from all clusters for a single grain,
/// into a final outcome.
/// </summary>
/// <param name="responses">The responses for the grain, one per cluster; no entry may be null.</param>
/// <param name="grainId">The grain whose owner status is being determined.</param>
/// <param name="logger">Logger passed on to the aggregation for any useful diagnostics.</param>
/// <returns>The outcome obtained by aggregating all of the responses.</returns>
/// <exception cref="ArgumentException">At least one entry of <paramref name="responses"/> is null.</exception>
public static GlobalSingleInstanceResponseOutcome GetOutcome(RemoteClusterActivationResponse[] responses, GrainId grainId, Logger logger)
{
    var everyClusterAnswered = responses.All(response => response != null);
    if (!everyClusterAnswered)
    {
        throw new ArgumentException("All responses should have a value", nameof(responses));
    }

    // Delegate to the overload that supports partial rounds, telling it nothing is pending,
    // and unwrap the value it produces.
    var aggregated = GetOutcome(responses, grainId, logger, hasPendingResponses: false);
    return aggregated.Value;
}