/// <summary>
/// Writes a verbose log entry describing the data currently queued for publishing.
/// The destination is reported as a cluster when <c>TargetsRemoteCluster</c> is set,
/// and as a silo otherwise.
/// </summary>
/// <param name="data">The cumulative data waiting to be published.</param>
protected override void LogQueuedPublish(MultiClusterData data)
{
    if (!TargetsRemoteCluster)
    {
        oracle.logger.Verbose("enqueued publish to silo {0}, cumulative: {1}", Silo, data);
        return;
    }

    oracle.logger.Verbose("enqueued publish to cluster {0}, cumulative: {1}", Cluster, data);
}
/// <summary>
/// Creates the oracle data holder with empty multi-cluster data, no known gateways
/// and no configuration listeners.
/// </summary>
/// <param name="log">The logger stored for use by this instance.</param>
internal MultiClusterOracleData(Logger log)
{
    logger = log;

    // All state starts out empty.
    confListeners = new HashSet<GrainReference>();
    activeGatewaysByCluster = new Dictionary<string, List<SiloAddress>>();
    localData = new MultiClusterData();
}
/// <summary>
/// Marks gateways of this cluster as inactive when they are still recorded as active,
/// their last heartbeat is older than <c>CleanupSilentGoneGatewaysAfter</c>, and they are
/// not contained in <paramref name="activeGateways"/>. Any resulting change is merged
/// into <paramref name="deltas"/>.
/// </summary>
/// <param name="activeGateways">The gateways currently considered active for this cluster.</param>
/// <param name="deltas">Accumulator for changes that still need to be published.</param>
private void DemoteLocalGateways(IReadOnlyList<SiloAddress> activeGateways, ref MultiClusterData deltas)
{
    var now = DateTime.UtcNow;

    // Collect a demotion entry for every stale, still-active gateway of this cluster.
    var demotions = new List<GatewayEntry>();
    foreach (var known in this.localData.Current.Gateways.Values)
    {
        var isStaleLocalActive = known.ClusterId == clusterId
            && known.Status == GatewayStatus.Active
            && (now - known.HeartbeatTimestamp > CleanupSilentGoneGatewaysAfter)
            && !activeGateways.Contains(known.SiloAddress);

        if (!isStaleLocalActive)
        {
            continue;
        }

        demotions.Add(new GatewayEntry()
        {
            ClusterId = known.ClusterId,
            SiloAddress = known.SiloAddress,
            Status = GatewayStatus.Inactive,
            // The demotion is stamped at the moment the old heartbeat timed out.
            HeartbeatTimestamp = known.HeartbeatTimestamp + CleanupSilentGoneGatewaysAfter,
        });
    }

    if (demotions.Count == 0)
    {
        return;
    }

    var data = new MultiClusterData(demotions);

    if (logger.IsVerbose)
    {
        logger.Verbose("-DemoteLocalGateways {0}", data.ToString());
    }

    var applied = this.localData.ApplyDataAndNotify(data);
    if (applied.IsEmpty)
    {
        return;
    }

    deltas = deltas.Merge(applied);
}
/// <summary>
/// For quicker convergence, publishes this silo's active gateway entry immediately to
/// every destination that appears in <paramref name="delta"/> and whose worker does not
/// yet have its <c>KnowsMe</c> flag set.
/// </summary>
/// <param name="delta">The newly learned gateway information.</param>
private void PublishMyStatusToNewDestinations(MultiClusterData delta)
{
    GatewayEntry myEntry;

    // don't do this if we are not an active gateway
    if (!localData.Current.Gateways.TryGetValue(this.Silo, out myEntry)
        || myEntry.Status != GatewayStatus.Active)
    {
        return;
    }

    foreach (var gateway in delta.Gateways.Values)
    {
        // gateways of our own cluster are addressed per silo, all others per cluster
        var gossipworker = (gateway.ClusterId == this.clusterId)
            ? GetSiloWorker(gateway.SiloAddress)
            : GetClusterWorker(gateway.ClusterId);

        if (!gossipworker.KnowsMe)
        {
            gossipworker.Publish(new MultiClusterData(myEntry));
        }
    }
}
/// <summary>
/// Performs one round of work: first publishes any queued data, then runs a full
/// synchronize if the <c>doSynchronize</c> flag is set. <c>LastUse</c> is refreshed
/// before and after each remote operation.
/// </summary>
/// <returns>A task that completes when the round has finished.</returns>
protected override async Task Work()
{
    // Step 1: publish data that has been queued.
    var pending = toPublish;
    if (!pending.IsEmpty)
    {
        // clear queued data before awaiting, so new data can queue up meanwhile
        toPublish = new MultiClusterData();

        int publishId = ++oracle.idCounter;
        LastUse = DateTime.UtcNow;
        await Publish(publishId, pending);
        LastUse = DateTime.UtcNow;
    }

    // Step 2: do a full synchronize if the flag is set.
    if (!doSynchronize)
    {
        return;
    }

    doSynchronize = false; // clear flag

    int synchronizeId = ++oracle.idCounter;
    LastUse = DateTime.UtcNow;
    await Synchronize(synchronizeId);
    LastUse = DateTime.UtcNow;
}
/// <summary>
/// Publishes <paramref name="data"/> to the gossip system target on the destination silo.
/// For remote clusters a random gateway is chosen when none is selected or the selected
/// one is no longer an active gateway. Failures are logged and stored in
/// <c>LastException</c>; they are not rethrown.
/// </summary>
/// <param name="id">Identifier used to correlate the log messages of this publish.</param>
/// <param name="data">The data to publish.</param>
/// <returns>A task that completes when the publish attempt has finished.</returns>
protected async override Task Publish(int id, MultiClusterData data)
{
    // optimization: can skip publish to local clusters if we are doing a full synchronize anyway
    if (!TargetsRemoteCluster && doSynchronize)
    {
        return;
    }

    // for remote clusters, pick a random gateway if we don't already have one, or it is not active anymore
    if (TargetsRemoteCluster
        && (Silo == null || !oracle.localData.Current.IsActiveGatewayForCluster(Silo, Cluster)))
    {
        Silo = oracle.GetRandomClusterGateway(Cluster);
    }

    oracle.logger.Verbose("-{0} Publish to silo {1} ({2}) {3}", id, Silo, Cluster ?? "local", data);

    try
    {
        // publish to the remote system target
        var remoteOracle = this.grainFactory.GetSystemTarget<IMultiClusterGossipService>(Constants.MultiClusterOracleId, Silo);
        await remoteOracle.Publish(data, TargetsRemoteCluster);

        LastException = null;

        // If the published data contained our own entry, the destination now knows our status.
        // A single TryGetValue replaces the former ContainsKey + indexer double lookup.
        GatewayEntry myPublishedEntry;
        if (data.Gateways.TryGetValue(oracle.Silo, out myPublishedEntry))
        {
            KnowsMe = myPublishedEntry.Status == GatewayStatus.Active;
        }

        oracle.logger.Verbose("-{0} Publish to silo successful", id);
    }
    catch (Exception e)
    {
        oracle.logger.Warn(ErrorCode.MultiClusterNetwork_GossipCommunicationFailure,
            $"-{id} Publish to silo {Silo} ({Cluster ?? "local"}) failed", e);

        if (TargetsRemoteCluster)
        {
            Silo = null; // pick a different gateway next time
        }

        LastException = e;
    }
}
/// <summary>
/// Writes the given data back to storage. The configuration (if present) and each
/// gateway entry are handled by separate read-then-write-back tasks that run concurrently.
/// </summary>
/// <param name="data">The data to publish to storage.</param>
/// <returns>A task that completes when all write-back tasks have completed.</returns>
public async Task Publish(MultiClusterData data)
{
    logger.Verbose("-Publish data:{0}", data);

    // this is (almost) always called with just one item in data to be written back
    // so we are o.k. with doing individual tasks for each storage read and write
    Func<Task> writeConfiguration = async () =>
    {
        var storedConfig = await tableManager.ReadConfigurationEntryAsync();
        await DiffAndWriteBackConfigAsync(data.Configuration, storedConfig);
    };

    Func<GatewayEntry, Task> writeGateway = async entry =>
    {
        var storedGateway = await tableManager.ReadGatewayEntryAsync(entry);
        await DiffAndWriteBackGatewayInfoAsync(entry, storedGateway);
    };

    var pendingWrites = new List<Task>();

    if (data.Configuration != null)
    {
        pendingWrites.Add(writeConfiguration());
    }

    foreach (var gateway in data.Gateways.Values)
    {
        pendingWrites.Add(writeGateway(gateway));
    }

    await Task.WhenAll(pendingWrites);
}
/// <summary>
/// Determines this silo's gateway status and, if it needs to be (re)announced, applies it
/// to the local data and merges the resulting change into <paramref name="deltas"/>.
/// </summary>
/// <param name="isGateway">Whether this silo currently acts as a gateway.</param>
/// <param name="deltas">Accumulator for changes that still need to be published.</param>
private void InjectLocalStatus(bool isGateway, ref MultiClusterData deltas)
{
    var myStatus = new GatewayEntry()
    {
        ClusterId = clusterId,
        SiloAddress = Silo,
        Status = isGateway ? GatewayStatus.Active : GatewayStatus.Inactive,
        HeartbeatTimestamp = DateTime.UtcNow,
    };

    GatewayEntry existingEntry;
    var alreadyKnown = this.localData.Current.Gateways.TryGetValue(Silo, out existingEntry);

    // do not update if we are reporting inactive status and entry is not already there
    if (!alreadyKnown && !isGateway)
    {
        return;
    }

    // send if status is changed, or we are active and haven't said so in a while
    var mustReport = existingEntry == null
        || existingEntry.Status != myStatus.Status
        || (myStatus.Status == GatewayStatus.Active
            && myStatus.HeartbeatTimestamp - existingEntry.HeartbeatTimestamp > this.resendActiveStatusAfter);

    if (!mustReport)
    {
        return;
    }

    logger.Info($"Report as {myStatus}");

    // update current data with status
    var applied = this.localData.ApplyDataAndNotify(new MultiClusterData(myStatus));
    if (applied.IsEmpty)
    {
        return;
    }

    deltas = deltas.Merge(applied);
}
/// <summary>
/// Creates the oracle data holder with empty multi-cluster data, no known gateways
/// and no configuration listeners.
/// </summary>
/// <param name="log">The logger stored for use by this instance.</param>
/// <param name="grainFactory">The grain factory stored for use by this instance.</param>
internal MultiClusterOracleData(ILogger log, IInternalGrainFactory grainFactory)
{
    // Injected collaborators.
    this.grainFactory = grainFactory;
    logger = log;

    // All state starts out empty.
    confListeners = new HashSet<GrainReference>();
    activeGatewaysByCluster = new Dictionary<string, List<SiloAddress>>();
    localData = new MultiClusterData();
}
/// <summary>
/// Merges <paramref name="data"/> into the local data and reacts to what actually changed:
/// recomputes the available gateways when gateway entries changed, and notifies the
/// registered configuration listeners when the configuration changed.
/// </summary>
/// <param name="data">The incoming data to apply.</param>
/// <returns>
/// The delta of changes that were applied; <paramref name="data"/> itself when it is empty.
/// </returns>
public IMultiClusterGossipData ApplyDataAndNotify(IMultiClusterGossipData data)
{
    if (data.IsEmpty)
    {
        return data;
    }

    MultiClusterData delta;
    MultiClusterData before = this.localData;
    this.localData = before.Merge(data, out delta);

    if (logger.IsEnabled(LogLevel.Trace))
    {
        logger.Trace("ApplyDataAndNotify: delta {0}", delta);
    }

    if (delta.IsEmpty)
    {
        return delta;
    }

    // some gateways have changed
    if (delta.Gateways.Count > 0)
    {
        ComputeAvailableGatewaysPerCluster();
    }

    if (delta.Configuration == null)
    {
        return delta;
    }

    // notify configuration listeners of change; iterate over a copy made under the lock
    List<IMultiClusterConfigurationListener> snapshot;
    lock (confListeners)
    {
        snapshot = confListeners.ToList();
    }

    foreach (var subscriber in snapshot)
    {
        try
        {
            subscriber.OnMultiClusterConfigurationChange(delta.Configuration);
        }
        catch (Exception exc)
        {
            // a failing listener is logged and must not prevent the others from being notified
            var message = String.Format("IMultiClusterConfigurationListener {0} threw exception processing configuration {1}", subscriber, delta.Configuration);
            logger.Error(ErrorCode.MultiClusterNetwork_LocalSubscriberException, message, exc);
        }
    }

    return delta;
}
/// <summary>
/// Performs a two-way synchronization with storage: reads all entries, diffs and writes
/// back the pushed configuration and every gateway entry that appears in either the pushed
/// data or in storage, and returns the delta pieces produced by those write-backs.
/// </summary>
/// <param name="pushed">The local data pushed to storage.</param>
/// <returns>The delta assembled from the individual write-back results.</returns>
/// <exception cref="Exception">
/// Any exception raised while reading or writing is logged and rethrown unchanged.
/// </exception>
public async Task<IMultiClusterGossipData> Synchronize(IMultiClusterGossipData pushed)
{
    logger.Debug("-Synchronize pushed:{0}", pushed);

    try
    {
        // read the entire table from storage
        var entriesFromStorage = await tableManager.ReadAllEntriesAsync();
        var configInStorage = entriesFromStorage.Item1;
        var gatewayInfoInStorage = entriesFromStorage.Item2;

        // diff and write back configuration
        var configDeltaTask = DiffAndWriteBackConfigAsync(pushed.Configuration, configInStorage);

        // diff and write back gateway info for each gateway appearing locally or in storage
        var gatewayDeltaTasks = new List<Task<GatewayEntry>>();
        var allAddresses = gatewayInfoInStorage.Keys.Union(pushed.Gateways.Keys);
        foreach (var address in allAddresses)
        {
            GatewayEntry pushedInfo = null;
            pushed.Gateways.TryGetValue(address, out pushedInfo);

            GossipTableEntry infoInStorage = null;
            gatewayInfoInStorage.TryGetValue(address, out infoInStorage);

            gatewayDeltaTasks.Add(DiffAndWriteBackGatewayInfoAsync(pushedInfo, infoInStorage));
        }

        // wait for all the writeback tasks to complete
        // these are not batched because we want them to fail individually on e-tag conflicts, not all
        await configDeltaTask;
        await Task.WhenAll(gatewayDeltaTasks);

        // assemble delta pieces (all tasks have completed, so reading Result does not block)
        var gw = new Dictionary<SiloAddress, GatewayEntry>();
        foreach (var t in gatewayDeltaTasks)
        {
            var d = t.Result;
            if (d != null)
            {
                gw.Add(d.SiloAddress, d);
            }
        }

        var delta = new MultiClusterData(gw, configDeltaTask.Result);

        logger.Debug("-Synchronize pulled delta:{0}", delta);

        return delta;
    }
    catch (Exception e)
    {
        logger.Info("-Synchronize encountered exception {0}", e);

        // rethrow with "throw;" so the original stack trace is preserved
        throw;
    }
}
/// <summary>
/// Queues <paramref name="data"/> for publishing by merging it into the data that is
/// already waiting, then calls <c>Notify()</c>.
/// </summary>
/// <param name="data">The data to add to the publish queue.</param>
public void Publish(MultiClusterData data)
{
    // add the data to the data waiting to be published
    toPublish = toPublish.Merge(data);

    // only log the cumulative queue when verbose logging is on
    if (oracle.logger.IsVerbose)
    {
        LogQueuedPublish(toPublish);
    }

    Notify();
}
/// <summary>
/// Queues <paramref name="data"/> for publishing by merging it into the data that is
/// already waiting, then calls <c>Notify()</c>.
/// </summary>
/// <param name="data">The data to add to the publish queue.</param>
public void Publish(IMultiClusterGossipData data)
{
    // add the data to the data waiting to be published
    toPublish = toPublish.Merge(data);

    // only log the cumulative queue when debug logging is on
    var debugEnabled = oracle.logger.IsEnabled(LogLevel.Debug);
    if (debugEnabled)
    {
        LogQueuedPublish(toPublish);
    }

    Notify();
}
/// <summary>
/// Incorporates source, producing a new result, and reports the delta.
/// Ignores expired entries in source, and removes expired entries from this.
/// </summary>
/// <remarks>
/// A source gateway entry wins when it is not expired and either no entry exists here or
/// the existing entry has an older heartbeat timestamp. Expired entries of this object
/// are left out of the result and reported in the delta. The source configuration
/// replaces this one only when this configuration is older, as decided by
/// <c>MultiClusterConfiguration.OlderThan</c>.
/// </remarks>
/// <param name="source">The source data to apply to the data in this object</param>
/// <param name="delta">A delta of what changes were actually applied, used for change listeners</param>
/// <returns>The updated data</returns>
public MultiClusterData Merge(MultiClusterData source, out MultiClusterData delta)
{
    //-- configuration
    var sourceConf = source.Configuration;
    var thisConf = this.Configuration;
    MultiClusterConfiguration resultConf;
    MultiClusterConfiguration deltaConf = null;

    if (MultiClusterConfiguration.OlderThan(thisConf, sourceConf))
    {
        resultConf = sourceConf;
        deltaConf = sourceConf;
    }
    else
    {
        resultConf = thisConf;
    }

    //-- gateways
    var sourceList = source.Gateways;
    var thisList = this.Gateways;
    var resultList = new Dictionary<SiloAddress, GatewayEntry>();
    var deltaList = new Dictionary<SiloAddress, GatewayEntry>();

    // Union already yields each key once, so no additional Distinct() is needed.
    foreach (var key in sourceList.Keys.Union(thisList.Keys))
    {
        GatewayEntry thisEntry;
        GatewayEntry sourceEntry;
        thisList.TryGetValue(key, out thisEntry);
        sourceList.TryGetValue(key, out sourceEntry);

        if (sourceEntry != null
            && !sourceEntry.Expired
            && (thisEntry == null || thisEntry.HeartbeatTimestamp < sourceEntry.HeartbeatTimestamp))
        {
            // the source entry is newer: it is both the result and a change
            resultList.Add(key, sourceEntry);
            deltaList.Add(key, sourceEntry);
        }
        else if (thisEntry != null)
        {
            if (!thisEntry.Expired)
            {
                // keep our own entry unchanged
                resultList.Add(key, thisEntry);
            }
            else
            {
                // our entry expired: drop it from the result and report that as a change
                deltaList.Add(key, thisEntry);
            }
        }
    }

    delta = new MultiClusterData(deltaList, deltaConf);
    return new MultiClusterData(resultList, resultConf);
}
/// <summary>
/// Publishes <paramref name="data"/> to this worker's gossip channel. A failure is logged
/// as a warning and stored in <c>LastException</c>; it is not rethrown.
/// </summary>
/// <param name="id">Identifier used to correlate the log messages of this publish.</param>
/// <param name="data">The data to publish.</param>
/// <returns>A task that completes when the publish attempt has finished.</returns>
protected async override Task Publish(int id, MultiClusterData data)
{
    oracle.logger.Verbose("-{0} Publish to channel {1} {2}", id, channel.Name, data);

    try
    {
        await channel.Publish(data);

        // success clears any previously recorded failure
        LastException = null;
        oracle.logger.Verbose("-{0} Publish to channel successful, answer={1}", id, data);
    }
    catch (Exception e)
    {
        var failureMessage = string.Format("-{0} Publish to channel {1} failed", id, channel.Name);
        oracle.logger.Warn(ErrorCode.MultiClusterNetwork_GossipCommunicationFailure, failureMessage, e);
        LastException = e;
    }
}
/// <summary>
/// If a configuration has been injected, applies it to the local data, clears the
/// injected value, and merges any resulting change into <paramref name="deltas"/>.
/// </summary>
/// <param name="deltas">Accumulator for changes that still need to be published.</param>
private void InjectConfiguration(ref MultiClusterData deltas)
{
    if (this.injectedConfig != null)
    {
        var data = new MultiClusterData(this.injectedConfig);

        // the injected configuration is consumed exactly once
        this.injectedConfig = null;

        if (logger.IsVerbose)
        {
            logger.Verbose("-InjectConfiguration {0}", data.Configuration.ToString());
        }

        var applied = this.localData.ApplyDataAndNotify(data);

        if (!applied.IsEmpty)
        {
            deltas = deltas.Merge(applied);
        }
    }
}
/// <summary>
/// Returns the data of this object without the gateway entries whose keys appear in
/// <paramref name="exclude"/>. The configuration is kept only when
/// <paramref name="exclude"/> carries no configuration.
/// </summary>
/// <param name="exclude">The data whose keys are to be removed from the result.</param>
/// <returns>A new data object holding the remaining gateways and configuration.</returns>
public MultiClusterData Minus(MultiClusterData exclude)
{
    IReadOnlyDictionary<SiloAddress, GatewayEntry> remaining;

    if (exclude.Gateways.Count == 0)
    {
        // nothing to remove, so the existing dictionary can be reused as is
        remaining = this.Gateways;
    }
    else
    {
        var kept = new Dictionary<SiloAddress, GatewayEntry>();
        foreach (var pair in this.Gateways)
        {
            if (!exclude.Gateways.ContainsKey(pair.Key))
            {
                kept.Add(pair.Key, pair.Value);
            }
        }

        remaining = kept;
    }

    var remainingConf = exclude.Configuration == null ? this.Configuration : null;

    return new MultiClusterData(remaining, remainingConf);
}
/// <summary>
/// If a configuration has been injected, applies it to the local data, clears the
/// injected value, and merges any resulting change into <paramref name="deltas"/>.
/// </summary>
/// <param name="deltas">Accumulator for changes that still need to be published.</param>
private void InjectConfiguration(ref MultiClusterData deltas)
{
    var nothingInjected = this.injectedConfig == null;
    if (nothingInjected)
    {
        return;
    }

    var data = new MultiClusterData(this.injectedConfig);

    // the injected configuration is consumed exactly once
    this.injectedConfig = null;

    var debugEnabled = logger.IsEnabled(LogLevel.Debug);
    if (debugEnabled)
    {
        logger.Debug("-InjectConfiguration {0}", data.Configuration.ToString());
    }

    var applied = this.localData.ApplyDataAndNotify(data);
    if (applied.IsEmpty)
    {
        return;
    }

    deltas = deltas.Merge(applied);
}
/// <summary>
/// Merges <paramref name="data"/> into the local data and recomputes the available
/// gateways per cluster when gateway entries changed. Notification of configuration
/// listeners is not implemented in this version.
/// </summary>
/// <param name="data">The incoming data to apply.</param>
/// <returns>
/// The delta of changes that were applied; <paramref name="data"/> itself when it is empty.
/// </returns>
public MultiClusterData ApplyDataAndNotify(MultiClusterData data)
{
    if (data.IsEmpty)
    {
        return data;
    }

    MultiClusterData delta;
    MultiClusterData before = this.localData;
    this.localData = before.Merge(data, out delta);

    if (logger.IsVerbose2)
    {
        logger.Verbose2("ApplyDataAndNotify: delta {0}", delta);
    }

    if (delta.IsEmpty)
    {
        return delta;
    }

    // some gateways have changed
    var gatewaysChanged = delta.Gateways.Count > 0;
    if (gatewaysChanged)
    {
        ComputeAvailableGatewaysPerCluster();
    }

    if (delta.Configuration != null)
    {
        // notify configuration listeners of change
        // code will be added in separate PR
    }

    return delta;
}
/// <summary>
/// Writes a debug log entry naming the channel and the data currently queued for it.
/// </summary>
/// <param name="data">The cumulative data waiting to be published.</param>
protected override void LogQueuedPublish(MultiClusterData data)
{
    var log = oracle.logger;
    log.Debug("enqueue publish to channel {0}, cumulative: {1}", channel.Name, data);
}
/// <summary>
/// Logs that data has been queued for publishing. Implementations in derived workers
/// name their own destination (silo, cluster or channel) in the log message.
/// </summary>
/// <param name="data">The cumulative data waiting to be published.</param>
protected abstract void LogQueuedPublish(MultiClusterData data);
/// <summary>
/// Publishes the given data to the destination handled by the derived worker.
/// </summary>
/// <param name="id">Identifier of this publish operation; implementations include it in their log messages.</param>
/// <param name="data">The data to publish.</param>
/// <returns>A task that completes when the publish attempt has finished.</returns>
protected abstract Task Publish(int id, MultiClusterData data);
/// <summary>
/// Assesses the local state and publishes any resulting changes. Called in response to
/// changed status, and periodically.
/// </summary>
/// <remarks>
/// Collects deltas from three sources (local gateway status, an injected configuration,
/// and demotion of stale local gateways), publishes them to all remote clusters, all other
/// local silos and all gossip channels, and requests a full synchronize with the channels
/// when this silo has just been reported as an active gateway.
/// </remarks>
private void PublishChanges()
{
    logger.Debug("--- PublishChanges: assess");

    var activeLocalGateways = this.siloStatusOracle.GetApproximateMultiClusterGateways();
    var iAmGateway = activeLocalGateways.Contains(Silo);

    // collect deltas that need to be published to all other gateways.
    // Most of the time, this will contain just zero or one change.
    var deltas = new MultiClusterData();

    // Determine local status, and add to deltas if it changed
    InjectLocalStatus(iAmGateway, ref deltas);

    // Determine if admin has injected a new configuration, and add to deltas if that is the case
    InjectConfiguration(ref deltas);

    // Determine if there are some stale gateway entries of this cluster that should be demoted,
    // and add those demotions to deltas
    if (iAmGateway)
    {
        DemoteLocalGateways(activeLocalGateways, ref deltas);
    }

    if (logger.IsEnabled(LogLevel.Debug))
    {
        logger.Debug("--- PublishChanges: found activeGateways={0} iAmGateway={1} publish={2}",
            string.Join(",", activeLocalGateways), iAmGateway, deltas);
    }

    if (!deltas.IsEmpty)
    {
        // Now we do the actual publishing. Note that we publish deltas only once and
        // simply log any errors without retrying. To handle problems
        // caused by lost messages we rely instead on the periodic background gossip:
        // each node periodically does full two-way gossip (Synchronize) with
        // some random other node or channel. This ensures all information
        // eventually gets everywhere.

        // publish deltas to all remote clusters
        foreach (var x in this.AllClusters().Where(x => x != this.clusterId))
        {
            GetClusterWorker(x).Publish(deltas);
        }

        // publish deltas to all local silos
        var activeLocalClusterSilos = this.GetApproximateOtherActiveSilos();
        foreach (var activeLocalClusterSilo in activeLocalClusterSilos)
        {
            GetSiloWorker(activeLocalClusterSilo).Publish(deltas);
        }

        // publish deltas to all gossip channels
        foreach (var ch in gossipChannels)
        {
            GetChannelWorker(ch).Publish(deltas);
        }
    }

    // A single TryGetValue replaces the former ContainsKey + indexer double lookup.
    GatewayEntry myDeltaEntry;
    if (deltas.Gateways.TryGetValue(this.Silo, out myDeltaEntry)
        && myDeltaEntry.Status == GatewayStatus.Active)
    {
        // Fully synchronize with channels if we just went active, which helps with initial startup time.
        // Note: doing a partial publish just before this full synchronize is by design, so that it reduces stabilization
        // time when several Silos are starting up at the same time, and there already is information about each other
        // before they attempt the full gossip
        foreach (var ch in gossipChannels)
        {
            GetChannelWorker(ch).Synchronize();
        }
    }

    logger.Debug("--- PublishChanges: done");
}
/// <summary>
/// Merges <paramref name="data"/> into the local data and reacts to what actually changed:
/// recomputes the available gateways when gateway entries changed, and sends a
/// configuration-change call to every registered listener grain when the configuration changed.
/// </summary>
/// <param name="data">The incoming data to apply.</param>
/// <returns>
/// The delta of changes that were applied; <paramref name="data"/> itself when it is empty.
/// </returns>
public MultiClusterData ApplyDataAndNotify(MultiClusterData data)
{
    if (data.IsEmpty)
    {
        return data;
    }

    MultiClusterData delta;
    MultiClusterData before = this.localData;
    this.localData = before.Merge(data, out delta);

    if (logger.IsEnabled(LogLevel.Trace))
    {
        logger.Trace("ApplyDataAndNotify: delta {0}", delta);
    }

    if (delta.IsEmpty)
    {
        return delta;
    }

    // some gateways have changed
    if (delta.Gateways.Count > 0)
    {
        ComputeAvailableGatewaysPerCluster();
    }

    if (delta.Configuration == null)
    {
        return delta;
    }

    // notify configuration listeners of change; iterate over a copy made under the lock
    List<GrainReference> snapshot;
    lock (confListeners)
    {
        snapshot = confListeners.ToList();
    }

    foreach (var subscriber in snapshot)
    {
        try
        {
            if (logger.IsEnabled(LogLevel.Trace))
            {
                logger.Trace("-NotificationWork: notify IProtocolParticipant {0} of configuration {1}", subscriber, delta.Configuration);
            }

            // enqueue conf change event as grain call; the returned task is deliberately not awaited
            this.grainFactory
                .Cast<ILogConsistencyProtocolParticipant>(subscriber)
                .OnMultiClusterConfigurationChange(delta.Configuration)
                .Ignore();
        }
        catch (Exception exc)
        {
            // a failing listener is logged and must not prevent the others from being notified
            var message = String.Format("IProtocolParticipant {0} threw exception processing configuration {1}", subscriber, delta.Configuration);
            logger.Error(ErrorCode.MultiClusterNetwork_LocalSubscriberException, message, exc);
        }
    }

    return delta;
}
/// <summary>
/// Creates the oracle data holder with empty multi-cluster data and no known gateways.
/// </summary>
/// <param name="log">The logger stored for use by this instance.</param>
internal MultiClusterOracleData(Logger log)
{
    logger = log;

    // All state starts out empty.
    activeGatewaysByCluster = new Dictionary<string, List<SiloAddress>>();
    localData = new MultiClusterData();
}
/// <summary>
/// Merges source into this object and returns the result, discarding the delta.
/// Ignores expired entries in source, and removes expired entries from this.
/// </summary>
/// <param name="source">The source data to apply to the data in this object</param>
/// <returns>The updated data</returns>
public MultiClusterData Merge(MultiClusterData source)
{
    // The two-argument overload does the work; its delta is not needed here.
    MultiClusterData unusedDelta;
    var merged = Merge(source, out unusedDelta);
    return merged;
}