//private void OnMembershipChanged(ConfigChangeEventArgs args)
//{
//    // needs to be reviewed. Esp for the remote shard purposes.
//    //Refactoring required.
//    //if (_localShard != null)
//    //    _localShard.OnMembershipChanged(args);
//    if (args.ConfigurationChangeType.Equals(ChangeType.PrimarySelected) && args.Membership.Primary != null || (args.ConfigurationChangeType.Equals(ChangeType.PrimaryGone) && args.Membership.Primary==null))
//    {
//        HandlePrimaryChangeForRemoteshard(args);
//        //HandleMembershipChangeForRemoteShard();
//    }
//}

/// <summary>
/// Forwards a primary change of a remote shard to the matching entry in
/// <c>_remoteShards</c>.
/// The method only acts when the local shard's role is <c>NodeRole.Primary</c>, at least one
/// remote shard is registered, and the event carries both a cluster name equal to
/// <c>context.ClusterName</c> and a shard name equal to one of the remote shard keys.
/// For the matching remote shard the latest shard information is fetched through
/// <c>context.ConfigurationSession</c> and its primary/port are handed to
/// <c>RemoteShard.OnPrimaryChanged</c>.
/// </summary>
/// <param name="args">
/// Configuration change event; the <c>ClusterName</c> and <c>ShardName</c> parameters are read
/// from it. A null value is ignored.
/// </param>
/// <remarks>
/// Exceptions raised while handling the event are caught and, when error logging is enabled on
/// the server logger, logged; they are not rethrown.
/// </remarks>
private void HandlePrimaryChangeForRemoteshard(ConfigChangeEventArgs args)
{
    try
    {
        // Validate before the first dereference. Previously 'args' was only null-checked
        // inside the loop, after GetParamValue had already been called on it.
        if (args == null || _localShard == null)
        {
            return;
        }

        if (_localShard.NodeRole != NodeRole.Primary)
        {
            return;
        }

        if (_remoteShards == null || _remoteShards.Count == 0)
        {
            return;
        }

        string clusterName = args.GetParamValue <string>(EventParamName.ClusterName);
        string shardName = args.GetParamValue <string>(EventParamName.ShardName);

        if (clusterName == null || shardName == null)
        {
            return;
        }

        // The cluster check does not depend on the remote shard being inspected,
        // so it is evaluated once instead of on every loop iteration.
        if (clusterName != this.context.ClusterName)
        {
            return;
        }

        foreach (KeyValuePair <String, IShard> remoteShard in _remoteShards)
        {
            if (shardName != remoteShard.Key || remoteShard.Value == null)
            {
                continue;
            }

            if (context.ConfigurationSession != null)
            {
                //RTD: Should the dependency on CS be removed?
                ClusterInfo latestInfo = context.ConfigurationSession.GetDatabaseClusterInfo(context.ClusterName);
                ShardInfo latestShard = null;

                if (latestInfo != null)
                {
                    latestShard = latestInfo.GetShardInfo(remoteShard.Key);
                }

                if (latestShard != null)
                {
                    ((RemoteShard)remoteShard.Value).OnPrimaryChanged(latestShard.Primary, latestShard.Port);
                }
            }

            // The matching remote shard has been handled; no other entry needs to be visited.
            break;
        }
    }
    catch (Exception ex)
    {
        if (LoggerManager.Instance.ServerLogger != null && LoggerManager.Instance.ServerLogger.IsErrorEnabled)
        {
            LoggerManager.Instance.ServerLogger.Error("ClusterManager.HandlePrimaryChangedForRemoteshard() ", "HandlePrimaryChangeForRemoteshard :" + ex);
        }
    }
}
/// <summary>
/// Speculative Phase:
/// 1. We search for the node with the latest op-log entry. This detail bears the highest value.
/// 2. Next, if multiple nodes have the same op-log entry, we move onto the next step.
/// 3. All those nodes which have the same op-log entry AND are connected to the CS are considered.
/// 4. Highest priority from amongst these active nodes are taken into consideration.
/// 5. If this node fulfills all of the above, it successfully passes the speculative phase.
/// </summary>
/// <param name="heartbeatReport">
/// Heartbeat report of the local shard. The keys of its report table are treated as the active
/// nodes. When null, the method returns false.
/// </param>
/// <param name="activity">
/// The activity that triggered this check. <c>Activity.TakeoverElectionsTriggered</c> takes a
/// separate branch that skips the configuration-server comparison and the node scan.
/// </param>
/// <returns>
/// True only when the local op-log entry equals the maximum found among the active nodes, at
/// least one node with a matching op-log entry is connected to the CS, and the highest priority
/// among those nodes equals the local server priority. False in every other case, including a
/// null report or a missing local heartbeat.
/// </returns>
/// <remarks>
/// This call may block: when fewer nodes are active than configured and the operation id stored
/// at the CS compares greater than <c>lastRepId</c>, the method waits on <c>_mutexOnWait</c> for
/// up to <c>_waitTimeout</c> before continuing.
/// </remarks>
private bool ShouldIInitiateElection(LocalShardHeartbeatReporting heartbeatReport, Activity activity)
{
    IList <Address> activeNodes = null;
    if (heartbeatReport != null)
    {
        // Every node present in the report table is treated as active.
        activeNodes = heartbeatReport.GetReportTable.Keys.ToList();
        // Highest op-log operation id seen among the active nodes (filled by the scan below).
        OperationId maxOplog = null;
        // IPs of CS-connected nodes whose last op-log entry equals the local one.
        IList <string> matchingOplogServerIPs = new List <string>();
        HeartbeatInfo localHeartbeat = heartbeatReport.GetHeartbeatInfo(_context.LocalAddress);
        if (localHeartbeat == null)
        {
            // Without a local heartbeat there is nothing to compare against.
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsWarnEnabled)
            {
                LoggerManager.Instance.ShardLogger.Warn(
                    "ElectionBasedMembershipStrategy.ShouldIInititateElections()",
                    "local node heartbeat is null");
            }
            return(false);
        }
        // NOTE(review): lastRepId is never assigned after this initialization, so every check
        // below sees it as null. Confirm whether it was meant to be read from the replication module.
        OperationId lastRepId = null;
        if (activity.Equals(Activity.TakeoverElectionsTriggered))
        {
            // NOTE(review): the primary's heartbeat is fetched here but 'info' is not used
            // afterwards. The node scan only runs in the else branch, so on this path
            // maxOplog stays null and matchingOplogServerIPs stays empty.
            HeartbeatInfo info = null;
            if (_shard != null && _shard.Primary != null)
            {
                info = heartbeatReport.GetHeartbeatInfo(_shard.Primary.Address);
            }
        }
        else
        {
            ShardConfiguration sConfig = _clusterConfigMgr.GetShardConfiguration(_context.LocalShardName);
            int configuredNodesCount = 0;
            OperationId OpIdAtCS = null;
            if (sConfig != null && sConfig.Servers != null && sConfig.Servers.Nodes != null)
            {
                configuredNodesCount = sConfig.Servers.Nodes.Count;
            }
            // Only consult the CS when some configured nodes are missing from the heartbeat report.
            if (configuredNodesCount > 0 && activeNodes != null && activeNodes.Count < configuredNodesCount)
            {
                ShardInfo sInfo = null;
                // NOTE(review): _context.ConfigurationSession is dereferenced without a null check here.
                ClusterInfo cInfo =
                    _context.ConfigurationSession.GetDatabaseClusterInfo(_context.ClusterName);
                if (cInfo != null)
                {
                    sInfo = cInfo.GetShardInfo(_context.LocalShardName);
                }
                if (sInfo != null)
                {
                    OpIdAtCS = sInfo.LastOperationId;
                }
                // NOTE(review): lastRepId is null at this point (see above); the outcome depends on
                // how OperationId's '>' operator treats null operands - confirm.
                if (OpIdAtCS > lastRepId)
                {
                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Info(
                            "electBasedMemSt.ShouldIInitElections()",
                            "CS has an operation newer than my operation. Hence, waiting.");
                    }
                    if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsInfoEnabled)
                    {
                        if (OpIdAtCS != null)
                        {
                            LoggerManager.Instance.ShardLogger.Info(
                                "electBasedMemSt.ShouldIInitElections()",
                                "Operation ID on the CS:- " + OpIdAtCS.ElectionId + ":" + OpIdAtCS.ElectionBasedSequenceId);
                        }
                        else
                        {
                            LoggerManager.Instance.ShardLogger.Info(
                                "electBasedMemSt.ShouldIInitElections()",
                                "The operation ID at the CS is set to null.");
                        }
                        if (lastRepId != null)
                        {
                            LoggerManager.Instance.ShardLogger.Info(
                                "electBasedMemSt.ShouldIInitElections()",
                                "Local node Operation ID:- " + lastRepId.ElectionId + ":" + lastRepId.ElectionBasedSequenceId);
                        }
                        else
                        {
                            LoggerManager.Instance.ShardLogger.Info(
                                "electBasedMemSt.ShouldIInitElections()",
                                "The local node operation ID is set to null.");
                        }
                    }
                    //We maintain the last replicated operation log entry with the CS.
                    //If a node in a shard with older data (usually the previous secondary) is up before the node with the
                    //latest data(usually the previous primary), it waits for a configurable amount of time (2 minutes for
                    //now) before proceeding with the election procedure if it is still unable to detect a primary node.
                    //This way we give the node with the latest data a chance to become primary and therefore avoid data loss.
                    // Blocks until _mutexOnWait is pulsed or _waitTimeout elapses; the return value of
                    // Monitor.Wait is ignored and execution continues with the node scan either way.
                    lock (_mutexOnWait)
                    {
                        Monitor.Wait(_mutexOnWait, _waitTimeout);
                    }
                }
            }
            // Scan all active nodes: track the highest op-log id and collect the nodes that share
            // the local op-log entry and are connected to the CS.
            for (int i = 0; i < activeNodes.Count; i++)
            {
                // NOTE(review): 'info' is dereferenced without a null check, although
                // GetHeartbeatInfo is treated as able to return null for the local address above.
                HeartbeatInfo info = heartbeatReport.GetHeartbeatInfo(activeNodes[i]);
                OperationId currIndexOplog = info.LastOplogOperationId;
                if (currIndexOplog > maxOplog)
                {
                    maxOplog = currIndexOplog;
                }
                // A node matches when both op-log ids are null or they are Equals(), and the node
                // reports ConnectivityStatus.Connected for the CS.
                if (((localHeartbeat.LastOplogOperationId == null && info.LastOplogOperationId == null) || localHeartbeat.LastOplogOperationId != null && localHeartbeat.LastOplogOperationId.Equals(info.LastOplogOperationId)) && info.CSStatus == ConnectivityStatus.Connected)
                {
                    matchingOplogServerIPs.Add(activeNodes[i].IpAddress.ToString());
                }
            }
        }
        // Case 1: some active node has a newer op-log entry than the local node - not eligible.
        if (localHeartbeat.LastOplogOperationId != null && maxOplog != null && maxOplog > localHeartbeat.LastOplogOperationId && (lastRepId == null || maxOplog > lastRepId))
        {
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
            {
                LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                    "Local operation log is behind max op log wrt " + _context.LocalShardName + " shard.");
            }
            if (maxOplog != null && LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
            {
                LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                    "maxOplog: " + maxOplog.ElectionId + ":" + maxOplog.ElectionBasedSequenceId);
            }
            if (localHeartbeat.LastOplogOperationId != null && LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
            {
                LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                    "local opLog (from the heartbeat): " + localHeartbeat.LastOplogOperationId.ElectionId + ":" + localHeartbeat.LastOplogOperationId.ElectionBasedSequenceId);
            }
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled && lastRepId != null)
            {
                LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                    "LastOpFromOpLog (from the replication module): " + lastRepId.ElectionId + ":" + lastRepId.ElectionBasedSequenceId);
            }
            return(false);
        }
        // Case 2: the local op-log entry equals the maximum - eligibility is decided by priority.
        else if (maxOplog == localHeartbeat.LastOplogOperationId || (lastRepId != null && lastRepId.Equals(maxOplog)))
        {
            //if: there are multiple nodes that have the same oplog entry,
            //decision will be made on the basis of the priorities.
            //else: the node with the highest oplog entry will be considered eligible.
            if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
            {
                LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                    "Local operation log is equal to the max op log wrt " + _context.LocalShardName + " shard.");
            }
            if (maxOplog != null && (localHeartbeat != null && localHeartbeat.LastOplogOperationId != null))
            {
                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                        "maxOplog: " + maxOplog.ElectionId + ":" + maxOplog.ElectionBasedSequenceId);
                }
                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                {
                    LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                        "local opLog (from the heartbeat): " + localHeartbeat.LastOplogOperationId.ElectionId + ":" + localHeartbeat.LastOplogOperationId.ElectionBasedSequenceId);
                }
                if (LoggerManager.Instance.ShardLogger != null && LoggerManager.Instance.ShardLogger.IsDebugEnabled && lastRepId != null)
                {
                    LoggerManager.Instance.ShardLogger.Debug("electBasedMemSt.ShouldIInitiateElect()",
                        "LastOpFromOpLog (from the replication module): " + lastRepId.ElectionId + ":" + lastRepId.ElectionBasedSequenceId);
                }
            }
            // With no matching CS-connected node, control falls through to the final return(false).
            if (matchingOplogServerIPs.Count > 0)
            {
                int highestRunningNodePriority = _manager.GetHighestNodePriority(activeNodes, matchingOplogServerIPs);
                // Eligible only when the local server's priority equals the highest one among the matching nodes.
                if (highestRunningNodePriority.Equals(_manager.LocalServerPriority))
                {
                    if (LoggerManager.Instance.ShardLogger != null &&
                        LoggerManager.Instance.ShardLogger.IsDebugEnabled)
                    {
                        LoggerManager.Instance.ShardLogger.Debug(
                            "ElectionBasedMembershipStrategy.ShouldIInitiateElection()",
                            "Node : " + _context.LocalAddress.IpAddress.ToString() + " in shard: " + _context.LocalShardName + " is eligible having priority: " + highestRunningNodePriority + " .");
                    }
                    return(true);
                }
                else
                {
                    return(false);
                }
            }
        }
    }
    return(false);
}
/// <summary>
/// Dispatches a configuration change event to the component responsible for it.
/// Events that carry a cluster name different from <c>context.ClusterName</c> are ignored;
/// events without a cluster name are processed.
/// </summary>
/// <param name="arguments">
/// The configuration change event. The <c>ClusterName</c>, <c>ConfigurationChangeType</c> and,
/// for shard events, <c>ShardName</c> parameters are read from it. A null value is ignored.
/// </param>
public void OnConfigurationChanged(ConfigChangeEventArgs arguments)
{
    if (arguments == null)
    {
        return;
    }

    string clusterName = arguments.GetParamValue <string>(EventParamName.ClusterName);
    if (clusterName != null && !clusterName.Equals(context.ClusterName))
    {
        return;
    }

    ChangeType type = arguments.GetParamValue <ChangeType>(EventParamName.ConfigurationChangeType);

    switch (type)
    {
        case ChangeType.DistributionStrategyConfigured:
        case ChangeType.DatabaseCreated:
        case ChangeType.DatabaseDropped:
        case ChangeType.CollectionCreated:
        case ChangeType.CollectionMoved:
        case ChangeType.CollectionDropped:
        case ChangeType.ConfigRestored:
        case ChangeType.ResyncDatabase:
        case ChangeType.IntraShardStateTrxferCompleted:
            if (this.configChangeListener != null)
            {
                configChangeListener.OnConfigurationChanged(arguments);
            }
            break;

        case ChangeType.ConfigurationUpdated:
            break;

        case ChangeType.ShardAdded:
            {
                ShardInfo newShard = null;
                if (context.ConfigurationSession != null)
                {
                    ClusterInfo latestInfo = context.ConfigurationSession.GetDatabaseClusterInfo(clusterName);

                    // The cluster info may be unavailable; in that case newShard stays null,
                    // exactly as when no configuration session exists.
                    if (latestInfo != null)
                    {
                        newShard = latestInfo.GetShardInfo(arguments.GetParamValue <string>(EventParamName.ShardName));
                    }
                }

                OnShardAdded(newShard);
                if (this._clusterListener != null)
                {
                    _clusterListener.OnShardAdd(newShard);
                }
            }
            break;

        case ChangeType.ShardRemovedForceful:
        case ChangeType.ShardRemovedGraceful:
            if (this._clusterListener != null)
            {
                ShardInfo removedShard = new ShardInfo() { Name = arguments.GetParamValue <string>(EventParamName.ShardName) };

                // The second argument is true for a graceful removal and false for a forceful one.
                _clusterListener.OnShardRemove(removedShard, type == ChangeType.ShardRemovedGraceful);
            }
            break;

        case ChangeType.DistributionChanged:
            if (this._clusterListener != null)
            {
                _clusterListener.OnDistributionChanged();
            }
            break;

        case ChangeType.MembershipChanged:
            //write code for check if the primary has been changed for remote shard(s) connect with the new one
            HandlePrimaryChangeForRemoteshard(arguments);
            break;

        case ChangeType.PrimarySelected:
        case ChangeType.NodeJoined:
            // AR: This should be removing the node from the restoration manager of the local shard.
            // Currently no action is taken for these change types.
            break;

        case ChangeType.NodeAdded:
        case ChangeType.NodeRemoved:
        case ChangeType.PriorityChanged:
        case ChangeType.NodeLeft:
        case ChangeType.PrimaryGone:
            if (_localShard != null)
            {
                ((LocalShard)_localShard).OnConfigurationChanged(arguments);
            }
            break;

        case ChangeType.RangeUpdated:
            // Guarded like every other use of the cluster listener in this method.
            if (this._clusterListener != null)
            {
                _clusterListener.OnRangesUpdated();
            }
            break;

        case ChangeType.NewRangeAdded:
            if (this._clusterListener != null)
            {
                _clusterListener.OnNewRangeAdded();
            }
            break;

        default:
            //write code for check if the primary has been changed for remote shard(s) connect with the new one
            break;
    }
}