/// <summary>
/// Refreshes the local successor cache from the current successor: slot 0 receives the
/// successor's own successor, and slots 1..n-1 receive entries 0..n-2 of the successor's
/// successor cache. Null remote entries leave the corresponding local slot untouched.
/// </summary>
/// <param name="args">
/// Background task arguments; <c>IdleTimeMs</c> is set to 0 when the successor turns out to be unavailable.
/// </param>
/// <remarks>
/// When a remote call fails and the channel reports itself unavailable, the method switches
/// <c>Status</c> to <c>MaintenanceStatus.FixingSuccessor</c> and returns; any other failure is rethrown.
/// </remarks>
private void StabilizeSuccessorsCache(BackgroundTaskArgs args) {
    Log.Write(LogEvent.Debug, "Calling stabilize successor cache for node {0}", LocalNode);

    var successorNodeChannel = CommunicationMgr.GetChannel(LocalNode.Successor);
    IOverlayNode successorNodeSuccessor = null;
    IOverlayNode[] successorNodeSuccessorCache = null;

    try {
        // first entry in my successor cache -> the successor of my successor
        successorNodeSuccessor = successorNodeChannel.Service.GetSuccessor();
    } catch (Exception ex) {
        if (!successorNodeChannel.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot perform stabilize successor cache because service for node {0} successor ({1}) is unavailable, skipping until successor is fixed. Error details: \r\n {2}",
            LocalNode, LocalNode.Successor, ex.ToString()
        );

        // part of stabilize routine: if node successor has failed, replace it with the next successor from the successor cache
        Status = MaintenanceStatus.FixingSuccessor;
        // repeat stabilize immediately
        args.IdleTimeMs = 0;
        return;
    }

    try {
        // 2 to n entries in my successor cache -> 1 to n-1 entries in my successor's successor cache
        successorNodeSuccessorCache = successorNodeChannel.Service.GetSuccessorCache();
    } catch (Exception ex) {
        if (!successorNodeChannel.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot get successor successor cache because service for node {0} successor ({1}) is unavailable, trying to reassign first available cached successor as successor. Error details: \r\n {2}",
            LocalNode, LocalNode.Successor, ex.ToString()
        );

        // part of stabilize routine: if node successor has failed, replace it with the next successor from the successor cache
        Status = MaintenanceStatus.FixingSuccessor;
        // repeat stabilize immediately
        args.IdleTimeMs = 0;
        return;
    }

    // everything is ok, so we can stabilize successors cache
    var cacheSize = OverlayNodeSrv.Params.SuccessorCacheSize;

    if (cacheSize > 0 && successorNodeSuccessor != null) {
        LocalNode.SuccessorCache[0] = successorNodeSuccessor;
    }

    if (successorNodeSuccessorCache != null) {
        // The remote cache is not guaranteed to be as long as the local one; stop at whichever
        // ends first so a shorter remote array cannot raise IndexOutOfRangeException.
        for (var i = 1; i < cacheSize && i - 1 < successorNodeSuccessorCache.Length; ++i) {
            var remoteEntry = successorNodeSuccessorCache[i - 1];
            if (remoteEntry != null) {
                LocalNode.SuccessorCache[i] = remoteEntry;
            }
        }
    }

    Log.Write(LogEvent.Debug, "Finished stabilizing successor cache for node {0}", LocalNode);
}
/// <summary>
/// Refreshes one finger table row: looks up the successor of the row's start key, pings it,
/// and on success stores a new <c>Finger</c> with that node and a recomputed start.
/// </summary>
/// <param name="args">
/// Background task arguments; <c>IdleTimeMs</c> is set to 0 when the finger's node is unavailable.
/// </param>
/// <param name="fingerRowIdx">
/// Row to fix. A value above <c>RingLength - 1</c> is replaced by 0; when omitted, a row index is
/// obtained from <c>ChordOverlayHelper.GetRandomFingerTableIndex</c>.
/// </param>
private void FixFingers(BackgroundTaskArgs args, int? fingerRowIdx = null) {
    var ringLength = OverlayNodeSrv.Params.RingLength;

    var rowIdx = !fingerRowIdx.HasValue
        ? ChordOverlayHelper.GetRandomFingerTableIndex(ringLength)
        : (ringLength - 1 < fingerRowIdx.Value ? 0 : fingerRowIdx.Value);

    Log.Write(LogEvent.Debug, "Calling fix fingers for node {0}, finger row position {1}", LocalNode, rowIdx);

    var lookupArg = new FindKeySuccessorArg(LocalNode.Fingers[rowIdx].Start, true);
    var fingerNode = OverlayNodeSrv.FindKeySuccessor(lookupArg).Node;
    var fingerChannel = CommunicationMgr.GetChannel(fingerNode);

    try {
        // Only accept the node as a finger once it has answered a ping.
        fingerChannel.Service.Ping();

        LocalNode.Fingers[rowIdx] = new Finger {
            Node = fingerNode,
            Start = ChordOverlayHelper.GetFingerStart(LocalNode.Id, rowIdx, ringLength)
        };

        Log.Write(LogEvent.Debug, "Finished fixing fingers for node {0}, finger row position {1}", LocalNode, rowIdx);
    } catch (Exception ex) {
        // Anything other than an unavailable remote service is not handled here.
        if (!fingerChannel.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Debug,
            "FixFingers for node {0} failed because successor {1} for finger {2} is unavailable. Skipping until successor is fixed. Error details: \r\n {3}",
            LocalNode, fingerNode, LocalNode.Fingers[rowIdx].Start, ex
        );

        // NOTE(review): this flags the node's own successor for fixing even though the unreachable
        // node is the finger's successor - confirm the two are expected to coincide here.
        Status = MaintenanceStatus.FixingSuccessor;
        // repeat stabilize immediately
        args.IdleTimeMs = 0;
    }
}
/// <summary>
/// Runs one stabilization step against the current successor: reads the successor's predecessor,
/// adopts it as the new successor when its id lies in the circular interval between this node
/// and the current successor, and then asks the (possibly new) successor to fix its predecessor.
/// </summary>
/// <param name="args">
/// Background task arguments; <c>IdleTimeMs</c> is set to 0 when the successor becomes unavailable
/// during the set-predecessor or fix-predecessor calls.
/// </param>
/// <remarks>
/// Whenever a remote call fails and the channel reports itself unavailable, <c>Status</c> is switched
/// to <c>MaintenanceStatus.FixingSuccessor</c> and the method returns; other failures are rethrown.
/// </remarks>
private void Stabilize(BackgroundTaskArgs args) {
    Log.Write(LogEvent.Debug, "Calling stabilize for node {0}", LocalNode);

    var successorNodeChannel = CommunicationMgr.GetChannel(LocalNode.Successor);
    IOverlayNode succPredecessor = null;

    try {
        succPredecessor = successorNodeChannel.Service.GetPredecessor();
    } catch (Exception ex) {
        if (!successorNodeChannel.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot perform stabilization because service for node {0} successor ({1}) is unavailable, trying to reassign first available cached successor as successor. Error details: \r\n {2}",
            LocalNode, LocalNode.Successor, ex.ToString()
        );

        // part of stabilize routine: if node successor has failed, replace it with the next successor from the successor cache
        Status = MaintenanceStatus.FixingSuccessor;
        return;
    }

    // Null guard: succPredecessor.Id is dereferenced below, so a missing predecessor must not take this path.
    if (succPredecessor != null && IsNodeFailed(succPredecessor)) {
        // if the successor node thinks that his predecessor is the node that we know has failed,
        // we need to tell our successor to set predecessor (i.e. to set us as its predecessor)
        try {
            successorNodeChannel.Service.SetPredecessor(LocalNode);
        } catch (Exception ex) {
            // This remote call gets the same unavailable-successor handling as the other remote
            // calls in this method instead of letting the failure escape the maintenance loop.
            if (!successorNodeChannel.IsUnavailable) {
                throw;
            }

            Log.Write(
                LogEvent.Warn,
                "Cannot perform stabilization set predecessor because service for node {0} successor ({1}) is unavailable, skipping until successor is fixed. Error details: \r\n {2}",
                LocalNode, LocalNode.Successor, ex.ToString()
            );

            Status = MaintenanceStatus.FixingSuccessor;
            // repeat stabilize immediately
            args.IdleTimeMs = 0;
            return;
        }

        // ??? remove failed node from the list (not needed anymore)
        FailedNodes.Remove(succPredecessor.Id);

        // nothing more to do in this step; the next cycle re-reads the successor's predecessor
        return;
    }

    // NOTE(review): the original author questioned whether the right bound of the interval should be inclusive - confirm.
    if (succPredecessor != null
        && ChordOverlayHelper.IsInCircularInterval(succPredecessor.Id, LocalNode.Id, LocalNode.Successor.Id)) {
        Log.Write(LogEvent.Debug, "Successor for node {0} changed from {1} to {2} during stabilization", LocalNode, LocalNode.Successor, succPredecessor);

        var previousSuccessor = LocalNode.Successor;
        LocalNode.Successor = succPredecessor;

        if (!previousSuccessor.Equals(LocalNode.Successor)) {
            // get the service for changed successor
            successorNodeChannel = CommunicationMgr.GetChannel(LocalNode.Successor);
        }
    }

    try {
        successorNodeChannel.Service.FixPredecessor(LocalNode);
        Log.Write(LogEvent.Debug, "Finished stabilizing for node {0}", LocalNode);
    } catch (Exception ex) {
        if (!successorNodeChannel.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot perform stabilization fix predecessor because service for node {0} successor ({1}) is unavailable, skipping until successor is fixed. Error details: \r\n {2}",
            LocalNode, LocalNode.Successor, ex.ToString()
        );

        // part of stabilize routine: if node successor has failed, replace it with the next successor from the successor cache
        Status = MaintenanceStatus.FixingSuccessor;
        // repeat stabilize immediately
        args.IdleTimeMs = 0;
    }
}
/// <summary>
/// Restarts background maintenance: calls <c>Stop()</c>, creates a fresh cancellation source and
/// starts a task that repeatedly runs Stabilize, StabilizeSuccessorsCache and FixFingers, sleeping
/// between cycles for the smallest idle time the three steps report.
/// </summary>
/// <remarks>
/// When a cycle begins while <c>IsFixingSuccessor</c> is true, the current successor's id is added
/// to <c>FailedNodes</c> and a replacement is taken from the successor cache; if none is available
/// a rejoin is requested instead.
/// </remarks>
public void Start() {
    // reset
    Stop();

    Cancellation = new CancellationTokenSource();

    // Capture the token for THIS run. Reading the Cancellation member inside the loop would make a
    // loop from an earlier Start() observe the new, un-cancelled source after a restart and never exit.
    var token = Cancellation.Token;

    // Set before the task starts so it cannot overwrite a FixingSuccessor status the task sets early on.
    Status = MaintenanceStatus.Started;

    var stabilizeTask = TaskFactory.StartNew(
        () => {
            var fingerRowIdxToFix = 0;
            Func<bool> canProceed = () => !token.IsCancellationRequested && !IsFixingSuccessor;

            while (!token.IsCancellationRequested) {
                Log.Write(LogEvent.Debug, "New stabilization cycle for node {0}", LocalNode);

                if (IsFixingSuccessor) {
                    Log.Write(LogEvent.Debug, "Putting node {0} into failed nodes list, trying to perform fixing successor step", LocalNode.Successor.Id);
                    FailedNodes.Add(LocalNode.Successor.Id);

                    if (!SetSuccessorFromCache()) {
                        // either all nodes are failed or only local nodes are available - something wrong, we need to try rejoining
                        SetRejoinRequested();
                        continue;
                    }

                    Status = MaintenanceStatus.Started;
                }

                var argsStab = new BackgroundTaskArgs { IdleTimeMs = AfterStabilizeWaitInterval };
                var argsStabSc = new BackgroundTaskArgs { IdleTimeMs = AfterStabilizeSuccCacheWaitInterval };
                var argsFf = new BackgroundTaskArgs { IdleTimeMs = AfterFixFingersWaitInterval };

                // Each step runs only if the previous one neither cancelled the run nor flagged the successor as failed.
                if (!canProceed()) {
                    continue;
                }
                Stabilize(argsStab);
                Log.Write(LogEvent.Debug, "'Stabilize' for node {0} is finished", LocalNode);

                if (!canProceed()) {
                    continue;
                }
                StabilizeSuccessorsCache(argsStabSc);
                Log.Write(LogEvent.Debug, "'StabilizeSuccessorsCache' for node {0} is finished", LocalNode);

                if (!canProceed()) {
                    continue;
                }
                FixFingers(argsFf, fingerRowIdxToFix);
                Log.Write(LogEvent.Debug, "'FixFingers' for node {0} is finished", LocalNode);

                if (!canProceed()) {
                    Log.Write(LogEvent.Debug, "Exiting from stabilization cycle for node {0}", LocalNode);
                    continue;
                }

                // advance to the next finger row, wrapping around at the end of the table
                fingerRowIdxToFix = fingerRowIdxToFix == OverlayNodeSrv.Params.RingLength - 1
                    ? 0
                    : fingerRowIdxToFix + 1;

                var ttw = Math.Min(argsFf.IdleTimeMs, Math.Min(argsStab.IdleTimeMs, argsStabSc.IdleTimeMs));
                Log.Write(LogEvent.Debug, "Stabilization cycle for node {0} is finished, waiting for {1} ms", LocalNode, ttw);
                Thread.Sleep(ttw);
            }
        },
        token
    );

    RunningTasks.Add(stabilizeTask);
}
/// <summary>
/// Refreshes the local successor cache from the current successor: slot 0 receives the
/// successor's own successor, and slots 1..n-1 receive entries 0..n-2 of the successor's
/// successor cache. Null remote entries leave the corresponding local slot untouched.
/// </summary>
/// <param name="args">
/// Background task arguments; <c>IdleTimeMs</c> is set to 0 after a failed successor was
/// replaced from the cache.
/// </param>
/// <remarks>
/// When a remote call fails and the remote service reports itself unavailable, the method tries
/// <c>SetSuccessorFromCache()</c> and requests a rejoin if that fails; other failures are rethrown.
/// </remarks>
private void StabilizeSuccessorsCache(BackgroundTaskArgs args) {
    Log.Write(LogEvent.Debug, "Calling stabilize successor cache for node {0}", LocalNode.Endpoint);

    var successorNodeSrv = NodeServices.GetRemoteNodeService(LocalNode.Successor);
    NodeDescriptor successorNodeSuccessor = null;
    NodeDescriptor[] successorNodeSuccessorCache = null;

    try {
        // first entry in my successor cache -> the successor of my successor
        successorNodeSuccessor = successorNodeSrv.Service.GetNodeSuccessor();
    } catch (Exception ex) {
        if (!successorNodeSrv.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot perform stabilize successor cache because service for node {0} successor ({1}) is unavailable, trying to reassign first available cached successor as successor. Error details: \r\n {2}",
            LocalNode.Endpoint, LocalNode.Successor, ex.ToString()
        );

        // part of stabilize routine: if node successor has failed, replace it with the next successor from the successor cache
        if (SetSuccessorFromCache()) {
            // repeat stabilize immediately
            args.IdleTimeMs = 0;
        } else {
            // either all nodes are failed or only local nodes are available - something wrong, we need to try rejoining
            SetRejoinRequested();
        }
        return;
    }

    try {
        // 2 to n entries in my successor cache -> 1 to n-1 entries in my successor's successor cache
        successorNodeSuccessorCache = successorNodeSrv.Service.GetNodeSuccessorCache();
    } catch (Exception ex) {
        if (!successorNodeSrv.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot get successor successor cache because service for node {0} successor ({1}) is unavailable, trying to reassign first available cached successor as successor. Error details: \r\n {2}",
            LocalNode.Endpoint, LocalNode.Successor, ex.ToString()
        );

        // part of stabilize routine: if node successor has failed, replace it with the next successor from the successor cache
        if (SetSuccessorFromCache()) {
            // repeat stabilize immediately
            args.IdleTimeMs = 0;
        } else {
            // either all nodes are failed or only local nodes are available - something wrong, we need to try rejoining
            SetRejoinRequested();
        }
        return;
    }

    // everything is ok, so we can stabilize successors cache
    var cacheSize = LocalNode.SuccessorCacheSize;

    if (cacheSize > 0 && successorNodeSuccessor != null) {
        LocalNode.SuccessorCache[0] = successorNodeSuccessor;
    }

    if (successorNodeSuccessorCache != null) {
        // The remote cache is not guaranteed to be as long as the local one; stop at whichever
        // ends first so a shorter remote array cannot raise IndexOutOfRangeException.
        for (var i = 1; i < cacheSize && i - 1 < successorNodeSuccessorCache.Length; ++i) {
            var remoteEntry = successorNodeSuccessorCache[i - 1];
            if (remoteEntry != null) {
                LocalNode.SuccessorCache[i] = remoteEntry;
            }
        }
    }

    Log.Write(LogEvent.Debug, "Finished stabilizing successor cache for node {0}", LocalNode.Endpoint);
}
/// <summary>
/// Runs one stabilization step: asks the successor for its predecessor, adopts that node as the
/// new successor when its id lies in the circular interval between this node and the current
/// successor, and finally asks the (possibly new) successor to fix its predecessor.
/// </summary>
/// <param name="args">
/// Background task arguments; <c>IdleTimeMs</c> is set to 0 after a failed successor was replaced
/// from the cache.
/// </param>
/// <remarks>
/// If a remote call fails and the remote service reports itself unavailable, the method tries
/// <c>SetSuccessorFromCache()</c>, requesting a rejoin when that fails, and returns. Any other
/// failure is rethrown.
/// </remarks>
private void Stabilize(BackgroundTaskArgs args) {
    Log.Write(LogEvent.Debug, "Calling stabilize for node {0}", LocalNode.Endpoint);

    var remoteSuccessor = NodeServices.GetRemoteNodeService(LocalNode.Successor);
    NodeDescriptor candidate = null;

    try {
        candidate = remoteSuccessor.Service.GetNodePredecessor();
    } catch (Exception ex) {
        if (!remoteSuccessor.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot perform stabilization because service for node {0} successor ({1}) is unavailable, trying to reassign first available cached successor as successor. Error details: \r\n {2}",
            LocalNode.Endpoint, LocalNode.Successor, ex.ToString()
        );

        // The successor has failed: fall back to the next cached successor.
        if (SetSuccessorFromCache()) {
            // run the next stabilize without waiting
            args.IdleTimeMs = 0;
        } else {
            // no usable cached successor is left, so ask for a rejoin
            SetRejoinRequested();
        }
        return;
    }

    // NOTE(review): the original author questioned whether the right bound of the interval should be inclusive - confirm.
    if (candidate != null
        && TopologyHelper.IsInCircularInterval(candidate.Id, LocalNode.Id, LocalNode.Successor.Id)) {
        Log.Write(LogEvent.Debug, "Successor for node {0} changed from {1} to {2} during stabilization", LocalNode.Endpoint, LocalNode.Successor, candidate);

        var formerSuccessor = LocalNode.Successor;
        LocalNode.Successor = candidate;

        if (!formerSuccessor.Equals(LocalNode.Successor)) {
            // talk to the new successor from here on
            remoteSuccessor = NodeServices.GetRemoteNodeService(LocalNode.Successor);
        }
    }

    try {
        remoteSuccessor.Service.FixPredecessor(LocalNode.Endpoint);
        Log.Write(LogEvent.Debug, "Finished stabilizing for node {0}", LocalNode.Endpoint);
    } catch (Exception ex) {
        if (!remoteSuccessor.IsUnavailable) {
            throw;
        }

        Log.Write(
            LogEvent.Warn,
            "Cannot perform stabilization fix predecessor because service for node {0} successor ({1}) is unavailable, trying to reassign first available cached successor as successor. Error details: \r\n {2}",
            LocalNode.Endpoint, LocalNode.Successor, ex.ToString()
        );

        // The successor has failed: fall back to the next cached successor.
        if (SetSuccessorFromCache()) {
            // run the next stabilize without waiting
            args.IdleTimeMs = 0;
        } else {
            // no usable cached successor is left, so ask for a rejoin
            SetRejoinRequested();
        }
    }
}
/// <summary>
/// Refreshes one finger table row: finds the successor for the row's current key and stores a
/// new key/node pair whose key is recomputed from the local node id and the row index.
/// </summary>
/// <param name="args">Background task arguments; not read or modified by this method.</param>
/// <param name="fingerRowIdx">
/// Row to fix. A value above <c>RingLength - 1</c> is replaced by 0; when omitted, a row index is
/// obtained from <c>LocalNode.GetRandomFingerTableIndex()</c>.
/// </param>
private void FixFingers(BackgroundTaskArgs args, int? fingerRowIdx = null) {
    var rowIdx = !fingerRowIdx.HasValue
        ? LocalNode.GetRandomFingerTableIndex()
        : (LocalInstance.RingLength - 1 < fingerRowIdx.Value ? 0 : fingerRowIdx.Value);

    Log.Write(LogEvent.Debug, "Calling fix fingers for node {0}, finger row position {1}", LocalNode.Endpoint, rowIdx);

    // Look the successor up first; the row is only overwritten once the lookup has returned.
    var fingerNode = LocalInstance.FindSuccessorForId(LocalNode.Fingers[rowIdx].Key);
    var fingerKey = TopologyHelper.GetFingerTableKey(LocalNode.Id, rowIdx, LocalInstance.RingLength);

    LocalNode.Fingers[rowIdx] = new KeyValuePair<ulong, NodeDescriptor>(fingerKey, fingerNode);

    Log.Write(LogEvent.Debug, "Finished fixing fingers for node {0}, finger row position {1}", LocalNode.Endpoint, rowIdx);
}
/// <summary>
/// Restarts background maintenance: calls <c>Stop()</c>, creates a fresh cancellation source and
/// starts a task that repeatedly runs Stabilize, StabilizeSuccessorsCache and FixFingers, sleeping
/// between cycles for the smallest idle time the three steps report.
/// </summary>
/// <remarks>
/// The loop leaves as soon as cancellation is observed between two steps.
/// </remarks>
public void Start() {
    // reset
    Stop();

    Cancellation = new CancellationTokenSource();

    // Capture the token for THIS run. Reading the Cancellation member inside the loop would make a
    // loop from an earlier Start() observe the new, un-cancelled source after a restart and never exit.
    var token = Cancellation.Token;

    // Set before the task starts so the assignment cannot race with the running loop.
    Status = MaintenanceStatus.Started;

    var stabilizeTask = TaskFactory.StartNew(
        () => {
            var fingerRowIdxToFix = 0;

            while (!token.IsCancellationRequested) {
                Log.Write(LogEvent.Debug, "New stabilization cycle for node {0}", LocalNode);

                var argsStab = new BackgroundTaskArgs { IdleTimeMs = StabilizeIdleTimeMs };
                var argsStabSc = new BackgroundTaskArgs { IdleTimeMs = StabilizeSuccessorCacheIdleTimeMs };
                var argsFf = new BackgroundTaskArgs { IdleTimeMs = FixFingersIdleTimeMs };

                if (token.IsCancellationRequested) {
                    break;
                }
                Stabilize(argsStab);
                Log.Write(LogEvent.Debug, "'Stabilize' for node {0} is finished", LocalNode);

                if (token.IsCancellationRequested) {
                    break;
                }
                StabilizeSuccessorsCache(argsStabSc);
                Log.Write(LogEvent.Debug, "'StabilizeSuccessorsCache' for node {0} is finished", LocalNode);

                if (token.IsCancellationRequested) {
                    break;
                }
                FixFingers(argsFf, fingerRowIdxToFix);
                Log.Write(LogEvent.Debug, "'FixFingers' for node {0} is finished", LocalNode);

                if (token.IsCancellationRequested) {
                    Log.Write(LogEvent.Debug, "Exiting from stabilization cycle for node {0}", LocalNode);
                    break;
                }

                // advance to the next finger row, wrapping around at the end of the table
                fingerRowIdxToFix = fingerRowIdxToFix == LocalInstance.RingLength - 1
                    ? 0
                    : fingerRowIdxToFix + 1;

                // Honour the fix-fingers idle time as well; it was configured above but previously ignored.
                var ttw = Math.Min(argsFf.IdleTimeMs, Math.Min(argsStab.IdleTimeMs, argsStabSc.IdleTimeMs));
                Log.Write(LogEvent.Debug, "Stabilization cycle for node {0} is finished, waiting for {1} ms", LocalNode, ttw);
                Thread.Sleep(ttw);
            }
        },
        token
    );

    RunningTasks.Add(stabilizeTask);
}