public async void SendHeartbeats() { if (NodeStateService.Role != NodeState.Leader) { Logger.LogWarning("Detected request to send hearbeat as non-leader"); return; } var startTime = DateTime.Now; var recognizedhosts = 1; ConcurrentBag <Guid> unreachableNodes = new ConcurrentBag <Guid>(); var clients = _clusterConnectionPool.GetAllNodeClients(); if (clients.Count() < StateMachine.GetNodes().Count()) { _clusterConnectionPool.CheckClusterConnectionPool(); clients = _clusterConnectionPool.GetAllNodeClients(); } var tasks = clients.Where(nc => nc.Key != _nodeStorage.Id && nc.Value.Address != NodeStateService.Url).Select(async node => { try { //Add the match index if required if (!NodeStateService.NextIndex.ContainsKey(node.Key)) { NodeStateService.NextIndex.Add(node.Key, _nodeStorage.GetTotalLogCount() + 1); } if (!NodeStateService.MatchIndex.ContainsKey(node.Key)) { NodeStateService.MatchIndex.TryAdd(node.Key, 0); } Logger.LogDebug(NodeStateService.GetNodeLogId() + "Sending heartbeat to " + node.Key); var entriesToSend = new List <LogEntry>(); var prevLogIndex = Math.Max(0, NodeStateService.NextIndex[node.Key] - 1); var startingLogsToSend = NodeStateService.NextIndex[node.Key] == 0 ? 1 : NodeStateService.NextIndex[node.Key]; //if (startingLogsToSend <= _nodeStorage.GetTotalLogCount()) // { if (_nodeStorage.LastSnapshotIncludedIndex < startingLogsToSend) { var unsentLogs = _nodeStorage.GetTotalLogCount() - startingLogsToSend; int endingLogsToSend = (startingLogsToSend + (unsentLogs < ClusterOptions.MaxLogsToSend ? unsentLogs : ClusterOptions.MaxLogsToSend)); int prevLogTerm = 0; if (_nodeStorage.LastSnapshotIncludedIndex == prevLogIndex) { prevLogTerm = _nodeStorage.LastSnapshotIncludedTerm; } else { prevLogTerm = (_nodeStorage.GetTotalLogCount() > 0 && prevLogIndex > 0) ? _nodeStorage.GetLogAtIndex(prevLogIndex).Term : 0; } if (NodeStateService.NextIndex[node.Key] <= _nodeStorage.GetLastLogIndex() && _nodeStorage.GetLastLogIndex() != 0 && !LogsSent.GetOrAdd(node.Key, false)) { Logger.LogDebug(NodeStateService.GetNodeLogId() + node.Key + " Requires logs from " + startingLogsToSend + " to " + endingLogsToSend); entriesToSend = _nodeStorage.GetLogRange(startingLogsToSend, endingLogsToSend).OrderBy(log => log.Index).ToList(); // entriesToSend = _nodeStorage.Logs.Where(l => l.Index >= NextIndex[connector.Key]).ToList(); //Logger.LogDebug(NodeStateService.GetNodeLogId() + "Detected node " + node.Key + " is not upto date, sending logs from " + entriesToSend.First().Index + " to " + entriesToSend.Last().Index); } // Console.WriteLine("Sending logs with from " + entriesToSend.First().Index + " to " + entriesToSend.Last().Index + " sent " + entriesToSend.Count + "logs."); LogsSent.AddOrUpdate(node.Key, true, (key, oldvalue) => { return(true); }); var result = await _clusterClient.Send(node.Key, new AppendEntry() { Term = _nodeStorage.CurrentTerm, Entries = entriesToSend, LeaderCommit = NodeStateService.CommitIndex, LeaderId = _nodeStorage.Id, PrevLogIndex = prevLogIndex, PrevLogTerm = prevLogTerm }); LogsSent.TryUpdate(node.Key, false, true); if (result != null && node.Key != result.NodeId && result.NodeId != default(Guid)) { Logger.LogInformation("Detected change of client"); RemoveNodesFromCluster(new Guid[] { node.Key }); AddNodesToCluster(new NodeInformation[] { new NodeInformation { Id = result.NodeId, TransportAddress = node.Value.Address, IsContactable = true, Name = "", } }); } if (result == null) { Logger.LogError(NodeStateService.GetNodeLogId() + "The node " + node.Key + " has returned a empty response"); } else if (result != null && result.IsSuccessful) { Logger.LogDebug(NodeStateService.GetNodeLogId() + "Successfully updated logs on " + node.Key); if (entriesToSend.Count() > 0) { var lastIndexToSend = entriesToSend.Last().Index; NodeStateService.NextIndex[node.Key] = lastIndexToSend + 1; int previousValue; bool SuccessfullyGotValue = NodeStateService.MatchIndex.TryGetValue(node.Key, out previousValue); if (!SuccessfullyGotValue) { Logger.LogError("Concurrency issues encountered when getting the Next Match Index"); } var updateWorked = NodeStateService.MatchIndex.TryUpdate(node.Key, lastIndexToSend, previousValue); //If the updated did not execute, there hs been a concurrency issue while (!updateWorked) { SuccessfullyGotValue = NodeStateService.MatchIndex.TryGetValue(node.Key, out previousValue); // If the match index has already exceeded the previous value, dont bother updating it if (previousValue > lastIndexToSend && SuccessfullyGotValue) { updateWorked = true; } else { updateWorked = NodeStateService.MatchIndex.TryUpdate(node.Key, lastIndexToSend, previousValue); } } Logger.LogDebug(NodeStateService.GetNodeLogId() + "Updated match index to " + NodeStateService.MatchIndex); } } else if (result.ConflictName == AppendEntriesExceptionNames.MissingLogEntryException) { Logger.LogDebug(NodeStateService.GetNodeLogId() + "Detected node " + node.Key + " is missing the previous log, sending logs from log " + (result.LastLogEntryIndex.Value + 1)); NodeStateService.NextIndex[node.Key] = (result.LastLogEntryIndex.Value + 1); } else if (result.ConflictName == AppendEntriesExceptionNames.ConflictingLogEntryException) { var firstEntryOfTerm = _nodeStorage.Logs.Where(l => l.Value.Term == result.ConflictingTerm).FirstOrDefault(); var revertedIndex = firstEntryOfTerm.Value.Index < result.FirstTermIndex ? firstEntryOfTerm.Value.Index : result.FirstTermIndex.Value; Logger.LogDebug(NodeStateService.GetNodeLogId() + "Detected node " + node.Key + " has conflicting values, reverting to " + revertedIndex); //Revert back to the first index of that term NodeStateService.NextIndex[node.Key] = revertedIndex; } else if (result.ConflictName == AppendEntriesExceptionNames.OldTermException) { Logger.LogDebug(NodeStateService.GetNodeLogId() + "Detected node " + node.Key + " rejected heartbeat due to invalid leader term."); } } else { Logger.LogInformation("Detected pending snapshot to send, sending snapshot with included index " + _nodeStorage.LastSnapshotIncludedIndex); //Mark that you have send the logs to this node LogsSent.AddOrUpdate(node.Key, true, (key, oldvalue) => { return(true); }); var lastIndex = _nodeStorage.LastSnapshotIncludedIndex; var result = await _clusterClient.Send(node.Key, new InstallSnapshot() { Term = _nodeStorage.CurrentTerm, LastIncludedIndex = lastIndex, LastIncludedTerm = _nodeStorage.LastSnapshotIncludedTerm, LeaderId = _nodeStorage.Id, Snapshot = _nodeStorage.LastSnapshot }); if (result.IsSuccessful) { //mark the node has successfully received the logs LogsSent.TryUpdate(node.Key, false, true); NodeStateService.NextIndex[node.Key] = lastIndex + 1; } } /* } * //The node is up to date * else * { * var result = await _clusterClient.Send(node.Key, new AppendEntry() * { * Term = _nodeStorage.CurrentTerm, * Entries = entriesToSend, * LeaderCommit = NodeStateService.CommitIndex, * LeaderId = _nodeStorage.Id, * PrevLogIndex = prevLogIndex, * PrevLogTerm = _nodeStorage.GetLogAtIndex(prevLogIndex).Term * }); * } */ Interlocked.Increment(ref recognizedhosts); } catch (HttpRequestException e) { unreachableNodes.Add(node.Key); Logger.LogWarning(NodeStateService.GetNodeLogId() + "Heartbeat to node " + node.Key + " had a connection issue."); } catch (TaskCanceledException e) { unreachableNodes.Add(node.Key); Logger.LogWarning(NodeStateService.GetNodeLogId() + "Heartbeat to node " + node.Key + " timed out"); } catch (Exception e) { unreachableNodes.Add(node.Key); Logger.LogWarning(NodeStateService.GetNodeLogId() + "Encountered error while sending heartbeat to node " + node.Key + ", request failed with error \"" + e.GetType().Name + e.Message + Environment.NewLine + e.StackTrace);//+ "\"" + e.StackTrace); } }); await Task.WhenAll(tasks); //If less then the required number recognize the request, go back to being a candidate if (recognizedhosts < ClusterOptions.MinimumNodes) { SetNodeRole(NodeState.Candidate); } else { //RemoveNodesFromCluster(unreachableNodes.ToList()); } }
public IActionResult GetClients() { return(Ok(_clusterConnectionPool.GetAllNodeClients())); }