Example #1
0
        private async Task AcceptConnection(TcpListener tcpListener, RachisConsensus rachis)
        {
            rachis.OnDispose += (sender, args) => tcpListener.Stop();

            while (true)
            {
                TcpClient tcpClient;
                try
                {
                    tcpClient = await tcpListener.AcceptTcpClientAsync();
                }
                catch (InvalidOperationException)
                {
                    break;
                }
                var stream = tcpClient.GetStream();
                try
                {
                    rachis.AcceptNewConnection(stream, () => tcpClient.Client.Disconnect(false), tcpClient.Client.RemoteEndPoint, hello =>
                    {
                        if (rachis.Url == null)
                        {
                            return;
                        }

                        lock (this)
                        {
                            if (_rejectionList.TryGetValue(rachis.Url, out var set))
                            {
                                if (set.Contains(hello.DebugSourceIdentifier))
                                {
                                    throw new InvalidComObjectException("Simulated failure");
                                }
                            }
                            var connections = _connections.GetOrAdd(rachis.Url, _ => new ConcurrentSet <Tuple <string, TcpClient> >());
                            connections.Add(Tuple.Create(hello.DebugSourceIdentifier, tcpClient));
                        }
                    });
                }
                catch
                {
                    // expected
                }
            }
        }
Example #2
0
        protected RachisConsensus <CountingStateMachine> SetupServer(bool bootstrap = false, int port = 0, [CallerMemberName] string caller = null)
        {
            var tcpListener = new TcpListener(IPAddress.Loopback, port);

            tcpListener.Start();
            var ch = (char)(66 + _count++);

            if (bootstrap)
            {
                ch = (char)65;
                _count--;
            }

            var url = "tcp://localhost:" + ((IPEndPoint)tcpListener.LocalEndpoint).Port + "/?" + caller + "#" + ch;

            var server = StorageEnvironmentOptions.CreateMemoryOnly();

            int seed          = PredictableSeeds ? _random.Next(int.MaxValue) : _count;
            var configuration = new RavenConfiguration(caller, ResourceType.Server);

            configuration.Initialize();

            var serverStore        = new RavenServer(configuration).ServerStore;
            var rachis             = new RachisConsensus <CountingStateMachine>(serverStore, seed);
            var storageEnvironment = new StorageEnvironment(server);

            rachis.Initialize(storageEnvironment, configuration, configuration.Core.ServerUrl);
            rachis.OnDispose += (sender, args) =>
            {
                storageEnvironment.Dispose();
            };
            if (bootstrap)
            {
                rachis.Bootstrap(url);
            }

            rachis.Url = url;
            _listeners.Add(tcpListener);
            RachisConsensuses.Add(rachis);
            var task = AcceptConnection(tcpListener, rachis);

            return(rachis);
        }
Example #3
0
        private async Task Concurrent_SendAppendEntriesPendingToLeaderAsync(CancellationTokenSource cts, long currentTerm, long lastLogIndex)
        {
            var timeoutPeriod = _engine.Timeout.TimeoutPeriod / 4;
            var timeToWait    = TimeSpan.FromMilliseconds(timeoutPeriod);

            using (_engine.ContextPool.AllocateOperationContext(out JsonOperationContext timeoutCtx))
            {
                while (cts.IsCancellationRequested == false)
                {
                    try
                    {
                        timeoutCtx.Reset();
                        timeoutCtx.Renew();

                        await TimeoutManager.WaitFor(timeToWait, cts.Token);

                        if (cts.IsCancellationRequested)
                        {
                            break;
                        }
                        _engine.Timeout.Defer(_connection.Source);
                        _connection.Send(timeoutCtx, new AppendEntriesResponse
                        {
                            Pending      = true,
                            Success      = false,
                            CurrentTerm  = currentTerm,
                            LastLogIndex = lastLogIndex
                        });
                    }
                    catch (TimeoutException)
                    {
                        break;
                    }
                    catch (Exception e) when(RachisConsensus.IsExpectedException(e))
                    {
                        break;
                    }
                }
            }
        }
Example #4
0
        public ClusterObserver(
            ServerStore server,
            ClusterMaintenanceSupervisor maintenance,
            RachisConsensus <ClusterStateMachine> engine,
            TransactionContextPool contextPool,
            CancellationToken token)
        {
            _maintenance = maintenance;
            _nodeTag     = server.NodeTag;
            _server      = server;
            _engine      = engine;
            _contextPool = contextPool;
            _logger      = LoggingSource.Instance.GetLogger <ClusterObserver>(_nodeTag);
            _cts         = CancellationTokenSource.CreateLinkedTokenSource(token);

            var config = server.Configuration.Cluster;

            SupervisorSamplePeriod   = config.SupervisorSamplePeriod.AsTimeSpan;
            _stabilizationTime       = (long)config.StabilizationTime.AsTimeSpan.TotalMilliseconds;
            _breakdownTimeout        = config.AddReplicaTimeout.AsTimeSpan;
            _hardDeleteOnReplacement = config.HardDeleteOnReplacement;
            _observe = Run(_cts.Token);
        }
Example #5
0
 public Leader(RachisConsensus engine, long term)
 {
     Term    = term;
     _engine = engine;
 }
Example #6
0
 public Elector(RachisConsensus engine, RemoteConnection connection)
 {
     _engine     = engine;
     _connection = connection;
 }
Example #7
0
        private void ReadAndCommitSnapshot(ClusterOperationContext context, InstallSnapshot snapshot, CancellationToken token)
        {
            using (context.OpenWriteTransaction())
            {
                var lastTerm        = _engine.GetTermFor(context, snapshot.LastIncludedIndex);
                var lastCommitIndex = _engine.GetLastEntryIndex(context);

                if (_engine.GetSnapshotRequest(context) == false &&
                    snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it");
                    }

                    //This is okay to ignore because we will just get the committed entries again and skip them
                    ReadInstallSnapshotAndIgnoreContent(token);
                }
                else if (InstallSnapshot(context, token))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} ");
                    }

                    _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                    _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                }
                else
                {
                    var lastEntryIndex = _engine.GetLastEntryIndex(context);
                    if (lastEntryIndex < snapshot.LastIncludedIndex)
                    {
                        var message =
                            $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}";
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: {message}");
                        }

                        throw new InvalidOperationException(message);
                    }
                }

                // snapshot always has the latest topology
                if (snapshot.Topology == null)
                {
                    const string message = "Expected to get topology on snapshot";
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: {message}");
                    }

                    throw new InvalidOperationException(message);
                }

                using (var topologyJson = context.ReadObject(snapshot.Topology, "topology"))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}");
                    }

                    var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson);

                    RachisConsensus.SetTopology(_engine, context, topology);
                }

                _engine.SetSnapshotRequest(context, false);

                context.Transaction.InnerTransaction.LowLevelTransaction.OnDispose += t =>
                {
                    if (t is LowLevelTransaction llt && llt.Committed)
                    {
                        // we might have moved from passive node, so we need to start the timeout clock
                        _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);
                    }
                };

                context.Transaction.Commit();
            }
        }
Example #8
0
        private (bool HasRemovedFromTopology, long LastAcknowledgedIndex, long LastTruncate, long LastCommit)  ApplyLeaderStateToLocalState(Stopwatch sp, ClusterOperationContext context, List <RachisEntry> entries, AppendEntries appendEntries)
        {
            long lastTruncate;
            long lastCommit;

            bool removedFromTopology = false;

            // we start the tx after we finished reading from the network
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Ready to start tx in {sp.Elapsed}");
            }

            using (var tx = context.OpenWriteTransaction())
            {
                _engine.ValidateTerm(_term);

                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Tx running in {sp.Elapsed}");
                }

                if (entries.Count > 0)
                {
                    var(lastTopology, lastTopologyIndex) = _engine.AppendToLog(context, entries);
                    using (lastTopology)
                    {
                        if (lastTopology != null)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Topology changed to {lastTopology}");
                            }

                            var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology);

                            if (topology.Members.ContainsKey(_engine.Tag) ||
                                topology.Promotables.ContainsKey(_engine.Tag) ||
                                topology.Watchers.ContainsKey(_engine.Tag))
                            {
                                RachisConsensus.SetTopology(_engine, context, topology);
                            }
                            else
                            {
                                removedFromTopology = true;
                                _engine.ClearAppendedEntriesAfter(context, lastTopologyIndex);
                            }
                        }
                    }
                }

                var lastEntryIndexToCommit = Math.Min(
                    _engine.GetLastEntryIndex(context),
                    appendEntries.LeaderCommit);

                var lastAppliedIndex = _engine.GetLastCommitIndex(context);
                var lastAppliedTerm  = _engine.GetTermFor(context, lastEntryIndexToCommit);

                // we start to commit only after we have any log with a term of the current leader
                if (lastEntryIndexToCommit > lastAppliedIndex && lastAppliedTerm == appendEntries.Term)
                {
                    lastAppliedIndex = _engine.Apply(context, lastEntryIndexToCommit, null, sp);
                }

                lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastAppliedIndex);
                _engine.TruncateLogBefore(context, lastTruncate);

                lastCommit = lastAppliedIndex;
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Ready to commit in {sp.Elapsed}");
                }

                tx.Commit();
            }

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Processing entries request with {entries.Count} entries took {sp.Elapsed}");
            }

            var lastAcknowledgedIndex = entries.Count == 0 ? appendEntries.PrevLogIndex : entries[entries.Count - 1].Index;

            return(HasRemovedFromTopology : removedFromTopology, LastAcknowledgedIndex : lastAcknowledgedIndex, LastTruncate : lastTruncate, LastCommit : lastCommit);
        }
Example #9
0
        private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation)
        {
            _debugRecorder.Start();
            // only the leader can send append entries, so if we accepted it, it's the leader
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}");
            }
            if (negotiation.Term != _term)
            {
                //  Our leader is no longer a valid one
                var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader";
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Rejected,
                    Message      = msg,
                    CurrentTerm  = _term,
                    LastLogIndex = negotiation.PrevLogIndex
                });
                throw new InvalidOperationException($"We close this follower because: {msg}");
            }
            long prevTerm;

            using (context.OpenReadTransaction())
            {
                prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0;
            }
            if (prevTerm != negotiation.PrevLogTerm)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" +
                                     " will negotiate to find next matched index");
                }
                // we now have a mismatch with the log position, and need to negotiate it with
                // the leader
                NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation);
            }
            else
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state");
                }
                // this (or the negotiation above) completes the negotiation process
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                    Message      = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}",
                    CurrentTerm  = _term,
                    LastLogIndex = negotiation.PrevLogIndex
                });
            }
            _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot");
            _engine.Timeout.Defer(_connection.Source);

            // at this point, the leader will send us a snapshot message
            // in most cases, it is an empty snapshot, then start regular append entries
            // the reason we send this is to simplify the # of states in the protocol

            var snapshot = _connection.ReadInstallSnapshot(context);

            _debugRecorder.Record("Snapshot received");
            using (context.OpenWriteTransaction())
            {
                var lastTerm        = _engine.GetTermFor(context, snapshot.LastIncludedIndex);
                var lastCommitIndex = _engine.GetLastEntryIndex(context);
                if (snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it");
                    }

                    //This is okay to ignore because we will just get the committed entries again and skip them
                    ReadInstallSnapshotAndIgnoreContent(context);
                }
                else if (InstallSnapshot(context))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} ");
                    }

                    _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                    _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                }
                else
                {
                    var lastEntryIndex = _engine.GetLastEntryIndex(context);
                    if (lastEntryIndex < snapshot.LastIncludedIndex)
                    {
                        var message =
                            $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}";
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: {message}");
                        }
                        throw new InvalidOperationException(message);
                    }
                }

                // snapshot always has the latest topology
                if (snapshot.Topology == null)
                {
                    const string message = "Expected to get topology on snapshot";
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: {message}");
                    }
                    throw new InvalidOperationException(message);
                }
                using (var topologyJson = context.ReadObject(snapshot.Topology, "topology"))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}");
                    }

                    var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson);

                    RachisConsensus.SetTopology(_engine, context, topology);
                }

                context.Transaction.Commit();
            }

            _engine.Timeout.Defer(_connection.Source);
            _debugRecorder.Record("Invoking StateMachine.SnapshotInstalled");

            // notify the state machine, we do this in an async manner, and start
            // the operator in a separate thread to avoid timeouts while this is
            // going on

            var task = Task.Run(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex));

            var sp = Stopwatch.StartNew();

            var timeToWait = (int)(_engine.ElectionTimeout.TotalMilliseconds / 4);

            while (task.Wait(timeToWait) == false)
            {
                // this may take a while, so we let the other side know that
                // we are still processing, and we reset our own timer while
                // this is happening
                MaybeNotifyLeaderThatWeAreStillAlive(context, sp);
            }

            _debugRecorder.Record("Done with StateMachine.SnapshotInstalled");

            // we might have moved from passive node, so we need to start the timeout clock
            _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);

            _debugRecorder.Record("Snapshot installed");
            //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater
            //So we could validate that the entries sent by the leader are indeed the same as the ones we have.
            _connection.Send(context, new InstallSnapshotResponse
            {
                Done         = true,
                CurrentTerm  = _term,
                LastLogIndex = snapshot.LastIncludedIndex
            });

            _engine.Timeout.Defer(_connection.Source);
        }
Example #10
0
 public Candidate(RachisConsensus engine)
 {
     _engine = engine;
 }
Example #11
0
 public virtual void Initialize(RachisConsensus parent, TransactionOperationContext context)
 {
     _parent = parent;
     ContextPoolForReadOnlyOperations = _parent.ContextPool;
     Validator = InitializeValidator();
 }
Example #12
0
        private void FollowerSteadyState()
        {
            var  entries = new List <RachisEntry>();
            long lastCommit = 0, lastTruncate = 0;

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state");
            }
            while (true)
            {
                entries.Clear();

                using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                {
                    var appendEntries = _connection.Read <AppendEntries>(context);

                    if (appendEntries.Term != _engine.CurrentTerm)
                    {
                        _connection.Send(context, new AppendEntriesResponse
                        {
                            CurrentTerm = _engine.CurrentTerm,
                            Message     = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term,
                            Success     = false
                        });
                        if (_engine.Log.IsInfoEnabled && entries.Count > 0)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection...");
                        }

                        return;
                    }


                    _engine.Timeout.Defer(_connection.Source);
                    var sp = Stopwatch.StartNew();
                    if (appendEntries.EntriesCount != 0)
                    {
                        for (int i = 0; i < appendEntries.EntriesCount; i++)
                        {
                            entries.Add(_connection.ReadRachisEntry(context));
                            _engine.Timeout.Defer(_connection.Source);
                        }
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})"
#if DEBUG
                                             + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]"
#endif
                                             );
                        }
                    }

                    var lastLogIndex = appendEntries.PrevLogIndex;

                    // don't start write transaction fro noop
                    if (lastCommit != appendEntries.LeaderCommit ||
                        lastTruncate != appendEntries.TruncateLogBefore ||
                        entries.Count != 0)
                    {
                        bool removedFromTopology = false;
                        // we start the tx after we finished reading from the network
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}");
                        }
                        using (var tx = context.OpenWriteTransaction())
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}");
                            }
                            if (entries.Count > 0)
                            {
                                using (var lastTopology = _engine.AppendToLog(context, entries))
                                {
                                    if (lastTopology != null)
                                    {
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"Topology changed to {lastTopology}");
                                        }

                                        var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology);

                                        if (topology.Members.ContainsKey(_engine.Tag) ||
                                            topology.Promotables.ContainsKey(_engine.Tag) ||
                                            topology.Watchers.ContainsKey(_engine.Tag))
                                        {
                                            RachisConsensus.SetTopology(_engine, context, topology);
                                        }
                                        else
                                        {
                                            removedFromTopology = true;
                                        }
                                    }
                                }
                            }

                            lastLogIndex = _engine.GetLastEntryIndex(context);

                            var lastEntryIndexToCommit = Math.Min(
                                lastLogIndex,
                                appendEntries.LeaderCommit);


                            var lastAppliedIndex = _engine.GetLastCommitIndex(context);

                            if (lastEntryIndexToCommit > lastAppliedIndex)
                            {
                                _engine.Apply(context, lastEntryIndexToCommit, null);
                            }

                            lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit);
                            _engine.TruncateLogBefore(context, lastTruncate);
                            lastCommit = lastEntryIndexToCommit;
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}");
                            }
                            tx.Commit();
                        }

                        if (_engine.Log.IsInfoEnabled && entries.Count > 0)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}");
                        }

                        if (removedFromTopology)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now.");
                            }
                            _engine.SetNewState(RachisState.Passive, null, appendEntries.Term,
                                                "I was kicked out of the cluster and moved to passive mode");
                            return;
                        }
                    }

                    if (appendEntries.ForceElections)
                    {
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader.");
                        }
                        _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true);
                        return;
                    }

                    _connection.Send(context, new AppendEntriesResponse
                    {
                        CurrentTerm  = _engine.CurrentTerm,
                        LastLogIndex = lastLogIndex,
                        Success      = true
                    });

                    _engine.Timeout.Defer(_connection.Source);

                    _engine.ReportLeaderTime(appendEntries.TimeAsLeader);
                }
            }
        }
Example #13
0
        private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation)
        {
            // only the leader can send append entries, so if we accepted it, it's the leader
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request for term {negotiation.Term} where our term is {_engine.CurrentTerm}");
            }
            if (negotiation.Term > _engine.CurrentTerm)
            {
                _engine.FoundAboutHigherTerm(negotiation.Term);
            }

            long prevTerm;

            using (context.OpenReadTransaction())
            {
                prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0;
            }
            if (prevTerm != negotiation.PrevLogTerm)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" +
                                     " will negotiate to find next matched index");
                }
                // we now have a mismatch with the log position, and need to negotiate it with
                // the leader
                NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation);
            }
            else
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with identical PrevLogTerm will continue to steady state");
                }
                // this (or the negotiation above) completes the negotiation process
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                    Message      = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}",
                    CurrentTerm  = _engine.CurrentTerm,
                    LastLogIndex = negotiation.PrevLogIndex
                });
            }

            _engine.Timeout.Defer(_connection.Source);

            // at this point, the leader will send us a snapshot message
            // in most cases, it is an empty snapshot, then start regular append entries
            // the reason we send this is to simplify the # of states in the protocol

            var snapshot = _connection.ReadInstallSnapshot(context);

            using (context.OpenWriteTransaction())
            {
                var lastCommitIndex = _engine.GetLastCommitIndex(context);
                if (snapshot.LastIncludedIndex < lastCommitIndex)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"Follower {_engine.Tag}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it");
                    }
                    //This is okay to ignore because we will just get the commited entries again and skip them
                    ReadInstallSnapshotAndIgnoreContent(context);
                }
                else if (InstallSnapshot(context))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"Follower {_engine.Tag}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} ");
                    }
                    _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                    _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                }
                else
                {
                    var lastEntryIndex = _engine.GetLastEntryIndex(context);
                    if (lastEntryIndex < snapshot.LastIncludedIndex)
                    {
                        var message =
                            $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}";
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: {message}");
                        }
                        throw new InvalidOperationException(message);
                    }
                }

                // snapshot always has the latest topology
                if (snapshot.Topology == null)
                {
                    const string message = "Expected to get topology on snapshot";
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Follower {_engine.Tag}: {message}");
                    }
                    throw new InvalidOperationException(message);
                }
                using (var topologyJson = context.ReadObject(snapshot.Topology, "topology"))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Follower {_engine.Tag}: topology on install snapshot: {topologyJson}");
                    }

                    var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson);

                    RachisConsensus.SetTopology(_engine, context, topology);
                }

                context.Transaction.Commit();
            }
            //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater
            //So we could validate that the entries sent by the leader are indeed the same as the ones we have.
            _connection.Send(context, new InstallSnapshotResponse
            {
                Done         = true,
                CurrentTerm  = _engine.CurrentTerm,
                LastLogIndex = snapshot.LastIncludedIndex
            });

            _engine.Timeout.Defer(_connection.Source);

            // notify the state machine
            _engine.SnapshotInstalled(context, snapshot.LastIncludedIndex);

            _engine.Timeout.Defer(_connection.Source);
        }
Example #14
0
 public Follower(RachisConsensus engine, RemoteConnection remoteConnection)
 {
     _engine     = engine;
     _connection = remoteConnection;
 }