Пример #1
0
        /// <summary>
        /// Finishes installing a snapshot on this node: resets the local log so that it
        /// contains only the snapshot's last included entry, resets the index bookkeeping
        /// and reloads the state machine from the snapshot file.
        /// </summary>
        /// <param name="s">Stream of the received snapshot file; only its <c>Name</c> (path) is used here.</param>
        /// <param name="r">The InstallSnapshot request carrying LastIncludedIndex/Term and the encoded last included log.</param>
        public void EndReceiveInstallSnapshot(FileStream s, InstallSnapshot r)
        {
            lock (Raft)
            {
                // 6. If existing log entry has same index and term as snapshot's
                // last included entry, retain log entries following it and reply
                var last = ReadLog(r.Argument.LastIncludedIndex);
                if (null != last && last.Term == r.Argument.LastIncludedTerm)
                {
                    // Keeping the whole log is simpler here: trimming entries before the
                    // snapshot point could drop entries that have not been applied yet.
                    return;
                }
                // 7. Discard the entire log
                // Deleting everything would leave the next AppendEntries without a prev entry.
                // Therefore the last InstallSnapshot chunk carries LastIncludedLog; the receiver
                // clears its log and inserts that entry (similar to the Index=0 entry inserted
                // at system initialization).
                // [Everything except the snapshot's last included entry is deleted.]
                var lastIncludedLog = RaftLog.Decode(r.Argument.LastIncludedLog, Raft.StateMachine.LogFactory);

                // SaveLog overwrites LastIndex with the index of the saved entry, so the old
                // tail must be captured first; otherwise the forward removal below would see
                // an empty range and stale entries after the snapshot point would survive.
                var oldLastIndex = LastIndex;
                SaveLog(lastIncludedLog);
                // A follower has no concurrent requests to serve, so delete inside the lock.
                RemoveLogReverse(lastIncludedLog.Index - 1, FirstIndex);
                RemoveLogAndCancelStart(lastIncludedLog.Index + 1, oldLastIndex);
                LastIndex   = lastIncludedLog.Index;
                FirstIndex  = lastIncludedLog.Index;
                CommitIndex = FirstIndex;
                LastApplied = FirstIndex;

                // 8. Reset state machine using snapshot contents (and load
                // snapshot's cluster configuration)
                Raft.StateMachine.LoadFromSnapshot(s.Name);
                logger.Debug("{0} EndReceiveInstallSnapshot Path={1}", Raft.Name, s.Name);
            }
        }
Пример #2
0
        /// <summary>
        /// Builds a <c>RaftLog</c> from its binary form.
        /// </summary>
        /// <param name="data">Encoded log entry; it decodes itself into the new instance.</param>
        /// <param name="logFactory">Factory handed to the new <c>RaftLog</c> instance.</param>
        /// <returns>The freshly decoded log entry.</returns>
        public static RaftLog Decode(Binary data, Func <int, Log> logFactory)
        {
            var decoded = new RaftLog(logFactory);
            data.Decode(decoded);
            return decoded;
        }
Пример #3
0
        /// <summary>
        /// Leader-side entry point: appends <paramref name="log"/> to the local log,
        /// triggers replication to every connector and optionally blocks until the
        /// entry has been applied.
        /// </summary>
        /// <param name="log">The application log to append.</param>
        /// <param name="WaitApply">When true, block until the entry's apply-future completes.</param>
        /// <exception cref="TaskCanceledException">This node is not the leader.</exception>
        public void AppendLog(Log log, bool WaitApply = true)
        {
            if (false == Raft.IsLeader)
            {
                throw new TaskCanceledException(); // fail fast
            }
            TaskCompletionSource <int> future = null;

            lock (Raft)
            {
                // Do not advance LastIndex until the entry is registered and persisted;
                // a failure must not leave a gap in the index sequence or a dangling future.
                var previousLastIndex = LastIndex;
                var raftLog = new RaftLog(Term, previousLastIndex + 1, log);
                if (WaitApply)
                {
                    future = new TaskCompletionSource <int>();
                    if (false == WaitApplyFutures.TryAdd(raftLog.Index, future))
                    {
                        throw new Exception("Impossible");
                    }
                }
                try
                {
                    SaveLog(raftLog);
                    LastIndex = raftLog.Index;
                }
                catch
                {
                    // Roll back bookkeeping so the next append reuses this index.
                    LastIndex = previousLastIndex;
                    if (WaitApply)
                    {
                        WaitApplyFutures.TryRemove(raftLog.Index, out _);
                    }
                    throw;
                }
            }

            // Broadcast to followers; majority acknowledgement is awaited asynchronously.
            Raft.Server.Config.ForEachConnector(
                (connector) => TrySendAppendEntries(connector as Server.ConnectorEx, null));

            if (WaitApply)
            {
                future.Task.Wait();
            }
        }
Пример #4
0
        /// <summary>
        /// Follower-side handling of an AppendEntries request: validates the term and
        /// the prev-log match, reconciles the received entries with the local log,
        /// advances the commit index and sends the result back.
        /// </summary>
        /// <param name="r">The AppendEntries RPC; its Result is filled in and sent from here.</param>
        /// <returns>
        /// <c>Procedure.Success</c> when the entries were accepted,
        /// <c>Procedure.LogicError</c> when the request was rejected.
        /// </returns>
        internal int FollowerOnAppendEntries(AppendEntries r)
        {
            LeaderActiveTime = Zeze.Util.Time.NowUnixMillis;
            r.Result.Term    = Term;
            r.Result.Success = false; // set default false

            if (r.Argument.Term < Term)
            {
                // 1. Reply false if term < currentTerm (§5.1)
                r.SendResult();
                return(Procedure.LogicError);
            }

            var prevLog = ReadLog(r.Argument.PrevLogIndex);

            if (prevLog == null || prevLog.Term != r.Argument.PrevLogTerm)
            {
                // 2. Reply false if log doesn't contain an entry
                // at prevLogIndex whose term matches prevLogTerm(§5.3)
                r.SendResult();
                return(Procedure.LogicError);
            }

            foreach (var raftLogData in r.Argument.Entries)
            {
                var copyLog       = RaftLog.Decode(raftLogData, Raft.StateMachine.LogFactory);
                var conflictCheck = ReadLog(copyLog.Index);
                if (null != conflictCheck && conflictCheck.Term != copyLog.Term)
                {
                    // 3. If an existing entry conflicts
                    // with a new one (same index but different terms),
                    // delete the existing entry and all that follow it(§5.3)
                    RemoveLogAndCancelStart(conflictCheck.Index, LastIndex);
                    LastIndex = prevLog.Index;
                    // The slot is empty now; fall through so the leader's entry is stored.
                    // Without this the conflicting entry would be deleted but never
                    // replaced, leaving a hole while still replying Success.
                    conflictCheck = null;
                }
                if (null == conflictCheck)
                {
                    // 4. Append any new entries not already in the log
                    SaveLog(copyLog);
                }
                // Reuse this variable: when a later conflict requires deletion it
                // points exactly at the entry preceding the conflict.
                prevLog = copyLog;
            }
            // 5. If leaderCommit > commitIndex,
            // set commitIndex = min(leaderCommit, index of last new entry)
            if (r.Argument.LeaderCommit > CommitIndex)
            {
                CommitIndex = Math.Min(r.Argument.LeaderCommit, LastRaftLog().Index);
                TryApply(ReadLog(CommitIndex));
            }
            r.Result.Success = true;
            logger.Debug("{0}: {1}", Raft.Name, r);
            r.SendResultCode(0);
            return(Procedure.Success);
        }
Пример #5
0
        /// <summary>
        /// Looks up the log entry stored under exactly <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Log index used as the storage key.</param>
        /// <returns>The decoded entry, or null when nothing is stored under that key.</returns>
        private RaftLog ReadLog(long index)
        {
            var keyBuffer = ByteBuffer.Allocate();
            keyBuffer.WriteLong8(index);

            var stored = Logs.Get(keyBuffer.Bytes, keyBuffer.Size);
            if (null != stored)
            {
                return RaftLog.Decode(new Binary(stored), Raft.StateMachine.LogFactory);
            }
            return null;
        }
Пример #6
0
        /// <summary>
        /// Persists <paramref name="log"/> under its index with a synchronous write
        /// and records that index as <c>LastIndex</c>.
        /// </summary>
        /// <param name="log">The entry to store.</param>
        private void SaveLog(RaftLog log)
        {
            // Remember the most recent index; the next generated index is derived from it.
            LastIndex = log.Index;

            var keyBuffer = ByteBuffer.Allocate();
            keyBuffer.WriteLong8(log.Index);

            var encoded = log.Encode();

            // Both key and value buffers must start at offset 0.
            Logs.Put(keyBuffer.Bytes, keyBuffer.Size, encoded.Bytes, encoded.Size, null, new WriteOptions().SetSync(true));
        }
Пример #7
0
        /// <summary>
        /// Sends the next batch of log entries to one follower, provided no other
        /// AppendEntries is outstanding for it. Falls back to snapshot installation
        /// when the follower needs entries the local log can no longer provide.
        /// </summary>
        /// <param name="connector">Connection and replication state of the target follower.</param>
        /// <param name="pending">
        /// The request the caller believes is outstanding for this connector (null when
        /// none); the call is a no-op when it differs from <c>connector.Pending</c>.
        /// </param>
        private void TrySendAppendEntries(Server.ConnectorEx connector, AppendEntries pending)
        {
            lock (Raft)
            {
                // Strictly, setting this once for all followers would suffice; not optimized here.
                AppendLogActiveTime = Util.Time.NowUnixMillis;

                if (connector.Pending != pending)
                {
                    return;
                }

                // Clear first so the early returns below need not clear it themselves.
                connector.Pending = null;
                if (false == connector.IsHandshakeDone)
                {
                    // Hearbeat Will Retry
                    return;
                }

                // [Note]
                // While a snapshot is being installed, replicating logs is bound to fail.
                // Things would work without this check; it is an optimization.
                if (connector.InstallSnapshotting)
                {
                    return;
                }

                if (connector.NextIndex > LastIndex)
                {
                    return;
                }

                var nextLog = ReadLogReverse(connector.NextIndex);
                // NOTE(review): ReadLogReverse is defined elsewhere; a null result is
                // assumed to mean no usable entry exists for NextIndex - confirm.
                // In that case there is nothing to replicate from, so treat it like
                // reaching the start of the log instead of dereferencing null.
                if (null == nextLog || nextLog.Index == FirstIndex)
                {
                    // Reached the start of the log: there is no prev-log, so entries
                    // cannot be replicated. This typically happens when the leader has
                    // taken a snapshot while the follower's log is older, or for a
                    // newly started follower.
                    StartInstallSnapshot(connector);
                    return;
                }

                // Indexes always increase, but a step of exactly 1 is not assumed;
                // this also handles steps other than 1.
                connector.NextIndex = nextLog.Index;

                connector.Pending = new AppendEntries();
                connector.Pending.Argument.Term         = Term;
                connector.Pending.Argument.LeaderId     = Raft.Name;
                connector.Pending.Argument.LeaderCommit = CommitIndex;

                // Expected to always be found, since nextLog is past FirstIndex.
                var prevLog = ReadLogReverse(nextLog.Index - 1);
                connector.Pending.Argument.PrevLogIndex = prevLog.Index;
                connector.Pending.Argument.PrevLogTerm  = prevLog.Term;

                // Limit the number of entries per request. [Note] Not required by raft.
                int     maxCount    = Raft.RaftConfig.MaxAppendEntiresCount;
                RaftLog lastCopyLog = nextLog;
                for (var copyLog = nextLog;
                     maxCount > 0 && null != copyLog && copyLog.Index <= LastIndex;
                     copyLog = ReadLogStart(copyLog.Index + 1), --maxCount
                     )
                {
                    lastCopyLog = copyLog;
                    connector.Pending.Argument.Entries.Add(new Binary(copyLog.Encode()));
                }
                connector.Pending.Argument.LastEntryIndex = lastCopyLog.Index;
                if (false == connector.Pending.Send(
                        connector.Socket,
                        (p) => ProcessAppendEntriesResult(connector, p),
                        Raft.RaftConfig.AppendEntriesTimeout))
                {
                    connector.Pending = null;
                    // Hearbeat Will Retry
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Scans forward from <paramref name="startIndex"/> and returns the last log
        /// entry whose index has been matched by enough followers to form a majority
        /// together with this node.
        /// </summary>
        /// <param name="startIndex">First index to examine.</param>
        /// <returns>
        /// The highest contiguous majority-replicated entry at or after
        /// <paramref name="startIndex"/>, or null when not even the first one qualifies.
        /// </returns>
        private RaftLog FindMaxMajorityLog(long startIndex)
        {
            RaftLog lastMajorityLog = null;

            for (long index = startIndex; index <= LastIndex; /**/)
            {
                var raftLog = ReadLogStart(index);
                if (null == raftLog)
                {
                    break;
                }
                index = raftLog.Index + 1;

                int majorityCount = 0;
                Raft.Server.Config.ForEachConnector(
                    (c) =>
                {
                    if (c is Server.ConnectorEx cex && cex.MatchIndex >= raftLog.Index)
                    {
                        ++majorityCount;
                    }
                });

                // No majority for this entry: stop and return the previous majority
                // entry, which may still be null.
                // When the count equals HalfCount, adding this node makes a majority.
                if (majorityCount < Raft.RaftConfig.HalfCount)
                {
                    break;
                }

                // Record the entry only after it passed the majority check; recording
                // it earlier would return (and let the caller commit) an entry that
                // has not been replicated to a majority.
                lastMajorityLog = raftLog;
            }
            return(lastMajorityLog);
        }

        /// <summary>
        /// Handles a successful AppendEntries acknowledgement: records the follower's
        /// replication progress and advances the commit index when a current-term
        /// entry has reached a majority.
        /// </summary>
        /// <param name="rpc">The acknowledged AppendEntries request.</param>
        /// <param name="connector">Replication state of the follower that acknowledged it.</param>
        private void TryCommit(AppendEntries rpc, Server.ConnectorEx connector)
        {
            var ackedIndex = rpc.Argument.LastEntryIndex;
            connector.NextIndex  = ackedIndex + 1;
            connector.MatchIndex = ackedIndex;

            // Result of an old AppendEntries whose entries are already committed;
            // nothing more to do. [Note] Not required, just a small optimization.
            if (ackedIndex <= CommitIndex)
            {
                return;
            }

            // Rules for Servers
            // If there exists an N such that N > commitIndex, a majority
            // of matchIndex[i] >= N, and log[N].term == currentTerm:
            // set commitIndex = N(§5.3, §5.4).

            // TODO: leader CommitIndex initialization.
            var candidate = FindMaxMajorityLog(CommitIndex + 1);

            // null: no majority found at all.
            // Term mismatch: an uncommitted entry from a previous term reached a
            // majority now; it is not committed on its own but together with the
            // next current-term entry.
            if (null == candidate || candidate.Term != Term)
            {
                return;
            }

            CommitIndex = candidate.Index;
            TryApply(candidate);
        }

        /// <summary>
        /// Applies every not-yet-applied entry up to and including
        /// <paramref name="lastApplyableLog"/> to the state machine, completing the
        /// wait-future registered for each applied index.
        /// </summary>
        /// <param name="lastApplyableLog">Upper bound of the entries to apply; null is logged and ignored.</param>
        private void TryApply(RaftLog lastApplyableLog)
        {
            if (null == lastApplyableLog)
            {
                logger.Error("lastApplyableLog is null.");
                return;
            }

            long next = LastApplied + 1;
            while (next <= lastApplyableLog.Index)
            {
                var entry = ReadLogStart(next);
                if (null == entry)
                {
                    return; // no further entries
                }
                next = entry.Index + 1;

                var localStep = Raft.RaftConfig.AutoKeyLocalStep;
                if (localStep > 0 && entry.Log.UniqueRequestId > 0)
                {
                    // Used to keep a request from being executed twice.
                    // Requests of each Raft.Agent need to be processed in order.
                    // see Net.cs Server.DispatchProtocol
                    var appInstance = entry.Log.UniqueRequestId % localStep;
                    LastAppliedAppRpcSessionId[appInstance] = entry.Log.UniqueRequestId;
                }

                entry.Log.Apply(Raft.StateMachine);
                // Updated per entry because the loop may exit early.
                LastApplied = entry.Index;

                if (WaitApplyFutures.TryRemove(entry.Index, out var waiter))
                {
                    waiter.SetResult(0);
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Opens the "rafts" and "logs" databases under the configured DbHome and
        /// restores Term, VoteFor and the log index bookkeeping from them.
        /// </summary>
        /// <param name="raft">The owning Raft instance.</param>
        public LogSequence(Raft raft)
        {
            Raft = raft;
            var dbOptions = new DbOptions().SetCreateIfMissing(true);

            Rafts = RocksDb.Open(dbOptions, Path.Combine(Raft.RaftConfig.DbHome, "rafts"));
            {
                // Term is stored under the key built from int 0.
                var termKeyBuffer = ByteBuffer.Allocate();
                termKeyBuffer.WriteInt(0);
                RaftsTermKey = termKeyBuffer.Copy();
                var storedTerm = Rafts.Get(RaftsTermKey);
                if (null == storedTerm)
                {
                    Term = 0;
                }
                else
                {
                    Term = ByteBuffer.Wrap(storedTerm).ReadLong();
                }

                // VoteFor is stored under the key built from int 1.
                var voteKeyBuffer = ByteBuffer.Allocate();
                voteKeyBuffer.WriteInt(1);
                RaftsVoteForKey = voteKeyBuffer.Copy();
                var storedVote = Rafts.Get(RaftsVoteForKey);
                if (null == storedVote)
                {
                    VoteFor = string.Empty;
                }
                else
                {
                    VoteFor = ByteBuffer.Wrap(storedVote).ReadString();
                }
            }


            Logs = RocksDb.Open(dbOptions, Path.Combine(Raft.RaftConfig.DbHome, "logs"));
            {
                // Last log index comes from the final stored entry.
                using var tail = Logs.NewIterator();
                tail.SeekToLast();
                if (false == tail.Valid())
                {
                    // Empty log: insert an entry at index 0 to serve as prev.
                    SaveLog(new RaftLog(Term, 0, new HeartbeatLog()));
                    LastIndex = 0;
                }
                else
                {
                    LastIndex = RaftLog.Decode(new Binary(tail.Value()), Raft.StateMachine.LogFactory).Index;
                }

                using var head = Logs.NewIterator();
                head.SeekToFirst();
                FirstIndex = RaftLog.Decode(new Binary(head.Value()), Raft.StateMachine.LogFactory).Index;

                // [Note] After a snapshot FirstIndex moves forward and no longer starts at 0.
                LastApplied = FirstIndex;
                CommitIndex = FirstIndex;
            }
        }