コード例 #1
0
        /// <summary>
        /// Starts a new epoch across the loggers and, if any logger reported a last
        /// segment txid, recovers the segment with the highest such txid. On
        /// completion this instance is marked as the active writer.
        /// </summary>
        /// <remarks>
        /// Must not be called when this instance is already the active writer; that
        /// is enforced with a state precondition check.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        public override void RecoverUnfinalizedSegments()
        {
            Preconditions.CheckState(!isActiveWriter, "already active writer");
            Log.Info("Starting recovery process for unclosed journal segments...");

            IDictionary<AsyncLogger, QJournalProtocolProtos.NewEpochResponseProto> epochResponses =
                CreateNewUniqueEpoch();
            Log.Info("Successfully started new epoch " + loggers.GetEpoch());
            if (Log.IsDebugEnabled())
            {
                Log.Debug("newEpoch(" + loggers.GetEpoch() + ") responses:\n"
                          + QuorumCall.MapToString(epochResponses));
            }

            // Sentinel meaning "no logger reported a segment".
            const long noSegmentSeen = long.MinValue;
            long newestSegmentTxId = noSegmentSeen;
            foreach (QJournalProtocolProtos.NewEpochResponseProto response in epochResponses.Values)
            {
                if (!response.HasLastSegmentTxId())
                {
                    continue;
                }
                long candidateTxId = response.GetLastSegmentTxId();
                if (candidateTxId > newestSegmentTxId)
                {
                    newestSegmentTxId = candidateTxId;
                }
            }

            // On a completely fresh system, none of the journals have any
            // segments, so there's nothing to recover.
            if (newestSegmentTxId != noSegmentSeen)
            {
                RecoverUnclosedSegment(newestSegmentTxId);
            }
            isActiveWriter = true;
        }
コード例 #2
0
        /// <summary>Run recovery/synchronization for a specific segment.</summary>
        /// <remarks>
        /// The method issues three quorum calls in order, waiting for a write quorum
        /// after each one: <c>PrepareRecovery</c>, <c>AcceptRecovery</c> and
        /// <c>FinalizeLogSegment</c>. Between prepare and accept it picks the best
        /// prepare response using <c>SegmentRecoveryComparator</c> and verifies that
        /// no responder reports a committed txid beyond the end of the chosen segment.
        /// If the best response carries neither an accepted recovery nor a segment
        /// state, the method logs that fact and returns without accepting or
        /// finalizing anything.
        /// Postconditions (when recovery is actually performed):
        /// <ul>
        /// <li>This segment will be finalized on a majority
        /// of nodes.</li>
        /// <li>All nodes which contain the finalized segment will
        /// agree on the length.</li>
        /// </ul>
        /// </remarks>
        /// <param name="segmentTxId">the starting txid of the segment; must be greater than zero</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.InvalidOperationException">
        /// if a responder reports a committed txid higher than the end txid of the
        /// segment chosen for synchronization
        /// </exception>
        private void RecoverUnclosedSegment(long segmentTxId)
        {
            Preconditions.CheckArgument(segmentTxId > 0);
            Log.Info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

            // Step 1. Prepare recovery
            QuorumCall<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepare
                = loggers.PrepareRecovery(segmentTxId);
            IDictionary<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepareResponses
                = loggers.WaitForWriteQuorum(prepare, prepareRecoveryTimeoutMs, "prepareRecovery("
                                             + segmentTxId + ")");

            Log.Info("Recovery prepare phase complete. Responses:\n" + QuorumCall.MapToString
                         (prepareResponses));

            // Determine the logger who either:
            // a) Has already accepted a previous proposal that's higher than any
            //    other
            //
            //  OR, if no such logger exists:
            //
            // b) Has the longest log starting at this transaction ID
            // TODO: we should collect any "ties" and pass the URL for all of them
            // when syncing, so we can tolerate failure during recovery better.
            KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> bestEntry
                = Sharpen.Collections.Max(prepareResponses, SegmentRecoveryComparator.Instance);
            AsyncLogger bestLogger = bestEntry.Key;
            QJournalProtocolProtos.PrepareRecoveryResponseProto bestResponse = bestEntry.Value;

            // Log the above decision, check invariants.
            if (bestResponse.HasAcceptedInEpoch())
            {
                Log.Info("Using already-accepted recovery for segment " + "starting at txid " + segmentTxId
                         + ": " + bestEntry);
            }
            else if (bestResponse.HasSegmentState())
            {
                Log.Info("Using longest log: " + bestEntry);
            }
            else
            {
                // None of the responses to prepareRecovery() had a segment at the given
                // txid. This can happen for example in the following situation:
                // - 3 JNs: JN1, JN2, JN3
                // - writer starts segment 101 on JN1, then crashes before
                //   writing to JN2 and JN3
                // - during newEpoch(), we saw the segment on JN1 and decide to
                //   recover segment 101
                // - before prepare(), JN1 crashes, and we only talk to JN2 and JN3,
                //   neither of which has any entry for this log.
                // In this case, it is allowed to do nothing for recovery, since the
                // segment wasn't started on a quorum of nodes.
                // Sanity check: we should only get here if none of the responses had
                // a log. This should be a postcondition of the recovery comparator,
                // but a bug in the comparator might cause us to get here.
                foreach (QJournalProtocolProtos.PrepareRecoveryResponseProto resp in prepareResponses
                         .Values)
                {
                    System.Diagnostics.Debug.Assert(!resp.HasSegmentState(), "One of the loggers had a response, but no best logger "
                                                    + "was found.");
                }
                Log.Info("None of the responders had a log to recover: " + QuorumCall.MapToString
                             (prepareResponses));
                return;
            }

            QJournalProtocolProtos.SegmentStateProto logToSync = bestResponse.GetSegmentState();
            System.Diagnostics.Debug.Assert(segmentTxId == logToSync.GetStartTxId());

            // Sanity check: none of the loggers should be aware of a higher
            // txid than the txid we intend to truncate to
            foreach (KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> e
                     in prepareResponses)
            {
                AsyncLogger logger = e.Key;
                QJournalProtocolProtos.PrepareRecoveryResponseProto resp = e.Value;
                if (resp.HasLastCommittedTxId() && resp.GetLastCommittedTxId() > logToSync.GetEndTxId())
                {
                    // An invariant violation, not an I/O failure: report it with a
                    // specific exception type rather than the bare Exception base class.
                    throw new InvalidOperationException("Decided to synchronize log to " + logToSync + " but logger "
                                                        + logger + " had seen txid " + resp.GetLastCommittedTxId() + " committed");
                }
            }

            // Step 2. Ask the loggers to accept the chosen segment, fetching it from
            // the best logger's URL.
            Uri syncFromUrl = bestLogger.BuildURLToFetchLogs(segmentTxId);
            QuorumCall<AsyncLogger, Void> accept = loggers.AcceptRecovery(logToSync, syncFromUrl);

            loggers.WaitForWriteQuorum(accept, acceptRecoveryTimeoutMs, "acceptRecovery(" + TextFormat
                                       .ShortDebugString(logToSync) + ")");

            // Step 3. Finalize.
            // If one of the loggers above missed the synchronization step above, but
            // we send a finalize() here, that's OK. It validates the log before
            // finalizing. Hence, even if it is not "in sync", it won't incorrectly
            // finalize.
            QuorumCall<AsyncLogger, Void> finalize = loggers.FinalizeLogSegment(logToSync.GetStartTxId
                                                                                    (), logToSync.GetEndTxId());

            // Build the operation name by concatenation, like the other quorum calls
            // in this method. string.Format does not understand Java-style "%s"
            // placeholders and would emit them literally, dropping the txids.
            loggers.WaitForWriteQuorum(finalize, finalizeSegmentTimeoutMs, "finalizeLogSegment("
                                       + logToSync.GetStartTxId() + "-" + logToSync.GetEndTxId() + ")");
        }