/// <summary>Run recovery/synchronization for a specific segment.</summary>
/// <remarks>
/// Runs the prepare, accept and finalize phases against the loggers, waiting
/// for a write quorum after each phase.
/// Postconditions (when at least one responder reported the segment):
/// <ul>
/// <li>This segment will be finalized on a majority
/// of nodes.</li>
/// <li>All nodes which contain the finalized segment will
/// agree on the length.</li>
/// </ul>
/// If none of the prepare responses contains a segment state, the method
/// logs that fact and returns without accepting or finalizing anything.
/// </remarks>
/// <param name="segmentTxId">the starting txid of the segment; must be greater than zero</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.InvalidOperationException">
/// if a responder reports a last committed txid beyond the end of the
/// segment chosen for synchronization
/// </exception>
private void RecoverUnclosedSegment(long segmentTxId)
{
    Preconditions.CheckArgument(segmentTxId > 0);
    Log.Info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

    // Step 1. Prepare recovery
    QuorumCall<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepare =
        loggers.PrepareRecovery(segmentTxId);
    IDictionary<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepareResponses =
        loggers.WaitForWriteQuorum(prepare, prepareRecoveryTimeoutMs,
            "prepareRecovery(" + segmentTxId + ")");
    Log.Info("Recovery prepare phase complete. Responses:\n"
        + QuorumCall.MapToString(prepareResponses));

    // Determine the logger who either:
    // a) Has already accepted a previous proposal that's higher than any
    //    other
    //
    // OR, if no such logger exists:
    //
    // b) Has the longest log starting at this transaction ID
    // TODO: we should collect any "ties" and pass the URL for all of them
    // when syncing, so we can tolerate failure during recovery better.
    KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> bestEntry =
        Sharpen.Collections.Max(prepareResponses, SegmentRecoveryComparator.Instance);
    AsyncLogger bestLogger = bestEntry.Key;
    QJournalProtocolProtos.PrepareRecoveryResponseProto bestResponse = bestEntry.Value;

    // Log the above decision, check invariants.
    if (bestResponse.HasAcceptedInEpoch())
    {
        Log.Info("Using already-accepted recovery for segment "
            + "starting at txid " + segmentTxId + ": " + bestEntry);
    }
    else
    {
        if (bestResponse.HasSegmentState())
        {
            Log.Info("Using longest log: " + bestEntry);
        }
        else
        {
            // None of the responses to prepareRecovery() had a segment at the given
            // txid. This can happen for example in the following situation:
            // - 3 JNs: JN1, JN2, JN3
            // - writer starts segment 101 on JN1, then crashes before
            //   writing to JN2 and JN3
            // - during newEpoch(), we saw the segment on JN1 and decide to
            //   recover segment 101
            // - before prepare(), JN1 crashes, and we only talk to JN2 and JN3,
            //   neither of which has any entry for this log.
            // In this case, it is allowed to do nothing for recovery, since the
            // segment wasn't started on a quorum of nodes.
            // Sanity check: we should only get here if none of the responses had
            // a log. This should be a postcondition of the recovery comparator,
            // but a bug in the comparator might cause us to get here.
            foreach (QJournalProtocolProtos.PrepareRecoveryResponseProto resp in prepareResponses.Values)
            {
                System.Diagnostics.Debug.Assert(!resp.HasSegmentState(),
                    "One of the loggers had a response, but no best logger " + "was found.");
            }
            Log.Info("None of the responders had a log to recover: "
                + QuorumCall.MapToString(prepareResponses));
            return;
        }
    }

    QJournalProtocolProtos.SegmentStateProto logToSync = bestResponse.GetSegmentState();
    System.Diagnostics.Debug.Assert(segmentTxId == logToSync.GetStartTxId());

    // Sanity check: none of the loggers should be aware of a higher
    // txid than the txid we intend to truncate to
    foreach (KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> e in prepareResponses)
    {
        AsyncLogger logger = e.Key;
        QJournalProtocolProtos.PrepareRecoveryResponseProto resp = e.Value;
        if (resp.HasLastCommittedTxId() && resp.GetLastCommittedTxId() > logToSync.GetEndTxId())
        {
            // Broken invariant rather than an I/O failure, so report it with a
            // specific exception type instead of the bare System.Exception.
            throw new InvalidOperationException("Decided to synchronize log to " + logToSync
                + " but logger " + logger + " had seen txid " + resp.GetLastCommittedTxId()
                + " committed");
        }
    }

    // Step 2. Ask the loggers to accept the chosen segment, fetching it from
    // the best logger's URL.
    Uri syncFromUrl = bestLogger.BuildURLToFetchLogs(segmentTxId);
    QuorumCall<AsyncLogger, Void> accept = loggers.AcceptRecovery(logToSync, syncFromUrl);
    loggers.WaitForWriteQuorum(accept, acceptRecoveryTimeoutMs,
        "acceptRecovery(" + TextFormat.ShortDebugString(logToSync) + ")");

    // Step 3. Finalize.
    // If one of the loggers above missed the synchronization step above, but
    // we send a finalize() here, that's OK. It validates the log before
    // finalizing. Hence, even if it is not "in sync", it won't incorrectly
    // finalize.
    QuorumCall<AsyncLogger, Void> finalize =
        loggers.FinalizeLogSegment(logToSync.GetStartTxId(), logToSync.GetEndTxId());
    // .NET composite formatting needs indexed placeholders; Java-style "%s"
    // would be emitted literally and the txids silently dropped.
    loggers.WaitForWriteQuorum(finalize, finalizeSegmentTimeoutMs,
        string.Format("finalizeLogSegment({0}-{1})", logToSync.GetStartTxId(), logToSync.GetEndTxId()));
}
/// <summary>
/// Test that the JournalNode performs correctly as a Paxos
/// <em>Acceptor</em> process.
/// </summary>
/// <remarks>
/// Walks the channel through: prepare without an epoch (rejected), prepare
/// with no segment, prepare after a segment is written, accept, prepare from
/// a newer epoch (sees the accepted value), and finally prepare/accept from
/// the older epoch (both rejected).
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAcceptRecoveryBehavior()
{
    // We need to run newEpoch() first, or else we have no way to distinguish
    // different proposals for the same decision.
    try
    {
        ch.PrepareRecovery(1L).Get();
        NUnit.Framework.Assert.Fail("Did not throw IllegalState when trying to run paxos without an epoch");
    }
    catch (ExecutionException ise)
    {
        GenericTestUtils.AssertExceptionContains("bad epoch", ise);
    }

    ch.NewEpoch(1).Get();
    ch.SetEpoch(1);

    // prepare() with no previously accepted value and no logs present
    QJournalProtocolProtos.PrepareRecoveryResponseProto prep = ch.PrepareRecovery(1L).Get();
    System.Console.Error.WriteLine("Prep: " + prep);
    NUnit.Framework.Assert.IsFalse(prep.HasAcceptedInEpoch());
    NUnit.Framework.Assert.IsFalse(prep.HasSegmentState());

    // Make a log segment, and prepare again -- this time should see the
    // segment existing.
    ch.StartLogSegment(1L, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
    ch.SendEdits(1L, 1L, 1, QJMTestUtil.CreateTxnData(1, 1)).Get();
    prep = ch.PrepareRecovery(1L).Get();
    System.Console.Error.WriteLine("Prep: " + prep);
    NUnit.Framework.Assert.IsFalse(prep.HasAcceptedInEpoch());
    NUnit.Framework.Assert.IsTrue(prep.HasSegmentState());

    // accept() should save the accepted value in persistent storage
    ch.AcceptRecovery(prep.GetSegmentState(), new Uri("file:///dev/null")).Get();

    // So another prepare() call from a new epoch would return this value.
    // Wait for the epoch change to complete (as done for epoch 1 above) so a
    // failure surfaces here instead of being silently dropped.
    ch.NewEpoch(2).Get();
    ch.SetEpoch(2);
    prep = ch.PrepareRecovery(1L).Get();
    NUnit.Framework.Assert.AreEqual(1L, prep.GetAcceptedInEpoch());
    NUnit.Framework.Assert.AreEqual(1L, prep.GetSegmentState().GetEndTxId());

    // A prepare() or accept() call from an earlier epoch should now be rejected
    ch.SetEpoch(1);
    try
    {
        ch.PrepareRecovery(1L).Get();
        NUnit.Framework.Assert.Fail("prepare from earlier epoch not rejected");
    }
    catch (ExecutionException ioe)
    {
        GenericTestUtils.AssertExceptionContains("epoch 1 is less than the last promised epoch 2", ioe);
    }

    try
    {
        ch.AcceptRecovery(prep.GetSegmentState(), new Uri("file:///dev/null")).Get();
        NUnit.Framework.Assert.Fail("accept from earlier epoch not rejected");
    }
    catch (ExecutionException ioe)
    {
        GenericTestUtils.AssertExceptionContains("epoch 1 is less than the last promised epoch 2", ioe);
    }
}