Beispiel #1
0
 /// <summary>
 /// Test that the JournalNode performs correctly as a Paxos
 /// <em>Acceptor</em> process.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestAcceptRecoveryBehavior()
 {
     // Message expected when a request carries an epoch older than the promised one.
     string staleEpochMessage = "epoch 1 is less than the last promised epoch 2";
     // Placeholder sync URL handed to acceptRecovery() in this test.
     Uri dummySyncUrl = new Uri("file:///dev/null");

     // We need to run newEpoch() first, or else we have no way to distinguish
     // different proposals for the same decision.
     try
     {
         ch.PrepareRecovery(1L).Get();
         NUnit.Framework.Assert.Fail("Did not throw IllegalState when trying to run paxos without an epoch");
     }
     catch (ExecutionException ise)
     {
         GenericTestUtils.AssertExceptionContains("bad epoch", ise);
     }
     ch.NewEpoch(1).Get();
     ch.SetEpoch(1);

     // prepare() with no previously accepted value and no logs present
     QJournalProtocolProtos.PrepareRecoveryResponseProto prep = ch.PrepareRecovery(1L).Get();
     System.Console.Error.WriteLine("Prep: " + prep);
     NUnit.Framework.Assert.IsFalse(prep.HasAcceptedInEpoch());
     NUnit.Framework.Assert.IsFalse(prep.HasSegmentState());

     // Make a log segment, and prepare again -- this time should see the
     // segment existing.
     ch.StartLogSegment(1L, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
     ch.SendEdits(1L, 1L, 1, QJMTestUtil.CreateTxnData(1, 1)).Get();
     prep = ch.PrepareRecovery(1L).Get();
     System.Console.Error.WriteLine("Prep: " + prep);
     NUnit.Framework.Assert.IsFalse(prep.HasAcceptedInEpoch());
     NUnit.Framework.Assert.IsTrue(prep.HasSegmentState());

     // accept() should save the accepted value in persistent storage
     ch.AcceptRecovery(prep.GetSegmentState(), dummySyncUrl).Get();

     // So another prepare() call from a new epoch would return this value.
     // Wait on the returned future (as is done for epoch 1 above) so that a
     // failed newEpoch(2) is reported here instead of being silently dropped,
     // and so the assertions below do not depend on call ordering.
     ch.NewEpoch(2).Get();
     ch.SetEpoch(2);
     prep = ch.PrepareRecovery(1L).Get();
     NUnit.Framework.Assert.AreEqual(1L, prep.GetAcceptedInEpoch());
     NUnit.Framework.Assert.AreEqual(1L, prep.GetSegmentState().GetEndTxId());

     // A prepare() or accept() call from an earlier epoch should now be rejected
     ch.SetEpoch(1);
     try
     {
         ch.PrepareRecovery(1L).Get();
         NUnit.Framework.Assert.Fail("prepare from earlier epoch not rejected");
     }
     catch (ExecutionException ioe)
     {
         GenericTestUtils.AssertExceptionContains(staleEpochMessage, ioe);
     }
     try
     {
         ch.AcceptRecovery(prep.GetSegmentState(), dummySyncUrl).Get();
         NUnit.Framework.Assert.Fail("accept from earlier epoch not rejected");
     }
     catch (ExecutionException ioe)
     {
         GenericTestUtils.AssertExceptionContains(staleEpochMessage, ioe);
     }
 }
        /// <summary>Run recovery/synchronization for a specific segment.</summary>
        /// <remarks>
        /// Postconditions:
        /// <ul>
        /// <li>This segment will be finalized on a majority
        /// of nodes.</li>
        /// <li>All nodes which contain the finalized segment will
        /// agree on the length.</li>
        /// </ul>
        /// </remarks>
        /// <param name="segmentTxId">the starting txid of the segment</param>
        /// <exception cref="System.IO.IOException"/>
        private void RecoverUnclosedSegment(long segmentTxId)
        {
            Preconditions.CheckArgument(segmentTxId > 0);
            Log.Info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

            // Step 1. Prepare recovery: ask the loggers what they hold for this
            // segment and wait for a write quorum of responses.
            QuorumCall<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepare
                = loggers.PrepareRecovery(segmentTxId);
            IDictionary<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepareResponses
                = loggers.WaitForWriteQuorum(prepare, prepareRecoveryTimeoutMs,
                                             "prepareRecovery(" + segmentTxId + ")");

            Log.Info("Recovery prepare phase complete. Responses:\n"
                     + QuorumCall.MapToString(prepareResponses));

            // Determine the logger who either:
            // a) Has already accepted a previous proposal that's higher than any
            //    other
            //
            //  OR, if no such logger exists:
            //
            // b) Has the longest log starting at this transaction ID
            // TODO: we should collect any "ties" and pass the URL for all of them
            // when syncing, so we can tolerate failure during recovery better.
            KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> bestEntry
                = Sharpen.Collections.Max(prepareResponses, SegmentRecoveryComparator.Instance);
            AsyncLogger bestLogger = bestEntry.Key;
            QJournalProtocolProtos.PrepareRecoveryResponseProto bestResponse = bestEntry.Value;

            // Log the above decision, check invariants.
            if (bestResponse.HasAcceptedInEpoch())
            {
                Log.Info("Using already-accepted recovery for segment " + "starting at txid " + segmentTxId
                         + ": " + bestEntry);
            }
            else if (bestResponse.HasSegmentState())
            {
                Log.Info("Using longest log: " + bestEntry);
            }
            else
            {
                // None of the responses to prepareRecovery() had a segment at the given
                // txid. This can happen for example in the following situation:
                // - 3 JNs: JN1, JN2, JN3
                // - writer starts segment 101 on JN1, then crashes before
                //   writing to JN2 and JN3
                // - during newEpoch(), we saw the segment on JN1 and decide to
                //   recover segment 101
                // - before prepare(), JN1 crashes, and we only talk to JN2 and JN3,
                //   neither of which has any entry for this log.
                // In this case, it is allowed to do nothing for recovery, since the
                // segment wasn't started on a quorum of nodes.
                // Sanity check: we should only get here if none of the responses had
                // a log. This should be a postcondition of the recovery comparator,
                // but a bug in the comparator might cause us to get here.
                foreach (QJournalProtocolProtos.PrepareRecoveryResponseProto resp in prepareResponses.Values)
                {
                    System.Diagnostics.Debug.Assert(!resp.HasSegmentState(), "One of the loggers had a response, but no best logger "
                                                    + "was found.");
                }
                Log.Info("None of the responders had a log to recover: "
                         + QuorumCall.MapToString(prepareResponses));
                return;
            }

            QJournalProtocolProtos.SegmentStateProto logToSync = bestResponse.GetSegmentState();
            System.Diagnostics.Debug.Assert(segmentTxId == logToSync.GetStartTxId());

            // Sanity check: none of the loggers should be aware of a higher
            // txid than the txid we intend to truncate to
            foreach (KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> e
                     in prepareResponses)
            {
                AsyncLogger logger = e.Key;
                QJournalProtocolProtos.PrepareRecoveryResponseProto resp = e.Value;
                if (resp.HasLastCommittedTxId() && resp.GetLastCommittedTxId() > logToSync.GetEndTxId())
                {
                    throw new Exception("Decided to synchronize log to " + logToSync + " but logger "
                                        + logger + " had seen txid " + resp.GetLastCommittedTxId() + " committed");
                }
            }

            // Step 2. Accept recovery: have the loggers take the chosen segment
            // state, fetching it from the best logger's URL.
            Uri syncFromUrl = bestLogger.BuildURLToFetchLogs(segmentTxId);
            QuorumCall<AsyncLogger, Void> accept = loggers.AcceptRecovery(logToSync, syncFromUrl);
            loggers.WaitForWriteQuorum(accept, acceptRecoveryTimeoutMs,
                                       "acceptRecovery(" + TextFormat.ShortDebugString(logToSync) + ")");

            // Step 3. Finalize the segment.
            // If one of the loggers above missed the synchronization step above, but
            // we send a finalize() here, that's OK. It validates the log before
            // finalizing. Hence, even if it is not "in sync", it won't incorrectly
            // finalize.
            QuorumCall<AsyncLogger, Void> finalize = loggers.FinalizeLogSegment(
                logToSync.GetStartTxId(), logToSync.GetEndTxId());

            // Use .NET composite-format placeholders ({0}, {1}); Java-style "%s"
            // is not substituted by string.Format and would leave the txids out
            // of the operation name.
            loggers.WaitForWriteQuorum(finalize, finalizeSegmentTimeoutMs,
                                       string.Format("finalizeLogSegment({0}-{1})",
                                                     logToSync.GetStartTxId(), logToSync.GetEndTxId()));
        }
Beispiel #3
0
                              > MakeEntry(QJournalProtocolProtos.PrepareRecoveryResponseProto proto)
 {
     return(Maps.ImmutableEntry(Org.Mockito.Mockito.Mock <AsyncLogger>(), proto));
 }