public virtual void TestMissFinalizeAndNextStart()
{
    // Scenario: one logger misses the finalize of segment 1-3 and the start
    // of segment 4, another logger fails on txn 4, and recovery is then run
    // with the third logger shut down.
    const long firstSegmentStart = 1L;
    const long firstSegmentEnd = 3L;
    const long secondSegmentStart = 4L;

    // Logger 0: inject failures so it misses finalize(1-3) and start(4).
    TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
        .When(spies[0])
        .FinalizeLogSegment(
            Org.Mockito.Mockito.Eq(firstSegmentStart),
            Org.Mockito.Mockito.Eq(firstSegmentEnd));
    TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
        .When(spies[0])
        .StartLogSegment(
            Org.Mockito.Mockito.Eq(secondSegmentStart),
            Org.Mockito.Mockito.Eq(NameNodeLayoutVersion.CurrentLayoutVersion));

    // Logger 1: inject a failure at txn id 4.
    FailLoggerAtTxn(spies[1], secondSegmentStart);

    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    EditLogOutputStream segmentStream = qjm.StartLogSegment(
        secondSegmentStart, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        QJMTestUtil.WriteTxns(segmentStream, secondSegmentStart, 1);
        NUnit.Framework.Assert.Fail("Did not fail to write");
    }
    catch (QuorumException quorumFailure)
    {
        // The write is expected to fail: logger 1 has the injected fault
        // and logger 0 should report that the writer is out of sync.
        GenericTestUtils.AssertExceptionContains("Writer out of sync", quorumFailure);
    }
    finally
    {
        segmentStream.Abort();
        qjm.Close();
    }

    // Expected state at this point:
    //   Logger 0: 1-3 in-progress (it missed the finalize)
    //   Logger 1: 1-3 finalized
    //   Logger 2: 1-3 finalized, 4 in-progress with one txn

    // Take logger 2 down so that it cannot take part in recovery.
    cluster.GetJournalNode(2).StopAndJoin(0);

    qjm = CreateSpyingQJM();
    long lastRecoveredTxId = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
    NUnit.Framework.Assert.AreEqual(firstSegmentEnd, lastRecoveredTxId);
}
/// <summary>
/// Drive the three loggers into diverging in-progress states:
/// - JN0: edits 1-3 in progress
/// - JN1: edits 1-4 in progress
/// - JN2: edits 1-5 in progress
/// No paxos info is associated with any of the loggers.
/// </summary>
/// <exception cref="System.Exception"/>
private void SetupLoggers345()
{
    const long logger0FailTxId = 4;
    const long logger1FailTxId = 5;

    EditLogOutputStream segmentStream = qjm.StartLogSegment(
        1, NameNodeLayoutVersion.CurrentLayoutVersion);

    // Inject the faults: logger 0 fails at txn 4, logger 1 fails at txn 5.
    FailLoggerAtTxn(spies[0], logger0FailTxId);
    FailLoggerAtTxn(spies[1], logger1FailTxId);

    // Txns 1-3 are written before either injected fault applies.
    QJMTestUtil.WriteTxns(segmentStream, 1, 3);

    // Txn 4 should still succeed, reaching 2/3 loggers.
    QJMTestUtil.WriteTxns(segmentStream, logger0FailTxId, 1);

    // Txn 5 should reach only one logger (index 2), which is a minority,
    // so the write as a whole is expected to fail.
    try
    {
        QJMTestUtil.WriteTxns(segmentStream, logger1FailTxId, 1);
        NUnit.Framework.Assert.Fail("Did not fail to write when only a minority succeeded");
    }
    catch (QuorumException quorumFailure)
    {
        GenericTestUtils.AssertExceptionContains(
            "too many exceptions to achieve quorum size 2/3", quorumFailure);
    }
}
// Starts a log segment at txid, writes up to numTxns transactions one at a
// time, then closes the stream and finalizes the segment. Any exception
// raised along the way is stored in thrown.held instead of propagating.
// Returns the id of the last transaction whose write returned without
// throwing (txid - 1 when none did). The cluster argument is not used here.
private long WriteSegmentUntilCrash(MiniJournalCluster cluster, QuorumJournalManager qjm, long txid, int numTxns, Holder<Exception> thrown)
{
    long segmentStart = txid;
    long highestAcked = segmentStart - 1;
    try
    {
        EditLogOutputStream segmentStream = qjm.StartLogSegment(
            segmentStart, NameNodeLayoutVersion.CurrentLayoutVersion);

        for (int written = 0; written < numTxns; written++)
        {
            long currentTxId = segmentStart + written;
            QJMTestUtil.WriteTxns(segmentStream, currentTxId, 1);
            // Only reached when the write above did not throw.
            highestAcked = currentTxId;
        }

        segmentStream.Close();
        qjm.FinalizeLogSegment(segmentStart, highestAcked);
    }
    catch (Exception failure)
    {
        // Hand the failure to the caller through the holder.
        thrown.held = failure;
    }

    return highestAcked;
}
/// <summary>
/// Exercise recovery when, right at the start of a segment, one JN has
/// received a transaction while the others have not.
/// </summary>
/// <param name="nodeWithOneTxn">
/// Index of the journal node that ends up holding the single transaction of
/// segment 4; the "empty segment" and "missing segment" roles are assigned
/// to the next two indices modulo 3.
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
{
    const int journalNodeCount = 3;
    int emptySegmentNode = (nodeWithOneTxn + 1) % journalNodeCount;
    int absentSegmentNode = (nodeWithOneTxn + 2) % journalNodeCount;

    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
    cluster.GetJournalNode(absentSegmentNode).StopAndJoin(0);

    // With one node down, the segment gets opened on 2/3 nodes.
    EditLogOutputStream segmentStream = qjm.StartLogSegment(
        4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

        // Arrange for the transaction to land on only 1/3 nodes.
        FailLoggerAtTxn(spies[emptySegmentNode], 4);
        try
        {
            QJMTestUtil.WriteTxns(segmentStream, 4, 1);
            NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
        }
        catch (QuorumException quorumFailure)
        {
            GenericTestUtils.AssertExceptionContains("mock failure", quorumFailure);
        }
    }
    finally
    {
        segmentStream.Abort();
    }

    // Bring the stopped JN back.
    cluster.RestartJournalNode(absentSegmentNode);

    // Expected on-disk state before a new QJM is created:
    //   A: emptySegmentNode:  1-3 finalized, 4_inprogress (empty)
    //   B: nodeWithOneTxn:    1-3 finalized, 4_inprogress (1 txn)
    //   C: absentSegmentNode: 1-3 finalized
    // A and B expect the same file names, so they are checked in that order.
    foreach (int node in new[] { emptySegmentNode, nodeWithOneTxn })
    {
        GenericTestUtils.AssertGlobEquals(
            cluster.GetCurrentDir(node, QJMTestUtil.Jid),
            "edits_.*",
            NNStorage.GetFinalizedEditsFileName(1, 3),
            NNStorage.GetInProgressEditsFileName(4));
    }
    GenericTestUtils.AssertGlobEquals(
        cluster.GetCurrentDir(absentSegmentNode, QJMTestUtil.Jid),
        "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3));

    // Stop one of the nodes. The test is run three times with the node
    // roles rotated, so all the permutations get covered.
    cluster.GetJournalNode(2).StopAndJoin(0);

    qjm = CreateSpyingQJM();
    qjm.RecoverUnfinalizedSegments();

    if (nodeWithOneTxn != 0 && nodeWithOneTxn != 1)
    {
        // The node holding the transaction did not respond during recovery,
        // so only 1-3 should have been recovered and a new segment should
        // be startable at 4.
        CheckRecovery(cluster, 1, 3);
        QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
    }
    else
    {
        // The node holding the transaction was among the responders during
        // recovery, so txid 4 should have been recovered.
        CheckRecovery(cluster, 4, 4);
        QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
    }
}