/// <summary>
/// Verifies that at least a quorum of the cluster's journal nodes hold a
/// finalized log file starting at <paramref name="segmentTxId"/>, and that
/// every finalized copy found ends at <paramref name="expectedEndTxId"/>.
/// Nodes with no log file for the segment, or with a copy that is still in
/// progress, are not counted. Fails the running test otherwise.
/// </summary>
/// <param name="cluster">Cluster whose per-node current directories are inspected.</param>
/// <param name="segmentTxId">First transaction id of the segment to look up on each node.</param>
/// <param name="expectedEndTxId">Last transaction id every finalized copy must report.</param>
/// <exception cref="System.IO.IOException"/>
private void CheckRecovery(MiniJournalCluster cluster, long segmentTxId, long expectedEndTxId)
{
    int finalizedCount = 0;
    for (int nodeIndex = 0; nodeIndex < cluster.GetNumNodes(); nodeIndex++)
    {
        FilePath currentDir = cluster.GetCurrentDir(nodeIndex, QJMTestUtil.Jid);
        FileJournalManager.EditLogFile segmentFile = FileJournalManager.GetLogFile(currentDir, segmentTxId);

        // Only a segment that exists on this node and is no longer in
        // progress counts towards the quorum.
        if (segmentFile != null && !segmentFile.IsInProgress())
        {
            finalizedCount++;
            if (segmentFile.GetLastTxId() != expectedEndTxId)
            {
                NUnit.Framework.Assert.Fail("File " + segmentFile + " finalized to wrong txid, expected " + expectedEndTxId);
            }
        }
    }

    int quorumSize = cluster.GetQuorumSize();
    if (finalizedCount < quorumSize)
    {
        NUnit.Framework.Assert.Fail("Did not find a quorum of finalized logs starting at " + segmentTxId);
    }
}
/// <summary>
/// Covers recovery when, right at the start of a new segment, one journal
/// node has received a transaction that the other nodes have not.
/// </summary>
/// <param name="nodeWithOneTxn">
/// Index of the journal node that ends up holding the single transaction of
/// the new segment. The other two roles (node with an empty segment, node
/// with no segment at all) are derived from it modulo 3.
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
{
    int emptySegmentNode = (nodeWithOneTxn + 1) % 3;
    int absentSegmentNode = (nodeWithOneTxn + 2) % 3;

    // Write and finalize segment 1-3 everywhere, then take down the node that
    // must never see the next segment.
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    var loggersAfterFirstSegment = qjm.GetLoggerSetForTests();
    WaitForAllPendingCalls(loggersAfterFirstSegment);
    cluster.GetJournalNode(absentSegmentNode).StopAndJoin(0);

    // The new segment is opened on the two nodes that are still running.
    EditLogOutputStream segmentStream = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        var loggersAfterOpen = qjm.GetLoggerSetForTests();
        WaitForAllPendingCalls(loggersAfterOpen);

        // Make the write of txn 4 fail on one of the two live nodes, so the
        // transaction lands on a single node only.
        FailLoggerAtTxn(spies[emptySegmentNode], 4);
        try
        {
            QJMTestUtil.WriteTxns(segmentStream, 4, 1);
            NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
        }
        catch (QuorumException quorumFailure)
        {
            GenericTestUtils.AssertExceptionContains("mock failure", quorumFailure);
        }
    }
    finally
    {
        segmentStream.Abort();
    }

    // Restart the node that was down.
    cluster.RestartJournalNode(absentSegmentNode);

    // Expected on-disk state before a new QJM is created:
    //   emptySegmentNode:  1-3 finalized, 4_inprogress (empty)
    //   nodeWithOneTxn:    1-3 finalized, 4_inprogress (1 txn)
    //   absentSegmentNode: 1-3 finalized
    var emptySegmentDir = cluster.GetCurrentDir(emptySegmentNode, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(
        emptySegmentDir,
        "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3),
        NNStorage.GetInProgressEditsFileName(4));

    var oneTxnDir = cluster.GetCurrentDir(nodeWithOneTxn, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(
        oneTxnDir,
        "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3),
        NNStorage.GetInProgressEditsFileName(4));

    var absentSegmentDir = cluster.GetCurrentDir(absentSegmentNode, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(
        absentSegmentDir,
        "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3));

    // Node 2 is always the one stopped for recovery. The test is run three
    // times with the roles rotated across the nodes, so every permutation
    // is exercised.
    cluster.GetJournalNode(2).StopAndJoin(0);

    qjm = CreateSpyingQJM();
    qjm.RecoverUnfinalizedSegments();

    // Nodes 0 and 1 are the ones still up during recovery.
    bool txnHolderRespondedDuringRecovery = nodeWithOneTxn == 0 || nodeWithOneTxn == 1;
    if (!txnHolderRespondedDuringRecovery)
    {
        // The only copy of txid 4 was on a node that did not respond, so
        // only 1-3 is recovered and a fresh segment can start at 4.
        CheckRecovery(cluster, 1, 3);
        QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
    }
    else
    {
        // The node holding txid 4 responded during recovery, so txid 4 must
        // have been recovered and the next segment starts at 5.
        CheckRecovery(cluster, 4, 4);
        QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
    }
}