/// <summary>
/// Checks what a second QJM instance gets back from <c>SelectInputStreams</c>
/// while the shared <c>qjm</c> keeps writing segments.
/// </summary>
public virtual void TestReaderWhileAnotherWrites()
{
    QuorumJournalManager reader = CloseLater(CreateSpyingQJM());
    IList<EditLogInputStream> selected = Lists.NewArrayList();

    // Nothing has been written yet, so no stream may be selected.
    reader.SelectInputStreams(selected, 0, false);
    NUnit.Framework.Assert.AreEqual(0, selected.Count);

    // Write and finalize a first segment: 3 txns starting at txid 1.
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    reader.SelectInputStreams(selected, 0, false);
    try
    {
        NUnit.Framework.Assert.AreEqual(1, selected.Count);

        // Validate the actual stream contents.
        EditLogInputStream finalizedStream = selected[0];
        NUnit.Framework.Assert.AreEqual(1, finalizedStream.GetFirstTxId());
        NUnit.Framework.Assert.AreEqual(3, finalizedStream.GetLastTxId());
        QJMTestUtil.VerifyEdits(selected, 1, 3);
        NUnit.Framework.Assert.IsNull(finalizedStream.ReadOp());
    }
    finally
    {
        IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
        selected.Clear();
    }

    // Ensure correct results when there is a stream in-progress, but we don't
    // ask for in-progress.
    QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, false);
    reader.SelectInputStreams(selected, 0, false);
    try
    {
        NUnit.Framework.Assert.AreEqual(1, selected.Count);
        EditLogInputStream onlyFinalized = selected[0];
        NUnit.Framework.Assert.AreEqual(1, onlyFinalized.GetFirstTxId());
        NUnit.Framework.Assert.AreEqual(3, onlyFinalized.GetLastTxId());
        QJMTestUtil.VerifyEdits(selected, 1, 3);
    }
    finally
    {
        IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
        selected.Clear();
    }

    // TODO: check results for selectInputStreams with inProgressOK = true.
    // This doesn't currently work, due to a bug where RedundantEditInputStream
    // throws an exception if there are any unvalidated in-progress edits in the list!
    // But, it shouldn't be necessary for current use cases.

    // Once the second segment is finalized the reader must see both segments.
    qjm.FinalizeLogSegment(4, 6);
    reader.SelectInputStreams(selected, 0, false);
    try
    {
        NUnit.Framework.Assert.AreEqual(2, selected.Count);
        NUnit.Framework.Assert.AreEqual(4, selected[1].GetFirstTxId());
        NUnit.Framework.Assert.AreEqual(6, selected[1].GetLastTxId());
        QJMTestUtil.VerifyEdits(selected, 1, 6);
    }
    finally
    {
        IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
        selected.Clear();
    }
}
/// <summary>
/// Starting from the "one JN has a segment with accepted recovery" edge case,
/// writes a newer in-progress segment while JN0 is down and checks that a later
/// recovery (with JN0 back) recovers through txid 150.
/// </summary>
public virtual void TestNewerVersionOfSegmentWins()
{
    SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery();

    // Now start writing again without JN0 present:
    cluster.GetJournalNode(0).StopAndJoin(0);
    qjm = CreateSpyingQJM();
    try
    {
        long recoveredWithoutJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
        NUnit.Framework.Assert.AreEqual(100, recoveredWithoutJn0);

        // Write segment but do not finalize
        QJMTestUtil.WriteSegment(cluster, qjm, 101, 50, false);
    }
    finally
    {
        qjm.Close();
    }

    // Now try to recover a new writer, with JN0 present,
    // and ensure that all of the above-written transactions are recovered.
    cluster.RestartJournalNode(0);
    qjm = CreateSpyingQJM();
    try
    {
        long recoveredWithJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
        NUnit.Framework.Assert.AreEqual(150, recoveredWithJn0);
    }
    finally
    {
        qjm.Close();
    }
}
/// <summary>
/// Writes three ops across two flushes and stubs <c>SendEdits</c> on each of
/// the three spy loggers for the batch each flush is expected to send.
/// </summary>
public virtual void TestWriteEdits()
{
    EditLogOutputStream stm = CreateLogSegment();
    QJMTestUtil.WriteOp(stm, 1);
    QJMTestUtil.WriteOp(stm, 2);
    stm.SetReadyToFlush();
    QJMTestUtil.WriteOp(stm, 3);

    // The flush should log txn 1-2
    for (int loggerIdx = 0; loggerIdx < 3; loggerIdx++)
    {
        FutureReturns(null).When(spyLoggers[loggerIdx]).SendEdits(
            Matchers.AnyLong(), Matchers.Eq(1L), Matchers.Eq(2),
            Org.Mockito.Mockito.Any<byte[]>());
    }
    stm.Flush();

    // Another flush should now log txn #3
    stm.SetReadyToFlush();
    for (int loggerIdx = 0; loggerIdx < 3; loggerIdx++)
    {
        FutureReturns(null).When(spyLoggers[loggerIdx]).SendEdits(
            Matchers.AnyLong(), Matchers.Eq(3L), Matchers.Eq(1),
            Org.Mockito.Mockito.Any<byte[]>());
    }
    stm.Flush();
}
/// <summary>
/// Writes five one-txn segments plus some paxos and temporary recovery files,
/// purges everything older than txid 3, and checks what is left on JN0.
/// </summary>
public virtual void TestPurgeLogs()
{
    // Five finalized segments, one transaction each.
    for (int segmentTxId = 1; segmentTxId <= 5; segmentTxId++)
    {
        QJMTestUtil.WriteSegment(cluster, qjm, segmentTxId, 1, true);
    }

    FilePath currentDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(currentDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 1),
        NNStorage.GetFinalizedEditsFileName(2, 2),
        NNStorage.GetFinalizedEditsFileName(3, 3),
        NNStorage.GetFinalizedEditsFileName(4, 4),
        NNStorage.GetFinalizedEditsFileName(5, 5));

    FilePath paxosDir = new FilePath(currentDir, "paxos");
    GenericTestUtils.AssertExists(paxosDir);

    // Create new files in the paxos directory, which should get purged too.
    FilePath paxosFile1 = new FilePath(paxosDir, "1");
    NUnit.Framework.Assert.IsTrue(paxosFile1.CreateNewFile());
    FilePath paxosFile3 = new FilePath(paxosDir, "3");
    NUnit.Framework.Assert.IsTrue(paxosFile3.CreateNewFile());
    GenericTestUtils.AssertGlobEquals(paxosDir, "\\d+", "1", "3");

    // Create some temporary files of the sort that are used during recovery.
    FilePath epochTemp = new FilePath(currentDir, "edits_inprogress_0000000000000000001.epoch=140");
    NUnit.Framework.Assert.IsTrue(epochTemp.CreateNewFile());
    FilePath emptyTemp = new FilePath(currentDir, "edits_inprogress_0000000000000000002.empty");
    NUnit.Framework.Assert.IsTrue(emptyTemp.CreateNewFile());

    qjm.PurgeLogsOlderThan(3);

    // Log purging is asynchronous, so we have to wait for the calls
    // to be sent and respond before verifying.
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Older edits should be purged
    GenericTestUtils.AssertGlobEquals(currentDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(3, 3),
        NNStorage.GetFinalizedEditsFileName(4, 4),
        NNStorage.GetFinalizedEditsFileName(5, 5));

    // Older paxos files should be purged
    GenericTestUtils.AssertGlobEquals(paxosDir, "\\d+", "3");
}
/// <summary>Test finalizing a segment after some batch of edits were missed.</summary>
/// <remarks>
/// Finalization must fail, since the log is validated before finalization;
/// this is checked both on the live journal and on one re-opened from disk.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestFinalizeWhenEditsAreMissed()
{
    journal.NewEpoch(FakeNsinfo, 1);
    journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(2), 1, 1, 3, QJMTestUtil.CreateTxnData(1, 3));

    // Try to finalize up to txn 6, even though we only wrote up to txn 3.
    try
    {
        journal.FinalizeLogSegment(MakeRI(3), 1, 6);
        NUnit.Framework.Assert.Fail("did not fail to finalize");
    }
    catch (JournalOutOfSyncException outOfSync)
    {
        GenericTestUtils.AssertExceptionContains("but only written up to txid 3", outOfSync);
    }

    // Check that, even if we re-construct the journal by scanning the
    // disk, we don't allow finalizing incorrectly.
    journal.Close();
    journal = new Journal(conf, TestLogDir, Jid,
        HdfsServerConstants.StartupOption.Regular, mockErrorReporter);
    try
    {
        journal.FinalizeLogSegment(MakeRI(4), 1, 6);
        NUnit.Framework.Assert.Fail("did not fail to finalize");
    }
    catch (JournalOutOfSyncException outOfSync)
    {
        GenericTestUtils.AssertExceptionContains("disk only contains up to txid 3", outOfSync);
    }
}
/// <summary>
/// Writes three segments while JN0 is down for the middle one, then stops JN1
/// and checks that a fresh reader can still read txids 1-9.
/// </summary>
public virtual void TestOneJNMissingSegments()
{
    // Segment 1-3: all journal nodes up.
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Segment 4-6: written while JN0 is stopped.
    cluster.GetJournalNode(0).StopAndJoin(0);
    QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Segment 7-9: JN0 is back.
    cluster.RestartJournalNode(0);
    QJMTestUtil.WriteSegment(cluster, qjm, 7, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Read with JN1 stopped.
    cluster.GetJournalNode(1).StopAndJoin(0);
    QuorumJournalManager reader = CreateSpyingQJM();
    IList<EditLogInputStream> selected = Lists.NewArrayList();
    try
    {
        reader.SelectInputStreams(selected, 1, false);
        QJMTestUtil.VerifyEdits(selected, 1, 9);
    }
    finally
    {
        IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
        reader.Close();
    }
}
/// <summary>
/// A single writer writes two finalized segments back to back; after each one
/// <c>CheckRecovery</c> is run for that segment's txid range.
/// </summary>
public virtual void TestSingleWriter()
{
    // First segment: 3 txns starting at txid 1. Should be finalized.
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    CheckRecovery(cluster, 1, 3);

    // Start a new segment: 1 txn at txid 4. Should be finalized.
    QJMTestUtil.WriteSegment(cluster, qjm, 4, 1, true);
    CheckRecovery(cluster, 4, 4);
}
/// <summary>
/// Drives two failed recoveries so that JN0 ends up with log data newer than
/// its accepted paxos state, then checks that recovery still succeeds.
/// </summary>
/// <remarks>
/// The test replaces the static <c>JournalFaultInjector.instance</c> with a
/// mock. The previous injector is restored in a <c>finally</c> so that neither
/// the mock nor its "throw" stubbing leaks into other tests, even when an
/// assertion fails part-way through.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestCrashBetweenSyncLogAndPersistPaxosData()
{
    JournalFaultInjector previousInjector = JournalFaultInjector.instance;
    JournalFaultInjector faultInjector = Org.Mockito.Mockito.Mock<JournalFaultInjector>();
    JournalFaultInjector.instance = faultInjector;
    try
    {
        SetupLoggers345();

        // Run recovery where the client only talks to JN0, JN1, such that it
        // decides that the correct length is through txid 4.
        // Only allow it to call acceptRecovery() on JN0.
        qjm = CreateSpyingQJM();
        spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
        cluster.GetJournalNode(2).StopAndJoin(0);
        InjectIOE().When(spies[1]).AcceptRecovery(
            Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
            Org.Mockito.Mockito.Any<Uri>());
        TryRecoveryExpectingFailure();
        cluster.RestartJournalNode(2);

        // State at this point:
        // JN0: edit log for 1-4, paxos recovery data for txid 4
        // JN1: edit log for 1-4,
        // JN2: edit log for 1-5

        // Run recovery again, but don't allow JN0 to respond to the
        // prepareRecovery() call. This will cause recovery to decide
        // on txid 5.
        // Additionally, crash all of the nodes before they persist
        // any new paxos data.
        qjm = CreateSpyingQJM();
        spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
        InjectIOE().When(spies[0]).PrepareRecovery(Org.Mockito.Mockito.Eq(1L));
        Org.Mockito.Mockito.DoThrow(new IOException("Injected"))
            .When(faultInjector).BeforePersistPaxosData();
        TryRecoveryExpectingFailure();
        Org.Mockito.Mockito.Reset(faultInjector);

        // State at this point:
        // JN0: edit log for 1-5, paxos recovery data for txid 4
        // !!! This is the interesting bit, above. The on-disk data and the
        // paxos data don't match up!
        // JN1: edit log for 1-5,
        // JN2: edit log for 1-5,

        // Now, stop JN2, and see if we can still start up even though
        // JN0 is in a strange state where its log data is actually newer
        // than its accepted Paxos state.
        cluster.GetJournalNode(2).StopAndJoin(0);
        qjm = CreateSpyingQJM();
        try
        {
            long recovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
            // 4 was committed to a quorum
            NUnit.Framework.Assert.IsTrue(recovered >= 4);
        }
        finally
        {
            qjm.Close();
        }
    }
    finally
    {
        // Undo the global replacement made at the top of the test.
        JournalFaultInjector.instance = previousInjector;
    }
}
/// <summary>
/// Test that, if the writer crashes at the very beginning of a segment,
/// before any transactions are written, the next newEpoch() call
/// returns the prior segment txid as its most recent segment.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNewEpochAtBeginningOfSegment()
{
    journal.NewEpoch(FakeNsinfo, 1);

    // A complete segment covering txids 1-2.
    journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(2), 1, 1, 2, QJMTestUtil.CreateTxnData(1, 2));
    journal.FinalizeLogSegment(MakeRI(3), 1, 2);

    // A second segment is started at txid 3 but nothing is written to it.
    journal.StartLogSegment(MakeRI(4), 3, NameNodeLayoutVersion.CurrentLayoutVersion);

    QJournalProtocolProtos.NewEpochResponseProto epochResponse = journal.NewEpoch(FakeNsinfo, 2);
    NUnit.Framework.Assert.AreEqual(1, epochResponse.GetLastSegmentTxId());
}
/// <summary>
/// Leaves segment 1-3 unfinalized, then has a brand-new QJM recover it and
/// checks the recovered range.
/// </summary>
public virtual void TestChangeWritersLogsInSync()
{
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, false);
    string inProgressName = NNStorage.GetInProgressEditsFileName(1);
    QJMTestUtil.AssertExistsInQuorum(cluster, inProgressName);

    // Make a new QJM
    QuorumJournalManager replacement = new QuorumJournalManager(
        conf, cluster.GetQuorumJournalURI(QJMTestUtil.Jid), QJMTestUtil.FakeNsinfo);
    qjm = CloseLater(replacement);
    qjm.RecoverUnfinalizedSegments();
    CheckRecovery(cluster, 1, 3);
}
/// <summary>
/// Checks that the journal's committed txid follows the value carried by the
/// <c>RequestInfo</c> of each <c>Journal</c> call.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestMaintainCommittedTxId()
{
    journal.NewEpoch(FakeNsinfo, 1);
    journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);

    // Send txids 1-3, with a request indicating only 0 committed
    RequestInfo nothingCommitted = new RequestInfo(Jid, 1, 2, 0);
    journal.Journal(nothingCommitted, 1, 1, 3, QJMTestUtil.CreateTxnData(1, 3));
    NUnit.Framework.Assert.AreEqual(0, journal.GetCommittedTxnIdForTests());

    // Send 4-6, with request indicating that through 3 is committed.
    RequestInfo throughThreeCommitted = new RequestInfo(Jid, 1, 3, 3);
    journal.Journal(throughThreeCommitted, 1, 4, 3, QJMTestUtil.CreateTxnData(4, 6));
    NUnit.Framework.Assert.AreEqual(3, journal.GetCommittedTxnIdForTests());
}
/// <summary>
/// Puts the loggers into the 3/4/5 out-of-sync state, stops one journal node,
/// and checks which txid a new QJM recovers segment 1 up to.
/// </summary>
/// <param name="missingOnRecoveryIdx">Index of the journal node stopped before recovery.</param>
/// <param name="expectedRecoveryTxnId">Last txid the recovered segment is expected to hold.</param>
/// <exception cref="System.Exception"/>
private void DoOutOfSyncTest(int missingOnRecoveryIdx, long expectedRecoveryTxnId)
{
    SetupLoggers345();
    string inProgressName = NNStorage.GetInProgressEditsFileName(1);
    QJMTestUtil.AssertExistsInQuorum(cluster, inProgressName);

    // Shut down the specified JN, so it's not present during recovery.
    cluster.GetJournalNode(missingOnRecoveryIdx).StopAndJoin(0);

    // Make a new QJM
    qjm = CreateSpyingQJM();
    qjm.RecoverUnfinalizedSegments();
    CheckRecovery(cluster, 1, expectedRecoveryTxnId);
}
/// <summary>
/// For every pair (failA, failB) of IPC numbers, fails that IPC on logger 0 and
/// logger 1 respectively, runs the workload, and checks that a new QJM can
/// recover at least everything that was acked and then write a further segment.
/// </summary>
/// <remarks>
/// Each iteration builds its own cluster and QJM. Cleanup is nested so that the
/// QJM is still closed when shutting the cluster down throws.
/// </remarks>
public virtual void TestRecoverAfterDoubleFailures()
{
    long maxIpcNumber = DetermineMaxIpcNumber();
    for (int failA = 1; failA <= maxIpcNumber; failA++)
    {
        for (int failB = 1; failB <= maxIpcNumber; failB++)
        {
            string injectionStr = "(" + failA + ", " + failB + ")";
            Log.Info("\n\n-------------------------------------------\n" + "Beginning test, failing at "
                + injectionStr + "\n" + "-------------------------------------------\n\n");

            MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).Build();
            QuorumJournalManager qjm = null;
            try
            {
                qjm = CreateInjectableQJM(cluster);
                qjm.Format(QJMTestUtil.FakeNsinfo);
                IList<AsyncLogger> loggers = qjm.GetLoggerSetForTests().GetLoggersForTests();
                FailIpcNumber(loggers[0], failA);
                FailIpcNumber(loggers[1], failB);

                int lastAckedTxn = DoWorkload(cluster, qjm);
                if (lastAckedTxn < 6)
                {
                    Log.Info("Failed after injecting failures at " + injectionStr
                        + ". This is expected since we injected a failure in the " + "majority.");
                }

                // Null the reference after a successful Close so the finally
                // block does not close the same instance again.
                qjm.Close();
                qjm = null;

                // Now should be able to recover
                qjm = CreateInjectableQJM(cluster);
                long lastRecoveredTxn = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
                NUnit.Framework.Assert.IsTrue(lastRecoveredTxn >= lastAckedTxn);
                QJMTestUtil.WriteSegment(cluster, qjm, lastRecoveredTxn + 1, 3, true);
            }
            catch (Exception t)
            {
                // Test failure! Rethrow with the test setup info so it can be
                // easily triaged.
                throw new RuntimeException("Test failed with injection: " + injectionStr, t);
            }
            finally
            {
                try
                {
                    cluster.Shutdown();
                }
                finally
                {
                    // Runs even if Shutdown() throws, so the QJM is never leaked.
                    IOUtils.CloseStream(qjm);
                }
            }
        }
    }
}
/// <summary>
/// Assume that a client is writing to a journal, but loses its connection
/// in the middle of a segment.
/// </summary>
/// <remarks>
/// Any future journal() calls in that segment may then fail, because some txns
/// were missed while the connection was down.
/// Eventually, the connection comes back, and the NN tries to start a new
/// segment at a higher txid. This should abort the old one and succeed.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAbortOldSegmentIfFinalizeIsMissed()
{
    journal.NewEpoch(FakeNsinfo, 1);

    // Start a segment at txid 1, and write a batch of 3 txns.
    journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(2), 1, 1, 3, QJMTestUtil.CreateTxnData(1, 3));
    GenericTestUtils.AssertExists(journal.GetStorage().GetInProgressEditLog(1));

    // Try to start new segment at txid 6, this should abort old segment and
    // then succeed, allowing us to write a batch of 3 txns starting at txid 6.
    journal.StartLogSegment(MakeRI(3), 6, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(4), 6, 6, 3, QJMTestUtil.CreateTxnData(6, 3));

    // The old segment should *not* be finalized: both in-progress files exist.
    GenericTestUtils.AssertExists(journal.GetStorage().GetInProgressEditLog(1));
    GenericTestUtils.AssertExists(journal.GetStorage().GetInProgressEditLog(6));
}
/// <summary>
/// Runs recovery on the current <c>qjm</c>, requiring it to fail with a
/// <c>QuorumException</c> containing "Injected"; the QJM is always closed afterwards.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void TryRecoveryExpectingFailure()
{
    try
    {
        QJMTestUtil.RecoverAndReturnLastTxn(qjm);
        NUnit.Framework.Assert.Fail("Expected to fail recovery");
    }
    catch (QuorumException injectedFailure)
    {
        GenericTestUtils.AssertExceptionContains("Injected", injectedFailure);
    }
    finally
    {
        qjm.Close();
    }
}
/// <summary>
/// Writes five finalized 10-txn segments and selects input streams starting
/// at txid 25, which falls inside a segment, then verifies edits 25-50.
/// </summary>
/// <remarks>
/// The selected streams are released in a <c>finally</c>, matching the other
/// tests in this file that call <c>SelectInputStreams</c>.
/// </remarks>
public virtual void TestSelectInputStreamsNotOnBoundary()
{
    int txIdsPerSegment = 10;
    for (int txid = 1; txid <= 5 * txIdsPerSegment; txid += txIdsPerSegment)
    {
        QJMTestUtil.WriteSegment(cluster, qjm, txid, txIdsPerSegment, true);
    }

    FilePath curDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
    GenericTestUtils.AssertGlobEquals(curDir, "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 10),
        NNStorage.GetFinalizedEditsFileName(11, 20),
        NNStorage.GetFinalizedEditsFileName(21, 30),
        NNStorage.GetFinalizedEditsFileName(31, 40),
        NNStorage.GetFinalizedEditsFileName(41, 50));

    AList<EditLogInputStream> streams = new AList<EditLogInputStream>();
    try
    {
        qjm.SelectInputStreams(streams, 25, false);
        QJMTestUtil.VerifyEdits(streams, 25, 50);
    }
    finally
    {
        // Close whatever was selected, even if selection or verification failed.
        IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(streams, new IDisposable[0]));
    }
}
/// <summary>
/// Run a simple workload of becoming the active writer and writing
/// two log segments: 1-3 and 4-6.
/// </summary>
/// <returns>
/// The last txid of the last segment that was written without a
/// <c>QuorumException</c>: 0, 3 or 6.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private static int DoWorkload(MiniJournalCluster cluster, QuorumJournalManager qjm)
{
    int ackedThrough = 0;
    try
    {
        qjm.RecoverUnfinalizedSegments();

        QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
        ackedThrough = 3;

        QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
        ackedThrough = 6;
    }
    catch (QuorumException quorumFailure)
    {
        // A quorum failure is logged, not propagated; the caller inspects the return value.
        Log.Info("Failed to write at txid " + ackedThrough, quorumFailure);
    }
    return ackedThrough;
}
/// <summary>
/// Flushes one txn while the third logger's <c>SendEdits</c> future is never
/// completed, and verifies that logger 0 is still told txid 1 is committed.
/// </summary>
public virtual void TestWriteEditsOneSlow()
{
    EditLogOutputStream stm = CreateLogSegment();
    QJMTestUtil.WriteOp(stm, 1);
    stm.SetReadyToFlush();

    // Make the first two logs respond immediately
    for (int fastIdx = 0; fastIdx < 2; fastIdx++)
    {
        FutureReturns(null).When(spyLoggers[fastIdx]).SendEdits(
            Matchers.AnyLong(), Matchers.Eq(1L), Matchers.Eq(1),
            Org.Mockito.Mockito.Any<byte[]>());
    }

    // And the third log not respond
    SettableFuture<Void> neverCompleted = SettableFuture.Create();
    Org.Mockito.Mockito.DoReturn(neverCompleted).When(spyLoggers[2]).SendEdits(
        Matchers.AnyLong(), Matchers.Eq(1L), Matchers.Eq(1),
        Org.Mockito.Mockito.Any<byte[]>());

    stm.Flush();
    Org.Mockito.Mockito.Verify(spyLoggers[0]).SetCommittedTxId(1L);
}
/// <summary>
/// Logger 0 misses finalize(1-3) and start(4), logger 1 fails at txid 4; the
/// write of txid 4 must fail, and recovery without logger 2 must yield txid 3.
/// </summary>
public virtual void TestMissFinalizeAndNextStart()
{
    // Logger 0: miss finalize(1-3) and start(4)
    TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
        .When(spies[0])
        .FinalizeLogSegment(Org.Mockito.Mockito.Eq(1L), Org.Mockito.Mockito.Eq(3L));
    TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
        .When(spies[0])
        .StartLogSegment(Org.Mockito.Mockito.Eq(4L),
            Org.Mockito.Mockito.Eq(NameNodeLayoutVersion.CurrentLayoutVersion));

    // Logger 1: fail at txn id 4
    FailLoggerAtTxn(spies[1], 4L);

    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    EditLogOutputStream segment4 = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        QJMTestUtil.WriteTxns(segment4, 4, 1);
        NUnit.Framework.Assert.Fail("Did not fail to write");
    }
    catch (QuorumException quorumFailure)
    {
        // Should fail, because logger 1 had an injected fault and
        // logger 0 should detect writer out of sync
        GenericTestUtils.AssertExceptionContains("Writer out of sync", quorumFailure);
    }
    finally
    {
        segment4.Abort();
        qjm.Close();
    }

    // State:
    // Logger 0: 1-3 in-progress (since it missed finalize)
    // Logger 1: 1-3 finalized
    // Logger 2: 1-3 finalized, 4 in-progress with one txn

    // Shut down logger 2 so it doesn't participate in recovery
    cluster.GetJournalNode(2).StopAndJoin(0);
    qjm = CreateSpyingQJM();
    long lastRecoveredTxn = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
    NUnit.Framework.Assert.AreEqual(3L, lastRecoveredTxn);
}
/// <summary>
/// Closes a journal with an unfinalized segment, re-opens it from the same
/// directory, and checks that storage info, promised epoch and last segment
/// txid are still reported.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestRestartJournal()
{
    journal.NewEpoch(FakeNsinfo, 1);
    journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(2), 1, 1, 2, QJMTestUtil.CreateTxnData(1, 2));
    // Don't finalize.

    string storageBeforeRestart = journal.GetStorage().ToColonSeparatedString();
    System.Console.Error.WriteLine("storage string: " + storageBeforeRestart);

    // close to unlock the storage dir
    journal.Close();

    // Now re-instantiate, make sure history is still there
    journal = new Journal(conf, TestLogDir, Jid,
        HdfsServerConstants.StartupOption.Regular, mockErrorReporter);

    // The storage info should be read, even if no writer has taken over.
    string storageAfterRestart = journal.GetStorage().ToColonSeparatedString();
    NUnit.Framework.Assert.AreEqual(storageBeforeRestart, storageAfterRestart);
    NUnit.Framework.Assert.AreEqual(1, journal.GetLastPromisedEpoch());

    QJournalProtocolProtos.NewEpochResponseProtoOrBuilder epochResponse =
        journal.NewEpoch(FakeNsinfo, 2);
    NUnit.Framework.Assert.AreEqual(1, epochResponse.GetLastSegmentTxId());
}
/// <summary>
/// Exercises the journal node's HTTP server: the <c>/jmx</c> servlet, fetching
/// a finalized segment through <c>/getJournal</c>, and the 404 returned for a
/// segment that does not exist.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestHttpServer()
{
    string urlRoot = jn.GetHttpServerURI();

    // Check default servlets.
    string pageContents = DFSTestUtil.UrlGet(new Uri(urlRoot + "/jmx"));
    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsTrue(
        pageContents.Contains("Hadoop:service=JournalNode,name=JvmMetrics"),
        "Bad contents: " + pageContents);

    // Create some edits on server side
    byte[] EditsData = QJMTestUtil.CreateTxnData(1, 3);
    IPCLoggerChannel ch = new IPCLoggerChannel(conf, FakeNsinfo, journalId, jn.GetBoundIpcAddress());
    ch.NewEpoch(1).Get();
    ch.SetEpoch(1);
    ch.StartLogSegment(1, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
    ch.SendEdits(1L, 1, 3, EditsData).Get();
    ch.FinalizeLogSegment(1, 3).Get();

    // Attempt to retrieve via HTTP, ensure we get the data back
    // including the header we expected
    byte[] retrievedViaHttp = DFSTestUtil.UrlGetBytes(
        new Uri(urlRoot + "/getJournal?segmentTxId=1&jid=" + journalId));
    byte[] expected = Bytes.Concat(
        Ints.ToByteArray(HdfsConstants.NamenodeLayoutVersion),
        (new byte[] { 0, 0, 0, 0 }), // layout flags section
        EditsData);
    // NOTE(review): unqualified Assert.AssertArrayEquals is kept as-is; confirm
    // which Assert type it resolves to in this file.
    Assert.AssertArrayEquals(expected, retrievedViaHttp);

    // Attempt to fetch a non-existent file, check that we get an
    // error status code
    Uri badUrl = new Uri(urlRoot + "/getJournal?segmentTxId=12345&jid=" + journalId);
    HttpURLConnection connection = (HttpURLConnection)badUrl.OpenConnection();
    try
    {
        NUnit.Framework.Assert.AreEqual(404, connection.GetResponseCode());
    }
    finally
    {
        connection.Disconnect();
    }
}
/// <summary>
/// Variant of the "newer version of segment wins" test: after recovering without
/// JN0, JN0 returns and JN1 crashes before the new segment is written. The old
/// accepted recovery 101-101 on JN0 must not truncate the log on a later recovery.
/// </summary>
public virtual void TestNewerVersionOfSegmentWins2()
{
    SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery();

    // Recover without JN0 present.
    cluster.GetJournalNode(0).StopAndJoin(0);
    qjm = CreateSpyingQJM();
    try
    {
        long firstRecovery = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
        NUnit.Framework.Assert.AreEqual(100, firstRecovery);

        // After recovery, JN0 comes back to life and JN1 crashes.
        cluster.RestartJournalNode(0);
        cluster.GetJournalNode(1).StopAndJoin(0);

        // Write segment but do not finalize
        QJMTestUtil.WriteSegment(cluster, qjm, 101, 50, false);
    }
    finally
    {
        qjm.Close();
    }

    // State:
    // JN0: 1-100 finalized, 101_inprogress (txns up to 150)
    // Previously, JN0 had an accepted recovery 101-101 from an earlier recovery
    // attempt.
    // JN1: 1-100 finalized
    // JN2: 1-100 finalized, 101_inprogress (txns up to 150)

    // We need to test that the accepted recovery 101-101 on JN0 doesn't
    // end up truncating the log back to 101.
    cluster.RestartJournalNode(1);
    cluster.GetJournalNode(2).StopAndJoin(0);
    qjm = CreateSpyingQJM();
    try
    {
        long secondRecovery = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
        NUnit.Framework.Assert.AreEqual(150, secondRecovery);
    }
    finally
    {
        qjm.Close();
    }
}
/// <summary>
/// Test behavior of startLogSegment() when a segment with the
/// same transaction ID already exists.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestStartLogSegmentWhenAlreadyExists()
{
    journal.NewEpoch(FakeNsinfo, 1);

    // Start a segment at txid 1, and write just 1 transaction. This
    // would normally be the START_LOG_SEGMENT transaction.
    journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(2), 1, 1, 1, QJMTestUtil.CreateTxnData(1, 1));

    // Try to start new segment at txid 1, this should succeed, because
    // we are allowed to re-start a segment if we only ever had the
    // START_LOG_SEGMENT transaction logged.
    journal.StartLogSegment(MakeRI(3), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
    journal.Journal(MakeRI(4), 1, 1, 1, QJMTestUtil.CreateTxnData(1, 1));

    // This time through, write more transactions afterwards, simulating
    // real user transactions.
    journal.Journal(MakeRI(5), 1, 2, 3, QJMTestUtil.CreateTxnData(2, 3));
    try
    {
        journal.StartLogSegment(MakeRI(6), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
        NUnit.Framework.Assert.Fail("Did not fail to start log segment which would overwrite " + "an existing one");
    }
    catch (InvalidOperationException hasValidTxns)
    {
        GenericTestUtils.AssertExceptionContains("seems to contain valid transactions", hasValidTxns);
    }

    journal.FinalizeLogSegment(MakeRI(7), 1, 4);

    // Ensure that we cannot overwrite a finalized segment
    try
    {
        journal.StartLogSegment(MakeRI(8), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
        NUnit.Framework.Assert.Fail("Did not fail to start log segment which would overwrite " + "an existing one");
    }
    catch (InvalidOperationException alreadyFinalized)
    {
        GenericTestUtils.AssertExceptionContains("have a finalized segment", alreadyFinalized);
    }
}
/// <summary>
/// Writes garbage transactions into a segment and checks that
/// <c>GetSegmentInfo</c> still reports the right txid range and in-progress
/// flag, both before and after finalization.
/// </summary>
public virtual void TestScanEditLog()
{
    // use a future layout version
    // NOTE(review): "future" is the original author's description of
    // CurrentLayoutVersion - 1; confirm against the layout-version numbering.
    int layoutVersion = NameNodeLayoutVersion.CurrentLayoutVersion - 1;
    journal.StartLogSegment(MakeRI(1), 1, layoutVersion);

    // in the segment we write garbage editlog, which can be scanned but
    // cannot be decoded
    int numTxns = 5;
    byte[] garbageOps = QJMTestUtil.CreateGabageTxns(1, 5);
    journal.Journal(MakeRI(2), 1, 1, numTxns, garbageOps);

    // verify the in-progress editlog segment
    QJournalProtocolProtos.SegmentStateProto state = journal.GetSegmentInfo(1);
    NUnit.Framework.Assert.IsTrue(state.GetIsInProgress());
    NUnit.Framework.Assert.AreEqual(numTxns, state.GetEndTxId());
    NUnit.Framework.Assert.AreEqual(1, state.GetStartTxId());

    // finalize the segment and verify it again
    journal.FinalizeLogSegment(MakeRI(3), 1, numTxns);
    state = journal.GetSegmentInfo(1);
    NUnit.Framework.Assert.IsFalse(state.GetIsInProgress());
    NUnit.Framework.Assert.AreEqual(numTxns, state.GetEndTxId());
    NUnit.Framework.Assert.AreEqual(1, state.GetStartTxId());
}
/// <summary>
/// Starts segment 4 and aborts it before writing anything, then checks that a
/// new QJM recovers 1-3 and can write segment 4 itself.
/// </summary>
public virtual void TestCrashAtBeginningOfSegment()
{
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

    // Open segment 4, wait for the calls to finish, and abort without writing.
    EditLogOutputStream abandoned = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
    }
    finally
    {
        abandoned.Abort();
    }

    // Make a new QJM
    QuorumJournalManager replacement = new QuorumJournalManager(
        conf, cluster.GetQuorumJournalURI(QJMTestUtil.Jid), QJMTestUtil.FakeNsinfo);
    qjm = CloseLater(replacement);
    qjm.RecoverUnfinalizedSegments();
    CheckRecovery(cluster, 1, 3);

    QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
}
/// <summary>
/// Checks the last-segment txid returned by <c>NewEpoch</c> in three situations:
/// an open segment, a just-finalized segment, and a started-but-empty segment.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestReturnsSegmentInfoAtEpochTransition()
{
    ch.NewEpoch(1).Get();
    ch.SetEpoch(1);
    ch.StartLogSegment(1, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
    ch.SendEdits(1L, 1, 2, QJMTestUtil.CreateTxnData(1, 2)).Get();

    // Switch to a new epoch without closing earlier segment
    QJournalProtocolProtos.NewEpochResponseProto epochResponse = ch.NewEpoch(2).Get();
    ch.SetEpoch(2);
    NUnit.Framework.Assert.AreEqual(1, epochResponse.GetLastSegmentTxId());

    ch.FinalizeLogSegment(1, 2).Get();

    // Switch to a new epoch after just closing the earlier segment.
    epochResponse = ch.NewEpoch(3).Get();
    ch.SetEpoch(3);
    NUnit.Framework.Assert.AreEqual(1, epochResponse.GetLastSegmentTxId());

    // Start a segment but don't write anything, check newEpoch segment info
    ch.StartLogSegment(3, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
    epochResponse = ch.NewEpoch(4).Get();
    ch.SetEpoch(4);

    // Because the new segment is empty, it is equivalent to not having
    // started writing it. Hence, we should return the prior segment txid.
    NUnit.Framework.Assert.AreEqual(1, epochResponse.GetLastSegmentTxId());
}
/// <summary>
/// Set up the loggers into the following state:
/// - JN0: edits 1-3 in progress
/// - JN1: edits 1-4 in progress
/// - JN2: edits 1-5 in progress
/// None of the loggers have any associated paxos info.
/// </summary>
/// <exception cref="System.Exception"/>
private void SetupLoggers345()
{
    EditLogOutputStream segment = qjm.StartLogSegment(1, NameNodeLayoutVersion.CurrentLayoutVersion);

    // Logger 0 is made to fail at txid 4 and logger 1 at txid 5.
    FailLoggerAtTxn(spies[0], 4);
    FailLoggerAtTxn(spies[1], 5);

    QJMTestUtil.WriteTxns(segment, 1, 3);

    // This should succeed to 2/3 loggers
    QJMTestUtil.WriteTxns(segment, 4, 1);

    // This should only succeed to 1 logger (index 2). Hence it should
    // fail
    try
    {
        QJMTestUtil.WriteTxns(segment, 5, 1);
        NUnit.Framework.Assert.Fail("Did not fail to write when only a minority succeeded");
    }
    catch (QuorumException noQuorum)
    {
        GenericTestUtils.AssertExceptionContains("too many exceptions to achieve quorum size 2/3", noQuorum);
    }
}
/// <summary>
/// Starts a segment at <paramref name="txid"/> and writes
/// <paramref name="numTxns"/> transactions one at a time, then closes and
/// finalizes it. Any exception stops the sequence and is stored in
/// <paramref name="thrown"/> instead of being propagated.
/// </summary>
/// <returns>
/// The txid of the last transaction whose write returned normally, or
/// <c>txid - 1</c> if none did.
/// </returns>
private long WriteSegmentUntilCrash(MiniJournalCluster cluster, QuorumJournalManager qjm, long txid, int numTxns, Holder<Exception> thrown)
{
    long segmentStart = txid;
    long ackedThrough = segmentStart - 1;
    try
    {
        EditLogOutputStream segment = qjm.StartLogSegment(segmentStart, NameNodeLayoutVersion.CurrentLayoutVersion);
        long nextTxId = segmentStart;
        int remaining = numTxns;
        while (remaining > 0)
        {
            QJMTestUtil.WriteTxns(segment, nextTxId, 1);
            nextTxId++;
            ackedThrough++;
            remaining--;
        }
        segment.Close();
        qjm.FinalizeLogSegment(segmentStart, ackedThrough);
    }
    catch (Exception failure)
    {
        thrown.held = failure;
    }
    return ackedThrough;
}
/// <summary>
/// Set up a tricky edge case state which is used by multiple tests.
/// </summary>
/// <remarks>
/// Initial writer:
/// - Writing to 3 JNs: JN0, JN1, JN2:
/// - A log segment with txnid 1 through 100 succeeds.
/// - The first transaction in the next segment only goes to JN0
/// before the writer crashes (eg it is partitioned)
/// Recovery by another writer:
/// - The new NN starts recovery and talks to all three. Thus, it sees
/// that the newest log segment which needs recovery is 101.
/// - It sends the prepareRecovery(101) call, and decides that the
/// recovery length for 101 is only the 1 transaction.
/// - It sends acceptRecovery(101-101) to only JN0, before crashing
/// This yields the following state:
/// - JN0: 1-100 finalized, 101_inprogress, accepted recovery: 101-101
/// - JN1: 1-100 finalized, 101_inprogress.empty
/// - JN2: 1-100 finalized, 101_inprogress.empty
/// (the .empty files got moved aside during recovery)
/// </remarks>
/// <exception cref="System.Exception"></exception>
private void SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery()
{
    // Log segment with txns 1-100 succeeds
    QJMTestUtil.WriteSegment(cluster, qjm, 1, 100, true);

    // startLogSegment only makes it to one of the three nodes
    FailLoggerAtTxn(spies[1], 101);
    FailLoggerAtTxn(spies[2], 101);
    try
    {
        QJMTestUtil.WriteSegment(cluster, qjm, 101, 1, true);
        NUnit.Framework.Assert.Fail("Should have failed");
    }
    catch (QuorumException writeFailure)
    {
        GenericTestUtils.AssertExceptionContains("mock failure", writeFailure);
    }
    finally
    {
        qjm.Close();
    }

    // Recovery 1:
    // make acceptRecovery() only make it to the node which has txid 101
    // this should fail because only 1/3 accepted the recovery
    qjm = CreateSpyingQJM();
    spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
    TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure"))
        .When(spies[1])
        .AcceptRecovery(
            Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
            Org.Mockito.Mockito.Any<Uri>());
    TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure"))
        .When(spies[2])
        .AcceptRecovery(
            Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
            Org.Mockito.Mockito.Any<Uri>());
    try
    {
        qjm.RecoverUnfinalizedSegments();
        NUnit.Framework.Assert.Fail("Should have failed to recover");
    }
    catch (QuorumException recoveryFailure)
    {
        GenericTestUtils.AssertExceptionContains("mock failure", recoveryFailure);
    }
    finally
    {
        qjm.Close();
    }

    // Check that we have entered the expected state as described in the
    // method javadoc.
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 100),
        NNStorage.GetInProgressEditsFileName(101));
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(1, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 100),
        NNStorage.GetInProgressEditsFileName(101) + ".empty");
    GenericTestUtils.AssertGlobEquals(cluster.GetCurrentDir(2, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 100),
        NNStorage.GetInProgressEditsFileName(101) + ".empty");

    // Only JN0 should hold paxos data (for segment 101).
    FilePath paxosJn0 = new FilePath(cluster.GetCurrentDir(0, QJMTestUtil.Jid), "paxos");
    FilePath paxosJn1 = new FilePath(cluster.GetCurrentDir(1, QJMTestUtil.Jid), "paxos");
    FilePath paxosJn2 = new FilePath(cluster.GetCurrentDir(2, QJMTestUtil.Jid), "paxos");
    GenericTestUtils.AssertGlobEquals(paxosJn0, ".*", "101");
    GenericTestUtils.AssertGlobEquals(paxosJn1, ".*");
    GenericTestUtils.AssertGlobEquals(paxosJn2, ".*");
}
/// <summary>
/// Test the case where, at the beginning of a segment, transactions
/// have been written to one JN but not others.
/// </summary>
/// <param name="nodeWithOneTxn">
/// Index of the journal node that receives the single transaction; the other
/// two roles (empty segment, missing segment) are derived from it modulo 3.
/// </param>
/// <exception cref="System.Exception"/>
public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
{
    int nodeWithEmptySegment = (nodeWithOneTxn + 1) % 3;
    int nodeMissingSegment = (nodeWithOneTxn + 2) % 3;

    QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
    WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
    cluster.GetJournalNode(nodeMissingSegment).StopAndJoin(0);

    // Open segment on 2/3 nodes
    EditLogOutputStream segment4 = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
    try
    {
        WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

        // Write transactions to only 1/3 nodes
        FailLoggerAtTxn(spies[nodeWithEmptySegment], 4);
        try
        {
            QJMTestUtil.WriteTxns(segment4, 4, 1);
            NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
        }
        catch (QuorumException noQuorum)
        {
            GenericTestUtils.AssertExceptionContains("mock failure", noQuorum);
        }
    }
    finally
    {
        segment4.Abort();
    }

    // Bring back the down JN.
    cluster.RestartJournalNode(nodeMissingSegment);

    // Make a new QJM. At this point, the state is as follows:
    // A: nodeWithEmptySegment: 1-3 finalized, 4_inprogress (empty)
    // B: nodeWithOneTxn: 1-3 finalized, 4_inprogress (1 txn)
    // C: nodeMissingSegment: 1-3 finalized
    GenericTestUtils.AssertGlobEquals(
        cluster.GetCurrentDir(nodeWithEmptySegment, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3),
        NNStorage.GetInProgressEditsFileName(4));
    GenericTestUtils.AssertGlobEquals(
        cluster.GetCurrentDir(nodeWithOneTxn, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3),
        NNStorage.GetInProgressEditsFileName(4));
    GenericTestUtils.AssertGlobEquals(
        cluster.GetCurrentDir(nodeMissingSegment, QJMTestUtil.Jid), "edits_.*",
        NNStorage.GetFinalizedEditsFileName(1, 3));

    // Stop one of the nodes. Since we run this test three
    // times, rotating the roles of the nodes, we'll test
    // all the permutations.
    cluster.GetJournalNode(2).StopAndJoin(0);

    qjm = CreateSpyingQJM();
    qjm.RecoverUnfinalizedSegments();

    bool txnHolderTookPart = nodeWithOneTxn == 0 || nodeWithOneTxn == 1;
    if (txnHolderTookPart)
    {
        // If the node that had the transaction committed was one of the nodes
        // that responded during recovery, then we should have recovered txid
        // 4.
        CheckRecovery(cluster, 4, 4);
        QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
    }
    else
    {
        // Otherwise, we should have recovered only 1-3 and should be able to
        // start a segment at 4.
        CheckRecovery(cluster, 1, 3);
        QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
    }
}