/// <summary>
/// Checks what a second, reader-side QJM gets back from SelectInputStreams
/// (always called with in-progress segments excluded) while the writer QJM
/// writes, extends and finalizes log segments.
/// </summary>
public virtual void TestReaderWhileAnotherWrites()
        {
            QuorumJournalManager reader = CloseLater(CreateSpyingQJM());
            IList<EditLogInputStream> selected = Lists.NewArrayList();

            // Nothing has been written so far, so the selection must be empty.
            reader.SelectInputStreams(selected, 0, false);
            NUnit.Framework.Assert.AreEqual(0, selected.Count);

            // Phase 1: the writer produces one finalized segment.
            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            reader.SelectInputStreams(selected, 0, false);
            try
            {
                NUnit.Framework.Assert.AreEqual(1, selected.Count);
                // Check the boundaries first, then the contents, of the only stream.
                EditLogInputStream only = selected[0];
                NUnit.Framework.Assert.AreEqual(1, only.GetFirstTxId());
                NUnit.Framework.Assert.AreEqual(3, only.GetLastTxId());
                QJMTestUtil.VerifyEdits(selected, 1, 3);
                NUnit.Framework.Assert.IsNull(only.ReadOp());
            }
            finally
            {
                IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
                selected.Clear();
            }

            // Phase 2: a further segment is left in progress. Because in-progress
            // streams are not requested, the result must be unchanged.
            QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, false);
            reader.SelectInputStreams(selected, 0, false);
            try
            {
                NUnit.Framework.Assert.AreEqual(1, selected.Count);
                EditLogInputStream finalizedOnly = selected[0];
                NUnit.Framework.Assert.AreEqual(1, finalizedOnly.GetFirstTxId());
                NUnit.Framework.Assert.AreEqual(3, finalizedOnly.GetLastTxId());
                QJMTestUtil.VerifyEdits(selected, 1, 3);
            }
            finally
            {
                IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
                selected.Clear();
            }

            // TODO: check results for selectInputStreams with inProgressOK = true.
            // This doesn't currently work, due to a bug where RedundantEditInputStream
            // throws an exception if there are any unvalidated in-progress edits in the list!
            // But, it shouldn't be necessary for current use cases.

            // Phase 3: once the second segment is finalized, both must be visible.
            qjm.FinalizeLogSegment(4, 6);
            reader.SelectInputStreams(selected, 0, false);
            try
            {
                NUnit.Framework.Assert.AreEqual(2, selected.Count);
                NUnit.Framework.Assert.AreEqual(4, selected[1].GetFirstTxId());
                NUnit.Framework.Assert.AreEqual(6, selected[1].GetLastTxId());
                QJMTestUtil.VerifyEdits(selected, 1, 6);
            }
            finally
            {
                IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
                selected.Clear();
            }
        }
 /// <summary>
 /// After the edge-case setup, writes a newer unfinalized segment while JN0 is
 /// down, then brings JN0 back and expects recovery to reach txid 150.
 /// </summary>
 public virtual void TestNewerVersionOfSegmentWins()
 {
     SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery();

     // First writer: JN0 is taken out before writing resumes.
     cluster.GetJournalNode(0).StopAndJoin(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnWithoutJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(100, lastTxnWithoutJn0);
         // Leave the new segment unfinalized on purpose.
         QJMTestUtil.WriteSegment(cluster, qjm, 101, 50, false);
     }
     finally
     {
         qjm.Close();
     }

     // Second writer: JN0 is back, and every transaction written above
     // must be recovered.
     cluster.RestartJournalNode(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnWithJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(150, lastTxnWithJn0);
     }
     finally
     {
         qjm.Close();
     }
 }
        /// <summary>
        /// Writes three ops across two flushes and stubs SendEdits on the three
        /// spy loggers for each expected batch: (first txid 1, 2 txns) and
        /// (first txid 3, 1 txn).
        /// </summary>
        public virtual void TestWriteEdits()
        {
            EditLogOutputStream segment = CreateLogSegment();

            QJMTestUtil.WriteOp(segment, 1);
            QJMTestUtil.WriteOp(segment, 2);
            segment.SetReadyToFlush();
            QJMTestUtil.WriteOp(segment, 3);

            // First flush: the loggers should be sent txns 1-2.
            for (int i = 0; i < 3; i++)
            {
                FutureReturns(null).When(spyLoggers[i]).SendEdits(
                    Matchers.AnyLong(), Matchers.Eq(1L), Matchers.Eq(2), Org.Mockito.Mockito.Any<byte[]>());
            }
            segment.Flush();

            // Second flush: only txn 3 is left to send.
            segment.SetReadyToFlush();
            for (int i = 0; i < 3; i++)
            {
                FutureReturns(null).When(spyLoggers[i]).SendEdits(
                    Matchers.AnyLong(), Matchers.Eq(3L), Matchers.Eq(1), Org.Mockito.Mockito.Any<byte[]>());
            }
            segment.Flush();
        }
        /// <summary>
        /// Writes five single-transaction segments, plants extra paxos and
        /// temporary recovery files, purges everything older than txid 3, and
        /// checks which files remain on journal node 0.
        /// </summary>
        public virtual void TestPurgeLogs()
        {
            int segmentStart = 1;
            while (segmentStart <= 5)
            {
                QJMTestUtil.WriteSegment(cluster, qjm, segmentStart, 1, true);
                segmentStart++;
            }

            FilePath currentDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);
            GenericTestUtils.AssertGlobEquals(
                currentDir,
                "edits_.*",
                NNStorage.GetFinalizedEditsFileName(1, 1),
                NNStorage.GetFinalizedEditsFileName(2, 2),
                NNStorage.GetFinalizedEditsFileName(3, 3),
                NNStorage.GetFinalizedEditsFileName(4, 4),
                NNStorage.GetFinalizedEditsFileName(5, 5));

            FilePath paxos = new FilePath(currentDir, "paxos");
            GenericTestUtils.AssertExists(paxos);

            // Extra paxos entries; these are expected to be purged as well.
            NUnit.Framework.Assert.IsTrue(new FilePath(paxos, "1").CreateNewFile());
            NUnit.Framework.Assert.IsTrue(new FilePath(paxos, "3").CreateNewFile());
            GenericTestUtils.AssertGlobEquals(paxos, "\\d+", "1", "3");

            // Temporary files of the kind recovery leaves behind.
            FilePath epochTemp = new FilePath(currentDir, "edits_inprogress_0000000000000000001.epoch=140");
            NUnit.Framework.Assert.IsTrue(epochTemp.CreateNewFile());
            FilePath emptyTemp = new FilePath(currentDir, "edits_inprogress_0000000000000000002.empty");
            NUnit.Framework.Assert.IsTrue(emptyTemp.CreateNewFile());

            qjm.PurgeLogsOlderThan(3);
            // Purging is asynchronous: wait until the calls have been sent and
            // answered before inspecting the directories.
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

            // Only the edit files for segments 3, 4 and 5 should remain.
            GenericTestUtils.AssertGlobEquals(
                currentDir,
                "edits_.*",
                NNStorage.GetFinalizedEditsFileName(3, 3),
                NNStorage.GetFinalizedEditsFileName(4, 4),
                NNStorage.GetFinalizedEditsFileName(5, 5));
            // Likewise, only the newer paxos file should remain.
            GenericTestUtils.AssertGlobEquals(paxos, "\\d+", "3");
        }
Esempio n. 5
0
		/// <summary>
		/// Finalizing a segment past the last written txid must be rejected.
		/// </summary>
		/// <remarks>
		/// Only txids 1-3 are journaled, yet finalization is requested through
		/// txid 6. The attempt is expected to fail both on the live journal and on
		/// a journal instance re-created over the same storage directory.
		/// </remarks>
		/// <exception cref="System.Exception"/>
		public virtual void TestFinalizeWhenEditsAreMissed()
		{
			const string notRejected = "did not fail to finalize";

			journal.NewEpoch(FakeNsinfo, 1);
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(2), 1, 1, 3, QJMTestUtil.CreateTxnData(1, 3));

			// Ask for finalization through txn 6 although only txn 3 was written.
			try
			{
				journal.FinalizeLogSegment(MakeRI(3), 1, 6);
				NUnit.Framework.Assert.Fail(notRejected);
			}
			catch (JournalOutOfSyncException liveJournalError)
			{
				GenericTestUtils.AssertExceptionContains("but only written up to txid 3", liveJournalError);
			}

			// Re-create the journal so its state comes from scanning the disk;
			// the incorrect finalization must still be refused.
			journal.Close();
			journal = new Journal(
				conf, TestLogDir, Jid, HdfsServerConstants.StartupOption.Regular, mockErrorReporter);
			try
			{
				journal.FinalizeLogSegment(MakeRI(4), 1, 6);
				NUnit.Framework.Assert.Fail(notRejected);
			}
			catch (JournalOutOfSyncException rescannedJournalError)
			{
				GenericTestUtils.AssertExceptionContains("disk only contains up to txid 3", rescannedJournalError);
			}
		}
        /// <summary>
        /// Writes three segments while journal node 0 is down for the middle one,
        /// then stops journal node 1 and checks that a fresh reader QJM can still
        /// read txids 1-9.
        /// </summary>
        public virtual void TestOneJNMissingSegments()
        {
            // Segment 1-3: written with every node up.
            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

            // Segment 4-6: written while JN0 is stopped.
            cluster.GetJournalNode(0).StopAndJoin(0);
            QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

            // Segment 7-9: JN0 has been restarted.
            cluster.RestartJournalNode(0);
            QJMTestUtil.WriteSegment(cluster, qjm, 7, 3, true);
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

            // JN1 is stopped before reading.
            cluster.GetJournalNode(1).StopAndJoin(0);

            QuorumJournalManager reader = CreateSpyingQJM();
            IList<EditLogInputStream> selected = Lists.NewArrayList();
            try
            {
                reader.SelectInputStreams(selected, 1, false);
                QJMTestUtil.VerifyEdits(selected, 1, 9);
            }
            finally
            {
                IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(selected, new IDisposable[0]));
                reader.Close();
            }
        }
 /// <summary>
 /// Writes two consecutive finalized segments with a single writer and runs
 /// CheckRecovery on the cluster after each one.
 /// </summary>
 public virtual void TestSingleWriter()
 {
     // First segment: three txns starting at txid 1, expected to be finalized as 1-3.
     QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
     CheckRecovery(cluster, 1, 3);

     // Second segment: a single txn at txid 4, expected to be finalized as 4-4.
     QJMTestUtil.WriteSegment(cluster, qjm, 4, 1, true);
     CheckRecovery(cluster, 4, 4);
 }
        /// <summary>
        /// Drives two failing recovery attempts so that JN0 ends up with log data
        /// newer than its accepted paxos data, then checks that a further recovery
        /// still succeeds and returns at least txid 4.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestCrashBetweenSyncLogAndPersistPaxosData()
        {
            // Install a mock fault injector as the global instance.
            JournalFaultInjector injector = Org.Mockito.Mockito.Mock<JournalFaultInjector>();
            JournalFaultInjector.instance = injector;

            SetupLoggers345();

            // Attempt 1: the client only talks to JN0 and JN1, so it settles on
            // txid 4 as the correct length. acceptRecovery() is only allowed to
            // go through on JN0; it is made to fail on JN1.
            qjm = CreateSpyingQJM();
            spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
            cluster.GetJournalNode(2).StopAndJoin(0);
            InjectIOE().When(spies[1]).AcceptRecovery(
                Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
                Org.Mockito.Mockito.Any<Uri>());
            TryRecoveryExpectingFailure();
            cluster.RestartJournalNode(2);

            // State at this point:
            // JN0: edit log for 1-4, paxos recovery data for txid 4
            // JN1: edit log for 1-4,
            // JN2: edit log for 1-5

            // Attempt 2: JN0 is prevented from answering prepareRecovery(), which
            // makes recovery decide on txid 5. In addition, every node is crashed
            // before it can persist any new paxos data.
            qjm = CreateSpyingQJM();
            spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
            InjectIOE().When(spies[0]).PrepareRecovery(Org.Mockito.Mockito.Eq(1L));
            Org.Mockito.Mockito.DoThrow(new IOException("Injected")).When(injector).BeforePersistPaxosData();
            TryRecoveryExpectingFailure();
            Org.Mockito.Mockito.Reset(injector);

            // State at this point:
            // JN0: edit log for 1-5, paxos recovery data for txid 4
            // !!!   This is the interesting bit, above. The on-disk data and the
            //       paxos data don't match up!
            // JN1: edit log for 1-5,
            // JN2: edit log for 1-5,

            // Final attempt: with JN2 stopped, startup must still work even though
            // JN0's log data is newer than its accepted paxos state.
            cluster.GetJournalNode(2).StopAndJoin(0);
            qjm = CreateSpyingQJM();
            try
            {
                long lastRecovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
                // 4 was committed to a quorum, so recovery may not go below it.
                NUnit.Framework.Assert.IsTrue(lastRecovered >= 4);
            }
            finally
            {
                qjm.Close();
            }
        }
Esempio n. 9
0
		/// <summary>
		/// Simulates a writer crashing right after starting a segment, before any
		/// transaction is written to it, and checks that the following NewEpoch()
		/// call reports the previous segment's txid as the last segment.
		/// </summary>
		/// <exception cref="System.Exception"/>
		public virtual void TestNewEpochAtBeginningOfSegment()
		{
			journal.NewEpoch(FakeNsinfo, 1);

			// A complete, finalized segment covering txids 1-2.
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(2), 1, 1, 2, QJMTestUtil.CreateTxnData(1, 2));
			journal.FinalizeLogSegment(MakeRI(3), 1, 2);

			// A segment started at txid 3 that never receives any edits.
			journal.StartLogSegment(MakeRI(4), 3, NameNodeLayoutVersion.CurrentLayoutVersion);

			QJournalProtocolProtos.NewEpochResponseProto epochTwo = journal.NewEpoch(FakeNsinfo, 2);
			NUnit.Framework.Assert.AreEqual(1, epochTwo.GetLastSegmentTxId());
		}
 /// <summary>
 /// Leaves a segment unfinalized, switches to a newly constructed QJM and
 /// checks that recovering unfinalized segments yields txids 1-3.
 /// </summary>
 public virtual void TestChangeWritersLogsInSync()
 {
     // The first writer leaves the segment starting at txid 1 in progress.
     QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, false);
     QJMTestUtil.AssertExistsInQuorum(cluster, NNStorage.GetInProgressEditsFileName(1));

     // Replace the writer with a new QJM pointed at the same journal.
     qjm = CloseLater(
         new QuorumJournalManager(
             conf,
             cluster.GetQuorumJournalURI(QJMTestUtil.Jid),
             QJMTestUtil.FakeNsinfo));
     qjm.RecoverUnfinalizedSegments();
     CheckRecovery(cluster, 1, 3);
 }
Esempio n. 11
0
		/// <summary>
		/// Checks that the journal's committed txid follows the value carried in
		/// the RequestInfo of each Journal() call.
		/// </summary>
		/// <exception cref="System.Exception"/>
		public virtual void TestMaintainCommittedTxId()
		{
			journal.NewEpoch(FakeNsinfo, 1);
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			// Send txids 1-3, with a request indicating only 0 committed
			journal.Journal(new RequestInfo(Jid, 1, 2, 0), 1, 1, 3, QJMTestUtil.CreateTxnData
				(1, 3));
			NUnit.Framework.Assert.AreEqual(0, journal.GetCommittedTxnIdForTests());
			// Send 4-6, with request indicating that through 3 is committed.
			// CreateTxnData takes (first txid, number of txns), as its other
			// call sites show, so three txns starting at 4 are needed to
			// match the declared batch size of 3.
			journal.Journal(new RequestInfo(Jid, 1, 3, 3), 1, 4, 3, QJMTestUtil.CreateTxnData
				(4, 3));
			NUnit.Framework.Assert.AreEqual(3, journal.GetCommittedTxnIdForTests());
		}
 /// <summary>
 /// Shared body for the out-of-sync tests: runs recovery with one journal
 /// node stopped and checks the txid that recovery ends on.
 /// </summary>
 /// <param name="missingOnRecoveryIdx">Index of the journal node to stop before recovery.</param>
 /// <param name="expectedRecoveryTxnId">Last txid passed to CheckRecovery as the expected result.</param>
 /// <exception cref="System.Exception"/>
 private void DoOutOfSyncTest(int missingOnRecoveryIdx, long expectedRecoveryTxnId)
 {
     SetupLoggers345();
     QJMTestUtil.AssertExistsInQuorum(cluster, NNStorage.GetInProgressEditsFileName(1));

     // Take the chosen JN offline so that it cannot take part in recovery.
     cluster.GetJournalNode(missingOnRecoveryIdx).StopAndJoin(0);

     // Recover with a new QJM and check where the segment ends.
     qjm = CreateSpyingQJM();
     qjm.RecoverUnfinalizedSegments();
     CheckRecovery(cluster, 1, expectedRecoveryTxnId);
 }
Esempio n. 13
0
        /// <summary>
        /// For every pair (failA, failB) of IPC numbers up to the value returned
        /// by DetermineMaxIpcNumber, fails that IPC on loggers 0 and 1, runs the
        /// workload on a fresh cluster, and checks that a new QJM recovers at
        /// least everything that was acked and can keep writing.
        /// </summary>
        public virtual void TestRecoverAfterDoubleFailures()
        {
            long maxIpc = DetermineMaxIpcNumber();

            for (int failA = 1; failA <= maxIpc; failA++)
            {
                for (int failB = 1; failB <= maxIpc; failB++)
                {
                    string injectionStr = "(" + failA + ", " + failB + ")";
                    Log.Info("\n\n-------------------------------------------\n" + "Beginning test, failing at "
                             + injectionStr + "\n" + "-------------------------------------------\n\n");

                    // A new cluster per combination keeps the runs independent.
                    MiniJournalCluster miniCluster = new MiniJournalCluster.Builder(conf).Build();
                    QuorumJournalManager manager = null;
                    try
                    {
                        manager = CreateInjectableQJM(miniCluster);
                        manager.Format(QJMTestUtil.FakeNsinfo);

                        IList<AsyncLogger> injectable = manager.GetLoggerSetForTests().GetLoggersForTests();
                        FailIpcNumber(injectable[0], failA);
                        FailIpcNumber(injectable[1], failB);

                        int acked = DoWorkload(miniCluster, manager);
                        if (acked < 6)
                        {
                            Log.Info("Failed after injecting failures at " + injectionStr + ". This is expected since we injected a failure in the "
                                     + "majority.");
                        }
                        manager.Close();
                        manager = null;

                        // A new writer must now be able to recover and continue.
                        manager = CreateInjectableQJM(miniCluster);
                        long recoveredUpTo = QJMTestUtil.RecoverAndReturnLastTxn(manager);
                        NUnit.Framework.Assert.IsTrue(recoveredUpTo >= acked);
                        QJMTestUtil.WriteSegment(miniCluster, manager, recoveredUpTo + 1, 3, true);
                    }
                    catch (Exception failure)
                    {
                        // Wrap the failure with the injection pair so that the
                        // failing combination is easy to identify.
                        throw new RuntimeException("Test failed with injection: " + injectionStr, failure);
                    }
                    finally
                    {
                        miniCluster.Shutdown();
                        miniCluster = null;
                        IOUtils.CloseStream(manager);
                        manager = null;
                    }
                }
            }
        }
Esempio n. 14
0
		/// <summary>
		/// Starting a new segment at a higher txid must succeed even though the
		/// previous segment was never finalized.
		/// </summary>
		/// <remarks>
		/// Scenario: a client loses its connection in the middle of a segment, so
		/// later journal() calls for that segment may fail because some txns were
		/// missed while the connection was down. When the connection returns, the
		/// NN tries to start a new segment at a higher txid. This should abort the
		/// old segment and succeed.
		/// </remarks>
		/// <exception cref="System.Exception"/>
		public virtual void TestAbortOldSegmentIfFinalizeIsMissed()
		{
			journal.NewEpoch(FakeNsinfo, 1);

			// Open a segment at txid 1 and journal one batch of three txns.
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(2), 1, 1, 3, QJMTestUtil.CreateTxnData(1, 3));
			GenericTestUtils.AssertExists(journal.GetStorage().GetInProgressEditLog(1));

			// Skip ahead and open a segment at txid 6. The old segment should be
			// aborted and the call should succeed, so a batch of three txns
			// (6-8) can be journaled.
			journal.StartLogSegment(MakeRI(3), 6, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(4), 6, 6, 3, QJMTestUtil.CreateTxnData(6, 3));

			// The old segment must *not* have been finalized: both in-progress
			// files are expected to exist.
			GenericTestUtils.AssertExists(journal.GetStorage().GetInProgressEditLog(1));
			GenericTestUtils.AssertExists(journal.GetStorage().GetInProgressEditLog(6));
		}
 /// <summary>
 /// Runs recovery on the current QJM, requires it to fail with a
 /// QuorumException mentioning "Injected", and always closes the QJM.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 private void TryRecoveryExpectingFailure()
 {
     try
     {
         QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         // Reaching this line means recovery did not fail as required.
         NUnit.Framework.Assert.Fail("Expected to fail recovery");
     }
     catch (QuorumException injectedFailure)
     {
         GenericTestUtils.AssertExceptionContains("Injected", injectedFailure);
     }
     finally
     {
         qjm.Close();
     }
 }
        /// <summary>
        /// Writes five finalized segments of ten txns each and checks that
        /// SelectInputStreams can start at txid 25, which lies inside a segment
        /// rather than on a segment boundary.
        /// </summary>
        public virtual void TestSelectInputStreamsNotOnBoundary()
        {
            int txIdsPerSegment = 10;

            for (int txid = 1; txid <= 5 * txIdsPerSegment; txid += txIdsPerSegment)
            {
                QJMTestUtil.WriteSegment(cluster, qjm, txid, txIdsPerSegment, true);
            }
            FilePath curDir = cluster.GetCurrentDir(0, QJMTestUtil.Jid);

            GenericTestUtils.AssertGlobEquals(curDir, "edits_.*", NNStorage.GetFinalizedEditsFileName
                                                  (1, 10), NNStorage.GetFinalizedEditsFileName(11, 20), NNStorage.GetFinalizedEditsFileName
                                                  (21, 30), NNStorage.GetFinalizedEditsFileName(31, 40), NNStorage.GetFinalizedEditsFileName
                                                  (41, 50));
            AList <EditLogInputStream> streams = new AList <EditLogInputStream>();

            try
            {
                qjm.SelectInputStreams(streams, 25, false);
                QJMTestUtil.VerifyEdits(streams, 25, 50);
            }
            finally
            {
                // Release every selected stream even when verification fails,
                // using the same cleanup call as the other stream-reading
                // tests in this file.
                IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(streams, new IDisposable[0]));
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Becomes the active writer and tries to write two finalized segments,
        /// txids 1-3 and then 4-6.
        /// </summary>
        /// <returns>
        /// 6 if both segments were written, 3 if only the first one was, and 0 if
        /// a QuorumException occurred before the first segment completed.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private static int DoWorkload(MiniJournalCluster cluster, QuorumJournalManager qjm)
        {
            int ackedThrough = 0;

            try
            {
                qjm.RecoverUnfinalizedSegments();

                QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
                ackedThrough = 3;

                QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
                ackedThrough = 6;
            }
            catch (QuorumException quorumFailure)
            {
                // A quorum failure is logged rather than rethrown; the caller
                // learns from the return value how far the workload got.
                Log.Info("Failed to write at txid " + ackedThrough, quorumFailure);
            }

            return ackedThrough;
        }
        /// <summary>
        /// Flushes one op while the third logger's SendEdits future is never
        /// completed, and verifies that logger 0 is still told that txid 1 is
        /// committed.
        /// </summary>
        public virtual void TestWriteEditsOneSlow()
        {
            EditLogOutputStream segment = CreateLogSegment();

            QJMTestUtil.WriteOp(segment, 1);
            segment.SetReadyToFlush();

            // Loggers 0 and 1 are stubbed to answer straight away.
            for (int i = 0; i < 2; i++)
            {
                FutureReturns(null).When(spyLoggers[i]).SendEdits(
                    Matchers.AnyLong(), Matchers.Eq(1L), Matchers.Eq(1), Org.Mockito.Mockito.Any<byte[]>());
            }

            // Logger 2 returns a future that this test never completes.
            SettableFuture<Void> pendingForever = SettableFuture.Create();
            Org.Mockito.Mockito.DoReturn(pendingForever).When(spyLoggers[2]).SendEdits(
                Matchers.AnyLong(), Matchers.Eq(1L), Matchers.Eq(1), Org.Mockito.Mockito.Any<byte[]>());

            segment.Flush();
            Org.Mockito.Mockito.Verify(spyLoggers[0]).SetCommittedTxId(1L);
        }
        /// <summary>
        /// Logger 0 misses finalize(1-3) and start(4) while logger 1 fails at
        /// txn 4. Writing txn 4 must fail with "Writer out of sync", and a later
        /// recovery without logger 2 must end at txid 3.
        /// </summary>
        public virtual void TestMissFinalizeAndNextStart()
        {
            // Logger 0 is made to miss both finalize(1-3) and start(4).
            TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
                .When(spies[0])
                .FinalizeLogSegment(Org.Mockito.Mockito.Eq(1L), Org.Mockito.Mockito.Eq(3L));
            TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
                .When(spies[0])
                .StartLogSegment(
                    Org.Mockito.Mockito.Eq(4L),
                    Org.Mockito.Mockito.Eq(NameNodeLayoutVersion.CurrentLayoutVersion));

            // Logger 1 gets a fault injected at txn id 4.
            FailLoggerAtTxn(spies[1], 4L);

            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            EditLogOutputStream secondSegment =
                qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
            try
            {
                QJMTestUtil.WriteTxns(secondSegment, 4, 1);
                NUnit.Framework.Assert.Fail("Did not fail to write");
            }
            catch (QuorumException outOfSync)
            {
                // The write has to fail: logger 1 carries the injected fault and
                // logger 0 should notice that the writer is out of sync.
                GenericTestUtils.AssertExceptionContains("Writer out of sync", outOfSync);
            }
            finally
            {
                secondSegment.Abort();
                qjm.Close();
            }

            // State:
            // Logger 0: 1-3 in-progress (since it missed finalize)
            // Logger 1: 1-3 finalized
            // Logger 2: 1-3 finalized, 4 in-progress with one txn

            // Logger 2 is stopped so that it does not take part in recovery.
            cluster.GetJournalNode(2).StopAndJoin(0);
            qjm = CreateSpyingQJM();
            long lastRecovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);

            NUnit.Framework.Assert.AreEqual(3L, lastRecovered);
        }
Esempio n. 20
0
		/// <summary>
		/// Closes a journal that holds an unfinalized segment, re-creates it over
		/// the same directory, and checks that the storage info, the last promised
		/// epoch and the last segment txid are all still available.
		/// </summary>
		/// <exception cref="System.Exception"/>
		public virtual void TestRestartJournal()
		{
			journal.NewEpoch(FakeNsinfo, 1);
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(2), 1, 1, 2, QJMTestUtil.CreateTxnData(1, 2));
			// The segment is deliberately left unfinalized.

			string storageBefore = journal.GetStorage().ToColonSeparatedString();
			System.Console.Error.WriteLine("storage string: " + storageBefore);

			// Closing unlocks the storage dir so that it can be reopened below.
			journal.Close();

			// Re-instantiate and make sure the history is still there.
			journal = new Journal(
				conf, TestLogDir, Jid, HdfsServerConstants.StartupOption.Regular, mockErrorReporter);

			// The storage info should be readable even though no writer has
			// taken over yet.
			string storageAfter = journal.GetStorage().ToColonSeparatedString();
			NUnit.Framework.Assert.AreEqual(storageBefore, storageAfter);
			NUnit.Framework.Assert.AreEqual(1, journal.GetLastPromisedEpoch());

			QJournalProtocolProtos.NewEpochResponseProtoOrBuilder epochTwo = journal.NewEpoch(FakeNsinfo, 2);
			NUnit.Framework.Assert.AreEqual(1, epochTwo.GetLastSegmentTxId());
		}
Esempio n. 21
0
        /// <summary>
        /// Exercises the journal node's HTTP server: the default /jmx servlet,
        /// download of a finalized segment through /getJournal, and a 404 for a
        /// segment that does not exist.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestHttpServer()
        {
            string urlRoot = jn.GetHttpServerURI();
            // Check default servlets.
            string pageContents = DFSTestUtil.UrlGet(new Uri(urlRoot + "/jmx"));

            // NUnit's Assert.IsTrue takes the condition first and the message
            // second (the reverse of JUnit's assertTrue(message, condition)).
            NUnit.Framework.Assert.IsTrue(pageContents.Contains("Hadoop:service=JournalNode,name=JvmMetrics"),
                                          "Bad contents: " + pageContents);
            // Create some edits on server side
            byte[]           editsData = QJMTestUtil.CreateTxnData(1, 3);
            IPCLoggerChannel ch        = new IPCLoggerChannel(conf, FakeNsinfo, journalId, jn.GetBoundIpcAddress
                                                                  ());

            ch.NewEpoch(1).Get();
            ch.SetEpoch(1);
            ch.StartLogSegment(1, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
            ch.SendEdits(1L, 1, 3, editsData).Get();
            ch.FinalizeLogSegment(1, 3).Get();
            // Attempt to retrieve via HTTP, ensure we get the data back
            // including the header we expected
            byte[] retrievedViaHttp = DFSTestUtil.UrlGetBytes(new Uri(urlRoot + "/getJournal?segmentTxId=1&jid="
                                                                      + journalId));
            // Expected bytes: layout version, then the four zero bytes of the
            // layout flags section, then the edits themselves.
            byte[] expected = Bytes.Concat(Ints.ToByteArray(HdfsConstants.NamenodeLayoutVersion
                                                            ), (new byte[] { 0, 0, 0, 0 }), editsData);
            Assert.AssertArrayEquals(expected, retrievedViaHttp);
            // Attempt to fetch a non-existent file, check that we get an
            // error status code
            Uri badUrl = new Uri(urlRoot + "/getJournal?segmentTxId=12345&jid=" + journalId);
            HttpURLConnection connection = (HttpURLConnection)badUrl.OpenConnection();

            try
            {
                NUnit.Framework.Assert.AreEqual(404, connection.GetResponseCode());
            }
            finally
            {
                connection.Disconnect();
            }
        }
 /// <summary>
 /// Variant of the newer-segment-wins scenario: after recovery without JN0,
 /// JN0 returns and JN1 goes down while segment 101 is written. A later
 /// recovery using JN0 and JN1 must still reach txid 150 rather than being
 /// truncated by JN0's stale accepted recovery.
 /// </summary>
 public virtual void TestNewerVersionOfSegmentWins2()
 {
     SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery();

     // Step 1: recover while JN0 is absent.
     cluster.GetJournalNode(0).StopAndJoin(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnWithoutJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(100, lastTxnWithoutJn0);

         // Step 2: swap the missing node. JN0 comes back and JN1 crashes.
         cluster.RestartJournalNode(0);
         cluster.GetJournalNode(1).StopAndJoin(0);

         // Step 3: write the next segment and leave it unfinalized.
         QJMTestUtil.WriteSegment(cluster, qjm, 101, 50, false);
     }
     finally
     {
         qjm.Close();
     }

     // State:
     // JN0: 1-100 finalized, 101_inprogress (txns up to 150)
     // Previously, JN0 had an accepted recovery 101-101 from an earlier recovery
     // attempt.
     // JN1: 1-100 finalized
     // JN2: 1-100 finalized, 101_inprogress (txns up to 150)

     // Step 4: recover with JN1 restarted and JN2 stopped. The accepted
     // recovery 101-101 on JN0 must not truncate the log back to 101.
     cluster.RestartJournalNode(1);
     cluster.GetJournalNode(2).StopAndJoin(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnAfterSwap = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(150, lastTxnAfterSwap);
     }
     finally
     {
         qjm.Close();
     }
 }
Esempio n. 23
0
		/// <summary>
		/// Covers StartLogSegment() for a txid at which a segment already exists:
		/// allowed while the segment holds a single transaction, rejected once it
		/// holds more transactions or has been finalized.
		/// </summary>
		/// <exception cref="System.Exception"/>
		public virtual void TestStartLogSegmentWhenAlreadyExists()
		{
			const string notRejected = "Did not fail to start log segment which would overwrite "
				 + "an existing one";

			journal.NewEpoch(FakeNsinfo, 1);

			// Open a segment at txid 1 holding a single transaction, which would
			// normally be the START_LOG_SEGMENT transaction.
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(2), 1, 1, 1, QJMTestUtil.CreateTxnData(1, 1));

			// Restarting at txid 1 should succeed: a segment may be re-started as
			// long as only the START_LOG_SEGMENT transaction was ever logged.
			journal.StartLogSegment(MakeRI(3), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
			journal.Journal(MakeRI(4), 1, 1, 1, QJMTestUtil.CreateTxnData(1, 1));

			// This time more transactions follow, simulating real user
			// transactions, so another restart has to be refused.
			journal.Journal(MakeRI(5), 1, 2, 3, QJMTestUtil.CreateTxnData(2, 3));
			try
			{
				journal.StartLogSegment(MakeRI(6), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
				NUnit.Framework.Assert.Fail(notRejected);
			}
			catch (InvalidOperationException hasValidTxns)
			{
				GenericTestUtils.AssertExceptionContains("seems to contain valid transactions", hasValidTxns);
			}

			// A finalized segment must not be overwritten either.
			journal.FinalizeLogSegment(MakeRI(7), 1, 4);
			try
			{
				journal.StartLogSegment(MakeRI(8), 1, NameNodeLayoutVersion.CurrentLayoutVersion);
				NUnit.Framework.Assert.Fail(notRejected);
			}
			catch (InvalidOperationException alreadyFinalized)
			{
				GenericTestUtils.AssertExceptionContains("have a finalized segment", alreadyFinalized);
			}
		}
Esempio n. 24
0
		/// <summary>
		/// Checks that GetSegmentInfo() reports the expected start/end txids
		/// and in-progress flag for a segment filled with garbage transactions,
		/// both before and after the segment is finalized.
		/// </summary>
		public virtual void TestScanEditLog()
		{
			int numTxns = 5;

			// Open the segment with CurrentLayoutVersion - 1, which this test
			// treats as a future layout version.
			journal.StartLogSegment(MakeRI(1), 1, NameNodeLayoutVersion.CurrentLayoutVersion - 1);

			// Fill the segment with garbage edits: they can be scanned but
			// cannot be decoded.
			byte[] garbageOps = QJMTestUtil.CreateGabageTxns(1, numTxns);
			journal.Journal(MakeRI(2), 1, 1, numTxns, garbageOps);

			// Inspect the segment while it is still in progress.
			QJournalProtocolProtos.SegmentStateProto openState = journal.GetSegmentInfo(1);
			NUnit.Framework.Assert.IsTrue(openState.GetIsInProgress());
			NUnit.Framework.Assert.AreEqual(numTxns, openState.GetEndTxId());
			NUnit.Framework.Assert.AreEqual(1, openState.GetStartTxId());

			// Finalize it and inspect it once more.
			journal.FinalizeLogSegment(MakeRI(3), 1, numTxns);
			QJournalProtocolProtos.SegmentStateProto closedState = journal.GetSegmentInfo(1);
			NUnit.Framework.Assert.IsFalse(closedState.GetIsInProgress());
			NUnit.Framework.Assert.AreEqual(numTxns, closedState.GetEndTxId());
			NUnit.Framework.Assert.AreEqual(1, closedState.GetStartTxId());
		}
        /// <summary>
        /// Starts a segment at txid 4 and aborts it before writing anything,
        /// then verifies that a fresh QuorumJournalManager recovers 1-3 and
        /// can write a new segment starting at txid 4.
        /// </summary>
        public virtual void TestCrashAtBeginningOfSegment()
        {
            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

            // Open the next segment but abort the stream without writing to it.
            EditLogOutputStream abandonedStream = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
            try
            {
                WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
            }
            finally
            {
                abandonedStream.Abort();
            }

            // Replace the writer with a brand-new QJM and run recovery on it.
            qjm = CloseLater(new QuorumJournalManager(
                conf,
                cluster.GetQuorumJournalURI(QJMTestUtil.Jid),
                QJMTestUtil.FakeNsinfo));
            qjm.RecoverUnfinalizedSegments();
            CheckRecovery(cluster, 1, 3);

            QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
        }
Esempio n. 26
0
 /// <summary>
 /// Checks the last-segment txid returned by NewEpoch() in three situations:
 /// with an unclosed segment, right after finalizing it, and after starting
 /// a segment that has no transactions written to it.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestReturnsSegmentInfoAtEpochTransition()
 {
     ch.NewEpoch(1).Get();
     ch.SetEpoch(1);
     ch.StartLogSegment(1, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
     ch.SendEdits(1L, 1, 2, QJMTestUtil.CreateTxnData(1, 2)).Get();

     // Epoch 2: the earlier segment has not been closed yet.
     QJournalProtocolProtos.NewEpochResponseProto whileSegmentOpen = ch.NewEpoch(2).Get();
     ch.SetEpoch(2);
     NUnit.Framework.Assert.AreEqual(1, whileSegmentOpen.GetLastSegmentTxId());

     ch.FinalizeLogSegment(1, 2).Get();

     // Epoch 3: the earlier segment has just been closed.
     QJournalProtocolProtos.NewEpochResponseProto afterFinalize = ch.NewEpoch(3).Get();
     ch.SetEpoch(3);
     NUnit.Framework.Assert.AreEqual(1, afterFinalize.GetLastSegmentTxId());

     // Epoch 4: a new segment was started but nothing was written to it.
     ch.StartLogSegment(3, NameNodeLayoutVersion.CurrentLayoutVersion).Get();
     QJournalProtocolProtos.NewEpochResponseProto afterEmptySegment = ch.NewEpoch(4).Get();
     ch.SetEpoch(4);
     // An empty segment counts the same as one that was never started, so
     // the txid of the prior segment is what should come back.
     NUnit.Framework.Assert.AreEqual(1, afterEmptySegment.GetLastSegmentTxId());
 }
        /// <summary>
        /// Drives the loggers into this state:
        /// - JN0: edits 1-3 in progress
        /// - JN1: edits 1-4 in progress
        /// - JN2: edits 1-5 in progress
        /// No logger ends up with any associated paxos info.
        /// </summary>
        /// <exception cref="System.Exception"/>
        private void SetupLoggers345()
        {
            EditLogOutputStream segmentStream = qjm.StartLogSegment(1, NameNodeLayoutVersion.CurrentLayoutVersion);

            // JN0 is set to fail at txid 4 and JN1 at txid 5.
            FailLoggerAtTxn(spies[0], 4);
            FailLoggerAtTxn(spies[1], 5);

            QJMTestUtil.WriteTxns(segmentStream, 1, 3);

            // Txid 4 still reaches 2 of the 3 loggers, so it should succeed.
            QJMTestUtil.WriteTxns(segmentStream, 4, 1);

            // Txid 5 reaches only the logger at index 2, so the write should
            // fail for lack of a quorum.
            try
            {
                QJMTestUtil.WriteTxns(segmentStream, 5, 1);
                NUnit.Framework.Assert.Fail("Did not fail to write when only a minority succeeded");
            }
            catch (QuorumException noQuorum)
            {
                GenericTestUtils.AssertExceptionContains("too many exceptions to achieve quorum size 2/3", noQuorum);
            }
        }
Esempio n. 28
0
        /// <summary>
        /// Starts a segment at <paramref name="txid"/>, writes up to
        /// <paramref name="numTxns"/> transactions one at a time, then closes
        /// and finalizes the segment. Any exception raised along the way is
        /// stored in <paramref name="thrown"/> instead of being propagated.
        /// </summary>
        /// <param name="cluster">Not used by this method.</param>
        /// <param name="qjm">Journal manager used to write the segment.</param>
        /// <param name="txid">First transaction ID of the segment.</param>
        /// <param name="numTxns">Number of transactions to attempt.</param>
        /// <param name="thrown">Receives the caught exception, if any, in its held field.</param>
        /// <returns>
        /// The ID of the last transaction whose write returned without
        /// throwing, or <c>txid - 1</c> when none did.
        /// </returns>
        private long WriteSegmentUntilCrash(MiniJournalCluster cluster, QuorumJournalManager
                                            qjm, long txid, int numTxns, Holder <Exception> thrown)
        {
            long firstTxId = txid;
            long lastAcked = firstTxId - 1;

            try
            {
                EditLogOutputStream segmentStream = qjm.StartLogSegment(firstTxId, NameNodeLayoutVersion.CurrentLayoutVersion);
                long stopBefore = firstTxId + numTxns;
                for (long nextTxId = firstTxId; nextTxId < stopBefore; nextTxId++)
                {
                    QJMTestUtil.WriteTxns(segmentStream, nextTxId, 1);
                    // Only reached when the write above did not throw.
                    lastAcked = nextTxId;
                }
                segmentStream.Close();
                qjm.FinalizeLogSegment(firstTxId, lastAcked);
            }
            catch (Exception failure)
            {
                thrown.held = failure;
            }
            return lastAcked;
        }
        /// <summary>
        /// Builds a tricky edge-case state that several tests share.
        /// </summary>
        /// <remarks>
        /// Initial writer:
        /// - Writes to 3 JNs: JN0, JN1, JN2.
        /// - A log segment with txnid 1 through 100 succeeds.
        /// - The first transaction of the next segment reaches only JN0
        /// before the writer crashes (eg it is partitioned).
        /// Recovery by another writer:
        /// - The new NN starts recovery and talks to all three, so it sees
        /// that the newest log segment needing recovery is 101.
        /// - It sends prepareRecovery(101) and decides that the recovery
        /// length for 101 is only the 1 transaction.
        /// - It sends acceptRecovery(101-101) to JN0 only, then crashes.
        /// Resulting state:
        /// - JN0: 1-100 finalized, 101_inprogress, accepted recovery: 101-101
        /// - JN1: 1-100 finalized, 101_inprogress.empty
        /// - JN2: 1-100 finalized, 101_inprogress.empty
        /// (the .empty files got moved aside during recovery)
        /// </remarks>
        /// <exception cref="System.Exception"></exception>
        private void SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery()
        {
            // Journal nodes that must not end up with txid 101.
            int[] otherNodes = new int[] { 1, 2 };

            // The segment holding txns 1-100 is written successfully.
            QJMTestUtil.WriteSegment(cluster, qjm, 1, 100, true);

            // Let the next segment reach only one of the three nodes.
            foreach (int node in otherNodes)
            {
                FailLoggerAtTxn(spies[node], 101);
            }
            try
            {
                QJMTestUtil.WriteSegment(cluster, qjm, 101, 1, true);
                NUnit.Framework.Assert.Fail("Should have failed");
            }
            catch (QuorumException writeFailure)
            {
                GenericTestUtils.AssertExceptionContains("mock failure", writeFailure);
            }
            finally
            {
                qjm.Close();
            }

            // Recovery 1:
            // acceptRecovery() is allowed through only on the node that has
            // txid 101, so recovery should fail with just 1/3 acceptances.
            qjm = CreateSpyingQJM();
            spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
            foreach (int node in otherNodes)
            {
                TestQuorumJournalManagerUnit.FutureThrows(new IOException("mock failure"))
                    .When(spies[node])
                    .AcceptRecovery(
                        Org.Mockito.Mockito.Any<QJournalProtocolProtos.SegmentStateProto>(),
                        Org.Mockito.Mockito.Any<Uri>());
            }
            try
            {
                qjm.RecoverUnfinalizedSegments();
                NUnit.Framework.Assert.Fail("Should have failed to recover");
            }
            catch (QuorumException recoveryFailure)
            {
                GenericTestUtils.AssertExceptionContains("mock failure", recoveryFailure);
            }
            finally
            {
                qjm.Close();
            }

            // Confirm the on-disk edits files match the state described in
            // the remarks above.
            GenericTestUtils.AssertGlobEquals(
                cluster.GetCurrentDir(0, QJMTestUtil.Jid),
                "edits_.*",
                NNStorage.GetFinalizedEditsFileName(1, 100),
                NNStorage.GetInProgressEditsFileName(101));
            foreach (int node in otherNodes)
            {
                GenericTestUtils.AssertGlobEquals(
                    cluster.GetCurrentDir(node, QJMTestUtil.Jid),
                    "edits_.*",
                    NNStorage.GetFinalizedEditsFileName(1, 100),
                    NNStorage.GetInProgressEditsFileName(101) + ".empty");
            }

            // Resolve every paxos directory first, then check their contents:
            // only JN0 should hold an entry ("101").
            FilePath[] paxosDirs = new FilePath[3];
            for (int node = 0; node < paxosDirs.Length; node++)
            {
                paxosDirs[node] = new FilePath(cluster.GetCurrentDir(node, QJMTestUtil.Jid), "paxos");
            }
            GenericTestUtils.AssertGlobEquals(paxosDirs[0], ".*", "101");
            GenericTestUtils.AssertGlobEquals(paxosDirs[1], ".*");
            GenericTestUtils.AssertGlobEquals(paxosDirs[2], ".*");
        }
        /// <summary>
        /// Covers the situation where, right at the start of a segment, a
        /// transaction has been written to one JN but not to the others.
        /// </summary>
        /// <param name="nodeWithOneTxn">
        /// Index of the journal node that ends up holding the single
        /// transaction; the other two roles are derived from it modulo 3.
        /// </param>
        /// <exception cref="System.Exception"/>
        public virtual void DoTestOutOfSyncAtBeginningOfSegment(int nodeWithOneTxn)
        {
            int emptySegmentNode = (nodeWithOneTxn + 1) % 3;
            int absentSegmentNode = (nodeWithOneTxn + 2) % 3;

            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            WaitForAllPendingCalls(qjm.GetLoggerSetForTests());
            cluster.GetJournalNode(absentSegmentNode).StopAndJoin(0);

            // With one node down, the segment is opened on 2/3 nodes.
            EditLogOutputStream segmentStream = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);
            try
            {
                WaitForAllPendingCalls(qjm.GetLoggerSetForTests());

                // Let the transaction land on only 1/3 nodes.
                FailLoggerAtTxn(spies[emptySegmentNode], 4);
                try
                {
                    QJMTestUtil.WriteTxns(segmentStream, 4, 1);
                    NUnit.Framework.Assert.Fail("Did not fail even though 2/3 failed");
                }
                catch (QuorumException writeFailure)
                {
                    GenericTestUtils.AssertExceptionContains("mock failure", writeFailure);
                }
            }
            finally
            {
                segmentStream.Abort();
            }

            // Restart the JN that was taken down.
            cluster.RestartJournalNode(absentSegmentNode);

            // Expected state before a new QJM is created:
            // A: emptySegmentNode:  1-3 finalized, 4_inprogress (empty)
            // B: nodeWithOneTxn:    1-3 finalized, 4_inprogress (1 txn)
            // C: absentSegmentNode: 1-3 finalized
            GenericTestUtils.AssertGlobEquals(
                cluster.GetCurrentDir(emptySegmentNode, QJMTestUtil.Jid),
                "edits_.*",
                NNStorage.GetFinalizedEditsFileName(1, 3),
                NNStorage.GetInProgressEditsFileName(4));
            GenericTestUtils.AssertGlobEquals(
                cluster.GetCurrentDir(nodeWithOneTxn, QJMTestUtil.Jid),
                "edits_.*",
                NNStorage.GetFinalizedEditsFileName(1, 3),
                NNStorage.GetInProgressEditsFileName(4));
            GenericTestUtils.AssertGlobEquals(
                cluster.GetCurrentDir(absentSegmentNode, QJMTestUtil.Jid),
                "edits_.*",
                NNStorage.GetFinalizedEditsFileName(1, 3));

            // Take node 2 down. The test is run three times with the node
            // roles rotated, so every permutation gets exercised.
            cluster.GetJournalNode(2).StopAndJoin(0);
            qjm = CreateSpyingQJM();
            qjm.RecoverUnfinalizedSegments();

            if (nodeWithOneTxn != 0 && nodeWithOneTxn != 1)
            {
                // The node holding the transaction did not take part in
                // recovery, so only 1-3 should be recovered and a segment
                // starting at 4 should be writable.
                CheckRecovery(cluster, 1, 3);
                QJMTestUtil.WriteSegment(cluster, qjm, 4, 3, true);
            }
            else
            {
                // The node holding the committed transaction responded during
                // recovery, so txid 4 should have been recovered.
                CheckRecovery(cluster, 4, 4);
                QJMTestUtil.WriteSegment(cluster, qjm, 5, 3, true);
            }
        }