/// <summary>
/// Checks that transactions written by a later writer while JN0 was down are
/// all recovered once JN0 is back.
/// </summary>
/// <remarks>
/// After the shared edge-case setup, JN0 is stopped, recovery is expected to
/// end at txid 100, and a segment starting at txid 101 is written without
/// being finalized. JN0 is then restarted and a fresh recovery must report
/// txid 150 as the last transaction.
/// </remarks>
public virtual void TestNewerVersionOfSegmentWins()
 {
     SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery();

     // Phase 1: a writer runs while JN0 is absent.
     cluster.GetJournalNode(0).StopAndJoin(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnWithoutJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(100, lastTxnWithoutJn0);

         // Leave the new segment in progress (last argument: do not finalize).
         QJMTestUtil.WriteSegment(cluster, qjm, 101, 50, false);
     }
     finally
     {
         qjm.Close();
     }

     // Phase 2: bring JN0 back and recover with a new writer; every
     // transaction written in phase 1 must still be there.
     cluster.RestartJournalNode(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnWithJn0 = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(150, lastTxnWithJn0);
     }
     finally
     {
         qjm.Close();
     }
 }
        /// <summary>
        /// Exercises recovery when a journal node's on-disk edit log ends up newer
        /// than its accepted Paxos recovery data, and checks that a later recovery
        /// still reaches at least txid 4.
        /// </summary>
        /// <remarks>
        /// The static <c>JournalFaultInjector.instance</c> is replaced with a mock
        /// for the duration of the test and put back afterwards, so an assertion
        /// failure part-way through cannot leave an armed mock installed for
        /// whatever runs next.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestCrashBetweenSyncLogAndPersistPaxosData()
        {
            // Remember the injector that was installed so it can be restored.
            JournalFaultInjector previousInjector = JournalFaultInjector.instance;
            JournalFaultInjector faultInjector    = Org.Mockito.Mockito.Mock <JournalFaultInjector>();

            JournalFaultInjector.instance = faultInjector;
            try
            {
                SetupLoggers345();
                // Run recovery where the client only talks to JN0, JN1, such that it
                // decides that the correct length is through txid 4.
                // Only allow it to call acceptRecovery() on JN0.
                qjm   = CreateSpyingQJM();
                spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
                cluster.GetJournalNode(2).StopAndJoin(0);
                InjectIOE().When(spies[1]).AcceptRecovery(Org.Mockito.Mockito.Any <QJournalProtocolProtos.SegmentStateProto
                                                                                   >(), Org.Mockito.Mockito.Any <Uri>());
                // TryRecoveryExpectingFailure() closes qjm itself.
                TryRecoveryExpectingFailure();
                cluster.RestartJournalNode(2);
                // State at this point:
                // JN0: edit log for 1-4, paxos recovery data for txid 4
                // JN1: edit log for 1-4,
                // JN2: edit log for 1-5
                // Run recovery again, but don't allow JN0 to respond to the
                // prepareRecovery() call. This will cause recovery to decide
                // on txid 5.
                // Additionally, crash all of the nodes before they persist
                // any new paxos data.
                qjm   = CreateSpyingQJM();
                spies = qjm.GetLoggerSetForTests().GetLoggersForTests();
                InjectIOE().When(spies[0]).PrepareRecovery(Org.Mockito.Mockito.Eq(1L));
                Org.Mockito.Mockito.DoThrow(new IOException("Injected")).When(faultInjector).BeforePersistPaxosData
                    ();
                TryRecoveryExpectingFailure();
                // Disarm the injected fault before the final recovery attempt.
                Org.Mockito.Mockito.Reset(faultInjector);
                // State at this point:
                // JN0: edit log for 1-5, paxos recovery data for txid 4
                // !!!   This is the interesting bit, above. The on-disk data and the
                //       paxos data don't match up!
                // JN1: edit log for 1-5,
                // JN2: edit log for 1-5,
                // Now, stop JN2, and see if we can still start up even though
                // JN0 is in a strange state where its log data is actually newer
                // than its accepted Paxos state.
                cluster.GetJournalNode(2).StopAndJoin(0);
                qjm = CreateSpyingQJM();
                try
                {
                    long recovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
                    // 4 was committed to a quorum
                    NUnit.Framework.Assert.IsTrue(recovered >= 4);
                }
                finally
                {
                    qjm.Close();
                }
            }
            finally
            {
                // Put the original injector back even if an assertion above failed.
                JournalFaultInjector.instance = previousInjector;
            }
        }
        // Example no. 3
        // 0
        /// <summary>
        /// For every pair of IPC numbers up to <c>DetermineMaxIpcNumber()</c>,
        /// fails that IPC on logger 0 and logger 1 respectively, runs the workload,
        /// and then checks that a fresh writer can recover at least the last acked
        /// transaction and write a further finalized segment.
        /// </summary>
        /// <remarks>
        /// Each combination uses its own <c>MiniJournalCluster</c>. Any exception is
        /// rethrown wrapped in a <c>RuntimeException</c> that names the failing pair.
        /// </remarks>
        public virtual void TestRecoverAfterDoubleFailures()
        {
            long ipcLimit = DetermineMaxIpcNumber();

            for (int firstFailIpc = 1; firstFailIpc <= ipcLimit; firstFailIpc++)
            {
                for (int secondFailIpc = 1; secondFailIpc <= ipcLimit; secondFailIpc++)
                {
                    string failurePair = "(" + firstFailIpc + ", " + secondFailIpc + ")";
                    string banner      = "\n\n-------------------------------------------\n" + "Beginning test, failing at "
                                         + failurePair + "\n" + "-------------------------------------------\n\n";
                    Log.Info(banner);

                    MiniJournalCluster   journalCluster = new MiniJournalCluster.Builder(conf).Build();
                    QuorumJournalManager writer         = null;
                    try
                    {
                        writer = CreateInjectableQJM(journalCluster);
                        writer.Format(QJMTestUtil.FakeNsinfo);

                        // Arm one failure on each of the first two loggers.
                        IList <AsyncLogger> asyncLoggers = writer.GetLoggerSetForTests().GetLoggersForTests();
                        FailIpcNumber(asyncLoggers[0], firstFailIpc);
                        FailIpcNumber(asyncLoggers[1], secondFailIpc);

                        int ackedThrough = DoWorkload(journalCluster, writer);
                        if (ackedThrough < 6)
                        {
                            Log.Info("Failed after injecting failures at " + failurePair + ". This is expected since we injected a failure in the "
                                     + "majority.");
                        }

                        // Drop the reference once closed so the finally block
                        // does not close the same writer a second time.
                        writer.Close();
                        writer = null;

                        // Now should be able to recover
                        writer = CreateInjectableQJM(journalCluster);
                        long recoveredThrough = QJMTestUtil.RecoverAndReturnLastTxn(writer);
                        NUnit.Framework.Assert.IsTrue(recoveredThrough >= ackedThrough);
                        QJMTestUtil.WriteSegment(journalCluster, writer, recoveredThrough + 1, 3, true);
                    }
                    catch (Exception failure)
                    {
                        // Test failure! Rethrow with the test setup info so it can be
                        // easily triaged.
                        throw new RuntimeException("Test failed with injection: " + failurePair, failure);
                    }
                    finally
                    {
                        journalCluster.Shutdown();
                        IOUtils.CloseStream(writer);
                    }
                }
            }
        }
 /// <summary>
 /// Runs recovery on the current <c>qjm</c> and requires it to fail with a
 /// <c>QuorumException</c> whose text contains "Injected". The <c>qjm</c> is
 /// closed afterwards whether or not that expectation holds.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 private void TryRecoveryExpectingFailure()
 {
     try
     {
         QJMTestUtil.RecoverAndReturnLastTxn(qjm);

         // Only reached when recovery unexpectedly succeeded.
         NUnit.Framework.Assert.Fail("Expected to fail recovery");
     }
     catch (QuorumException injectedFailure)
     {
         // The failure must be the injected one, not an unrelated error.
         GenericTestUtils.AssertExceptionContains("Injected", injectedFailure);
     }
     finally
     {
         qjm.Close();
     }
 }
        /// <summary>
        /// Makes logger 0 miss both finalize(1-3) and start(4), makes logger 1 fail
        /// at txid 4, and checks that writing txid 4 fails with "Writer out of sync".
        /// It then stops journal node 2 and checks that recovery ends at txid 3.
        /// </summary>
        /// <remarks>
        /// Every <c>qjm</c> used here is closed in a <c>finally</c> block, including
        /// the one created for the final recovery, so a failed assertion does not
        /// leave a journal manager open.
        /// </remarks>
        public virtual void TestMissFinalizeAndNextStart()
        {
            // Logger 0: miss finalize(1-3) and start(4)
            TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
                .When(spies[0])
                .FinalizeLogSegment(Org.Mockito.Mockito.Eq(1L), Org.Mockito.Mockito.Eq(3L));
            TestQuorumJournalManagerUnit.FutureThrows(new IOException("injected"))
                .When(spies[0])
                .StartLogSegment(Org.Mockito.Mockito.Eq(4L),
                                 Org.Mockito.Mockito.Eq(NameNodeLayoutVersion.CurrentLayoutVersion));
            // Logger 1: fail at txn id 4
            FailLoggerAtTxn(spies[1], 4L);
            QJMTestUtil.WriteSegment(cluster, qjm, 1, 3, true);
            EditLogOutputStream stm = qjm.StartLogSegment(4, NameNodeLayoutVersion.CurrentLayoutVersion);

            try
            {
                QJMTestUtil.WriteTxns(stm, 4, 1);
                NUnit.Framework.Assert.Fail("Did not fail to write");
            }
            catch (QuorumException qe)
            {
                // Should fail, because logger 1 had an injected fault and
                // logger 0 should detect writer out of sync
                GenericTestUtils.AssertExceptionContains("Writer out of sync", qe);
            }
            finally
            {
                // Close the journal manager even if aborting the stream throws.
                try
                {
                    stm.Abort();
                }
                finally
                {
                    qjm.Close();
                }
            }
            // State:
            // Logger 0: 1-3 in-progress (since it missed finalize)
            // Logger 1: 1-3 finalized
            // Logger 2: 1-3 finalized, 4 in-progress with one txn
            // Shut down logger 2 so it doesn't participate in recovery
            cluster.GetJournalNode(2).StopAndJoin(0);
            qjm = CreateSpyingQJM();
            try
            {
                long recovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);

                NUnit.Framework.Assert.AreEqual(3L, recovered);
            }
            finally
            {
                // Close the recovering writer, as the other tests in this file do.
                qjm.Close();
            }
        }
 /// <summary>
 /// Checks that an accepted recovery left on JN0 by an earlier attempt does
 /// not truncate a newer, longer in-progress segment during a later recovery.
 /// </summary>
 /// <remarks>
 /// Recovery first runs without JN0 and must end at txid 100. JN0 is then
 /// restarted, JN1 is stopped, and an unfinalized segment starting at txid 101
 /// is written. Finally JN1 is restarted, JN2 is stopped, and a new recovery
 /// must report txid 150.
 /// </remarks>
 public virtual void TestNewerVersionOfSegmentWins2()
 {
     SetupEdgeCaseOneJnHasSegmentWithAcceptedRecovery();

     // Recover without JN0 present.
     cluster.GetJournalNode(0).StopAndJoin(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnAfterFirstRecovery = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(100, lastTxnAfterFirstRecovery);

         // After recovery, JN0 comes back to life and JN1 crashes.
         cluster.RestartJournalNode(0);
         cluster.GetJournalNode(1).StopAndJoin(0);

         // Write segment but do not finalize
         QJMTestUtil.WriteSegment(cluster, qjm, 101, 50, false);
     }
     finally
     {
         qjm.Close();
     }

     // State:
     // JN0: 1-100 finalized, 101_inprogress (txns up to 150)
     // Previously, JN0 had an accepted recovery 101-101 from an earlier recovery
     // attempt.
     // JN1: 1-100 finalized
     // JN2: 1-100 finalized, 101_inprogress (txns up to 150)
     // We need to test that the accepted recovery 101-101 on JN0 doesn't
     // end up truncating the log back to 101.
     cluster.RestartJournalNode(1);
     cluster.GetJournalNode(2).StopAndJoin(0);
     qjm = CreateSpyingQJM();
     try
     {
         long lastTxnAfterSecondRecovery = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
         NUnit.Framework.Assert.AreEqual(150, lastTxnAfterSecondRecovery);
     }
     finally
     {
         qjm.Close();
     }
 }
        // Example no. 7
        // 0
        /// <summary>
        /// Runs <c>NumWriterIters</c> successive writers against one journal cluster,
        /// each built by <c>CreateRandomFaultyQJM</c>, and checks after every
        /// successful recovery that no acknowledged transaction has been lost.
        /// </summary>
        /// <remarks>
        /// The random seed is logged so a failing run can be repeated. A seed can be
        /// supplied through the environment variable named by
        /// <c>RandSeedProperty</c>; when it is absent or not a valid integer a
        /// fresh random seed is used.
        /// NOTE(review): the environment variable stands in for the Java system
        /// property this port originally read; confirm that this is the desired
        /// configuration source.
        /// </remarks>
        public virtual void TestRandomized()
        {
            long seed;

            // A nullable is required here: a plain long can never be null, which
            // would make the "no seed supplied" branch unreachable.
            long?  userSpecifiedSeed = null;
            string seedText          = System.Environment.GetEnvironmentVariable(RandSeedProperty);
            long   parsedSeed;

            if (seedText != null && long.TryParse(seedText, System.Globalization.NumberStyles.Integer,
                                                  System.Globalization.CultureInfo.InvariantCulture, out parsedSeed))
            {
                userSpecifiedSeed = parsedSeed;
            }

            if (userSpecifiedSeed.HasValue)
            {
                Log.Info("Using seed specified in system property");
                seed = userSpecifiedSeed.Value;
                // If the user specifies a seed, then we should gather all the
                // IPC trace information so that debugging is easier. This makes
                // the test run about 25% slower otherwise.
                ((Log4JLogger)ProtobufRpcEngine.Log).GetLogger().SetLevel(Level.All);
            }
            else
            {
                seed = new Random().NextLong();
            }
            Log.Info("Random seed: " + seed);
            Random             r       = new Random(seed);
            MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).Build();

            try
            {
                // Format the cluster using a non-faulty QJM. This happens inside the
                // try so the cluster is still shut down if formatting fails.
                QuorumJournalManager qjmForInitialFormat = CreateInjectableQJM(cluster);

                qjmForInitialFormat.Format(QJMTestUtil.FakeNsinfo);
                qjmForInitialFormat.Close();

                long txid      = 0;
                long lastAcked = 0;
                for (int i = 0; i < NumWriterIters; i++)
                {
                    Log.Info("Starting writer " + i + "\n-------------------");
                    QuorumJournalManager qjm = CreateRandomFaultyQJM(cluster, r);
                    try
                    {
                        long recovered;
                        try
                        {
                            recovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
                        }
                        catch (Exception t)
                        {
                            // A failed recovery is tolerated as long as CheckException
                            // accepts it; move on to the next writer.
                            Log.Info("Failed recovery", t);
                            CheckException(t);
                            continue;
                        }
                        // NUnit takes the condition first and the message second.
                        NUnit.Framework.Assert.IsTrue(recovered >= lastAcked,
                                                      "Recovered only up to txnid " + recovered + " but had gotten an ack for "
                                                      + lastAcked);
                        txid = recovered + 1;
                        // Periodically purge old data on disk so it's easier to look
                        // at failure cases.
                        if (txid > 100 && i % 10 == 1)
                        {
                            qjm.PurgeLogsOlderThan(txid - 100);
                        }
                        Holder <Exception> thrown = new Holder <Exception>(null);
                        for (int j = 0; j < SegmentsPerWriter; j++)
                        {
                            lastAcked = WriteSegmentUntilCrash(cluster, qjm, txid, 4, thrown);
                            if (thrown.held != null)
                            {
                                Log.Info("Failed write", thrown.held);
                                CheckException(thrown.held);
                                break;
                            }
                            txid += 4;
                        }
                    }
                    finally
                    {
                        qjm.Close();
                    }
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }