Exemple #1
0
        // Set up fault injection mock.
        /// <summary>
        /// Performs one fault-free run of the workload and reports how many RPCs
        /// each logger issued.
        /// </summary>
        /// <remarks>
        /// The result sets the bounds for the fault-injection test cases, so they
        /// can exhaustively explore the space of potential failures.
        /// </remarks>
        /// <returns>The single RPC count shared by all loggers.</returns>
        /// <exception cref="System.Exception"/>
        private static long DetermineMaxIpcNumber()
        {
            Configuration configuration = new Configuration();
            MiniJournalCluster journalCluster = new MiniJournalCluster.Builder(configuration).Build();
            QuorumJournalManager journalManager = null;

            try
            {
                journalManager = CreateInjectableQJM(journalCluster);
                journalManager.Format(QJMTestUtil.FakeNsinfo);
                DoWorkload(journalCluster, journalManager);

                // Gather the per-logger RPC counts once every pending call has drained.
                ICollection<int> distinctCounts = Sets.NewTreeSet();
                foreach (AsyncLogger logger in journalManager.GetLoggerSetForTests().GetLoggersForTests())
                {
                    TestQJMWithFaults.InvocationCountingChannel channel =
                        (TestQJMWithFaults.InvocationCountingChannel)logger;
                    channel.WaitForAllPendingCalls();
                    distinctCounts.AddItem(channel.GetRpcCount());
                }

                // With no failures injected, every logger should have sent the same
                // number of RPCs, leaving exactly one distinct count.
                NUnit.Framework.Assert.AreEqual(1, distinctCounts.Count);
                long maxIpcCount = distinctCounts.First();
                Log.Info("Max IPC count = " + maxIpcCount);
                return maxIpcCount;
            }
            finally
            {
                // Runs on both the normal return and any failure above.
                IOUtils.CloseStream(journalManager);
                journalCluster.Shutdown();
            }
        }
        /// <summary>
        /// Checks that rolling back a rolling upgrade on a NameNode whose edits
        /// live in a quorum journal discards the edits made after the upgrade
        /// was prepared.
        /// </summary>
        /// <remarks>
        /// Creates /foo, prepares a rolling upgrade, creates /bar, restarts the
        /// NameNode with "-rollingUpgrade rollback", and asserts that /foo exists
        /// while /bar does not. The storage directory of every journal node is
        /// then checked.
        /// </remarks>
        public virtual void TestRollbackWithQJM()
        {
            Configuration      conf    = new HdfsConfiguration();
            MiniJournalCluster mjc     = null;
            MiniDFSCluster     cluster = null;
            Path foo = new Path("/foo");
            Path bar = new Path("/bar");

            try
            {
                mjc = new MiniJournalCluster.Builder(conf).NumJournalNodes(NumJournalNodes).Build
                          ();
                // Point the NameNode's edits directory at the quorum journal.
                conf.Set(DFSConfigKeys.DfsNamenodeEditsDirKey, mjc.GetQuorumJournalURI(JournalId)
                         .ToString());
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Build();
                cluster.WaitActive();
                DistributedFileSystem dfs = cluster.GetFileSystem();
                DFSAdmin dfsadmin         = new DFSAdmin(conf);
                dfs.Mkdirs(foo);
                // start rolling upgrade
                dfs.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
                NUnit.Framework.Assert.AreEqual(0, dfsadmin.Run(new string[] { "-rollingUpgrade",
                                                                               "prepare" }));
                dfs.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave);
                // create new directory
                dfs.Mkdirs(bar);
                dfs.Close();
                // rollback
                cluster.RestartNameNode("-rollingUpgrade", "rollback");
                // make sure /foo is still there, but /bar is not
                dfs = cluster.GetFileSystem();
                NUnit.Framework.Assert.IsTrue(dfs.Exists(foo));
                NUnit.Framework.Assert.IsFalse(dfs.Exists(bar));
                // check storage in JNs
                for (int i = 0; i < NumJournalNodes; i++)
                {
                    // Inspect the i-th journal node; a fixed index would check the
                    // same node on every iteration.
                    FilePath dir = mjc.GetCurrentDir(i, JournalId);
                    // segments:(startSegment, mkdir, endSegment), (startSegment, upgrade
                    // marker, mkdir, endSegment)
                    CheckJNStorage(dir, 4, 7);
                }
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
                if (mjc != null)
                {
                    mjc.Shutdown();
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Exhaustively injects a pair of IPC failures, one on each of the first
        /// two loggers, and checks that a fresh journal manager can recover
        /// afterwards.
        /// </summary>
        /// <remarks>
        /// For every combination (failA, failB) in 1..DetermineMaxIpcNumber(), a
        /// new cluster is started, FailIpcNumber is armed on loggers 0 and 1, and
        /// the workload is run. A second journal manager must then recover at
        /// least the last acknowledged transaction and be able to write another
        /// segment. Any failure is rethrown wrapped with the injection pair so it
        /// can be triaged.
        /// </remarks>
        public virtual void TestRecoverAfterDoubleFailures()
        {
            long maxIpcNumber = DetermineMaxIpcNumber();

            for (int failA = 1; failA <= maxIpcNumber; failA++)
            {
                for (int failB = 1; failB <= maxIpcNumber; failB++)
                {
                    string injectionStr = "(" + failA + ", " + failB + ")";
                    Log.Info("\n\n-------------------------------------------\n" + "Beginning test, failing at "
                             + injectionStr + "\n" + "-------------------------------------------\n\n");
                    MiniJournalCluster   cluster = new MiniJournalCluster.Builder(conf).Build();
                    QuorumJournalManager qjm     = null;
                    try
                    {
                        qjm = CreateInjectableQJM(cluster);
                        qjm.Format(QJMTestUtil.FakeNsinfo);
                        IList<AsyncLogger> loggers = qjm.GetLoggerSetForTests().GetLoggersForTests();
                        FailIpcNumber(loggers[0], failA);
                        FailIpcNumber(loggers[1], failB);
                        int lastAckedTxn = DoWorkload(cluster, qjm);
                        if (lastAckedTxn < 6)
                        {
                            Log.Info("Failed after injecting failures at " + injectionStr + ". This is expected since we injected a failure in the "
                                     + "majority.");
                        }
                        qjm.Close();
                        // Clear the reference so the finally block does not close the
                        // same manager again if creating the next one throws.
                        qjm = null;
                        // Now should be able to recover
                        qjm = CreateInjectableQJM(cluster);
                        long lastRecoveredTxn = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
                        NUnit.Framework.Assert.IsTrue(lastRecoveredTxn >= lastAckedTxn);
                        QJMTestUtil.WriteSegment(cluster, qjm, lastRecoveredTxn + 1, 3, true);
                    }
                    catch (Exception t)
                    {
                        // Test failure! Rethrow with the test setup info so it can be
                        // easily triaged.
                        throw new RuntimeException("Test failed with injection: " + injectionStr, t);
                    }
                    finally
                    {
                        // Close the journal manager first, then always shut the cluster
                        // down, so a failure in one cleanup step cannot skip the other.
                        // Same order as DetermineMaxIpcNumber.
                        try
                        {
                            IOUtils.CloseStream(qjm);
                        }
                        finally
                        {
                            cluster.Shutdown();
                        }
                    }
                }
            }
        }
Exemple #4
0
        /// <summary>
        /// Checks that epochs handed out by successive journal managers are
        /// strictly increasing, both without and with injected faults.
        /// </summary>
        /// <remarks>
        /// After formatting, five fault-free managers must obtain epochs 1..5 in
        /// order. Then twenty rounds are run with a FaultyLoggerFactory; each round
        /// retries until an epoch is created, and that epoch must be greater than
        /// the previous one.
        /// </remarks>
        public virtual void TestSingleThreaded()
        {
            Configuration      conf    = new Configuration();
            MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).Build();

            // Everything after the cluster is built runs under this try so the
            // cluster is shut down even if the initial format fails.
            try
            {
                URI uri = cluster.GetQuorumJournalURI(Jid);
                QuorumJournalManager qjm = new QuorumJournalManager(conf, uri, FakeNsinfo);
                try
                {
                    qjm.Format(FakeNsinfo);
                }
                finally
                {
                    qjm.Close();
                }

                // With no failures or contention, epochs should increase one-by-one
                for (int i = 0; i < 5; i++)
                {
                    qjm = new QuorumJournalManager(conf, uri, FakeNsinfo);
                    try
                    {
                        qjm.CreateNewUniqueEpoch();
                        NUnit.Framework.Assert.AreEqual(i + 1, qjm.GetLoggerSetForTests().GetEpoch());
                    }
                    finally
                    {
                        qjm.Close();
                    }
                }
                long prevEpoch = 5;
                // With some failures injected, it should still always increase, perhaps
                // skipping some
                for (int round = 0; round < 20; round++)
                {
                    long newEpoch = -1;
                    while (true)
                    {
                        qjm = new QuorumJournalManager(conf, uri, FakeNsinfo, new TestEpochsAreUnique.FaultyLoggerFactory
                                                           (this));
                        try
                        {
                            qjm.CreateNewUniqueEpoch();
                            newEpoch = qjm.GetLoggerSetForTests().GetEpoch();
                            break;
                        }
                        catch (IOException)
                        {
                            // It's OK to fail to create an epoch, since we randomly inject
                            // faults. It's possible we'll inject faults in too many of the
                            // underlying nodes, and a failure is expected in that case.
                            // Loop around and retry with a new manager.
                        }
                        finally
                        {
                            qjm.Close();
                        }
                    }
                    Log.Info("Created epoch " + newEpoch);
                    // NUnit takes the condition first and the message second.
                    NUnit.Framework.Assert.IsTrue(newEpoch > prevEpoch, "New epoch " + newEpoch + " should be greater than previous "
                                                  + prevEpoch);
                    prevEpoch = newEpoch;
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
        /// <summary>
        /// Exercises a rolling upgrade across two NameNode clusters that share one
        /// quorum journal.
        /// </summary>
        /// <remarks>
        /// The first cluster creates /foo, prepares the rolling upgrade, and creates
        /// /bar before shutting down. A second cluster, started from a copy of the
        /// first NameNode's image directory, must see those edits and report the
        /// same rolling-upgrade info across restarts. It then finalizes the upgrade
        /// and restarts with the regular startup option. The journal cluster and
        /// any DFS cluster still running are shut down when the test ends.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestRollingUpgradeWithQJM()
        {
            string   nnDirPrefix = MiniDFSCluster.GetBaseDirectory() + "/nn/";
            FilePath nn1Dir      = new FilePath(nnDirPrefix + "image1");
            FilePath nn2Dir      = new FilePath(nnDirPrefix + "image2");

            Log.Info("nn1Dir=" + nn1Dir);
            Log.Info("nn2Dir=" + nn2Dir);
            Configuration      conf = new HdfsConfiguration();
            MiniJournalCluster mjc  = new MiniJournalCluster.Builder(conf).Build();
            // Tracked outside the try so the finally block can release whichever
            // clusters are still running when the test exits.
            MiniDFSCluster cluster1 = null;
            MiniDFSCluster cluster2 = null;

            try
            {
                SetConf(conf, nn1Dir, mjc);
                {
                    // Start the cluster once to generate the dfs dirs
                    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(0).ManageNameDfsDirs
                                                 (false).CheckExitOnShutdown(false).Build();
                    // Shutdown the cluster before making a copy of the namenode dir to release
                    // all file locks, otherwise, the copy will fail on some platforms.
                    cluster.Shutdown();
                }

                // Start a second NN pointed to the same quorum.
                // We need to copy the image dir from the first NN -- or else
                // the new NN will just be rejected because of Namespace mismatch.
                FileUtil.FullyDelete(nn2Dir);
                FileUtil.Copy(nn1Dir, FileSystem.GetLocal(conf).GetRaw(), new Path(nn2Dir.GetAbsolutePath
                                                                                       ()), false, conf);
                // Start the cluster again
                cluster1 = new MiniDFSCluster.Builder(conf).NumDataNodes(0).Format(
                    false).ManageNameDfsDirs(false).CheckExitOnShutdown(false).Build();
                Path foo = new Path("/foo");
                Path bar = new Path("/bar");
                Path baz = new Path("/baz");
                RollingUpgradeInfo info1;
                {
                    DistributedFileSystem dfs = cluster1.GetFileSystem();
                    dfs.Mkdirs(foo);
                    //start rolling upgrade
                    dfs.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeEnter);
                    info1 = dfs.RollingUpgrade(HdfsConstants.RollingUpgradeAction.Prepare);
                    dfs.SetSafeMode(HdfsConstants.SafeModeAction.SafemodeLeave);
                    Log.Info("START\n" + info1);
                    //query rolling upgrade
                    NUnit.Framework.Assert.AreEqual(info1, dfs.RollingUpgrade(HdfsConstants.RollingUpgradeAction
                                                                              .Query));
                    dfs.Mkdirs(bar);
                    cluster1.Shutdown();
                    // Already shut down; keep the finally block from doing it again.
                    cluster1 = null;
                }
                // cluster2 takes over QJM
                Configuration conf2 = SetConf(new Configuration(), nn2Dir, mjc);
                cluster2 = new MiniDFSCluster.Builder(conf2).NumDataNodes(0).Format(false).ManageNameDfsDirs
                               (false).Build();
                DistributedFileSystem dfs2 = cluster2.GetFileSystem();
                // Check that cluster2 sees the edits made on cluster1
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(foo));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(bar));
                NUnit.Framework.Assert.IsFalse(dfs2.Exists(baz));
                //query rolling upgrade in cluster2
                NUnit.Framework.Assert.AreEqual(info1, dfs2.RollingUpgrade(HdfsConstants.RollingUpgradeAction
                                                                           .Query));
                dfs2.Mkdirs(baz);
                Log.Info("RESTART cluster 2");
                cluster2.RestartNameNode();
                NUnit.Framework.Assert.AreEqual(info1, dfs2.RollingUpgrade(HdfsConstants.RollingUpgradeAction
                                                                           .Query));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(foo));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(bar));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(baz));
                //restart cluster with -upgrade should fail.
                // NOTE(review): nothing fails the test if this restart unexpectedly
                // succeeds -- confirm whether that is intentional.
                try
                {
                    cluster2.RestartNameNode("-upgrade");
                }
                catch (IOException e)
                {
                    Log.Info("The exception is expected.", e);
                }
                Log.Info("RESTART cluster 2 again");
                cluster2.RestartNameNode();
                NUnit.Framework.Assert.AreEqual(info1, dfs2.RollingUpgrade(HdfsConstants.RollingUpgradeAction
                                                                           .Query));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(foo));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(bar));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(baz));
                //finalize rolling upgrade
                RollingUpgradeInfo finalize = dfs2.RollingUpgrade(HdfsConstants.RollingUpgradeAction
                                                                  .Finalize);
                NUnit.Framework.Assert.IsTrue(finalize.IsFinalized());
                Log.Info("RESTART cluster 2 with regular startup option");
                cluster2.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Regular
                                                           );
                cluster2.RestartNameNode();
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(foo));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(bar));
                NUnit.Framework.Assert.IsTrue(dfs2.Exists(baz));
            }
            finally
            {
                // Non-null only if the test failed before the first cluster's
                // explicit shutdown.
                if (cluster1 != null)
                {
                    cluster1.Shutdown();
                }
                if (cluster2 != null)
                {
                    cluster2.Shutdown();
                }
                // The journal cluster was started unconditionally above.
                mjc.Shutdown();
            }
        }
Exemple #6
0
        /// <summary>
        /// Runs a sequence of writers with randomly injected faults and checks that
        /// each recovery reaches at least the last acknowledged transaction.
        /// </summary>
        /// <remarks>
        /// The random seed comes from the RandSeedProperty lookup when one is
        /// supplied, otherwise a fresh one is generated; either way it is logged.
        /// The cluster is formatted once with a non-faulty journal manager. Each of
        /// the NumWriterIters writers then recovers and writes up to
        /// SegmentsPerWriter segments, stopping at the first write failure.
        /// Recovery and write failures are passed to CheckException.
        /// </remarks>
        public virtual void TestRandomized()
        {
            long seed;
            // Nullable so that "no seed supplied" is distinguishable from a real value;
            // a plain long compared against null is always non-null.
            // NOTE(review): long.GetLong is not a .NET BCL member -- presumably
            // supplied by the Java-compat layer; confirm it yields null when unset.
            long? userSpecifiedSeed = long.GetLong(RandSeedProperty);

            if (userSpecifiedSeed.HasValue)
            {
                Log.Info("Using seed specified in system property");
                seed = userSpecifiedSeed.Value;
                // If the user specifies a seed, then we should gather all the
                // IPC trace information so that debugging is easier. This makes
                // the test run about 25% slower otherwise.
                ((Log4JLogger)ProtobufRpcEngine.Log).GetLogger().SetLevel(Level.All);
            }
            else
            {
                seed = new Random().NextLong();
            }
            Log.Info("Random seed: " + seed);
            Random             r       = new Random(seed);
            MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).Build();

            // The initial format runs inside this try so the cluster is shut down
            // even if formatting fails.
            try
            {
                // Format the cluster using a non-faulty QJM.
                QuorumJournalManager qjmForInitialFormat = CreateInjectableQJM(cluster);
                try
                {
                    qjmForInitialFormat.Format(QJMTestUtil.FakeNsinfo);
                }
                finally
                {
                    qjmForInitialFormat.Close();
                }

                long txid      = 0;
                long lastAcked = 0;
                for (int i = 0; i < NumWriterIters; i++)
                {
                    Log.Info("Starting writer " + i + "\n-------------------");
                    QuorumJournalManager qjm = CreateRandomFaultyQJM(cluster, r);
                    try
                    {
                        long recovered;
                        try
                        {
                            recovered = QJMTestUtil.RecoverAndReturnLastTxn(qjm);
                        }
                        catch (Exception t)
                        {
                            Log.Info("Failed recovery", t);
                            CheckException(t);
                            // Move on to the next writer; the finally below still closes qjm.
                            continue;
                        }
                        // NUnit takes the condition first and the message second.
                        NUnit.Framework.Assert.IsTrue(recovered >= lastAcked, "Recovered only up to txnid " + recovered + " but had gotten an ack for "
                                                      + lastAcked);
                        txid = recovered + 1;
                        // Periodically purge old data on disk so it's easier to look
                        // at failure cases.
                        if (txid > 100 && i % 10 == 1)
                        {
                            qjm.PurgeLogsOlderThan(txid - 100);
                        }
                        Holder<Exception> thrown = new Holder<Exception>(null);
                        for (int j = 0; j < SegmentsPerWriter; j++)
                        {
                            lastAcked = WriteSegmentUntilCrash(cluster, qjm, txid, 4, thrown);
                            if (thrown.held != null)
                            {
                                Log.Info("Failed write", thrown.held);
                                CheckException(thrown.held);
                                break;
                            }
                            txid += 4;
                        }
                    }
                    finally
                    {
                        qjm.Close();
                    }
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }