Beispiel #1
0
        /// <summary>
        /// Checks that a journal configured with ensemble and quorum sizes equal to
        /// the number of running bookies rejects writes once one bookie is shut down,
        /// and that a new log segment can be written after a replacement bookie is
        /// started and unfinalized segments are recovered.
        /// </summary>
        public virtual void TestAllBookieFailure()
        {
            // Number of no-op edits written in each phase of the test.
            const int opsPerBatch = 3;

            // bookie to fail
            newBookie = bkutil.NewBookie();
            BookieServer replacementBookie = null;

            try
            {
                int ensembleSize = numBookies + 1;
                NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
                                                "New bookie didn't start");

                // ensure that the journal manager has to use all bookies,
                // so that a failure will fail the journal manager
                Configuration conf = new Configuration();
                conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperEnsembleSize, ensembleSize);
                conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperQuorumSize, ensembleSize);
                long          txid = 1;
                NamespaceInfo nsi  = NewNSInfo();
                BookKeeperJournalManager bkjm = new BookKeeperJournalManager(
                    conf, BKJMUtil.CreateJournalURI("/hdfsjournal-allbookiefailure"), nsi);
                bkjm.Format(nsi);

                // Phase 1: every bookie is up, so the batch must be written and flushed.
                EditLogOutputStream @out = bkjm.StartLogSegment(txid, NameNodeLayoutVersion.CurrentLayoutVersion);
                for (int i = 0; i < opsPerBatch; i++)
                {
                    FSEditLogOp op = FSEditLogTestUtil.GetNoOpInstance();
                    op.SetTransactionId(txid++);
                    @out.Write(op);
                }
                @out.SetReadyToFlush();
                @out.Flush();

                // Phase 2: take one bookie down; the same stream must now fail.
                newBookie.Shutdown();
                NUnit.Framework.Assert.AreEqual(numBookies, bkutil.CheckBookiesUp(numBookies, 10),
                                                "New bookie didn't die");
                try
                {
                    for (int i = 0; i < opsPerBatch; i++)
                    {
                        FSEditLogOp op = FSEditLogTestUtil.GetNoOpInstance();
                        op.SetTransactionId(txid++);
                        @out.Write(op);
                    }
                    @out.SetReadyToFlush();
                    @out.Flush();
                    NUnit.Framework.Assert.Fail("should not get to this stage");
                }
                catch (IOException ioe)
                {
                    Log.Debug("Error writing to bookkeeper", ioe);
                    NUnit.Framework.Assert.IsTrue(ioe.Message.Contains("Failed to write to bookkeeper"),
                                                  "Invalid exception message");
                }

                // Phase 3: bring up a replacement, recover, and write a fresh segment.
                // txid keeps whatever value the failed batch advanced it to.
                replacementBookie = bkutil.NewBookie();
                NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
                                                "New bookie didn't start");
                bkjm.RecoverUnfinalizedSegments();
                @out = bkjm.StartLogSegment(txid, NameNodeLayoutVersion.CurrentLayoutVersion);
                for (int i = 0; i < opsPerBatch; i++)
                {
                    FSEditLogOp op = FSEditLogTestUtil.GetNoOpInstance();
                    op.SetTransactionId(txid++);
                    @out.Write(op);
                }
                @out.SetReadyToFlush();
                @out.Flush();
            }
            catch (Exception e)
            {
                Log.Error("Exception in test", e);
                throw;
            }
            finally
            {
                try
                {
                    if (replacementBookie != null)
                    {
                        replacementBookie.Shutdown();
                    }
                }
                finally
                {
                    // Runs even if stopping the replacement throws. On the success path
                    // this bookie was already shut down mid-test; this call covers
                    // failures that happen before that point.
                    newBookie.Shutdown();
                }
                if (bkutil.CheckBookiesUp(numBookies, 30) != numBookies)
                {
                    Log.Warn("Not all bookies from this test shut down, expect errors");
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Checks HA failover when the BookKeeper-backed shared edits directory loses
        /// a bookie: a write through the active NameNode must fail with an
        /// ExitException, the standby must refuse to become active while the bookie is
        /// missing, and the transition must succeed once a replacement bookie is up.
        /// Afterwards only the directory created before the outage must exist.
        /// </summary>
        public virtual void TestFailoverWithFailingBKCluster()
        {
            int            ensembleSize      = numBookies + 1;
            BookieServer   newBookie         = bkutil.NewBookie();
            BookieServer   replacementBookie = null;
            MiniDFSCluster cluster           = null;

            try
            {
                // Checked inside the try so the finally block still shuts newBookie
                // down when the start-up check fails.
                NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
                                                "New bookie didn't start");

                Configuration conf = new Configuration();
                conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
                conf.Set(DFSConfigKeys.DfsNamenodeSharedEditsDirKey,
                         BKJMUtil.CreateJournalURI("/hotfailoverWithFail").ToString());
                conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperEnsembleSize, ensembleSize);
                conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperQuorumSize, ensembleSize);
                BKJMUtil.AddJournalManagerDefinition(conf);
                cluster = new MiniDFSCluster.Builder(conf)
                          .NnTopology(MiniDFSNNTopology.SimpleHATopology())
                          .NumDataNodes(0)
                          .ManageNameDfsSharedDirs(false)
                          .CheckExitOnShutdown(false)
                          .Build();
                cluster.WaitActive();
                cluster.TransitionToActive(0);

                Path       p1 = new Path("/testBKJMFailingBKCluster1");
                Path       p2 = new Path("/testBKJMFailingBKCluster2");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                fs.Mkdirs(p1);

                // will take down shared storage
                newBookie.Shutdown();
                NUnit.Framework.Assert.AreEqual(numBookies, bkutil.CheckBookiesUp(numBookies, 10),
                                                "New bookie didn't stop");
                try
                {
                    fs.Mkdirs(p2);
                    NUnit.Framework.Assert.Fail("mkdirs should result in the NN exiting");
                }
                catch (RemoteException re)
                {
                    NUnit.Framework.Assert.IsTrue(re.GetClassName().Contains("ExitException"));
                }

                cluster.ShutdownNameNode(0);
                try
                {
                    cluster.TransitionToActive(1);
                    NUnit.Framework.Assert.Fail("Shouldn't have been able to transition with bookies down");
                }
                catch (ExitUtil.ExitException ee)
                {
                    NUnit.Framework.Assert.IsTrue(
                        ee.Message.Contains("starting log segment 3 failed for required journal"),
                        "Should shutdown due to required journal failure");
                }

                replacementBookie = bkutil.NewBookie();
                NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
                                                "Replacement bookie didn't start");

                // should work fine now
                cluster.TransitionToActive(1);
                NUnit.Framework.Assert.IsTrue(fs.Exists(p1));
                NUnit.Framework.Assert.IsFalse(fs.Exists(p2));
            }
            finally
            {
                // Nested finally blocks: a failing Shutdown must not prevent the
                // remaining resources from being released.
                try
                {
                    newBookie.Shutdown();
                }
                finally
                {
                    try
                    {
                        if (replacementBookie != null)
                        {
                            replacementBookie.Shutdown();
                        }
                    }
                    finally
                    {
                        if (cluster != null)
                        {
                            cluster.Shutdown();
                        }
                    }
                }
            }
        }