/// <summary>
/// Starts one extra bookie, configures a journal manager whose ensemble and
/// quorum sizes both equal the total number of bookies, and checks that:
/// writes succeed while every bookie is up; writes fail with an
/// <c>IOException</c> once the extra bookie is shut down; and, after a
/// replacement bookie is started, unfinalized segments can be recovered and a
/// new log segment can be written.
/// </summary>
public virtual void TestAllBookieFailure()
{
    // bookie to fail
    newBookie = bkutil.NewBookie();
    BookieServer replacementBookie = null;
    try
    {
        int ensembleSize = numBookies + 1;
        NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
            "New bookie didn't start");

        // ensure that the journal manager has to use all bookies,
        // so that a failure will fail the journal manager
        Configuration conf = new Configuration();
        conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperEnsembleSize, ensembleSize);
        conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperQuorumSize, ensembleSize);

        long txid = 1;
        NamespaceInfo nsi = NewNSInfo();
        BookKeeperJournalManager bkjm = new BookKeeperJournalManager(
            conf, BKJMUtil.CreateJournalURI("/hdfsjournal-allbookiefailure"), nsi);
        bkjm.Format(nsi);

        EditLogOutputStream @out = bkjm.StartLogSegment(
            txid, NameNodeLayoutVersion.CurrentLayoutVersion);
        WriteNoOpBatchAndFlush(@out, ref txid);

        newBookie.Shutdown();
        NUnit.Framework.Assert.AreEqual(numBookies, bkutil.CheckBookiesUp(numBookies, 10),
            "New bookie didn't die");

        try
        {
            WriteNoOpBatchAndFlush(@out, ref txid);
            NUnit.Framework.Assert.Fail("should not get to this stage");
        }
        catch (IOException ioe)
        {
            Log.Debug("Error writing to bookkeeper", ioe);
            NUnit.Framework.Assert.IsTrue(
                ioe.Message.Contains("Failed to write to bookkeeper"),
                "Invalid exception message");
        }

        replacementBookie = bkutil.NewBookie();
        NUnit.Framework.Assert.AreEqual(numBookies + 1, bkutil.CheckBookiesUp(numBookies + 1, 10),
            "New bookie didn't start");

        bkjm.RecoverUnfinalizedSegments();
        @out = bkjm.StartLogSegment(txid, NameNodeLayoutVersion.CurrentLayoutVersion);
        WriteNoOpBatchAndFlush(@out, ref txid);
    }
    catch (Exception e)
    {
        Log.Error("Exception in test", e);
        throw;
    }
    finally
    {
        // Nested so that the bookie under test is still shut down even if
        // stopping the replacement bookie throws.
        try
        {
            if (replacementBookie != null)
            {
                replacementBookie.Shutdown();
            }
        }
        finally
        {
            newBookie.Shutdown();
        }

        if (bkutil.CheckBookiesUp(numBookies, 30) != numBookies)
        {
            Log.Warn("Not all bookies from this test shut down, expect errors");
        }
    }
}

/// <summary>
/// Writes three no-op edits to <paramref name="stream"/>, giving each the
/// current value of <paramref name="txid"/> and then incrementing it, and
/// finally marks the stream ready to flush and flushes it.
/// </summary>
/// <param name="stream">Edit log stream to write to.</param>
/// <param name="txid">
/// Next transaction id to assign. Passed by reference so the caller's counter
/// keeps every increment even when a write or the flush throws.
/// </param>
private static void WriteNoOpBatchAndFlush(EditLogOutputStream stream, ref long txid)
{
    for (int written = 0; written < 3; written++)
    {
        FSEditLogOp op = FSEditLogTestUtil.GetNoOpInstance();
        op.SetTransactionId(txid++);
        stream.Write(op);
    }
    stream.SetReadyToFlush();
    stream.Flush();
}
/// <summary>
/// Runs a two-NameNode HA mini cluster whose shared edits directory is a
/// BookKeeper journal requiring every bookie (ensemble size equals quorum
/// size), then shuts down one bookie and checks that:
/// a <c>Mkdirs</c> on the active NameNode fails with a <c>RemoteException</c>
/// whose class name contains "ExitException"; transitioning the second
/// NameNode to active fails with an <c>ExitUtil.ExitException</c>; and, once a
/// replacement bookie is up, the transition succeeds, the first directory
/// exists and the second does not.
/// </summary>
public virtual void TestFailoverWithFailingBKCluster()
{
    int ensembleSize = numBookies + 1;
    BookieServer newBookie = bkutil.NewBookie();
    BookieServer replacementBookie = null;
    MiniDFSCluster cluster = null;
    try
    {
        // Asserted inside the try so the extra bookie is still shut down by
        // the finally block when it fails to come up in time.
        NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
            "New bookie didn't start");

        Configuration conf = new Configuration();
        conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
        conf.Set(DFSConfigKeys.DfsNamenodeSharedEditsDirKey,
            BKJMUtil.CreateJournalURI("/hotfailoverWithFail").ToString());
        conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperEnsembleSize, ensembleSize);
        conf.SetInt(BookKeeperJournalManager.BkjmBookkeeperQuorumSize, ensembleSize);
        BKJMUtil.AddJournalManagerDefinition(conf);

        cluster = new MiniDFSCluster.Builder(conf)
            .NnTopology(MiniDFSNNTopology.SimpleHATopology())
            .NumDataNodes(0)
            .ManageNameDfsSharedDirs(false)
            .CheckExitOnShutdown(false)
            .Build();

        // NOTE(review): these handles are not used again in this test; the
        // calls are kept to preserve the original call sequence.
        NameNode nn1 = cluster.GetNameNode(0);
        NameNode nn2 = cluster.GetNameNode(1);

        cluster.WaitActive();
        cluster.TransitionToActive(0);

        Path p1 = new Path("/testBKJMFailingBKCluster1");
        Path p2 = new Path("/testBKJMFailingBKCluster2");
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        fs.Mkdirs(p1);

        newBookie.Shutdown(); // will take down shared storage
        NUnit.Framework.Assert.AreEqual(numBookies, bkutil.CheckBookiesUp(numBookies, 10),
            "New bookie didn't stop");

        try
        {
            fs.Mkdirs(p2);
            NUnit.Framework.Assert.Fail("mkdirs should result in the NN exiting");
        }
        catch (RemoteException re)
        {
            NUnit.Framework.Assert.IsTrue(re.GetClassName().Contains("ExitException"));
        }

        cluster.ShutdownNameNode(0);

        try
        {
            cluster.TransitionToActive(1);
            NUnit.Framework.Assert.Fail("Shouldn't have been able to transition with bookies down");
        }
        catch (ExitUtil.ExitException ee)
        {
            NUnit.Framework.Assert.IsTrue(
                ee.Message.Contains("starting log segment 3 failed for required journal"),
                "Should shutdown due to required journal failure");
        }

        replacementBookie = bkutil.NewBookie();
        NUnit.Framework.Assert.AreEqual(ensembleSize, bkutil.CheckBookiesUp(ensembleSize, 10),
            "Replacement bookie didn't start");

        cluster.TransitionToActive(1); // should work fine now
        NUnit.Framework.Assert.IsTrue(fs.Exists(p1));
        NUnit.Framework.Assert.IsFalse(fs.Exists(p2));
    }
    finally
    {
        // Nested so that a failure stopping one resource does not prevent the
        // remaining ones from being stopped.
        try
        {
            newBookie.Shutdown();
        }
        finally
        {
            try
            {
                if (replacementBookie != null)
                {
                    replacementBookie.Shutdown();
                }
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
    }
}