/// <summary>
/// Deletes the finalized edits segment from the shared edits directory and
/// checks that bootstrapping the standby then returns
/// <c>BootstrapStandby.ErrCodeLogsUnavailable</c> and logs a FATAL message.
/// </summary>
public virtual void TestSharedEditsMissingLogs()
{
    RemoveStandbyNameDirs();

    CheckpointSignature rollSignature = nn0.GetRpcServer().RollEditLog();
    NUnit.Framework.Assert.AreEqual(3, rollSignature.GetCurSegmentTxId());

    // The roll is expected to have left a finalized 1-2 segment under
    // <shared edits dir>/current.
    URI sharedEditsUri = cluster.GetSharedEditsDir(0, 1);
    FilePath sharedEditsRoot = new FilePath(sharedEditsUri);
    FilePath currentDir = new FilePath(sharedEditsRoot, "current");
    string segmentName = NNStorage.GetFinalizedEditsFileName(1, 2);
    FilePath finalizedSegment = new FilePath(currentDir, segmentName);
    GenericTestUtils.AssertExists(finalizedSegment);

    // Remove the segment so the shared dir no longer holds the edits.
    NUnit.Framework.Assert.IsTrue(finalizedSegment.Delete());

    // With the segment gone, bootstrapping the standby is expected to fail.
    // Capture the BootstrapStandby log so the failure message can be checked.
    GenericTestUtils.LogCapturer capturedLogs = GenericTestUtils.LogCapturer.CaptureLogs(
        LogFactory.GetLog(typeof(BootstrapStandby)));
    try
    {
        string[] forceArgs = new string[] { "-force" };
        int exitCode = BootstrapStandby.Run(forceArgs, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(BootstrapStandby.ErrCodeLogsUnavailable, exitCode);
    }
    finally
    {
        capturedLogs.StopCapturing();
    }

    GenericTestUtils.AssertMatches(
        capturedLogs.GetOutput(),
        "FATAL.*Unable to read transaction ids 1-3 from the configured shared");
}
/// <summary>
/// Runs an HA upgrade on a two-NameNode cluster built with
/// <c>MiniDFSNNTopology.SimpleHATopology()</c>: upgrades NN0, restarts it
/// without the upgrade flag, bootstraps NN1 from it and finally fails over to
/// NN1, performing a mkdir after each step.
/// </summary>
/// <remarks>
/// Cleanup is nested so that the cluster is shut down even when closing the
/// file system throws.
/// </remarks>
public virtual void TestNfsUpgrade()
{
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf)
            .NnTopology(MiniDFSNNTopology.SimpleHATopology())
            .NumDataNodes(0)
            .Build();
        FilePath sharedDir = new FilePath(cluster.GetSharedEditsDir(0, 1));

        // No upgrade is in progress at the moment.
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);
        CheckPreviousDirExistence(sharedDir, false);

        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));

        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckPreviousDirExistence(sharedDir, true);

        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));

        // Restart NN0 without the -upgrade flag, to make sure that works.
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Regular);
        cluster.RestartNameNode(0, false);

        // Make sure we can still do FS ops after upgrading.
        cluster.TransitionToActive(0);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo3")));

        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);

        // Now restart NN1 and make sure that we can do ops against that as well.
        cluster.RestartNameNode(1);
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo4")));

        AssertCTimesEqual(cluster);
    }
    finally
    {
        // Nested try/finally: a failure while closing the file system must
        // not prevent the cluster from being shut down.
        try
        {
            if (fs != null)
            {
                fs.Close();
            }
        }
        finally
        {
            if (cluster != null)
            {
                cluster.Shutdown();
            }
        }
    }
}
/// <summary>
/// Bootstrapping the standby must succeed even when the other node has been
/// transitioned to standby, i.e. is not active.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestOtherNodeNotActive()
{
    // Put NN0 into standby so that the node being copied from is not active.
    cluster.TransitionToStandby(0);

    string[] forceArgs = new string[] { "-force" };
    int exitCode = BootstrapStandby.Run(forceArgs, cluster.GetConfiguration(1));

    NUnit.Framework.Assert.AreEqual(0, exitCode);
}
/// <summary>
/// Upgrades NN0 of a <c>MiniQJMHACluster</c>, bootstraps and restarts NN1,
/// runs the finalize command and then checks the "previous" directories on
/// the NameNodes and journal nodes as well as the committed transaction id
/// reported by <c>GetCommittedTxnIdValue</c>.
/// </summary>
/// <remarks>
/// Cleanup is nested so that the QJM cluster is shut down even when closing
/// the file system throws.
/// </remarks>
public virtual void TestFinalizeWithJournalNodes()
{
    MiniQJMHACluster qjCluster = null;
    FileSystem fs = null;
    try
    {
        MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.GetDfsBuilder().NumDataNodes(0);
        qjCluster = builder.Build();

        MiniDFSCluster cluster = qjCluster.GetDfsCluster();

        // No upgrade is in progress at the moment.
        CheckJnPreviousDirExistence(qjCluster, false);
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);

        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));

        // Remember the committed txn id so later values can be compared to it.
        long cidBeforeUpgrade = GetCommittedTxnIdValue(qjCluster);

        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);
        NUnit.Framework.Assert.IsTrue(cidBeforeUpgrade <= GetCommittedTxnIdValue(qjCluster));

        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckJnPreviousDirExistence(qjCluster, true);

        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);

        cluster.RestartNameNode(1);

        long cidDuringUpgrade = GetCommittedTxnIdValue(qjCluster);
        NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidBeforeUpgrade);

        RunFinalizeCommand(cluster);

        // Finalizing must leave the committed txn id unchanged.
        NUnit.Framework.Assert.AreEqual(cidDuringUpgrade, GetCommittedTxnIdValue(qjCluster));
        CheckClusterPreviousDirExistence(cluster, false);
        CheckJnPreviousDirExistence(qjCluster, false);
        AssertCTimesEqual(cluster);
    }
    finally
    {
        // Nested try/finally: a failure while closing the file system must
        // not prevent the QJM cluster from being shut down.
        try
        {
            if (fs != null)
            {
                fs.Close();
            }
        }
        finally
        {
            if (qjCluster != null)
            {
                qjCluster.Shutdown();
            }
        }
    }
}
/// <summary>
/// Runs the standby bootstrap twice against NN1's configuration: first with
/// only <c>-nonInteractive</c>, expecting
/// <c>BootstrapStandby.ErrCodeAlreadyFormatted</c>, then with <c>-force</c>,
/// expecting success.
/// </summary>
public virtual void TestStandbyDirsAlreadyExist()
{
    // Without -force the existing standby dirs must cause a refusal.
    string[] nonInteractiveArgs = new string[] { "-nonInteractive" };
    int exitCode = BootstrapStandby.Run(nonInteractiveArgs, cluster.GetConfiguration(1));
    NUnit.Framework.Assert.AreEqual(BootstrapStandby.ErrCodeAlreadyFormatted, exitCode);

    // With -force the bootstrap must go through.
    string[] forceArgs = new string[] { "-force" };
    exitCode = BootstrapStandby.Run(forceArgs, cluster.GetConfiguration(1));
    NUnit.Framework.Assert.AreEqual(0, exitCode);
}
/// <summary>
/// Marks NN0's image as not upgrade-finalized, optionally moves NN1's
/// "current" directory aside depending on <paramref name="state"/>, then
/// bootstraps NN1 and checks that it restarts with the upgrade still not
/// finalized.
/// </summary>
/// <param name="state">
/// <c>Recover</c> renames NN1's current directory to its previous.tmp
/// location; <c>Format</c> renames it to a sibling named "wrong"; any other
/// value leaves NN1's storage untouched.
/// </param>
/// <exception cref="System.Exception"/>
private void TestUpgrade(TestBootstrapStandbyWithQJM.UpgradeState state)
{
    cluster.TransitionToActive(0);
    Configuration confNN1 = cluster.GetConfiguration(1);

    // Look up NN1's storage locations before the node is shut down.
    FilePath nn1CurrentDir = cluster.GetNameNode(1)
        .GetFSImage()
        .GetStorage()
        .GetStorageDir(0)
        .GetCurrentDir();
    FilePath nn1PreviousTmp = cluster.GetNameNode(1)
        .GetFSImage()
        .GetStorage()
        .GetStorageDir(0)
        .GetPreviousTmp();

    // Shut down NN1.
    cluster.ShutdownNameNode(1);

    // Make NN0 look as if an upgrade is in progress.
    FSImage nn0Image = cluster.GetNameNode(0).GetNamesystem().GetFSImage();
    Whitebox.SetInternalState(nn0Image, "isUpgradeFinalized", false);

    if (state == TestBootstrapStandbyWithQJM.UpgradeState.Recover)
    {
        // Rename NN1's current directory to previous.tmp.
        NNStorage.Rename(nn1CurrentDir, nn1PreviousTmp);
    }
    else if (state == TestBootstrapStandbyWithQJM.UpgradeState.Format)
    {
        // Rename NN1's current directory to an unrelated name so that it is
        // not formatted.
        FilePath displacedDir = new FilePath(nn1CurrentDir.GetParentFile(), "wrong");
        NNStorage.Rename(nn1CurrentDir, displacedDir);
    }

    string[] forceArgs = new string[] { "-force" };
    int exitCode = BootstrapStandby.Run(forceArgs, confNN1);
    NUnit.Framework.Assert.AreEqual(0, exitCode);

    // NN1 should now hold a checkpoint at txid 0 and its files should match.
    FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1, ImmutableList.Of(0));
    FSImageTestUtil.AssertNNFilesMatch(cluster);

    // After a restart NN1 must report that the upgrade is not finalized.
    cluster.RestartNameNode(1);
    bool nn1Finalized = cluster.GetNameNode(1).GetNamesystem().IsUpgradeFinalized();
    NUnit.Framework.Assert.IsFalse(nn1Finalized);
}
/// <summary>
/// Bootstraps NN1 with <c>-force</c> while NN0 is active and NN1 is shut
/// down, then checks NN1's checkpoints and that the NameNode files match.
/// </summary>
public virtual void TestBootstrapStandbyWithActiveNN()
{
    // Make the first NN active.
    cluster.TransitionToActive(0);
    Configuration confNN1 = cluster.GetConfiguration(1);

    // Shut down NN1 before bootstrapping it.
    cluster.ShutdownNameNode(1);

    string[] forceArgs = new string[] { "-force" };
    int exitCode = BootstrapStandby.Run(forceArgs, confNN1);
    NUnit.Framework.Assert.AreEqual(0, exitCode);

    // NN1 should now hold a checkpoint at txid 0 and its files should match.
    FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1, ImmutableList.Of(0));
    FSImageTestUtil.AssertNNFilesMatch(cluster);
}
/// <summary>
/// Base case: after the standby name dirs are removed NN1 cannot restart;
/// a <c>-nonInteractive</c> bootstrap then succeeds and NN1 can be restarted.
/// </summary>
public virtual void TestSuccessfulBaseCase()
{
    RemoveStandbyNameDirs();

    // Restarting NN1 is expected to fail with an IOException about its
    // storage directory.
    try
    {
        cluster.RestartNameNode(1);
        NUnit.Framework.Assert.Fail("Did not throw");
    }
    catch (IOException expected)
    {
        GenericTestUtils.AssertExceptionContains(
            "storage directory does not exist or is not accessible",
            expected);
    }

    string[] nonInteractiveArgs = new string[] { "-nonInteractive" };
    int exitCode = BootstrapStandby.Run(nonInteractiveArgs, cluster.GetConfiguration(1));
    NUnit.Framework.Assert.AreEqual(0, exitCode);

    // NN1 should now hold a checkpoint at txid 0 and its files should match.
    FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1, ImmutableList.Of(0));
    FSImageTestUtil.AssertNNFilesMatch(cluster);

    // The standby should now start successfully.
    cluster.RestartNameNode(1);
}
/// <summary>
/// Rolls the edit log twice and saves the namespace on NN0 so that the most
/// recent checkpoint is at txid 6, then bootstraps NN1 and checks that it
/// holds that checkpoint and can be restarted.
/// </summary>
public virtual void TestDownloadingLaterCheckpoint()
{
    // Roll the edit log twice to inflate the txid.
    for (int roll = 0; roll < 2; roll++)
    {
        nn0.GetRpcServer().RollEditLog();
    }

    // Make a checkpoint; the save is bracketed by entering/leaving safe mode.
    NameNodeAdapter.EnterSafeMode(nn0, false);
    NameNodeAdapter.SaveNamespace(nn0);
    NameNodeAdapter.LeaveSafeMode(nn0);

    long checkpointTxId = NameNodeAdapter.GetNamesystem(nn0)
        .GetFSImage()
        .GetMostRecentCheckpointTxId();
    NUnit.Framework.Assert.AreEqual(6, checkpointTxId);

    string[] forceArgs = new string[] { "-force" };
    int exitCode = BootstrapStandby.Run(forceArgs, cluster.GetConfiguration(1));
    NUnit.Framework.Assert.AreEqual(0, exitCode);

    // NN1 should now hold the later checkpoint and its files should match.
    FSImageTestUtil.AssertNNHasCheckpoints(cluster, 1, ImmutableList.Of((int)checkpointTxId));
    FSImageTestUtil.AssertNNFilesMatch(cluster);

    // The standby should now start successfully.
    cluster.RestartNameNode(1);
}
/// <summary>
/// Upgrades NN0 of a <c>MiniQJMHACluster</c>, bootstraps NN1, then shuts the
/// NameNodes down and calls <c>NameNode.DoRollback</c> using NN0's name dirs.
/// Checks the "previous" directories and the committed transaction id
/// reported by <c>GetCommittedTxnIdValue</c> before and after the rollback.
/// </summary>
/// <remarks>
/// Cleanup is nested so that the QJM cluster is shut down even when closing
/// the file system throws.
/// </remarks>
public virtual void TestRollbackWithJournalNodes()
{
    MiniQJMHACluster qjCluster = null;
    FileSystem fs = null;
    try
    {
        MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.GetDfsBuilder().NumDataNodes(0);
        qjCluster = builder.Build();

        MiniDFSCluster cluster = qjCluster.GetDfsCluster();

        // No upgrade is in progress at the moment.
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);
        CheckJnPreviousDirExistence(qjCluster, false);

        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));

        long cidBeforeUpgrade = GetCommittedTxnIdValue(qjCluster);

        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckJnPreviousDirExistence(qjCluster, true);

        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));

        long cidDuringUpgrade = GetCommittedTxnIdValue(qjCluster);
        NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidBeforeUpgrade);

        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);

        cluster.RestartNameNode(1);

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, true);
        CheckJnPreviousDirExistence(qjCluster, true);
        AssertCTimesEqual(cluster);

        // Shut down the NNs, but deliberately leave the JNs up and running.
        // The name dirs are those of the NameNode at index 0.
        ICollection<URI> nn0NameDirs = cluster.GetNameDirs(0);
        cluster.Shutdown();

        conf.SetStrings(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(nn0NameDirs));
        NameNode.DoRollback(conf, false);

        long cidAfterRollback = GetCommittedTxnIdValue(qjCluster);
        NUnit.Framework.Assert.IsTrue(cidBeforeUpgrade < cidAfterRollback);
        // make sure the committedTxnId has been reset correctly after rollback
        NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidAfterRollback);

        // The rollback operation should have rolled back the first NN's local
        // dirs, and the shared dir, but not the other NN's dirs. Those have to be
        // done by bootstrapping the standby.
        CheckNnPreviousDirExistence(cluster, 0, false);
        CheckJnPreviousDirExistence(qjCluster, false);
    }
    finally
    {
        // Nested try/finally: a failure while closing the file system must
        // not prevent the QJM cluster from being shut down.
        try
        {
            if (fs != null)
            {
                fs.Close();
            }
        }
        finally
        {
            if (qjCluster != null)
            {
                qjCluster.Shutdown();
            }
        }
    }
}
/// <summary>
/// Upgrades NN0 of a cluster built with
/// <c>MiniDFSNNTopology.SimpleHATopology()</c>, bootstraps NN1, then shuts the
/// cluster down and calls <c>NameNode.DoRollback</c> using NN0's name dirs.
/// Checks the "previous" directories of the NameNodes and of the shared edits
/// directory before and after the rollback.
/// </summary>
/// <remarks>
/// Cleanup is nested so that the cluster is shut down even when closing the
/// file system throws.
/// </remarks>
public virtual void TestRollbackWithNfs()
{
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf)
            .NnTopology(MiniDFSNNTopology.SimpleHATopology())
            .NumDataNodes(0)
            .Build();
        FilePath sharedDir = new FilePath(cluster.GetSharedEditsDir(0, 1));

        // No upgrade is in progress at the moment.
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);
        CheckPreviousDirExistence(sharedDir, false);

        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));

        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckPreviousDirExistence(sharedDir, true);

        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));

        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);

        cluster.RestartNameNode(1);

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, true);
        CheckPreviousDirExistence(sharedDir, true);
        AssertCTimesEqual(cluster);

        // Now shut down the cluster and do the rollback.
        // The name dirs are those of the NameNode at index 0.
        ICollection<URI> nn0NameDirs = cluster.GetNameDirs(0);
        cluster.Shutdown();

        conf.SetStrings(DFSConfigKeys.DfsNamenodeNameDirKey, Joiner.On(",").Join(nn0NameDirs));
        NameNode.DoRollback(conf, false);

        // The rollback operation should have rolled back the first NN's local
        // dirs, and the shared dir, but not the other NN's dirs. Those have to be
        // done by bootstrapping the standby.
        CheckNnPreviousDirExistence(cluster, 0, false);
        CheckPreviousDirExistence(sharedDir, false);
    }
    finally
    {
        // Nested try/finally: a failure while closing the file system must
        // not prevent the cluster from being shut down.
        try
        {
            if (fs != null)
            {
                fs.Close();
            }
        }
        finally
        {
            if (cluster != null)
            {
                cluster.Shutdown();
            }
        }
    }
}
/// <summary>
/// Runs an HA upgrade on a <c>MiniQJMHACluster</c>: upgrades NN0, restarts it
/// without the upgrade flag, bootstraps NN1 from it and finally fails over to
/// NN1, performing a mkdir after each step and comparing the committed
/// transaction id reported by <c>GetCommittedTxnIdValue</c> along the way.
/// </summary>
/// <remarks>
/// Cleanup is nested so that the QJM cluster is shut down even when closing
/// the file system throws.
/// </remarks>
public virtual void TestUpgradeWithJournalNodes()
{
    MiniQJMHACluster qjCluster = null;
    FileSystem fs = null;
    try
    {
        MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.GetDfsBuilder().NumDataNodes(0);
        qjCluster = builder.Build();

        MiniDFSCluster cluster = qjCluster.GetDfsCluster();

        // No upgrade is in progress at the moment.
        CheckJnPreviousDirExistence(qjCluster, false);
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);

        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));

        // get the value of the committedTxnId in journal nodes
        long cidBeforeUpgrade = GetCommittedTxnIdValue(qjCluster);

        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);

        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckJnPreviousDirExistence(qjCluster, true);

        NUnit.Framework.Assert.IsTrue(cidBeforeUpgrade <= GetCommittedTxnIdValue(qjCluster));

        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));

        // Restart NN0 without the -upgrade flag, to make sure that works.
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Regular);
        cluster.RestartNameNode(0, false);

        // Make sure we can still do FS ops after upgrading.
        cluster.TransitionToActive(0);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo3")));

        NUnit.Framework.Assert.IsTrue(GetCommittedTxnIdValue(qjCluster) > cidBeforeUpgrade);

        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);

        // Now restart NN1 and make sure that we can do ops against that as well.
        cluster.RestartNameNode(1);
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo4")));

        AssertCTimesEqual(cluster);
    }
    finally
    {
        // Nested try/finally: a failure while closing the file system must
        // not prevent the QJM cluster from being shut down.
        try
        {
            if (fs != null)
            {
                fs.Close();
            }
        }
        finally
        {
            if (qjCluster != null)
            {
                qjCluster.Shutdown();
            }
        }
    }
}