public virtual void EnsureInvalidBlockTokensAreRejected() {
    cluster.TransitionToActive(0);
    FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
    DFSTestUtil.WriteFile(fs, TestPath, TestData);
    NUnit.Framework.Assert.AreEqual(TestData, DFSTestUtil.ReadFile(fs, TestPath));
    DFSClient dfsClient = DFSClientAdapter.GetDFSClient((DistributedFileSystem)fs);
    DFSClient spyDfsClient = Org.Mockito.Mockito.Spy(dfsClient);
    Org.Mockito.Mockito.DoAnswer(new _Answer_121()).When(spyDfsClient).GetLocatedBlocks(
        Org.Mockito.Mockito.AnyString(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito.AnyLong());
    // This will make the token invalid, since the password
    // won't match anymore
    DFSClientAdapter.SetDFSClient((DistributedFileSystem)fs, spyDfsClient);
    try {
        NUnit.Framework.Assert.AreEqual(TestData, DFSTestUtil.ReadFile(fs, TestPath));
        NUnit.Framework.Assert.Fail("Shouldn't have been able to read a file with invalid block tokens");
    } catch (IOException ioe) {
        GenericTestUtils.AssertExceptionContains("Could not obtain block", ioe);
    }
}
/// <summary>Test manual failover failback for one namespace</summary>
/// <param name="cluster">single process test cluster</param>
/// <param name="conf">cluster configuration</param>
/// <param name="nsIndex">namespace index starting from zero</param>
/// <exception cref="System.Exception"/>
private void TestManualFailoverFailback(MiniDFSCluster cluster, Configuration conf, int nsIndex) {
    int nn0 = 2 * nsIndex;
    int nn1 = 2 * nsIndex + 1;
    cluster.TransitionToActive(nn0);
    Log.Info("Starting with NN 0 active in namespace " + nsIndex);
    FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
    fs.Mkdirs(TestDir);
    Log.Info("Failing over to NN 1 in namespace " + nsIndex);
    cluster.TransitionToStandby(nn0);
    cluster.TransitionToActive(nn1);
    NUnit.Framework.Assert.IsTrue(fs.Exists(TestDir));
    DFSTestUtil.WriteFile(fs, TestFilePath, TestFileData);
    Log.Info("Failing over to NN 0 in namespace " + nsIndex);
    cluster.TransitionToStandby(nn1);
    cluster.TransitionToActive(nn0);
    NUnit.Framework.Assert.IsTrue(fs.Exists(TestDir));
    NUnit.Framework.Assert.AreEqual(TestFileData, DFSTestUtil.ReadFile(fs, TestFilePath));
    Log.Info("Removing test file");
    fs.Delete(TestDir, true);
    NUnit.Framework.Assert.IsFalse(fs.Exists(TestDir));
    Log.Info("Failing over to NN 1 in namespace " + nsIndex);
    cluster.TransitionToStandby(nn0);
    cluster.TransitionToActive(nn1);
    NUnit.Framework.Assert.IsFalse(fs.Exists(TestDir));
}
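// Illustrative sketch only (not part of the original source): a driver that exercises the
// helper above for every nameservice of a federated HA cluster might loop over namespace
// indices. The "numNameservices" parameter is an assumption for this sketch; each namespace
// owns the NN pair at indices 2*nsIndex and 2*nsIndex + 1, as the helper expects.
private void RunManualFailoverFailbackForAllNameservicesSketch(MiniDFSCluster cluster,
    Configuration conf, int numNameservices) {
    for (int nsIndex = 0; nsIndex < numNameservices; nsIndex++) {
        TestManualFailoverFailback(cluster, conf, nsIndex);
    }
}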
/// <summary>
/// Test cancellation of ongoing checkpoints when failover happens
/// mid-checkpoint during image upload from standby to active NN.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCheckpointCancellationDuringUpload() {
    // don't compress, we want a big image
    cluster.GetConfiguration(0).SetBoolean(DFSConfigKeys.DfsImageCompressKey, false);
    cluster.GetConfiguration(1).SetBoolean(DFSConfigKeys.DfsImageCompressKey, false);
    // Throttle SBN upload to make it hang during upload to ANN
    cluster.GetConfiguration(1).SetLong(DFSConfigKeys.DfsImageTransferRateKey, 100);
    cluster.RestartNameNode(0);
    cluster.RestartNameNode(1);
    nn0 = cluster.GetNameNode(0);
    nn1 = cluster.GetNameNode(1);
    cluster.TransitionToActive(0);
    DoEdits(0, 100);
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(104));
    cluster.TransitionToStandby(0);
    cluster.TransitionToActive(1);
    // Wait to make sure background TransferFsImageUpload thread was cancelled.
    // This needs to be done before the next test in the suite starts, so that a
    // file descriptor is not held open during the next cluster init.
    cluster.Shutdown();
    cluster = null;
    GenericTestUtils.WaitFor(new _Supplier_312(), 1000, 30000);
    // Assert that former active did not accept the canceled checkpoint file.
    NUnit.Framework.Assert.AreEqual(0, nn0.GetFSImage().GetMostRecentCheckpointTxId());
}
public virtual void SetupCluster() {
    Configuration conf = SetupCommonConfig();
    // Dial down the retention of extra edits and checkpoints. This is to
    // help catch regressions of HDFS-4238 (SBN should not purge shared edits)
    conf.SetInt(DFSConfigKeys.DfsNamenodeNumCheckpointsRetainedKey, 1);
    conf.SetInt(DFSConfigKeys.DfsNamenodeNumExtraEditsRetainedKey, 0);
    int retryCount = 0;
    while (true) {
        try {
            int basePort = 10060 + random.Next(100) * 2;
            MiniDFSNNTopology topology = new MiniDFSNNTopology().AddNameservice(
                new MiniDFSNNTopology.NSConf("ns1")
                    .AddNN(new MiniDFSNNTopology.NNConf("nn1").SetHttpPort(basePort))
                    .AddNN(new MiniDFSNNTopology.NNConf("nn2").SetHttpPort(basePort + 1)));
            cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(1).Build();
            cluster.WaitActive();
            nn0 = cluster.GetNameNode(0);
            nn1 = cluster.GetNameNode(1);
            fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
            cluster.TransitionToActive(0);
            ++retryCount;
            break;
        } catch (BindException) {
            Log.Info("Set up MiniDFSCluster failed due to port conflicts, retry " + retryCount + " times");
        }
    }
}
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/> /// <exception cref="System.IO.IOException"/> /// <exception cref="Sharpen.URISyntaxException"/> /// <exception cref="System.Exception"/> private void AssertCanStartHaNameNodes(string pathSuffix) { // Now should be able to start both NNs. Pass "false" here so that we don't // try to waitActive on all NNs, since the second NN doesn't exist yet. cluster.RestartNameNode(0, false); cluster.RestartNameNode(1, true); // Make sure HA is working. cluster.GetNameNode(0).GetRpcServer().TransitionToActive(new HAServiceProtocol.StateChangeRequestInfo (HAServiceProtocol.RequestSource.RequestByUser)); FileSystem fs = null; try { Path newPath = new Path(TestPath, pathSuffix); fs = HATestUtil.ConfigureFailoverFs(cluster, conf); NUnit.Framework.Assert.IsTrue(fs.Mkdirs(newPath)); HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1) ); NUnit.Framework.Assert.IsTrue(NameNodeAdapter.GetFileInfo(cluster.GetNameNode(1), newPath.ToString(), false).IsDir()); } finally { if (fs != null) { fs.Close(); } } }
public virtual void Setup() {
    conf = new Configuration();
    // Specify the quorum per-nameservice, to ensure that these configs
    // can be nameservice-scoped.
    conf.Set(ZKFailoverController.ZkQuorumKey + ".ns1", hostPort);
    conf.Set(DFSConfigKeys.DfsHaFenceMethodsKey, typeof(TestNodeFencer.AlwaysSucceedFencer).FullName);
    conf.SetBoolean(DFSConfigKeys.DfsHaAutoFailoverEnabledKey, true);
    // Turn off IPC client caching, so that the suite can handle
    // the restart of the daemons between test cases.
    conf.SetInt(CommonConfigurationKeysPublic.IpcClientConnectionMaxidletimeKey, 0);
    conf.SetInt(DFSConfigKeys.DfsHaZkfcPortKey + ".ns1.nn1", 10023);
    conf.SetInt(DFSConfigKeys.DfsHaZkfcPortKey + ".ns1.nn2", 10024);
    MiniDFSNNTopology topology = new MiniDFSNNTopology().AddNameservice(
        new MiniDFSNNTopology.NSConf("ns1")
            .AddNN(new MiniDFSNNTopology.NNConf("nn1").SetIpcPort(10021))
            .AddNN(new MiniDFSNNTopology.NNConf("nn2").SetIpcPort(10022)));
    cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(0).Build();
    cluster.WaitActive();
    ctx = new MultithreadedTestUtil.TestContext();
    ctx.AddThread(thr1 = new TestDFSZKFailoverController.ZKFCThread(this, ctx, 0));
    NUnit.Framework.Assert.AreEqual(0, thr1.zkfc.Run(new string[] { "-formatZK" }));
    thr1.Start();
    WaitForHAState(0, HAServiceProtocol.HAServiceState.Active);
    ctx.AddThread(thr2 = new TestDFSZKFailoverController.ZKFCThread(this, ctx, 1));
    thr2.Start();
    // Wait for the ZKFCs to fully start up
    ZKFCTestUtil.WaitForHealthState(thr1.zkfc, HealthMonitor.State.ServiceHealthy, ctx);
    ZKFCTestUtil.WaitForHealthState(thr2.zkfc, HealthMonitor.State.ServiceHealthy, ctx);
    fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
}
public virtual void SetUpNameNode() {
    conf = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(0).Build();
    HATestUtil.SetFailoverConfigurations(cluster, conf);
}
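// Hedged sketch (assumption, not part of the original source): a matching teardown for the
// setup above would typically shut the mini cluster down so later tests can rebind its ports.
// The method name is hypothetical; only cluster.Shutdown() is taken from this section.
public virtual void ShutDownNameNodeSketch() {
    if (cluster != null) {
        cluster.Shutdown();
    }
}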
public virtual void TestBlocksRemovedWhileInSafeMode() {
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.CreateFile(fs, new Path("/test"), 10 * BlockSize, (short)3, 1L);
    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup.
    nn0.GetRpcServer().RollEditLog();
    Banner("Restarting standby");
    RestartStandby();
    // It will initially have all of the blocks necessary.
    AssertSafeMode(nn1, 10, 10, 3, 0);
    // Delete those blocks while the SBN is in safe mode.
    // This doesn't affect the SBN, since deletions due to block removals
    // are not ACKed to it.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(new Path("/test"), true);
    BlockManagerTestUtil.ComputeAllPendingWork(nn0.GetNamesystem().GetBlockManager());
    Banner("Triggering deletions on DNs and Deletion Reports");
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    AssertSafeMode(nn1, 10, 10, 3, 0);
    // When we catch up to active namespace, it will restore back
    // to 0 blocks.
    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
public virtual void TestNfsUpgrade() {
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology())
            .NumDataNodes(0).Build();
        FilePath sharedDir = new FilePath(cluster.GetSharedEditsDir(0, 1));
        // No upgrade is in progress at the moment.
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);
        CheckPreviousDirExistence(sharedDir, false);
        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);
        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckPreviousDirExistence(sharedDir, true);
        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
        // Restart NN0 without the -upgrade flag, to make sure that works.
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Regular);
        cluster.RestartNameNode(0, false);
        // Make sure we can still do FS ops after upgrading.
        cluster.TransitionToActive(0);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo3")));
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);
        // Now restart NN1 and make sure that we can do ops against that as well.
        cluster.RestartNameNode(1);
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo4")));
        AssertCTimesEqual(cluster);
    } finally {
        if (fs != null) {
            fs.Close();
        }
        if (cluster != null) {
            cluster.Shutdown();
        }
    }
}
public virtual void SetUpCluster() {
    conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointCheckPeriodKey, 1);
    conf.SetInt(DFSConfigKeys.DfsNamenodeCheckpointTxnsKey, 1);
    conf.SetInt(DFSConfigKeys.DfsNamenodeNumCheckpointsRetainedKey, 10);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    HAUtil.SetAllowStandbyReads(conf, true);
    if (clusterType == TestFailureToReadEdits.TestType.SharedDirHa) {
        MiniDFSNNTopology topology = MiniQJMHACluster.CreateDefaultTopology(10000);
        cluster = new MiniDFSCluster.Builder(conf).NnTopology(topology).NumDataNodes(0)
            .CheckExitOnShutdown(false).Build();
    } else {
        MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.GetDfsBuilder().NumDataNodes(0).CheckExitOnShutdown(false);
        miniQjmHaCluster = builder.Build();
        cluster = miniQjmHaCluster.GetDfsCluster();
    }
    cluster.WaitActive();
    nn0 = cluster.GetNameNode(0);
    nn1 = cluster.GetNameNode(1);
    cluster.TransitionToActive(0);
    fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
}
public virtual void TestFailureToReadEditsOnTransitionToActive() {
    NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir1)));
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    // It should also upload it back to the active.
    HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3));
    CauseFailureOnEditLogRead();
    NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir2)));
    NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir3)));
    try {
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        NUnit.Framework.Assert.Fail("Standby fully caught up, but should not have been able to");
    } catch (HATestUtil.CouldNotCatchUpException) {
        // Expected. The NN did not exit.
    }
    // Shutdown the active NN.
    cluster.ShutdownNameNode(0);
    try {
        // Transition the standby to active.
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.Fail("Standby transitioned to active, but should not have been able to");
    } catch (ExitUtil.ExitException ee) {
        GenericTestUtils.AssertExceptionContains("Error replaying edit log", ee);
    }
}
public virtual void TestBlocksRemovedWhileInSafeModeEditsArriveFirst() {
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.CreateFile(fs, new Path("/test"), 10 * BlockSize, (short)3, 1L);
    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup.
    nn0.GetRpcServer().RollEditLog();
    Banner("Restarting standby");
    RestartStandby();
    // It will initially have all of the blocks necessary.
    string status = nn1.GetNamesystem().GetSafemode();
    NUnit.Framework.Assert.IsTrue("Bad safemode status: '" + status + "'",
        status.StartsWith("Safe mode is ON. The reported blocks 10 has reached the threshold "
            + "0.9990 of total blocks 10. The number of live datanodes 3 has "
            + "reached the minimum number 0. In safe mode extension. "
            + "Safe mode will be turned off automatically"));
    // Delete those blocks while the SBN is in safe mode.
    // Immediately roll the edit log before the actual deletions are sent
    // to the DNs.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(new Path("/test"), true);
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    // Should see removal of the blocks as well as their contribution to safe block count.
    AssertSafeMode(nn1, 0, 0, 3, 0);
    Banner("Triggering sending deletions to DNs and Deletion Reports");
    BlockManagerTestUtil.ComputeAllPendingWork(nn0.GetNamesystem().GetBlockManager());
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    // No change in assertion status here, but some of the consistency checks
    // in safemode will fire here if we accidentally decrement safe block count
    // below 0.
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
/// <summary>
/// This test also serves to test
/// <see cref="Org.Apache.Hadoop.Hdfs.HAUtil.GetProxiesForAllNameNodesInNameservice(Org.Apache.Hadoop.Conf.Configuration, string)"/>
/// and
/// <see cref="Org.Apache.Hadoop.Hdfs.DFSUtil.GetRpcAddressesForNameserviceId(Org.Apache.Hadoop.Conf.Configuration, string, string)"/>
/// by virtue of the fact that it wouldn't work properly if the proxies
/// returned were not for the correct NNs.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestIsAtLeastOneActive() {
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new HdfsConfiguration())
        .NnTopology(MiniDFSNNTopology.SimpleHATopology()).NumDataNodes(0).Build();
    try {
        Configuration conf = new HdfsConfiguration();
        HATestUtil.SetFailoverConfigurations(cluster, conf);
        IList<ClientProtocol> namenodes = HAUtil.GetProxiesForAllNameNodesInNameservice(conf,
            HATestUtil.GetLogicalHostname(cluster));
        NUnit.Framework.Assert.AreEqual(2, namenodes.Count);
        NUnit.Framework.Assert.IsFalse(HAUtil.IsAtLeastOneActive(namenodes));
        cluster.TransitionToActive(0);
        NUnit.Framework.Assert.IsTrue(HAUtil.IsAtLeastOneActive(namenodes));
        cluster.TransitionToStandby(0);
        NUnit.Framework.Assert.IsFalse(HAUtil.IsAtLeastOneActive(namenodes));
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue(HAUtil.IsAtLeastOneActive(namenodes));
        cluster.TransitionToStandby(1);
        NUnit.Framework.Assert.IsFalse(HAUtil.IsAtLeastOneActive(namenodes));
    } finally {
        if (cluster != null) {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Regression test for HDFS-2693: when doing state transitions, we need to
/// lock the FSNamesystem so that we don't end up doing any writes while it's
/// "in between" states.
/// </summary>
/// <remarks>
/// Regression test for HDFS-2693: when doing state transitions, we need to
/// lock the FSNamesystem so that we don't end up doing any writes while it's
/// "in between" states.
/// This test case starts up several client threads which do mutation operations
/// while flipping a NN back and forth from active to standby.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestTransitionSynchronization() {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(0).Build();
    try {
        cluster.WaitActive();
        ReentrantReadWriteLock spyLock = NameNodeAdapter.SpyOnFsLock(cluster.GetNameNode(0).GetNamesystem());
        Org.Mockito.Mockito.DoAnswer(new GenericTestUtils.SleepAnswer(50)).When(spyLock).WriteLock();
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext();
        for (int i = 0; i < 50; i++) {
            int finalI = i;
            ctx.AddThread(new _RepeatingTestThread_256(finalI, fs, ctx));
        }
        ctx.AddThread(new _RepeatingTestThread_266(cluster, ctx));
        ctx.StartThreads();
        ctx.WaitFor(20000);
        ctx.Stop();
    } finally {
        cluster.Shutdown();
    }
}
public virtual void TestFinalizeWithJournalNodes() {
    MiniQJMHACluster qjCluster = null;
    FileSystem fs = null;
    try {
        MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.GetDfsBuilder().NumDataNodes(0);
        qjCluster = builder.Build();
        MiniDFSCluster cluster = qjCluster.GetDfsCluster();
        // No upgrade is in progress at the moment.
        CheckJnPreviousDirExistence(qjCluster, false);
        CheckClusterPreviousDirExistence(cluster, false);
        AssertCTimesEqual(cluster);
        // Transition NN0 to active and do some FS ops.
        cluster.TransitionToActive(0);
        fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo1")));
        long cidBeforeUpgrade = GetCommittedTxnIdValue(qjCluster);
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.ShutdownNameNode(1);
        cluster.GetNameNodeInfos()[0].SetStartOpt(HdfsServerConstants.StartupOption.Upgrade);
        cluster.RestartNameNode(0, false);
        NUnit.Framework.Assert.IsTrue(cidBeforeUpgrade <= GetCommittedTxnIdValue(qjCluster));
        NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path("/foo2")));
        CheckNnPreviousDirExistence(cluster, 0, true);
        CheckNnPreviousDirExistence(cluster, 1, false);
        CheckJnPreviousDirExistence(qjCluster, true);
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.Run(new string[] { "-force" }, cluster.GetConfiguration(1));
        NUnit.Framework.Assert.AreEqual(0, rc);
        cluster.RestartNameNode(1);
        long cidDuringUpgrade = GetCommittedTxnIdValue(qjCluster);
        NUnit.Framework.Assert.IsTrue(cidDuringUpgrade > cidBeforeUpgrade);
        RunFinalizeCommand(cluster);
        NUnit.Framework.Assert.AreEqual(cidDuringUpgrade, GetCommittedTxnIdValue(qjCluster));
        CheckClusterPreviousDirExistence(cluster, false);
        CheckJnPreviousDirExistence(qjCluster, false);
        AssertCTimesEqual(cluster);
    } finally {
        if (fs != null) {
            fs.Close();
        }
        if (qjCluster != null) {
            qjCluster.Shutdown();
        }
    }
}
/// <summary>
/// Regression test for HDFS-2795:
/// - Start an HA cluster with a DN.
/// </summary>
/// <remarks>
/// Regression test for HDFS-2795:
/// - Start an HA cluster with a DN.
/// - Write several blocks to the FS with replication 1.
/// - Shutdown the DN
/// - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
/// - Restart the DN.
/// In the bug, the standby node would only very slowly notice the blocks returning
/// to the cluster.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDatanodeRestarts() {
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, 1024);
    // We read from the standby to watch block locations
    HAUtil.SetAllowStandbyReads(conf, true);
    conf.SetLong(DFSConfigKeys.DfsNamenodeAccesstimePrecisionKey, 0);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(1).Build();
    try {
        NameNode nn0 = cluster.GetNameNode(0);
        NameNode nn1 = cluster.GetNameNode(1);
        cluster.TransitionToActive(0);
        // Create 5 blocks.
        DFSTestUtil.CreateFile(cluster.GetFileSystem(0), TestFilePath, 5 * 1024, (short)1, 1L);
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        // Stop the DN.
        DataNode dn = cluster.GetDataNodes()[0];
        string dnName = dn.GetDatanodeId().GetXferAddr();
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
        // Make sure both NNs register it as dead.
        BlockManagerTestUtil.NoticeDeadDatanode(nn0, dnName);
        BlockManagerTestUtil.NoticeDeadDatanode(nn1, dnName);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(5, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        // The SBN will not have any blocks in its neededReplication queue
        // since the SBN doesn't process replication.
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());
        LocatedBlocks locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has no replicas",
            0, locs.Get(0).GetLocations().Length);
        cluster.RestartDataNode(dnProps);
        // Wait for both NNs to re-register the DN.
        cluster.WaitActive(0);
        cluster.WaitActive(1);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(0, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());
        locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has replicas again",
            1, locs.Get(0).GetLocations().Length);
    } finally {
        cluster.Shutdown();
    }
}
/// <exception cref="System.Exception"/> private void DoWriteOverFailoverTest(TestPipelinesFailover.TestScenario scenario, TestPipelinesFailover.MethodToTestIdempotence methodToTest) { Configuration conf = new Configuration(); conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize); // Don't check replication periodically. conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, 1000); FSDataOutputStream stm = null; MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology .SimpleHATopology()).NumDataNodes(3).Build(); try { int sizeWritten = 0; cluster.WaitActive(); cluster.TransitionToActive(0); Sharpen.Thread.Sleep(500); Log.Info("Starting with NN 0 active"); FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf); stm = fs.Create(TestPath); // write a block and a half AppendTestUtil.Write(stm, 0, BlockAndAHalf); sizeWritten += BlockAndAHalf; // Make sure all of the blocks are written out before failover. stm.Hflush(); Log.Info("Failing over to NN 1"); scenario.Run(cluster); // NOTE: explicitly do *not* make any further metadata calls // to the NN here. The next IPC call should be to allocate the next // block. Any other call would notice the failover and not test // idempotence of the operation (HDFS-3031) FSNamesystem ns1 = cluster.GetNameNode(1).GetNamesystem(); BlockManagerTestUtil.UpdateState(ns1.GetBlockManager()); NUnit.Framework.Assert.AreEqual(0, ns1.GetPendingReplicationBlocks()); NUnit.Framework.Assert.AreEqual(0, ns1.GetCorruptReplicaBlocks()); NUnit.Framework.Assert.AreEqual(0, ns1.GetMissingBlocksCount()); // If we're testing allocateBlock()'s idempotence, write another // block and a half, so we have to allocate a new block. // Otherise, don't write anything, so our next RPC will be // completeFile() if we're testing idempotence of that operation. if (methodToTest == TestPipelinesFailover.MethodToTestIdempotence.AllocateBlock) { // write another block and a half AppendTestUtil.Write(stm, sizeWritten, BlockAndAHalf); sizeWritten += BlockAndAHalf; } stm.Close(); stm = null; AppendTestUtil.Check(fs, TestPath, sizeWritten); } finally { IOUtils.CloseStream(stm); cluster.Shutdown(); } }
/// <exception cref="Org.Apache.Hadoop.HA.ServiceFailedException"/> /// <exception cref="System.IO.IOException"/> /// <exception cref="Sharpen.URISyntaxException"/> private void WriteUsingBothNameNodes() { cluster.TransitionToActive(0); FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf); DFSTestUtil.WriteFile(fs, TestPath, TestData); cluster.TransitionToStandby(0); cluster.TransitionToActive(1); fs.Delete(TestPath, false); DFSTestUtil.WriteFile(fs, TestPath, TestData); }
public virtual void TestDnFencing() {
    // Create a file with replication level 3.
    DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)3, 1L);
    ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, TestFilePath);
    // Drop its replication count to 1, so it becomes over-replicated.
    // Then compute the invalidation of the extra blocks and trigger
    // heartbeats so the invalidations are flushed to the DNs.
    nn1.GetRpcServer().SetReplication(TestFile, (short)1);
    BlockManagerTestUtil.ComputeInvalidationWork(nn1.GetNamesystem().GetBlockManager());
    cluster.TriggerHeartbeats();
    // Transition nn2 to active even though nn1 still thinks it's active.
    Banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.AbortEditLogs(nn1);
    NameNodeAdapter.EnterSafeMode(nn1, false);
    cluster.TransitionToActive(1);
    // Check that the standby picked up the replication change.
    NUnit.Framework.Assert.AreEqual(1, nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());
    // Dump some info for debugging purposes.
    Banner("NN2 Metadata immediately after failover");
    DoMetasave(nn2);
    Banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.TriggerHeartbeats();
    cluster.TriggerBlockReports();
    Banner("Metadata after nodes have all block-reported");
    DoMetasave(nn2);
    // Force a rescan of postponedMisreplicatedBlocks.
    BlockManager nn2BM = nn2.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.CheckHeartbeat(nn2BM);
    BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(nn2BM);
    // The blocks should no longer be postponed.
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());
    // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
    BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetUnderReplicatedBlocks());
    NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPendingReplicationBlocks());
    Banner("Making sure the file is still readable");
    FileSystem fs2 = cluster.GetFileSystem(1);
    DFSTestUtil.ReadFile(fs2, TestFilePath);
    Banner("Waiting for the actual block files to get deleted from DNs.");
    WaitForTrueReplication(cluster, block, 1);
}
/// <exception cref="System.Exception"/> public virtual void TestStandbyIsHot() { Configuration conf = new Configuration(); // We read from the standby to watch block locations HAUtil.SetAllowStandbyReads(conf, true); conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology .SimpleHATopology()).NumDataNodes(3).Build(); try { cluster.WaitActive(); cluster.TransitionToActive(0); NameNode nn1 = cluster.GetNameNode(0); NameNode nn2 = cluster.GetNameNode(1); FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf); Sharpen.Thread.Sleep(1000); System.Console.Error.WriteLine("=================================="); DFSTestUtil.WriteFile(fs, TestFilePath, TestFileData); // Have to force an edit log roll so that the standby catches up nn1.GetRpcServer().RollEditLog(); System.Console.Error.WriteLine("=================================="); // Block locations should show up on standby. Log.Info("Waiting for block locations to appear on standby node"); WaitForBlockLocations(cluster, nn2, TestFile, 3); // Trigger immediate heartbeats and block reports so // that the active "trusts" all of the DNs cluster.TriggerHeartbeats(); cluster.TriggerBlockReports(); // Change replication Log.Info("Changing replication to 1"); fs.SetReplication(TestFilePath, (short)1); BlockManagerTestUtil.ComputeAllPendingWork(nn1.GetNamesystem().GetBlockManager()); WaitForBlockLocations(cluster, nn1, TestFile, 1); nn1.GetRpcServer().RollEditLog(); Log.Info("Waiting for lowered replication to show up on standby"); WaitForBlockLocations(cluster, nn2, TestFile, 1); // Change back to 3 Log.Info("Changing replication to 3"); fs.SetReplication(TestFilePath, (short)3); BlockManagerTestUtil.ComputeAllPendingWork(nn1.GetNamesystem().GetBlockManager()); nn1.GetRpcServer().RollEditLog(); Log.Info("Waiting for higher replication to show up on standby"); WaitForBlockLocations(cluster, nn2, TestFile, 3); } finally { cluster.Shutdown(); } }
public virtual void SetupCluster() {
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    HAUtil.SetAllowStandbyReads(conf, true);
    cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(1).WaitSafeMode(false).Build();
    cluster.WaitActive();
    nn0 = cluster.GetNameNode(0);
    nn1 = cluster.GetNameNode(1);
    fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
    cluster.TransitionToActive(0);
}
public virtual void TestAppendWhileInSafeMode() {
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    // Make 4.5 blocks so that append() will re-open an existing block
    // instead of just adding a new one
    DFSTestUtil.CreateFile(fs, new Path("/test"), 4 * BlockSize + BlockSize / 2, (short)3, 1L);
    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup.
    nn0.GetRpcServer().RollEditLog();
    Banner("Restarting standby");
    RestartStandby();
    // It will initially have all of the blocks necessary.
    AssertSafeMode(nn1, 5, 5, 3, 0);
    // Append to a block while SBN is in safe mode. This should
    // not affect safemode initially, since the DN message
    // will get queued.
    FSDataOutputStream stm = fs.Append(new Path("/test"));
    try {
        AssertSafeMode(nn1, 5, 5, 3, 0);
        // if we roll edits now, the SBN should see that it's under construction
        // and change its total count and safe count down by one, since UC
        // blocks are not counted by safe mode.
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        AssertSafeMode(nn1, 4, 4, 3, 0);
    } finally {
        IOUtils.CloseStream(stm);
    }
    // Delete those blocks while the SBN is in safe mode.
    // This will not ACK the deletions to the SBN, so it won't
    // notice until we roll the edit log.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(new Path("/test"), true);
    BlockManagerTestUtil.ComputeAllPendingWork(nn0.GetNamesystem().GetBlockManager());
    Banner("Triggering deletions on DNs and Deletion Reports");
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    AssertSafeMode(nn1, 4, 4, 3, 0);
    // When we roll the edit log, the deletions will go through.
    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
/// <exception cref="System.Exception"/> public virtual void TestHdfsGetCanonicalServiceName() { Configuration conf = dfs.GetConf(); URI haUri = HATestUtil.GetLogicalUri(cluster); AbstractFileSystem afs = AbstractFileSystem.CreateFileSystem(haUri, conf); string haService = HAUtil.BuildTokenServiceForLogicalUri(haUri, HdfsConstants.HdfsUriScheme ).ToString(); NUnit.Framework.Assert.AreEqual(haService, afs.GetCanonicalServiceName()); Org.Apache.Hadoop.Security.Token.Token <object> token = afs.GetDelegationTokens(UserGroupInformation .GetCurrentUser().GetShortUserName())[0]; NUnit.Framework.Assert.AreEqual(haService, token.GetService().ToString()); // make sure the logical uri is handled correctly token.Renew(conf); token.Cancel(conf); }
/// <summary>
/// HDFS-3062: DistributedFileSystem.getCanonicalServiceName() throws an
/// exception if the URI is a logical URI.
/// </summary>
/// <remarks>
/// HDFS-3062: DistributedFileSystem.getCanonicalServiceName() throws an
/// exception if the URI is a logical URI. This bug fails the combination of
/// ha + mapred + security.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDFSGetCanonicalServiceName() {
    URI hAUri = HATestUtil.GetLogicalUri(cluster);
    string haService = HAUtil.BuildTokenServiceForLogicalUri(hAUri, HdfsConstants.HdfsUriScheme).ToString();
    NUnit.Framework.Assert.AreEqual(haService, dfs.GetCanonicalServiceName());
    string renewer = UserGroupInformation.GetCurrentUser().GetShortUserName();
    Org.Apache.Hadoop.Security.Token.Token<DelegationTokenIdentifier> token = GetDelegationToken(dfs, renewer);
    NUnit.Framework.Assert.AreEqual(haService, token.GetService().ToString());
    // make sure the logical uri is handled correctly
    token.Renew(dfs.GetConf());
    token.Cancel(dfs.GetConf());
}
public virtual void TestCheckpointStartingMidEditsFile() {
    NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir1)));
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    // Once the standby catches up, it should notice that it needs to
    // do a checkpoint and save one to its local directories.
    HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(0, 3));
    // It should also upload it back to the active.
    HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3));
    CauseFailureOnEditLogRead();
    NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir2)));
    NUnit.Framework.Assert.IsTrue(fs.Mkdirs(new Path(TestDir3)));
    try {
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        NUnit.Framework.Assert.Fail("Standby fully caught up, but should not have been able to");
    } catch (HATestUtil.CouldNotCatchUpException) {
        // Expected. The NN did not exit.
    }
    // 5 because we should get OP_START_LOG_SEGMENT and one successful OP_MKDIR
    HATestUtil.WaitForCheckpoint(cluster, 1, ImmutableList.Of(0, 3, 5));
    // It should also upload it back to the active.
    HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3, 5));
    // Restart the active NN
    cluster.RestartNameNode(0);
    HATestUtil.WaitForCheckpoint(cluster, 0, ImmutableList.Of(0, 3, 5));
    FileSystem fs0 = null;
    try {
        // Make sure that when the active restarts, it loads all the edits.
        fs0 = FileSystem.Get(NameNode.GetUri(nn0.GetNameNodeAddress()), conf);
        NUnit.Framework.Assert.IsTrue(fs0.Exists(new Path(TestDir1)));
        NUnit.Framework.Assert.IsTrue(fs0.Exists(new Path(TestDir2)));
        NUnit.Framework.Assert.IsTrue(fs0.Exists(new Path(TestDir3)));
    } finally {
        if (fs0 != null) {
            fs0.Close();
        }
    }
}
/// <summary>
/// Test that quotas are properly tracked by the standby through
/// create, append, delete.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestQuotasTrackedOnStandby() {
    fs.Mkdirs(TestDir);
    DistributedFileSystem dfs = (DistributedFileSystem)fs;
    dfs.SetQuota(TestDir, NsQuota, DsQuota);
    long expectedSize = 3 * BlockSize + BlockSize / 2;
    DFSTestUtil.CreateFile(fs, TestFile, expectedSize, (short)1, 1L);
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    ContentSummary cs = nn1.GetRpcServer().GetContentSummary(TestDirStr);
    NUnit.Framework.Assert.AreEqual(NsQuota, cs.GetQuota());
    NUnit.Framework.Assert.AreEqual(DsQuota, cs.GetSpaceQuota());
    NUnit.Framework.Assert.AreEqual(expectedSize, cs.GetSpaceConsumed());
    NUnit.Framework.Assert.AreEqual(1, cs.GetDirectoryCount());
    NUnit.Framework.Assert.AreEqual(1, cs.GetFileCount());
    // Append to the file and make sure quota is updated correctly.
    FSDataOutputStream stm = fs.Append(TestFile);
    try {
        byte[] data = new byte[(int)(BlockSize * 3 / 2)];
        stm.Write(data);
        expectedSize += data.Length;
    } finally {
        IOUtils.CloseStream(stm);
    }
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    cs = nn1.GetRpcServer().GetContentSummary(TestDirStr);
    NUnit.Framework.Assert.AreEqual(NsQuota, cs.GetQuota());
    NUnit.Framework.Assert.AreEqual(DsQuota, cs.GetSpaceQuota());
    NUnit.Framework.Assert.AreEqual(expectedSize, cs.GetSpaceConsumed());
    NUnit.Framework.Assert.AreEqual(1, cs.GetDirectoryCount());
    NUnit.Framework.Assert.AreEqual(1, cs.GetFileCount());
    fs.Delete(TestFile, true);
    expectedSize = 0;
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    cs = nn1.GetRpcServer().GetContentSummary(TestDirStr);
    NUnit.Framework.Assert.AreEqual(NsQuota, cs.GetQuota());
    NUnit.Framework.Assert.AreEqual(DsQuota, cs.GetSpaceQuota());
    NUnit.Framework.Assert.AreEqual(expectedSize, cs.GetSpaceConsumed());
    NUnit.Framework.Assert.AreEqual(1, cs.GetDirectoryCount());
    NUnit.Framework.Assert.AreEqual(0, cs.GetFileCount());
}
/// <exception cref="System.Exception"/> private void DoTestWriteOverFailoverWithDnFail(TestPipelinesFailover.TestScenario scenario) { Configuration conf = new Configuration(); conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize); FSDataOutputStream stm = null; MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology .SimpleHATopology()).NumDataNodes(5).Build(); try { cluster.WaitActive(); cluster.TransitionToActive(0); Sharpen.Thread.Sleep(500); Log.Info("Starting with NN 0 active"); FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf); stm = fs.Create(TestPath); // write a block and a half AppendTestUtil.Write(stm, 0, BlockAndAHalf); // Make sure all the blocks are written before failover stm.Hflush(); Log.Info("Failing over to NN 1"); scenario.Run(cluster); NUnit.Framework.Assert.IsTrue(fs.Exists(TestPath)); cluster.StopDataNode(0); // write another block and a half AppendTestUtil.Write(stm, BlockAndAHalf, BlockAndAHalf); stm.Hflush(); Log.Info("Failing back to NN 0"); cluster.TransitionToStandby(1); cluster.TransitionToActive(0); cluster.StopDataNode(1); AppendTestUtil.Write(stm, BlockAndAHalf * 2, BlockAndAHalf); stm.Hflush(); stm.Close(); stm = null; AppendTestUtil.Check(fs, TestPath, BlockAndAHalf * 3); } finally { IOUtils.CloseStream(stm); cluster.Shutdown(); } }
public virtual void TestBlocksAddedWhileInSafeMode() {
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.CreateFile(fs, new Path("/test"), 3 * BlockSize, (short)3, 1L);
    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup.
    nn0.GetRpcServer().RollEditLog();
    Banner("Restarting standby");
    RestartStandby();
    AssertSafeMode(nn1, 3, 3, 3, 0);
    // Create a few blocks which will send blockReceived calls to the
    // SBN.
    Banner("Creating some blocks while SBN is in safe mode");
    DFSTestUtil.CreateFile(fs, new Path("/test2"), 5 * BlockSize, (short)3, 1L);
    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 8, 8, 3, 0);
}
public virtual void TestTailer() {
    Configuration conf = new HdfsConfiguration();
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    HAUtil.SetAllowStandbyReads(conf, true);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(0).Build();
    cluster.WaitActive();
    cluster.TransitionToActive(0);
    NameNode nn1 = cluster.GetNameNode(0);
    NameNode nn2 = cluster.GetNameNode(1);
    try {
        for (int i = 0; i < DirsToMake / 2; i++) {
            NameNodeAdapter.Mkdirs(nn1, GetDirPath(i),
                new PermissionStatus("test", "test", new FsPermission((short)0x1ed)), true);
        }
        HATestUtil.WaitForStandbyToCatchUp(nn1, nn2);
        for (int i_1 = 0; i_1 < DirsToMake / 2; i_1++) {
            NUnit.Framework.Assert.IsTrue(NameNodeAdapter.GetFileInfo(nn2, GetDirPath(i_1), false).IsDir());
        }
        for (int i_2 = DirsToMake / 2; i_2 < DirsToMake; i_2++) {
            NameNodeAdapter.Mkdirs(nn1, GetDirPath(i_2),
                new PermissionStatus("test", "test", new FsPermission((short)0x1ed)), true);
        }
        HATestUtil.WaitForStandbyToCatchUp(nn1, nn2);
        for (int i_3 = DirsToMake / 2; i_3 < DirsToMake; i_3++) {
            NUnit.Framework.Assert.IsTrue(NameNodeAdapter.GetFileInfo(nn2, GetDirPath(i_3), false).IsDir());
        }
    } finally {
        cluster.Shutdown();
    }
}
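// Hedged sketch (hypothetical, not the original helper): GetDirPath used above presumably
// maps an index to a unique directory name, along these lines. It is left commented out
// because the real helper lives elsewhere in the test class; the "/testDir" prefix is an
// assumption for illustration only.
// private static string GetDirPath(int suffix) {
//     return "/testDir" + suffix;
// }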
public virtual void TestBlocksAddedBeforeStandbyRestart() {
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.CreateFile(fs, new Path("/test"), 3 * BlockSize, (short)3, 1L);
    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup.
    nn0.GetRpcServer().RollEditLog();
    Banner("Creating some blocks that won't be in the edit log");
    DFSTestUtil.CreateFile(fs, new Path("/test2"), 5 * BlockSize, (short)3, 1L);
    Banner("Restarting standby");
    RestartStandby();
    // We expect it not to be stuck in safemode, since those blocks
    // that are already visible to the SBN should be processed
    // in the initial block reports.
    AssertSafeMode(nn1, 3, 3, 3, 0);
    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 8, 8, 3, 0);
}