/// <summary>
/// Regression test for HDFS-2795: after a DataNode restart, the standby
/// NameNode must report the block's locations again.
/// </summary>
/// <remarks>
/// Steps:
/// - Start an HA cluster with a DN.
/// - Write several blocks to the FS with replication 1.
/// - Shutdown the DN.
/// - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
/// - Restart the DN.
/// In the bug, the standby node would only very slowly notice the blocks returning
/// to the cluster.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDatanodeRestarts()
{
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, 1024);
    // We read from the standby to watch block locations
    HAUtil.SetAllowStandbyReads(conf, true);
    conf.SetLong(DFSConfigKeys.DfsNamenodeAccesstimePrecisionKey, 0);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(1)
        .Build();
    try
    {
        NameNode nn0 = cluster.GetNameNode(0);
        NameNode nn1 = cluster.GetNameNode(1);
        cluster.TransitionToActive(0);

        // Create 5 blocks (file length is 5 * 1024 with a 1024-byte block size).
        DFSTestUtil.CreateFile(cluster.GetFileSystem(0), TestFilePath, 5 * 1024, (short)1, 1L);
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);

        // Stop the DN.
        DataNode dn = cluster.GetDataNodes()[0];
        string dnName = dn.GetDatanodeId().GetXferAddr();
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);

        // Make sure both NNs register it as dead.
        BlockManagerTestUtil.NoticeDeadDatanode(nn0, dnName);
        BlockManagerTestUtil.NoticeDeadDatanode(nn1, dnName);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(5, nn0.GetNamesystem().GetUnderReplicatedBlocks());

        // The SBN will not have any blocks in its neededReplication queue
        // since the SBN doesn't process replication.
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());

        // NUnit's AreEqual takes (expected, actual, message); the message goes last.
        LocatedBlocks locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual(0, locs.Get(0).GetLocations().Length,
            "Standby should have registered that the block has no replicas");

        cluster.RestartDataNode(dnProps);

        // Wait for both NNs to re-register the DN.
        cluster.WaitActive(0);
        cluster.WaitActive(1);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(0, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());

        locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual(1, locs.Get(0).GetLocations().Length,
            "Standby should have registered that the block has replicas again");
    }
    finally
    {
        // Always tear the mini cluster down, even when an assertion fails.
        cluster.Shutdown();
    }
}
/// <summary>
/// Takes the datanode holding the NORMAL replica offline and checks that the
/// block is first counted as under-replicated and then healed back to one
/// live replica.
/// </summary>
public virtual void TestNormalReplicaOffline()
{
    // Take down the datanode that hosts the NORMAL replica.
    cluster.StopDataNode(normalDataNode.GetXferAddr());

    // Make the NameNode notice right away that this datanode is gone.
    BlockManagerTestUtil.NoticeDeadDatanode(
        cluster.GetNameNode(),
        normalDataNode.GetXferAddr());

    // With the NORMAL replica offline, no live replica should remain.
    NumberReplicas replicaCounts = blockManager.CountNodes(block);
    var liveReplicaCount = replicaCounts.LiveReplicas();
    Assert.AssertThat(liveReplicaCount, CoreMatchers.Is(0));

    // After a state refresh, exactly one block is expected to be under-replicated.
    BlockManagerTestUtil.UpdateState(blockManager);
    var underReplicatedCount = blockManager.GetUnderReplicatedBlocksCount();
    Assert.AssertThat(underReplicatedCount, CoreMatchers.Is(1L));

    // Let the BlockManager restore the replication count to 1 by scheduling an
    // inter-datanode copy from one of the READ_ONLY_SHARED replicas.
    BlockManagerTestUtil.ComputeAllPendingWork(blockManager);
    DFSTestUtil.WaitForReplication(cluster, extendedBlock, 1, 1, 0);

    // Expect 2 *locations* for the block, but only 1 *replica*.
    var locationCount = GetLocatedBlock().GetLocations().Length;
    Assert.AssertThat(locationCount, CoreMatchers.Is(2));
    ValidateNumberReplicas(1);
}