Ejemplo n.º 1
0
        /// <summary>
        /// Regression test for HDFS-2795:
        /// - Start an HA cluster with a DN.
        /// </summary>
        /// <remarks>
        /// Regression test for HDFS-2795:
        /// - Start an HA cluster with a DN.
        /// - Write several blocks to the FS with replication 1.
        /// - Shutdown the DN
        /// - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
        /// - Restart the DN.
        /// In the bug, the standby node would only very slowly notice the blocks returning
        /// to the cluster.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDatanodeRestarts()
        {
            Configuration conf = new Configuration();

            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, 1024);
            // We read from the standby to watch block locations
            HAUtil.SetAllowStandbyReads(conf, true);
            conf.SetLong(DFSConfigKeys.DfsNamenodeAccesstimePrecisionKey, 0);
            conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                 .SimpleHATopology()).NumDataNodes(1).Build();

            try
            {
                NameNode nn0 = cluster.GetNameNode(0);
                NameNode nn1 = cluster.GetNameNode(1);
                cluster.TransitionToActive(0);
                // Create 5 blocks.
                DFSTestUtil.CreateFile(cluster.GetFileSystem(0), TestFilePath, 5 * 1024, (short)1
                                       , 1L);
                HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
                // Stop the DN.
                DataNode dn     = cluster.GetDataNodes()[0];
                string   dnName = dn.GetDatanodeId().GetXferAddr();
                MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
                // Make sure both NNs register it as dead.
                BlockManagerTestUtil.NoticeDeadDatanode(nn0, dnName);
                BlockManagerTestUtil.NoticeDeadDatanode(nn1, dnName);
                BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
                BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
                NUnit.Framework.Assert.AreEqual(5, nn0.GetNamesystem().GetUnderReplicatedBlocks()
                                                );
                // The SBN will not have any blocks in its neededReplication queue
                // since the SBN doesn't process replication.
                NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks()
                                                );
                LocatedBlocks locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
                NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has no replicas"
                                                , 0, locs.Get(0).GetLocations().Length);
                cluster.RestartDataNode(dnProps);
                // Wait for both NNs to re-register the DN.
                cluster.WaitActive(0);
                cluster.WaitActive(1);
                BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
                BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
                NUnit.Framework.Assert.AreEqual(0, nn0.GetNamesystem().GetUnderReplicatedBlocks()
                                                );
                NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks()
                                                );
                locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
                NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has replicas again"
                                                , 1, locs.Get(0).GetLocations().Length);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
        public virtual void TestNormalReplicaOffline()
        {
            // Stop the datanode hosting the NORMAL replica
            cluster.StopDataNode(normalDataNode.GetXferAddr());
            // Force NameNode to detect that the datanode is down
            BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), normalDataNode.GetXferAddr
                                                        ());
            // The live replica count should now be zero (since the NORMAL replica is offline)
            NumberReplicas numberReplicas = blockManager.CountNodes(block);

            Assert.AssertThat(numberReplicas.LiveReplicas(), CoreMatchers.Is(0));
            // The block should be reported as under-replicated
            BlockManagerTestUtil.UpdateState(blockManager);
            Assert.AssertThat(blockManager.GetUnderReplicatedBlocksCount(), CoreMatchers.Is(1L
                                                                                            ));
            // The BlockManager should be able to heal the replication count back to 1
            // by triggering an inter-datanode replication from one of the READ_ONLY_SHARED replicas
            BlockManagerTestUtil.ComputeAllPendingWork(blockManager);
            DFSTestUtil.WaitForReplication(cluster, extendedBlock, 1, 1, 0);
            // There should now be 2 *locations* for the block, and 1 *replica*
            Assert.AssertThat(GetLocatedBlock().GetLocations().Length, CoreMatchers.Is(2));
            ValidateNumberReplicas(1);
        }