/// <summary>
/// Ensure that the given NameNode marks the specified DataNode as
/// entirely dead/expired.
/// </summary>
/// <remarks>
/// The lookup, the call to <c>DFSTestUtil.SetDatanodeDead</c> and the
/// heartbeat check all run while holding the namesystem write lock; the
/// last two additionally run inside a <c>lock</c> on the heartbeat manager.
/// The assertion fails if no registered datanode matches
/// <paramref name="dnName"/>.
/// </remarks>
/// <param name="nn">the NameNode to manipulate</param>
/// <param name="dnName">
/// the name of the DataNode; compared against each descriptor's
/// <c>GetXferAddr()</c> value
/// </param>
public static void NoticeDeadDatanode(NameNode nn, string dnName)
{
    FSNamesystem namesystem = nn.GetNamesystem();
    namesystem.WriteLock();
    try
    {
        DatanodeManager dnm = namesystem.GetBlockManager().GetDatanodeManager();
        HeartbeatManager hbm = dnm.GetHeartbeatManager();

        // Scan every datanode known to the heartbeat manager; the last
        // descriptor whose transfer address matches wins.
        DatanodeDescriptor theDND = null;
        foreach (DatanodeDescriptor dnd in hbm.GetDatanodes())
        {
            if (dnd.GetXferAddr().Equals(dnName))
            {
                theDND = dnd;
            }
        }

        // NUnit expects the object under test first and the message second;
        // the reverse (JUnit) order would test the message string instead.
        NUnit.Framework.Assert.IsNotNull(theDND, "Could not find DN with name: " + dnName);

        lock (hbm)
        {
            DFSTestUtil.SetDatanodeDead(theDND);
            hbm.HeartbeatCheck();
        }
    }
    finally
    {
        namesystem.WriteUnlock();
    }
}
/// <summary>
/// Verifies that when the replication factor of a file is lowered, the
/// replica on the datanode that previously held the corrupt copy is not
/// the one kept alive at the expense of the healthy replicas.
/// </summary>
/// <remarks>
/// Steps: create a three-replica file, corrupt the replica on datanode 0,
/// restart that datanode with its <c>scanner.cursor</c> file removed, wait
/// for the file to report two replicas, mark every other datanode as having
/// no remaining space, drop the replication factor to 1 and assert that
/// exactly one live replica is left.
/// </remarks>
public virtual void TestProcesOverReplicateBlock()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, 100L);
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
    conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey,
        Sharpen.Extensions.ToString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    try
    {
        // Obtained inside the try so the cluster is still shut down if this throws.
        FileSystem fs = cluster.GetFileSystem();

        Path fileName = new Path("/foo1");
        DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);

        // corrupt the block on datanode 0
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
        NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(0, block));
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);

        // remove block scanner log to trigger block scanning
        FilePath scanCursor = new FilePath(
            new FilePath(
                MiniDFSCluster.GetFinalizedDir(
                    cluster.GetInstanceStorageDir(0, 0),
                    cluster.GetNamesystem().GetBlockPoolId()).GetParent()).GetParent(),
            "scanner.cursor");

        // wait for one minute for deletion to succeed
        for (int i = 0; !scanCursor.Delete(); i++)
        {
            // NUnit order is (condition, message).
            NUnit.Framework.Assert.IsTrue(i < 60,
                "Could not delete " + scanCursor.GetAbsolutePath() + " in one minute");
            try
            {
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception)
            {
                // Deliberately ignored: a failed or interrupted sleep only
                // means the next deletion attempt happens sooner.
            }
        }

        // restart the datanode so the corrupt replica will be detected
        cluster.RestartDataNode(dnProps);
        DFSTestUtil.WaitReplication(fs, fileName, (short)2);

        string blockPoolId = cluster.GetNamesystem().GetBlockPoolId();
        DatanodeID corruptDataNode = DataNodeTestUtils.GetDNRegistrationForBP(
            cluster.GetDataNodes()[2], blockPoolId);
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager();

        // Acquire the lock before entering the try so that a failed
        // acquisition never reaches WriteUnlock().
        namesystem.WriteLock();
        try
        {
            lock (hm)
            {
                // set live datanode's remaining space to be 0
                // so they will be chosen to be deleted when over-replication occurs
                string corruptMachineName = corruptDataNode.GetXferAddr();
                foreach (DatanodeDescriptor datanode in hm.GetDatanodes())
                {
                    if (!corruptMachineName.Equals(datanode.GetXferAddr()))
                    {
                        datanode.GetStorageInfos()[0].SetUtilizationForTesting(100L, 100L, 0, 100L);
                        datanode.UpdateHeartbeat(
                            BlockManagerTestUtil.GetStorageReportsForDatanode(datanode),
                            0L, 0L, 0, 0, null);
                    }
                }

                // decrease the replication factor to 1
                NameNodeAdapter.SetReplication(namesystem, fileName.ToString(), (short)1);

                // corrupt one won't be chosen to be excess one
                // without 4910 the number of live replicas would be 0: block gets lost
                NUnit.Framework.Assert.AreEqual(1,
                    bm.CountNodes(block.GetLocalBlock()).LiveReplicas());
            }
        }
        finally
        {
            namesystem.WriteUnlock();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Exercises live/excess replica counting for a single block while
/// datanodes are stopped, declared dead and restarted.
/// </summary>
/// <remarks>
/// The test writes a one-block file, adds two datanodes, stops and restarts
/// one of the original replica holders until an excess replica shows up,
/// then repeats the stop/restart cycle with a datanode that does not hold
/// an excess replica and waits for the excess count to reach 2. Each
/// waiting loop is bounded by <c>InitializeTimeout</c>/<c>CheckTimeout</c>.
/// </remarks>
public virtual void TestNodeCount()
{
    // start a mini dfs cluster with ReplicationFactor datanodes
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(ReplicationFactor)
        .Build();
    try
    {
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager();
        FileSystem fs = cluster.GetFileSystem();

        // populate the cluster with a one block file
        Path testFile = new Path("/testfile");
        DFSTestUtil.CreateFile(fs, testFile, 1L, ReplicationFactor, 1L);
        DFSTestUtil.WaitReplication(fs, testFile, ReplicationFactor);
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, testFile);

        // keep a copy of all datanode descriptor
        DatanodeDescriptor[] datanodes = hm.GetDatanodes();

        // start two new nodes
        cluster.StartDataNodes(conf, 2, true, null, null);
        cluster.WaitActive();

        // bring down first datanode
        DatanodeDescriptor datanode = datanodes[0];
        MiniDFSCluster.DataNodeProperties dnprop =
            cluster.StopDataNode(datanode.GetXferAddr());

        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), datanode.GetXferAddr());

        // the block will be replicated
        DFSTestUtil.WaitReplication(fs, testFile, ReplicationFactor);

        // restart the first datanode
        cluster.RestartDataNode(dnprop);
        cluster.WaitActive();

        // check if excessive replica is detected (transient)
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() == 0)
        {
            CheckTimeout("excess replicas not detected");
        }

        // find out a non-excess node
        DatanodeDescriptor nonExcessDN = null;
        foreach (DatanodeStorageInfo storage in bm.blocksMap.GetStorages(block.GetLocalBlock()))
        {
            DatanodeDescriptor dn = storage.GetDatanodeDescriptor();
            // NOTE(review): assumes the indexer yields null for an unknown
            // datanode UUID rather than throwing - confirm against the map type.
            ICollection<Block> blocks = bm.excessReplicateMap[dn.GetDatanodeUuid()];
            if (blocks == null || !blocks.Contains(block.GetLocalBlock()))
            {
                nonExcessDN = dn;
                break;
            }
        }
        NUnit.Framework.Assert.IsNotNull(nonExcessDN);

        // bring down non excessive datanode
        dnprop = cluster.StopDataNode(nonExcessDN.GetXferAddr());

        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), nonExcessDN.GetXferAddr());

        // The block should be replicated
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).LiveReplicas() != ReplicationFactor)
        {
            CheckTimeout("live replica count not correct", 1000);
        }

        // restart the non-excess datanode that was just stopped
        cluster.RestartDataNode(dnprop);
        cluster.WaitActive();

        // check if excessive replica is detected (transient)
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() != 2)
        {
            CheckTimeout("excess replica count not equal to 2");
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}