/// <summary> /// Ensure that the given NameNode marks the specified DataNode as /// entirely dead/expired. /// </summary> /// <param name="nn">the NameNode to manipulate</param> /// <param name="dnName">the name of the DataNode</param> public static void NoticeDeadDatanode(NameNode nn, string dnName) { FSNamesystem namesystem = nn.GetNamesystem(); namesystem.WriteLock(); try { DatanodeManager dnm = namesystem.GetBlockManager().GetDatanodeManager(); HeartbeatManager hbm = dnm.GetHeartbeatManager(); DatanodeDescriptor[] dnds = hbm.GetDatanodes(); DatanodeDescriptor theDND = null; foreach (DatanodeDescriptor dnd in dnds) { if (dnd.GetXferAddr().Equals(dnName)) { theDND = dnd; } } NUnit.Framework.Assert.IsNotNull("Could not find DN with name: " + dnName, theDND ); lock (hbm) { DFSTestUtil.SetDatanodeDead(theDND); hbm.HeartbeatCheck(); } } finally { namesystem.WriteUnlock(); } }
public virtual void TestIncludeExcludeLists() { BlockManager bm = Org.Mockito.Mockito.Mock <BlockManager>(); FSNamesystem fsn = Org.Mockito.Mockito.Mock <FSNamesystem>(); Configuration conf = new Configuration(); HostFileManager hm = new HostFileManager(); HostFileManager.HostSet includedNodes = new HostFileManager.HostSet(); HostFileManager.HostSet excludedNodes = new HostFileManager.HostSet(); includedNodes.Add(Entry("127.0.0.1:12345")); includedNodes.Add(Entry("localhost:12345")); includedNodes.Add(Entry("127.0.0.1:12345")); includedNodes.Add(Entry("127.0.0.2")); excludedNodes.Add(Entry("127.0.0.1:12346")); excludedNodes.Add(Entry("127.0.30.1:12346")); NUnit.Framework.Assert.AreEqual(2, includedNodes.Size()); NUnit.Framework.Assert.AreEqual(2, excludedNodes.Size()); hm.Refresh(includedNodes, excludedNodes); DatanodeManager dm = new DatanodeManager(bm, fsn, conf); Whitebox.SetInternalState(dm, "hostFileManager", hm); IDictionary <string, DatanodeDescriptor> dnMap = (IDictionary <string, DatanodeDescriptor >)Whitebox.GetInternalState(dm, "datanodeMap"); // After the de-duplication, there should be only one DN from the included // nodes declared as dead. NUnit.Framework.Assert.AreEqual(2, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .All).Count); NUnit.Framework.Assert.AreEqual(2, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .Dead).Count); dnMap["uuid-foo"] = new DatanodeDescriptor(new DatanodeID("127.0.0.1", "localhost" , "uuid-foo", 12345, 1020, 1021, 1022)); NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .Dead).Count); dnMap["uuid-bar"] = new DatanodeDescriptor(new DatanodeID("127.0.0.2", "127.0.0.2" , "uuid-bar", 12345, 1020, 1021, 1022)); NUnit.Framework.Assert.AreEqual(0, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .Dead).Count); DatanodeDescriptor spam = new DatanodeDescriptor(new DatanodeID("127.0.0" + ".3", "127.0.0.3", "uuid-spam", 12345, 1020, 1021, 1022)); DFSTestUtil.SetDatanodeDead(spam); includedNodes.Add(Entry("127.0.0.3:12345")); dnMap["uuid-spam"] = spam; NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .Dead).Count); Sharpen.Collections.Remove(dnMap, "uuid-spam"); NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .Dead).Count); excludedNodes.Add(Entry("127.0.0.3")); NUnit.Framework.Assert.AreEqual(0, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType .Dead).Count); }
public virtual void TestXceiverCount() { Configuration conf = new HdfsConfiguration(); // retry one time, if close fails conf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1); MiniDFSCluster cluster = null; int nodes = 8; int fileCount = 5; short fileRepl = 3; try { cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nodes).Build(); cluster.WaitActive(); FSNamesystem namesystem = cluster.GetNamesystem(); DatanodeManager dnm = namesystem.GetBlockManager().GetDatanodeManager(); IList <DataNode> datanodes = cluster.GetDataNodes(); DistributedFileSystem fs = cluster.GetFileSystem(); // trigger heartbeats in case not already sent TriggerHeartbeats(datanodes); // check that all nodes are live and in service int expectedTotalLoad = nodes; // xceiver server adds 1 to load int expectedInServiceNodes = nodes; int expectedInServiceLoad = nodes; CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad); // shutdown half the nodes and force a heartbeat check to ensure // counts are accurate for (int i = 0; i < nodes / 2; i++) { DataNode dn = datanodes[i]; DatanodeDescriptor dnd = dnm.GetDatanode(dn.GetDatanodeId()); dn.Shutdown(); DFSTestUtil.SetDatanodeDead(dnd); BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager()); //Verify decommission of dead node won't impact nodesInService metrics. dnm.GetDecomManager().StartDecommission(dnd); expectedInServiceNodes--; NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, namesystem.GetNumLiveDataNodes ()); NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem )); //Verify recommission of dead node won't impact nodesInService metrics. dnm.GetDecomManager().StopDecommission(dnd); NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem )); } // restart the nodes to verify that counts are correct after // node re-registration cluster.RestartDataNodes(); cluster.WaitActive(); datanodes = cluster.GetDataNodes(); expectedInServiceNodes = nodes; NUnit.Framework.Assert.AreEqual(nodes, datanodes.Count); CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad); // create streams and hsync to force datastreamers to start DFSOutputStream[] streams = new DFSOutputStream[fileCount]; for (int i_1 = 0; i_1 < fileCount; i_1++) { streams[i_1] = (DFSOutputStream)fs.Create(new Path("/f" + i_1), fileRepl).GetWrappedStream (); streams[i_1].Write(Sharpen.Runtime.GetBytesForString("1")); streams[i_1].Hsync(); // the load for writers is 2 because both the write xceiver & packet // responder threads are counted in the load expectedTotalLoad += 2 * fileRepl; expectedInServiceLoad += 2 * fileRepl; } // force nodes to send load update TriggerHeartbeats(datanodes); CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad); // decomm a few nodes, substract their load from the expected load, // trigger heartbeat to force load update for (int i_2 = 0; i_2 < fileRepl; i_2++) { expectedInServiceNodes--; DatanodeDescriptor dnd = dnm.GetDatanode(datanodes[i_2].GetDatanodeId()); expectedInServiceLoad -= dnd.GetXceiverCount(); dnm.GetDecomManager().StartDecommission(dnd); DataNodeTestUtils.TriggerHeartbeat(datanodes[i_2]); Sharpen.Thread.Sleep(100); CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad); } // check expected load while closing each stream. recalc expected // load based on whether the nodes in the pipeline are decomm for (int i_3 = 0; i_3 < fileCount; i_3++) { int decomm = 0; foreach (DatanodeInfo dni in streams[i_3].GetPipeline()) { DatanodeDescriptor dnd = dnm.GetDatanode(dni); expectedTotalLoad -= 2; if (dnd.IsDecommissionInProgress() || dnd.IsDecommissioned()) { decomm++; } else { expectedInServiceLoad -= 2; } } try { streams[i_3].Close(); } catch (IOException ioe) { // nodes will go decommissioned even if there's a UC block whose // other locations are decommissioned too. we'll ignore that // bug for now if (decomm < fileRepl) { throw; } } TriggerHeartbeats(datanodes); // verify node count and loads CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad); } // shutdown each node, verify node counts based on decomm state for (int i_4 = 0; i_4 < nodes; i_4++) { DataNode dn = datanodes[i_4]; dn.Shutdown(); // force it to appear dead so live count decreases DatanodeDescriptor dnDesc = dnm.GetDatanode(dn.GetDatanodeId()); DFSTestUtil.SetDatanodeDead(dnDesc); BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager()); NUnit.Framework.Assert.AreEqual(nodes - 1 - i_4, namesystem.GetNumLiveDataNodes() ); // first few nodes are already out of service if (i_4 >= fileRepl) { expectedInServiceNodes--; } NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem )); // live nodes always report load of 1. no nodes is load 0 double expectedXceiverAvg = (i_4 == nodes - 1) ? 0.0 : 1.0; NUnit.Framework.Assert.AreEqual((double)expectedXceiverAvg, GetInServiceXceiverAverage (namesystem), Epsilon); } // final sanity check CheckClusterHealth(0, namesystem, 0.0, 0, 0.0); } finally { if (cluster != null) { cluster.Shutdown(); } } }