public virtual void TestChooseTargetWithDecomNodes()
{
    // Verifies, under the namesystem write lock, that:
    //   1) GetInServiceXceiverAverage() averages load only over nodes that
    //      are still in service (decommissioned nodes are excluded), and
    //   2) ChooseTarget() never selects a decommissioned node as a replica
    //      target.
    namenode.GetNamesystem().WriteLock();
    try
    {
        string blockPoolId = namenode.GetNamesystem().GetBlockPoolId();
        // Report xceiver counts of 2, 4 and 4 for nodes 3..5 via heartbeats.
        dnManager.HandleHeartbeat(dnrList[3], BlockManagerTestUtil.GetStorageReportsForDatanode(dataNodes[3]), blockPoolId, dataNodes[3].GetCacheCapacity(), dataNodes[3].GetCacheRemaining(), 2, 0, 0, null);
        dnManager.HandleHeartbeat(dnrList[4], BlockManagerTestUtil.GetStorageReportsForDatanode(dataNodes[4]), blockPoolId, dataNodes[4].GetCacheCapacity(), dataNodes[4].GetCacheRemaining(), 4, 0, 0, null);
        dnManager.HandleHeartbeat(dnrList[5], BlockManagerTestUtil.GetStorageReportsForDatanode(dataNodes[5]), blockPoolId, dataNodes[5].GetCacheCapacity(), dataNodes[5].GetCacheRemaining(), 4, 0, 0, null);
        // value in the above heartbeats
        int load = 2 + 4 + 4;
        FSNamesystem fsn = namenode.GetNamesystem();
        // All 6 nodes are still in service, so the average is load / 6.
        NUnit.Framework.Assert.AreEqual((double)load / 6, dnManager.GetFSClusterStats().GetInServiceXceiverAverage(), Epsilon);
        // Decommission DNs so BlockPlacementPolicyDefault.isGoodTarget()
        // returns false
        for (int i = 0; i < 3; i++)
        {
            DatanodeDescriptor d = dnManager.GetDatanode(dnrList[i]);
            dnManager.GetDecomManager().StartDecommission(d);
            d.SetDecommissioned();
        }
        // Only 3 nodes remain in service; the same total load is now
        // averaged over 3 instead of 6.
        NUnit.Framework.Assert.AreEqual((double)load / 3, dnManager.GetFSClusterStats().GetInServiceXceiverAverage(), Epsilon);
        // update references of writer DN to update the de-commissioned state
        IList<DatanodeDescriptor> liveNodes = new AList<DatanodeDescriptor>();
        dnManager.FetchDatanodes(liveNodes, null, false);
        DatanodeDescriptor writerDn = null;
        if (liveNodes.Contains(dataNodes[0]))
        {
            writerDn = liveNodes[liveNodes.IndexOf(dataNodes[0])];
        }
        // Call chooseTarget() asking for 3 replicas.
        DatanodeStorageInfo[] targets = namenode.GetNamesystem().GetBlockManager().GetBlockPlacementPolicy().ChooseTarget("testFile.txt", 3, writerDn, new AList<DatanodeStorageInfo>(), false, null, 1024, TestBlockStoragePolicy.DefaultStoragePolicy);
        NUnit.Framework.Assert.AreEqual(3, targets.Length);
        // The 3 chosen targets must be exactly the storages of the
        // non-decommissioned nodes (storages[3] and up).
        ICollection<DatanodeStorageInfo> targetSet = new HashSet<DatanodeStorageInfo>(Arrays.AsList(targets));
        for (int i_1 = 3; i_1 < storages.Length; i_1++)
        {
            NUnit.Framework.Assert.IsTrue(targetSet.Contains(storages[i_1]));
        }
    }
    finally
    {
        // Undo the decommission and release the lock so later tests see a
        // clean cluster state even if an assertion above failed.
        dataNodes[0].StopDecommission();
        dataNodes[1].StopDecommission();
        dataNodes[2].StopDecommission();
        namenode.GetNamesystem().WriteUnlock();
    }
}
/// <summary>Have DatanodeManager check decommission state.</summary>
/// <param name="dm">the DatanodeManager to manipulate</param>
/// <exception cref="Sharpen.ExecutionException"/>
/// <exception cref="System.Exception"/>
public static void RecheckDecommissionState(DatanodeManager dm)
{
    // Trigger one immediate pass of the decommission monitor rather than
    // waiting for its scheduled run.
    var decomManager = dm.GetDecomManager();
    decomManager.RunMonitor();
}
public virtual void TestXceiverCount()
{
    // End-to-end check that the cluster's total and in-service xceiver load
    // metrics track node lifecycle events: heartbeats, shutdown, death,
    // decommission/recommission, re-registration, and open write pipelines.
    Configuration conf = new HdfsConfiguration();
    // retry one time, if close fails
    conf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1);
    MiniDFSCluster cluster = null;
    int nodes = 8;
    int fileCount = 5;
    short fileRepl = 3;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nodes).Build();
        cluster.WaitActive();
        FSNamesystem namesystem = cluster.GetNamesystem();
        DatanodeManager dnm = namesystem.GetBlockManager().GetDatanodeManager();
        IList<DataNode> datanodes = cluster.GetDataNodes();
        DistributedFileSystem fs = cluster.GetFileSystem();
        // trigger heartbeats in case not already sent
        TriggerHeartbeats(datanodes);
        // check that all nodes are live and in service
        int expectedTotalLoad = nodes;
        // xceiver server adds 1 to load
        int expectedInServiceNodes = nodes;
        int expectedInServiceLoad = nodes;
        CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // shutdown half the nodes and force a heartbeat check to ensure
        // counts are accurate
        for (int i = 0; i < nodes / 2; i++)
        {
            DataNode dn = datanodes[i];
            DatanodeDescriptor dnd = dnm.GetDatanode(dn.GetDatanodeId());
            dn.Shutdown();
            // Mark the node dead explicitly and run the heartbeat check so
            // the live count reflects the shutdown immediately.
            DFSTestUtil.SetDatanodeDead(dnd);
            BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
            // Verify decommission of dead node won't impact nodesInService metrics.
            dnm.GetDecomManager().StartDecommission(dnd);
            expectedInServiceNodes--;
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, namesystem.GetNumLiveDataNodes());
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
            // Verify recommission of dead node won't impact nodesInService metrics.
            dnm.GetDecomManager().StopDecommission(dnd);
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
        }
        // restart the nodes to verify that counts are correct after
        // node re-registration
        cluster.RestartDataNodes();
        cluster.WaitActive();
        datanodes = cluster.GetDataNodes();
        expectedInServiceNodes = nodes;
        NUnit.Framework.Assert.AreEqual(nodes, datanodes.Count);
        CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int i_1 = 0; i_1 < fileCount; i_1++)
        {
            streams[i_1] = (DFSOutputStream)fs.Create(new Path("/f" + i_1), fileRepl).GetWrappedStream();
            streams[i_1].Write(Sharpen.Runtime.GetBytesForString("1"));
            streams[i_1].Hsync();
            // the load for writers is 2 because both the write xceiver & packet
            // responder threads are counted in the load
            expectedTotalLoad += 2 * fileRepl;
            expectedInServiceLoad += 2 * fileRepl;
        }
        // force nodes to send load update
        TriggerHeartbeats(datanodes);
        CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // decomm a few nodes, subtract their load from the expected load,
        // trigger heartbeat to force load update
        for (int i_2 = 0; i_2 < fileRepl; i_2++)
        {
            expectedInServiceNodes--;
            DatanodeDescriptor dnd = dnm.GetDatanode(datanodes[i_2].GetDatanodeId());
            expectedInServiceLoad -= dnd.GetXceiverCount();
            dnm.GetDecomManager().StartDecommission(dnd);
            DataNodeTestUtils.TriggerHeartbeat(datanodes[i_2]);
            // brief pause to let the heartbeat be processed before asserting
            Sharpen.Thread.Sleep(100);
            CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // check expected load while closing each stream. recalc expected
        // load based on whether the nodes in the pipeline are decomm
        for (int i_3 = 0; i_3 < fileCount; i_3++)
        {
            int decomm = 0;
            foreach (DatanodeInfo dni in streams[i_3].GetPipeline())
            {
                DatanodeDescriptor dnd = dnm.GetDatanode(dni);
                // closing the stream releases the 2 xceiver threads per
                // pipeline node from the total load
                expectedTotalLoad -= 2;
                if (dnd.IsDecommissionInProgress() || dnd.IsDecommissioned())
                {
                    decomm++;
                }
                else
                {
                    // only in-service nodes contribute to in-service load
                    expectedInServiceLoad -= 2;
                }
            }
            try
            {
                streams[i_3].Close();
            }
            catch (IOException ioe)
            {
                // nodes will go decommissioned even if there's a UC block whose
                // other locations are decommissioned too. we'll ignore that
                // bug for now
                if (decomm < fileRepl)
                {
                    throw;
                }
            }
            TriggerHeartbeats(datanodes);
            // verify node count and loads
            CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // shutdown each node, verify node counts based on decomm state
        for (int i_4 = 0; i_4 < nodes; i_4++)
        {
            DataNode dn = datanodes[i_4];
            dn.Shutdown();
            // force it to appear dead so live count decreases
            DatanodeDescriptor dnDesc = dnm.GetDatanode(dn.GetDatanodeId());
            DFSTestUtil.SetDatanodeDead(dnDesc);
            BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
            NUnit.Framework.Assert.AreEqual(nodes - 1 - i_4, namesystem.GetNumLiveDataNodes());
            // first few nodes are already out of service
            if (i_4 >= fileRepl)
            {
                expectedInServiceNodes--;
            }
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
            // live nodes always report load of 1. no nodes is load 0
            double expectedXceiverAvg = (i_4 == nodes - 1) ? 0.0 : 1.0;
            NUnit.Framework.Assert.AreEqual((double)expectedXceiverAvg, GetInServiceXceiverAverage(namesystem), Epsilon);
        }
        // final sanity check
        CheckClusterHealth(0, namesystem, 0.0, 0, 0.0);
    }
    finally
    {
        // always tear down the mini cluster, even on assertion failure
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}