/// <summary>
/// Verifies the "StaleDataNodes" gauge: marking two datanodes stale (heartbeats
/// disabled, last-update pushed past the stale interval) raises the gauge to 2,
/// and reviving them drops it back to 0.
/// </summary>
public virtual void TestStaleNodes()
{
    // Mark the first two datanodes stale: stop their heartbeats and push
    // their last-update time back beyond the configured stale interval.
    for (int nodeIdx = 0; nodeIdx < 2; nodeIdx++)
    {
        DataNode dn = cluster.GetDataNodes()[nodeIdx];
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
        long staleInterval = Conf.GetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey,
            DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalDefault);
        DatanodeDescriptor descriptor = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(descriptor, -(staleInterval + 1));
    }
    // Force the HeartbeatManager to re-evaluate datanode liveness now.
    BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager());
    MetricsAsserts.AssertGauge("StaleDataNodes", 2, MetricsAsserts.GetMetrics(NsMetrics));
    // Revive both nodes: re-enable heartbeats and reset last-update to "now".
    for (int nodeIdx = 0; nodeIdx < 2; nodeIdx++)
    {
        DataNode dn = cluster.GetDataNodes()[nodeIdx];
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, false);
        DatanodeDescriptor descriptor = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(descriptor, 0);
    }
    // Re-evaluate: no datanode should be reported stale any more.
    BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager());
    MetricsAsserts.AssertGauge("StaleDataNodes", 0, MetricsAsserts.GetMetrics(NsMetrics));
}
/// <summary>
/// Exercises primary-datanode selection across successive
/// <c>InitializeBlockRecovery</c> attempts on an under-construction block with
/// three live replica locations, asserting which node receives the lease
/// recovery command each time based on heartbeat recency.
/// </summary>
public virtual void TestInitializeBlockRecovery()
{
    // Three live replicas on three distinct datanodes/storages.
    DatanodeStorageInfo storage1 = DFSTestUtil.CreateDatanodeStorageInfo("10.10.1.1", "s1");
    DatanodeDescriptor node1 = storage1.GetDatanodeDescriptor();
    DatanodeStorageInfo storage2 = DFSTestUtil.CreateDatanodeStorageInfo("10.10.1.2", "s2");
    DatanodeDescriptor node2 = storage2.GetDatanodeDescriptor();
    DatanodeStorageInfo storage3 = DFSTestUtil.CreateDatanodeStorageInfo("10.10.1.3", "s3");
    DatanodeDescriptor node3 = storage3.GetDatanodeDescriptor();
    node1.isAlive = true;
    node2.isAlive = true;
    node3.isAlive = true;
    BlockInfoContiguousUnderConstruction blockInfo = new BlockInfoContiguousUnderConstruction(
        new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3,
        HdfsServerConstants.BlockUCState.UnderConstruction,
        new DatanodeStorageInfo[] { storage1, storage2, storage3 });
    // Recovery attempt #1: node2 has the most recent heartbeat and is
    // expected to receive the lease recovery command.
    DFSTestUtil.ResetLastUpdatesWithOffset(node1, -3 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node2, -1 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node3, -2 * 1000);
    blockInfo.InitializeBlockRecovery(1);
    BlockInfoContiguousUnderConstruction[] recoveryList = node2.GetLeaseRecoveryCommand(1);
    NUnit.Framework.Assert.AreEqual(recoveryList[0], blockInfo);
    // Recovery attempt #2: node2 still has the freshest heartbeat but served
    // as primary last time; this time node1 receives the command.
    DFSTestUtil.ResetLastUpdatesWithOffset(node1, -2 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node2, -1 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node3, -3 * 1000);
    blockInfo.InitializeBlockRecovery(2);
    recoveryList = node1.GetLeaseRecoveryCommand(1);
    NUnit.Framework.Assert.AreEqual(recoveryList[0], blockInfo);
    // Recovery attempt #3: node3 is the remaining node and gets the command.
    DFSTestUtil.ResetLastUpdatesWithOffset(node1, -2 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node2, -1 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node3, -3 * 1000);
    blockInfo.InitializeBlockRecovery(3);
    recoveryList = node3.GetLeaseRecoveryCommand(1);
    NUnit.Framework.Assert.AreEqual(recoveryList[0], blockInfo);
    // Recovery attempt #4: selection resets, and the node with the most
    // recent heartbeat (node3, offset 0) is picked again.
    DFSTestUtil.ResetLastUpdatesWithOffset(node1, -2 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node2, -1 * 1000);
    DFSTestUtil.ResetLastUpdatesWithOffset(node3, 0);
    blockInfo.InitializeBlockRecovery(3);
    recoveryList = node3.GetLeaseRecoveryCommand(1);
    NUnit.Framework.Assert.AreEqual(recoveryList[0], blockInfo);
}
/// <summary>
/// Verifies the block recovery command produced in the heartbeat reply when a
/// block-to-be-recovered is queued on a datanode: the recovering locations
/// should exclude stale replicas when only some nodes are stale, but include
/// every location when all of them are stale.
/// </summary>
public virtual void TestHeartbeatBlockRecovery()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    try
    {
        cluster.WaitActive();
        FSNamesystem namesystem = cluster.GetNamesystem();
        HeartbeatManager hm = namesystem.GetBlockManager().GetDatanodeManager().GetHeartbeatManager
            ();
        string poolId = namesystem.GetBlockPoolId();
        // Register the three datanodes and give each one a storage so that
        // GetStorageInfos()[0] below is valid.
        DatanodeRegistration nodeReg1 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.
            GetDataNodes()[0], poolId);
        DatanodeDescriptor dd1 = NameNodeAdapter.GetDatanode(namesystem, nodeReg1);
        dd1.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
        DatanodeRegistration nodeReg2 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.
            GetDataNodes()[1], poolId);
        DatanodeDescriptor dd2 = NameNodeAdapter.GetDatanode(namesystem, nodeReg2);
        dd2.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
        DatanodeRegistration nodeReg3 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.
            GetDataNodes()[2], poolId);
        DatanodeDescriptor dd3 = NameNodeAdapter.GetDatanode(namesystem, nodeReg3);
        dd3.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
        try
        {
            // Hold the namesystem write lock and the HeartbeatManager monitor
            // while manipulating heartbeat state directly.
            namesystem.WriteLock();
            lock (hm)
            {
                NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem);
                NameNodeAdapter.SendHeartBeat(nodeReg2, dd2, namesystem);
                NameNodeAdapter.SendHeartBeat(nodeReg3, dd3, namesystem);
                // Test with all alive nodes.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd2, 0);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                DatanodeStorageInfo[] storages = new DatanodeStorageInfo[] { dd1.GetStorageInfos(
                    )[0], dd2.GetStorageInfos()[0], dd3.GetStorageInfos()[0] };
                BlockInfoContiguousUnderConstruction blockInfo = new BlockInfoContiguousUnderConstruction
                    (new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3, HdfsServerConstants.BlockUCState
                    .UnderRecovery, storages);
                dd1.AddBlockToBeRecovered(blockInfo);
                // The heartbeat reply should carry exactly one recover-block
                // command listing all three (non-stale) locations.
                DatanodeCommand[] cmds = NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem)
                    .GetCommands();
                NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction
                    ());
                BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand)cmds[0];
                NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
                DatanodeInfo[] recoveringNodes = Sharpen.Collections.ToArray(recoveryCommand.GetRecoveringBlocks
                    (), new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
                NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);
                // Test with one stale node.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                // More than the default stale interval of 30 seconds.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                blockInfo = new BlockInfoContiguousUnderConstruction(new Block(0, 0, GenerationStamp
                    .LastReservedStamp), (short)3, HdfsServerConstants.BlockUCState.UnderRecovery, storages
                    );
                dd1.AddBlockToBeRecovered(blockInfo);
                cmds = NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction
                    ());
                recoveryCommand = (BlockRecoveryCommand)cmds[0];
                NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
                recoveringNodes = Sharpen.Collections.ToArray(recoveryCommand.GetRecoveringBlocks
                    (), new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
                NUnit.Framework.Assert.AreEqual(2, recoveringNodes.Length);
                // dd2 is skipped: it is the only stale node, so it is filtered
                // out of the recovery locations.
                NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd3);
                // Test with all stale node.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -60 * 1000);
                // More than the default stale interval of 30 seconds.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -80 * 1000);
                blockInfo = new BlockInfoContiguousUnderConstruction(new Block(0, 0, GenerationStamp
                    .LastReservedStamp), (short)3, HdfsServerConstants.BlockUCState.UnderRecovery, storages
                    );
                dd1.AddBlockToBeRecovered(blockInfo);
                cmds = NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction
                    ());
                recoveryCommand = (BlockRecoveryCommand)cmds[0];
                NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
                recoveringNodes = Sharpen.Collections.ToArray(recoveryCommand.GetRecoveringBlocks
                    (), new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
                // All replicas were marked stale, yet all three locations are
                // returned: the stale filter is not applied in this case (note
                // dd1's heartbeat above refreshes its own last-update before
                // the recovery list is built). NOTE(review): the original
                // comment here claimed "only dd1 is included", which
                // contradicts the assertions below.
                NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);
            }
        }
        finally
        {
            namesystem.WriteUnlock();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// With <c>DfsNamenodeAvoidStaleDatanodeForReadKey</c> enabled, verifies that
/// a stale datanode is moved to the end of the location list returned for
/// reads — both for a fully reported block and for the last (under
/// construction) block of a file that is still being written.
/// </summary>
public virtual void TestReadSelectNonStaleDatanode()
{
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
    // 30 minutes: long enough that only explicit offset manipulation below
    // can make a node stale.
    long staleInterval = 30 * 1000 * 60;
    conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes
        ).Racks(racks).Build();
    cluster.WaitActive();
    IPEndPoint addr = new IPEndPoint("localhost", cluster.GetNameNodePort());
    DFSClient client = new DFSClient(addr, conf);
    IList<DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem().GetBlockManager
        ().GetDatanodeManager().GetDatanodeListForReport(HdfsConstants.DatanodeReportType
        .Live);
    NUnit.Framework.Assert.AreEqual("Unexpected number of datanodes", numDatanodes, nodeInfoList
        .Count);
    FileSystem fileSys = cluster.GetFileSystem();
    FSDataOutputStream stm = null;
    try
    {
        // do the writing but do not close the FSDataOutputStream
        // in order to mimic the ongoing writing
        Path fileName = new Path("/file1");
        stm = fileSys.Create(fileName, true, fileSys.GetConf().GetInt(CommonConfigurationKeys
            .IoFileBufferSizeKey, 4096), (short)3, blockSize);
        // 1.5 blocks: the second block stays under construction.
        stm.Write(new byte[(blockSize * 3) / 2]);
        // We do not close the stream so that
        // the writing seems to be still ongoing
        stm.Hflush();
        LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(fileName.ToString()
            , 0, blockSize);
        DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
        DataNode staleNode = null;
        DatanodeDescriptor staleNodeInfo = null;
        // stop the heartbeat of the first node
        staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // set the first node as stale
        staleNodeInfo = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager
            ().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));
        LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(fileName.
            ToString(), 0, blockSize);
        DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
        // The stale node (formerly first) should now be sorted to the end of
        // the location list for reads.
        NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName
            ());
        // restart the staleNode's heartbeat
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
        // reset the first node as non-stale, so as to avoid two stale nodes
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);
        // Repeat the experiment on the last, still-under-construction block.
        LocatedBlock lastBlock = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue
            ).GetLastLocatedBlock();
        nodes = lastBlock.GetLocations();
        NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
        // stop the heartbeat of the first node for the last block
        staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // set the node as stale
        DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem().GetBlockManager
            ().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));
        LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(fileName.ToString(), 0
            , long.MaxValue).GetLastLocatedBlock();
        nodesAfterStale = lastBlockAfterStale.GetLocations();
        NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
        // Again the stale node should be ordered last.
        NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName
            ());
    }
    finally
    {
        if (stm != null)
        {
            stm.Close();
        }
        client.Close();
        cluster.Shutdown();
    }
}