/// <summary>
/// Ensure that the given NameNode marks the specified DataNode as
/// entirely dead/expired.
/// </summary>
/// <remarks>
/// Runs under the namesystem write lock. The DataNode is looked up by
/// comparing each descriptor's transfer address with
/// <paramref name="dnName"/>; the assertion fails if none matches.
/// </remarks>
/// <param name="nn">the NameNode to manipulate</param>
/// <param name="dnName">the name of the DataNode</param>
public static void NoticeDeadDatanode(NameNode nn, string dnName)
{
    FSNamesystem namesystem = nn.GetNamesystem();
    namesystem.WriteLock();
    try
    {
        DatanodeManager dnm = namesystem.GetBlockManager().GetDatanodeManager();
        HeartbeatManager hbm = dnm.GetHeartbeatManager();
        DatanodeDescriptor theDND = null;
        foreach (DatanodeDescriptor dnd in hbm.GetDatanodes())
        {
            if (dnd.GetXferAddr().Equals(dnName))
            {
                theDND = dnd;
            }
        }
        // NUnit takes the object under test first and the message second;
        // with the arguments the other way round the (never null) message
        // string would be the value that gets checked.
        NUnit.Framework.Assert.IsNotNull(theDND, "Could not find DN with name: " + dnName);
        // Hold the heartbeat manager's monitor so that marking the node dead
        // and the heartbeat check run as one step.
        lock (hbm)
        {
            DFSTestUtil.SetDatanodeDead(theDND);
            hbm.HeartbeatCheck();
        }
    }
    finally
    {
        namesystem.WriteUnlock();
    }
}
/// <summary>
/// Creates a decommission manager bound to the given namesystem, block
/// manager and heartbeat manager.
/// </summary>
/// <remarks>
/// In addition to storing the three collaborators, the constructor creates
/// a scheduled executor with a single daemon thread (name format
/// "DecommissionMonitor-%d") and initialises the empty collections that
/// track per-node blocks and pending nodes.
/// </remarks>
/// <param name="namesystem">namesystem stored for later use</param>
/// <param name="blockManager">block manager stored for later use</param>
/// <param name="hbManager">heartbeat manager stored for later use</param>
internal DecommissionManager(Namesystem namesystem, BlockManager blockManager, HeartbeatManager hbManager)
{
    // Collaborators supplied by the caller.
    this.namesystem = namesystem;
    this.blockManager = blockManager;
    this.hbManager = hbManager;

    // One daemon thread is enough for the monitor.
    var monitorThreadFactory = new ThreadFactoryBuilder()
        .SetNameFormat("DecommissionMonitor-%d")
        .SetDaemon(true)
        .Build();
    executor = Executors.NewScheduledThreadPool(1, monitorThreadFactory);

    // Bookkeeping starts out empty.
    decomNodeBlocks = new SortedDictionary<DatanodeDescriptor, AbstractList<BlockInfoContiguous>>();
    pendingNodes = new List<DatanodeDescriptor>();
}
/// <summary>
/// Checks that a corrupt replica is not the one that survives when an
/// over-replicated block is trimmed down to a single replica.
/// </summary>
/// <remarks>
/// The test corrupts the replica on datanode 0, restarts that datanode so
/// the corruption is detected, reports zero remaining space for the other
/// datanodes, lowers the file's replication factor to 1 and then asserts
/// that exactly one live replica is still counted.
/// </remarks>
public virtual void TestProcesOverReplicateBlock()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, 100L);
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
    conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey, Sharpen.Extensions.ToString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    try
    {
        // Obtained inside the try so the cluster is shut down even if this fails.
        FileSystem fs = cluster.GetFileSystem();
        Path fileName = new Path("/foo1");
        DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);

        // corrupt the block on datanode 0
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
        NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(0, block));
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);

        // remove block scanner log to trigger block scanning
        FilePath finalizedDir = MiniDFSCluster.GetFinalizedDir(cluster.GetInstanceStorageDir(0, 0), cluster.GetNamesystem().GetBlockPoolId());
        FilePath scanCursor = new FilePath(new FilePath(finalizedDir.GetParent()).GetParent(), "scanner.cursor");

        // wait for one minute for deletion to succeed
        for (int i = 0; !scanCursor.Delete(); i++)
        {
            // NUnit order: condition first, message second.
            NUnit.Framework.Assert.IsTrue(i < 60, "Could not delete " + scanCursor.GetAbsolutePath() + " in one minute");
            try
            {
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception)
            {
                // Deliberately ignored: a failed sleep only shortens this
                // pause, and the retry bound above still ends the loop.
            }
        }

        // restart the datanode so the corrupt replica will be detected
        cluster.RestartDataNode(dnProps);
        DFSTestUtil.WaitReplication(fs, fileName, (short)2);

        string blockPoolId = cluster.GetNamesystem().GetBlockPoolId();
        DatanodeID corruptDataNode = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[2], blockPoolId);
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager();

        // Acquire before the try so the finally never unlocks a lock that
        // was not taken.
        namesystem.WriteLock();
        try
        {
            lock (hm)
            {
                // set live datanode's remaining space to be 0
                // so they will be chosen to be deleted when over-replication occurs
                string corruptMachineName = corruptDataNode.GetXferAddr();
                foreach (DatanodeDescriptor datanode in hm.GetDatanodes())
                {
                    if (!corruptMachineName.Equals(datanode.GetXferAddr()))
                    {
                        datanode.GetStorageInfos()[0].SetUtilizationForTesting(100L, 100L, 0, 100L);
                        datanode.UpdateHeartbeat(BlockManagerTestUtil.GetStorageReportsForDatanode(datanode), 0L, 0L, 0, 0, null);
                    }
                }

                // decrease the replication factor to 1
                NameNodeAdapter.SetReplication(namesystem, fileName.ToString(), (short)1);

                // corrupt one won't be chosen to be excess one
                // without 4910 the number of live replicas would be 0: block gets lost
                NUnit.Framework.Assert.AreEqual(1, bm.CountNodes(block.GetLocalBlock()).LiveReplicas());
            }
        }
        finally
        {
            namesystem.WriteUnlock();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Checks how many replicate and invalidate blocks the name node hands to
/// a datanode on successive heartbeats.
/// </summary>
/// <remarks>
/// The datanode is given 2 * replicate-limit + 1 blocks to replicate and,
/// after the first heartbeat, 2 * invalidate-limit + 1 blocks to
/// invalidate. Each following heartbeat is expected to drain at most one
/// limit's worth of each kind until nothing is left.
/// </remarks>
public virtual void TestHeartbeat()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();
    try
    {
        cluster.WaitActive();
        FSNamesystem namesystem = cluster.GetNamesystem();
        HeartbeatManager hm = namesystem.GetBlockManager().GetDatanodeManager().GetHeartbeatManager();
        string poolId = namesystem.GetBlockPoolId();
        DatanodeRegistration nodeReg = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[0], poolId);
        DatanodeDescriptor dd = NameNodeAdapter.GetDatanode(namesystem, nodeReg);
        string storageID = DatanodeStorage.GenerateUuid();
        dd.UpdateStorage(new DatanodeStorage(storageID));

        int remainingBlocks = 1;
        int maxReplicateLimit = conf.GetInt(DFSConfigKeys.DfsNamenodeReplicationMaxStreamsKey, 2);
        int maxInvalidateLimit = DFSConfigKeys.DfsBlockInvalidateLimitDefault;
        int maxInvalidateBlocks = 2 * maxInvalidateLimit + remainingBlocks;
        int maxReplicateBlocks = 2 * maxReplicateLimit + remainingBlocks;
        DatanodeStorageInfo[] oneTarget = new DatanodeStorageInfo[] { dd.GetStorageInfo(storageID) };

        // Acquire before the try so the finally never unlocks a lock that
        // was not taken.
        namesystem.WriteLock();
        try
        {
            lock (hm)
            {
                for (int i = 0; i < maxReplicateBlocks; i++)
                {
                    dd.AddBlockToBeReplicated(new Block(i, 0, GenerationStamp.LastReservedStamp), oneTarget);
                }

                // Heartbeat 1: only a transfer command, capped at the replicate limit.
                DatanodeCommand[] cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaTransfer, cmds[0].GetAction());
                NUnit.Framework.Assert.AreEqual(maxReplicateLimit, ((BlockCommand)cmds[0]).GetBlocks().Length);

                AList<Block> blockList = new AList<Block>(maxInvalidateBlocks);
                for (int i = 0; i < maxInvalidateBlocks; i++)
                {
                    blockList.AddItem(new Block(i, 0, GenerationStamp.LastReservedStamp));
                }
                dd.AddBlocksToBeInvalidated(blockList);

                // Heartbeat 2: a full transfer batch plus a full invalidate batch.
                cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(2, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaTransfer, cmds[0].GetAction());
                NUnit.Framework.Assert.AreEqual(maxReplicateLimit, ((BlockCommand)cmds[0]).GetBlocks().Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaInvalidate, cmds[1].GetAction());
                NUnit.Framework.Assert.AreEqual(maxInvalidateLimit, ((BlockCommand)cmds[1]).GetBlocks().Length);

                // Heartbeat 3: the last block to replicate plus another full invalidate batch.
                cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(2, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaTransfer, cmds[0].GetAction());
                NUnit.Framework.Assert.AreEqual(remainingBlocks, ((BlockCommand)cmds[0]).GetBlocks().Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaInvalidate, cmds[1].GetAction());
                NUnit.Framework.Assert.AreEqual(maxInvalidateLimit, ((BlockCommand)cmds[1]).GetBlocks().Length);

                // Heartbeat 4: only the last block to invalidate is left.
                cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaInvalidate, cmds[0].GetAction());
                NUnit.Framework.Assert.AreEqual(remainingBlocks, ((BlockCommand)cmds[0]).GetBlocks().Length);

                // Heartbeat 5: nothing left to send.
                cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                NUnit.Framework.Assert.AreEqual(0, cmds.Length);
            }
        }
        finally
        {
            namesystem.WriteUnlock();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Checks which datanodes are listed as recovery locations in the block
/// recovery command returned on a heartbeat, depending on how long ago
/// each datanode was last updated.
/// </summary>
/// <remarks>
/// Three scenarios are exercised against the same three storages: all
/// nodes freshly updated, one node (dd2) updated 40 seconds ago, and all
/// three nodes updated 40 to 80 seconds ago.
/// </remarks>
public virtual void TestHeartbeatBlockRecovery()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    try
    {
        cluster.WaitActive();
        FSNamesystem namesystem = cluster.GetNamesystem();
        HeartbeatManager hm = namesystem.GetBlockManager().GetDatanodeManager().GetHeartbeatManager();
        string poolId = namesystem.GetBlockPoolId();

        DatanodeRegistration nodeReg1 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[0], poolId);
        DatanodeDescriptor dd1 = NameNodeAdapter.GetDatanode(namesystem, nodeReg1);
        dd1.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));

        DatanodeRegistration nodeReg2 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[1], poolId);
        DatanodeDescriptor dd2 = NameNodeAdapter.GetDatanode(namesystem, nodeReg2);
        dd2.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));

        DatanodeRegistration nodeReg3 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[2], poolId);
        DatanodeDescriptor dd3 = NameNodeAdapter.GetDatanode(namesystem, nodeReg3);
        dd3.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));

        // Acquire before the try so the finally never unlocks a lock that
        // was not taken.
        namesystem.WriteLock();
        try
        {
            lock (hm)
            {
                NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem);
                NameNodeAdapter.SendHeartBeat(nodeReg2, dd2, namesystem);
                NameNodeAdapter.SendHeartBeat(nodeReg3, dd3, namesystem);

                // Test with all alive nodes.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd2, 0);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                DatanodeStorageInfo[] storages = new DatanodeStorageInfo[]
                {
                    dd1.GetStorageInfos()[0],
                    dd2.GetStorageInfos()[0],
                    dd3.GetStorageInfos()[0]
                };
                DatanodeInfo[] recoveringNodes = QueueRecoveryAndHeartbeat(nodeReg1, dd1, namesystem, storages);
                NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);

                // Test with one stale node.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                // More than the default stale interval of 30 seconds.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                recoveringNodes = QueueRecoveryAndHeartbeat(nodeReg1, dd1, namesystem, storages);
                // dd2 is skipped.
                NUnit.Framework.Assert.AreEqual(2, recoveringNodes.Length);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd3);

                // Test with all stale node.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -60 * 1000);
                // More than the default stale interval of 30 seconds.
                DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -80 * 1000);
                recoveringNodes = QueueRecoveryAndHeartbeat(nodeReg1, dd1, namesystem, storages);
                // NOTE(review): an earlier comment here said only dd1 would be
                // included, but the assertions expect all three locations.
                // Presumably the name node falls back to every location when
                // too few non-stale ones remain - confirm against the
                // heartbeat handling code.
                NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);
            }
        }
        finally
        {
            namesystem.WriteUnlock();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}

/// <summary>
/// Queues a new under-recovery block (three expected replicas on
/// <paramref name="storages"/>) on <paramref name="dd"/>, sends one
/// heartbeat for it, asserts that the reply is exactly one recover-block
/// command holding exactly one block, and returns that block's locations.
/// </summary>
private static DatanodeInfo[] QueueRecoveryAndHeartbeat(DatanodeRegistration nodeReg, DatanodeDescriptor dd, FSNamesystem namesystem, DatanodeStorageInfo[] storages)
{
    BlockInfoContiguousUnderConstruction blockInfo = new BlockInfoContiguousUnderConstruction(
        new Block(0, 0, GenerationStamp.LastReservedStamp),
        (short)3,
        HdfsServerConstants.BlockUCState.UnderRecovery,
        storages);
    dd.AddBlockToBeRecovered(blockInfo);

    DatanodeCommand[] cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
    NUnit.Framework.Assert.AreEqual(1, cmds.Length);
    NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction());

    BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand)cmds[0];
    NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
    return Sharpen.Collections.ToArray(recoveryCommand.GetRecoveringBlocks(), new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
}
/// <summary>
/// Checks the live and excess replica counts reported for a block while
/// datanodes holding it are stopped, declared dead and restarted.
/// </summary>
/// <remarks>
/// The waits below poll <c>CountNodes</c> until the expected count is
/// seen; <c>CheckTimeout</c> is called on every iteration with a failure
/// message.
/// </remarks>
public virtual void TestNodeCount()
{
    // start a mini dfs cluster with ReplicationFactor datanodes
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(ReplicationFactor).Build();
    try
    {
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager();
        FileSystem fs = cluster.GetFileSystem();

        // populate the cluster with a one block file
        Path testFile = new Path("/testfile");
        DFSTestUtil.CreateFile(fs, testFile, 1L, ReplicationFactor, 1L);
        DFSTestUtil.WaitReplication(fs, testFile, ReplicationFactor);
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, testFile);

        // keep a copy of all datanode descriptor
        DatanodeDescriptor[] datanodes = hm.GetDatanodes();

        // start two new nodes
        cluster.StartDataNodes(conf, 2, true, null, null);
        cluster.WaitActive();

        // bring down first datanode
        DatanodeDescriptor datanode = datanodes[0];
        MiniDFSCluster.DataNodeProperties dnprop = cluster.StopDataNode(datanode.GetXferAddr());

        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), datanode.GetXferAddr());

        // the block will be replicated
        DFSTestUtil.WaitReplication(fs, testFile, ReplicationFactor);

        // restart the first datanode
        cluster.RestartDataNode(dnprop);
        cluster.WaitActive();

        // check if excessive replica is detected (transient)
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() == 0)
        {
            CheckTimeout("excess replicas not detected");
        }

        // find out a non-excess node
        DatanodeDescriptor nonExcessDN = null;
        foreach (DatanodeStorageInfo storage in bm.blocksMap.GetStorages(block.GetLocalBlock()))
        {
            DatanodeDescriptor dn = storage.GetDatanodeDescriptor();
            // NOTE(review): the null check assumes this indexer yields null
            // for a datanode with no entry; confirm the map type, since
            // System.Collections.Generic.Dictionary throws on a missing key.
            ICollection<Block> blocks = bm.excessReplicateMap[dn.GetDatanodeUuid()];
            if (blocks == null || !blocks.Contains(block.GetLocalBlock()))
            {
                nonExcessDN = dn;
                break;
            }
        }
        NUnit.Framework.Assert.IsNotNull(nonExcessDN, "no datanode holds a non-excess replica of the block");

        // bring down non excessive datanode
        dnprop = cluster.StopDataNode(nonExcessDN.GetXferAddr());

        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), nonExcessDN.GetXferAddr());

        // The block should be replicated
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).LiveReplicas() != ReplicationFactor)
        {
            CheckTimeout("live replica count not correct", 1000);
        }

        // restart the non-excess datanode that was just stopped
        cluster.RestartDataNode(dnprop);
        cluster.WaitActive();

        // check if excessive replica is detected (transient)
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() != 2)
        {
            CheckTimeout("excess replica count not equal to 2");
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Creates a monitor that keeps a reference to the heartbeat manager it
/// was created for.
/// </summary>
/// <param name="_enclosing">the heartbeat manager stored in the monitor's <c>_enclosing</c> field</param>
internal Monitor(HeartbeatManager _enclosing)
{
    // The field and the parameter share a name, so the field needs "this.".
    this._enclosing = _enclosing;
}