/// <summary>
/// Checks that the "StaleDataNodes" gauge reports 2 after two datanodes have been
/// made stale, and 0 again after both have been reset.
/// </summary>
public virtual void TestStaleNodes()
{
    const int affectedNodes = 2;

    // Mark the first two datanodes as stale: silence their heartbeats and move
    // their last-update bookkeeping back by (stale interval + 1).
    for (int idx = 0; idx < affectedNodes; idx++)
    {
        DataNode dataNode = cluster.GetDataNodes()[idx];
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dataNode, true);

        long staleInterval = Conf.GetLong(
            DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey,
            DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalDefault);

        var blockManager = cluster.GetNameNode().GetNamesystem().GetBlockManager();
        DatanodeDescriptor descriptor =
            blockManager.GetDatanodeManager().GetDatanode(dataNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(descriptor, -(staleInterval + 1));
    }

    // Have the heartbeat check run so the stale count is recomputed.
    BlockManagerTestUtil.CheckHeartbeat(
        cluster.GetNameNode().GetNamesystem().GetBlockManager());
    MetricsAsserts.AssertGauge("StaleDataNodes", 2, MetricsAsserts.GetMetrics(NsMetrics));

    // Undo the staleness: heartbeats back on, last-update offset reset to 0.
    for (int idx = 0; idx < affectedNodes; idx++)
    {
        DataNode dataNode = cluster.GetDataNodes()[idx];
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dataNode, false);

        var blockManager = cluster.GetNameNode().GetNamesystem().GetBlockManager();
        DatanodeDescriptor descriptor =
            blockManager.GetDatanodeManager().GetDatanode(dataNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(descriptor, 0);
    }

    // Run the heartbeat check again so the gauge refreshes.
    BlockManagerTestUtil.CheckHeartbeat(
        cluster.GetNameNode().GetNamesystem().GetBlockManager());
    MetricsAsserts.AssertGauge("StaleDataNodes", 0, MetricsAsserts.GetMetrics(NsMetrics));
}
/// <summary>
/// Disables heartbeats on the first datanode of the MiniDFSCluster whose host name
/// equals <paramref name="hostName"/>.
/// </summary>
/// <param name="cluster">The MiniDFSCluster whose datanodes are searched</param>
/// <param name="hostName">The host name of the datanode to be stopped</param>
/// <returns>
/// The DataNode whose heartbeat has been stopped, or null when no datanode matches
/// </returns>
private DataNode StopDataNodeHeartbeat(MiniDFSCluster cluster, string hostName)
{
    DataNode silenced = null;
    foreach (DataNode candidate in cluster.GetDataNodes())
    {
        if (!candidate.GetDatanodeId().GetHostName().Equals(hostName))
        {
            continue;
        }

        // Only the first match is affected; stop searching afterwards.
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(candidate, true);
        silenced = candidate;
        break;
    }
    return silenced;
}
/// <summary>
/// Creates a file, marks two replicas of its first block as corrupt so that a
/// pending replication record exists, deletes the file, and then checks that the
/// pending replication count drops to zero within the retry window.
/// </summary>
public virtual void TestPendingAndInvalidate()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, 1024);
    conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, DfsReplicationInterval);
    conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, DfsReplicationInterval);

    // Build and wait inside the try so that a failure during start-up still
    // reaches the Shutdown() in the finally block.
    MiniDFSCluster cluster = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(DatanodeCount).Build();
        cluster.WaitActive();
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        DistributedFileSystem fs = cluster.GetFileSystem();

        // 1. create a file
        Path filePath = new Path("/tmp.txt");
        DFSTestUtil.CreateFile(fs, filePath, 1024, (short)3, 0L);

        // 2. disable the heartbeats
        foreach (DataNode dn in cluster.GetDataNodes())
        {
            DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
        }

        // 3. mark a couple of blocks as corrupt
        LocatedBlock block = NameNodeAdapter.GetBlockLocations(
            cluster.GetNameNode(), filePath.ToString(), 0, 1).Get(0);
        cluster.GetNamesystem().WriteLock();
        try
        {
            bm.FindAndMarkBlockAsCorrupt(block.GetBlock(), block.GetLocations()[0],
                "STORAGE_ID", "TEST");
            bm.FindAndMarkBlockAsCorrupt(block.GetBlock(), block.GetLocations()[1],
                "STORAGE_ID", "TEST");
        }
        finally
        {
            cluster.GetNamesystem().WriteUnlock();
        }
        BlockManagerTestUtil.ComputeAllPendingWork(bm);
        BlockManagerTestUtil.UpdateState(bm);
        // NUnit order is (expected, actual).
        NUnit.Framework.Assert.AreEqual(1L, bm.GetPendingReplicationBlocksCount());
        NUnit.Framework.Assert.AreEqual(2,
            bm.pendingReplications.GetNumReplicas(block.GetBlock().GetLocalBlock()));

        // 4. delete the file
        fs.Delete(filePath, true);

        // retry at most 10 times, each time sleep for 1s. Note that 10s is much
        // less than the default pending record timeout (5~10min)
        int retries = 10;
        long pendingNum = bm.GetPendingReplicationBlocksCount();
        while (pendingNum != 0 && retries-- > 0)
        {
            Sharpen.Thread.Sleep(1000); // let NN do the deletion
            BlockManagerTestUtil.UpdateState(bm);
            pendingNum = bm.GetPendingReplicationBlocksCount();
        }
        NUnit.Framework.Assert.AreEqual(0L, pendingNum);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Checks that pending replication counts are decremented when datanodes report a
/// received block, that a repeated report from the same datanodes does not change
/// the count, and that nothing is pending once heartbeats are re-enabled.
/// </summary>
public virtual void TestBlockReceived()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, 1024);
    MiniDFSCluster cluster = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(DatanodeCount).Build();
        cluster.WaitActive();
        DistributedFileSystem hdfs = cluster.GetFileSystem();
        FSNamesystem fsn = cluster.GetNamesystem();
        BlockManager blkManager = fsn.GetBlockManager();
        string file = "/tmp.txt";
        Path filePath = new Path(file);
        short replFactor = 1;
        DFSTestUtil.CreateFile(hdfs, filePath, 1024L, replFactor, 0);

        // temporarily stop the heartbeat
        AList<DataNode> datanodes = cluster.GetDataNodes();
        for (int i = 0; i < DatanodeCount; i++)
        {
            DataNodeTestUtils.SetHeartbeatsDisabledForTests(datanodes[i], true);
        }

        hdfs.SetReplication(filePath, (short)DatanodeCount);
        BlockManagerTestUtil.ComputeAllPendingWork(blkManager);
        NUnit.Framework.Assert.AreEqual(1, blkManager.pendingReplications.Size());
        INodeFile fileNode = fsn.GetFSDirectory().GetINode4Write(file).AsFile();
        Block[] blocks = fileNode.GetBlocks();
        NUnit.Framework.Assert.AreEqual(DatanodeCount - 1,
            blkManager.pendingReplications.GetNumReplicas(blocks[0]));

        LocatedBlock locatedBlock = hdfs.GetClient().GetLocatedBlocks(file, 0).Get(0);
        DatanodeInfo existingDn = (locatedBlock.GetLocations())[0];
        string poolId = cluster.GetNamesystem().GetBlockPoolId();

        // let two datanodes (other than the one that already has the data)
        // report to NN
        ReportBlockReceivedFromOtherDatanodes(cluster, datanodes, existingDn, poolId,
            blocks[0], 2);
        NUnit.Framework.Assert.AreEqual(DatanodeCount - 3,
            blkManager.pendingReplications.GetNumReplicas(blocks[0]));

        // let the same datanodes report again. The helper keeps its own counter,
        // so this second round is really sent; previously the shared counter was
        // already at its limit and the loop body never ran.
        ReportBlockReceivedFromOtherDatanodes(cluster, datanodes, existingDn, poolId,
            blocks[0], 2);
        NUnit.Framework.Assert.AreEqual(DatanodeCount - 3,
            blkManager.pendingReplications.GetNumReplicas(blocks[0]));

        // re-enable heartbeats on every datanode and trigger one immediately
        for (int i = 0; i < DatanodeCount; i++)
        {
            DataNodeTestUtils.SetHeartbeatsDisabledForTests(datanodes[i], false);
            DataNodeTestUtils.TriggerHeartbeat(datanodes[i]);
        }

        Sharpen.Thread.Sleep(5000);
        NUnit.Framework.Assert.AreEqual(0, blkManager.pendingReplications.Size());
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}

/// <summary>
/// Sends a "received block" report to the NameNode on behalf of the first
/// <paramref name="maxReports"/> datanodes (in list order) that are not
/// <paramref name="existingDn"/>.
/// </summary>
/// <param name="cluster">Cluster whose NameNode RPC endpoint receives the reports</param>
/// <param name="datanodes">Datanodes of the cluster, scanned up to DatanodeCount entries</param>
/// <param name="existingDn">Datanode to skip (the one already holding the block)</param>
/// <param name="poolId">Block pool id used for the registration lookup and the report</param>
/// <param name="block">Block reported as received</param>
/// <param name="maxReports">Maximum number of datanodes that report</param>
private void ReportBlockReceivedFromOtherDatanodes(MiniDFSCluster cluster,
    AList<DataNode> datanodes, DatanodeInfo existingDn, string poolId, Block block,
    int maxReports)
{
    int reported = 0;
    for (int i = 0; i < DatanodeCount && reported < maxReports; i++)
    {
        if (datanodes[i].GetDatanodeId().Equals(existingDn))
        {
            continue;
        }

        DatanodeRegistration dnR = datanodes[i].GetDNRegistrationForBP(poolId);
        StorageReceivedDeletedBlocks[] report = new StorageReceivedDeletedBlocks[]
        {
            new StorageReceivedDeletedBlocks("Fake-storage-ID-Ignored",
                new ReceivedDeletedBlockInfo[]
                {
                    new ReceivedDeletedBlockInfo(block,
                        ReceivedDeletedBlockInfo.BlockStatus.ReceivedBlock, string.Empty)
                })
        };
        cluster.GetNameNodeRpc().BlockReceivedAndDeleted(dnR, poolId, report);
        reported++;
    }
}
/// <summary>
/// With stale-datanode avoidance for reads enabled, checks that a datanode marked
/// stale is returned last (index 2 of 3) among the locations of a block, both for
/// the first block and for the last block of a file that is still being written.
/// </summary>
public virtual void TestReadSelectNonStaleDatanode()
{
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
    long staleInterval = 30 * 1000 * 60;
    conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);

    // Everything that needs cleanup is declared up front and created inside the
    // try, so that a failure during set-up cannot leak the cluster or the client.
    MiniDFSCluster cluster = null;
    DFSClient client = null;
    FSDataOutputStream stm = null;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes)
            .Racks(racks).Build();
        cluster.WaitActive();
        IPEndPoint addr = new IPEndPoint("localhost", cluster.GetNameNodePort());
        client = new DFSClient(addr, conf);
        IList<DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager()
            .GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Live);
        // NUnit order is (expected, actual, message).
        NUnit.Framework.Assert.AreEqual(numDatanodes, nodeInfoList.Count,
            "Unexpected number of datanodes");
        FileSystem fileSys = cluster.GetFileSystem();

        // do the writing but do not close the FSDataOutputStream
        // in order to mimic the ongoing writing
        Path fileName = new Path("/file1");
        stm = fileSys.Create(fileName, true,
            fileSys.GetConf().GetInt(CommonConfigurationKeys.IoFileBufferSizeKey, 4096),
            (short)3, blockSize);
        stm.Write(new byte[(blockSize * 3) / 2]);
        // We do not close the stream so that
        // the writing seems to be still ongoing
        stm.Hflush();

        LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(
            fileName.ToString(), 0, blockSize);
        DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodes.Length);

        // stop the heartbeat of the first node
        DataNode staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // set the first node as stale
        DatanodeDescriptor staleNodeInfo = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));

        LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(
            fileName.ToString(), 0, blockSize);
        DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodesAfterStale.Length);
        // the formerly-first node is expected to have moved to the last position
        NUnit.Framework.Assert.AreEqual(nodes[0].GetHostName(),
            nodesAfterStale[2].GetHostName());

        // restart the staleNode's heartbeat
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
        // reset the first node as non-stale, so as to avoid two stale nodes
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);

        LocatedBlock lastBlock = client.GetLocatedBlocks(
            fileName.ToString(), 0, long.MaxValue).GetLastLocatedBlock();
        nodes = lastBlock.GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodes.Length);

        // stop the heartbeat of the first node for the last block
        staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // set the node as stale
        DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));

        LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(
            fileName.ToString(), 0, long.MaxValue).GetLastLocatedBlock();
        nodesAfterStale = lastBlockAfterStale.GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodesAfterStale.Length);
        NUnit.Framework.Assert.AreEqual(nodes[0].GetHostName(),
            nodesAfterStale[2].GetHostName());
    }
    finally
    {
        // Nested finally blocks: a failure while closing one resource must not
        // prevent the remaining ones from being released.
        try
        {
            if (stm != null)
            {
                stm.Close();
            }
        }
        finally
        {
            try
            {
                if (client != null)
                {
                    client.Close();
                }
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
    }
}
/// <summary>
/// Writes and hflushes <paramref name="size"/> bytes to a file, optionally renames
/// it, stops lease renewal and datanode heartbeats, restarts the NameNode, and then
/// waits until the file is no longer under construction. Finally verifies that the
/// original stream can neither flush nor close and that the file content is intact.
/// </summary>
/// <param name="doRename">
/// When true, the file is renamed (".renamed" suffix) before lease renewal is stopped
/// </param>
/// <param name="size">
/// Number of bytes to write; a negative value is replaced by
/// AppendTestUtil.NextInt(FileSize + 1)
/// </param>
/// <exception cref="System.Exception"/>
public virtual void HardLeaseRecoveryRestartHelper(bool doRename, int size)
{
    if (size < 0)
    {
        size = AppendTestUtil.NextInt(FileSize + 1);
    }

    //create a file
    string fileStr = "/hardLeaseRecovery";
    AppendTestUtil.Log.Info("filestr=" + fileStr);
    Path filePath = new Path(fileStr);
    FSDataOutputStream stm = dfs.Create(filePath, true, BufSize, ReplicationNum, BlockSize);
    NUnit.Framework.Assert.IsTrue(dfs.dfs.Exists(fileStr));

    // write bytes into the file.
    AppendTestUtil.Log.Info("size=" + size);
    stm.Write(buffer, 0, size);

    string originalLeaseHolder = NameNodeAdapter.GetLeaseHolderForPath(
        cluster.GetNameNode(), fileStr);
    // NUnit order is (condition, message).
    NUnit.Framework.Assert.IsFalse(
        originalLeaseHolder.Equals(HdfsServerConstants.NamenodeLeaseHolder),
        "original lease holder should not be the NN");

    // hflush file
    AppendTestUtil.Log.Info("hflush");
    stm.Hflush();

    // check visible length; close the reader even when the assertion fails
    HdfsDataInputStream @in = (HdfsDataInputStream)dfs.Open(filePath);
    try
    {
        NUnit.Framework.Assert.AreEqual(size, @in.GetVisibleLength());
    }
    finally
    {
        @in.Close();
    }

    if (doRename)
    {
        fileStr += ".renamed";
        Path renamedPath = new Path(fileStr);
        NUnit.Framework.Assert.IsTrue(dfs.Rename(filePath, renamedPath));
        filePath = renamedPath;
    }

    // kill the lease renewal thread
    AppendTestUtil.Log.Info("leasechecker.interruptAndJoin()");
    dfs.dfs.GetLeaseRenewer().InterruptAndJoin();

    // Make sure the DNs don't send a heartbeat for a while, so the blocks
    // won't actually get completed during lease recovery.
    foreach (DataNode dn in cluster.GetDataNodes())
    {
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
    }

    // apply the short hard limit (ShortLeasePeriod)
    cluster.SetLeasePeriod(LongLeasePeriod, ShortLeasePeriod);

    // Make sure lease recovery begins.
    Sharpen.Thread.Sleep(HdfsServerConstants.NamenodeLeaseRecheckInterval * 2);
    CheckLease(fileStr, size);

    // The lease is checked again after a NameNode restart.
    cluster.RestartNameNode(false);
    CheckLease(fileStr, size);

    // Let the DNs send heartbeats again.
    foreach (DataNode dn in cluster.GetDataNodes())
    {
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, false);
    }
    cluster.WaitActive();

    // apply the short hard limit again after the restart, to initiate lease recovery.
    cluster.SetLeasePeriod(LongLeasePeriod, ShortLeasePeriod);

    // wait for lease recovery to complete
    LocatedBlocks locatedBlocks;
    do
    {
        Sharpen.Thread.Sleep(ShortLeasePeriod);
        locatedBlocks = dfs.dfs.GetLocatedBlocks(fileStr, 0L, size);
    }
    while (locatedBlocks.IsUnderConstruction());
    NUnit.Framework.Assert.AreEqual(size, locatedBlocks.GetFileLength());

    // make sure that the client can't write data anymore.
    try
    {
        stm.Write('b');
        stm.Hflush();
        NUnit.Framework.Assert.Fail("Should not be able to flush after we've lost the lease");
    }
    catch (IOException e)
    {
        Log.Info("Expceted exception on write/hflush", e);
    }

    try
    {
        stm.Close();
        NUnit.Framework.Assert.Fail("Should not be able to close after we've lost the lease");
    }
    catch (IOException e)
    {
        Log.Info("Expected exception on close", e);
    }

    // verify data
    AppendTestUtil.Log.Info("File size is good. Now validating sizes from datanodes...");
    AppendTestUtil.CheckFullFile(dfs, filePath, size, buffer, fileStr);
}