/// <summary>
/// Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
/// as dead before decommission has completed.
/// </summary>
/// <remarks>
/// Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
/// as dead before decommission has completed. That will allow the DN to resume
/// the replication process after it rejoins the cluster.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDecommissionStatusAfterDNRestart()
{
    DistributedFileSystem fileSys = (DistributedFileSystem)cluster.GetFileSystem();
    // Create a file with one block. That block has one replica.
    Path f = new Path("decommission.dat");
    DFSTestUtil.CreateFile(fileSys, f, fileSize, fileSize, fileSize, (short)1, seed);
    // Find the DN that owns the only replica.
    RemoteIterator<LocatedFileStatus> fileList = fileSys.ListLocatedStatus(f);
    BlockLocation[] blockLocations = fileList.Next().GetBlockLocations();
    string dnName = blockLocations[0].GetNames()[0];
    // Decommission the DN.
    FSNamesystem fsn = cluster.GetNamesystem();
    DatanodeManager dm = fsn.GetBlockManager().GetDatanodeManager();
    DecommissionNode(fsn, localFileSys, dnName);
    dm.RefreshNodes(conf);
    // Stop the DN while decommission is in progress. Given that
    // DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is set to 1 and the size of
    // the block, decommission will take much longer than the test timeout
    // value to complete. So when StopDataNode is called, decommission
    // should still be in progress.
    MiniDFSCluster.DataNodeProperties dataNodeProperties = cluster.StopDataNode(dnName);
    IList<DatanodeDescriptor> dead = new AList<DatanodeDescriptor>();
    while (true)
    {
        dm.FetchDatanodes(null, dead, false);
        if (dead.Count == 1)
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }
    // Force removal of the dead node's blocks.
    BlockManagerTestUtil.CheckHeartbeat(fsn.GetBlockManager());
    // Force DatanodeManager to check decommission state.
    BlockManagerTestUtil.RecheckDecommissionState(dm);
    // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
    NUnit.Framework.Assert.IsTrue("the node should be DECOMMISSION_IN_PROGRESS",
        dead[0].IsDecommissionInProgress());
    // Check DatanodeManager#getDecommissioningNodes; make sure it returns
    // the node as decommissioning, even though it is dead.
    IList<DatanodeDescriptor> decomlist = dm.GetDecommissioningNodes();
    NUnit.Framework.Assert.IsTrue("The node should be decommissioning",
        decomlist.Count == 1);
    // Delete the under-replicated file, which should let the
    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED.
    CleanupFile(fileSys, f);
    BlockManagerTestUtil.RecheckDecommissionState(dm);
    NUnit.Framework.Assert.IsTrue("the node should be decommissioned",
        dead[0].IsDecommissioned());
    // Add the node back.
    cluster.RestartDataNode(dataNodeProperties, true);
    cluster.WaitActive();
    // Call refreshNodes on FSNamesystem with an empty exclude file.
    // This will remove the datanodes from the decommissioning list and
    // make them available again.
    WriteConfigFile(localFileSys, excludeFile, null);
    dm.RefreshNodes(conf);
}
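/// <summary>
/// Verify that decommissioning status is reported correctly as the cluster's
/// datanodes are decommissioned one by one, both through
/// DatanodeManager#getDecommissioningNodes and through DFSAdmin.
/// </summary>
/// <exception cref="System.Exception"/>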
public virtual void TestDecommissionStatus()
{
    IPEndPoint addr = new IPEndPoint("localhost", cluster.GetNameNodePort());
    DFSClient client = new DFSClient(addr, conf);
    DatanodeInfo[] info = client.DatanodeReport(HdfsConstants.DatanodeReportType.Live);
    NUnit.Framework.Assert.AreEqual("Number of Datanodes ", 2, info.Length);
    DistributedFileSystem fileSys = cluster.GetFileSystem();
    DFSAdmin admin = new DFSAdmin(cluster.GetConfiguration(0));
    short replicas = numDatanodes;
    //
    // Decommission one node. Verify the decommission status.
    //
    Path file1 = new Path("decommission.dat");
    WriteFile(fileSys, file1, replicas);
    Path file2 = new Path("decommission1.dat");
    FSDataOutputStream st1 = WriteIncompleteFile(fileSys, file2, replicas);
    foreach (DataNode d in cluster.GetDataNodes())
    {
        DataNodeTestUtils.TriggerBlockReport(d);
    }
    FSNamesystem fsn = cluster.GetNamesystem();
    DatanodeManager dm = fsn.GetBlockManager().GetDatanodeManager();
    for (int iteration = 0; iteration < numDatanodes; iteration++)
    {
        string downnode = DecommissionNode(fsn, client, localFileSys, iteration);
        dm.RefreshNodes(conf);
        decommissionedNodes.AddItem(downnode);
        BlockManagerTestUtil.RecheckDecommissionState(dm);
        IList<DatanodeDescriptor> decommissioningNodes = dm.GetDecommissioningNodes();
        if (iteration == 0)
        {
            NUnit.Framework.Assert.AreEqual(decommissioningNodes.Count, 1);
            DatanodeDescriptor decommNode = decommissioningNodes[0];
            CheckDecommissionStatus(decommNode, 3, 0, 1);
            CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 1), fileSys, admin);
        }
        else
        {
            NUnit.Framework.Assert.AreEqual(decommissioningNodes.Count, 2);
            DatanodeDescriptor decommNode1 = decommissioningNodes[0];
            DatanodeDescriptor decommNode2 = decommissioningNodes[1];
            // The first node is still 3,3,1 since it passed over the
            // under-construction block earlier, before node 2 was decommissioned.
            CheckDecommissionStatus(decommNode1, 3, 3, 1);
            // The second node is 4,4,2 since it has the full state.
            CheckDecommissionStatus(decommNode2, 4, 4, 2);
            CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 2), fileSys, admin);
        }
    }
    // Call refreshNodes on FSNamesystem with an empty exclude file.
    // This will remove the datanodes from the decommissioning list and
    // make them available again.
    WriteConfigFile(localFileSys, excludeFile, null);
    dm.RefreshNodes(conf);
    st1.Close();
    CleanupFile(fileSys, file1);
    CleanupFile(fileSys, file2);
}