/// <summary>
/// Verifies that a DataNode stays in DECOMMISSION_INPROGRESS state when it is
/// marked dead before its decommission has completed.
/// </summary>
/// <remarks>
/// Keeping the node in DECOMMISSION_INPROGRESS allows it to resume the
/// replication process after it rejoins the cluster. The test then deletes the
/// only file stored on that node and checks that the node becomes
/// DECOMMISSIONED, before restarting the node and clearing the exclude file.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDecommissionStatusAfterDNRestart()
{
    DistributedFileSystem fileSys = (DistributedFileSystem)cluster.GetFileSystem();

    // Create a file with one block. That block has one replica.
    Path f = new Path("decommission.dat");
    DFSTestUtil.CreateFile(fileSys, f, fileSize, fileSize, fileSize, (short)1, seed);

    // Find the DN that owns the only replica.
    RemoteIterator<LocatedFileStatus> fileList = fileSys.ListLocatedStatus(f);
    BlockLocation[] blockLocations = fileList.Next().GetBlockLocations();
    string dnName = blockLocations[0].GetNames()[0];

    // Decommission the DN.
    FSNamesystem fsn = cluster.GetNamesystem();
    DatanodeManager dm = fsn.GetBlockManager().GetDatanodeManager();
    DecommissionNode(fsn, localFileSys, dnName);
    dm.RefreshNodes(conf);

    // Stop the DN while decommission is in progress.
    // Given DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is set to 1 and the size of
    // the block, it will take much longer than the test timeout value for the
    // decommission to complete. So when StopDataNode is called, decommission
    // should still be in progress.
    MiniDFSCluster.DataNodeProperties dataNodeProperties = cluster.StopDataNode(dnName);

    // Poll once per second until exactly one dead node has been collected.
    // NOTE(review): this loop has no upper bound of its own; it presumably relies
    // on a harness-level test timeout - confirm one is configured.
    IList<DatanodeDescriptor> dead = new AList<DatanodeDescriptor>();
    while (true)
    {
        dm.FetchDatanodes(null, dead, false);
        if (dead.Count == 1)
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }
    DatanodeDescriptor deadNode = dead[0];

    // Force removal of the dead node's blocks.
    BlockManagerTestUtil.CheckHeartbeat(fsn.GetBlockManager());

    // Force DatanodeManager to check decommission state.
    BlockManagerTestUtil.RecheckDecommissionState(dm);

    // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
    // NUnit's Assert.IsTrue takes the condition first and the message second.
    NUnit.Framework.Assert.IsTrue(deadNode.IsDecommissionInProgress(),
        "the node should be DECOMMISSION_IN_PROGRESSS");

    // Check DatanodeManager#GetDecommissioningNodes, make sure it returns
    // the node as decommissioning, even if it's dead.
    IList<DatanodeDescriptor> decomlist = dm.GetDecommissioningNodes();
    NUnit.Framework.Assert.IsTrue(decomlist.Count == 1,
        "The node should be be decommissioning");

    // Delete the under-replicated file, which should let the
    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED.
    CleanupFile(fileSys, f);
    BlockManagerTestUtil.RecheckDecommissionState(dm);
    NUnit.Framework.Assert.IsTrue(deadNode.IsDecommissioned(),
        "the node should be decommissioned");

    // Add the node back.
    cluster.RestartDataNode(dataNodeProperties, true);
    cluster.WaitActive();

    // Call RefreshNodes on the DatanodeManager with an empty exclude file.
    // This will remove the datanodes from the decommissioning list and
    // make them available again.
    WriteConfigFile(localFileSys, excludeFile, null);
    dm.RefreshNodes(conf);
}