/// <summary>Verify the support for decommissioning a datanode that is already dead.</summary>
/// <remarks>
/// Verify the support for decommissioning a datanode that is already dead.
/// Under this scenario the datanode should immediately be marked as
/// DECOMMISSIONED
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDecommissionDeadDN()
{
    // Enable debug logging on the DecommissionManager so failures are diagnosable.
    Logger log = Logger.GetLogger(typeof(DecommissionManager));
    log.SetLevel(Level.Debug);
    DatanodeID dnID = cluster.GetDataNodes()[0].GetDatanodeId();
    string dnName = dnID.GetXferAddr();
    // Stop the DN and wait (up to 30s) until the NN no longer sees it as alive.
    MiniDFSCluster.DataNodeProperties stoppedDN = cluster.StopDataNode(0);
    DFSTestUtil.WaitForDatanodeState(cluster, dnID.GetDatanodeUuid(), false, 30000);
    FSNamesystem fsn = cluster.GetNamesystem();
    DatanodeManager dm = fsn.GetBlockManager().GetDatanodeManager();
    DatanodeDescriptor dnDescriptor = dm.GetDatanode(dnID);
    // Decommission the already-dead node; it should go straight to DECOMMISSIONED
    // after the decommission state recheck, with no DECOMMISSION_INPROGRESS phase.
    DecommissionNode(fsn, localFileSys, dnName);
    dm.RefreshNodes(conf);
    BlockManagerTestUtil.RecheckDecommissionState(dm);
    NUnit.Framework.Assert.IsTrue(dnDescriptor.IsDecommissioned());
    // Add the node back
    cluster.RestartDataNode(stoppedDN, true);
    cluster.WaitActive();
    // Call refreshNodes on FSNamesystem with empty exclude file to remove the
    // datanode from decommissioning list and make it available again.
    WriteConfigFile(localFileSys, excludeFile, null);
    dm.RefreshNodes(conf);
}
/// <summary>
/// Test for the case where the client begins to read a long block, but doesn't
/// read bytes off the stream quickly.
/// </summary>
/// <remarks>
/// Test for the case where the client begins to read a long block, but doesn't
/// read bytes off the stream quickly. The datanode should time out sending the
/// chunks and the transceiver should die, even if it has a long keepalive.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestSlowReader()
{
    // Set a client socket cache expiry time much longer than
    // the datanode-side expiration time.
    long ClientExpiryMs = 600000L;
    Configuration clientConf = new Configuration(conf);
    clientConf.SetLong(DFSConfigKeys.DfsClientSocketCacheExpiryMsecKey, ClientExpiryMs);
    clientConf.Set(DFSConfigKeys.DfsClientContext, "testSlowReader");
    DistributedFileSystem fs = (DistributedFileSystem)FileSystem.Get(cluster.GetURI(), clientConf);
    // Restart the DN with a shorter write timeout.
    MiniDFSCluster.DataNodeProperties props = cluster.StopDataNode(0);
    props.conf.SetInt(DFSConfigKeys.DfsDatanodeSocketWriteTimeoutKey, WriteTimeout);
    props.conf.SetInt(DFSConfigKeys.DfsDatanodeSocketReuseKeepaliveKey, 120000);
    NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(props, true));
    dn = cluster.GetDataNodes()[0];
    // Wait for heartbeats to avoid a startup race where we
    // try to write the block while the DN is still starting.
    cluster.TriggerHeartbeats();
    // Write an 8MB file, then open it and read only a single byte so the
    // datanode's sender stalls on the unread stream.
    DFSTestUtil.CreateFile(fs, TestFile, 1024 * 1024 * 8L, (short)1, 0L);
    FSDataInputStream stm = fs.Open(TestFile);
    stm.Read();
    AssertXceiverCount(1);
    // Poll every 500ms, up to 50s, for the supplied condition to hold
    // (_Supplier_193 presumably waits for the xceiver to die — confirm at its
    // declaration, which is outside this block).
    GenericTestUtils.WaitFor(new _Supplier_193(this), 500, 50000);
    // DN should time out in sendChunks, and this should force
    // the xceiver to exit.
    IOUtils.CloseStream(stm);
}
/// <summary>
/// Corrupts a replica of <paramref name="block"/> on the datanode at
/// <paramref name="dnIndex"/>, removes the block-scanner log so the replica
/// will be rescanned, and restarts a datanode so the corruption is detected.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void CorruptBlock(MiniDFSCluster cluster, FileSystem fs, Path fileName, int
    dnIndex, ExtendedBlock block)
{
    // corrupt the block on datanode dnIndex
    // the indexes change once the nodes are restarted.
    // But the datadirectory will not change
    NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(dnIndex, block));
    // NOTE(review): datanode 0 is stopped here regardless of dnIndex; the
    // comment above suggests this is deliberate (indexes shift across
    // restarts) — confirm against the callers.
    MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
    // Each datanode has multiple data dirs, check each
    for (int dirIndex = 0; dirIndex < 2; dirIndex++)
    {
        string bpid = cluster.GetNamesystem().GetBlockPoolId();
        FilePath storageDir = cluster.GetStorageDir(dnIndex, dirIndex);
        FilePath dataDir = MiniDFSCluster.GetFinalizedDir(storageDir, bpid);
        FilePath scanLogFile = new FilePath(dataDir, "dncp_block_verification.log.curr");
        if (scanLogFile.Exists())
        {
            // wait for one minute for deletion to succeed
            for (int i = 0; !scanLogFile.Delete(); i++)
            {
                NUnit.Framework.Assert.IsTrue("Could not delete log file in one minute", i < 60);
                try
                {
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception)
                {
                    // Ignore interruption; keep retrying until the minute is up.
                }
            }
        }
    }
    // restart the datanode so the corrupt replica will be detected
    cluster.RestartDataNode(dnProps);
}
/// <summary>
/// Stops the datanode at <paramref name="dnIndex"/>, reformats the datanode
/// storage directories, and restarts the node on the same transfer port.
/// </summary>
/// <returns>the result of restarting the datanode.</returns>
/// <exception cref="System.IO.IOException"/>
private static bool WipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
{
    // The DN must be down before its directories can be reformatted.
    MiniDFSCluster.DataNodeProperties stopped = cluster.StopDataNode(dnIndex);
    cluster.FormatDataNodeDirs();
    // keepPort=true: bring it back on the same xfer port.
    return cluster.RestartDataNode(stopped, true);
}
/// <summary>
/// Tests that after a NameNode restart, blocks that were deleted while all
/// DataNodes were down are still queued for deletion once block reports
/// arrive, and that the pending-deletion queue eventually drains to zero.
/// </summary>
public virtual void TestPendingDeleteUnknownBlocks()
{
    int fileNum = 5;
    // 5 files
    Path[] files = new Path[fileNum];
    MiniDFSCluster.DataNodeProperties[] dnprops = new MiniDFSCluster.DataNodeProperties[Replication];
    // create a group of files, each file contains 1 block
    for (int i = 0; i < fileNum; i++)
    {
        files[i] = new Path("/file" + i);
        DFSTestUtil.CreateFile(dfs, files[i], Blocksize, Replication, i);
    }
    // wait until all DataNodes have replicas
    WaitForReplication();
    // Stop every DataNode (highest index first) so the deletions below
    // cannot be propagated to them.
    for (int i_1 = Replication - 1; i_1 >= 0; i_1--)
    {
        dnprops[i_1] = cluster.StopDataNode(i_1);
    }
    Sharpen.Thread.Sleep(2000);
    // delete 2 files, we still have 3 files remaining so that we can cover
    // every DN storage
    for (int i_2 = 0; i_2 < 2; i_2++)
    {
        dfs.Delete(files[i_2], true);
    }
    // restart NameNode
    cluster.RestartNameNode(false);
    // Replace the block manager's invalidateBlocks with a spy whose
    // invalidation delay is 1 (presumably milliseconds — confirm), so
    // invalidation work is dispatched promptly.
    InvalidateBlocks invalidateBlocks = (InvalidateBlocks)Whitebox.GetInternalState(
        cluster.GetNamesystem().GetBlockManager(), "invalidateBlocks");
    InvalidateBlocks mockIb = Org.Mockito.Mockito.Spy(invalidateBlocks);
    Org.Mockito.Mockito.DoReturn(1L).When(mockIb).GetInvalidationDelay();
    Whitebox.SetInternalState(cluster.GetNamesystem().GetBlockManager(), "invalidateBlocks", mockIb);
    // Before any block report arrives, nothing is pending deletion.
    NUnit.Framework.Assert.AreEqual(0L, cluster.GetNamesystem().GetPendingDeletionBlocks());
    // restart DataNodes
    for (int i_3 = 0; i_3 < Replication; i_3++)
    {
        cluster.RestartDataNode(dnprops[i_3], true);
    }
    cluster.WaitActive();
    for (int i_4 = 0; i_4 < Replication; i_4++)
    {
        DataNodeTestUtils.TriggerBlockReport(cluster.GetDataNodes()[i_4]);
    }
    Sharpen.Thread.Sleep(2000);
    // make sure we have received block reports by checking the total block #
    NUnit.Framework.Assert.AreEqual(3, cluster.GetNamesystem().GetBlocksTotal());
    NUnit.Framework.Assert.AreEqual(4, cluster.GetNamesystem().GetPendingDeletionBlocks());
    // After another restart and a grace period the pending deletions drain.
    cluster.RestartNameNode(true);
    Sharpen.Thread.Sleep(6000);
    NUnit.Framework.Assert.AreEqual(3, cluster.GetNamesystem().GetBlocksTotal());
    NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem().GetPendingDeletionBlocks());
}
/// <summary>
/// Regression test for HDFS-2795:
/// - Start an HA cluster with a DN.
/// </summary>
/// <remarks>
/// Regression test for HDFS-2795:
/// - Start an HA cluster with a DN.
/// - Write several blocks to the FS with replication 1.
/// - Shutdown the DN
/// - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
/// - Restart the DN.
/// In the bug, the standby node would only very slowly notice the blocks returning
/// to the cluster.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDatanodeRestarts()
{
    Configuration conf = new Configuration();
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, 1024);
    // We read from the standby to watch block locations
    HAUtil.SetAllowStandbyReads(conf, true);
    conf.SetLong(DFSConfigKeys.DfsNamenodeAccesstimePrecisionKey, 0);
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology()).NumDataNodes(1).Build();
    try
    {
        NameNode nn0 = cluster.GetNameNode(0);
        NameNode nn1 = cluster.GetNameNode(1);
        cluster.TransitionToActive(0);
        // Create 5 blocks.
        DFSTestUtil.CreateFile(cluster.GetFileSystem(0), TestFilePath, 5 * 1024, (short)1, 1L);
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        // Stop the DN.
        DataNode dn = cluster.GetDataNodes()[0];
        string dnName = dn.GetDatanodeId().GetXferAddr();
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
        // Make sure both NNs register it as dead.
        BlockManagerTestUtil.NoticeDeadDatanode(nn0, dnName);
        BlockManagerTestUtil.NoticeDeadDatanode(nn1, dnName);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        NUnit.Framework.Assert.AreEqual(5, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        // The SBN will not have any blocks in its neededReplication queue
        // since the SBN doesn't process replication.
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());
        LocatedBlocks locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has no replicas",
            0, locs.Get(0).GetLocations().Length);
        cluster.RestartDataNode(dnProps);
        // Wait for both NNs to re-register the DN.
        cluster.WaitActive(0);
        cluster.WaitActive(1);
        BlockManagerTestUtil.UpdateState(nn0.GetNamesystem().GetBlockManager());
        BlockManagerTestUtil.UpdateState(nn1.GetNamesystem().GetBlockManager());
        // Both NNs should promptly see the replicas return (the HDFS-2795 fix).
        NUnit.Framework.Assert.AreEqual(0, nn0.GetNamesystem().GetUnderReplicatedBlocks());
        NUnit.Framework.Assert.AreEqual(0, nn1.GetNamesystem().GetUnderReplicatedBlocks());
        locs = nn1.GetRpcServer().GetBlockLocations(TestFile, 0, 1);
        NUnit.Framework.Assert.AreEqual("Standby should have registered that the block has replicas again",
            1, locs.Get(0).GetLocations().Length);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Rolls back a rolling upgrade and checks the namespace reverted:
/// <paramref name="foo"/> exists again, <paramref name="bar"/> is gone, and
/// <paramref name="file"/> holds exactly its pre-upgrade <paramref name="data"/>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private static void RollbackRollingUpgrade(Path foo, Path bar, Path file, byte[]
    data, MiniDFSCluster cluster)
{
    // Take the DN down, roll the NN back, then bring the DN up again.
    MiniDFSCluster.DataNodeProperties stoppedDn = cluster.StopDataNode(0);
    cluster.RestartNameNode("-rollingUpgrade", "rollback");
    cluster.RestartDataNode(stoppedDn, true);
    // Pre-upgrade state must survive; in-upgrade changes must be undone.
    DistributedFileSystem dfs = cluster.GetFileSystem();
    NUnit.Framework.Assert.IsTrue(dfs.Exists(foo));
    NUnit.Framework.Assert.IsFalse(dfs.Exists(bar));
    AppendTestUtil.CheckFullFile(dfs, file, data.Length, data);
}
/// <summary>
/// Tests that a datanode holding a stale (failed-append) replica with an old
/// generation stamp is not reported as a valid block location after it
/// rejoins the cluster.
/// </summary>
public virtual void TestFailedAppendBlockRejection()
{
    Configuration conf = new HdfsConfiguration();
    // Disable pipeline-replacement so the stopped DN simply drops out of the
    // write pipeline rather than being substituted.
    conf.Set("dfs.client.block.write.replace-datanode-on-failure.enable", "false");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    DistributedFileSystem fs = null;
    try
    {
        fs = cluster.GetFileSystem();
        Path path = new Path("/test");
        FSDataOutputStream @out = fs.Create(path);
        @out.WriteBytes("hello\n");
        @out.Close();
        // stop one datanode
        MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
        // GetXferAddress().ToString() yields a leading '/' (e.g. "/127.0.0.1:port");
        // strip it so it matches the names reported by GetFileBlockLocations.
        string dnAddress = dnProp.datanode.GetXferAddress().ToString();
        if (dnAddress.StartsWith("/"))
        {
            dnAddress = Sharpen.Runtime.Substring(dnAddress, 1);
        }
        // append again to bump genstamps
        for (int i = 0; i < 2; i++)
        {
            @out = fs.Append(path);
            @out.WriteBytes("helloagain\n");
            @out.Close();
        }
        // re-open and make the block state as underconstruction
        @out = fs.Append(path);
        cluster.RestartDataNode(dnProp, true);
        // wait till the block report comes
        Sharpen.Thread.Sleep(2000);
        // check the block locations, this should not contain restarted datanode
        BlockLocation[] locations = fs.GetFileBlockLocations(path, 0, long.MaxValue);
        string[] names = locations[0].GetNames();
        foreach (string node in names)
        {
            if (node.Equals(dnAddress))
            {
                NUnit.Framework.Assert.Fail("Failed append should not be present in latest block locations.");
            }
        }
        @out.Close();
    }
    finally
    {
        IOUtils.CloseStream(fs);
        cluster.Shutdown();
    }
}
/// <summary>
/// Support for layout version change with rolling upgrade was
/// added by HDFS-6800 and HDFS-6981.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestWithLayoutChangeAndFinalize()
{
    long seed = unchecked((int)(0x600DF00D));
    try
    {
        StartCluster();
        Path[] paths = new Path[3];
        FilePath[] blockFiles = new FilePath[3];
        // Create two files in DFS.
        for (int i = 0; i < 2; ++i)
        {
            paths[i] = new Path("/" + GenericTestUtils.GetMethodName() + "." + i + ".dat");
            DFSTestUtil.CreateFile(fs, paths[i], BlockSize, (short)2, seed);
        }
        StartRollingUpgrade();
        // Delete the first file. The DN will save its block files in trash.
        blockFiles[0] = GetBlockForFile(paths[0], true);
        FilePath trashFile0 = GetTrashFileForBlock(blockFiles[0], false);
        DeleteAndEnsureInTrash(paths[0], blockFiles[0], trashFile0);
        // Restart the DN with a new layout version to trigger layout upgrade.
        Log.Info("Shutting down the Datanode");
        MiniDFSCluster.DataNodeProperties dnprop = cluster.StopDataNode(0);
        DFSTestUtil.AddDataNodeLayoutVersion(DataNodeLayoutVersion.CurrentLayoutVersion - 1,
            "Test Layout for TestDataNodeRollingUpgrade");
        Log.Info("Restarting the DataNode");
        cluster.RestartDataNode(dnprop, true);
        cluster.WaitActive();
        dn0 = cluster.GetDataNodes()[0];
        Log.Info("The DN has been restarted");
        // After the layout upgrade, trash must no longer be used for this block.
        NUnit.Framework.Assert.IsFalse(trashFile0.Exists());
        NUnit.Framework.Assert.IsFalse(dn0.GetStorage().GetBPStorage(blockPoolId).IsTrashAllowed(blockFiles[0]));
        // Ensure that the block file for the first file was moved from 'trash' to 'previous'.
        NUnit.Framework.Assert.IsTrue(IsBlockFileInPrevious(blockFiles[0]));
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        // Delete the second file. Ensure that its block file is in previous.
        blockFiles[1] = GetBlockForFile(paths[1], true);
        fs.Delete(paths[1], false);
        NUnit.Framework.Assert.IsTrue(IsBlockFileInPrevious(blockFiles[1]));
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        // Finalize and ensure that neither block file exists in trash or previous.
        FinalizeRollingUpgrade();
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        NUnit.Framework.Assert.IsFalse(IsBlockFileInPrevious(blockFiles[0]));
        NUnit.Framework.Assert.IsFalse(IsBlockFileInPrevious(blockFiles[1]));
    }
    finally
    {
        ShutdownCluster();
    }
}
/// <summary>
/// Shuts down the datanode and namenodes, restarts the namenode with the
/// rolling-upgrade rollback option, and brings the datanode back with
/// "-rollback" so the cluster reverts to its pre-upgrade state.
/// </summary>
/// <exception cref="System.Exception"/>
private void RollbackRollingUpgrade()
{
    Log.Info("Starting rollback of the rolling upgrade");
    // Stop the DN and tag it so it rolls back its storage when restarted.
    MiniDFSCluster.DataNodeProperties stoppedDn = cluster.StopDataNode(0);
    stoppedDn.SetDnArgs("-rollback");
    cluster.ShutdownNameNodes();
    cluster.RestartNameNode("-rollingupgrade", "rollback");
    cluster.RestartDataNode(stoppedDn);
    cluster.WaitActive();
    // Refresh cached handles; the old ones are stale after the restarts.
    nn = cluster.GetNameNode(0);
    dn0 = cluster.GetDataNodes()[0];
    TriggerHeartBeats();
    Log.Info("The cluster is active after rollback");
}
/// <summary>
/// Tests that lease recovery succeeds on a file whose replica has a
/// truncated meta file: the stream is aborted, the meta file is shortened,
/// the DN is restarted, and RecoverLease must eventually close the file.
/// </summary>
public virtual void TestBlockRecoveryWithLessMetafile()
{
    Configuration conf = new Configuration();
    conf.Set(DFSConfigKeys.DfsBlockLocalPathAccessUserKey,
        UserGroupInformation.GetCurrentUser().GetShortUserName());
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).Build();
    Path file = new Path("/testRecoveryFile");
    DistributedFileSystem dfs = cluster.GetFileSystem();
    FSDataOutputStream @out = dfs.Create(file);
    // Write ~2MB of data ("Data" is 4 bytes per iteration).
    int count = 0;
    while (count < 2 * 1024 * 1024)
    {
        @out.WriteBytes("Data");
        count += 4;
    }
    @out.Hsync();
    // abort the original stream
    ((DFSOutputStream)@out.GetWrappedStream()).Abort();
    LocatedBlocks locations = cluster.GetNameNodeRpc().GetBlockLocations(file.ToString(), 0, count);
    ExtendedBlock block = locations.Get(0).GetBlock();
    DataNode dn = cluster.GetDataNodes()[0];
    BlockLocalPathInfo localPathInfo = dn.GetBlockLocalPathInfo(block, null);
    FilePath metafile = new FilePath(localPathInfo.GetMetaPath());
    NUnit.Framework.Assert.IsTrue(metafile.Exists());
    // reduce the block meta file size
    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
    raf.SetLength(metafile.Length() - 20);
    raf.Close();
    // restart DN to make replica to RWR
    MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
    cluster.RestartDataNode(dnProp, true);
    // try to recover the lease, retrying up to ~10 seconds.
    DistributedFileSystem newdfs = (DistributedFileSystem)FileSystem.NewInstance(
        cluster.GetConfiguration(0));
    count = 0;
    while (++count < 10 && !newdfs.RecoverLease(file))
    {
        Sharpen.Thread.Sleep(1000);
    }
    NUnit.Framework.Assert.IsTrue("File should be closed", newdfs.RecoverLease(file));
}
/// <summary>
/// Tests that a datanode shuts itself down when the only nameservice it
/// talks to is reformatted with a different cluster id (invalid storage).
/// </summary>
public virtual void TestDNWithInvalidStorageWithHA()
{
    // Two NNs in one nameservice, both with cluster id "cluster-1".
    MiniDFSNNTopology top = new MiniDFSNNTopology().AddNameservice(new MiniDFSNNTopology.NSConf("ns1")
        .AddNN(new MiniDFSNNTopology.NNConf("nn0").SetClusterId("cluster-1"))
        .AddNN(new MiniDFSNNTopology.NNConf("nn1").SetClusterId("cluster-1")));
    top.SetFederation(true);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NnTopology(top).NumDataNodes(0).Build();
    try
    {
        cluster.StartDataNodes(conf, 1, true, null, null);
        // let the initialization be complete
        Sharpen.Thread.Sleep(10000);
        DataNode dn = cluster.GetDataNodes()[0];
        NUnit.Framework.Assert.IsTrue("Datanode should be running", dn.IsDatanodeUp());
        NUnit.Framework.Assert.AreEqual("BPOfferService should be running", 1, dn.GetAllBpOs().Length);
        MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
        cluster.GetNameNode(0).Stop();
        cluster.GetNameNode(1).Stop();
        Configuration nn1 = cluster.GetConfiguration(0);
        Configuration nn2 = cluster.GetConfiguration(1);
        // setting up invalid cluster
        // Reformat nn0 with a different cluster id and copy its name dirs to
        // nn1 so both NNs now belong to "cluster-2".
        HdfsServerConstants.StartupOption.Format.SetClusterId("cluster-2");
        DFSTestUtil.FormatNameNode(nn1);
        MiniDFSCluster.CopyNameDirs(FSNamesystem.GetNamespaceDirs(nn1),
            FSNamesystem.GetNamespaceDirs(nn2), nn2);
        cluster.RestartNameNode(0, false);
        cluster.RestartNameNode(1, false);
        cluster.RestartDataNode(dnProp);
        // let the initialization be complete
        Sharpen.Thread.Sleep(10000);
        dn = cluster.GetDataNodes()[0];
        // The DN's only service now has a mismatched cluster id, so it shuts down.
        NUnit.Framework.Assert.IsFalse("Datanode should have shutdown as only service failed",
            dn.IsDatanodeUp());
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Verifies that a regular "-upgrade" restart still works on a cluster that
/// has already been through a finalized rolling upgrade.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestDatanodeRUwithRegularUpgrade()
{
    try
    {
        StartCluster();
        // First complete a full rolling-upgrade cycle.
        RollingUpgradeAndFinalize();
        // Then do a classic upgrade: stop the DN, restart the NN with
        // -upgrade, and bring the DN back up.
        MiniDFSCluster.DataNodeProperties stoppedDn = cluster.StopDataNode(0);
        cluster.RestartNameNode(0, true, "-upgrade");
        cluster.RestartDataNode(stoppedDn, true);
        cluster.WaitActive();
        fs = cluster.GetFileSystem(0);
        // The upgraded cluster must still accept new writes.
        Path testFile3 = new Path("/" + GenericTestUtils.GetMethodName() + ".03.dat");
        DFSTestUtil.CreateFile(fs, testFile3, FileSize, ReplFactor, Seed);
        cluster.GetFileSystem().FinalizeUpgrade();
    }
    finally
    {
        ShutdownCluster();
    }
}
/// <summary>
/// Tests that a corrupt replica is replaced once an extra datanode rejoins:
/// with one replica corrupted the live count drops to 2, and restarting the
/// fourth DN restores 3 live replicas and 0 corrupt ones.
/// </summary>
public virtual void TestByAddingAnExtraDataNode()
{
    Configuration conf = new HdfsConfiguration();
    // Frequent block reports and a short pending-replication timeout keep
    // the test fast.
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
    conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey,
        Sharpen.Extensions.ToString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(4).Build();
    FileSystem fs = cluster.GetFileSystem();
    FSNamesystem namesystem = cluster.GetNamesystem();
    // Keep the fourth DN out of the pipeline until after corruption.
    MiniDFSCluster.DataNodeProperties dnPropsFourth = cluster.StopDataNode(3);
    try
    {
        Path fileName = new Path("/foo1");
        DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
        CorruptBlock(cluster, fs, fileName, 0, block);
        DFSTestUtil.WaitReplication(fs, fileName, (short)2);
        NUnit.Framework.Assert.AreEqual(2, CountReplicas(namesystem, block).LiveReplicas());
        NUnit.Framework.Assert.AreEqual(1, CountReplicas(namesystem, block).CorruptReplicas());
        // Bring the extra DN back; the block should re-replicate to it and
        // the corrupt replica should be invalidated.
        cluster.RestartDataNode(dnPropsFourth);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);
        NUnit.Framework.Assert.AreEqual(3, CountReplicas(namesystem, block).LiveReplicas());
        NUnit.Framework.Assert.AreEqual(0, CountReplicas(namesystem, block).CorruptReplicas());
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Support for layout version change with rolling upgrade was
/// added by HDFS-6800 and HDFS-6981.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestWithLayoutChangeAndRollback()
{
    long seed = unchecked((int)(0x600DF00D));
    try
    {
        StartCluster();
        Path[] paths = new Path[3];
        FilePath[] blockFiles = new FilePath[3];
        // Create two files in DFS.
        for (int i = 0; i < 2; ++i)
        {
            paths[i] = new Path("/" + GenericTestUtils.GetMethodName() + "." + i + ".dat");
            DFSTestUtil.CreateFile(fs, paths[i], BlockSize, (short)1, seed);
        }
        StartRollingUpgrade();
        // Delete the first file. The DN will save its block files in trash.
        blockFiles[0] = GetBlockForFile(paths[0], true);
        FilePath trashFile0 = GetTrashFileForBlock(blockFiles[0], false);
        DeleteAndEnsureInTrash(paths[0], blockFiles[0], trashFile0);
        // Restart the DN with a new layout version to trigger layout upgrade.
        Log.Info("Shutting down the Datanode");
        MiniDFSCluster.DataNodeProperties dnprop = cluster.StopDataNode(0);
        DFSTestUtil.AddDataNodeLayoutVersion(DataNodeLayoutVersion.CurrentLayoutVersion - 1,
            "Test Layout for TestDataNodeRollingUpgrade");
        Log.Info("Restarting the DataNode");
        cluster.RestartDataNode(dnprop, true);
        cluster.WaitActive();
        dn0 = cluster.GetDataNodes()[0];
        Log.Info("The DN has been restarted");
        // After the layout upgrade, trash must no longer be used for this block.
        NUnit.Framework.Assert.IsFalse(trashFile0.Exists());
        NUnit.Framework.Assert.IsFalse(dn0.GetStorage().GetBPStorage(blockPoolId).IsTrashAllowed(blockFiles[0]));
        // Ensure that the block file for the first file was moved from 'trash' to 'previous'.
        NUnit.Framework.Assert.IsTrue(IsBlockFileInPrevious(blockFiles[0]));
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        // Delete the second file. Ensure that its block file is in previous.
        blockFiles[1] = GetBlockForFile(paths[1], true);
        fs.Delete(paths[1], false);
        NUnit.Framework.Assert.IsTrue(IsBlockFileInPrevious(blockFiles[1]));
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        // Create and delete a third file. Its block file should not be
        // in either trash or previous after deletion.
        paths[2] = new Path("/" + GenericTestUtils.GetMethodName() + ".2.dat");
        DFSTestUtil.CreateFile(fs, paths[2], BlockSize, (short)1, seed);
        blockFiles[2] = GetBlockForFile(paths[2], true);
        fs.Delete(paths[2], false);
        NUnit.Framework.Assert.IsFalse(IsBlockFileInPrevious(blockFiles[2]));
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        // Rollback and ensure that the first two file contents were restored.
        RollbackRollingUpgrade();
        for (int i_1 = 0; i_1 < 2; ++i_1)
        {
            byte[] actual = DFSTestUtil.ReadFileBuffer(fs, paths[i_1]);
            byte[] calculated = DFSTestUtil.CalculateFileContentsFromSeed(seed, BlockSize);
            Assert.AssertArrayEquals(actual, calculated);
        }
        // And none of the block files must be in previous or trash.
        NUnit.Framework.Assert.IsFalse(IsTrashRootPresent());
        for (int i_2 = 0; i_2 < 3; ++i_2)
        {
            NUnit.Framework.Assert.IsFalse(IsBlockFileInPrevious(blockFiles[i_2]));
        }
    }
    finally
    {
        ShutdownCluster();
    }
}
/// <summary>
/// Tests that the NameNode's replica counting (live / excess) stays correct
/// as datanodes are stopped, declared dead, and restarted, with the block
/// replicated and then over-replicated along the way.
/// </summary>
public virtual void TestNodeCount()
{
    // start a mini dfs cluster of 2 nodes
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(ReplicationFactor).Build();
    try
    {
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager();
        FileSystem fs = cluster.GetFileSystem();
        // populate the cluster with a one block file
        Path FilePath = new Path("/testfile");
        DFSTestUtil.CreateFile(fs, FilePath, 1L, ReplicationFactor, 1L);
        DFSTestUtil.WaitReplication(fs, FilePath, ReplicationFactor);
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, FilePath);
        // keep a copy of all datanode descriptor
        DatanodeDescriptor[] datanodes = hm.GetDatanodes();
        // start two new nodes
        cluster.StartDataNodes(conf, 2, true, null, null);
        cluster.WaitActive();
        // bring down first datanode
        DatanodeDescriptor datanode = datanodes[0];
        MiniDFSCluster.DataNodeProperties dnprop = cluster.StopDataNode(datanode.GetXferAddr());
        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), datanode.GetXferAddr());
        // the block will be replicated
        DFSTestUtil.WaitReplication(fs, FilePath, ReplicationFactor);
        // restart the first datanode
        cluster.RestartDataNode(dnprop);
        cluster.WaitActive();
        // check if excessive replica is detected (transient)
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() == 0)
        {
            CheckTimeout("excess replicas not detected");
        }
        // find out a non-excess node
        DatanodeDescriptor nonExcessDN = null;
        foreach (DatanodeStorageInfo storage in bm.blocksMap.GetStorages(block.GetLocalBlock()))
        {
            DatanodeDescriptor dn = storage.GetDatanodeDescriptor();
            ICollection<Block> blocks = bm.excessReplicateMap[dn.GetDatanodeUuid()];
            if (blocks == null || !blocks.Contains(block.GetLocalBlock()))
            {
                nonExcessDN = dn;
                break;
            }
        }
        NUnit.Framework.Assert.IsTrue(nonExcessDN != null);
        // bring down non excessive datanode
        dnprop = cluster.StopDataNode(nonExcessDN.GetXferAddr());
        // make sure that NN detects that the datanode is down
        BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), nonExcessDN.GetXferAddr());
        // The block should be replicated
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).LiveReplicas() != ReplicationFactor)
        {
            CheckTimeout("live replica count not correct", 1000);
        }
        // restart the first datanode
        cluster.RestartDataNode(dnprop);
        cluster.WaitActive();
        // check if excessive replica is detected (transient)
        InitializeTimeout(Timeout);
        while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() != 2)
        {
            CheckTimeout("excess replica count not equal to 2");
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Tests that when over-replication is resolved, the corrupt replica is
/// never the one kept: with replication reduced to 1, the remaining live
/// replica must be a good one (regression coverage for HDFS-4910-style loss).
/// </summary>
public virtual void TestProcesOverReplicateBlock()
{
    Configuration conf = new HdfsConfiguration();
    conf.SetLong(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, 100L);
    conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
    conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey,
        Sharpen.Extensions.ToString(2));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    FileSystem fs = cluster.GetFileSystem();
    try
    {
        Path fileName = new Path("/foo1");
        DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)3);
        // corrupt the block on datanode 0
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
        NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(0, block));
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
        // remove block scanner log to trigger block scanning
        FilePath scanCursor = new FilePath(new FilePath(MiniDFSCluster.GetFinalizedDir(
            cluster.GetInstanceStorageDir(0, 0),
            cluster.GetNamesystem().GetBlockPoolId()).GetParent()).GetParent(), "scanner.cursor");
        // wait for one minute for deletion to succeed
        for (int i = 0; !scanCursor.Delete(); i++)
        {
            NUnit.Framework.Assert.IsTrue("Could not delete " + scanCursor.GetAbsolutePath()
                + " in one minute", i < 60);
            try
            {
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception)
            {
                // Ignore interruption; keep retrying until the minute is up.
            }
        }
        // restart the datanode so the corrupt replica will be detected
        cluster.RestartDataNode(dnProps);
        DFSTestUtil.WaitReplication(fs, fileName, (short)2);
        string blockPoolId = cluster.GetNamesystem().GetBlockPoolId();
        DatanodeID corruptDataNode = DataNodeTestUtils.GetDNRegistrationForBP(
            cluster.GetDataNodes()[2], blockPoolId);
        FSNamesystem namesystem = cluster.GetNamesystem();
        BlockManager bm = namesystem.GetBlockManager();
        HeartbeatManager hm = bm.GetDatanodeManager().GetHeartbeatManager();
        try
        {
            namesystem.WriteLock();
            lock (hm)
            {
                // set live datanode's remaining space to be 0
                // so they will be chosen to be deleted when over-replication occurs
                string corruptMachineName = corruptDataNode.GetXferAddr();
                foreach (DatanodeDescriptor datanode in hm.GetDatanodes())
                {
                    if (!corruptMachineName.Equals(datanode.GetXferAddr()))
                    {
                        datanode.GetStorageInfos()[0].SetUtilizationForTesting(100L, 100L, 0, 100L);
                        datanode.UpdateHeartbeat(BlockManagerTestUtil.GetStorageReportsForDatanode(datanode),
                            0L, 0L, 0, 0, null);
                    }
                }
                // decrease the replication factor to 1;
                NameNodeAdapter.SetReplication(namesystem, fileName.ToString(), (short)1);
                // corrupt one won't be chosen to be excess one
                // without 4910 the number of live replicas would be 0: block gets lost
                NUnit.Framework.Assert.AreEqual(1, bm.CountNodes(block.GetLocalBlock()).LiveReplicas());
            }
        }
        finally
        {
            namesystem.WriteUnlock();
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Regression test for HDFS-4799, a case where, upon restart, if there
/// were RWR replicas with out-of-date genstamps, the NN could accidentally
/// delete good replicas instead of the bad replicas.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestRWRInvalidation()
{
    Configuration conf = new HdfsConfiguration();
    // Set the deletion policy to be randomized rather than the default.
    // The default is based on disk space, which isn't controllable
    // in the context of the test, whereas a random one is more accurate
    // to what is seen in real clusters (nodes have random amounts of free
    // space)
    conf.SetClass(DFSConfigKeys.DfsBlockReplicatorClassnameKey,
        typeof(TestDNFencing.RandomDeleterPolicy), typeof(BlockPlacementPolicy));
    // Speed up the test a bit with faster heartbeats.
    conf.SetInt(DFSConfigKeys.DfsHeartbeatIntervalKey, 1);
    // Test with a bunch of separate files, since otherwise the test may
    // fail just due to "good luck", even if a bug is present.
    IList<Path> testPaths = Lists.NewArrayList();
    for (int i = 0; i < 10; i++)
    {
        testPaths.AddItem(new Path("/test" + i));
    }
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build();
    try
    {
        IList<FSDataOutputStream> streams = Lists.NewArrayList();
        try
        {
            // Open the test files and write some data to each
            foreach (Path path in testPaths)
            {
                FSDataOutputStream @out = cluster.GetFileSystem().Create(path, (short)2);
                streams.AddItem(@out);
                @out.WriteBytes("old gs data\n");
                @out.Hflush();
            }
            // Shutdown one of the nodes in the pipeline
            MiniDFSCluster.DataNodeProperties oldGenstampNode = cluster.StopDataNode(0);
            // Write some more data and flush again. This data will only
            // be in the latter genstamp copy of the blocks.
            for (int i_1 = 0; i_1 < streams.Count; i_1++)
            {
                Path path_1 = testPaths[i_1];
                FSDataOutputStream @out = streams[i_1];
                @out.WriteBytes("new gs data\n");
                @out.Hflush();
                // Set replication so that only one node is necessary for this block,
                // and close it.
                cluster.GetFileSystem().SetReplication(path_1, (short)1);
                @out.Close();
            }
            // Upon restart, there will be two replicas, one with an old genstamp
            // and one current copy. This test wants to ensure that the old genstamp
            // copy is the one that is deleted.
            Log.Info("=========================== restarting cluster");
            MiniDFSCluster.DataNodeProperties otherNode = cluster.StopDataNode(0);
            cluster.RestartNameNode();
            // Restart the datanode with the corrupt replica first.
            cluster.RestartDataNode(oldGenstampNode);
            cluster.WaitActive();
            // Then the other node
            cluster.RestartDataNode(otherNode);
            cluster.WaitActive();
            // Compute and send invalidations, waiting until they're fully processed.
            cluster.GetNameNode().GetNamesystem().GetBlockManager().ComputeInvalidateWork(2);
            cluster.TriggerHeartbeats();
            HATestUtil.WaitForDNDeletions(cluster);
            cluster.TriggerDeletionReports();
            // Make sure we can still read the blocks.
            foreach (Path path_2 in testPaths)
            {
                string ret = DFSTestUtil.ReadFile(cluster.GetFileSystem(), path_2);
                NUnit.Framework.Assert.AreEqual("old gs data\n" + "new gs data\n", ret);
            }
        }
        finally
        {
            IOUtils.Cleanup(Log, Sharpen.Collections.ToArray(streams, new IDisposable[0]));
        }
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Append to a file several times with CreateFlag.NewBlock (bumping the
/// genstamp each time) while one datanode is stopped, then verify the block
/// count, per-block sizes, and file content — both before and after a
/// namenode restart, to confirm the edit log replays correctly.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestMultiAppend2()
{
    Configuration conf = new HdfsConfiguration();
    // Let the pipeline proceed with fewer datanodes rather than trying to
    // replace the one we stop below.
    conf.Set("dfs.client.block.write.replace-datanode-on-failure.enable", "false");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    DistributedFileSystem fs = null;
    string hello = "hello\n";
    try
    {
        fs = cluster.GetFileSystem();
        Path path = new Path("/test");
        FSDataOutputStream @out = fs.Create(path);
        @out.WriteBytes(hello);
        @out.Close();
        // stop one datanode
        MiniDFSCluster.DataNodeProperties dnProp = cluster.StopDataNode(0);
        string dnAddress = dnProp.datanode.GetXferAddress().ToString();
        // GetXferAddress().ToString() yields "/host:port"; strip the slash.
        if (dnAddress.StartsWith("/"))
        {
            dnAddress = Sharpen.Runtime.Substring(dnAddress, 1);
        }
        // append again to bump genstamps
        for (int i = 0; i < 2; i++)
        {
            @out = fs.Append(path, EnumSet.Of(CreateFlag.Append, CreateFlag.NewBlock), 4096, null);
            @out.WriteBytes(hello);
            @out.Close();
        }
        // re-open and make the block state as underconstruction
        @out = fs.Append(path, EnumSet.Of(CreateFlag.Append, CreateFlag.NewBlock), 4096, null);
        cluster.RestartDataNode(dnProp, true);
        // wait till the block report comes
        // NOTE(review): fixed sleep is a potential flakiness source — a
        // block-report wait/trigger utility would be more robust; confirm
        // against what this test harness provides.
        Sharpen.Thread.Sleep(2000);
        @out.WriteBytes(hello);
        @out.Close();
        // check the block locations
        LocatedBlocks blocks = fs.GetClient().GetLocatedBlocks(path.ToString(), 0L);
        // since we append the file 3 times, there should be 4 blocks
        NUnit.Framework.Assert.AreEqual(4, blocks.GetLocatedBlocks().Count);
        foreach (LocatedBlock block in blocks.GetLocatedBlocks())
        {
            NUnit.Framework.Assert.AreEqual(hello.Length, block.GetBlockSize());
        }
        // Expected file content is "hello\n" written 4 times (1 create + 3 appends).
        StringBuilder sb = new StringBuilder();
        for (int i_1 = 0; i_1 < 4; i_1++)
        {
            sb.Append(hello);
        }
        byte[] content = Sharpen.Runtime.GetBytesForString(sb.ToString());
        AppendTestUtil.CheckFullFile(fs, path, content.Length, content, "Read /test");
        // restart namenode to make sure the editlog can be properly applied
        cluster.RestartNameNode(true);
        cluster.WaitActive();
        AppendTestUtil.CheckFullFile(fs, path, content.Length, content, "Read /test");
        blocks = fs.GetClient().GetLocatedBlocks(path.ToString(), 0L);
        // since we append the file 3 times, there should be 4 blocks
        NUnit.Framework.Assert.AreEqual(4, blocks.GetLocatedBlocks().Count);
        foreach (LocatedBlock block_1 in blocks.GetLocatedBlocks())
        {
            NUnit.Framework.Assert.AreEqual(hello.Length, block_1.GetBlockSize());
        }
    }
    finally
    {
        IOUtils.CloseStream(fs);
        cluster.Shutdown();
    }
}
/// <summary>
/// Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
/// as dead before decommission has completed.
/// </summary>
/// <remarks>
/// Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
/// as dead before decommission has completed. That will allow DN to resume
/// the replication process after it rejoins the cluster.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDecommissionStatusAfterDNRestart()
{
    DistributedFileSystem fileSys = (DistributedFileSystem)cluster.GetFileSystem();
    // Create a file with one block. That block has one replica.
    Path f = new Path("decommission.dat");
    DFSTestUtil.CreateFile(fileSys, f, fileSize, fileSize, fileSize, (short)1, seed);
    // Find the DN that owns the only replica.
    RemoteIterator<LocatedFileStatus> fileList = fileSys.ListLocatedStatus(f);
    BlockLocation[] blockLocations = fileList.Next().GetBlockLocations();
    string dnName = blockLocations[0].GetNames()[0];
    // Decommission the DN.
    FSNamesystem fsn = cluster.GetNamesystem();
    DatanodeManager dm = fsn.GetBlockManager().GetDatanodeManager();
    DecommissionNode(fsn, localFileSys, dnName);
    dm.RefreshNodes(conf);
    // Stop the DN when decommission is in progress.
    // Given DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is to 1 and the size of
    // the block, it will take much longer time that test timeout value for
    // the decommission to complete. So when stopDataNode is called,
    // decommission should be in progress.
    MiniDFSCluster.DataNodeProperties dataNodeProperties = cluster.StopDataNode(dnName
        );
    // Poll until the NN marks exactly one node as dead.
    IList<DatanodeDescriptor> dead = new AList<DatanodeDescriptor>();
    while (true)
    {
        dm.FetchDatanodes(null, dead, false);
        if (dead.Count == 1)
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }
    // Force removal of the dead node's blocks.
    BlockManagerTestUtil.CheckHeartbeat(fsn.GetBlockManager());
    // Force DatanodeManager to check decommission state.
    BlockManagerTestUtil.RecheckDecommissionState(dm);
    // Verify that the DN remains in DECOMMISSION_INPROGRESS state: decommission
    // must not be declared complete while the node is dead and the file is
    // still under-replicated.
    NUnit.Framework.Assert.IsTrue("the node should be DECOMMISSION_IN_PROGRESSS", dead
        [0].IsDecommissionInProgress());
    // Check DatanodeManager#getDecommissionNodes, make sure it returns
    // the node as decommissioning, even if it's dead
    IList<DatanodeDescriptor> decomlist = dm.GetDecommissioningNodes();
    NUnit.Framework.Assert.IsTrue("The node should be be decommissioning", decomlist.
        Count == 1);
    // Delete the under-replicated file, which should let the
    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
    CleanupFile(fileSys, f);
    BlockManagerTestUtil.RecheckDecommissionState(dm);
    NUnit.Framework.Assert.IsTrue("the node should be decommissioned", dead[0].IsDecommissioned
        ());
    // Add the node back
    cluster.RestartDataNode(dataNodeProperties, true);
    cluster.WaitActive();
    // Call refreshNodes on FSNamesystem with empty exclude file.
    // This will remove the datanodes from decommissioning list and
    // make them available again.
    WriteConfigFile(localFileSys, excludeFile, null);
    dm.RefreshNodes(conf);
}
/// <summary>
/// Verify that when a datanode is wiped and restarted with a new storage ID,
/// the standby NN drops pending datanode messages (e.g. a queued corrupt
/// replica report with a stale genstamp) associated with the old storage ID,
/// so that a subsequent failover to that NN succeeds.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestChangedStorageId()
{
    HdfsConfiguration conf = new HdfsConfiguration();
    // Tail edits every second so the standby stays close to the active.
    conf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(1).NnTopology
        (MiniDFSNNTopology.SimpleHATopology()).Build();
    try
    {
        cluster.TransitionToActive(0);
        FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
        OutputStream @out = fs.Create(filePath);
        @out.Write(Sharpen.Runtime.GetBytesForString("foo bar baz"));
        @out.Close();
        HATestUtil.WaitForStandbyToCatchUp(cluster.GetNameNode(0), cluster.GetNameNode(1)
            );
        // Change the gen stamp of the block on datanode to go back in time (gen
        // stamps start at 1000)
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, filePath);
        NUnit.Framework.Assert.IsTrue(cluster.ChangeGenStampOfBlock(0, block, 900));
        // Stop the DN so the replica with the changed gen stamp will be reported
        // when this DN starts up.
        MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
        // Restart the namenode so that when the DN comes up it will see an initial
        // block report.
        cluster.RestartNameNode(1, false);
        NUnit.Framework.Assert.IsTrue(cluster.RestartDataNode(dnProps, true));
        // Wait until the standby NN queues up the corrupt block in the pending DN
        // message queue.
        while (cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount(
            ) < 1)
        {
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
        }
        NUnit.Framework.Assert.AreEqual(1, cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount
            ());
        string oldStorageId = GetRegisteredDatanodeUid(cluster, 1);
        // Reformat/restart the DN.
        NUnit.Framework.Assert.IsTrue(WipeAndRestartDn(cluster, 0));
        // Give the DN time to start up and register, which will cause the
        // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
        string newStorageId = string.Empty;
        do
        {
            ThreadUtil.SleepAtLeastIgnoreInterrupts(1000);
            newStorageId = GetRegisteredDatanodeUid(cluster, 1);
            System.Console.Out.WriteLine("====> oldStorageId: " + oldStorageId + " newStorageId: "
                 + newStorageId);
        }
        while (newStorageId.Equals(oldStorageId));
        // The pending message tied to the old storage ID must have been purged.
        NUnit.Framework.Assert.AreEqual(0, cluster.GetNamesystem(1).GetBlockManager().GetPendingDataNodeMessageCount
            ());
        // Now try to fail over.
        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Verify MiniDFSCluster datanode socket address configuration: datanodes
/// bind to localhost (127.0.0.1) by default when the dfs.datanode.* address
/// properties are unset, and honor explicitly configured addresses
/// (0.0.0.0 here) when restarted with them set.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestDFSAddressConfig()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();
    // Fix: run the body under try/finally so the cluster is shut down even
    // when an assertion fails mid-test (the other tests in this file already
    // follow this pattern); previously a failure leaked the cluster.
    try
    {
        /*-------------------------------------------------------------------------
         * By default, the DataNode socket address should be localhost (127.0.0.1).
         *------------------------------------------------------------------------*/
        cluster.WaitActive();
        AList<DataNode> dns = cluster.GetDataNodes();
        DataNode dn = dns[0];
        string selfSocketAddr = dn.GetXferAddress().ToString();
        System.Console.Out.WriteLine("DN Self Socket Addr == " + selfSocketAddr);
        NUnit.Framework.Assert.IsTrue(selfSocketAddr.Contains("/127.0.0.1:"));
        /*-------------------------------------------------------------------------
         * Shut down the datanodes, reconfigure, and bring them back up.
         * Even if told to use the configuration properties for dfs.datanode,
         * MiniDFSCluster.startDataNodes() should use localhost as the default if
         * the dfs.datanode properties are not set.
         *------------------------------------------------------------------------*/
        for (int i = 0; i < dns.Count; i++)
        {
            MiniDFSCluster.DataNodeProperties dnp = cluster.StopDataNode(i);
            NUnit.Framework.Assert.IsNotNull("Should have been able to stop simulated datanode"
                , dnp);
        }
        conf.Unset(DFSConfigKeys.DfsDatanodeAddressKey);
        conf.Unset(DFSConfigKeys.DfsDatanodeHttpAddressKey);
        conf.Unset(DFSConfigKeys.DfsDatanodeIpcAddressKey);
        cluster.StartDataNodes(conf, 1, true, HdfsServerConstants.StartupOption.Regular,
            null, null, null, false, true);
        dns = cluster.GetDataNodes();
        dn = dns[0];
        selfSocketAddr = dn.GetXferAddress().ToString();
        System.Console.Out.WriteLine("DN Self Socket Addr == " + selfSocketAddr);
        // assert that default self socket address is 127.0.0.1
        NUnit.Framework.Assert.IsTrue(selfSocketAddr.Contains("/127.0.0.1:"));
        /*-------------------------------------------------------------------------
         * Shut down the datanodes, reconfigure, and bring them back up.
         * This time, modify the dfs.datanode properties and make sure that they
         * are used to configure sockets by MiniDFSCluster.startDataNodes().
         *------------------------------------------------------------------------*/
        for (int i_1 = 0; i_1 < dns.Count; i_1++)
        {
            MiniDFSCluster.DataNodeProperties dnp = cluster.StopDataNode(i_1);
            NUnit.Framework.Assert.IsNotNull("Should have been able to stop simulated datanode"
                , dnp);
        }
        conf.Set(DFSConfigKeys.DfsDatanodeAddressKey, "0.0.0.0:0");
        conf.Set(DFSConfigKeys.DfsDatanodeHttpAddressKey, "0.0.0.0:0");
        conf.Set(DFSConfigKeys.DfsDatanodeIpcAddressKey, "0.0.0.0:0");
        cluster.StartDataNodes(conf, 1, true, HdfsServerConstants.StartupOption.Regular,
            null, null, null, false, true);
        dns = cluster.GetDataNodes();
        dn = dns[0];
        selfSocketAddr = dn.GetXferAddress().ToString();
        System.Console.Out.WriteLine("DN Self Socket Addr == " + selfSocketAddr);
        // assert that the explicitly configured self socket address is 0.0.0.0
        NUnit.Framework.Assert.IsTrue(selfSocketAddr.Contains("/0.0.0.0:"));
    }
    finally
    {
        cluster.Shutdown();
    }
}
/// <summary>
/// Verify that datanodes placed on the client's exclude list after a pipeline
/// failure are "forgiven" (removed from the list) once the configured cache
/// expiry interval elapses, so writes can succeed using those nodes again.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestExcludedNodesForgiveness()
{
    // Forgive nodes in under 2.5s for this test case.
    conf.SetLong(DFSConfigKeys.DfsClientWriteExcludeNodesCacheExpiryInterval, 2500);
    // We'll be using a 512 bytes block size just for tests
    // so making sure the checksum bytes too match it.
    conf.SetInt("io.bytes.per.checksum", 512);
    cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
    IList<MiniDFSCluster.DataNodeProperties> props = cluster.dataNodes;
    FileSystem fs = cluster.GetFileSystem();
    Path filePath = new Path("/testForgivingExcludedNodes");
    // 256 bytes data chunk for writes
    byte[] bytes = new byte[256];
    for (int index = 0; index < bytes.Length; index++)
    {
        bytes[index] = (byte)('0');
    }
    // File with a 512 bytes block size
    FSDataOutputStream @out = fs.Create(filePath, true, 4096, (short)3, 512);
    // Write a block to all 3 DNs (2x256bytes).
    @out.Write(bytes);
    @out.Write(bytes);
    @out.Hflush();
    // Remove two DNs, to put them into the exclude list.
    MiniDFSCluster.DataNodeProperties two = cluster.StopDataNode(2);
    MiniDFSCluster.DataNodeProperties one = cluster.StopDataNode(1);
    // Write another block.
    // At this point, we have two nodes already in excluded list.
    @out.Write(bytes);
    @out.Write(bytes);
    @out.Hflush();
    // Bring back the older DNs, since they are gonna be forgiven only
    // afterwards of this previous block write.
    NUnit.Framework.Assert.AreEqual(true, cluster.RestartDataNode(one, true));
    NUnit.Framework.Assert.AreEqual(true, cluster.RestartDataNode(two, true));
    cluster.WaitActive();
    // Sleep for 5s, to let the excluded nodes be expired
    // from the excludes list (i.e. forgiven after the configured wait period).
    // [Sleeping just in case the restart of the DNs completed < 5s cause
    // otherwise, we'll end up quickly excluding those again.]
    ThreadUtil.SleepAtLeastIgnoreInterrupts(5000);
    // Terminate the last good DN, to assert that there's no
    // single-DN-available scenario, caused by not forgiving the other
    // two by now.
    cluster.StopDataNode(0);
    try
    {
        // Attempt writing another block, which should still pass
        // cause the previous two should have been forgiven by now,
        // while the last good DN added to excludes this time.
        @out.Write(bytes);
        @out.Hflush();
        @out.Close();
    }
    catch (Exception e)
    {
        // Deliberately converts any failure into an assertion failure with
        // context, rather than letting the raw exception propagate.
        NUnit.Framework.Assert.Fail("Excluded DataNodes should be forgiven after a while and "
             + "not cause file writing exception of: '" + e.Message + "'");
    }
}