/// <summary>
/// Checks the blocks-scheduled counter of a datanode while a file is being written.
/// </summary>
/// <remarks>
/// Starts a mini cluster with a default <c>HdfsConfiguration</c>, issues 1024
/// single-value writes to a new file and hflushes it, then asserts that the first
/// descriptor returned by <c>FetchDatanodes</c> reports one scheduled block.
/// After the stream is closed the same descriptor must report zero.
/// </remarks>
public virtual void TestBlocksScheduledCounter()
{
    HdfsConfiguration configuration = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(configuration).Build();
    cluster.WaitActive();
    fs = cluster.GetFileSystem();

    // Open a file and write a few bytes.
    Path target = new Path("/testBlockScheduledCounter");
    FSDataOutputStream stream = fs.Create(target);
    int value = 0;
    while (value < 1024)
    {
        stream.Write(value);
        value++;
    }

    // Flush to make sure a block is allocated.
    stream.Hflush();

    DatanodeManager datanodeManager = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
    AList<DatanodeDescriptor> datanodes = new AList<DatanodeDescriptor>();
    // The same list instance is passed for both list arguments.
    datanodeManager.FetchDatanodes(datanodes, datanodes, false);
    DatanodeDescriptor firstDatanode = datanodes[0];
    NUnit.Framework.Assert.AreEqual(1, firstDatanode.GetBlocksScheduled());

    // Close the file; the counter should go back to zero.
    stream.Close();
    NUnit.Framework.Assert.AreEqual(0, firstDatanode.GetBlocksScheduled());
}
/// <summary>Verify the support for decommissioning a datanode that is already dead.</summary>
/// <remarks>
/// Under this scenario the datanode should immediately be marked as
/// DECOMMISSIONED. The test stops datanode 0, waits for the state change via
/// <c>DFSTestUtil.WaitForDatanodeState</c>, requests its decommission, refreshes
/// the node list and asserts <c>IsDecommissioned()</c>. Afterwards the node is
/// restarted and the exclude file is rewritten with <c>null</c> content so the
/// node becomes available again.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestDecommissionDeadDN()
{
    // Turn on debug logging for the decommission manager.
    Logger.GetLogger(typeof(DecommissionManager)).SetLevel(Level.Debug);

    DatanodeID deadNodeId = cluster.GetDataNodes()[0].GetDatanodeId();
    string deadNodeAddress = deadNodeId.GetXferAddr();

    // Stop the node and wait (up to 30000, presumably milliseconds) for the state change.
    MiniDFSCluster.DataNodeProperties stoppedNode = cluster.StopDataNode(0);
    DFSTestUtil.WaitForDatanodeState(cluster, deadNodeId.GetDatanodeUuid(), false, 30000);

    FSNamesystem namesystem = cluster.GetNamesystem();
    DatanodeManager datanodeManager = namesystem.GetBlockManager().GetDatanodeManager();
    DatanodeDescriptor descriptor = datanodeManager.GetDatanode(deadNodeId);

    DecommissionNode(namesystem, localFileSys, deadNodeAddress);
    datanodeManager.RefreshNodes(conf);
    BlockManagerTestUtil.RecheckDecommissionState(datanodeManager);
    NUnit.Framework.Assert.IsTrue(descriptor.IsDecommissioned());

    // Add the node back.
    cluster.RestartDataNode(stoppedNode, true);
    cluster.WaitActive();

    // Call refreshNodes with an empty exclude file to remove the datanode
    // from the decommissioning list and make it available again.
    WriteConfigFile(localFileSys, excludeFile, null);
    datanodeManager.RefreshNodes(conf);
}
/// <summary>
/// Checks NameNode tracking of a particular DataNode for correct reporting of
/// failed volumes.
/// </summary>
/// <remarks>
/// The descriptor is looked up through the current <c>cluster</c> namesystem
/// rather than through <paramref name="dm"/>, so the check always targets the
/// NameNode that <c>cluster</c> currently exposes.
/// </remarks>
/// <param name="dm">
/// DatanodeManager supplied by the caller; currently not consulted by this
/// method (see remarks)
/// </param>
/// <param name="dn">DataNode to check</param>
/// <param name="expectCapacityKnown">
/// if true, then expect that the capacities of the
/// volumes were known before the failures, and therefore the lost capacity
/// can be reported
/// </param>
/// <param name="expectedFailedVolumes">expected locations of failed volumes</param>
/// <exception cref="System.Exception">if there is any failure</exception>
private void CheckFailuresAtNameNode(DatanodeManager dm, DataNode dn, bool expectCapacityKnown, params string[] expectedFailedVolumes)
{
    DatanodeDescriptor dd = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager()
        .GetDatanode(dn.GetDatanodeId());
    NUnit.Framework.Assert.AreEqual(expectedFailedVolumes.Length, dd.GetVolumeFailures());

    VolumeFailureSummary volumeFailureSummary = dd.GetVolumeFailureSummary();
    if (expectedFailedVolumes.Length > 0)
    {
        // Fail with a clear assertion instead of a NullReferenceException when
        // failures are expected but no summary has been recorded.
        NUnit.Framework.Assert.IsNotNull(volumeFailureSummary);
        Assert.AssertArrayEquals(expectedFailedVolumes, volumeFailureSummary.GetFailedStorageLocations());
        NUnit.Framework.Assert.IsTrue(volumeFailureSummary.GetLastVolumeFailureDate() > 0);
        long expectedCapacityLost = GetExpectedCapacityLost(expectCapacityKnown, expectedFailedVolumes.Length);
        NUnit.Framework.Assert.AreEqual(expectedCapacityLost, volumeFailureSummary.GetEstimatedCapacityLostTotal());
    }
    else
    {
        // No failures expected: there must be no summary at all.
        NUnit.Framework.Assert.IsNull(volumeFailureSummary);
    }
}
/// <summary>
/// Verifies that the "StaleDataNodes" gauge follows datanodes becoming stale
/// and fresh again.
/// </summary>
/// <remarks>
/// Heartbeats are disabled on the first two datanodes and their last-update
/// time is moved back by more than the configured stale interval; after a
/// heartbeat check the gauge must read 2. Heartbeats are then re-enabled, the
/// offset is reset to 0, and after another check the gauge must read 0.
/// </remarks>
public virtual void TestStaleNodes()
{
    // Set two datanodes as stale
    for (int staleIdx = 0; staleIdx < 2; staleIdx++)
    {
        DataNode dataNode = cluster.GetDataNodes()[staleIdx];
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dataNode, true);
        long staleInterval = Conf.GetLong(
            DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey,
            DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalDefault);
        var datanodeManager = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager();
        DatanodeDescriptor descriptor = datanodeManager.GetDatanode(dataNode.GetDatanodeId());
        // Push the last update just beyond the stale interval.
        DFSTestUtil.ResetLastUpdatesWithOffset(descriptor, -(staleInterval + 1));
    }

    // Let HeartbeatManager check heartbeats
    BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager());
    MetricsAsserts.AssertGauge("StaleDataNodes", 2, MetricsAsserts.GetMetrics(NsMetrics));

    // Reset stale datanodes
    for (int freshIdx = 0; freshIdx < 2; freshIdx++)
    {
        DataNode dataNode = cluster.GetDataNodes()[freshIdx];
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(dataNode, false);
        var datanodeManager = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager();
        DatanodeDescriptor descriptor = datanodeManager.GetDatanode(dataNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(descriptor, 0);
    }

    // Let HeartbeatManager refresh
    BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager());
    MetricsAsserts.AssertGauge("StaleDataNodes", 0, MetricsAsserts.GetMetrics(NsMetrics));
}
/// <summary>
/// Asserts the three counters exposed by a node's <c>decommissioningStatus</c>.
/// </summary>
/// <param name="decommNode">descriptor whose decommissioning status is inspected</param>
/// <param name="expectedUnderRep">expected number of under-replicated blocks</param>
/// <param name="expectedDecommissionOnly">expected number of decommission-only replicas</param>
/// <param name="expectedUnderRepInOpenFiles">expected number of under-replicated blocks in open files</param>
private void CheckDecommissionStatus(DatanodeDescriptor decommNode, int expectedUnderRep, int expectedDecommissionOnly, int expectedUnderRepInOpenFiles)
{
    var status = decommNode.decommissioningStatus;

    NUnit.Framework.Assert.AreEqual(
        "Unexpected num under-replicated blocks",
        expectedUnderRep,
        status.GetUnderReplicatedBlocks());

    NUnit.Framework.Assert.AreEqual(
        "Unexpected number of decom-only replicas",
        expectedDecommissionOnly,
        status.GetDecommissionOnlyReplicas());

    NUnit.Framework.Assert.AreEqual(
        "Unexpected number of replicas in under-replicated open files",
        expectedUnderRepInOpenFiles,
        status.GetUnderReplicatedInOpenFiles());
}
/// <summary>
/// Asserts that <c>cluster.Contains</c> is true for every entry of
/// <c>dataNodes</c> and false for a descriptor that was never added.
/// </summary>
public virtual void TestContains()
{
    DatanodeDescriptor outsider = DFSTestUtil.GetDatanodeDescriptor("8.8.8.8", "/d2/r4");

    foreach (DatanodeDescriptor member in dataNodes)
    {
        NUnit.Framework.Assert.IsTrue(cluster.Contains(member));
    }

    NUnit.Framework.Assert.IsFalse(cluster.Contains(outsider));
}
/// <summary>
/// Verifies that adding a node at rack level next to an existing rack is rejected.
/// </summary>
/// <remarks>
/// Two nodes are added under "/d1/r1"; adding a third node directly under
/// "/d1" must throw <c>NetworkTopology.InvalidTopologyException</c> whose
/// message starts with "Failed to add " and explains the rack / non-rack
/// conflict.
/// </remarks>
public virtual void TestCreateInvalidTopology()
{
    NetworkTopology invalCluster = new NetworkTopology();
    DatanodeDescriptor[] invalDataNodes = new DatanodeDescriptor[]
    {
        DFSTestUtil.GetDatanodeDescriptor("1.1.1.1", "/d1/r1"),
        DFSTestUtil.GetDatanodeDescriptor("2.2.2.2", "/d1/r1"),
        DFSTestUtil.GetDatanodeDescriptor("3.3.3.3", "/d1")
    };
    invalCluster.Add(invalDataNodes[0]);
    invalCluster.Add(invalDataNodes[1]);
    try
    {
        invalCluster.Add(invalDataNodes[2]);
        NUnit.Framework.Assert.Fail("expected InvalidTopologyException");
    }
    catch (NetworkTopology.InvalidTopologyException e)
    {
        // Ordinal comparison: the prefix is a fixed, non-linguistic string, so
        // the check must not depend on the current culture.
        NUnit.Framework.Assert.IsTrue(
            e.Message.StartsWith("Failed to add ", System.StringComparison.Ordinal));
        NUnit.Framework.Assert.IsTrue(e.Message.Contains(
            "You cannot have a rack and a non-rack node at the same "
            + "level of the network topology."));
    }
}
/// <summary>
/// Sends a heartbeat for the given datanode to the namesystem.
/// </summary>
/// <remarks>
/// The storage reports come from
/// <c>BlockManagerTestUtil.GetStorageReportsForDatanode</c>, and the cache
/// capacity and remaining cache are read from <paramref name="dd"/>. The
/// remaining <c>HandleHeartbeat</c> arguments are passed as 0, 0, 0 and null.
/// </remarks>
/// <param name="nodeReg">registration of the datanode sending the heartbeat</param>
/// <param name="dd">descriptor supplying storage reports and cache figures</param>
/// <param name="namesystem">namesystem that handles the heartbeat</param>
/// <returns>the response returned by <c>HandleHeartbeat</c></returns>
/// <exception cref="System.IO.IOException"/>
public static HeartbeatResponse SendHeartBeat(DatanodeRegistration nodeReg, DatanodeDescriptor dd, FSNamesystem namesystem)
{
    var storageReports = BlockManagerTestUtil.GetStorageReportsForDatanode(dd);
    var cacheCapacity = dd.GetCacheCapacity();
    var cacheRemaining = dd.GetCacheRemaining();

    return namesystem.HandleHeartbeat(
        nodeReg,
        storageReports,
        cacheCapacity,
        cacheRemaining,
        0,
        0,
        0,
        null);
}
/// <summary>
/// Test the scenario where the NN fails over after issuing a block
/// synchronization request, but before it is committed.
/// </summary>
/// <remarks>
/// The DN running the recovery should then fail to commit the synchronization
/// and a later retry will succeed. The commitBlockSynchronization call of the
/// expected primary datanode is intercepted with a delaying answer, the
/// failover is performed while the call is held, and the released call is
/// expected to throw an error mentioning that WRITE operations are not
/// supported.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestFailoverRightBeforeCommitSynchronization()
{
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);

    FSDataOutputStream halfBlockStream = null;
    MiniDFSCluster haCluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3)
        .Build();
    try
    {
        haCluster.WaitActive();
        haCluster.TransitionToActive(0);
        Sharpen.Thread.Sleep(500);
        Log.Info("Starting with NN 0 active");

        FileSystem fs = HATestUtil.ConfigureFailoverFs(haCluster, conf);
        halfBlockStream = fs.Create(TestPath);

        // Write half a block.
        AppendTestUtil.Write(halfBlockStream, 0, BlockSize / 2);
        halfBlockStream.Hflush();

        // Look into the block manager on the active node for the block
        // under construction.
        NameNode activeNameNode = haCluster.GetNameNode(0);
        ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, TestPath);
        DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(activeNameNode, blk);
        Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

        // Find the corresponding DN daemon, and spy on its connection to the
        // active.
        DataNode primaryDN = haCluster.GetDataNode(expectedPrimary.GetIpcPort());
        DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, activeNameNode);

        // Delay the commitBlockSynchronization call.
        GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
        Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
            Org.Mockito.Mockito.Eq(blk),
            Org.Mockito.Mockito.AnyInt(),                   // new genstamp
            Org.Mockito.Mockito.AnyLong(),                  // new length
            Org.Mockito.Mockito.Eq(true),                   // close file
            Org.Mockito.Mockito.Eq(false),                  // delete block
            (DatanodeID[])Org.Mockito.Mockito.AnyObject(),  // new targets
            (string[])Org.Mockito.Mockito.AnyObject());     // new target storages

        DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(haCluster, conf);
        NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));

        Log.Info("Waiting for commitBlockSynchronization call from primary");
        delayer.WaitForCall();

        Log.Info("Failing over to NN 1");
        haCluster.TransitionToStandby(0);
        haCluster.TransitionToActive(1);

        // Let the commitBlockSynchronization call go through, and check that
        // it failed with the correct exception.
        delayer.Proceed();
        delayer.WaitForResult();
        Exception thrown = delayer.GetThrown();
        if (thrown == null)
        {
            NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby");
        }
        GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported", thrown);

        // Now, if we try again to recover the block, it should succeed on the new
        // active.
        LoopRecoverLease(fsOtherUser, TestPath);
        AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
    }
    finally
    {
        IOUtils.CloseStream(halfBlockStream);
        haCluster.Shutdown();
    }
}
/// <summary>
/// Verifies that after a block is moved to another datanode and the NameNodes
/// fail over, the new active NameNode reports the block on exactly one datanode.
/// </summary>
/// <remarks>
/// Steps: start an HA cluster with one datanode, create a one-block file,
/// add a second datanode, call <c>ReplaceBlock</c> from the first datanode to
/// the second, wait for the source replica to be deleted, send an incremental
/// block report, fail over to NameNode 1 and query the block locations there.
/// </remarks>
public virtual void TestDeletedBlockWhenAddBlockIsInEdit()
{
    Configuration conf = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(conf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(1)
        .Build();
    DFSClient client = null;
    try
    {
        cluster.WaitActive();
        NUnit.Framework.Assert.AreEqual("Number of namenodes is not 2", 2, cluster.GetNumNameNodes());

        // Transitioning the namenode 0 to active.
        cluster.TransitionToActive(0);
        NUnit.Framework.Assert.IsTrue("Namenode 0 should be in active state",
            cluster.GetNameNode(0).IsActiveState());
        NUnit.Framework.Assert.IsTrue("Namenode 1 should be in standby state",
            cluster.GetNameNode(1).IsStandbyState());

        // Trigger heartbeat to mark DatanodeStorageInfo#heartbeatedSinceFailover
        // to true.
        DataNodeTestUtils.TriggerHeartbeat(cluster.GetDataNodes()[0]);
        FileSystem fs = cluster.GetFileSystem(0);

        // Trigger blockReport to mark DatanodeStorageInfo#blockContentsStale
        // to false.
        cluster.GetDataNodes()[0].TriggerBlockReport(
            new BlockReportOptions.Factory().SetIncremental(false).Build());

        Path fileName = new Path("/tmp.txt");
        // Create a file with one block.
        DFSTestUtil.CreateFile(fs, fileName, 10L, (short)1, 1234L);
        DFSTestUtil.WaitReplication(fs, fileName, (short)1);

        client = new DFSClient(cluster.GetFileSystem(0).GetUri(), conf);
        IList<LocatedBlock> locatedBlocks = client.GetNamenode()
            .GetBlockLocations("/tmp.txt", 0, 10L).GetLocatedBlocks();
        // AreEqual (instead of IsTrue on a comparison) so that a failure
        // reports the actual count.
        NUnit.Framework.Assert.AreEqual(1, locatedBlocks.Count);
        NUnit.Framework.Assert.AreEqual(1, locatedBlocks[0].GetLocations().Length);

        // Add a second datanode to the cluster.
        cluster.StartDataNodes(conf, 1, true, null, null, null, null);
        NUnit.Framework.Assert.AreEqual("Number of datanodes should be 2", 2, cluster.GetDataNodes().Count);

        DataNode dn0 = cluster.GetDataNodes()[0];
        DataNode dn1 = cluster.GetDataNodes()[1];
        string activeNNBPId = cluster.GetNamesystem(0).GetBlockPoolId();
        DatanodeDescriptor sourceDnDesc = NameNodeAdapter.GetDatanode(
            cluster.GetNamesystem(0), dn0.GetDNRegistrationForBP(activeNNBPId));
        DatanodeDescriptor destDnDesc = NameNodeAdapter.GetDatanode(
            cluster.GetNamesystem(0), dn1.GetDNRegistrationForBP(activeNNBPId));
        ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);

        // The source descriptor is passed for both the second and third argument.
        Log.Info("replaceBlock: " + ReplaceBlock(block, (DatanodeInfo)sourceDnDesc,
            (DatanodeInfo)sourceDnDesc, (DatanodeInfo)destDnDesc));

        // Wait for the FsDatasetAsyncDiskService to delete the block.
        Sharpen.Thread.Sleep(3000);

        // Trigger the incremental block report to report the deleted block to
        // the namenode.
        cluster.GetDataNodes()[0].TriggerBlockReport(
            new BlockReportOptions.Factory().SetIncremental(true).Build());

        cluster.TransitionToStandby(0);
        cluster.TransitionToActive(1);
        NUnit.Framework.Assert.IsTrue("Namenode 1 should be in active state",
            cluster.GetNameNode(1).IsActiveState());
        NUnit.Framework.Assert.IsTrue("Namenode 0 should be in standby state",
            cluster.GetNameNode(0).IsStandbyState());
        client.Close();

        // Open a new client for the new active namenode.
        client = new DFSClient(cluster.GetFileSystem(1).GetUri(), conf);
        IList<LocatedBlock> locatedBlocks1 = client.GetNamenode()
            .GetBlockLocations("/tmp.txt", 0, 10L).GetLocatedBlocks();
        NUnit.Framework.Assert.AreEqual(1, locatedBlocks1.Count);
        NUnit.Framework.Assert.AreEqual("The block should be only on 1 datanode ", 1,
            locatedBlocks1[0].GetLocations().Length);
    }
    finally
    {
        IOUtils.Cleanup(null, client);
        cluster.Shutdown();
    }
}
/// <summary>
/// Verifies that, with stale-node avoidance enabled for reads, a stale
/// datanode is listed last among a block's locations.
/// </summary>
/// <remarks>
/// A file is written with replication 3 and kept open. The first location of
/// the first block is made stale (heartbeats stopped, last update moved back
/// by more than the stale interval) and must then appear at index 2. The node
/// is made fresh again and the same check is repeated for the last block of
/// the file. The stream, the client and the cluster are always released, even
/// when setup or an assertion fails.
/// </remarks>
public virtual void TestReadSelectNonStaleDatanode()
{
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
    long staleInterval = 30 * 1000 * 60;
    conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .NumDataNodes(numDatanodes).Racks(racks).Build();
    DFSClient client = null;
    FSDataOutputStream stm = null;
    try
    {
        // Everything after Build() runs inside the try so that a failure here
        // can no longer leak the running cluster or the client.
        cluster.WaitActive();
        IPEndPoint addr = new IPEndPoint("localhost", cluster.GetNameNodePort());
        client = new DFSClient(addr, conf);
        IList<DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager()
            .GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Live);
        NUnit.Framework.Assert.AreEqual("Unexpected number of datanodes", numDatanodes, nodeInfoList.Count);
        FileSystem fileSys = cluster.GetFileSystem();

        // Do the writing but do not close the FSDataOutputStream
        // in order to mimic the ongoing writing.
        Path fileName = new Path("/file1");
        stm = fileSys.Create(fileName, true,
            fileSys.GetConf().GetInt(CommonConfigurationKeys.IoFileBufferSizeKey, 4096),
            (short)3, blockSize);
        stm.Write(new byte[(blockSize * 3) / 2]);
        // We do not close the stream so that
        // the writing seems to be still ongoing.
        stm.Hflush();

        LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(fileName.ToString(), 0, blockSize);
        DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodes.Length);

        // Stop the heartbeat of the first node.
        DataNode staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // Set the first node as stale.
        DatanodeDescriptor staleNodeInfo = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));

        LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(fileName.ToString(), 0, blockSize);
        DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodesAfterStale.Length);
        // The stale node (previously first) is expected at the last position.
        NUnit.Framework.Assert.AreEqual(nodes[0].GetHostName(), nodesAfterStale[2].GetHostName());

        // Restart the staleNode's heartbeat.
        DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
        // Reset the first node as non-stale, so as to avoid two stale nodes.
        DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);

        LocatedBlock lastBlock = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue)
            .GetLastLocatedBlock();
        nodes = lastBlock.GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodes.Length);

        // Stop the heartbeat of the first node for the last block.
        staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
        NUnit.Framework.Assert.IsNotNull(staleNode);
        // Set the node as stale.
        DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem()
            .GetBlockManager().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
        DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));

        LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue)
            .GetLastLocatedBlock();
        nodesAfterStale = lastBlockAfterStale.GetLocations();
        NUnit.Framework.Assert.AreEqual(3, nodesAfterStale.Length);
        NUnit.Framework.Assert.AreEqual(nodes[0].GetHostName(), nodesAfterStale[2].GetHostName());
    }
    finally
    {
        // Nested finally blocks: a failure while closing the stream or the
        // client must not prevent the cluster from being shut down.
        try
        {
            if (stm != null)
            {
                stm.Close();
            }
        }
        finally
        {
            try
            {
                if (client != null)
                {
                    client.Close();
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
    }
}
/// <summary>
/// Decommissions the datanodes one at a time and verifies the decommission
/// status reported by the DatanodeManager and by DFSAdmin after each step.
/// </summary>
/// <remarks>
/// One complete file and one incomplete (still open) file are written first.
/// After the first node is decommissioned the expected counters are 3, 0, 1;
/// after the second they are 3, 3, 1 for the first node and 4, 4, 2 for the
/// second. Finally the exclude file is emptied, the open stream is closed and
/// both files are removed. The DFSClient created by the test is closed in all
/// cases.
/// </remarks>
public virtual void TestDecommissionStatus()
{
    IPEndPoint addr = new IPEndPoint("localhost", cluster.GetNameNodePort());
    DFSClient client = new DFSClient(addr, conf);
    try
    {
        DatanodeInfo[] info = client.DatanodeReport(HdfsConstants.DatanodeReportType.Live);
        NUnit.Framework.Assert.AreEqual("Number of Datanodes ", 2, info.Length);
        DistributedFileSystem fileSys = cluster.GetFileSystem();
        DFSAdmin admin = new DFSAdmin(cluster.GetConfiguration(0));
        short replicas = numDatanodes;

        //
        // Decommission one node. Verify the decommission status
        //
        Path file1 = new Path("decommission.dat");
        WriteFile(fileSys, file1, replicas);
        Path file2 = new Path("decommission1.dat");
        FSDataOutputStream st1 = WriteIncompleteFile(fileSys, file2, replicas);
        foreach (DataNode d in cluster.GetDataNodes())
        {
            DataNodeTestUtils.TriggerBlockReport(d);
        }

        FSNamesystem fsn = cluster.GetNamesystem();
        DatanodeManager dm = fsn.GetBlockManager().GetDatanodeManager();
        for (int iteration = 0; iteration < numDatanodes; iteration++)
        {
            string downnode = DecommissionNode(fsn, client, localFileSys, iteration);
            dm.RefreshNodes(conf);
            decommissionedNodes.AddItem(downnode);
            BlockManagerTestUtil.RecheckDecommissionState(dm);
            IList<DatanodeDescriptor> decommissioningNodes = dm.GetDecommissioningNodes();
            if (iteration == 0)
            {
                NUnit.Framework.Assert.AreEqual(1, decommissioningNodes.Count);
                DatanodeDescriptor decommNode = decommissioningNodes[0];
                CheckDecommissionStatus(decommNode, 3, 0, 1);
                CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 1), fileSys, admin);
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(2, decommissioningNodes.Count);
                DatanodeDescriptor decommNode1 = decommissioningNodes[0];
                DatanodeDescriptor decommNode2 = decommissioningNodes[1];
                // This one is still 3,3,1 since it passed over the UC block
                // earlier, before node 2 was decommed
                CheckDecommissionStatus(decommNode1, 3, 3, 1);
                // This one is 4,4,2 since it has the full state
                CheckDecommissionStatus(decommNode2, 4, 4, 2);
                CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 2), fileSys, admin);
            }
        }

        // Call refreshNodes on FSNamesystem with empty exclude file.
        // This will remove the datanodes from decommissioning list and
        // make them available again.
        WriteConfigFile(localFileSys, excludeFile, null);
        dm.RefreshNodes(conf);
        st1.Close();
        CleanupFile(fileSys, file1);
        CleanupFile(fileSys, file2);
    }
    finally
    {
        // The client was previously never closed.
        client.Close();
    }
}
/// <summary>
/// Exercises <c>cluster.SortByDistance</c> for several reader / candidate
/// combinations and checks the resulting order.
/// </summary>
/// <remarks>
/// The deterministic cases fix the random seed before sorting and assert the
/// exact order. The last two cases sort the same candidates for several
/// readers without reseeding and require that the first element differs at
/// least once. <c>foundRandom</c> is reset before the second of those cases so
/// that its assertion is not satisfied by the result of the first one.
/// </remarks>
public virtual void TestSortByDistance()
{
    DatanodeDescriptor[] testNodes = new DatanodeDescriptor[3];

    // array contains both local node & local rack node
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[2];
    testNodes[2] = dataNodes[0];
    cluster.SetRandomSeed(unchecked((int)(0xDEADBEEF)));
    cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
    NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[0]);
    NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[1]);
    NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[2]);

    // array contains both local node & local rack node & decommissioned node
    DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
    dtestNodes[0] = dataNodes[8];
    dtestNodes[1] = dataNodes[12];
    dtestNodes[2] = dataNodes[11];
    dtestNodes[3] = dataNodes[9];
    dtestNodes[4] = dataNodes[10];
    cluster.SetRandomSeed(unchecked((int)(0xDEADBEEF)));
    // Only the first (Length - 2) entries are passed as the active length;
    // the last two entries are asserted to stay where they are.
    cluster.SortByDistance(dataNodes[8], dtestNodes, dtestNodes.Length - 2);
    NUnit.Framework.Assert.IsTrue(dtestNodes[0] == dataNodes[8]);
    NUnit.Framework.Assert.IsTrue(dtestNodes[1] == dataNodes[11]);
    NUnit.Framework.Assert.IsTrue(dtestNodes[2] == dataNodes[12]);
    NUnit.Framework.Assert.IsTrue(dtestNodes[3] == dataNodes[9]);
    NUnit.Framework.Assert.IsTrue(dtestNodes[4] == dataNodes[10]);

    // array contains local node
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[3];
    testNodes[2] = dataNodes[0];
    cluster.SetRandomSeed(unchecked((int)(0xDEADBEEF)));
    cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
    NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[0]);
    NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[1]);
    NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[3]);

    // array contains local rack node
    testNodes[0] = dataNodes[5];
    testNodes[1] = dataNodes[3];
    testNodes[2] = dataNodes[1];
    cluster.SetRandomSeed(unchecked((int)(0xDEADBEEF)));
    cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
    NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[1]);
    NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[3]);
    NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[5]);

    // array contains local rack node which happens to be in position 0
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[5];
    testNodes[2] = dataNodes[3];
    cluster.SetRandomSeed(unchecked((int)(0xDEADBEEF)));
    cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
    NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[1]);
    NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[3]);
    NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[5]);

    // Same as previous, but with a different random seed to test randomization
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[5];
    testNodes[2] = dataNodes[3];
    cluster.SetRandomSeed(unchecked((int)(0xDEAD)));
    cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
    // sortByDistance does not take the "data center" layer into consideration
    // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
    NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[1]);
    NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[5]);
    NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[3]);

    // Array of just rack-local nodes
    // Expect a random first node
    DatanodeDescriptor first = null;
    bool foundRandom = false;
    for (int i = 5; i <= 7; i++)
    {
        testNodes[0] = dataNodes[5];
        testNodes[1] = dataNodes[6];
        testNodes[2] = dataNodes[7];
        cluster.SortByDistance(dataNodes[i], testNodes, testNodes.Length);
        if (first == null)
        {
            first = testNodes[0];
        }
        else if (first != testNodes[0])
        {
            foundRandom = true;
            break;
        }
    }
    NUnit.Framework.Assert.IsTrue("Expected to find a different first location", foundRandom);

    // Array of just remote nodes
    // Expect random first node
    first = null;
    // Reset the flag: without this the assertion below would always pass
    // because the flag is still true from the rack-local case above.
    foundRandom = false;
    for (int i_1 = 1; i_1 <= 4; i_1++)
    {
        testNodes[0] = dataNodes[13];
        testNodes[1] = dataNodes[14];
        testNodes[2] = dataNodes[15];
        cluster.SortByDistance(dataNodes[i_1], testNodes, testNodes.Length);
        if (first == null)
        {
            first = testNodes[0];
        }
        else if (first != testNodes[0])
        {
            foundRandom = true;
            break;
        }
    }
    NUnit.Framework.Assert.IsTrue("Expected to find a different first location", foundRandom);
}
/// <summary>
/// Test race between delete operation and commitBlockSynchronization method.
/// </summary>
/// <remarks>
/// See HDFS-6825. For each test path a half-block file is written, lease
/// recovery is started and the primary datanode's commitBlockSynchronization
/// call is held back by a delaying answer. While the call is held, the top-most
/// non-root parent of the file is deleted recursively (and optionally
/// recreated); the call is then released. Finally the NameNodes are restarted
/// to check that they can still come up.
/// </remarks>
/// <param name="hasSnapshot">
/// when true, a snapshot of the root directory is created after each file has
/// been written and flushed
/// </param>
/// <exception cref="System.Exception"/>
private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
{
    Log.Info("Start testing, hasSnapshot: " + hasSnapshot);

    // Each entry: path of the file to test, and whether the deleted directory
    // is recreated before the delayed call proceeds.
    AList<AbstractMap.SimpleImmutableEntry<string, bool>> testList =
        new AList<AbstractMap.SimpleImmutableEntry<string, bool>>();
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/test-file1", true));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/testdir/testdir1/test-file", false));
    testList.AddItem(new AbstractMap.SimpleImmutableEntry<string, bool>("/testdir/testdir1/test-file1", true));

    Path rootPath = new Path("/");
    Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
    conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
    FSDataOutputStream stm = null;
    // One spy per datanode, created on first use and reused afterwards.
    IDictionary<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
        new Dictionary<DataNode, DatanodeProtocolClientSideTranslatorPB>();
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
        cluster.WaitActive();
        DistributedFileSystem fs = cluster.GetFileSystem();
        int stId = 0;
        foreach (AbstractMap.SimpleImmutableEntry<string, bool> stest in testList)
        {
            string testPath = stest.Key;
            bool mkSameDir = stest.Value;
            Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: " + hasSnapshot);
            Path fPath = new Path(testPath);

            // Find grandest non-root parent.
            Path grandestNonRootParent = fPath;
            while (!grandestNonRootParent.GetParent().Equals(rootPath))
            {
                grandestNonRootParent = grandestNonRootParent.GetParent();
            }

            stm = fs.Create(fPath);
            Log.Info("test on " + testPath + " created " + fPath);

            // Write half a block.
            AppendTestUtil.Write(stm, 0, BlockSize / 2);
            stm.Hflush();
            if (hasSnapshot)
            {
                SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                ++stId;
            }

            // Look into the block manager on the active node for the block
            // under construction.
            NameNode nn = cluster.GetNameNode();
            ExtendedBlock blk = DFSTestUtil.GetFirstBlock(fs, fPath);
            DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
            Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);

            // Find the corresponding DN daemon, and spy on its connection to the
            // active.
            DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
            // TryGetValue instead of the indexer: the Dictionary indexer throws
            // KeyNotFoundException for a datanode that has no spy yet.
            DatanodeProtocolClientSideTranslatorPB nnSpy;
            if (!dnMap.TryGetValue(primaryDN, out nnSpy) || nnSpy == null)
            {
                nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                dnMap[primaryDN] = nnSpy;
            }

            // Delay the commitBlockSynchronization call.
            GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
            Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(
                Org.Mockito.Mockito.Eq(blk),
                Org.Mockito.Mockito.AnyInt(),                   // new genstamp
                Org.Mockito.Mockito.AnyLong(),                  // new length
                Org.Mockito.Mockito.Eq(true),                   // close file
                Org.Mockito.Mockito.Eq(false),                  // delete block
                (DatanodeID[])Org.Mockito.Mockito.AnyObject(),  // new targets
                (string[])Org.Mockito.Mockito.AnyObject());     // new target storages

            fs.RecoverLease(fPath);
            Log.Info("Waiting for commitBlockSynchronization call from primary");
            delayer.WaitForCall();

            Log.Info("Deleting recursively " + grandestNonRootParent);
            fs.Delete(grandestNonRootParent, true);
            if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
            {
                Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                fs.Mkdirs(grandestNonRootParent);
            }

            delayer.Proceed();
            Log.Info("Now wait for result");
            delayer.WaitForResult();
            Exception t = delayer.GetThrown();
            if (t != null)
            {
                // An exception from the delayed call is only logged, not treated as a failure.
                Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
            }
        }
        // end of loop each fPath

        Log.Info("Now check we can restart");
        cluster.RestartNameNodes();
        Log.Info("Restart finished");
    }
    finally
    {
        if (stm != null)
        {
            IOUtils.CloseStream(stm);
        }
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}
/// <summary>
/// Tracks the expected total load, in-service node count and in-service load
/// of an eight-node cluster through shutdowns, restarts, open write streams
/// and decommissioning, comparing them with the namesystem's figures.
/// </summary>
/// <remarks>
/// Expected values are maintained in local counters and checked with
/// <c>CheckClusterHealth</c> after every step. The order of the steps matters:
/// a node's xceiver count is read before its decommission is started, and
/// heartbeats are triggered before each health check that depends on updated
/// load.
/// </remarks>
public virtual void TestXceiverCount()
{
    Configuration conf = new HdfsConfiguration();
    // retry one time, if close fails
    conf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1);

    MiniDFSCluster cluster = null;
    int nodes = 8;
    int fileCount = 5;
    short fileRepl = 3;
    try
    {
        cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nodes).Build();
        cluster.WaitActive();

        FSNamesystem namesystem = cluster.GetNamesystem();
        DatanodeManager dnm = namesystem.GetBlockManager().GetDatanodeManager();
        IList<DataNode> datanodes = cluster.GetDataNodes();
        DistributedFileSystem fs = cluster.GetFileSystem();

        // trigger heartbeats in case not already sent
        TriggerHeartbeats(datanodes);

        // check that all nodes are live and in service
        // (xceiver server adds 1 to load)
        int expectedTotalLoad = nodes;
        int expectedInServiceNodes = nodes;
        int expectedInServiceLoad = nodes;
        CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);

        // shutdown half the nodes and force a heartbeat check to ensure
        // counts are accurate
        int half = nodes / 2;
        for (int deadIdx = 0; deadIdx < half; deadIdx++)
        {
            DataNode victim = datanodes[deadIdx];
            DatanodeDescriptor victimDesc = dnm.GetDatanode(victim.GetDatanodeId());
            victim.Shutdown();
            DFSTestUtil.SetDatanodeDead(victimDesc);
            BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());

            // Verify decommission of dead node won't impact nodesInService metrics.
            dnm.GetDecomManager().StartDecommission(victimDesc);
            expectedInServiceNodes--;
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, namesystem.GetNumLiveDataNodes());
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));

            // Verify recommission of dead node won't impact nodesInService metrics.
            dnm.GetDecomManager().StopDecommission(victimDesc);
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
        }

        // restart the nodes to verify that counts are correct after
        // node re-registration
        cluster.RestartDataNodes();
        cluster.WaitActive();
        datanodes = cluster.GetDataNodes();
        expectedInServiceNodes = nodes;
        NUnit.Framework.Assert.AreEqual(nodes, datanodes.Count);
        CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);

        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int fileIdx = 0; fileIdx < fileCount; fileIdx++)
        {
            streams[fileIdx] = (DFSOutputStream)fs.Create(new Path("/f" + fileIdx), fileRepl).GetWrappedStream();
            streams[fileIdx].Write(Sharpen.Runtime.GetBytesForString("1"));
            streams[fileIdx].Hsync();
            // the load for writers is 2 because both the write xceiver & packet
            // responder threads are counted in the load
            expectedTotalLoad += 2 * fileRepl;
            expectedInServiceLoad += 2 * fileRepl;
        }

        // force nodes to send load update
        TriggerHeartbeats(datanodes);
        CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);

        // decomm a few nodes, subtract their load from the expected load,
        // trigger heartbeat to force load update
        for (int decomIdx = 0; decomIdx < fileRepl; decomIdx++)
        {
            expectedInServiceNodes--;
            DatanodeDescriptor decomDesc = dnm.GetDatanode(datanodes[decomIdx].GetDatanodeId());
            // Read the load before the decommission is started.
            expectedInServiceLoad -= decomDesc.GetXceiverCount();
            dnm.GetDecomManager().StartDecommission(decomDesc);
            DataNodeTestUtils.TriggerHeartbeat(datanodes[decomIdx]);
            Sharpen.Thread.Sleep(100);
            CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }

        // check expected load while closing each stream. recalc expected
        // load based on whether the nodes in the pipeline are decomm
        for (int closeIdx = 0; closeIdx < fileCount; closeIdx++)
        {
            int decomm = 0;
            foreach (DatanodeInfo pipelineNode in streams[closeIdx].GetPipeline())
            {
                DatanodeDescriptor pipelineDesc = dnm.GetDatanode(pipelineNode);
                expectedTotalLoad -= 2;
                bool outOfService = pipelineDesc.IsDecommissionInProgress() || pipelineDesc.IsDecommissioned();
                if (outOfService)
                {
                    decomm++;
                }
                else
                {
                    expectedInServiceLoad -= 2;
                }
            }

            try
            {
                streams[closeIdx].Close();
            }
            catch (IOException)
            {
                // nodes will go decommissioned even if there's a UC block whose
                // other locations are decommissioned too. we'll ignore that
                // bug for now
                if (decomm < fileRepl)
                {
                    throw;
                }
            }

            TriggerHeartbeats(datanodes);
            // verify node count and loads
            CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }

        // shutdown each node, verify node counts based on decomm state
        for (int downIdx = 0; downIdx < nodes; downIdx++)
        {
            DataNode current = datanodes[downIdx];
            current.Shutdown();
            // force it to appear dead so live count decreases
            DatanodeDescriptor currentDesc = dnm.GetDatanode(current.GetDatanodeId());
            DFSTestUtil.SetDatanodeDead(currentDesc);
            BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
            NUnit.Framework.Assert.AreEqual(nodes - 1 - downIdx, namesystem.GetNumLiveDataNodes());

            // first few nodes are already out of service
            if (downIdx >= fileRepl)
            {
                expectedInServiceNodes--;
            }
            NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));

            // live nodes always report load of 1. no nodes is load 0
            double expectedXceiverAvg;
            if (downIdx == nodes - 1)
            {
                expectedXceiverAvg = 0.0;
            }
            else
            {
                expectedXceiverAvg = 1.0;
            }
            NUnit.Framework.Assert.AreEqual(expectedXceiverAvg, GetInServiceXceiverAverage(namesystem), Epsilon);
        }

        // final sanity check
        CheckClusterHealth(0, namesystem, 0.0, 0, 0.0);
    }
    finally
    {
        if (cluster != null)
        {
            cluster.Shutdown();
        }
    }
}