/// <summary>
/// Creates a 10-block file, restarts the standby (NN1) so that it comes up in
/// safe mode, deletes the file on the active (NN0) and lets the standby catch
/// up before the datanodes are asked to delete anything. The standby's
/// safe-mode counters are checked right after catching up and again after the
/// datanodes have sent their deletion reports.
/// </summary>
public virtual void TestBlocksRemovedWhileInSafeModeEditsArriveFirst()
{
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.CreateFile(fs, new Path("/test"), 10 * BlockSize, (short)3, 1L);

    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup.
    nn0.GetRpcServer().RollEditLog();

    Banner("Restarting standby");
    RestartStandby();

    // It will initially have all of the blocks necessary.
    string status = nn1.GetNamesystem().GetSafemode();
    string expectedPrefix =
        "Safe mode is ON. The reported blocks 10 has reached the threshold "
        + "0.9990 of total blocks 10. The number of live datanodes 3 has "
        + "reached the minimum number 0. In safe mode extension. "
        + "Safe mode will be turned off automatically";
    // NUnit takes the condition first and the failure message second. The
    // prefix is fixed, machine-generated text, so it is compared ordinally
    // rather than with the current culture's rules.
    NUnit.Framework.Assert.IsTrue(
        status.StartsWith(expectedPrefix, System.StringComparison.Ordinal),
        "Bad safemode status: '" + status + "'");

    // Delete those blocks while the SBN is in safe mode, and let the standby
    // catch up before the actual deletions are sent to the DNs.
    // NOTE(review): the banner text below says "without rolling the edit log",
    // which looks copied from the sibling tests; this test waits for the
    // standby to catch up immediately afterwards. Confirm before rewording.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(new Path("/test"), true);
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);

    // Should see removal of the blocks as well as their contribution to
    // safe block count.
    AssertSafeMode(nn1, 0, 0, 3, 0);

    Banner("Triggering sending deletions to DNs and Deletion Reports");
    BlockManagerTestUtil.ComputeAllPendingWork(nn0.GetNamesystem().GetBlockManager());
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();

    // No change in assertion status here, but some of the consistency checks
    // in safemode will fire here if we accidentally decrement safe block count
    // below 0.
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
/// <summary>
/// Creates a 10-block file, restarts the standby (NN1) into safe mode, then
/// deletes the file on the active (NN0) and drives the datanode deletions.
/// The standby's safe-mode counters are asserted to be unchanged until it has
/// caught up with the active's namespace, after which they drop to zero.
/// </summary>
public virtual void TestBlocksRemovedWhileInSafeMode()
{
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    var testFile = new Path("/test");
    var fileLength = 10 * BlockSize;
    DFSTestUtil.CreateFile(fs, testFile, fileLength, (short)3, 1L);

    // Rolling the edit log here means the restarted SBN loads the
    // namespace while it is starting up.
    var activeRpc = nn0.GetRpcServer();
    activeRpc.RollEditLog();

    Banner("Restarting standby");
    RestartStandby();

    // Right after the restart every block is accounted for.
    AssertSafeMode(nn1, 10, 10, 3, 0);

    // Remove the blocks while the SBN is still in safe mode. Its counters are
    // expected to stay put, since deletions caused by block removals are not
    // ACKed to it.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(testFile, true);
    var activeBlockManager = nn0.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.ComputeAllPendingWork(activeBlockManager);

    Banner("Triggering deletions on DNs and Deletion Reports");
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    AssertSafeMode(nn1, 10, 10, 3, 0);

    // Once the standby has caught up with the active namespace the
    // counters go back to 0 blocks.
    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
/// <summary>
/// Starts a two-NameNode HA cluster with three datanodes, writes a test file
/// through a failover file system, and waits for the expected number of block
/// locations to show up on the standby after each replication change
/// (3, then 1, then 3 again). The cluster is always shut down at the end.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestStandbyIsHot()
{
    const string separator = "==================================";

    Configuration haConf = new Configuration();
    // Standby reads are enabled so block locations can be watched there.
    HAUtil.SetAllowStandbyReads(haConf, true);
    haConf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);

    var clusterBuilder = new MiniDFSCluster.Builder(haConf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3);
    MiniDFSCluster haCluster = clusterBuilder.Build();
    try
    {
        haCluster.WaitActive();
        haCluster.TransitionToActive(0);

        NameNode active = haCluster.GetNameNode(0);
        NameNode standby = haCluster.GetNameNode(1);
        FileSystem failoverFs = HATestUtil.ConfigureFailoverFs(haCluster, haConf);
        Sharpen.Thread.Sleep(1000);

        System.Console.Error.WriteLine(separator);
        DFSTestUtil.WriteFile(failoverFs, TestFilePath, TestFileData);
        // An edit log roll has to be forced for the standby to catch up.
        active.GetRpcServer().RollEditLog();
        System.Console.Error.WriteLine(separator);

        // Block locations should show up on standby.
        Log.Info("Waiting for block locations to appear on standby node");
        WaitForBlockLocations(haCluster, standby, TestFile, 3);

        // Immediate heartbeats and block reports make the active
        // "trust" all of the DNs.
        haCluster.TriggerHeartbeats();
        haCluster.TriggerBlockReports();

        // Lower the replication to 1.
        Log.Info("Changing replication to 1");
        failoverFs.SetReplication(TestFilePath, (short)1);
        var blockManagerAfterLowering = active.GetNamesystem().GetBlockManager();
        BlockManagerTestUtil.ComputeAllPendingWork(blockManagerAfterLowering);
        WaitForBlockLocations(haCluster, active, TestFile, 1);
        active.GetRpcServer().RollEditLog();
        Log.Info("Waiting for lowered replication to show up on standby");
        WaitForBlockLocations(haCluster, standby, TestFile, 1);

        // Raise the replication back to 3.
        Log.Info("Changing replication to 3");
        failoverFs.SetReplication(TestFilePath, (short)3);
        var blockManagerAfterRaising = active.GetNamesystem().GetBlockManager();
        BlockManagerTestUtil.ComputeAllPendingWork(blockManagerAfterRaising);
        active.GetRpcServer().RollEditLog();
        Log.Info("Waiting for higher replication to show up on standby");
        WaitForBlockLocations(haCluster, standby, TestFile, 3);
    }
    finally
    {
        haCluster.Shutdown();
    }
}
/// <summary>
/// Performs one round of work: triggers a deletion report and then a heartbeat
/// on every DataNode of the enclosing object's cluster, computes all pending
/// work on the block managers of NameNodes 0 and 1, and finally sleeps for
/// <c>interval</c>.
/// </summary>
/// <exception cref="System.Exception"/>
public override void DoAnAction()
{
    foreach (DataNode dataNode in this._enclosing.cluster.GetDataNodes())
    {
        DataNodeTestUtils.TriggerDeletionReport(dataNode);
        DataNodeTestUtils.TriggerHeartbeat(dataNode);
    }

    // Only NameNode indices 0 and 1 are visited.
    for (int nnIndex = 0; nnIndex < 2; nnIndex++)
    {
        NameNode nameNode = this._enclosing.cluster.GetNameNode(nnIndex);
        var blockManager = nameNode.GetNamesystem().GetBlockManager();
        BlockManagerTestUtil.ComputeAllPendingWork(blockManager);
    }

    // Pause between rounds (interval is presumably in milliseconds — confirm).
    Sharpen.Thread.Sleep(interval);
}
/// <summary>
/// Creates a 4.5-block file, restarts the standby (NN1) into safe mode, and
/// opens the file for append on the active (NN0). The standby's safe-mode
/// counters are asserted before and after it catches up with the append, and
/// again around a subsequent delete of the file.
/// </summary>
public virtual void TestAppendWhileInSafeMode()
{
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    // 4.5 blocks, so that append() re-opens an existing block
    // rather than simply adding a new one.
    var testFile = new Path("/test");
    var fileLength = 4 * BlockSize + BlockSize / 2;
    DFSTestUtil.CreateFile(fs, testFile, fileLength, (short)3, 1L);

    // Rolling the edit log here means the restarted SBN loads the
    // namespace while it is starting up.
    var activeRpc = nn0.GetRpcServer();
    activeRpc.RollEditLog();

    Banner("Restarting standby");
    RestartStandby();

    // Right after the restart every block is accounted for.
    AssertSafeMode(nn1, 5, 5, 3, 0);

    // Append to a block while the SBN is in safe mode. Safe mode should be
    // unaffected at first, because the DN message gets queued.
    FSDataOutputStream appendStream = fs.Append(testFile);
    try
    {
        AssertSafeMode(nn1, 5, 5, 3, 0);

        // Rolling edits now lets the SBN see the block as under construction;
        // its total and safe counts both go down by one, since UC blocks
        // are not counted by safe mode.
        HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
        AssertSafeMode(nn1, 4, 4, 3, 0);
    }
    finally
    {
        IOUtils.CloseStream(appendStream);
    }

    // Remove the blocks while the SBN is in safe mode. The deletions are not
    // ACKed to the SBN, so it will not notice until the edit log is rolled.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(testFile, true);
    var activeBlockManager = nn0.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.ComputeAllPendingWork(activeBlockManager);

    Banner("Triggering deletions on DNs and Deletion Reports");
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();
    AssertSafeMode(nn1, 4, 4, 3, 0);

    // After the edit log is rolled the deletions go through.
    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
/// <summary>
/// Creates a 5-block file and rolls the edit log, then deletes the file on the
/// active (NN0) before the standby (NN1) is restarted. The standby's safe-mode
/// counters are asserted right after the restart and again once it has caught
/// up with the active's namespace.
/// </summary>
public virtual void TestBlocksRemovedBeforeStandbyRestart()
{
    Banner("Starting with NN0 active and NN1 standby, creating some blocks");
    var testFile = new Path("/test");
    var fileLength = 5 * BlockSize;
    DFSTestUtil.CreateFile(fs, testFile, fileLength, (short)3, 1L);

    // Rolling the edit log here means the restarted SBN loads the
    // namespace while it is starting up.
    var activeRpc = nn0.GetRpcServer();
    activeRpc.RollEditLog();

    // Remove the blocks again so that they are not reported to the SBN
    // once it starts up.
    Banner("Removing the blocks without rolling the edit log");
    fs.Delete(testFile, true);
    var activeBlockManager = nn0.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.ComputeAllPendingWork(activeBlockManager);
    cluster.TriggerHeartbeats();

    Banner("Restarting standby");
    RestartStandby();
    AssertSafeMode(nn1, 0, 5, 3, 0);

    Banner("Waiting for standby to catch up to active namespace");
    HATestUtil.WaitForStandbyToCatchUp(nn0, nn1);
    AssertSafeMode(nn1, 0, 0, 3, 0);
}
/// <summary>
/// Starts a two-NameNode HA cluster with three datanodes, writes and then
/// deletes a test file, and asserts that the second NameNode (the standby)
/// reports zero pending-deletion blocks both before and after heartbeats and
/// block reports are triggered. The cluster is always shut down at the end.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestInvalidateBlock()
{
    const string separator = "==================================";

    Configuration haConf = new Configuration();
    HAUtil.SetAllowStandbyReads(haConf, true);
    haConf.SetInt(DFSConfigKeys.DfsHaTaileditsPeriodKey, 1);

    var clusterBuilder = new MiniDFSCluster.Builder(haConf)
        .NnTopology(MiniDFSNNTopology.SimpleHATopology())
        .NumDataNodes(3);
    MiniDFSCluster haCluster = clusterBuilder.Build();
    try
    {
        haCluster.WaitActive();
        haCluster.TransitionToActive(0);

        NameNode active = haCluster.GetNameNode(0);
        NameNode standby = haCluster.GetNameNode(1);
        FileSystem failoverFs = HATestUtil.ConfigureFailoverFs(haCluster, haConf);
        Sharpen.Thread.Sleep(1000);

        Log.Info(separator);
        DFSTestUtil.WriteFile(failoverFs, TestFilePath, TestFileData);
        // An edit log roll has to be forced for the standby to catch up.
        active.GetRpcServer().RollEditLog();
        Log.Info(separator);

        // Delete the file (non-recursive).
        failoverFs.Delete(TestFilePath, false);
        var activeBlockManager = active.GetNamesystem().GetBlockManager();
        BlockManagerTestUtil.ComputeAllPendingWork(activeBlockManager);
        active.GetRpcServer().RollEditLog();

        // The standby NN has nothing to invalidate.
        var pendingBeforeReports =
            standby.GetNamesystem().GetBlockManager().GetPendingDeletionBlocksCount();
        NUnit.Framework.Assert.AreEqual(0, pendingBeforeReports);

        haCluster.TriggerHeartbeats();
        haCluster.TriggerBlockReports();

        // Still nothing to invalidate on the standby NN after the reports.
        var pendingAfterReports =
            standby.GetNamesystem().GetBlockManager().GetPendingDeletionBlocksCount();
        NUnit.Framework.Assert.AreEqual(0, pendingAfterReports);
    }
    finally
    {
        haCluster.Shutdown();
    }
}
/// <summary>
/// Stops the datanode that holds the NORMAL replica and makes the NameNode
/// notice it is dead. The test then checks that the live replica count is 0,
/// that one block is under-replicated, and that after computing pending work
/// the block ends up with 2 locations and 1 replica.
/// </summary>
public virtual void TestNormalReplicaOffline()
{
    // Take down the datanode that hosts the NORMAL replica.
    var stoppedAddr = normalDataNode.GetXferAddr();
    cluster.StopDataNode(stoppedAddr);

    // Make the NameNode notice that the datanode is gone.
    var nameNode = cluster.GetNameNode();
    var deadAddr = normalDataNode.GetXferAddr();
    BlockManagerTestUtil.NoticeDeadDatanode(nameNode, deadAddr);

    // With the NORMAL replica offline, no live replicas should remain.
    var liveReplicas = blockManager.CountNodes(block).LiveReplicas();
    Assert.AssertThat(liveReplicas, CoreMatchers.Is(0));

    // The block should now count as under-replicated.
    BlockManagerTestUtil.UpdateState(blockManager);
    var underReplicated = blockManager.GetUnderReplicatedBlocksCount();
    Assert.AssertThat(underReplicated, CoreMatchers.Is(1L));

    // The BlockManager should heal the replication count back to 1 by
    // triggering an inter-datanode replication from one of the
    // READ_ONLY_SHARED replicas.
    BlockManagerTestUtil.ComputeAllPendingWork(blockManager);
    DFSTestUtil.WaitForReplication(cluster, extendedBlock, 1, 1, 0);

    // Expect 2 *locations* for the block, but only 1 *replica*.
    var locationCount = GetLocatedBlock().GetLocations().Length;
    Assert.AssertThat(locationCount, CoreMatchers.Is(2));
    ValidateNumberReplicas(1);
}