/// <summary>
/// Cross-checks the physical block files against the block information
/// held by the name node for the given file.
/// </summary>
/// <remarks>
/// Verifies two things:
/// 1. the number of locations of each block in the name node
/// matches the number of actual files;
/// 2. block files + pending blocks equal the total number of blocks that a file has,
/// including the replication (e.g. an HDFS file with 30 blocks and repl=2 has 60 in total).
/// </remarks>
/// <param name="fn">- file name</param>
/// <param name="fs">- file size</param>
/// <exception cref="System.IO.IOException"/>
private void Verify(string fn, int fs)
{
    // now count how many physical blocks are there
    int totalReal = CountRealBlocks(block_map);
    System.Console.Out.WriteLine("countRealBlocks counted " + totalReal + " blocks");

    // count how many blocks are stored in NN structures.
    int totalNN = CountNNBlocks(block_map, fn, fs);
    System.Console.Out.WriteLine("countNNBlocks counted " + totalNN + " blocks");

    foreach (string bid in block_map.Keys)
    {
        TestDataNodeVolumeFailure.BlockLocs bl = block_map[bid];
        // number of physical files (1 or 2) should be same as number of datanodes
        // in the list of the block locations.
        // NUnit takes the failure message as the LAST argument.
        NUnit.Framework.Assert.AreEqual(bl.num_files, bl.num_locs,
            "Num files should match num locations");
    }
    NUnit.Framework.Assert.AreEqual(totalReal, totalNN,
        "Num physical blocks should match num stored in the NN");

    // now check the number of under-replicated blocks
    FSNamesystem fsn = cluster.GetNamesystem();
    // force update of all the metric counts by calling computeDatanodeWork
    BlockManagerTestUtil.GetComputedDatanodeWork(fsn.GetBlockManager());

    // get all the counts
    long underRepl = fsn.GetUnderReplicatedBlocks();
    long pendRepl = fsn.GetPendingReplicationBlocks();
    long totalRepl = underRepl + pendRepl;
    System.Console.Out.WriteLine("underreplicated after = " + underRepl + " and pending repl ="
         + pendRepl + "; total underRepl = " + totalRepl);
    // Report the same expected total that is asserted below (blocks_num * repl),
    // rather than a hard-coded replication factor of 2.
    System.Console.Out.WriteLine("total blocks (real and replicating):" + (totalReal + totalRepl)
         + " vs. all files blocks " + (blocks_num * repl));

    // together all the blocks should be equal to all real + all underreplicated
    NUnit.Framework.Assert.AreEqual(totalReal + totalRepl, blocks_num * repl,
        "Incorrect total block count");
}
/// <summary>
/// Fails over from nn1 to nn2 after a replication increase and decrease,
/// then checks that nn2 finishes with no outstanding block work.
/// </summary>
/// <remarks>
/// The file's replication is raised to 2 and lowered back to 1 on nn1, and
/// invalidation work is computed on both name nodes. nn2 is then made active
/// while nn1 has its edit logs aborted and is put into safe mode. After
/// heartbeats and block reports, nn2 must report zero postponed
/// mis-replicated, under-replicated and pending-replication blocks, and the
/// file must still be readable through nn2.
/// </remarks>
public virtual void TestNNClearsCommandsOnFailoverWithReplChanges()
{
    // A file with many blocks raises the odds of hitting the bug.
    DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)1, 1L);

    Banner("rolling NN1's edit log, forcing catch-up");
    HATestUtil.WaitForStandbyToCatchUp(nn1, nn2);

    // Raise replication so new replicas get reported; NN2 will later see
    // them as over-replicated and schedule further deletions.
    nn1.GetRpcServer().SetReplication(TestFile, (short)2);
    while (true)
    {
        var scheduledWork = BlockManagerTestUtil.GetComputedDatanodeWork(
            nn1.GetNamesystem().GetBlockManager());
        if (!(scheduledWork > 0))
        {
            break;
        }
        Log.Info("Getting more replication work computed");
    }

    // Poll until nn1 has no pending replications left.
    BlockManager activeBlockManager = nn1.GetNamesystem().GetBlockManager();
    while (activeBlockManager.GetPendingReplicationBlocksCount() > 0)
    {
        BlockManagerTestUtil.UpdateState(activeBlockManager);
        cluster.TriggerHeartbeats();
        Sharpen.Thread.Sleep(1000);
    }

    Banner("triggering BRs");
    cluster.TriggerBlockReports();

    nn1.GetRpcServer().SetReplication(TestFile, (short)1);

    Banner("computing invalidation on nn1");
    BlockManagerTestUtil.ComputeInvalidationWork(nn1.GetNamesystem().GetBlockManager());
    DoMetasave(nn1);

    Banner("computing invalidation on nn2");
    BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
    DoMetasave(nn2);

    // Debug dump of the state just before failing over.
    Banner("Metadata immediately before failover");
    DoMetasave(nn2);

    // Make nn2 active while nn1 still believes that it is the active node.
    Banner("Failing to NN2 but let NN1 continue to think it's active");
    NameNodeAdapter.AbortEditLogs(nn1);
    NameNodeAdapter.EnterSafeMode(nn1, false);
    BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
    cluster.TransitionToActive(1);

    // The former standby must have seen the replication change.
    var replicationOnNn2 = nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication();
    NUnit.Framework.Assert.AreEqual(1, replicationOnNn2);

    // Debug dump of the state just after failing over.
    Banner("Metadata immediately after failover");
    DoMetasave(nn2);

    Banner("Triggering heartbeats and block reports so that fencing is completed");
    cluster.TriggerHeartbeats();
    cluster.TriggerBlockReports();

    Banner("Metadata after nodes have all block-reported");
    DoMetasave(nn2);

    // Force postponedMisreplicatedBlocks to be rescanned.
    BlockManager newActiveBlockManager = nn2.GetNamesystem().GetBlockManager();
    BlockManagerTestUtil.CheckHeartbeat(newActiveBlockManager);
    BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(newActiveBlockManager);

    // Nothing should remain postponed at this point.
    var postponedBlocks = nn2.GetNamesystem().GetPostponedMisreplicatedBlocks();
    NUnit.Framework.Assert.AreEqual(0, postponedBlocks);

    // Let NN2 carry out its deletions (replication monitor has to run, etc).
    BlockManagerTestUtil.ComputeInvalidationWork(nn2.GetNamesystem().GetBlockManager());
    HATestUtil.WaitForNNToIssueDeletions(nn2);
    cluster.TriggerHeartbeats();
    HATestUtil.WaitForDNDeletions(cluster);
    cluster.TriggerDeletionReports();

    var underReplicatedBlocks = nn2.GetNamesystem().GetUnderReplicatedBlocks();
    NUnit.Framework.Assert.AreEqual(0, underReplicatedBlocks);
    var pendingReplicationBlocks = nn2.GetNamesystem().GetPendingReplicationBlocks();
    NUnit.Framework.Assert.AreEqual(0, pendingReplicationBlocks);

    Banner("Making sure the file is still readable");
    FileSystem failoverFs = cluster.GetFileSystem(1);
    DFSTestUtil.ReadFile(failoverFs, TestFilePath);
}
/// <summary>
/// Writes a file, deletes the files of two randomly chosen blocks, and checks
/// the resulting missing and under-replicated block counts.
/// </summary>
/// <remarks>
/// Test write a file, verifies and closes it. Then a couple of random blocks
/// is removed and BlockReport is forced; the FSNamesystem is pushed to
/// recalculate required DN's activities such as replications and so on.
/// The number of missing and under-replicated blocks should be the same in
/// case of a single-DN cluster.
/// </remarks>
/// <exception cref="System.IO.IOException">in case of errors</exception>
public virtual void BlockReport_02()
{
    // How many distinct blocks get their files deleted from the data node.
    const int blocksToRemove = 2;

    string methodName = GenericTestUtils.GetMethodName();
    Log.Info("Running test " + methodName);
    Path filePath = new Path("/" + methodName + ".dat");
    DFSTestUtil.CreateFile(fs, filePath, FileSize, ReplFactor, rand.NextLong());

    // mock around with newly created blocks and delete some
    FilePath dataDir = new FilePath(cluster.GetDataDirectory());
    NUnit.Framework.Assert.IsTrue(dataDir.IsDirectory());

    IList<ExtendedBlock> blocks2Remove = new AList<ExtendedBlock>();
    IList<int> removedIndex = new AList<int>();
    IList<LocatedBlock> lBlocks = cluster.GetNameNodeRpc().GetBlockLocations(
        filePath.ToString(), FileStart, FileSize).GetLocatedBlocks();

    // Fail fast: the selection loop below needs that many distinct indices
    // and would otherwise spin forever on a file with too few blocks.
    NUnit.Framework.Assert.IsTrue(lBlocks.Count >= blocksToRemove,
        "File needs at least " + blocksToRemove + " blocks but has " + lBlocks.Count);

    // Pick distinct random block indices.
    while (removedIndex.Count != blocksToRemove)
    {
        int newRemoveIndex = rand.Next(lBlocks.Count);
        if (!removedIndex.Contains(newRemoveIndex))
        {
            removedIndex.AddItem(newRemoveIndex);
        }
    }
    foreach (int aRemovedIndex in removedIndex)
    {
        blocks2Remove.AddItem(lBlocks[aRemovedIndex].GetBlock());
    }
    if (Log.IsDebugEnabled())
    {
        Log.Debug("Number of blocks allocated " + lBlocks.Count);
    }

    DataNode dn0 = cluster.GetDataNodes()[DnN0];
    foreach (ExtendedBlock b in blocks2Remove)
    {
        if (Log.IsDebugEnabled())
        {
            Log.Debug("Removing the block " + b.GetBlockName());
        }
        foreach (FilePath f in FindAllFiles(dataDir, new BlockReportTestBase.MyFileFilter(
            this, b.GetBlockName(), true)))
        {
            // The block is unfinalized in the dataset before each matching file is deleted.
            DataNodeTestUtils.GetFSDataset(dn0).UnfinalizeBlock(b);
            if (!f.Delete())
            {
                Log.Warn("Couldn't delete " + b.GetBlockName());
            }
            else
            {
                Log.Debug("Deleted file " + f.ToString());
            }
        }
    }
    WaitTil(TimeUnit.Seconds.ToMillis(DnRescanExtraWait));

    // all blocks belong to the same file, hence same BP
    string poolId = cluster.GetNamesystem().GetBlockPoolId();
    DatanodeRegistration dnR = dn0.GetDNRegistrationForBP(poolId);
    StorageBlockReport[] reports = GetBlockReports(dn0, poolId, false, false);
    SendBlockReports(dnR, poolId, reports);

    BlockManagerTestUtil.GetComputedDatanodeWork(cluster.GetNamesystem().GetBlockManager());
    PrintStats();

    // NUnit takes the failure message as the LAST argument.
    NUnit.Framework.Assert.AreEqual(blocks2Remove.Count,
        cluster.GetNamesystem().GetMissingBlocksCount(),
        "Wrong number of MissingBlocks is found");
    NUnit.Framework.Assert.AreEqual(blocks2Remove.Count,
        cluster.GetNamesystem().GetUnderReplicatedBlocks(),
        "Wrong number of UnderReplicatedBlocks is found");
}