예제 #1
0
        /// <summary>
        /// Cross-checks the block counts reported by the name node against the
        /// physical block files counted by <c>CountRealBlocks</c>.
        /// </summary>
        /// <remarks>
        /// Verifies two things:
        /// 1. the number of locations of each block in the name node
        /// matches the number of actual files;
        /// 2. block files + under-replicated/pending blocks equal the total number of
        /// blocks that a file has, including the replication (e.g. an HDFS file with
        /// 30 blocks and repl=2 has 60 in total).
        /// </remarks>
        /// <param name="fn">- file name</param>
        /// <param name="fs">- file size</param>
        /// <exception cref="System.IO.IOException"/>
        private void Verify(string fn, int fs)
        {
            // now count how many physical blocks are there
            int totalReal = CountRealBlocks(block_map);
            System.Console.Out.WriteLine("countRealBlocks counted " + totalReal + " blocks");

            // count how many blocks are stored in the NN structures
            int totalNN = CountNNBlocks(block_map, fn, fs);
            System.Console.Out.WriteLine("countNNBlocks counted " + totalNN + " blocks");

            // Only the per-block records are needed, so walk the values directly
            // instead of looking every key up again.
            foreach (TestDataNodeVolumeFailure.BlockLocs bl in block_map.Values)
            {
                // number of physical files (1 or 2) should be same as number of datanodes
                // in the list of the block locations
                // NUnit takes (expected, actual, message) - the message goes last.
                NUnit.Framework.Assert.AreEqual(bl.num_files, bl.num_locs,
                                                "Num files should match num locations");
            }
            NUnit.Framework.Assert.AreEqual(totalReal, totalNN,
                                            "Num physical blocks should match num stored in the NN");

            // now check the number of under-replicated blocks
            FSNamesystem fsn = cluster.GetNamesystem();

            // force update of all the metric counts by calling computeDatanodeWork
            BlockManagerTestUtil.GetComputedDatanodeWork(fsn.GetBlockManager());

            // get all the counts
            long underRepl = fsn.GetUnderReplicatedBlocks();
            long pendRepl  = fsn.GetPendingReplicationBlocks();
            long totalRepl = underRepl + pendRepl;

            // The total every replica of every block should add up to; used for both the
            // diagnostic output and the assertion so the two can never disagree.
            long expectedTotal = blocks_num * repl;

            System.Console.Out.WriteLine("underreplicated after = " + underRepl + " and pending repl ="
                                         + pendRepl + "; total underRepl = " + totalRepl);
            System.Console.Out.WriteLine("total blocks (real and replicating):" + (totalReal + totalRepl)
                                         + " vs. all files blocks " + expectedTotal);

            // together all the blocks should be equal to all real + all underreplicated
            NUnit.Framework.Assert.AreEqual(expectedTotal, totalReal + totalRepl,
                                            "Incorrect total block count");
        }
예제 #2
0
        /// <summary>
        /// Raises and then lowers the replication of a test file on NN1, fails over to NN2
        /// while NN1 still believes it is active, and checks that NN2 ends up with no
        /// postponed, under-replicated or pending-replication blocks and that the file
        /// can still be read.
        /// </summary>
        public virtual void TestNNClearsCommandsOnFailoverWithReplChanges()
        {
            // Make lots of blocks to increase chances of triggering a bug.
            DFSTestUtil.CreateFile(fs, TestFilePath, 30 * SmallBlock, (short)1, 1L);

            Banner("rolling NN1's edit log, forcing catch-up");
            HATestUtil.WaitForStandbyToCatchUp(nn1, nn2);

            // Get some new replicas reported so that NN2 now considers
            // them over-replicated and schedules some more deletions
            nn1.GetRpcServer().SetReplication(TestFile, (short)2);

            // Keep computing datanode work until a pass reports nothing left to schedule.
            while (true)
            {
                BlockManager schedulingManager = nn1.GetNamesystem().GetBlockManager();
                if (BlockManagerTestUtil.GetComputedDatanodeWork(schedulingManager) <= 0)
                {
                    break;
                }
                Log.Info("Getting more replication work computed");
            }

            // Poll once a second until NN1 has no pending replications left.
            BlockManager activeManager = nn1.GetNamesystem().GetBlockManager();
            while (true)
            {
                if (activeManager.GetPendingReplicationBlocksCount() <= 0)
                {
                    break;
                }
                BlockManagerTestUtil.UpdateState(activeManager);
                cluster.TriggerHeartbeats();
                Sharpen.Thread.Sleep(1000);
            }

            Banner("triggering BRs");
            cluster.TriggerBlockReports();
            nn1.GetRpcServer().SetReplication(TestFile, (short)1);

            Banner("computing invalidation on nn1");
            BlockManager invalidatingManager = nn1.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.ComputeInvalidationWork(invalidatingManager);
            DoMetasave(nn1);

            Banner("computing invalidation on nn2");
            BlockManager standbyManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.ComputeInvalidationWork(standbyManager);
            DoMetasave(nn2);

            // Dump some info for debugging purposes.
            Banner("Metadata immediately before failover");
            DoMetasave(nn2);

            // Transition nn2 to active even though nn1 still thinks it's active
            Banner("Failing to NN2 but let NN1 continue to think it's active");
            NameNodeAdapter.AbortEditLogs(nn1);
            NameNodeAdapter.EnterSafeMode(nn1, false);
            standbyManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.ComputeInvalidationWork(standbyManager);
            cluster.TransitionToActive(1);

            // Check that the standby picked up the replication change.
            NUnit.Framework.Assert.AreEqual(1, nn2.GetRpcServer().GetFileInfo(TestFile).GetReplication());

            // Dump some info for debugging purposes.
            Banner("Metadata immediately after failover");
            DoMetasave(nn2);

            Banner("Triggering heartbeats and block reports so that fencing is completed");
            cluster.TriggerHeartbeats();
            cluster.TriggerBlockReports();

            Banner("Metadata after nodes have all block-reported");
            DoMetasave(nn2);

            // Force a rescan of postponedMisreplicatedBlocks.
            standbyManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.CheckHeartbeat(standbyManager);
            BlockManagerTestUtil.RescanPostponedMisreplicatedBlocks(standbyManager);

            // The block should no longer be postponed.
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPostponedMisreplicatedBlocks());

            // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
            standbyManager = nn2.GetNamesystem().GetBlockManager();
            BlockManagerTestUtil.ComputeInvalidationWork(standbyManager);
            HATestUtil.WaitForNNToIssueDeletions(nn2);
            cluster.TriggerHeartbeats();
            HATestUtil.WaitForDNDeletions(cluster);
            cluster.TriggerDeletionReports();

            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetUnderReplicatedBlocks());
            NUnit.Framework.Assert.AreEqual(0, nn2.GetNamesystem().GetPendingReplicationBlocks());

            Banner("Making sure the file is still readable");
            FileSystem failoverFs = cluster.GetFileSystem(1);
            DFSTestUtil.ReadFile(failoverFs, TestFilePath);
        }
예제 #3
0
        /// <summary>
        /// Writes a file, deletes the on-disk files of two randomly chosen blocks, forces a
        /// block report and checks the missing / under-replicated block counts.
        /// </summary>
        /// <remarks>
        /// The test writes a file, then removes a couple of random blocks and forces a
        /// BlockReport; the FSNamesystem is pushed to recalculate required DN's activities
        /// such as replications and so on. The number of missing and under-replicated
        /// blocks should be the same in case of a single-DN cluster.
        /// </remarks>
        /// <exception cref="System.IO.IOException">in case of errors</exception>
        public virtual void BlockReport_02()
        {
            // How many distinct blocks get their files deleted below.
            const int removeCount = 2;

            string methodName = GenericTestUtils.GetMethodName();

            Log.Info("Running test " + methodName);
            Path filePath = new Path("/" + methodName + ".dat");

            DFSTestUtil.CreateFile(fs, filePath, FileSize, ReplFactor, rand.NextLong());
            // mock around with newly created blocks and delete some
            FilePath dataDir = new FilePath(cluster.GetDataDirectory());

            NUnit.Framework.Assert.IsTrue(dataDir.IsDirectory());
            IList <ExtendedBlock> blocks2Remove = new AList <ExtendedBlock>();
            IList <int>           removedIndex  = new AList <int>();
            IList <LocatedBlock>  lBlocks       = cluster.GetNameNodeRpc().GetBlockLocations(
                filePath.ToString(), FileStart, FileSize).GetLocatedBlocks();

            // The selection loop below needs removeCount distinct indices; with fewer
            // blocks it could never finish, so fail fast with a clear message instead.
            NUnit.Framework.Assert.IsTrue(lBlocks.Count >= removeCount,
                                          "Not enough blocks allocated to remove " + removeCount);

            // Draw random indices until removeCount distinct ones have been collected.
            while (removedIndex.Count != removeCount)
            {
                int newRemoveIndex = rand.Next(lBlocks.Count);
                if (!removedIndex.Contains(newRemoveIndex))
                {
                    removedIndex.AddItem(newRemoveIndex);
                }
            }
            foreach (int aRemovedIndex in removedIndex)
            {
                blocks2Remove.AddItem(lBlocks[aRemovedIndex].GetBlock());
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Number of blocks allocated " + lBlocks.Count);
            }
            DataNode dn0 = cluster.GetDataNodes()[DnN0];

            foreach (ExtendedBlock b in blocks2Remove)
            {
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("Removing the block " + b.GetBlockName());
                }
                // Delete every file under the data directory that the filter matches
                // for this block's name.
                foreach (FilePath f in FindAllFiles(dataDir, new BlockReportTestBase.MyFileFilter
                                                        (this, b.GetBlockName(), true)))
                {
                    DataNodeTestUtils.GetFSDataset(dn0).UnfinalizeBlock(b);
                    if (!f.Delete())
                    {
                        Log.Warn("Couldn't delete " + b.GetBlockName());
                    }
                    else
                    {
                        Log.Debug("Deleted file " + f.ToString());
                    }
                }
            }
            WaitTil(TimeUnit.Seconds.ToMillis(DnRescanExtraWait));
            // all blocks belong to the same file, hence same BP
            string poolId            = cluster.GetNamesystem().GetBlockPoolId();
            DatanodeRegistration dnR = dn0.GetDNRegistrationForBP(poolId);

            StorageBlockReport[] reports = GetBlockReports(dn0, poolId, false, false);
            SendBlockReports(dnR, poolId, reports);
            BlockManagerTestUtil.GetComputedDatanodeWork(cluster.GetNamesystem().GetBlockManager());
            PrintStats();

            // NUnit takes (expected, actual, message) - the message goes last.
            NUnit.Framework.Assert.AreEqual(blocks2Remove.Count,
                                            cluster.GetNamesystem().GetMissingBlocksCount(),
                                            "Wrong number of MissingBlocks is found");
            NUnit.Framework.Assert.AreEqual(blocks2Remove.Count,
                                            cluster.GetNamesystem().GetUnderReplicatedBlocks(),
                                            "Wrong number of UnderReplicatedBlocks is found");
        }