Esempio n. 1
0
        /// <summary>
        /// Ensure that the given NameNode marks the specified DataNode as
        /// entirely dead/expired.
        /// </summary>
        /// <remarks>
        /// Runs under the namesystem write lock: the descriptor is looked up among the
        /// heartbeat manager's datanodes, marked dead through
        /// <c>DFSTestUtil.SetDatanodeDead</c>, and a heartbeat check is then triggered
        /// while holding the heartbeat manager's monitor. The assertion fails if no
        /// datanode matches <paramref name="dnName"/>.
        /// </remarks>
        /// <param name="nn">the NameNode to manipulate</param>
        /// <param name="dnName">
        /// the name of the DataNode; it is compared against each descriptor's
        /// <c>GetXferAddr()</c> value
        /// </param>
        public static void NoticeDeadDatanode(NameNode nn, string dnName)
        {
            FSNamesystem namesystem = nn.GetNamesystem();

            namesystem.WriteLock();
            try
            {
                DatanodeManager    dnm    = namesystem.GetBlockManager().GetDatanodeManager();
                HeartbeatManager   hbm    = dnm.GetHeartbeatManager();
                DatanodeDescriptor theDND = null;
                foreach (DatanodeDescriptor dnd in hbm.GetDatanodes())
                {
                    if (dnd.GetXferAddr().Equals(dnName))
                    {
                        // No early exit: if several descriptors match, the last one wins.
                        theDND = dnd;
                    }
                }
                // NUnit expects the object under test first and the message second.
                // With the arguments the other way round the (never-null) message string
                // was the value being checked, so a missing datanode went unnoticed.
                NUnit.Framework.Assert.IsNotNull(theDND, "Could not find DN with name: " + dnName);
                lock (hbm)
                {
                    DFSTestUtil.SetDatanodeDead(theDND);
                    hbm.HeartbeatCheck();
                }
            }
            finally
            {
                namesystem.WriteUnlock();
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a decommission manager wired to the given namesystem, block manager
 /// and heartbeat manager, with empty node bookkeeping and a single-threaded
 /// scheduled executor whose daemon threads are named "DecommissionMonitor-%d".
 /// </summary>
 /// <param name="namesystem">the namesystem stored on this instance</param>
 /// <param name="blockManager">the block manager stored on this instance</param>
 /// <param name="hbManager">the heartbeat manager stored on this instance</param>
 internal DecommissionManager(Namesystem namesystem, BlockManager blockManager, HeartbeatManager
                              hbManager)
 {
     // Collaborators handed in by the caller.
     this.namesystem   = namesystem;
     this.blockManager = blockManager;
     this.hbManager    = hbManager;

     // One scheduled worker thread, created as a daemon.
     var monitorThreads = new ThreadFactoryBuilder()
                          .SetNameFormat("DecommissionMonitor-%d")
                          .SetDaemon(true)
                          .Build();
     executor = Executors.NewScheduledThreadPool(1, monitorThreads);

     // Bookkeeping starts out empty.
     decomNodeBlocks = new SortedDictionary <DatanodeDescriptor, AbstractList <BlockInfoContiguous> >();
     pendingNodes    = new List <DatanodeDescriptor>();
 }
        /// <summary>
        /// Checks that a corrupt replica is not chosen as the excess replica when a
        /// block becomes over-replicated: after the replication factor is lowered to 1
        /// the block must still report exactly one live replica.
        /// </summary>
        /// <remarks>
        /// Starts a three-datanode mini cluster, corrupts the replica on datanode 0,
        /// restarts that datanode so the corruption is detected, and then reduces the
        /// file's replication factor while holding the namesystem write lock and the
        /// heartbeat manager's monitor.
        /// </remarks>
        public virtual void TestProcesOverReplicateBlock()
        {
            Configuration conf = new HdfsConfiguration();

            conf.SetLong(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, 100L);
            conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
            conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey, Sharpen.Extensions.ToString
                         (2));
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
            FileSystem     fs      = cluster.GetFileSystem();

            try
            {
                Path fileName = new Path("/foo1");
                DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
                DFSTestUtil.WaitReplication(fs, fileName, (short)3);
                // corrupt the block on datanode 0
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
                NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(0, block));
                MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
                // remove block scanner log to trigger block scanning
                FilePath scanCursor = new FilePath(new FilePath(MiniDFSCluster.GetFinalizedDir(cluster
                                                                                               .GetInstanceStorageDir(0, 0), cluster.GetNamesystem().GetBlockPoolId()).GetParent
                                                                    ()).GetParent(), "scanner.cursor");
                // retry the deletion once per second, for up to one minute
                for (int i = 0; !scanCursor.Delete(); i++)
                {
                    // NUnit takes the condition first and the failure message second.
                    NUnit.Framework.Assert.IsTrue(i < 60, "Could not delete " + scanCursor.GetAbsolutePath()
                                                  + " in one minute");
                    try
                    {
                        Sharpen.Thread.Sleep(1000);
                    }
                    catch (Exception)
                    {
                        // Deliberately ignored: a failed or interrupted sleep only means
                        // the next deletion attempt happens sooner.
                    }
                }
                // restart the datanode so the corrupt replica will be detected
                cluster.RestartDataNode(dnProps);
                DFSTestUtil.WaitReplication(fs, fileName, (short)2);
                string     blockPoolId     = cluster.GetNamesystem().GetBlockPoolId();
                DatanodeID corruptDataNode = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes
                                                                                          ()[2], blockPoolId);
                FSNamesystem     namesystem = cluster.GetNamesystem();
                BlockManager     bm         = namesystem.GetBlockManager();
                HeartbeatManager hm         = bm.GetDatanodeManager().GetHeartbeatManager();
                // Acquire the lock before entering the try block so that the finally
                // clause never releases a lock that was not successfully taken.
                namesystem.WriteLock();
                try
                {
                    lock (hm)
                    {
                        // set live datanode's remaining space to be 0
                        // so they will be chosen to be deleted when over-replication occurs
                        string corruptMachineName = corruptDataNode.GetXferAddr();
                        foreach (DatanodeDescriptor datanode in hm.GetDatanodes())
                        {
                            if (!corruptMachineName.Equals(datanode.GetXferAddr()))
                            {
                                datanode.GetStorageInfos()[0].SetUtilizationForTesting(100L, 100L, 0, 100L);
                                datanode.UpdateHeartbeat(BlockManagerTestUtil.GetStorageReportsForDatanode(datanode
                                                                                                           ), 0L, 0L, 0, 0, null);
                            }
                        }
                        // decrease the replication factor to 1;
                        NameNodeAdapter.SetReplication(namesystem, fileName.ToString(), (short)1);
                        // corrupt one won't be chosen to be excess one
                        // without 4910 the number of live replicas would be 0: block gets lost
                        NUnit.Framework.Assert.AreEqual(1, bm.CountNodes(block.GetLocalBlock()).LiveReplicas
                                                            ());
                    }
                }
                finally
                {
                    namesystem.WriteUnlock();
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
        /// <summary>
        /// Checks which commands successive heartbeat responses hand back to a datanode
        /// that has replication and invalidation work queued.
        /// </summary>
        /// <remarks>
        /// The test queues <c>2 * limit + 1</c> blocks for replication and, later, for
        /// invalidation, then asserts that each heartbeat returns at most the
        /// configured replication limit and the default invalidation limit, until both
        /// queues are drained and a heartbeat returns no commands.
        /// </remarks>
        public virtual void TestHeartbeat()
        {
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();

            try
            {
                cluster.WaitActive();
                FSNamesystem     namesystem = cluster.GetNamesystem();
                HeartbeatManager hm         = namesystem.GetBlockManager().GetDatanodeManager().GetHeartbeatManager
                                                  ();
                string poolId = namesystem.GetBlockPoolId();
                DatanodeRegistration nodeReg = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes
                                                                                            ()[0], poolId);
                DatanodeDescriptor dd        = NameNodeAdapter.GetDatanode(namesystem, nodeReg);
                string             storageID = DatanodeStorage.GenerateUuid();
                dd.UpdateStorage(new DatanodeStorage(storageID));

                // Queue sizes: two full batches plus one leftover block each.
                int remainingBlocks   = 1;
                int maxReplicateLimit = conf.GetInt(DFSConfigKeys.DfsNamenodeReplicationMaxStreamsKey
                                                    , 2);
                int maxInvalidateLimit          = DFSConfigKeys.DfsBlockInvalidateLimitDefault;
                int maxInvalidateBlocks         = 2 * maxInvalidateLimit + remainingBlocks;
                int maxReplicateBlocks          = 2 * maxReplicateLimit + remainingBlocks;
                DatanodeStorageInfo[] oneTarget = new DatanodeStorageInfo[] { dd.GetStorageInfo(storageID
                                                                                                ) };
                // Acquire the lock before entering the try block so that the finally
                // clause never releases a lock that was not successfully taken.
                namesystem.WriteLock();
                try
                {
                    lock (hm)
                    {
                        for (int i = 0; i < maxReplicateBlocks; i++)
                        {
                            dd.AddBlockToBeReplicated(new Block(i, 0, GenerationStamp.LastReservedStamp), oneTarget
                                                      );
                        }

                        // Heartbeat 1: only replication work is queued.
                        DatanodeCommand[] cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands
                                                     ();
                        NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaTransfer, cmds[0].GetAction()
                                                        );
                        NUnit.Framework.Assert.AreEqual(maxReplicateLimit, ((BlockCommand)cmds[0]).GetBlocks
                                                            ().Length);

                        AList <Block> blockList = new AList <Block>(maxInvalidateBlocks);
                        for (int j = 0; j < maxInvalidateBlocks; j++)
                        {
                            blockList.AddItem(new Block(j, 0, GenerationStamp.LastReservedStamp));
                        }
                        dd.AddBlocksToBeInvalidated(blockList);

                        // Heartbeat 2: a full replication batch and a full invalidation batch.
                        cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(2, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaTransfer, cmds[0].GetAction()
                                                        );
                        NUnit.Framework.Assert.AreEqual(maxReplicateLimit, ((BlockCommand)cmds[0]).GetBlocks
                                                            ().Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaInvalidate, cmds[1].GetAction
                                                            ());
                        NUnit.Framework.Assert.AreEqual(maxInvalidateLimit, ((BlockCommand)cmds[1]).GetBlocks
                                                            ().Length);

                        // Heartbeat 3: the leftover replication block and another full invalidation batch.
                        cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(2, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaTransfer, cmds[0].GetAction()
                                                        );
                        NUnit.Framework.Assert.AreEqual(remainingBlocks, ((BlockCommand)cmds[0]).GetBlocks
                                                            ().Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaInvalidate, cmds[1].GetAction
                                                            ());
                        NUnit.Framework.Assert.AreEqual(maxInvalidateLimit, ((BlockCommand)cmds[1]).GetBlocks
                                                            ().Length);

                        // Heartbeat 4: only the leftover invalidation block remains.
                        cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaInvalidate, cmds[0].GetAction
                                                            ());
                        NUnit.Framework.Assert.AreEqual(remainingBlocks, ((BlockCommand)cmds[0]).GetBlocks
                                                            ().Length);

                        // Heartbeat 5: nothing left to hand out.
                        cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(0, cmds.Length);
                    }
                }
                finally
                {
                    namesystem.WriteUnlock();
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
        /// <summary>
        /// Checks which block locations a block-recovery command carries when the
        /// datanodes holding the block are all fresh, when one is stale, and when all
        /// are stale.
        /// </summary>
        /// <remarks>
        /// Staleness is simulated with <c>DFSTestUtil.ResetLastUpdatesWithOffset</c>.
        /// All heartbeats are sent while holding the namesystem write lock and the
        /// heartbeat manager's monitor.
        /// </remarks>
        public virtual void TestHeartbeatBlockRecovery()
        {
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                FSNamesystem     namesystem = cluster.GetNamesystem();
                HeartbeatManager hm         = namesystem.GetBlockManager().GetDatanodeManager().GetHeartbeatManager
                                                  ();
                string poolId = namesystem.GetBlockPoolId();
                DatanodeRegistration nodeReg1 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.
                                                                                         GetDataNodes()[0], poolId);
                DatanodeDescriptor dd1 = NameNodeAdapter.GetDatanode(namesystem, nodeReg1);
                dd1.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
                DatanodeRegistration nodeReg2 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.
                                                                                         GetDataNodes()[1], poolId);
                DatanodeDescriptor dd2 = NameNodeAdapter.GetDatanode(namesystem, nodeReg2);
                dd2.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
                DatanodeRegistration nodeReg3 = DataNodeTestUtils.GetDNRegistrationForBP(cluster.
                                                                                         GetDataNodes()[2], poolId);
                DatanodeDescriptor dd3 = NameNodeAdapter.GetDatanode(namesystem, nodeReg3);
                dd3.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
                // Acquire the lock before entering the try block so that the finally
                // clause never releases a lock that was not successfully taken.
                namesystem.WriteLock();
                try
                {
                    lock (hm)
                    {
                        NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem);
                        NameNodeAdapter.SendHeartBeat(nodeReg2, dd2, namesystem);
                        NameNodeAdapter.SendHeartBeat(nodeReg3, dd3, namesystem);

                        // Test with all alive nodes.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd2, 0);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                        DatanodeStorageInfo[] storages = new DatanodeStorageInfo[] { dd1.GetStorageInfos(
                                                                                         )[0], dd2.GetStorageInfos()[0], dd3.GetStorageInfos()[0] };
                        DatanodeInfo[] recoveringNodes = HeartbeatForRecoveryLocations(namesystem, nodeReg1,
                                                                                       dd1, storages);
                        NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);

                        // Test with one stale node.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                        // More than the default stale interval of 30 seconds.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                        recoveringNodes = HeartbeatForRecoveryLocations(namesystem, nodeReg1, dd1, storages);
                        NUnit.Framework.Assert.AreEqual(2, recoveringNodes.Length);
                        // dd2 is skipped.
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd3);

                        // Test with all stale node.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -60 * 1000);
                        // More than the default stale interval of 30 seconds.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -80 * 1000);
                        recoveringNodes = HeartbeatForRecoveryLocations(namesystem, nodeReg1, dd1, storages);
                        // All three locations are expected here even though every node was
                        // pushed past the stale interval before the heartbeat.
                        // NOTE(review): presumably the name node falls back to the full
                        // location list when too few non-stale nodes remain - confirm
                        // against the datanode manager's heartbeat handling.
                        NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);
                    }
                }
                finally
                {
                    namesystem.WriteUnlock();
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }

        /// <summary>
        /// Queues a new under-recovery block (id 0, replication 3) on
        /// <paramref name="dd"/>, sends a heartbeat for it, asserts that the response
        /// is a single recover-block command containing exactly one block, and returns
        /// that block's locations.
        /// </summary>
        /// <remarks>
        /// Takes no locks itself; it is called from inside the locked region of
        /// <c>TestHeartbeatBlockRecovery</c>.
        /// </remarks>
        /// <param name="namesystem">the namesystem the heartbeat is sent to</param>
        /// <param name="nodeReg">registration of the datanode sending the heartbeat</param>
        /// <param name="dd">descriptor of the datanode sending the heartbeat</param>
        /// <param name="storages">expected storage locations of the block under recovery</param>
        /// <returns>the locations listed for the single recovering block</returns>
        private static DatanodeInfo[] HeartbeatForRecoveryLocations(FSNamesystem namesystem, DatanodeRegistration
                                                                    nodeReg, DatanodeDescriptor dd, DatanodeStorageInfo[] storages)
        {
            BlockInfoContiguousUnderConstruction blockInfo = new BlockInfoContiguousUnderConstruction
                                                                 (new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3, HdfsServerConstants.BlockUCState
                                                                 .UnderRecovery, storages);
            dd.AddBlockToBeRecovered(blockInfo);
            DatanodeCommand[] cmds = NameNodeAdapter.SendHeartBeat(nodeReg, dd, namesystem).GetCommands();
            NUnit.Framework.Assert.AreEqual(1, cmds.Length);
            NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction());
            BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand)cmds[0];
            NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
            return Sharpen.Collections.ToArray(recoveryCommand.GetRecoveringBlocks(), new BlockRecoveryCommand.RecoveringBlock
                                               [0])[0].GetLocations();
        }
Esempio n. 6
0
        /// <summary>
        /// Exercises replica counting while datanodes are stopped and restarted:
        /// excess replicas must show up after a stopped node comes back, and the live
        /// replica count must return to <c>ReplicationFactor</c> after a non-excess
        /// node goes down.
        /// </summary>
        public virtual void TestNodeCount()
        {
            // Bring up a mini DFS cluster with ReplicationFactor datanodes.
            Configuration          conf    = new HdfsConfiguration();
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf).NumDataNodes(ReplicationFactor);
            MiniDFSCluster         cluster = builder.Build();

            try
            {
                FSNamesystem     namesystem   = cluster.GetNamesystem();
                BlockManager     blockManager = namesystem.GetBlockManager();
                HeartbeatManager heartbeats   = blockManager.GetDatanodeManager().GetHeartbeatManager();
                FileSystem       fs           = cluster.GetFileSystem();

                // Write a one-block file and wait until it is fully replicated.
                Path testFile = new Path("/testfile");
                DFSTestUtil.CreateFile(fs, testFile, 1L, ReplicationFactor, 1L);
                DFSTestUtil.WaitReplication(fs, testFile, ReplicationFactor);
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, testFile);

                // Remember the descriptors of the initial datanodes.
                DatanodeDescriptor[] initialNodes = heartbeats.GetDatanodes();

                // Add two more datanodes.
                cluster.StartDataNodes(conf, 2, true, null, null);
                cluster.WaitActive();

                // Stop the first of the initial datanodes ...
                DatanodeDescriptor firstNode = initialNodes[0];
                MiniDFSCluster.DataNodeProperties stoppedNode = cluster.StopDataNode(firstNode.GetXferAddr());
                // ... and make the NN notice that it is gone.
                BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), firstNode.GetXferAddr());

                // The block gets re-replicated elsewhere.
                DFSTestUtil.WaitReplication(fs, testFile, ReplicationFactor);

                // Bring the first datanode back.
                cluster.RestartDataNode(stoppedNode);
                cluster.WaitActive();

                // Wait for the (transient) excess replica to be detected.
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() == 0)
                {
                    CheckTimeout("excess replicas not detected");
                }

                // Pick a datanode whose replica is not marked as excess.
                // NOTE(review): if excessReplicateMap is a plain .NET dictionary, the
                // indexer throws for a missing key instead of yielding null - confirm.
                DatanodeDescriptor nonExcessDN = null;
                foreach (DatanodeStorageInfo storage in blockManager.blocksMap.GetStorages(block.GetLocalBlock()))
                {
                    DatanodeDescriptor  candidate    = storage.GetDatanodeDescriptor();
                    ICollection <Block> excessBlocks = blockManager.excessReplicateMap[candidate.GetDatanodeUuid()];
                    if (excessBlocks == null || !excessBlocks.Contains(block.GetLocalBlock()))
                    {
                        nonExcessDN = candidate;
                        break;
                    }
                }
                NUnit.Framework.Assert.IsTrue(nonExcessDN != null);

                // Stop that non-excess datanode ...
                stoppedNode = cluster.StopDataNode(nonExcessDN.GetXferAddr());
                // ... and make the NN notice that it is gone.
                BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), nonExcessDN.GetXferAddr());

                // Wait until the live replica count is back at the replication factor.
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).LiveReplicas() != ReplicationFactor)
                {
                    CheckTimeout("live replica count not correct", 1000);
                }

                // Bring the non-excess datanode back.
                cluster.RestartDataNode(stoppedNode);
                cluster.WaitActive();

                // Wait until two excess replicas are reported (transient).
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() != 2)
                {
                    CheckTimeout("excess replica count not equal to 2");
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a monitor that keeps a reference to the given heartbeat manager in
 /// its <c>_enclosing</c> field.
 /// </summary>
 /// <param name="_enclosing">
 /// the heartbeat manager to reference (presumably the outer instance that owns
 /// this monitor - confirm against the enclosing class)
 /// </param>
 internal Monitor(HeartbeatManager _enclosing)
 {
     this._enclosing = _enclosing;
 }