Example #1
0
        /// <summary>
        /// Ensure that the given NameNode marks the specified DataNode as
        /// entirely dead/expired.
        /// </summary>
        /// <param name="nn">the NameNode to manipulate</param>
        /// <param name="dnName">the name of the DataNode</param>
        public static void NoticeDeadDatanode(NameNode nn, string dnName)
        {
            FSNamesystem namesystem = nn.GetNamesystem();

            namesystem.WriteLock();
            try
            {
                DatanodeManager      dnm    = namesystem.GetBlockManager().GetDatanodeManager();
                HeartbeatManager     hbm    = dnm.GetHeartbeatManager();
                DatanodeDescriptor[] dnds   = hbm.GetDatanodes();
                DatanodeDescriptor   theDND = null;
                foreach (DatanodeDescriptor dnd in dnds)
                {
                    if (dnd.GetXferAddr().Equals(dnName))
                    {
                        theDND = dnd;
                    }
                }
                NUnit.Framework.Assert.IsNotNull("Could not find DN with name: " + dnName, theDND
                                                 );
                lock (hbm)
                {
                    DFSTestUtil.SetDatanodeDead(theDND);
                    hbm.HeartbeatCheck();
                }
            }
            finally
            {
                namesystem.WriteUnlock();
            }
        }
        public virtual void TestProcesOverReplicateBlock()
        {
            Configuration conf = new HdfsConfiguration();

            conf.SetLong(DFSConfigKeys.DfsDatanodeScanPeriodHoursKey, 100L);
            conf.SetLong(DFSConfigKeys.DfsBlockreportIntervalMsecKey, 1000L);
            conf.Set(DFSConfigKeys.DfsNamenodeReplicationPendingTimeoutSecKey, Sharpen.Extensions.ToString
                         (2));
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
            FileSystem     fs      = cluster.GetFileSystem();

            try
            {
                Path fileName = new Path("/foo1");
                DFSTestUtil.CreateFile(fs, fileName, 2, (short)3, 0L);
                DFSTestUtil.WaitReplication(fs, fileName, (short)3);
                // corrupt the block on datanode 0
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
                NUnit.Framework.Assert.IsTrue(cluster.CorruptReplica(0, block));
                MiniDFSCluster.DataNodeProperties dnProps = cluster.StopDataNode(0);
                // remove block scanner log to trigger block scanning
                FilePath scanCursor = new FilePath(new FilePath(MiniDFSCluster.GetFinalizedDir(cluster
                                                                                               .GetInstanceStorageDir(0, 0), cluster.GetNamesystem().GetBlockPoolId()).GetParent
                                                                    ()).GetParent(), "scanner.cursor");
                //wait for one minute for deletion to succeed;
                for (int i = 0; !scanCursor.Delete(); i++)
                {
                    NUnit.Framework.Assert.IsTrue("Could not delete " + scanCursor.GetAbsolutePath()
                                                  + " in one minute", i < 60);
                    try
                    {
                        Sharpen.Thread.Sleep(1000);
                    }
                    catch (Exception)
                    {
                    }
                }
                // restart the datanode so the corrupt replica will be detected
                cluster.RestartDataNode(dnProps);
                DFSTestUtil.WaitReplication(fs, fileName, (short)2);
                string     blockPoolId     = cluster.GetNamesystem().GetBlockPoolId();
                DatanodeID corruptDataNode = DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes
                                                                                          ()[2], blockPoolId);
                FSNamesystem     namesystem = cluster.GetNamesystem();
                BlockManager     bm         = namesystem.GetBlockManager();
                HeartbeatManager hm         = bm.GetDatanodeManager().GetHeartbeatManager();
                try
                {
                    namesystem.WriteLock();
                    lock (hm)
                    {
                        // set live datanode's remaining space to be 0
                        // so they will be chosen to be deleted when over-replication occurs
                        string corruptMachineName = corruptDataNode.GetXferAddr();
                        foreach (DatanodeDescriptor datanode in hm.GetDatanodes())
                        {
                            if (!corruptMachineName.Equals(datanode.GetXferAddr()))
                            {
                                datanode.GetStorageInfos()[0].SetUtilizationForTesting(100L, 100L, 0, 100L);
                                datanode.UpdateHeartbeat(BlockManagerTestUtil.GetStorageReportsForDatanode(datanode
                                                                                                           ), 0L, 0L, 0, 0, null);
                            }
                        }
                        // decrease the replication factor to 1;
                        NameNodeAdapter.SetReplication(namesystem, fileName.ToString(), (short)1);
                        // corrupt one won't be chosen to be excess one
                        // without 4910 the number of live replicas would be 0: block gets lost
                        NUnit.Framework.Assert.AreEqual(1, bm.CountNodes(block.GetLocalBlock()).LiveReplicas
                                                            ());
                    }
                }
                finally
                {
                    namesystem.WriteUnlock();
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Example #3
0
        public virtual void TestNodeCount()
        {
            // start a mini dfs cluster of 2 nodes
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(ReplicationFactor
                                                                                   ).Build();

            try
            {
                FSNamesystem     namesystem = cluster.GetNamesystem();
                BlockManager     bm         = namesystem.GetBlockManager();
                HeartbeatManager hm         = bm.GetDatanodeManager().GetHeartbeatManager();
                FileSystem       fs         = cluster.GetFileSystem();
                // populate the cluster with a one block file
                Path FilePath = new Path("/testfile");
                DFSTestUtil.CreateFile(fs, FilePath, 1L, ReplicationFactor, 1L);
                DFSTestUtil.WaitReplication(fs, FilePath, ReplicationFactor);
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, FilePath);
                // keep a copy of all datanode descriptor
                DatanodeDescriptor[] datanodes = hm.GetDatanodes();
                // start two new nodes
                cluster.StartDataNodes(conf, 2, true, null, null);
                cluster.WaitActive();
                // bring down first datanode
                DatanodeDescriptor datanode = datanodes[0];
                MiniDFSCluster.DataNodeProperties dnprop = cluster.StopDataNode(datanode.GetXferAddr
                                                                                    ());
                // make sure that NN detects that the datanode is down
                BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), datanode.GetXferAddr
                                                            ());
                // the block will be replicated
                DFSTestUtil.WaitReplication(fs, FilePath, ReplicationFactor);
                // restart the first datanode
                cluster.RestartDataNode(dnprop);
                cluster.WaitActive();
                // check if excessive replica is detected (transient)
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() == 0)
                {
                    CheckTimeout("excess replicas not detected");
                }
                // find out a non-excess node
                DatanodeDescriptor nonExcessDN = null;
                foreach (DatanodeStorageInfo storage in bm.blocksMap.GetStorages(block.GetLocalBlock
                                                                                     ()))
                {
                    DatanodeDescriptor  dn     = storage.GetDatanodeDescriptor();
                    ICollection <Block> blocks = bm.excessReplicateMap[dn.GetDatanodeUuid()];
                    if (blocks == null || !blocks.Contains(block.GetLocalBlock()))
                    {
                        nonExcessDN = dn;
                        break;
                    }
                }
                NUnit.Framework.Assert.IsTrue(nonExcessDN != null);
                // bring down non excessive datanode
                dnprop = cluster.StopDataNode(nonExcessDN.GetXferAddr());
                // make sure that NN detects that the datanode is down
                BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), nonExcessDN.GetXferAddr
                                                            ());
                // The block should be replicated
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).LiveReplicas() != ReplicationFactor
                       )
                {
                    CheckTimeout("live replica count not correct", 1000);
                }
                // restart the first datanode
                cluster.RestartDataNode(dnprop);
                cluster.WaitActive();
                // check if excessive replica is detected (transient)
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() != 2)
                {
                    CheckTimeout("excess replica count not equal to 2");
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }