예제 #1
0
        /// <summary>
        /// Ensure that the given NameNode marks the specified DataNode as
        /// entirely dead/expired.
        /// </summary>
        /// <param name="nn">the NameNode to manipulate</param>
        /// <param name="dnName">the name of the DataNode</param>
        public static void NoticeDeadDatanode(NameNode nn, string dnName)
        {
            FSNamesystem namesystem = nn.GetNamesystem();

            namesystem.WriteLock();
            try
            {
                DatanodeManager      dnm    = namesystem.GetBlockManager().GetDatanodeManager();
                HeartbeatManager     hbm    = dnm.GetHeartbeatManager();
                DatanodeDescriptor[] dnds   = hbm.GetDatanodes();
                DatanodeDescriptor   theDND = null;
                foreach (DatanodeDescriptor dnd in dnds)
                {
                    if (dnd.GetXferAddr().Equals(dnName))
                    {
                        theDND = dnd;
                    }
                }
                NUnit.Framework.Assert.IsNotNull("Could not find DN with name: " + dnName, theDND
                                                 );
                lock (hbm)
                {
                    DFSTestUtil.SetDatanodeDead(theDND);
                    hbm.HeartbeatCheck();
                }
            }
            finally
            {
                namesystem.WriteUnlock();
            }
        }
예제 #2
0
        public virtual void TestIncludeExcludeLists()
        {
            BlockManager    bm   = Org.Mockito.Mockito.Mock <BlockManager>();
            FSNamesystem    fsn  = Org.Mockito.Mockito.Mock <FSNamesystem>();
            Configuration   conf = new Configuration();
            HostFileManager hm   = new HostFileManager();

            HostFileManager.HostSet includedNodes = new HostFileManager.HostSet();
            HostFileManager.HostSet excludedNodes = new HostFileManager.HostSet();
            includedNodes.Add(Entry("127.0.0.1:12345"));
            includedNodes.Add(Entry("localhost:12345"));
            includedNodes.Add(Entry("127.0.0.1:12345"));
            includedNodes.Add(Entry("127.0.0.2"));
            excludedNodes.Add(Entry("127.0.0.1:12346"));
            excludedNodes.Add(Entry("127.0.30.1:12346"));
            NUnit.Framework.Assert.AreEqual(2, includedNodes.Size());
            NUnit.Framework.Assert.AreEqual(2, excludedNodes.Size());
            hm.Refresh(includedNodes, excludedNodes);
            DatanodeManager dm = new DatanodeManager(bm, fsn, conf);

            Whitebox.SetInternalState(dm, "hostFileManager", hm);
            IDictionary <string, DatanodeDescriptor> dnMap = (IDictionary <string, DatanodeDescriptor
                                                                           >)Whitebox.GetInternalState(dm, "datanodeMap");

            // After the de-duplication, there should be only one DN from the included
            // nodes declared as dead.
            NUnit.Framework.Assert.AreEqual(2, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .All).Count);
            NUnit.Framework.Assert.AreEqual(2, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .Dead).Count);
            dnMap["uuid-foo"] = new DatanodeDescriptor(new DatanodeID("127.0.0.1", "localhost"
                                                                      , "uuid-foo", 12345, 1020, 1021, 1022));
            NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .Dead).Count);
            dnMap["uuid-bar"] = new DatanodeDescriptor(new DatanodeID("127.0.0.2", "127.0.0.2"
                                                                      , "uuid-bar", 12345, 1020, 1021, 1022));
            NUnit.Framework.Assert.AreEqual(0, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .Dead).Count);
            DatanodeDescriptor spam = new DatanodeDescriptor(new DatanodeID("127.0.0" + ".3",
                                                                            "127.0.0.3", "uuid-spam", 12345, 1020, 1021, 1022));

            DFSTestUtil.SetDatanodeDead(spam);
            includedNodes.Add(Entry("127.0.0.3:12345"));
            dnMap["uuid-spam"] = spam;
            NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .Dead).Count);
            Sharpen.Collections.Remove(dnMap, "uuid-spam");
            NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .Dead).Count);
            excludedNodes.Add(Entry("127.0.0.3"));
            NUnit.Framework.Assert.AreEqual(0, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                           .Dead).Count);
        }
예제 #3
0
        public virtual void TestXceiverCount()
        {
            Configuration conf = new HdfsConfiguration();

            // retry one time, if close fails
            conf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1);
            MiniDFSCluster cluster   = null;
            int            nodes     = 8;
            int            fileCount = 5;
            short          fileRepl  = 3;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nodes).Build();
                cluster.WaitActive();
                FSNamesystem          namesystem = cluster.GetNamesystem();
                DatanodeManager       dnm        = namesystem.GetBlockManager().GetDatanodeManager();
                IList <DataNode>      datanodes  = cluster.GetDataNodes();
                DistributedFileSystem fs         = cluster.GetFileSystem();
                // trigger heartbeats in case not already sent
                TriggerHeartbeats(datanodes);
                // check that all nodes are live and in service
                int expectedTotalLoad = nodes;
                // xceiver server adds 1 to load
                int expectedInServiceNodes = nodes;
                int expectedInServiceLoad  = nodes;
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // shutdown half the nodes and force a heartbeat check to ensure
                // counts are accurate
                for (int i = 0; i < nodes / 2; i++)
                {
                    DataNode           dn  = datanodes[i];
                    DatanodeDescriptor dnd = dnm.GetDatanode(dn.GetDatanodeId());
                    dn.Shutdown();
                    DFSTestUtil.SetDatanodeDead(dnd);
                    BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
                    //Verify decommission of dead node won't impact nodesInService metrics.
                    dnm.GetDecomManager().StartDecommission(dnd);
                    expectedInServiceNodes--;
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, namesystem.GetNumLiveDataNodes
                                                        ());
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem
                                                                                              ));
                    //Verify recommission of dead node won't impact nodesInService metrics.
                    dnm.GetDecomManager().StopDecommission(dnd);
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem
                                                                                              ));
                }
                // restart the nodes to verify that counts are correct after
                // node re-registration
                cluster.RestartDataNodes();
                cluster.WaitActive();
                datanodes = cluster.GetDataNodes();
                expectedInServiceNodes = nodes;
                NUnit.Framework.Assert.AreEqual(nodes, datanodes.Count);
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // create streams and hsync to force datastreamers to start
                DFSOutputStream[] streams = new DFSOutputStream[fileCount];
                for (int i_1 = 0; i_1 < fileCount; i_1++)
                {
                    streams[i_1] = (DFSOutputStream)fs.Create(new Path("/f" + i_1), fileRepl).GetWrappedStream
                                       ();
                    streams[i_1].Write(Sharpen.Runtime.GetBytesForString("1"));
                    streams[i_1].Hsync();
                    // the load for writers is 2 because both the write xceiver & packet
                    // responder threads are counted in the load
                    expectedTotalLoad     += 2 * fileRepl;
                    expectedInServiceLoad += 2 * fileRepl;
                }
                // force nodes to send load update
                TriggerHeartbeats(datanodes);
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // decomm a few nodes, substract their load from the expected load,
                // trigger heartbeat to force load update
                for (int i_2 = 0; i_2 < fileRepl; i_2++)
                {
                    expectedInServiceNodes--;
                    DatanodeDescriptor dnd = dnm.GetDatanode(datanodes[i_2].GetDatanodeId());
                    expectedInServiceLoad -= dnd.GetXceiverCount();
                    dnm.GetDecomManager().StartDecommission(dnd);
                    DataNodeTestUtils.TriggerHeartbeat(datanodes[i_2]);
                    Sharpen.Thread.Sleep(100);
                    CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }
                // check expected load while closing each stream.  recalc expected
                // load based on whether the nodes in the pipeline are decomm
                for (int i_3 = 0; i_3 < fileCount; i_3++)
                {
                    int decomm = 0;
                    foreach (DatanodeInfo dni in streams[i_3].GetPipeline())
                    {
                        DatanodeDescriptor dnd = dnm.GetDatanode(dni);
                        expectedTotalLoad -= 2;
                        if (dnd.IsDecommissionInProgress() || dnd.IsDecommissioned())
                        {
                            decomm++;
                        }
                        else
                        {
                            expectedInServiceLoad -= 2;
                        }
                    }
                    try
                    {
                        streams[i_3].Close();
                    }
                    catch (IOException ioe)
                    {
                        // nodes will go decommissioned even if there's a UC block whose
                        // other locations are decommissioned too.  we'll ignore that
                        // bug for now
                        if (decomm < fileRepl)
                        {
                            throw;
                        }
                    }
                    TriggerHeartbeats(datanodes);
                    // verify node count and loads
                    CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }
                // shutdown each node, verify node counts based on decomm state
                for (int i_4 = 0; i_4 < nodes; i_4++)
                {
                    DataNode dn = datanodes[i_4];
                    dn.Shutdown();
                    // force it to appear dead so live count decreases
                    DatanodeDescriptor dnDesc = dnm.GetDatanode(dn.GetDatanodeId());
                    DFSTestUtil.SetDatanodeDead(dnDesc);
                    BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
                    NUnit.Framework.Assert.AreEqual(nodes - 1 - i_4, namesystem.GetNumLiveDataNodes()
                                                    );
                    // first few nodes are already out of service
                    if (i_4 >= fileRepl)
                    {
                        expectedInServiceNodes--;
                    }
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem
                                                                                              ));
                    // live nodes always report load of 1.  no nodes is load 0
                    double expectedXceiverAvg = (i_4 == nodes - 1) ? 0.0 : 1.0;
                    NUnit.Framework.Assert.AreEqual((double)expectedXceiverAvg, GetInServiceXceiverAverage
                                                        (namesystem), Epsilon);
                }
                // final sanity check
                CheckClusterHealth(0, namesystem, 0.0, 0, 0.0);
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }