Exemple #1
0
        /// <summary>
        /// Checks that a datanode which is already dead can still be decommissioned.
        /// </summary>
        /// <remarks>
        /// The first datanode of the cluster is stopped, then decommissioned. After the
        /// decommission state has been rechecked, its descriptor is expected to report
        /// DECOMMISSIONED. The node is then restarted and the exclude file is rewritten
        /// so that the node becomes available again.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDecommissionDeadDN()
        {
            // Upper bound handed to WaitForDatanodeState (presumably milliseconds).
            const int stateWaitTimeout = 30000;

            Logger.GetLogger(typeof(DecommissionManager)).SetLevel(Level.Debug);

            // Remember the identity of the node before taking it down.
            DatanodeID deadNodeId = cluster.GetDataNodes()[0].GetDatanodeId();
            string deadNodeAddr = deadNodeId.GetXferAddr();
            MiniDFSCluster.DataNodeProperties stoppedNode = cluster.StopDataNode(0);
            DFSTestUtil.WaitForDatanodeState(cluster, deadNodeId.GetDatanodeUuid(), false, stateWaitTimeout);

            FSNamesystem namesystem = cluster.GetNamesystem();
            DatanodeManager datanodeManager = namesystem.GetBlockManager().GetDatanodeManager();
            DatanodeDescriptor deadNodeDescriptor = datanodeManager.GetDatanode(deadNodeId);

            // Decommission the stopped node and let the manager re-evaluate its state.
            DecommissionNode(namesystem, localFileSys, deadNodeAddr);
            datanodeManager.RefreshNodes(conf);
            BlockManagerTestUtil.RecheckDecommissionState(datanodeManager);
            NUnit.Framework.Assert.IsTrue(deadNodeDescriptor.IsDecommissioned());

            // Bring the node back.
            cluster.RestartDataNode(stoppedNode, true);
            cluster.WaitActive();

            // Refresh the node list with an empty exclude file so the datanode leaves
            // the decommissioning list and is usable again.
            WriteConfigFile(localFileSys, excludeFile, null);
            datanodeManager.RefreshNodes(conf);
        }
Exemple #2
0
        /// <summary>
        /// Tracks the expected live / in-service datanode counts and xceiver loads
        /// while datanodes are shut down, decommissioned, recommissioned, restarted
        /// and written to, verifying them against the namesystem at every step.
        /// </summary>
        public virtual void TestXceiverCount()
        {
            int nodes = 8;
            int fileCount = 5;
            short fileRepl = 3;

            Configuration hdfsConf = new HdfsConfiguration();
            // allow a single retry when closing a file fails
            hdfsConf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1);

            MiniDFSCluster miniCluster = null;
            try
            {
                miniCluster = new MiniDFSCluster.Builder(hdfsConf).NumDataNodes(nodes).Build();
                miniCluster.WaitActive();

                FSNamesystem fsn = miniCluster.GetNamesystem();
                DatanodeManager dnManager = fsn.GetBlockManager().GetDatanodeManager();
                IList<DataNode> dataNodes = miniCluster.GetDataNodes();
                DistributedFileSystem dfs = miniCluster.GetFileSystem();

                // send heartbeats now in case some node has not reported yet
                TriggerHeartbeats(dataNodes);

                // every node starts live and in service; the xceiver server
                // contributes 1 to the load of each node
                int expectedTotalLoad = nodes;
                int expectedInServiceNodes = nodes;
                int expectedInServiceLoad = nodes;
                CheckClusterHealth(nodes, fsn, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);

                // take down half of the nodes, forcing a heartbeat check after each
                // one so that the counts are accurate
                int half = nodes / 2;
                for (int idx = 0; idx < half; idx++)
                {
                    DataNode node = dataNodes[idx];
                    DatanodeDescriptor descriptor = dnManager.GetDatanode(node.GetDatanodeId());
                    node.Shutdown();
                    DFSTestUtil.SetDatanodeDead(descriptor);
                    BlockManagerTestUtil.CheckHeartbeat(fsn.GetBlockManager());

                    // decommissioning a dead node must not affect the nodesInService metrics
                    dnManager.GetDecomManager().StartDecommission(descriptor);
                    expectedInServiceNodes--;
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, fsn.GetNumLiveDataNodes());
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(fsn));

                    // recommissioning a dead node must not affect them either
                    dnManager.GetDecomManager().StopDecommission(descriptor);
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(fsn));
                }

                // bring the nodes back and confirm the counts after re-registration
                miniCluster.RestartDataNodes();
                miniCluster.WaitActive();
                dataNodes = miniCluster.GetDataNodes();
                NUnit.Framework.Assert.AreEqual(nodes, dataNodes.Count);
                expectedInServiceNodes = nodes;
                CheckClusterHealth(nodes, fsn, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);

                // open one stream per file and hsync so the datastreamers start.
                // each replica of an open file accounts for a load of 2, because the
                // write xceiver and the packet responder threads are both counted
                int writerLoad = 2 * fileRepl;
                DFSOutputStream[] writers = new DFSOutputStream[fileCount];
                for (int idx = 0; idx < fileCount; idx++)
                {
                    DFSOutputStream writer =
                        (DFSOutputStream)dfs.Create(new Path("/f" + idx), fileRepl).GetWrappedStream();
                    writers[idx] = writer;
                    writer.Write(Sharpen.Runtime.GetBytesForString("1"));
                    writer.Hsync();
                    expectedTotalLoad += writerLoad;
                    expectedInServiceLoad += writerLoad;
                }

                // make the nodes report their updated load
                TriggerHeartbeats(dataNodes);
                CheckClusterHealth(nodes, fsn, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);

                // decommission a few nodes: subtract their load from the expected
                // in-service load and heartbeat to force a load update
                for (int idx = 0; idx < fileRepl; idx++)
                {
                    DataNode node = dataNodes[idx];
                    DatanodeDescriptor descriptor = dnManager.GetDatanode(node.GetDatanodeId());
                    expectedInServiceLoad -= descriptor.GetXceiverCount();
                    dnManager.GetDecomManager().StartDecommission(descriptor);
                    DataNodeTestUtils.TriggerHeartbeat(node);
                    Sharpen.Thread.Sleep(100);
                    expectedInServiceNodes--;
                    CheckClusterHealth(nodes, fsn, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }

                // close the streams one by one, recomputing the expected load from
                // the decommission state of each node in the stream's pipeline
                for (int idx = 0; idx < fileCount; idx++)
                {
                    int decommissionedInPipeline = 0;
                    foreach (DatanodeInfo pipelineNode in writers[idx].GetPipeline())
                    {
                        DatanodeDescriptor descriptor = dnManager.GetDatanode(pipelineNode);
                        expectedTotalLoad -= 2;
                        if (!descriptor.IsDecommissionInProgress() && !descriptor.IsDecommissioned())
                        {
                            expectedInServiceLoad -= 2;
                        }
                        else
                        {
                            decommissionedInPipeline++;
                        }
                    }

                    try
                    {
                        writers[idx].Close();
                    }
                    catch (IOException)
                    {
                        // nodes go decommissioned even when there is a UC block whose
                        // other locations are decommissioned as well; that bug is
                        // tolerated for now, so only rethrow when at least one
                        // pipeline node was still in service
                        if (decommissionedInPipeline < fileRepl)
                        {
                            throw;
                        }
                    }

                    TriggerHeartbeats(dataNodes);
                    // node count and loads must match after each close
                    CheckClusterHealth(nodes, fsn, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }

                // shut the nodes down one at a time and verify the counts, taking
                // the decommission state of each node into account
                for (int idx = 0; idx < nodes; idx++)
                {
                    DataNode node = dataNodes[idx];
                    node.Shutdown();

                    // mark it dead explicitly so that the live count drops
                    DatanodeDescriptor descriptor = dnManager.GetDatanode(node.GetDatanodeId());
                    DFSTestUtil.SetDatanodeDead(descriptor);
                    BlockManagerTestUtil.CheckHeartbeat(fsn.GetBlockManager());
                    NUnit.Framework.Assert.AreEqual(nodes - 1 - idx, fsn.GetNumLiveDataNodes());

                    // the first few nodes were decommissioned above and are
                    // already out of service
                    if (idx >= fileRepl)
                    {
                        expectedInServiceNodes--;
                    }
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(fsn));

                    // live nodes always report a load of 1; with no nodes left the load is 0
                    double expectedXceiverAvg;
                    if (idx == nodes - 1)
                    {
                        expectedXceiverAvg = 0.0;
                    }
                    else
                    {
                        expectedXceiverAvg = 1.0;
                    }
                    NUnit.Framework.Assert.AreEqual(expectedXceiverAvg, GetInServiceXceiverAverage(fsn),
                                                    Epsilon);
                }

                // final sanity check: nothing is left
                CheckClusterHealth(0, fsn, 0.0, 0, 0.0);
            }
            finally
            {
                if (miniCluster != null)
                {
                    miniCluster.Shutdown();
                }
            }
        }