Example #1
 public virtual void TestStaleNodes()
 {
     // Set two datanodes as stale
     for (int i = 0; i < 2; i++)
     {
         DataNode dn = cluster.GetDataNodes()[i];
         DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
         long staleInterval = Conf.GetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey,
             DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalDefault);
         DatanodeDescriptor dnDes = cluster.GetNameNode().GetNamesystem().GetBlockManager()
             .GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
         DFSTestUtil.ResetLastUpdatesWithOffset(dnDes, -(staleInterval + 1));
     }
     // Let the HeartbeatManager check heartbeats
     BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager());
     MetricsAsserts.AssertGauge("StaleDataNodes", 2, MetricsAsserts.GetMetrics(NsMetrics));
     // Reset stale datanodes
     for (int i_1 = 0; i_1 < 2; i_1++)
     {
         DataNode dn = cluster.GetDataNodes()[i_1];
         DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, false);
         DatanodeDescriptor dnDes = cluster.GetNameNode().GetNamesystem().GetBlockManager()
             .GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
         DFSTestUtil.ResetLastUpdatesWithOffset(dnDes, 0);
     }
     // Let the HeartbeatManager refresh
     BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager());
     MetricsAsserts.AssertGauge("StaleDataNodes", 0, MetricsAsserts.GetMetrics(NsMetrics));
 }
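The test above relies on fixture state that is not shown in the snippet: the Conf, cluster, and NsMetrics fields. A minimal sketch of such a fixture follows; the setup values and the metrics record name are assumptions for illustration, not part of the original example.

 private Configuration Conf;
 private MiniDFSCluster cluster;
 // Assumed metrics record name for the FSNamesystem gauges queried above.
 private const string NsMetrics = "FSNamesystem";

 [NUnit.Framework.SetUp]
 public virtual void SetUp()
 {
     Conf = new HdfsConfiguration();
     // Two datanodes are enough for the StaleDataNodes gauge checked above (assumed count).
     cluster = new MiniDFSCluster.Builder(Conf).NumDataNodes(2).Build();
     cluster.WaitActive();
 }

 [NUnit.Framework.TearDown]
 public virtual void TearDown()
 {
     if (cluster != null)
     {
         cluster.Shutdown();
     }
 }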
Example #2
        public virtual void TestInitializeBlockRecovery()
        {
            DatanodeStorageInfo s1  = DFSTestUtil.CreateDatanodeStorageInfo("10.10.1.1", "s1");
            DatanodeDescriptor  dd1 = s1.GetDatanodeDescriptor();
            DatanodeStorageInfo s2  = DFSTestUtil.CreateDatanodeStorageInfo("10.10.1.2", "s2");
            DatanodeDescriptor  dd2 = s2.GetDatanodeDescriptor();
            DatanodeStorageInfo s3  = DFSTestUtil.CreateDatanodeStorageInfo("10.10.1.3", "s3");
            DatanodeDescriptor  dd3 = s3.GetDatanodeDescriptor();

            dd1.isAlive = dd2.isAlive = dd3.isAlive = true;
            BlockInfoContiguousUnderConstruction blockInfo = new BlockInfoContiguousUnderConstruction(
                new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3,
                HdfsServerConstants.BlockUCState.UnderConstruction,
                new DatanodeStorageInfo[] { s1, s2, s3 });

            // Recovery attempt #1.
            DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -3 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -1 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -2 * 1000);
            blockInfo.InitializeBlockRecovery(1);
            BlockInfoContiguousUnderConstruction[] blockInfoRecovery = dd2.GetLeaseRecoveryCommand(1);
            NUnit.Framework.Assert.AreEqual(blockInfoRecovery[0], blockInfo);
            // Recovery attempt #2.
            DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -2 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -1 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -3 * 1000);
            blockInfo.InitializeBlockRecovery(2);
            blockInfoRecovery = dd1.GetLeaseRecoveryCommand(1);
            NUnit.Framework.Assert.AreEqual(blockInfoRecovery[0], blockInfo);
            // Recovery attempt #3.
            DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -2 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -1 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -3 * 1000);
            blockInfo.InitializeBlockRecovery(3);
            blockInfoRecovery = dd3.GetLeaseRecoveryCommand(1);
            NUnit.Framework.Assert.AreEqual(blockInfoRecovery[0], blockInfo);
            // Recovery attempt #4.
            // Reset everything and again pick the DN with the most recent heartbeat.
            DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -2 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -1 * 1000);
            DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
            blockInfo.InitializeBlockRecovery(3);
            blockInfoRecovery = dd3.GetLeaseRecoveryCommand(1);
            NUnit.Framework.Assert.AreEqual(blockInfoRecovery[0], blockInfo);
        }
Example #3
        public virtual void TestHeartbeatBlockRecovery()
        {
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                FSNamesystem     namesystem = cluster.GetNamesystem();
                HeartbeatManager hm         = namesystem.GetBlockManager().GetDatanodeManager()
                    .GetHeartbeatManager();
                string poolId = namesystem.GetBlockPoolId();
                DatanodeRegistration nodeReg1 =
                    DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[0], poolId);
                DatanodeDescriptor dd1 = NameNodeAdapter.GetDatanode(namesystem, nodeReg1);
                dd1.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
                DatanodeRegistration nodeReg2 =
                    DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[1], poolId);
                DatanodeDescriptor dd2 = NameNodeAdapter.GetDatanode(namesystem, nodeReg2);
                dd2.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
                DatanodeRegistration nodeReg3 =
                    DataNodeTestUtils.GetDNRegistrationForBP(cluster.GetDataNodes()[2], poolId);
                DatanodeDescriptor dd3 = NameNodeAdapter.GetDatanode(namesystem, nodeReg3);
                dd3.UpdateStorage(new DatanodeStorage(DatanodeStorage.GenerateUuid()));
                try
                {
                    namesystem.WriteLock();
                    lock (hm)
                    {
                        NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem);
                        NameNodeAdapter.SendHeartBeat(nodeReg2, dd2, namesystem);
                        NameNodeAdapter.SendHeartBeat(nodeReg3, dd3, namesystem);
                        // Test with all alive nodes.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd2, 0);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                        DatanodeStorageInfo[] storages = new DatanodeStorageInfo[] {
                            dd1.GetStorageInfos()[0], dd2.GetStorageInfos()[0], dd3.GetStorageInfos()[0] };
                        BlockInfoContiguousUnderConstruction blockInfo = new BlockInfoContiguousUnderConstruction(
                            new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3,
                            HdfsServerConstants.BlockUCState.UnderRecovery, storages);
                        dd1.AddBlockToBeRecovered(blockInfo);
                        DatanodeCommand[] cmds =
                            NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction());
                        BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand)cmds[0];
                        NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
                        DatanodeInfo[] recoveringNodes = Sharpen.Collections.ToArray(
                            recoveryCommand.GetRecoveringBlocks(),
                            new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
                        NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);
                        // Test with one stale node.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd1, 0);
                        // More than the default stale interval of 30 seconds.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd3, 0);
                        blockInfo = new BlockInfoContiguousUnderConstruction(
                            new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3,
                            HdfsServerConstants.BlockUCState.UnderRecovery, storages);
                        dd1.AddBlockToBeRecovered(blockInfo);
                        cmds = NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction());
                        recoveryCommand = (BlockRecoveryCommand)cmds[0];
                        NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
                        recoveringNodes = Sharpen.Collections.ToArray(
                            recoveryCommand.GetRecoveringBlocks(),
                            new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
                        NUnit.Framework.Assert.AreEqual(2, recoveringNodes.Length);
                        // dd2 is skipped.
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd3);
                        // Test with all stale nodes.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd1, -60 * 1000);
                        // More than the default stale interval of 30 seconds.
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd2, -40 * 1000);
                        DFSTestUtil.ResetLastUpdatesWithOffset(dd3, -80 * 1000);
                        blockInfo = new BlockInfoContiguousUnderConstruction(
                            new Block(0, 0, GenerationStamp.LastReservedStamp), (short)3,
                            HdfsServerConstants.BlockUCState.UnderRecovery, storages);
                        dd1.AddBlockToBeRecovered(blockInfo);
                        cmds = NameNodeAdapter.SendHeartBeat(nodeReg1, dd1, namesystem).GetCommands();
                        NUnit.Framework.Assert.AreEqual(1, cmds.Length);
                        NUnit.Framework.Assert.AreEqual(DatanodeProtocol.DnaRecoverblock, cmds[0].GetAction());
                        recoveryCommand = (BlockRecoveryCommand)cmds[0];
                        NUnit.Framework.Assert.AreEqual(1, recoveryCommand.GetRecoveringBlocks().Count);
                        recoveringNodes = Sharpen.Collections.ToArray(
                            recoveryCommand.GetRecoveringBlocks(),
                            new BlockRecoveryCommand.RecoveringBlock[0])[0].GetLocations();
                        // dd1 just heartbeated, so it is the only non-stale replica when the
                        // recovery list is constructed; since that leaves too few non-stale
                        // replicas, all three locations are used.
                        NUnit.Framework.Assert.AreEqual(3, recoveringNodes.Length);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[0], dd1);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[1], dd2);
                        NUnit.Framework.Assert.AreEqual(recoveringNodes[2], dd3);
                    }
                }
                finally
                {
                    namesystem.WriteUnlock();
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Example #4
        public virtual void TestReadSelectNonStaleDatanode()
        {
            HdfsConfiguration conf = new HdfsConfiguration();

            conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
            long staleInterval = 30 * 1000 * 60;

            conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes)
                .Racks(racks).Build();

            cluster.WaitActive();
            IPEndPoint addr   = new IPEndPoint("localhost", cluster.GetNameNodePort());
            DFSClient  client = new DFSClient(addr, conf);
            IList<DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem()
                .GetBlockManager().GetDatanodeManager()
                .GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Live);

            NUnit.Framework.Assert.AreEqual("Unexpected number of datanodes", numDatanodes, nodeInfoList
                                            .Count);
            FileSystem         fileSys = cluster.GetFileSystem();
            FSDataOutputStream stm     = null;

            try
            {
                // Do the writing but do not close the FSDataOutputStream,
                // in order to mimic an ongoing write.
                Path fileName = new Path("/file1");
                stm = fileSys.Create(fileName, true,
                    fileSys.GetConf().GetInt(CommonConfigurationKeys.IoFileBufferSizeKey, 4096),
                    (short)3, blockSize);
                stm.Write(new byte[(blockSize * 3) / 2]);
                // We do not close the stream, so the write appears to still be in progress.
                stm.Hflush();
                LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(
                    fileName.ToString(), 0, blockSize);
                DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
                NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
                DataNode           staleNode     = null;
                DatanodeDescriptor staleNodeInfo = null;
                // stop the heartbeat of the first node
                staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
                NUnit.Framework.Assert.IsNotNull(staleNode);
                // set the first node as stale
                staleNodeInfo = cluster.GetNameNode().GetNamesystem().GetBlockManager()
                    .GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
                DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));
                LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(
                    fileName.ToString(), 0, blockSize);
                DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
                NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
                NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName());
                // restart the staleNode's heartbeat
                DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
                // reset the first node as non-stale, so as to avoid two stale nodes
                DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);
                LocatedBlock lastBlock = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue)
                    .GetLastLocatedBlock();
                nodes = lastBlock.GetLocations();
                NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
                // stop the heartbeat of the first node for the last block
                staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
                NUnit.Framework.Assert.IsNotNull(staleNode);
                // set the node as stale
                DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem().GetBlockManager()
                    .GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
                DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));
                LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue)
                    .GetLastLocatedBlock();
                nodesAfterStale = lastBlockAfterStale.GetLocations();
                NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
                NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName());
            }
            finally
            {
                if (stm != null)
                {
                    stm.Close();
                }
                client.Close();
                cluster.Shutdown();
            }
        }
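Example #4 also depends on members that are not part of the snippet: numDatanodes, racks, blockSize, and the StopDataNodeHeartbeat helper. The sketch below shows one plausible shape for them; the concrete values and the helper's implementation are assumptions for illustration, not the original code.

        // Assumed fixture values; the original test defines its own.
        private const int numDatanodes = 4;
        private static readonly string[] racks = new string[] { "/d1/r1", "/d1/r1", "/d1/r2", "/d1/r2" };
        private const int blockSize = 1024;

        // Plausible sketch of the helper: locate the datanode serving the given
        // hostname and disable its heartbeats so the namenode can mark it stale.
        private DataNode StopDataNodeHeartbeat(MiniDFSCluster cluster, string hostName)
        {
            foreach (DataNode dn in cluster.GetDataNodes())
            {
                if (dn.GetDatanodeId().GetHostName().Equals(hostName))
                {
                    DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
                    return dn;
                }
            }
            return null;
        }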