Esempio n. 1
0
 public virtual void TestStaleNodes()
 {
     // Set two datanodes as stale
     for (int i = 0; i < 2; i++)
     {
         DataNode dn = cluster.GetDataNodes()[i];
         DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
         long staleInterval = Conf.GetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey
                                           , DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalDefault);
         DatanodeDescriptor dnDes = cluster.GetNameNode().GetNamesystem().GetBlockManager(
             ).GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
         DFSTestUtil.ResetLastUpdatesWithOffset(dnDes, -(staleInterval + 1));
     }
     // Let HeartbeatManager to check heartbeat
     BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager
                                             ());
     MetricsAsserts.AssertGauge("StaleDataNodes", 2, MetricsAsserts.GetMetrics(NsMetrics
                                                                               ));
     // Reset stale datanodes
     for (int i_1 = 0; i_1 < 2; i_1++)
     {
         DataNode dn = cluster.GetDataNodes()[i_1];
         DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, false);
         DatanodeDescriptor dnDes = cluster.GetNameNode().GetNamesystem().GetBlockManager(
             ).GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
         DFSTestUtil.ResetLastUpdatesWithOffset(dnDes, 0);
     }
     // Let HeartbeatManager to refresh
     BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager
                                             ());
     MetricsAsserts.AssertGauge("StaleDataNodes", 0, MetricsAsserts.GetMetrics(NsMetrics
                                                                               ));
 }
Esempio n. 2
0
 /// <summary>Stop the heartbeat of a datanode in the MiniDFSCluster</summary>
 /// <param name="cluster">The MiniDFSCluster</param>
 /// <param name="hostName">The hostName of the datanode to be stopped</param>
 /// <returns>The DataNode whose heartbeat has been stopped</returns>
 private DataNode StopDataNodeHeartbeat(MiniDFSCluster cluster, string hostName)
 {
     foreach (DataNode dn in cluster.GetDataNodes())
     {
         if (dn.GetDatanodeId().GetHostName().Equals(hostName))
         {
             DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
             return(dn);
         }
     }
     return(null);
 }
        public virtual void TestPendingAndInvalidate()
        {
            Configuration Conf = new HdfsConfiguration();

            Conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, 1024);
            Conf.SetLong(DFSConfigKeys.DfsHeartbeatIntervalKey, DfsReplicationInterval);
            Conf.SetInt(DFSConfigKeys.DfsNamenodeReplicationIntervalKey, DfsReplicationInterval
                        );
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(Conf).NumDataNodes(DatanodeCount
                                                                                   ).Build();

            cluster.WaitActive();
            FSNamesystem          namesystem = cluster.GetNamesystem();
            BlockManager          bm         = namesystem.GetBlockManager();
            DistributedFileSystem fs         = cluster.GetFileSystem();

            try
            {
                // 1. create a file
                Path filePath = new Path("/tmp.txt");
                DFSTestUtil.CreateFile(fs, filePath, 1024, (short)3, 0L);
                // 2. disable the heartbeats
                foreach (DataNode dn in cluster.GetDataNodes())
                {
                    DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
                }
                // 3. mark a couple of blocks as corrupt
                LocatedBlock block = NameNodeAdapter.GetBlockLocations(cluster.GetNameNode(), filePath
                                                                       .ToString(), 0, 1).Get(0);
                cluster.GetNamesystem().WriteLock();
                try
                {
                    bm.FindAndMarkBlockAsCorrupt(block.GetBlock(), block.GetLocations()[0], "STORAGE_ID"
                                                 , "TEST");
                    bm.FindAndMarkBlockAsCorrupt(block.GetBlock(), block.GetLocations()[1], "STORAGE_ID"
                                                 , "TEST");
                }
                finally
                {
                    cluster.GetNamesystem().WriteUnlock();
                }
                BlockManagerTestUtil.ComputeAllPendingWork(bm);
                BlockManagerTestUtil.UpdateState(bm);
                NUnit.Framework.Assert.AreEqual(bm.GetPendingReplicationBlocksCount(), 1L);
                NUnit.Framework.Assert.AreEqual(bm.pendingReplications.GetNumReplicas(block.GetBlock
                                                                                          ().GetLocalBlock()), 2);
                // 4. delete the file
                fs.Delete(filePath, true);
                // retry at most 10 times, each time sleep for 1s. Note that 10s is much
                // less than the default pending record timeout (5~10min)
                int  retries    = 10;
                long pendingNum = bm.GetPendingReplicationBlocksCount();
                while (pendingNum != 0 && retries-- > 0)
                {
                    Sharpen.Thread.Sleep(1000);
                    // let NN do the deletion
                    BlockManagerTestUtil.UpdateState(bm);
                    pendingNum = bm.GetPendingReplicationBlocksCount();
                }
                NUnit.Framework.Assert.AreEqual(pendingNum, 0L);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
        public virtual void TestBlockReceived()
        {
            Configuration conf = new HdfsConfiguration();

            conf.SetLong(DFSConfigKeys.DfsBlockSizeKey, 1024);
            MiniDFSCluster cluster = null;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(DatanodeCount).Build();
                cluster.WaitActive();
                DistributedFileSystem hdfs       = cluster.GetFileSystem();
                FSNamesystem          fsn        = cluster.GetNamesystem();
                BlockManager          blkManager = fsn.GetBlockManager();
                string file       = "/tmp.txt";
                Path   filePath   = new Path(file);
                short  replFactor = 1;
                DFSTestUtil.CreateFile(hdfs, filePath, 1024L, replFactor, 0);
                // temporarily stop the heartbeat
                AList <DataNode> datanodes = cluster.GetDataNodes();
                for (int i = 0; i < DatanodeCount; i++)
                {
                    DataNodeTestUtils.SetHeartbeatsDisabledForTests(datanodes[i], true);
                }
                hdfs.SetReplication(filePath, (short)DatanodeCount);
                BlockManagerTestUtil.ComputeAllPendingWork(blkManager);
                NUnit.Framework.Assert.AreEqual(1, blkManager.pendingReplications.Size());
                INodeFile fileNode = fsn.GetFSDirectory().GetINode4Write(file).AsFile();
                Block[]   blocks   = fileNode.GetBlocks();
                NUnit.Framework.Assert.AreEqual(DatanodeCount - 1, blkManager.pendingReplications
                                                .GetNumReplicas(blocks[0]));
                LocatedBlock locatedBlock = hdfs.GetClient().GetLocatedBlocks(file, 0).Get(0);
                DatanodeInfo existingDn   = (locatedBlock.GetLocations())[0];
                int          reportDnNum  = 0;
                string       poolId       = cluster.GetNamesystem().GetBlockPoolId();
                // let two datanodes (other than the one that already has the data) to
                // report to NN
                for (int i_1 = 0; i_1 < DatanodeCount && reportDnNum < 2; i_1++)
                {
                    if (!datanodes[i_1].GetDatanodeId().Equals(existingDn))
                    {
                        DatanodeRegistration           dnR    = datanodes[i_1].GetDNRegistrationForBP(poolId);
                        StorageReceivedDeletedBlocks[] report = new StorageReceivedDeletedBlocks[] { new
                                                                                                     StorageReceivedDeletedBlocks("Fake-storage-ID-Ignored", new ReceivedDeletedBlockInfo
                                                                                                                                  [] { new ReceivedDeletedBlockInfo(blocks[0], ReceivedDeletedBlockInfo.BlockStatus
                                                                                                                                                                    .ReceivedBlock, string.Empty) }) };
                        cluster.GetNameNodeRpc().BlockReceivedAndDeleted(dnR, poolId, report);
                        reportDnNum++;
                    }
                }
                NUnit.Framework.Assert.AreEqual(DatanodeCount - 3, blkManager.pendingReplications
                                                .GetNumReplicas(blocks[0]));
                // let the same datanodes report again
                for (int i_2 = 0; i_2 < DatanodeCount && reportDnNum < 2; i_2++)
                {
                    if (!datanodes[i_2].GetDatanodeId().Equals(existingDn))
                    {
                        DatanodeRegistration           dnR    = datanodes[i_2].GetDNRegistrationForBP(poolId);
                        StorageReceivedDeletedBlocks[] report = new StorageReceivedDeletedBlocks[] { new
                                                                                                     StorageReceivedDeletedBlocks("Fake-storage-ID-Ignored", new ReceivedDeletedBlockInfo
                                                                                                                                  [] { new ReceivedDeletedBlockInfo(blocks[0], ReceivedDeletedBlockInfo.BlockStatus
                                                                                                                                                                    .ReceivedBlock, string.Empty) }) };
                        cluster.GetNameNodeRpc().BlockReceivedAndDeleted(dnR, poolId, report);
                        reportDnNum++;
                    }
                }
                NUnit.Framework.Assert.AreEqual(DatanodeCount - 3, blkManager.pendingReplications
                                                .GetNumReplicas(blocks[0]));
                // re-enable heartbeat for the datanode that has data
                for (int i_3 = 0; i_3 < DatanodeCount; i_3++)
                {
                    DataNodeTestUtils.SetHeartbeatsDisabledForTests(datanodes[i_3], false);
                    DataNodeTestUtils.TriggerHeartbeat(datanodes[i_3]);
                }
                Sharpen.Thread.Sleep(5000);
                NUnit.Framework.Assert.AreEqual(0, blkManager.pendingReplications.Size());
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Esempio n. 5
0
        public virtual void TestReadSelectNonStaleDatanode()
        {
            HdfsConfiguration conf = new HdfsConfiguration();

            conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
            long staleInterval = 30 * 1000 * 60;

            conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes
                                                                                   ).Racks(racks).Build();

            cluster.WaitActive();
            IPEndPoint addr   = new IPEndPoint("localhost", cluster.GetNameNodePort());
            DFSClient  client = new DFSClient(addr, conf);
            IList <DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem().GetBlockManager
                                                          ().GetDatanodeManager().GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                                                           .Live);

            NUnit.Framework.Assert.AreEqual("Unexpected number of datanodes", numDatanodes, nodeInfoList
                                            .Count);
            FileSystem         fileSys = cluster.GetFileSystem();
            FSDataOutputStream stm     = null;

            try
            {
                // do the writing but do not close the FSDataOutputStream
                // in order to mimic the ongoing writing
                Path fileName = new Path("/file1");
                stm = fileSys.Create(fileName, true, fileSys.GetConf().GetInt(CommonConfigurationKeys
                                                                              .IoFileBufferSizeKey, 4096), (short)3, blockSize);
                stm.Write(new byte[(blockSize * 3) / 2]);
                // We do not close the stream so that
                // the writing seems to be still ongoing
                stm.Hflush();
                LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(fileName.ToString()
                                                                              , 0, blockSize);
                DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
                NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
                DataNode           staleNode     = null;
                DatanodeDescriptor staleNodeInfo = null;
                // stop the heartbeat of the first node
                staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
                NUnit.Framework.Assert.IsNotNull(staleNode);
                // set the first node as stale
                staleNodeInfo = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager
                                    ().GetDatanode(staleNode.GetDatanodeId());
                DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));
                LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(fileName.
                                                                                        ToString(), 0, blockSize);
                DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
                NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
                NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName
                                                    ());
                // restart the staleNode's heartbeat
                DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
                // reset the first node as non-stale, so as to avoid two stale nodes
                DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);
                LocatedBlock lastBlock = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue
                                                                 ).GetLastLocatedBlock();
                nodes = lastBlock.GetLocations();
                NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
                // stop the heartbeat of the first node for the last block
                staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
                NUnit.Framework.Assert.IsNotNull(staleNode);
                // set the node as stale
                DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem().GetBlockManager
                                                ().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
                DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));
                LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(fileName.ToString(), 0
                                                                           , long.MaxValue).GetLastLocatedBlock();
                nodesAfterStale = lastBlockAfterStale.GetLocations();
                NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
                NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName
                                                    ());
            }
            finally
            {
                if (stm != null)
                {
                    stm.Close();
                }
                client.Close();
                cluster.Shutdown();
            }
        }
Esempio n. 6
0
        /// <exception cref="System.Exception"/>
        public virtual void HardLeaseRecoveryRestartHelper(bool doRename, int size)
        {
            if (size < 0)
            {
                size = AppendTestUtil.NextInt(FileSize + 1);
            }
            //create a file
            string fileStr = "/hardLeaseRecovery";

            AppendTestUtil.Log.Info("filestr=" + fileStr);
            Path filePath          = new Path(fileStr);
            FSDataOutputStream stm = dfs.Create(filePath, true, BufSize, ReplicationNum, BlockSize
                                                );

            NUnit.Framework.Assert.IsTrue(dfs.dfs.Exists(fileStr));
            // write bytes into the file.
            AppendTestUtil.Log.Info("size=" + size);
            stm.Write(buffer, 0, size);
            string originalLeaseHolder = NameNodeAdapter.GetLeaseHolderForPath(cluster.GetNameNode
                                                                                   (), fileStr);

            NUnit.Framework.Assert.IsFalse("original lease holder should not be the NN", originalLeaseHolder
                                           .Equals(HdfsServerConstants.NamenodeLeaseHolder));
            // hflush file
            AppendTestUtil.Log.Info("hflush");
            stm.Hflush();
            // check visible length
            HdfsDataInputStream @in = (HdfsDataInputStream)dfs.Open(filePath);

            NUnit.Framework.Assert.AreEqual(size, @in.GetVisibleLength());
            @in.Close();
            if (doRename)
            {
                fileStr += ".renamed";
                Path renamedPath = new Path(fileStr);
                NUnit.Framework.Assert.IsTrue(dfs.Rename(filePath, renamedPath));
                filePath = renamedPath;
            }
            // kill the lease renewal thread
            AppendTestUtil.Log.Info("leasechecker.interruptAndJoin()");
            dfs.dfs.GetLeaseRenewer().InterruptAndJoin();
            // Make sure the DNs don't send a heartbeat for a while, so the blocks
            // won't actually get completed during lease recovery.
            foreach (DataNode dn in cluster.GetDataNodes())
            {
                DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
            }
            // set the hard limit to be 1 second
            cluster.SetLeasePeriod(LongLeasePeriod, ShortLeasePeriod);
            // Make sure lease recovery begins.
            Sharpen.Thread.Sleep(HdfsServerConstants.NamenodeLeaseRecheckInterval * 2);
            CheckLease(fileStr, size);
            cluster.RestartNameNode(false);
            CheckLease(fileStr, size);
            // Let the DNs send heartbeats again.
            foreach (DataNode dn_1 in cluster.GetDataNodes())
            {
                DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn_1, false);
            }
            cluster.WaitActive();
            // set the hard limit to be 1 second, to initiate lease recovery.
            cluster.SetLeasePeriod(LongLeasePeriod, ShortLeasePeriod);
            // wait for lease recovery to complete
            LocatedBlocks locatedBlocks;

            do
            {
                Sharpen.Thread.Sleep(ShortLeasePeriod);
                locatedBlocks = dfs.dfs.GetLocatedBlocks(fileStr, 0L, size);
            }while (locatedBlocks.IsUnderConstruction());
            NUnit.Framework.Assert.AreEqual(size, locatedBlocks.GetFileLength());
            // make sure that the client can't write data anymore.
            try
            {
                stm.Write('b');
                stm.Hflush();
                NUnit.Framework.Assert.Fail("Should not be able to flush after we've lost the lease"
                                            );
            }
            catch (IOException e)
            {
                Log.Info("Expceted exception on write/hflush", e);
            }
            try
            {
                stm.Close();
                NUnit.Framework.Assert.Fail("Should not be able to close after we've lost the lease"
                                            );
            }
            catch (IOException e)
            {
                Log.Info("Expected exception on close", e);
            }
            // verify data
            AppendTestUtil.Log.Info("File size is good. Now validating sizes from datanodes..."
                                    );
            AppendTestUtil.CheckFullFile(dfs, filePath, size, buffer, fileStr);
        }