Esempio n. 1
0
        public virtual void TestBlocksScheduledCounter()
        {
            cluster = new MiniDFSCluster.Builder(new HdfsConfiguration()).Build();
            cluster.WaitActive();
            fs = cluster.GetFileSystem();
            //open a file an write a few bytes:
            FSDataOutputStream @out = fs.Create(new Path("/testBlockScheduledCounter"));

            for (int i = 0; i < 1024; i++)
            {
                @out.Write(i);
            }
            // flush to make sure a block is allocated.
            @out.Hflush();
            AList <DatanodeDescriptor> dnList = new AList <DatanodeDescriptor>();
            DatanodeManager            dm     = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager
                                                    ();

            dm.FetchDatanodes(dnList, dnList, false);
            DatanodeDescriptor dn = dnList[0];

            NUnit.Framework.Assert.AreEqual(1, dn.GetBlocksScheduled());
            // close the file and the counter should go to zero.
            @out.Close();
            NUnit.Framework.Assert.AreEqual(0, dn.GetBlocksScheduled());
        }
Esempio n. 2
0
        /// <summary>Verify the support for decommissioning a datanode that is already dead.</summary>
        /// <remarks>
        /// Verify the support for decommissioning a datanode that is already dead.
        /// Under this scenario the datanode should immediately be marked as
        /// DECOMMISSIONED
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDecommissionDeadDN()
        {
            Logger log = Logger.GetLogger(typeof(DecommissionManager));

            log.SetLevel(Level.Debug);
            DatanodeID dnID   = cluster.GetDataNodes()[0].GetDatanodeId();
            string     dnName = dnID.GetXferAddr();

            MiniDFSCluster.DataNodeProperties stoppedDN = cluster.StopDataNode(0);
            DFSTestUtil.WaitForDatanodeState(cluster, dnID.GetDatanodeUuid(), false, 30000);
            FSNamesystem       fsn          = cluster.GetNamesystem();
            DatanodeManager    dm           = fsn.GetBlockManager().GetDatanodeManager();
            DatanodeDescriptor dnDescriptor = dm.GetDatanode(dnID);

            DecommissionNode(fsn, localFileSys, dnName);
            dm.RefreshNodes(conf);
            BlockManagerTestUtil.RecheckDecommissionState(dm);
            NUnit.Framework.Assert.IsTrue(dnDescriptor.IsDecommissioned());
            // Add the node back
            cluster.RestartDataNode(stoppedDN, true);
            cluster.WaitActive();
            // Call refreshNodes on FSNamesystem with empty exclude file to remove the
            // datanode from decommissioning list and make it available again.
            WriteConfigFile(localFileSys, excludeFile, null);
            dm.RefreshNodes(conf);
        }
Esempio n. 3
0
        /// <summary>
        /// Checks NameNode tracking of a particular DataNode for correct reporting of
        /// failed volumes.
        /// </summary>
        /// <param name="dm">DatanodeManager to check</param>
        /// <param name="dn">DataNode to check</param>
        /// <param name="expectCapacityKnown">
        /// if true, then expect that the capacities of the
        /// volumes were known before the failures, and therefore the lost capacity
        /// can be reported
        /// </param>
        /// <param name="expectedFailedVolumes">expected locations of failed volumes</param>
        /// <exception cref="System.Exception">if there is any failure</exception>
        private void CheckFailuresAtNameNode(DatanodeManager dm, DataNode dn, bool expectCapacityKnown
                                             , params string[] expectedFailedVolumes)
        {
            DatanodeDescriptor dd = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager
                                        ().GetDatanode(dn.GetDatanodeId());

            NUnit.Framework.Assert.AreEqual(expectedFailedVolumes.Length, dd.GetVolumeFailures
                                                ());
            VolumeFailureSummary volumeFailureSummary = dd.GetVolumeFailureSummary();

            if (expectedFailedVolumes.Length > 0)
            {
                Assert.AssertArrayEquals(expectedFailedVolumes, volumeFailureSummary.GetFailedStorageLocations
                                             ());
                NUnit.Framework.Assert.IsTrue(volumeFailureSummary.GetLastVolumeFailureDate() > 0
                                              );
                long expectedCapacityLost = GetExpectedCapacityLost(expectCapacityKnown, expectedFailedVolumes
                                                                    .Length);
                NUnit.Framework.Assert.AreEqual(expectedCapacityLost, volumeFailureSummary.GetEstimatedCapacityLostTotal
                                                    ());
            }
            else
            {
                NUnit.Framework.Assert.IsNull(volumeFailureSummary);
            }
        }
Esempio n. 4
0
 public virtual void TestStaleNodes()
 {
     // Set two datanodes as stale
     for (int i = 0; i < 2; i++)
     {
         DataNode dn = cluster.GetDataNodes()[i];
         DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, true);
         long staleInterval = Conf.GetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey
                                           , DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalDefault);
         DatanodeDescriptor dnDes = cluster.GetNameNode().GetNamesystem().GetBlockManager(
             ).GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
         DFSTestUtil.ResetLastUpdatesWithOffset(dnDes, -(staleInterval + 1));
     }
     // Let HeartbeatManager to check heartbeat
     BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager
                                             ());
     MetricsAsserts.AssertGauge("StaleDataNodes", 2, MetricsAsserts.GetMetrics(NsMetrics
                                                                               ));
     // Reset stale datanodes
     for (int i_1 = 0; i_1 < 2; i_1++)
     {
         DataNode dn = cluster.GetDataNodes()[i_1];
         DataNodeTestUtils.SetHeartbeatsDisabledForTests(dn, false);
         DatanodeDescriptor dnDes = cluster.GetNameNode().GetNamesystem().GetBlockManager(
             ).GetDatanodeManager().GetDatanode(dn.GetDatanodeId());
         DFSTestUtil.ResetLastUpdatesWithOffset(dnDes, 0);
     }
     // Let HeartbeatManager to refresh
     BlockManagerTestUtil.CheckHeartbeat(cluster.GetNameNode().GetNamesystem().GetBlockManager
                                             ());
     MetricsAsserts.AssertGauge("StaleDataNodes", 0, MetricsAsserts.GetMetrics(NsMetrics
                                                                               ));
 }
Esempio n. 5
0
 private void CheckDecommissionStatus(DatanodeDescriptor decommNode, int expectedUnderRep
                                      , int expectedDecommissionOnly, int expectedUnderRepInOpenFiles)
 {
     NUnit.Framework.Assert.AreEqual("Unexpected num under-replicated blocks", expectedUnderRep
                                     , decommNode.decommissioningStatus.GetUnderReplicatedBlocks());
     NUnit.Framework.Assert.AreEqual("Unexpected number of decom-only replicas", expectedDecommissionOnly
                                     , decommNode.decommissioningStatus.GetDecommissionOnlyReplicas());
     NUnit.Framework.Assert.AreEqual("Unexpected number of replicas in under-replicated open files"
                                     , expectedUnderRepInOpenFiles, decommNode.decommissioningStatus.GetUnderReplicatedInOpenFiles
                                         ());
 }
Esempio n. 6
0
        public virtual void TestContains()
        {
            DatanodeDescriptor nodeNotInMap = DFSTestUtil.GetDatanodeDescriptor("8.8.8.8", "/d2/r4"
                                                                                );

            for (int i = 0; i < dataNodes.Length; i++)
            {
                NUnit.Framework.Assert.IsTrue(cluster.Contains(dataNodes[i]));
            }
            NUnit.Framework.Assert.IsFalse(cluster.Contains(nodeNotInMap));
        }
Esempio n. 7
0
        public virtual void TestCreateInvalidTopology()
        {
            NetworkTopology invalCluster = new NetworkTopology();

            DatanodeDescriptor[] invalDataNodes = new DatanodeDescriptor[] { DFSTestUtil.GetDatanodeDescriptor
                                                                                 ("1.1.1.1", "/d1/r1"), DFSTestUtil.GetDatanodeDescriptor("2.2.2.2", "/d1/r1"), DFSTestUtil
                                                                             .GetDatanodeDescriptor("3.3.3.3", "/d1") };
            invalCluster.Add(invalDataNodes[0]);
            invalCluster.Add(invalDataNodes[1]);
            try
            {
                invalCluster.Add(invalDataNodes[2]);
                NUnit.Framework.Assert.Fail("expected InvalidTopologyException");
            }
            catch (NetworkTopology.InvalidTopologyException e)
            {
                NUnit.Framework.Assert.IsTrue(e.Message.StartsWith("Failed to add "));
                NUnit.Framework.Assert.IsTrue(e.Message.Contains("You cannot have a rack and a non-rack node at the same "
                                                                 + "level of the network topology."));
            }
        }
Esempio n. 8
0
 /// <exception cref="System.IO.IOException"/>
 public static HeartbeatResponse SendHeartBeat(DatanodeRegistration nodeReg, DatanodeDescriptor
                                               dd, FSNamesystem namesystem)
 {
     return(namesystem.HandleHeartbeat(nodeReg, BlockManagerTestUtil.GetStorageReportsForDatanode
                                           (dd), dd.GetCacheCapacity(), dd.GetCacheRemaining(), 0, 0, 0, null));
 }
        /// <summary>
        /// Test the scenario where the NN fails over after issuing a block
        /// synchronization request, but before it is committed.
        /// </summary>
        /// <remarks>
        /// Test the scenario where the NN fails over after issuing a block
        /// synchronization request, but before it is committed. The
        /// DN running the recovery should then fail to commit the synchronization
        /// and a later retry will succeed.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestFailoverRightBeforeCommitSynchronization()
        {
            Configuration conf = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm     = null;
            MiniDFSCluster     cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology
                                                                                     .SimpleHATopology()).NumDataNodes(3).Build();

            try
            {
                cluster.WaitActive();
                cluster.TransitionToActive(0);
                Sharpen.Thread.Sleep(500);
                Log.Info("Starting with NN 0 active");
                FileSystem fs = HATestUtil.ConfigureFailoverFs(cluster, conf);
                stm = fs.Create(TestPath);
                // write a half block
                AppendTestUtil.Write(stm, 0, BlockSize / 2);
                stm.Hflush();
                // Look into the block manager on the active node for the block
                // under construction.
                NameNode           nn0             = cluster.GetNameNode(0);
                ExtendedBlock      blk             = DFSTestUtil.GetFirstBlock(fs, TestPath);
                DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn0, blk);
                Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
                // Find the corresponding DN daemon, and spy on its connection to the
                // active.
                DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
                DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.SpyOnBposToNN(primaryDN
                                                                                               , nn0);
                // Delay the commitBlockSynchronization call
                GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
                Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(Org.Mockito.Mockito
                                                                                             .Eq(blk), Org.Mockito.Mockito.AnyInt(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito
                                                                                             .Eq(true), Org.Mockito.Mockito.Eq(false), (DatanodeID[])Org.Mockito.Mockito.AnyObject
                                                                                                 (), (string[])Org.Mockito.Mockito.AnyObject());
                // new genstamp
                // new length
                // close file
                // delete block
                // new targets
                // new target storages
                DistributedFileSystem fsOtherUser = CreateFsAsOtherUser(cluster, conf);
                NUnit.Framework.Assert.IsFalse(fsOtherUser.RecoverLease(TestPath));
                Log.Info("Waiting for commitBlockSynchronization call from primary");
                delayer.WaitForCall();
                Log.Info("Failing over to NN 1");
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                // Let the commitBlockSynchronization call go through, and check that
                // it failed with the correct exception.
                delayer.Proceed();
                delayer.WaitForResult();
                Exception t = delayer.GetThrown();
                if (t == null)
                {
                    NUnit.Framework.Assert.Fail("commitBlockSynchronization call did not fail on standby"
                                                );
                }
                GenericTestUtils.AssertExceptionContains("Operation category WRITE is not supported"
                                                         , t);
                // Now, if we try again to recover the block, it should succeed on the new
                // active.
                LoopRecoverLease(fsOtherUser, TestPath);
                AppendTestUtil.Check(fs, TestPath, BlockSize / 2);
            }
            finally
            {
                IOUtils.CloseStream(stm);
                cluster.Shutdown();
            }
        }
Esempio n. 10
0
        public virtual void TestDeletedBlockWhenAddBlockIsInEdit()
        {
            Configuration conf = new HdfsConfiguration();

            cluster = new MiniDFSCluster.Builder(conf).NnTopology(MiniDFSNNTopology.SimpleHATopology
                                                                      ()).NumDataNodes(1).Build();
            DFSClient client = null;

            try
            {
                cluster.WaitActive();
                NUnit.Framework.Assert.AreEqual("Number of namenodes is not 2", 2, cluster.GetNumNameNodes
                                                    ());
                // Transitioning the namenode 0 to active.
                cluster.TransitionToActive(0);
                NUnit.Framework.Assert.IsTrue("Namenode 0 should be in active state", cluster.GetNameNode
                                                  (0).IsActiveState());
                NUnit.Framework.Assert.IsTrue("Namenode 1 should be in standby state", cluster.GetNameNode
                                                  (1).IsStandbyState());
                // Trigger heartbeat to mark DatanodeStorageInfo#heartbeatedSinceFailover
                // to true.
                DataNodeTestUtils.TriggerHeartbeat(cluster.GetDataNodes()[0]);
                FileSystem fs = cluster.GetFileSystem(0);
                // Trigger blockReport to mark DatanodeStorageInfo#blockContentsStale
                // to false.
                cluster.GetDataNodes()[0].TriggerBlockReport(new BlockReportOptions.Factory().SetIncremental
                                                                 (false).Build());
                Path fileName = new Path("/tmp.txt");
                // create a file with one block
                DFSTestUtil.CreateFile(fs, fileName, 10L, (short)1, 1234L);
                DFSTestUtil.WaitReplication(fs, fileName, (short)1);
                client = new DFSClient(cluster.GetFileSystem(0).GetUri(), conf);
                IList <LocatedBlock> locatedBlocks = client.GetNamenode().GetBlockLocations("/tmp.txt"
                                                                                            , 0, 10L).GetLocatedBlocks();
                NUnit.Framework.Assert.IsTrue(locatedBlocks.Count == 1);
                NUnit.Framework.Assert.IsTrue(locatedBlocks[0].GetLocations().Length == 1);
                // add a second datanode to the cluster
                cluster.StartDataNodes(conf, 1, true, null, null, null, null);
                NUnit.Framework.Assert.AreEqual("Number of datanodes should be 2", 2, cluster.GetDataNodes
                                                    ().Count);
                DataNode           dn0          = cluster.GetDataNodes()[0];
                DataNode           dn1          = cluster.GetDataNodes()[1];
                string             activeNNBPId = cluster.GetNamesystem(0).GetBlockPoolId();
                DatanodeDescriptor sourceDnDesc = NameNodeAdapter.GetDatanode(cluster.GetNamesystem
                                                                                  (0), dn0.GetDNRegistrationForBP(activeNNBPId));
                DatanodeDescriptor destDnDesc = NameNodeAdapter.GetDatanode(cluster.GetNamesystem
                                                                                (0), dn1.GetDNRegistrationForBP(activeNNBPId));
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fs, fileName);
                Log.Info("replaceBlock:  " + ReplaceBlock(block, (DatanodeInfo)sourceDnDesc, (DatanodeInfo
                                                                                              )sourceDnDesc, (DatanodeInfo)destDnDesc));
                // Waiting for the FsDatasetAsyncDsikService to delete the block
                Sharpen.Thread.Sleep(3000);
                // Triggering the incremental block report to report the deleted block to
                // namnemode
                cluster.GetDataNodes()[0].TriggerBlockReport(new BlockReportOptions.Factory().SetIncremental
                                                                 (true).Build());
                cluster.TransitionToStandby(0);
                cluster.TransitionToActive(1);
                NUnit.Framework.Assert.IsTrue("Namenode 1 should be in active state", cluster.GetNameNode
                                                  (1).IsActiveState());
                NUnit.Framework.Assert.IsTrue("Namenode 0 should be in standby state", cluster.GetNameNode
                                                  (0).IsStandbyState());
                client.Close();
                // Opening a new client for new active  namenode
                client = new DFSClient(cluster.GetFileSystem(1).GetUri(), conf);
                IList <LocatedBlock> locatedBlocks1 = client.GetNamenode().GetBlockLocations("/tmp.txt"
                                                                                             , 0, 10L).GetLocatedBlocks();
                NUnit.Framework.Assert.AreEqual(1, locatedBlocks1.Count);
                NUnit.Framework.Assert.AreEqual("The block should be only on 1 datanode ", 1, locatedBlocks1
                                                [0].GetLocations().Length);
            }
            finally
            {
                IOUtils.Cleanup(null, client);
                cluster.Shutdown();
            }
        }
Esempio n. 11
0
        public virtual void TestReadSelectNonStaleDatanode()
        {
            HdfsConfiguration conf = new HdfsConfiguration();

            conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
            long staleInterval = 30 * 1000 * 60;

            conf.SetLong(DFSConfigKeys.DfsNamenodeStaleDatanodeIntervalKey, staleInterval);
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(numDatanodes
                                                                                   ).Racks(racks).Build();

            cluster.WaitActive();
            IPEndPoint addr   = new IPEndPoint("localhost", cluster.GetNameNodePort());
            DFSClient  client = new DFSClient(addr, conf);
            IList <DatanodeDescriptor> nodeInfoList = cluster.GetNameNode().GetNamesystem().GetBlockManager
                                                          ().GetDatanodeManager().GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                                                           .Live);

            NUnit.Framework.Assert.AreEqual("Unexpected number of datanodes", numDatanodes, nodeInfoList
                                            .Count);
            FileSystem         fileSys = cluster.GetFileSystem();
            FSDataOutputStream stm     = null;

            try
            {
                // do the writing but do not close the FSDataOutputStream
                // in order to mimic the ongoing writing
                Path fileName = new Path("/file1");
                stm = fileSys.Create(fileName, true, fileSys.GetConf().GetInt(CommonConfigurationKeys
                                                                              .IoFileBufferSizeKey, 4096), (short)3, blockSize);
                stm.Write(new byte[(blockSize * 3) / 2]);
                // We do not close the stream so that
                // the writing seems to be still ongoing
                stm.Hflush();
                LocatedBlocks blocks = client.GetNamenode().GetBlockLocations(fileName.ToString()
                                                                              , 0, blockSize);
                DatanodeInfo[] nodes = blocks.Get(0).GetLocations();
                NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
                DataNode           staleNode     = null;
                DatanodeDescriptor staleNodeInfo = null;
                // stop the heartbeat of the first node
                staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
                NUnit.Framework.Assert.IsNotNull(staleNode);
                // set the first node as stale
                staleNodeInfo = cluster.GetNameNode().GetNamesystem().GetBlockManager().GetDatanodeManager
                                    ().GetDatanode(staleNode.GetDatanodeId());
                DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, -(staleInterval + 1));
                LocatedBlocks blocksAfterStale = client.GetNamenode().GetBlockLocations(fileName.
                                                                                        ToString(), 0, blockSize);
                DatanodeInfo[] nodesAfterStale = blocksAfterStale.Get(0).GetLocations();
                NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
                NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName
                                                    ());
                // restart the staleNode's heartbeat
                DataNodeTestUtils.SetHeartbeatsDisabledForTests(staleNode, false);
                // reset the first node as non-stale, so as to avoid two stale nodes
                DFSTestUtil.ResetLastUpdatesWithOffset(staleNodeInfo, 0);
                LocatedBlock lastBlock = client.GetLocatedBlocks(fileName.ToString(), 0, long.MaxValue
                                                                 ).GetLastLocatedBlock();
                nodes = lastBlock.GetLocations();
                NUnit.Framework.Assert.AreEqual(nodes.Length, 3);
                // stop the heartbeat of the first node for the last block
                staleNode = this.StopDataNodeHeartbeat(cluster, nodes[0].GetHostName());
                NUnit.Framework.Assert.IsNotNull(staleNode);
                // set the node as stale
                DatanodeDescriptor dnDesc = cluster.GetNameNode().GetNamesystem().GetBlockManager
                                                ().GetDatanodeManager().GetDatanode(staleNode.GetDatanodeId());
                DFSTestUtil.ResetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));
                LocatedBlock lastBlockAfterStale = client.GetLocatedBlocks(fileName.ToString(), 0
                                                                           , long.MaxValue).GetLastLocatedBlock();
                nodesAfterStale = lastBlockAfterStale.GetLocations();
                NUnit.Framework.Assert.AreEqual(nodesAfterStale.Length, 3);
                NUnit.Framework.Assert.AreEqual(nodesAfterStale[2].GetHostName(), nodes[0].GetHostName
                                                    ());
            }
            finally
            {
                if (stm != null)
                {
                    stm.Close();
                }
                client.Close();
                cluster.Shutdown();
            }
        }
Esempio n. 12
0
        public virtual void TestDecommissionStatus()
        {
            IPEndPoint addr   = new IPEndPoint("localhost", cluster.GetNameNodePort());
            DFSClient  client = new DFSClient(addr, conf);

            DatanodeInfo[] info = client.DatanodeReport(HdfsConstants.DatanodeReportType.Live
                                                        );
            NUnit.Framework.Assert.AreEqual("Number of Datanodes ", 2, info.Length);
            DistributedFileSystem fileSys = cluster.GetFileSystem();
            DFSAdmin admin    = new DFSAdmin(cluster.GetConfiguration(0));
            short    replicas = numDatanodes;
            //
            // Decommission one node. Verify the decommission status
            //
            Path file1 = new Path("decommission.dat");

            WriteFile(fileSys, file1, replicas);
            Path file2             = new Path("decommission1.dat");
            FSDataOutputStream st1 = WriteIncompleteFile(fileSys, file2, replicas);

            foreach (DataNode d in cluster.GetDataNodes())
            {
                DataNodeTestUtils.TriggerBlockReport(d);
            }
            FSNamesystem    fsn = cluster.GetNamesystem();
            DatanodeManager dm  = fsn.GetBlockManager().GetDatanodeManager();

            for (int iteration = 0; iteration < numDatanodes; iteration++)
            {
                string downnode = DecommissionNode(fsn, client, localFileSys, iteration);
                dm.RefreshNodes(conf);
                decommissionedNodes.AddItem(downnode);
                BlockManagerTestUtil.RecheckDecommissionState(dm);
                IList <DatanodeDescriptor> decommissioningNodes = dm.GetDecommissioningNodes();
                if (iteration == 0)
                {
                    NUnit.Framework.Assert.AreEqual(decommissioningNodes.Count, 1);
                    DatanodeDescriptor decommNode = decommissioningNodes[0];
                    CheckDecommissionStatus(decommNode, 3, 0, 1);
                    CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 1), fileSys, admin
                                                    );
                }
                else
                {
                    NUnit.Framework.Assert.AreEqual(decommissioningNodes.Count, 2);
                    DatanodeDescriptor decommNode1 = decommissioningNodes[0];
                    DatanodeDescriptor decommNode2 = decommissioningNodes[1];
                    // This one is still 3,3,1 since it passed over the UC block
                    // earlier, before node 2 was decommed
                    CheckDecommissionStatus(decommNode1, 3, 3, 1);
                    // This one is 4,4,2 since it has the full state
                    CheckDecommissionStatus(decommNode2, 4, 4, 2);
                    CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 2), fileSys, admin
                                                    );
                }
            }
            // Call refreshNodes on FSNamesystem with empty exclude file.
            // This will remove the datanodes from decommissioning list and
            // make them available again.
            WriteConfigFile(localFileSys, excludeFile, null);
            dm.RefreshNodes(conf);
            st1.Close();
            CleanupFile(fileSys, file1);
            CleanupFile(fileSys, file2);
        }
Esempio n. 13
0
        public virtual void TestSortByDistance()
        {
            DatanodeDescriptor[] testNodes = new DatanodeDescriptor[3];
            // array contains both local node & local rack node
            testNodes[0] = dataNodes[1];
            testNodes[1] = dataNodes[2];
            testNodes[2] = dataNodes[0];
            cluster.SetRandomSeed(unchecked ((int)(0xDEADBEEF)));
            cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
            NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[0]);
            NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[1]);
            NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[2]);
            // array contains both local node & local rack node & decommissioned node
            DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
            dtestNodes[0] = dataNodes[8];
            dtestNodes[1] = dataNodes[12];
            dtestNodes[2] = dataNodes[11];
            dtestNodes[3] = dataNodes[9];
            dtestNodes[4] = dataNodes[10];
            cluster.SetRandomSeed(unchecked ((int)(0xDEADBEEF)));
            cluster.SortByDistance(dataNodes[8], dtestNodes, dtestNodes.Length - 2);
            NUnit.Framework.Assert.IsTrue(dtestNodes[0] == dataNodes[8]);
            NUnit.Framework.Assert.IsTrue(dtestNodes[1] == dataNodes[11]);
            NUnit.Framework.Assert.IsTrue(dtestNodes[2] == dataNodes[12]);
            NUnit.Framework.Assert.IsTrue(dtestNodes[3] == dataNodes[9]);
            NUnit.Framework.Assert.IsTrue(dtestNodes[4] == dataNodes[10]);
            // array contains local node
            testNodes[0] = dataNodes[1];
            testNodes[1] = dataNodes[3];
            testNodes[2] = dataNodes[0];
            cluster.SetRandomSeed(unchecked ((int)(0xDEADBEEF)));
            cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
            NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[0]);
            NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[1]);
            NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[3]);
            // array contains local rack node
            testNodes[0] = dataNodes[5];
            testNodes[1] = dataNodes[3];
            testNodes[2] = dataNodes[1];
            cluster.SetRandomSeed(unchecked ((int)(0xDEADBEEF)));
            cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
            NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[1]);
            NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[3]);
            NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[5]);
            // array contains local rack node which happens to be in position 0
            testNodes[0] = dataNodes[1];
            testNodes[1] = dataNodes[5];
            testNodes[2] = dataNodes[3];
            cluster.SetRandomSeed(unchecked ((int)(0xDEADBEEF)));
            cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
            NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[1]);
            NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[3]);
            NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[5]);
            // Same as previous, but with a different random seed to test randomization
            testNodes[0] = dataNodes[1];
            testNodes[1] = dataNodes[5];
            testNodes[2] = dataNodes[3];
            cluster.SetRandomSeed(unchecked ((int)(0xDEAD)));
            cluster.SortByDistance(dataNodes[0], testNodes, testNodes.Length);
            // sortByDistance does not take the "data center" layer into consideration
            // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
            NUnit.Framework.Assert.IsTrue(testNodes[0] == dataNodes[1]);
            NUnit.Framework.Assert.IsTrue(testNodes[1] == dataNodes[5]);
            NUnit.Framework.Assert.IsTrue(testNodes[2] == dataNodes[3]);
            // Array of just rack-local nodes
            // Expect a random first node
            DatanodeDescriptor first = null;
            bool foundRandom         = false;

            for (int i = 5; i <= 7; i++)
            {
                testNodes[0] = dataNodes[5];
                testNodes[1] = dataNodes[6];
                testNodes[2] = dataNodes[7];
                cluster.SortByDistance(dataNodes[i], testNodes, testNodes.Length);
                if (first == null)
                {
                    first = testNodes[0];
                }
                else
                {
                    if (first != testNodes[0])
                    {
                        foundRandom = true;
                        break;
                    }
                }
            }
            NUnit.Framework.Assert.IsTrue("Expected to find a different first location", foundRandom
                                          );
            // Array of just remote nodes
            // Expect random first node
            first = null;
            for (int i_1 = 1; i_1 <= 4; i_1++)
            {
                testNodes[0] = dataNodes[13];
                testNodes[1] = dataNodes[14];
                testNodes[2] = dataNodes[15];
                cluster.SortByDistance(dataNodes[i_1], testNodes, testNodes.Length);
                if (first == null)
                {
                    first = testNodes[0];
                }
                else
                {
                    if (first != testNodes[0])
                    {
                        foundRandom = true;
                        break;
                    }
                }
            }
            NUnit.Framework.Assert.IsTrue("Expected to find a different first location", foundRandom
                                          );
        }
Esempio n. 14
0
        /// <summary>Test race between delete operation and commitBlockSynchronization method.
        ///     </summary>
        /// <remarks>
        /// Test race between delete operation and commitBlockSynchronization method.
        /// See HDFS-6825.
        /// </remarks>
        /// <param name="hasSnapshot"/>
        /// <exception cref="System.Exception"/>
        private void TestDeleteAndCommitBlockSynchronizationRace(bool hasSnapshot)
        {
            Log.Info("Start testing, hasSnapshot: " + hasSnapshot);
            AList <AbstractMap.SimpleImmutableEntry <string, bool> > testList = new AList <AbstractMap.SimpleImmutableEntry
                                                                                           <string, bool> >();

            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/test-file",
                                                                                 false));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/test-file1"
                                                                                 , true));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/testdir/testdir1/test-file"
                                                                                 , false));
            testList.AddItem(new AbstractMap.SimpleImmutableEntry <string, bool>("/testdir/testdir1/test-file1"
                                                                                 , true));
            Path          rootPath = new Path("/");
            Configuration conf     = new Configuration();

            // Disable permissions so that another user can recover the lease.
            conf.SetBoolean(DFSConfigKeys.DfsPermissionsEnabledKey, false);
            conf.SetInt(DFSConfigKeys.DfsBlockSizeKey, BlockSize);
            FSDataOutputStream stm = null;
            IDictionary <DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap = new Dictionary
                                                                                   <DataNode, DatanodeProtocolClientSideTranslatorPB>();

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(3).Build();
                cluster.WaitActive();
                DistributedFileSystem fs = cluster.GetFileSystem();
                int stId = 0;
                foreach (AbstractMap.SimpleImmutableEntry <string, bool> stest in testList)
                {
                    string testPath  = stest.Key;
                    bool   mkSameDir = stest.Value;
                    Log.Info("test on " + testPath + " mkSameDir: " + mkSameDir + " snapshot: " + hasSnapshot
                             );
                    Path fPath = new Path(testPath);
                    //find grandest non-root parent
                    Path grandestNonRootParent = fPath;
                    while (!grandestNonRootParent.GetParent().Equals(rootPath))
                    {
                        grandestNonRootParent = grandestNonRootParent.GetParent();
                    }
                    stm = fs.Create(fPath);
                    Log.Info("test on " + testPath + " created " + fPath);
                    // write a half block
                    AppendTestUtil.Write(stm, 0, BlockSize / 2);
                    stm.Hflush();
                    if (hasSnapshot)
                    {
                        SnapshotTestHelper.CreateSnapshot(fs, rootPath, "st" + stId.ToString());
                        ++stId;
                    }
                    // Look into the block manager on the active node for the block
                    // under construction.
                    NameNode           nn              = cluster.GetNameNode();
                    ExtendedBlock      blk             = DFSTestUtil.GetFirstBlock(fs, fPath);
                    DatanodeDescriptor expectedPrimary = DFSTestUtil.GetExpectedPrimaryNode(nn, blk);
                    Log.Info("Expecting block recovery to be triggered on DN " + expectedPrimary);
                    // Find the corresponding DN daemon, and spy on its connection to the
                    // active.
                    DataNode primaryDN = cluster.GetDataNode(expectedPrimary.GetIpcPort());
                    DatanodeProtocolClientSideTranslatorPB nnSpy = dnMap[primaryDN];
                    if (nnSpy == null)
                    {
                        nnSpy            = DataNodeTestUtils.SpyOnBposToNN(primaryDN, nn);
                        dnMap[primaryDN] = nnSpy;
                    }
                    // Delay the commitBlockSynchronization call
                    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(Log);
                    Org.Mockito.Mockito.DoAnswer(delayer).When(nnSpy).CommitBlockSynchronization(Org.Mockito.Mockito
                                                                                                 .Eq(blk), Org.Mockito.Mockito.AnyInt(), Org.Mockito.Mockito.AnyLong(), Org.Mockito.Mockito
                                                                                                 .Eq(true), Org.Mockito.Mockito.Eq(false), (DatanodeID[])Org.Mockito.Mockito.AnyObject
                                                                                                     (), (string[])Org.Mockito.Mockito.AnyObject());
                    // new genstamp
                    // new length
                    // close file
                    // delete block
                    // new targets
                    // new target storages
                    fs.RecoverLease(fPath);
                    Log.Info("Waiting for commitBlockSynchronization call from primary");
                    delayer.WaitForCall();
                    Log.Info("Deleting recursively " + grandestNonRootParent);
                    fs.Delete(grandestNonRootParent, true);
                    if (mkSameDir && !grandestNonRootParent.ToString().Equals(testPath))
                    {
                        Log.Info("Recreate dir " + grandestNonRootParent + " testpath: " + testPath);
                        fs.Mkdirs(grandestNonRootParent);
                    }
                    delayer.Proceed();
                    Log.Info("Now wait for result");
                    delayer.WaitForResult();
                    Exception t = delayer.GetThrown();
                    if (t != null)
                    {
                        Log.Info("Result exception (snapshot: " + hasSnapshot + "): " + t);
                    }
                }
                // end of loop each fPath
                Log.Info("Now check we can restart");
                cluster.RestartNameNodes();
                Log.Info("Restart finished");
            }
            finally
            {
                if (stm != null)
                {
                    IOUtils.CloseStream(stm);
                }
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Esempio n. 15
0
        public virtual void TestXceiverCount()
        {
            Configuration conf = new HdfsConfiguration();

            // retry one time, if close fails
            conf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1);
            MiniDFSCluster cluster   = null;
            int            nodes     = 8;
            int            fileCount = 5;
            short          fileRepl  = 3;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nodes).Build();
                cluster.WaitActive();
                FSNamesystem          namesystem = cluster.GetNamesystem();
                DatanodeManager       dnm        = namesystem.GetBlockManager().GetDatanodeManager();
                IList <DataNode>      datanodes  = cluster.GetDataNodes();
                DistributedFileSystem fs         = cluster.GetFileSystem();
                // trigger heartbeats in case not already sent
                TriggerHeartbeats(datanodes);
                // check that all nodes are live and in service
                int expectedTotalLoad = nodes;
                // xceiver server adds 1 to load
                int expectedInServiceNodes = nodes;
                int expectedInServiceLoad  = nodes;
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // shutdown half the nodes and force a heartbeat check to ensure
                // counts are accurate
                for (int i = 0; i < nodes / 2; i++)
                {
                    DataNode           dn  = datanodes[i];
                    DatanodeDescriptor dnd = dnm.GetDatanode(dn.GetDatanodeId());
                    dn.Shutdown();
                    DFSTestUtil.SetDatanodeDead(dnd);
                    BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
                    //Verify decommission of dead node won't impact nodesInService metrics.
                    dnm.GetDecomManager().StartDecommission(dnd);
                    expectedInServiceNodes--;
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, namesystem.GetNumLiveDataNodes
                                                        ());
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem
                                                                                              ));
                    //Verify recommission of dead node won't impact nodesInService metrics.
                    dnm.GetDecomManager().StopDecommission(dnd);
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem
                                                                                              ));
                }
                // restart the nodes to verify that counts are correct after
                // node re-registration
                cluster.RestartDataNodes();
                cluster.WaitActive();
                datanodes = cluster.GetDataNodes();
                expectedInServiceNodes = nodes;
                NUnit.Framework.Assert.AreEqual(nodes, datanodes.Count);
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // create streams and hsync to force datastreamers to start
                DFSOutputStream[] streams = new DFSOutputStream[fileCount];
                for (int i_1 = 0; i_1 < fileCount; i_1++)
                {
                    streams[i_1] = (DFSOutputStream)fs.Create(new Path("/f" + i_1), fileRepl).GetWrappedStream
                                       ();
                    streams[i_1].Write(Sharpen.Runtime.GetBytesForString("1"));
                    streams[i_1].Hsync();
                    // the load for writers is 2 because both the write xceiver & packet
                    // responder threads are counted in the load
                    expectedTotalLoad     += 2 * fileRepl;
                    expectedInServiceLoad += 2 * fileRepl;
                }
                // force nodes to send load update
                TriggerHeartbeats(datanodes);
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // decomm a few nodes, substract their load from the expected load,
                // trigger heartbeat to force load update
                for (int i_2 = 0; i_2 < fileRepl; i_2++)
                {
                    expectedInServiceNodes--;
                    DatanodeDescriptor dnd = dnm.GetDatanode(datanodes[i_2].GetDatanodeId());
                    expectedInServiceLoad -= dnd.GetXceiverCount();
                    dnm.GetDecomManager().StartDecommission(dnd);
                    DataNodeTestUtils.TriggerHeartbeat(datanodes[i_2]);
                    Sharpen.Thread.Sleep(100);
                    CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }
                // check expected load while closing each stream.  recalc expected
                // load based on whether the nodes in the pipeline are decomm
                for (int i_3 = 0; i_3 < fileCount; i_3++)
                {
                    int decomm = 0;
                    foreach (DatanodeInfo dni in streams[i_3].GetPipeline())
                    {
                        DatanodeDescriptor dnd = dnm.GetDatanode(dni);
                        expectedTotalLoad -= 2;
                        if (dnd.IsDecommissionInProgress() || dnd.IsDecommissioned())
                        {
                            decomm++;
                        }
                        else
                        {
                            expectedInServiceLoad -= 2;
                        }
                    }
                    try
                    {
                        streams[i_3].Close();
                    }
                    catch (IOException ioe)
                    {
                        // nodes will go decommissioned even if there's a UC block whose
                        // other locations are decommissioned too.  we'll ignore that
                        // bug for now
                        if (decomm < fileRepl)
                        {
                            throw;
                        }
                    }
                    TriggerHeartbeats(datanodes);
                    // verify node count and loads
                    CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }
                // shutdown each node, verify node counts based on decomm state
                for (int i_4 = 0; i_4 < nodes; i_4++)
                {
                    DataNode dn = datanodes[i_4];
                    dn.Shutdown();
                    // force it to appear dead so live count decreases
                    DatanodeDescriptor dnDesc = dnm.GetDatanode(dn.GetDatanodeId());
                    DFSTestUtil.SetDatanodeDead(dnDesc);
                    BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
                    NUnit.Framework.Assert.AreEqual(nodes - 1 - i_4, namesystem.GetNumLiveDataNodes()
                                                    );
                    // first few nodes are already out of service
                    if (i_4 >= fileRepl)
                    {
                        expectedInServiceNodes--;
                    }
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem
                                                                                              ));
                    // live nodes always report load of 1.  no nodes is load 0
                    double expectedXceiverAvg = (i_4 == nodes - 1) ? 0.0 : 1.0;
                    NUnit.Framework.Assert.AreEqual((double)expectedXceiverAvg, GetInServiceXceiverAverage
                                                        (namesystem), Epsilon);
                }
                // final sanity check
                CheckClusterHealth(0, namesystem, 0.0, 0, 0.0);
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }