Example #1
        public virtual void TestConfigureMinValidVolumes()
        {
            Assume.AssumeTrue(!Runtime.GetProperty("os.name").StartsWith("Windows"));
            // Bring up two additional datanodes that need both of their volumes
            // functioning in order to stay up.
            conf.SetInt(DFSConfigKeys.DfsDatanodeFailedVolumesToleratedKey, 0);
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            // Fail a volume on the 2nd DN
            FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));

            DataNodeTestUtils.InjectDataDirFailure(dn2Vol1);
            // Should only get two replicas (the first DN and the 3rd)
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)2);
            // Check that this single failure caused a DN to die.
            DFSTestUtil.WaitForDatanodeStatus(dm, 2, 1, 0, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            // If we restore the volume we should still only be able to get
            // two replicas since the DN is still considered dead.
            DataNodeTestUtils.RestoreDataDirFromFailure(dn2Vol1);
            Path file2 = new Path("/test2");

            DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file2, (short)2);
        }
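The volume paths above encode MiniDFSCluster's on-disk layout: with two volumes per DataNode, the storage directories are numbered data1, data2 for the first node, data3, data4 for the second, and so on, which is why the second DN's first volume is "data" + (2 * 1 + 1). A minimal sketch of a helper that makes the arithmetic explicit (VolumeDir is a hypothetical name, not part of MiniDFSCluster):

        // Hypothetical helper: resolve the storage directory of volume
        // volIndex (0-based) on datanode dnIndex (0-based), assuming the
        // two-volumes-per-node layout these tests rely on.
        private FilePath VolumeDir(FilePath dataDir, int dnIndex, int volIndex)
        {
            return new FilePath(dataDir, "data" + (2 * dnIndex + volIndex + 1));
        }

With this helper, dn2Vol1 above would be VolumeDir(dataDir, 1, 0).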
Example #2
        /// <summary>
        /// Ensure that the given NameNode marks the specified DataNode as
        /// entirely dead/expired.
        /// </summary>
        /// <param name="nn">the NameNode to manipulate</param>
        /// <param name="dnName">the name of the DataNode</param>
        public static void NoticeDeadDatanode(NameNode nn, string dnName)
        {
            FSNamesystem namesystem = nn.GetNamesystem();

            namesystem.WriteLock();
            try
            {
                DatanodeManager      dnm    = namesystem.GetBlockManager().GetDatanodeManager();
                HeartbeatManager     hbm    = dnm.GetHeartbeatManager();
                DatanodeDescriptor[] dnds   = hbm.GetDatanodes();
                DatanodeDescriptor   theDND = null;
                foreach (DatanodeDescriptor dnd in dnds)
                {
                    if (dnd.GetXferAddr().Equals(dnName))
                    {
                        theDND = dnd;
                    }
                }
                NUnit.Framework.Assert.IsNotNull("Could not find DN with name: " + dnName, theDND);
                lock (hbm)
                {
                    DFSTestUtil.SetDatanodeDead(theDND);
                    hbm.HeartbeatCheck();
                }
            }
            finally
            {
                namesystem.WriteUnlock();
            }
        }
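A typical call site stops the DataNode before forcing the NameNode to expire it, so the forced view matches reality. A short usage sketch, assuming the helper lives in BlockManagerTestUtil as in the Hadoop test utilities and that a MiniDFSCluster fixture named cluster is in scope:

            // Stop the first datanode, then make the NameNode treat it as dead.
            DataNode dn = cluster.GetDataNodes()[0];
            string dnName = dn.GetDatanodeId().GetXferAddr();
            cluster.StopDataNode(0);
            BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), dnName);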
Example #3
        public virtual void TestBlocksScheduledCounter()
        {
            cluster = new MiniDFSCluster.Builder(new HdfsConfiguration()).Build();
            cluster.WaitActive();
            fs = cluster.GetFileSystem();
            // open a file and write a few bytes:
            FSDataOutputStream @out = fs.Create(new Path("/testBlockScheduledCounter"));

            for (int i = 0; i < 1024; i++)
            {
                @out.Write(i);
            }
            // flush to make sure a block is allocated.
            @out.Hflush();
            AList <DatanodeDescriptor> dnList = new AList <DatanodeDescriptor>();
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();

            dm.FetchDatanodes(dnList, dnList, false);
            DatanodeDescriptor dn = dnList[0];

            NUnit.Framework.Assert.AreEqual(1, dn.GetBlocksScheduled());
            // close the file and the counter should go to zero.
            @out.Close();
            NUnit.Framework.Assert.AreEqual(0, dn.GetBlocksScheduled());
        }
Example #4
        /// <summary>
        /// Checks NameNode tracking of a particular DataNode for correct reporting of
        /// failed volumes.
        /// </summary>
        /// <param name="dm">DatanodeManager to check</param>
        /// <param name="dn">DataNode to check</param>
        /// <param name="expectCapacityKnown">
        /// if true, then expect that the capacities of the
        /// volumes were known before the failures, and therefore the lost capacity
        /// can be reported
        /// </param>
        /// <param name="expectedFailedVolumes">expected locations of failed volumes</param>
        /// <exception cref="System.Exception">if there is any failure</exception>
        private void CheckFailuresAtNameNode(DatanodeManager dm, DataNode dn, bool expectCapacityKnown,
                                             params string[] expectedFailedVolumes)
        {
            DatanodeDescriptor dd = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager()
                                        .GetDatanode(dn.GetDatanodeId());

            NUnit.Framework.Assert.AreEqual(expectedFailedVolumes.Length, dd.GetVolumeFailures());
            VolumeFailureSummary volumeFailureSummary = dd.GetVolumeFailureSummary();

            if (expectedFailedVolumes.Length > 0)
            {
                Assert.AssertArrayEquals(expectedFailedVolumes, volumeFailureSummary.GetFailedStorageLocations());
                NUnit.Framework.Assert.IsTrue(volumeFailureSummary.GetLastVolumeFailureDate() > 0);
                long expectedCapacityLost = GetExpectedCapacityLost(expectCapacityKnown, expectedFailedVolumes.Length);
                NUnit.Framework.Assert.AreEqual(expectedCapacityLost, volumeFailureSummary.GetEstimatedCapacityLostTotal());
            }
            }
            else
            {
                NUnit.Framework.Assert.IsNull(volumeFailureSummary);
            }
        }
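The GetExpectedCapacityLost helper it calls is not shown in this snippet. A plausible reconstruction, assuming a volumeCapacity fixture field holding the known per-volume capacity: when capacities were never heartbeated before the failures, the NameNode cannot estimate the loss, so the expected value is zero.

        // Hypothetical reconstruction of the helper used above; volumeCapacity
        // is an assumed fixture field with the known per-volume capacity.
        private long GetExpectedCapacityLost(bool expectCapacityKnown, int failedVolumes)
        {
            // Lost capacity is only reportable when the volume capacities were known.
            return expectCapacityKnown ? failedVolumes * volumeCapacity : 0;
        }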
Example #5
        /// <summary>Verify the support for decommissioning a datanode that is already dead.</summary>
        /// <remarks>
        /// Verify the support for decommissioning a datanode that is already dead.
        /// Under this scenario the datanode should immediately be marked as
        /// DECOMMISSIONED
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDecommissionDeadDN()
        {
            Logger log = Logger.GetLogger(typeof(DecommissionManager));

            log.SetLevel(Level.Debug);
            DatanodeID dnID   = cluster.GetDataNodes()[0].GetDatanodeId();
            string     dnName = dnID.GetXferAddr();

            MiniDFSCluster.DataNodeProperties stoppedDN = cluster.StopDataNode(0);
            DFSTestUtil.WaitForDatanodeState(cluster, dnID.GetDatanodeUuid(), false, 30000);
            FSNamesystem       fsn          = cluster.GetNamesystem();
            DatanodeManager    dm           = fsn.GetBlockManager().GetDatanodeManager();
            DatanodeDescriptor dnDescriptor = dm.GetDatanode(dnID);

            DecommissionNode(fsn, localFileSys, dnName);
            dm.RefreshNodes(conf);
            BlockManagerTestUtil.RecheckDecommissionState(dm);
            NUnit.Framework.Assert.IsTrue(dnDescriptor.IsDecommissioned());
            // Add the node back
            cluster.RestartDataNode(stoppedDN, true);
            cluster.WaitActive();
            // Call refreshNodes on FSNamesystem with empty exclude file to remove the
            // datanode from decommissioning list and make it available again.
            WriteConfigFile(localFileSys, excludeFile, null);
            dm.RefreshNodes(conf);
        }
Example #6
        //The number of times the registration / removal of nodes should happen
        /// <exception cref="System.IO.IOException"/>
        private static DatanodeManager MockDatanodeManager(FSNamesystem fsn, Configuration conf)
        {
            BlockManager    bm = Org.Mockito.Mockito.Mock <BlockManager>();
            DatanodeManager dm = new DatanodeManager(bm, fsn, conf);

            return dm;
        }
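The factory keeps tests independent of a real BlockManager. A short usage sketch, mirroring the mocked HasWriteLock pattern used elsewhere on this page:

            // Build a DatanodeManager against mocked collaborators.
            FSNamesystem fsn = Org.Mockito.Mockito.Mock <FSNamesystem>();
            Org.Mockito.Mockito.When(fsn.HasWriteLock()).ThenReturn(true);
            DatanodeManager dm = MockDatanodeManager(fsn, new Configuration());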
Example #7
        public virtual void TestIncludeExcludeLists()
        {
            BlockManager    bm   = Org.Mockito.Mockito.Mock <BlockManager>();
            FSNamesystem    fsn  = Org.Mockito.Mockito.Mock <FSNamesystem>();
            Configuration   conf = new Configuration();
            HostFileManager hm   = new HostFileManager();

            HostFileManager.HostSet includedNodes = new HostFileManager.HostSet();
            HostFileManager.HostSet excludedNodes = new HostFileManager.HostSet();
            includedNodes.Add(Entry("127.0.0.1:12345"));
            includedNodes.Add(Entry("localhost:12345"));
            includedNodes.Add(Entry("127.0.0.1:12345"));
            includedNodes.Add(Entry("127.0.0.2"));
            excludedNodes.Add(Entry("127.0.0.1:12346"));
            excludedNodes.Add(Entry("127.0.30.1:12346"));
            NUnit.Framework.Assert.AreEqual(2, includedNodes.Size());
            NUnit.Framework.Assert.AreEqual(2, excludedNodes.Size());
            hm.Refresh(includedNodes, excludedNodes);
            DatanodeManager dm = new DatanodeManager(bm, fsn, conf);

            Whitebox.SetInternalState(dm, "hostFileManager", hm);
            IDictionary <string, DatanodeDescriptor> dnMap = (IDictionary <string, DatanodeDescriptor>)
                                                             Whitebox.GetInternalState(dm, "datanodeMap");

            // After the de-duplication, there should be only one DN from the included
            // nodes declared as dead.
            NUnit.Framework.Assert.AreEqual(2, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.All).Count);
            NUnit.Framework.Assert.AreEqual(2, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead).Count);
            dnMap["uuid-foo"] = new DatanodeDescriptor(new DatanodeID("127.0.0.1", "localhost",
                                                                      "uuid-foo", 12345, 1020, 1021, 1022));
            NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead).Count);
            dnMap["uuid-bar"] = new DatanodeDescriptor(new DatanodeID("127.0.0.2", "127.0.0.2",
                                                                      "uuid-bar", 12345, 1020, 1021, 1022));
            NUnit.Framework.Assert.AreEqual(0, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead).Count);
            DatanodeDescriptor spam = new DatanodeDescriptor(new DatanodeID("127.0.0.3",
                                                                            "127.0.0.3", "uuid-spam", 12345, 1020, 1021, 1022));

            DFSTestUtil.SetDatanodeDead(spam);
            includedNodes.Add(Entry("127.0.0.3:12345"));
            dnMap["uuid-spam"] = spam;
            NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead).Count);
            Sharpen.Collections.Remove(dnMap, "uuid-spam");
            NUnit.Framework.Assert.AreEqual(1, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead).Count);
            excludedNodes.Add(Entry("127.0.0.3"));
            NUnit.Framework.Assert.AreEqual(0, dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead).Count);
        }
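The Entry(...) helper that builds host-file entries from "host[:port]" strings is not shown. A plausible reconstruction based on the corresponding Hadoop test; the ParseEntry signature, the endpoint type, and the "dummy" file/line arguments are all assumptions:

        // Hypothetical reconstruction: parse a "host[:port]" string into the
        // endpoint type stored in HostFileManager.HostSet.
        private static IPEndPoint Entry(string e)
        {
            return HostFileManager.ParseEntry("dummy", "dummy", e);
        }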
Example #8
        public virtual void TestMultipleVolFailuresOnNode()
        {
            // Reinitialize the cluster, configured with 4 storage locations per DataNode
            // and tolerating up to 2 failures.
            TearDown();
            InitCluster(3, 4, 2);
            // Calculate the total capacity of all the datanodes. Sleep for three seconds
            // to be sure the datanodes have had a chance to heartbeat their capacities.
            Sharpen.Thread.Sleep(WaitForHeartbeats);
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long     origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long     dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            FilePath dn1Vol1      = new FilePath(dataDir, "data" + (4 * 0 + 1));
            FilePath dn1Vol2      = new FilePath(dataDir, "data" + (4 * 0 + 2));
            FilePath dn2Vol1      = new FilePath(dataDir, "data" + (4 * 1 + 1));
            FilePath dn2Vol2      = new FilePath(dataDir, "data" + (4 * 1 + 2));

            // Make the first two volume directories on the first two datanodes
            // non-accessible.
            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn1Vol2, dn2Vol1, dn2Vol2);
            // Create file1 and wait for 3 replicas (ie all DNs can still store a block).
            // Then assert that all DNs are up, despite the volume failures.
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)3);
            AList <DataNode> dns = cluster.GetDataNodes();

            NUnit.Framework.Assert.IsTrue("DN1 should be up", dns[0].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN2 should be up", dns[1].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN3 should be up", dns[2].IsDatanodeUp());
            CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath(), dn1Vol2.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath(), dn2Vol2.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[2], 0, true);
            // Ensure we wait a sufficient amount of time
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // Eventually the NN should report four volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 4, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 4);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath(), dn1Vol2.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath(), dn2Vol2.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[2], true);
        }
Example #9
        public virtual void TestReplDueToNodeFailRespectsRackPolicy()
        {
            Configuration conf = GetConf();
            short         ReplicationFactor = 3;
            Path          filePath          = new Path("/testFile");

            // Last datanode is on a different rack
            string[]       racks   = new string[] { "/rack1", "/rack1", "/rack1", "/rack2", "/rack2" };
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(racks.Length).Racks(racks).Build();
            FSNamesystem    ns = cluster.GetNameNode().GetNamesystem();
            DatanodeManager dm = ns.GetBlockManager().GetDatanodeManager();

            try
            {
                // Create a file with one block with a replication factor of 3
                FileSystem fs = cluster.GetFileSystem();
                DFSTestUtil.CreateFile(fs, filePath, 1L, ReplicationFactor, 1L);
                ExtendedBlock b = DFSTestUtil.GetFirstBlock(fs, filePath);
                DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
                // Make the last datanode look like it failed to heartbeat by
                // calling removeDatanode and stopping it.
                AList <DataNode> datanodes = cluster.GetDataNodes();
                int        idx             = datanodes.Count - 1;
                DataNode   dataNode        = datanodes[idx];
                DatanodeID dnId            = dataNode.GetDatanodeId();
                cluster.StopDataNode(idx);
                dm.RemoveDatanode(dnId);
                // The block should still have sufficient # replicas, across racks.
                // The last node may not have contained a replica, but if it did
                // it should have been replicated within the same rack.
                DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
                // Fail the last datanode again; it's also on rack2, so there is
                // only 1 rack for all the replicas
                datanodes = cluster.GetDataNodes();
                idx       = datanodes.Count - 1;
                dataNode  = datanodes[idx];
                dnId      = dataNode.GetDatanodeId();
                cluster.StopDataNode(idx);
                dm.RemoveDatanode(dnId);
                // Make sure we have enough live replicas even though we are
                // short one rack and therefore need one replica
                DFSTestUtil.WaitForReplication(cluster, b, 1, ReplicationFactor, 1);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Example #10
        public virtual void TestReduceReplFactorDueToRejoinRespectsRackPolicy()
        {
            Configuration conf = GetConf();
            short         ReplicationFactor = 2;
            Path          filePath          = new Path("/testFile");

            // Last datanode is on a different rack
            string[]       racks   = new string[] { "/rack1", "/rack1", "/rack2" };
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(racks.Length).Racks(racks).Build();
            FSNamesystem    ns = cluster.GetNameNode().GetNamesystem();
            DatanodeManager dm = ns.GetBlockManager().GetDatanodeManager();

            try
            {
                // Create a file with one block
                FileSystem fs = cluster.GetFileSystem();
                DFSTestUtil.CreateFile(fs, filePath, 1L, ReplicationFactor, 1L);
                ExtendedBlock b = DFSTestUtil.GetFirstBlock(fs, filePath);
                DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
                // Make the last (cross rack) datanode look like it failed
                // to heartbeat by stopping it and calling removeDatanode.
                AList <DataNode> datanodes = cluster.GetDataNodes();
                NUnit.Framework.Assert.AreEqual(3, datanodes.Count);
                DataNode   dataNode = datanodes[2];
                DatanodeID dnId     = dataNode.GetDatanodeId();
                cluster.StopDataNode(2);
                dm.RemoveDatanode(dnId);
                // The block gets re-replicated to another datanode so it has a
                // sufficient # replicas, but not across racks, so there should
                // be 1 rack, and 1 needed replica (even though there are 2 hosts
                // available and only 2 replicas required).
                DFSTestUtil.WaitForReplication(cluster, b, 1, ReplicationFactor, 1);
                // Start the "failed" datanode, which has a replica so the block is
                // now over-replicated and therefore a replica should be removed but
                // not on the restarted datanode as that would violate the rack policy.
                string[] rack2 = new string[] { "/rack2" };
                cluster.StartDataNodes(conf, 1, true, null, rack2);
                cluster.WaitActive();
                // The block now has sufficient # replicas, across racks
                DFSTestUtil.WaitForReplication(cluster, b, 2, ReplicationFactor, 0);
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Example #11
        public virtual void Setup()
        {
            conf = new HdfsConfiguration();
            SimulatedFSDataset.SetFactory(conf);
            Configuration[] overlays = new Configuration[NumDatanodes];
            for (int i = 0; i < overlays.Length; i++)
            {
                overlays[i] = new Configuration();
                if (i == RoNodeIndex)
                {
                    overlays[i].SetEnum(SimulatedFSDataset.ConfigPropertyState, i == RoNodeIndex ? DatanodeStorage.State
                                        .ReadOnlyShared : DatanodeStorage.State.Normal);
                }
            }
            cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(NumDatanodes).DataNodeConfOverlays(overlays).Build();
            fs              = cluster.GetFileSystem();
            blockManager    = cluster.GetNameNode().GetNamesystem().GetBlockManager();
            datanodeManager = blockManager.GetDatanodeManager();
            client          = new DFSClient(new IPEndPoint("localhost", cluster.GetNameNodePort()),
                                            cluster.GetConfiguration(0));
            for (int i_1 = 0; i_1 < NumDatanodes; i_1++)
            {
                DataNode dataNode = cluster.GetDataNodes()[i_1];
                ValidateStorageState(
                    BlockManagerTestUtil.GetStorageReportsForDatanode(
                        datanodeManager.GetDatanode(dataNode.GetDatanodeId())),
                    i_1 == RoNodeIndex ? DatanodeStorage.State.ReadOnlyShared : DatanodeStorage.State.Normal);
            }
            // Create a 1 block file
            DFSTestUtil.CreateFile(fs, Path, BlockSize, BlockSize, BlockSize, (short)1, seed);
            LocatedBlock locatedBlock = GetLocatedBlock();

            extendedBlock = locatedBlock.GetBlock();
            block         = extendedBlock.GetLocalBlock();
            Assert.AssertThat(locatedBlock.GetLocations().Length, CoreMatchers.Is(1));
            normalDataNode   = locatedBlock.GetLocations()[0];
            readOnlyDataNode = datanodeManager.GetDatanode(cluster.GetDataNodes()[RoNodeIndex].GetDatanodeId());
            Assert.AssertThat(normalDataNode, CoreMatchers.Is(CoreMatchers.Not(readOnlyDataNode)));
            ValidateNumberReplicas(1);
            // Inject the block into the datanode with READ_ONLY_SHARED storage
            cluster.InjectBlocks(0, RoNodeIndex, Collections.Singleton(block));
            // There should now be 2 *locations* for the block
            // Must wait until the NameNode has processed the block report for the injected blocks
            WaitForLocations(2);
        }
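ValidateStorageState, called in the registration loop above, only needs to assert the state on every storage report. A minimal sketch consistent with the AssertThat/CoreMatchers style used in this example (the exact signature is an assumption):

        // Assert that every storage report carries the expected storage state.
        private void ValidateStorageState(StorageReport[] storageReports, DatanodeStorage.State state)
        {
            foreach (StorageReport storageReport in storageReports)
            {
                Assert.AssertThat(storageReport.GetStorage().GetState(), CoreMatchers.Is(state));
            }
        }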
Example #12
        public virtual void TestDNSLookups()
        {
            TestDatanodeRegistration.MonitorDNS sm = new TestDatanodeRegistration.MonitorDNS();
            Runtime.SetSecurityManager(sm);
            MiniDFSCluster cluster = null;

            try
            {
                HdfsConfiguration conf = new HdfsConfiguration();
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(8).Build();
                cluster.WaitActive();
                int initialLookups = sm.lookups;
                NUnit.Framework.Assert.IsTrue("dns security manager is active", initialLookups !=
                                              0);
                DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager
                                         ();
                // make sure no lookups occur
                dm.RefreshNodes(conf);
                NUnit.Framework.Assert.AreEqual(initialLookups, sm.lookups);
                dm.RefreshNodes(conf);
                NUnit.Framework.Assert.AreEqual(initialLookups, sm.lookups);
                // ensure none of the reports trigger lookups
                dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.All);
                NUnit.Framework.Assert.AreEqual(initialLookups, sm.lookups);
                dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Live);
                NUnit.Framework.Assert.AreEqual(initialLookups, sm.lookups);
                dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType.Dead);
                NUnit.Framework.Assert.AreEqual(initialLookups, sm.lookups);
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
                Runtime.SetSecurityManager(null);
            }
        }
Example #13
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestRejectUnresolvedDatanodes()
        {
            //Create the DatanodeManager which will be tested
            FSNamesystem fsn = Org.Mockito.Mockito.Mock <FSNamesystem>();

            Org.Mockito.Mockito.When(fsn.HasWriteLock()).ThenReturn(true);
            Configuration conf = new Configuration();

            //Set configuration property for rejecting unresolved topology mapping
            conf.SetBoolean(DFSConfigKeys.DfsRejectUnresolvedDnTopologyMappingKey, true);
            //set TestDatanodeManager.MyResolver to be used for topology resolving
            conf.SetClass(CommonConfigurationKeysPublic.NetTopologyNodeSwitchMappingImplKey,
                          typeof(TestDatanodeManager.MyResolver), typeof(DNSToSwitchMapping));
            //create DatanodeManager
            DatanodeManager dm = new DatanodeManager(Org.Mockito.Mockito.Mock <BlockManager>(), fsn, conf);
            //storageID to register.
            string storageID        = "someStorageID-123";
            DatanodeRegistration dr = Org.Mockito.Mockito.Mock <DatanodeRegistration>();

            Org.Mockito.Mockito.When(dr.GetDatanodeUuid()).ThenReturn(storageID);
            try
            {
                //Register this node
                dm.RegisterDatanode(dr);
                NUnit.Framework.Assert.Fail("Expected an UnresolvedTopologyException");
            }
            catch (UnresolvedTopologyException)
            {
                Log.Info("Expected - topology is not resolved and " + "registration is rejected."
                         );
            }
            catch (Exception)
            {
                NUnit.Framework.Assert.Fail("Expected an UnresolvedTopologyException");
            }
        }
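TestDatanodeManager.MyResolver is not shown here. For the registration to be rejected, the resolver must fail to produce a topology mapping; a minimal sketch under that assumption, with method names following the DNSToSwitchMapping interface:

        // Hypothetical reconstruction: a resolver that never produces a mapping,
        // which forces RegisterDatanode to throw UnresolvedTopologyException
        // when rejection of unresolved topology mappings is enabled.
        public class MyResolver : DNSToSwitchMapping
        {
            public virtual IList <string> Resolve(IList <string> names)
            {
                return null;    // no topology mapping available
            }

            public virtual void ReloadCachedMappings()
            {
            }

            public virtual void ReloadCachedMappings(IList <string> names)
            {
            }
        }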
Example #14
        public static void SetupCluster()
        {
            Configuration conf = new HdfsConfiguration();

            string[] racks = new string[] { "/rack1", "/rack1", "/rack1", "/rack2", "/rack2",
                                            "/rack2" };
            storages  = DFSTestUtil.CreateDatanodeStorageInfos(racks);
            dataNodes = DFSTestUtil.ToDatanodeDescriptor(storages);
            FileSystem.SetDefaultUri(conf, "hdfs://localhost:0");
            conf.Set(DFSConfigKeys.DfsNamenodeHttpAddressKey, "0.0.0.0:0");
            FilePath baseDir = PathUtils.GetTestDir(typeof(TestReplicationPolicy));

            conf.Set(DFSConfigKeys.DfsNamenodeNameDirKey, new FilePath(baseDir, "name").GetPath());
            conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForReadKey, true);
            conf.SetBoolean(DFSConfigKeys.DfsNamenodeAvoidStaleDatanodeForWriteKey, true);
            conf.SetBoolean(DFSConfigKeys.DfsNamenodeReplicationConsiderloadKey, true);
            DFSTestUtil.FormatNameNode(conf);
            namenode = new NameNode(conf);
            int blockSize = 1024;

            dnrList   = new AList <DatanodeRegistration>();
            dnManager = namenode.GetNamesystem().GetBlockManager().GetDatanodeManager();
            // Register DNs
            for (int i = 0; i < 6; i++)
            {
                DatanodeRegistration dnr = new DatanodeRegistration(dataNodes[i],
                    new StorageInfo(HdfsServerConstants.NodeType.DataNode),
                    new ExportedBlockKeys(), VersionInfo.GetVersion());
                dnrList.AddItem(dnr);
                dnManager.RegisterDatanode(dnr);
                dataNodes[i].GetStorageInfos()[0].SetUtilizationForTesting(
                    2 * HdfsConstants.MinBlocksForWrite * blockSize, 0L,
                    2 * HdfsConstants.MinBlocksForWrite * blockSize, 0L);
                dataNodes[i].UpdateHeartbeat(BlockManagerTestUtil.GetStorageReportsForDatanode(dataNodes[i]),
                                             0L, 0L, 0, 0, null);
            }
        }
Example #15
        public virtual void TestFailedVolumeOnStartupIsCounted()
        {
            Assume.AssumeTrue(!Runtime.GetProperty("os.name").StartsWith("Windows"));
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long     origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            FilePath dir          = new FilePath(cluster.GetInstanceStorageDir(0, 0), "current");

            try
            {
                PrepareDirToFail(dir);
                RestartDatanodes(1, false);
                // The cluster is up...
                NUnit.Framework.Assert.AreEqual(true, cluster.GetDataNodes()[0].IsBPServiceAlive(
                                                    cluster.GetNamesystem().GetBlockPoolId()));
                // ...but there has been a single volume failure.
                DFSTestUtil.WaitForDatanodeStatus(dm, 1, 0, 1, origCapacity / 2, WaitForHeartbeats);
            }
            finally
            {
                FileUtil.Chmod(dir.ToString(), "755");
            }
        }
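PrepareDirToFail is not shown; the finally block's chmod back to 755 implies it strips all permissions so the DataNode cannot use the directory at startup. A sketch under that assumption:

        // Plausible reconstruction: make the storage directory unusable by
        // removing its permissions; the caller restores them with chmod 755.
        private void PrepareDirToFail(FilePath dir)
        {
            dir.Mkdirs();
            NUnit.Framework.Assert.AreEqual("Couldn't chmod local vol", 0,
                                            FileUtil.Chmod(dir.ToString(), "000"));
        }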
Example #16
        public virtual void TestVolFailureStatsPreservedOnNNRestart()
        {
            // Bring up two more datanodes that can tolerate 1 failure
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            // Fail the first volume on both datanodes (we have to keep the
            // third healthy so one node in the pipeline will not fail).
            FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));

            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)2);
            AList <DataNode> dns = cluster.GetDataNodes();

            // The NN reports two volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            // After restarting the NN it should still see the two failures
            cluster.RestartNameNode(0);
            cluster.WaitActive();
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
        }
Example #17
 /// <summary>Have DatanodeManager check decommission state.</summary>
 /// <param name="dm">the DatanodeManager to manipulate</param>
 /// <exception cref="Sharpen.ExecutionException"/>
 /// <exception cref="System.Exception"/>
 public static void RecheckDecommissionState(DatanodeManager dm)
 {
     dm.GetDecomManager().RunMonitor();
 }
Example #18
        public virtual void TestXceiverCount()
        {
            Configuration conf = new HdfsConfiguration();

            // retry one time, if close fails
            conf.SetInt(DFSConfigKeys.DfsClientBlockWriteLocatefollowingblockRetriesKey, 1);
            MiniDFSCluster cluster   = null;
            int            nodes     = 8;
            int            fileCount = 5;
            short          fileRepl  = 3;

            try
            {
                cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nodes).Build();
                cluster.WaitActive();
                FSNamesystem          namesystem = cluster.GetNamesystem();
                DatanodeManager       dnm        = namesystem.GetBlockManager().GetDatanodeManager();
                IList <DataNode>      datanodes  = cluster.GetDataNodes();
                DistributedFileSystem fs         = cluster.GetFileSystem();
                // trigger heartbeats in case not already sent
                TriggerHeartbeats(datanodes);
                // check that all nodes are live and in service
                int expectedTotalLoad = nodes;
                // xceiver server adds 1 to load
                int expectedInServiceNodes = nodes;
                int expectedInServiceLoad  = nodes;
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // shutdown half the nodes and force a heartbeat check to ensure
                // counts are accurate
                for (int i = 0; i < nodes / 2; i++)
                {
                    DataNode           dn  = datanodes[i];
                    DatanodeDescriptor dnd = dnm.GetDatanode(dn.GetDatanodeId());
                    dn.Shutdown();
                    DFSTestUtil.SetDatanodeDead(dnd);
                    BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
                    //Verify decommission of dead node won't impact nodesInService metrics.
                    dnm.GetDecomManager().StartDecommission(dnd);
                    expectedInServiceNodes--;
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, namesystem.GetNumLiveDataNodes());
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
                    //Verify recommission of dead node won't impact nodesInService metrics.
                    dnm.GetDecomManager().StopDecommission(dnd);
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
                }
                // restart the nodes to verify that counts are correct after
                // node re-registration
                cluster.RestartDataNodes();
                cluster.WaitActive();
                datanodes = cluster.GetDataNodes();
                expectedInServiceNodes = nodes;
                NUnit.Framework.Assert.AreEqual(nodes, datanodes.Count);
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // create streams and hsync to force datastreamers to start
                DFSOutputStream[] streams = new DFSOutputStream[fileCount];
                for (int i_1 = 0; i_1 < fileCount; i_1++)
                {
                    streams[i_1] = (DFSOutputStream)fs.Create(new Path("/f" + i_1), fileRepl).GetWrappedStream();
                    streams[i_1].Write(Sharpen.Runtime.GetBytesForString("1"));
                    streams[i_1].Hsync();
                    // the load for writers is 2 because both the write xceiver & packet
                    // responder threads are counted in the load
                    expectedTotalLoad     += 2 * fileRepl;
                    expectedInServiceLoad += 2 * fileRepl;
                }
                // force nodes to send load update
                TriggerHeartbeats(datanodes);
                CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                   expectedInServiceLoad);
                // decomm a few nodes, subtract their load from the expected load,
                // trigger heartbeat to force load update
                for (int i_2 = 0; i_2 < fileRepl; i_2++)
                {
                    expectedInServiceNodes--;
                    DatanodeDescriptor dnd = dnm.GetDatanode(datanodes[i_2].GetDatanodeId());
                    expectedInServiceLoad -= dnd.GetXceiverCount();
                    dnm.GetDecomManager().StartDecommission(dnd);
                    DataNodeTestUtils.TriggerHeartbeat(datanodes[i_2]);
                    Sharpen.Thread.Sleep(100);
                    CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }
                // check expected load while closing each stream.  recalc expected
                // load based on whether the nodes in the pipeline are decomm
                for (int i_3 = 0; i_3 < fileCount; i_3++)
                {
                    int decomm = 0;
                    foreach (DatanodeInfo dni in streams[i_3].GetPipeline())
                    {
                        DatanodeDescriptor dnd = dnm.GetDatanode(dni);
                        expectedTotalLoad -= 2;
                        if (dnd.IsDecommissionInProgress() || dnd.IsDecommissioned())
                        {
                            decomm++;
                        }
                        else
                        {
                            expectedInServiceLoad -= 2;
                        }
                    }
                    try
                    {
                        streams[i_3].Close();
                    }
                    catch (IOException ioe)
                    {
                        // nodes will go decommissioned even if there's a UC block whose
                        // other locations are decommissioned too.  we'll ignore that
                        // bug for now
                        if (decomm < fileRepl)
                        {
                            throw;
                        }
                    }
                    TriggerHeartbeats(datanodes);
                    // verify node count and loads
                    CheckClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes,
                                       expectedInServiceLoad);
                }
                // shutdown each node, verify node counts based on decomm state
                for (int i_4 = 0; i_4 < nodes; i_4++)
                {
                    DataNode dn = datanodes[i_4];
                    dn.Shutdown();
                    // force it to appear dead so live count decreases
                    DatanodeDescriptor dnDesc = dnm.GetDatanode(dn.GetDatanodeId());
                    DFSTestUtil.SetDatanodeDead(dnDesc);
                    BlockManagerTestUtil.CheckHeartbeat(namesystem.GetBlockManager());
                    NUnit.Framework.Assert.AreEqual(nodes - 1 - i_4, namesystem.GetNumLiveDataNodes());
                    // first few nodes are already out of service
                    if (i_4 >= fileRepl)
                    {
                        expectedInServiceNodes--;
                    }
                    NUnit.Framework.Assert.AreEqual(expectedInServiceNodes, GetNumDNInService(namesystem));
                    // live nodes always report a load of 1; with no nodes the load is 0
                    double expectedXceiverAvg = (i_4 == nodes - 1) ? 0.0 : 1.0;
                    NUnit.Framework.Assert.AreEqual((double)expectedXceiverAvg, GetInServiceXceiverAverage(namesystem), Epsilon);
                }
                // final sanity check
                CheckClusterHealth(0, namesystem, 0.0, 0, 0.0);
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
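GetNumDNInService and GetInServiceXceiverAverage wrap FSNamesystem statistics and are not shown here. A plausible sketch, assuming the FSClusterStats accessors from the Hadoop version of this test survived the port:

        // Hypothetical reconstruction of the stat helpers used above.
        private static int GetNumDNInService(FSNamesystem fsn)
        {
            return fsn.GetFSClusterStats().GetNumDatanodesInService();
        }

        private static double GetInServiceXceiverAverage(FSNamesystem fsn)
        {
            return fsn.GetFSClusterStats().GetInServiceXceiverAverage();
        }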
Example #19
        public virtual void TestSuccessiveVolumeFailures()
        {
            // Bring up two more datanodes
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();

            /*
             * Calculate the total capacity of all the datanodes. Sleep for
             * three seconds to be sure the datanodes have had a chance to
             * heartbeat their capacities.
             */
            Sharpen.Thread.Sleep(WaitForHeartbeats);
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long     origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long     dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            FilePath dn1Vol1      = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath dn2Vol1      = new FilePath(dataDir, "data" + (2 * 1 + 1));
            FilePath dn3Vol1      = new FilePath(dataDir, "data" + (2 * 2 + 1));
            FilePath dn3Vol2      = new FilePath(dataDir, "data" + (2 * 2 + 2));

            /*
             * Make the 1st volume directories on the first two datanodes
             * non-accessible.  We don't make all three 1st volume directories
             * readonly since that would cause the entire pipeline to
             * fail. The client does not retry failed nodes even though
             * perhaps they could succeed because just a single volume failed.
             */
            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1, dn2Vol1);

            /*
             * Create file1 and wait for 3 replicas (ie all DNs can still
             * store a block).  Then assert that all DNs are up, despite the
             * volume failures.
             */
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)3);
            AList <DataNode> dns = cluster.GetDataNodes();

            NUnit.Framework.Assert.IsTrue("DN1 should be up", dns[0].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN2 should be up", dns[1].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN3 should be up", dns[2].IsDatanodeUp());

            /*
             * The metrics should confirm the volume failures.
             */
            CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[2], 0, true);
            // Ensure we wait a sufficient amount of time
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // Eventually the NN should report two volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[2], true);

            /*
             * Now fail a volume on the third datanode. We should be able to get
             * three replicas since we've already identified the other failures.
             */
            DataNodeTestUtils.InjectDataDirFailure(dn3Vol1);
            Path file2 = new Path("/test2");

            DFSTestUtil.CreateFile(fs, file2, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file2, (short)3);
            NUnit.Framework.Assert.IsTrue("DN3 should still be up", dns[2].IsDatanodeUp());
            CheckFailuresAtDataNode(dns[2], 1, true, dn3Vol1.GetAbsolutePath());
            DataNodeTestUtils.TriggerHeartbeat(dns[2]);
            CheckFailuresAtNameNode(dm, dns[2], true, dn3Vol1.GetAbsolutePath());

            /*
             * Once the datanodes have a chance to heartbeat their new capacity the
             * total capacity should be down by three volumes (assuming the host
             * did not grow or shrink the data volume while the test was running).
             */
            dnCapacity = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 3, origCapacity - (3 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 3);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[2], true, dn3Vol1.GetAbsolutePath());

            /*
             * Now fail the 2nd volume on the 3rd datanode. All its volumes
             * are now failed and so it should report two volume failures
             * and that it's no longer up. Only wait for two replicas since
             * we'll never get a third.
             */
            DataNodeTestUtils.InjectDataDirFailure(dn3Vol2);
            Path file3 = new Path("/test3");

            DFSTestUtil.CreateFile(fs, file3, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file3, (short)2);
            // The DN should consider itself dead
            DFSTestUtil.WaitForDatanodeDeath(dns[2]);
            // And report two failed volumes
            CheckFailuresAtDataNode(dns[2], 2, true, dn3Vol1.GetAbsolutePath(), dn3Vol2.GetAbsolutePath());
            // The NN considers the DN dead
            DFSTestUtil.WaitForDatanodeStatus(dm, 2, 1, 2, origCapacity - (4 * dnCapacity), WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());

            /*
             * The datanode never tries to restore the failed volume, even if
             * it's subsequently repaired, but it should see this volume on
             * restart, so file creation should be able to succeed after
             * restoring the data directories and restarting the datanodes.
             */
            DataNodeTestUtils.RestoreDataDirFromFailure(dn1Vol1, dn2Vol1, dn3Vol1, dn3Vol2);
            cluster.RestartDataNodes();
            cluster.WaitActive();
            Path file4 = new Path("/test4");

            DFSTestUtil.CreateFile(fs, file4, 1024, (short)3, 1L);
            DFSTestUtil.WaitReplication(fs, file4, (short)3);

            /*
             * Eventually the capacity should be restored to its original value,
             * and the volume failure count should be reported as zero by both
             * the metrics and the NN.
             */
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 0);
            dns = cluster.GetDataNodes();
            CheckFailuresAtNameNode(dm, dns[0], true);
            CheckFailuresAtNameNode(dm, dns[1], true);
            CheckFailuresAtNameNode(dm, dns[2], true);
        }
Example #20
        public virtual void TestVolumeSize()
        {
            Configuration  conf    = new HdfsConfiguration();
            MiniDFSCluster cluster = null;
            // Set aside a fifth of the total capacity as reserved
            long reserved = 10000;

            conf.SetLong(DFSConfigKeys.DfsDatanodeDuReservedKey, reserved);
            try
            {
                cluster = new MiniDFSCluster.Builder(conf).Build();
                cluster.WaitActive();
                FSNamesystem    namesystem = cluster.GetNamesystem();
                DatanodeManager dm         = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
                // Ensure the data reported for each data node is right
                IList <DatanodeDescriptor> live = new AList <DatanodeDescriptor>();
                IList <DatanodeDescriptor> dead = new AList <DatanodeDescriptor>();
                dm.FetchDatanodes(live, dead, false);
                NUnit.Framework.Assert.IsTrue(live.Count == 1);
                long  used;
                long  remaining;
                long  configCapacity;
                long  nonDFSUsed;
                long  bpUsed;
                float percentUsed;
                float percentRemaining;
                float percentBpUsed;
                foreach (DatanodeDescriptor datanode in live)
                {
                    used             = datanode.GetDfsUsed();
                    remaining        = datanode.GetRemaining();
                    nonDFSUsed       = datanode.GetNonDfsUsed();
                    configCapacity   = datanode.GetCapacity();
                    percentUsed      = datanode.GetDfsUsedPercent();
                    percentRemaining = datanode.GetRemainingPercent();
                    bpUsed           = datanode.GetBlockPoolUsed();
                    percentBpUsed    = datanode.GetBlockPoolUsedPercent();
                    Log.Info("Datanode configCapacity " + configCapacity + " used " + used + " non DFS used "
                             + nonDFSUsed + " remaining " + remaining + " perentUsed " + percentUsed + " percentRemaining "
                             + percentRemaining);
                    NUnit.Framework.Assert.IsTrue(configCapacity == (used + remaining + nonDFSUsed));
                    NUnit.Framework.Assert.IsTrue(percentUsed == DFSUtil.GetPercentUsed(used, configCapacity
                                                                                        ));
                    NUnit.Framework.Assert.IsTrue(percentRemaining == DFSUtil.GetPercentRemaining(remaining
                                                                                                  , configCapacity));
                    NUnit.Framework.Assert.IsTrue(percentBpUsed == DFSUtil.GetPercentUsed(bpUsed, configCapacity
                                                                                          ));
                }
                DF df = new DF(new FilePath(cluster.GetDataDirectory()), conf);
                //
                // Currently two data directories are created by the data node
                // in the MiniDFSCluster. This results in each data directory having
                // capacity equal to the disk capacity of the data directory.
                // Hence the capacity reported by the data node is twice the
                // disk capacity.
                //
                // So multiply the disk capacity and reserved space by two
                // to account for this.
                //
                int  numOfDataDirs = 2;
                long diskCapacity  = numOfDataDirs * df.GetCapacity();
                reserved        *= numOfDataDirs;
                configCapacity   = namesystem.GetCapacityTotal();
                used             = namesystem.GetCapacityUsed();
                nonDFSUsed       = namesystem.GetNonDfsUsedSpace();
                remaining        = namesystem.GetCapacityRemaining();
                percentUsed      = namesystem.GetPercentUsed();
                percentRemaining = namesystem.GetPercentRemaining();
                bpUsed           = namesystem.GetBlockPoolUsedSpace();
                percentBpUsed    = namesystem.GetPercentBlockPoolUsed();
                Log.Info("Data node directory " + cluster.GetDataDirectory());
                Log.Info("Name node diskCapacity " + diskCapacity + " configCapacity " + configCapacity
                         + " reserved " + reserved + " used " + used + " remaining " + remaining + " nonDFSUsed "
                         + nonDFSUsed + " remaining " + remaining + " percentUsed " + percentUsed + " percentRemaining "
                         + percentRemaining + " bpUsed " + bpUsed + " percentBpUsed " + percentBpUsed);
                // Ensure new total capacity reported excludes the reserved space
                NUnit.Framework.Assert.IsTrue(configCapacity == diskCapacity - reserved);
                // Ensure the total capacity is the sum of used, remaining, and non-DFS used
                NUnit.Framework.Assert.IsTrue(configCapacity == (used + remaining + nonDFSUsed));
                // Ensure percent used is calculated based on used and present capacity
                NUnit.Framework.Assert.IsTrue(percentUsed == DFSUtil.GetPercentUsed(used, configCapacity));
                // Ensure percent block pool used is calculated based on bpUsed and present capacity
                NUnit.Framework.Assert.IsTrue(percentBpUsed == DFSUtil.GetPercentUsed(bpUsed, configCapacity));
                // Ensure percent remaining is calculated based on remaining and present capacity
                NUnit.Framework.Assert.IsTrue(percentRemaining == ((float)remaining * 100.0f) / (float)configCapacity);
            }
            finally
            {
                if (cluster != null)
                {
                    cluster.Shutdown();
                }
            }
        }
Example #21
        public virtual void TestDataNodeReconfigureWithVolumeFailures()
        {
            // Bring up two more datanodes
            cluster.StartDataNodes(conf, 2, true, null, null);
            cluster.WaitActive();
            DatanodeManager dm = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager();
            long origCapacity = DFSTestUtil.GetLiveDatanodeCapacity(dm);
            long dnCapacity   = DFSTestUtil.GetDatanodeCapacity(dm, 0);
            // Fail the first volume on both datanodes (we have to keep the
            // third healthy so one node in the pipeline will not fail).
            FilePath dn1Vol1 = new FilePath(dataDir, "data" + (2 * 0 + 1));
            FilePath dn1Vol2 = new FilePath(dataDir, "data" + (2 * 0 + 2));
            FilePath dn2Vol1 = new FilePath(dataDir, "data" + (2 * 1 + 1));
            FilePath dn2Vol2 = new FilePath(dataDir, "data" + (2 * 1 + 2));

            DataNodeTestUtils.InjectDataDirFailure(dn1Vol1);
            DataNodeTestUtils.InjectDataDirFailure(dn2Vol1);
            Path file1 = new Path("/test1");

            DFSTestUtil.CreateFile(fs, file1, 1024, (short)2, 1L);
            DFSTestUtil.WaitReplication(fs, file1, (short)2);
            AList <DataNode> dns = cluster.GetDataNodes();

            NUnit.Framework.Assert.IsTrue("DN1 should be up", dns[0].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN2 should be up", dns[1].IsDatanodeUp());
            NUnit.Framework.Assert.IsTrue("DN3 should be up", dns[2].IsDatanodeUp());
            CheckFailuresAtDataNode(dns[0], 1, true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, true, dn2Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[2], 0, true);
            // Ensure we wait a sufficient amount of time
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // The NN reports two volume failures
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats
                                              );
            CheckAggregateFailuresAtNameNode(true, 2);
            CheckFailuresAtNameNode(dm, dns[0], true, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], true, dn2Vol1.GetAbsolutePath());
            // Reconfigure again to try to add back the failed volumes.
            ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
            ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dns[0]);
            DataNodeTestUtils.TriggerHeartbeat(dns[1]);
            CheckFailuresAtDataNode(dns[0], 1, false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, false, dn2Vol1.GetAbsolutePath());
            // Ensure we wait a sufficient amount of time.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // The NN reports two volume failures again.
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats
                                              );
            CheckAggregateFailuresAtNameNode(false, 2);
            CheckFailuresAtNameNode(dm, dns[0], false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], false, dn2Vol1.GetAbsolutePath());
            // Reconfigure a third time with the failed volumes.  Afterwards, we expect
            // the same volume failures to be reported.  (No double-counting.)
            ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
            ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dns[0]);
            DataNodeTestUtils.TriggerHeartbeat(dns[1]);
            CheckFailuresAtDataNode(dns[0], 1, false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtDataNode(dns[1], 1, false, dn2Vol1.GetAbsolutePath());
            // Ensure we wait a sufficient amount of time.
            System.Diagnostics.Debug.Assert((WaitForHeartbeats * 10) > WaitForDeath);
            // The NN reports two volume failures again.
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WaitForHeartbeats
                                              );
            CheckAggregateFailuresAtNameNode(false, 2);
            CheckFailuresAtNameNode(dm, dns[0], false, dn1Vol1.GetAbsolutePath());
            CheckFailuresAtNameNode(dm, dns[1], false, dn2Vol1.GetAbsolutePath());
            // Replace failed volume with healthy volume and run reconfigure DataNode.
            // The failed volume information should be cleared.
            DataNodeTestUtils.RestoreDataDirFromFailure(dn1Vol1, dn2Vol1);
            ReconfigureDataNode(dns[0], dn1Vol1, dn1Vol2);
            ReconfigureDataNode(dns[1], dn2Vol1, dn2Vol2);
            DataNodeTestUtils.TriggerHeartbeat(dns[0]);
            DataNodeTestUtils.TriggerHeartbeat(dns[1]);
            CheckFailuresAtDataNode(dns[0], 1, true);
            CheckFailuresAtDataNode(dns[1], 1, true);
            DFSTestUtil.WaitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WaitForHeartbeats);
            CheckAggregateFailuresAtNameNode(true, 0);
            CheckFailuresAtNameNode(dm, dns[0], true);
            CheckFailuresAtNameNode(dm, dns[1], true);
        }
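The ReconfigureDataNode helper the test calls is not shown in this excerpt. A plausible sketch, under the assumption that the Sharpen port keeps the Java original's DataNode.ReconfigurePropertyImpl and the DFSConfigKeys.DfsDatanodeDataDirKey config key (both names assumed here):

        // Hedged sketch of the ReconfigureDataNode helper used above. Assumes the
        // port exposes DataNode.ReconfigurePropertyImpl and DfsDatanodeDataDirKey,
        // mirroring the Java original.
        private static void ReconfigureDataNode(DataNode dn, params FilePath[] newVols)
        {
            StringBuilder dnNewDataDirs = new StringBuilder();
            foreach (FilePath newVol in newVols)
            {
                if (dnNewDataDirs.Length > 0)
                {
                    dnNewDataDirs.Append(',');
                }
                dnNewDataDirs.Append(newVol.GetAbsolutePath());
            }
            try
            {
                dn.ReconfigurePropertyImpl(DFSConfigKeys.DfsDatanodeDataDirKey, dnNewDataDirs.ToString());
            }
            catch (ReconfigurationException)
            {
                // Reconfiguration throws if the new config still names a failed volume;
                // the tests above intentionally exercise that path, so swallow it.
            }
        }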
Example #22
        public virtual void TestExcludeDataNodes()
        {
            Configuration conf = WebHdfsTestUtil.CreateConf();

            string[] racks = new string[] { Rack0, Rack0, Rack1, Rack1, Rack2, Rack2 };
            string[] hosts = new string[] { "DataNode1", "DataNode2", "DataNode3", "DataNode4"
                                            , "DataNode5", "DataNode6" };
            int nDataNodes = hosts.Length;

            Log.Info("nDataNodes=" + nDataNodes + ", racks=" + Arrays.AsList(racks) + ", hosts="
                     + Arrays.AsList(hosts));
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Hosts(hosts).NumDataNodes
                                         (nDataNodes).Racks(racks).Build();

            try
            {
                cluster.WaitActive();
                DistributedFileSystem dfs = cluster.GetFileSystem();
                NameNode        namenode  = cluster.GetNameNode();
                DatanodeManager dm        = namenode.GetNamesystem().GetBlockManager().GetDatanodeManager
                                                ();
                Log.Info("dm=" + dm);
                long   blocksize = DFSConfigKeys.DfsBlockSizeDefault;
                string f         = "/foo";
                //create a file with three replicas.
                Path p = new Path(f);
                FSDataOutputStream @out = dfs.Create(p, (short)3);
                @out.Write(1);
                @out.Close();
                //get replica location.
                LocatedBlocks locatedblocks = NameNodeAdapter.GetBlockLocations(namenode, f, 0, 1
                                                                                );
                IList <LocatedBlock> lb = locatedblocks.GetLocatedBlocks();
                NUnit.Framework.Assert.AreEqual(1, lb.Count);
                DatanodeInfo[] locations = lb[0].GetLocations();
                NUnit.Framework.Assert.AreEqual(3, locations.Length);
                //For GETFILECHECKSUM, OPEN and APPEND,
                //the chosen datanode must be different from the excluded nodes.
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < 2; i++)
                {
                    sb.Append(locations[i].GetXferAddr());
                    {
                        // test GETFILECHECKSUM
                        DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, GetOpParam.OP
                                                                                    .Getfilechecksum, -1L, blocksize, sb.ToString());
                        for (int j = 0; j <= i; j++)
                        {
                            Assert.AssertNotEquals(locations[j].GetHostName(), chosen.GetHostName());
                        }
                    }
                    {
                        // test OPEN
                        DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, GetOpParam.OP
                                                                                    .Open, 0, blocksize, sb.ToString());
                        for (int j = 0; j <= i; j++)
                        {
                            Assert.AssertNotEquals(locations[j].GetHostName(), chosen.GetHostName());
                        }
                    }
                    {
                        // test APPEND
                        DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, PostOpParam.OP
                                                                                    .Append, -1L, blocksize, sb.ToString());
                        for (int j = 0; j <= i; j++)
                        {
                            Assert.AssertNotEquals(locations[j].GetHostName(), chosen.GetHostName());
                        }
                    }
                    sb.Append(",");
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Example #23
 protected internal BlockPlacementPolicyWithNodeGroup(Configuration conf, FSClusterStats
                                                      stats, NetworkTopology clusterMap, DatanodeManager datanodeManager)
 {
     Initialize(conf, stats, clusterMap, datanodeManager.GetHost2DatanodeMap());
 }
Example #24
        public virtual void TestRemoveIncludedNode()
        {
            FSNamesystem fsn = Org.Mockito.Mockito.Mock <FSNamesystem>();

            // Set the write lock so that the DatanodeManager can start
            Org.Mockito.Mockito.When(fsn.HasWriteLock()).ThenReturn(true);
            DatanodeManager dm = MockDatanodeManager(fsn, new Configuration());
            HostFileManager hm = new HostFileManager();

            HostFileManager.HostSet noNodes  = new HostFileManager.HostSet();
            HostFileManager.HostSet oneNode  = new HostFileManager.HostSet();
            HostFileManager.HostSet twoNodes = new HostFileManager.HostSet();
            DatanodeRegistration    dr1      = new DatanodeRegistration(new DatanodeID("127.0.0.1", "127.0.0.1"
                                                                                       , "someStorageID-123", 12345, 12345, 12345, 12345), new StorageInfo(HdfsServerConstants.NodeType
                                                                                                                                                           .DataNode), new ExportedBlockKeys(), "test");
            DatanodeRegistration dr2 = new DatanodeRegistration(new DatanodeID("127.0.0.1", "127.0.0.1"
                                                                               , "someStorageID-234", 23456, 23456, 23456, 23456), new StorageInfo(HdfsServerConstants.NodeType
                                                                                                                                                   .DataNode), new ExportedBlockKeys(), "test");

            twoNodes.Add(Entry("127.0.0.1:12345"));
            twoNodes.Add(Entry("127.0.0.1:23456"));
            oneNode.Add(Entry("127.0.0.1:23456"));
            hm.Refresh(twoNodes, noNodes);
            Whitebox.SetInternalState(dm, "hostFileManager", hm);
            // Register two data nodes to simulate them coming up.
            // We need to add two nodes, because if we have only one node, removing it
            // will cause the includes list to be empty, which means all hosts will be
            // allowed.
            dm.RegisterDatanode(dr1);
            dm.RegisterDatanode(dr2);
            // Make sure that both nodes are reported
            IList <DatanodeDescriptor> both = dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                          .All);

            // Sort the list so that we know which one is which
            both.Sort();
            NUnit.Framework.Assert.AreEqual("Incorrect number of hosts reported", 2, both.Count
                                            );
            NUnit.Framework.Assert.AreEqual("Unexpected host or host in unexpected position",
                                            "127.0.0.1:12345", both[0].GetInfoAddr());
            NUnit.Framework.Assert.AreEqual("Unexpected host or host in unexpected position",
                                            "127.0.0.1:23456", both[1].GetInfoAddr());
            // Remove one node from includes, but do not add it to excludes.
            hm.Refresh(oneNode, noNodes);
            // Make sure that only one node is still reported
            IList <DatanodeDescriptor> onlyOne = dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                             .All);

            NUnit.Framework.Assert.AreEqual("Incorrect number of hosts reported", 1, onlyOne.
                                            Count);
            NUnit.Framework.Assert.AreEqual("Unexpected host reported", "127.0.0.1:23456", onlyOne
                                            [0].GetInfoAddr());
            // Remove all nodes from includes
            hm.Refresh(noNodes, noNodes);
            // Check that both nodes are reported again
            IList <DatanodeDescriptor> bothAgain = dm.GetDatanodeListForReport(HdfsConstants.DatanodeReportType
                                                                               .All);

            // Sort the list so that we know which one is which
            bothAgain.Sort();
            NUnit.Framework.Assert.AreEqual("Incorrect number of hosts reported", 2, bothAgain
                                            .Count);
            NUnit.Framework.Assert.AreEqual("Unexpected host or host in unexpected position",
                                            "127.0.0.1:12345", bothAgain[0].GetInfoAddr());
            NUnit.Framework.Assert.AreEqual("Unexpected host or host in unexpected position",
                                            "127.0.0.1:23456", bothAgain[1].GetInfoAddr());
        }
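The Entry helper used to populate the HostSets is not shown here; a minimal sketch, assuming the port keeps the Java original's HostFileManager.ParseEntry(prefix, filename, entry) signature and an IPEndPoint return type:

        // Hedged sketch of the Entry helper used above; ParseEntry's name,
        // signature and return type are assumed from the Java original.
        private static IPEndPoint Entry(string e)
        {
            return HostFileManager.ParseEntry("dummy", "dummy", e);
        }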
Example #25
        public virtual void TestNumVersionsReportedCorrect()
        {
            //Create the DatanodeManager which will be tested
            FSNamesystem fsn = Org.Mockito.Mockito.Mock <FSNamesystem>();

            Org.Mockito.Mockito.When(fsn.HasWriteLock()).ThenReturn(true);
            DatanodeManager dm = new DatanodeManager(Org.Mockito.Mockito.Mock <BlockManager>()
                                                     , fsn, new Configuration());
            //Seed the RNG with a known value so test failures are easier to reproduce
            Random rng  = new Random();
            int    seed = rng.Next();

            rng = new Random(seed);
            Log.Info("Using seed " + seed + " for testing");
            //A map of the Storage IDs to the DN registration it was registered with
            Dictionary <string, DatanodeRegistration> sIdToDnReg = new Dictionary <string, DatanodeRegistration
                                                                                   >();

            for (int i = 0; i < NumIterations; ++i)
            {
                //On every 3rd iteration, randomly decide to remove a node (if one is registered)
                if (rng.NextBoolean() && i % 3 == 0 && sIdToDnReg.Count != 0)
                {
                    //Pick a random node.
                    int randomIndex = rng.Next() % sIdToDnReg.Count;
                    //Iterate to that random position
                    IEnumerator <KeyValuePair <string, DatanodeRegistration> > it = sIdToDnReg.GetEnumerator
                                                                                        ();
                    for (int j = 0; j < randomIndex - 1; ++j)
                    {
                        it.Next();
                    }
                    DatanodeRegistration toRemove = it.Next().Value;
                    Log.Info("Removing node " + toRemove.GetDatanodeUuid() + " ip " + toRemove.GetXferAddr
                                 () + " version : " + toRemove.GetSoftwareVersion());
                    //Remove that random node
                    dm.RemoveDatanode(toRemove);
                    it.Remove();
                }
                else
                {
                    // Otherwise register a node; it may be brand new or previously seen.
                    //Pick a random storageID to register.
                    string storageID        = "someStorageID" + rng.Next(5000);
                    DatanodeRegistration dr = Org.Mockito.Mockito.Mock <DatanodeRegistration>();
                    Org.Mockito.Mockito.When(dr.GetDatanodeUuid()).ThenReturn(storageID);
                    //If this storageID had already been registered before
                    if (sIdToDnReg.Contains(storageID))
                    {
                        dr = sIdToDnReg[storageID];
                        //Half of the times, change the IP address
                        if (rng.NextBoolean())
                        {
                            dr.SetIpAddr(dr.GetIpAddr() + "newIP");
                        }
                    }
                    else
                    {
                        //This storageID has never been registered
                        //Ensure IP address is unique to storageID
                        string ip = "someIP" + storageID;
                        Org.Mockito.Mockito.When(dr.GetIpAddr()).ThenReturn(ip);
                        Org.Mockito.Mockito.When(dr.GetXferAddr()).ThenReturn(ip + ":9000");
                        Org.Mockito.Mockito.When(dr.GetXferPort()).ThenReturn(9000);
                    }
                    //Pick a random version to register with
                    Org.Mockito.Mockito.When(dr.GetSoftwareVersion()).ThenReturn("version" + rng.Next
                                                                                     (5));
                    Log.Info("Registering node storageID: " + dr.GetDatanodeUuid() + ", version: " +
                             dr.GetSoftwareVersion() + ", IP address: " + dr.GetXferAddr());
                    //Register this random node
                    dm.RegisterDatanode(dr);
                    sIdToDnReg[storageID] = dr;
                }
                //Verify DatanodeManager still has the right count
                IDictionary <string, int> mapToCheck = dm.GetDatanodesSoftwareVersions();
                //Remove counts from versions and make sure that after removing all nodes
                //mapToCheck is empty
                foreach (KeyValuePair <string, DatanodeRegistration> it_1 in sIdToDnReg)
                {
                    string ver = it_1.Value.GetSoftwareVersion();
                    if (!mapToCheck.Contains(ver))
                    {
                        throw new Exception("The correct number of datanodes of a " + "version was not found on iteration "
                                            + i);
                    }
                    mapToCheck[ver] = mapToCheck[ver] - 1;
                    if (mapToCheck[ver] == 0)
                    {
                        Sharpen.Collections.Remove(mapToCheck, ver);
                    }
                }
                foreach (KeyValuePair <string, int> entry in mapToCheck)
                {
                    Log.Info("Still in map: " + entry.Key + " has " + entry.Value);
                }
                NUnit.Framework.Assert.AreEqual("The map of version counts returned by DatanodeManager was"
                                                + " not what it was expected to be on iteration " + i, 0, mapToCheck.Count);
            }
        }
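The decrement-and-remove verification above is equivalent to building an expected per-version histogram from the registered nodes and comparing it against the DatanodeManager's map. An illustrative standalone restatement (not part of the test itself):

            // Illustrative restatement of the check above: count registrations per
            // software version and compare against dm.GetDatanodesSoftwareVersions().
            IDictionary<string, int> expected = new Dictionary<string, int>();
            foreach (DatanodeRegistration reg in sIdToDnReg.Values)
            {
                string ver = reg.GetSoftwareVersion();
                expected[ver] = expected.ContainsKey(ver) ? expected[ver] + 1 : 1;
            }
            // expected must now equal the NameNode's version -> count map exactly.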
Example #26
        public virtual void TestDataLocality()
        {
            Configuration conf = WebHdfsTestUtil.CreateConf();

            string[] racks      = new string[] { Rack0, Rack0, Rack1, Rack1, Rack2, Rack2 };
            int      nDataNodes = racks.Length;

            Log.Info("nDataNodes=" + nDataNodes + ", racks=" + Arrays.AsList(racks));
            MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).NumDataNodes(nDataNodes
                                                                                   ).Racks(racks).Build();

            try
            {
                cluster.WaitActive();
                DistributedFileSystem dfs = cluster.GetFileSystem();
                NameNode        namenode  = cluster.GetNameNode();
                DatanodeManager dm        = namenode.GetNamesystem().GetBlockManager().GetDatanodeManager
                                                ();
                Log.Info("dm=" + dm);
                long   blocksize = DFSConfigKeys.DfsBlockSizeDefault;
                string f         = "/foo";
                {
                    //test CREATE
                    for (int i = 0; i < nDataNodes; i++)
                    {
                        //set client address to a particular datanode
                        DataNode dn     = cluster.GetDataNodes()[i];
                        string   ipAddr = dm.GetDatanode(dn.GetDatanodeId()).GetIpAddr();
                        NamenodeWebHdfsMethods.SetRemoteAddress(ipAddr);
                        //The chosen datanode must be the same as the client address
                        DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, PutOpParam.OP
                                                                                    .Create, -1L, blocksize, null);
                        NUnit.Framework.Assert.AreEqual(ipAddr, chosen.GetIpAddr());
                    }
                }
                //create a file with one replica.
                Path p = new Path(f);
                FSDataOutputStream @out = dfs.Create(p, (short)1);
                @out.Write(1);
                @out.Close();
                //get replica location.
                LocatedBlocks locatedblocks = NameNodeAdapter.GetBlockLocations(namenode, f, 0, 1
                                                                                );
                IList <LocatedBlock> lb = locatedblocks.GetLocatedBlocks();
                NUnit.Framework.Assert.AreEqual(1, lb.Count);
                DatanodeInfo[] locations = lb[0].GetLocations();
                NUnit.Framework.Assert.AreEqual(1, locations.Length);
                DatanodeInfo expected = locations[0];
                {
                    //For GETFILECHECKSUM, OPEN and APPEND,
                    //the chosen datanode must be the same as the replica location.
                    //test GETFILECHECKSUM
                    DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, GetOpParam.OP
                                                                                .Getfilechecksum, -1L, blocksize, null);
                    NUnit.Framework.Assert.AreEqual(expected, chosen);
                }
                {
                    //test OPEN
                    DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, GetOpParam.OP
                                                                                .Open, 0, blocksize, null);
                    NUnit.Framework.Assert.AreEqual(expected, chosen);
                }
                {
                    //test APPEND
                    DatanodeInfo chosen = NamenodeWebHdfsMethods.ChooseDatanode(namenode, f, PostOpParam.OP
                                                                                .Append, -1L, blocksize, null);
                    NUnit.Framework.Assert.AreEqual(expected, chosen);
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }
Example #27
        public virtual void TestSortLocatedBlocks()
        {
            // create the DatanodeManager which will be tested
            FSNamesystem fsn = Org.Mockito.Mockito.Mock <FSNamesystem>();

            Org.Mockito.Mockito.When(fsn.HasWriteLock()).ThenReturn(true);
            DatanodeManager dm = new DatanodeManager(Org.Mockito.Mockito.Mock <BlockManager>()
                                                     , fsn, new Configuration());

            // register 5 datanodes, each with different storage ID and type
            DatanodeInfo[] locs         = new DatanodeInfo[5];
            string[]       storageIDs   = new string[5];
            StorageType[]  storageTypes = new StorageType[] { StorageType.Archive, StorageType
                                                              .Default, StorageType.Disk, StorageType.RamDisk, StorageType.Ssd };
            for (int i = 0; i < 5; i++)
            {
                // register new datanode
                string uuid             = "UUID-" + i;
                string ip               = "IP-" + i;
                DatanodeRegistration dr = Org.Mockito.Mockito.Mock <DatanodeRegistration>();
                Org.Mockito.Mockito.When(dr.GetDatanodeUuid()).ThenReturn(uuid);
                Org.Mockito.Mockito.When(dr.GetIpAddr()).ThenReturn(ip);
                Org.Mockito.Mockito.When(dr.GetXferAddr()).ThenReturn(ip + ":9000");
                Org.Mockito.Mockito.When(dr.GetXferPort()).ThenReturn(9000);
                Org.Mockito.Mockito.When(dr.GetSoftwareVersion()).ThenReturn("version1");
                dm.RegisterDatanode(dr);
                // get location and storage information
                locs[i]       = dm.GetDatanode(uuid);
                storageIDs[i] = "storageID-" + i;
            }
            // set first 2 locations as decommissioned
            locs[0].SetDecommissioned();
            locs[1].SetDecommissioned();
            // create LocatedBlock with above locations
            ExtendedBlock        b      = new ExtendedBlock("somePoolID", 1234);
            LocatedBlock         block  = new LocatedBlock(b, locs, storageIDs, storageTypes);
            IList <LocatedBlock> blocks = new AList <LocatedBlock>();

            blocks.AddItem(block);
            string targetIp = locs[4].GetIpAddr();

            // sort block locations
            dm.SortLocatedBlocks(targetIp, blocks);
            // check that storage IDs/types are aligned with datanode locs
            DatanodeInfo[] sortedLocs = block.GetLocations();
            storageIDs   = block.GetStorageIDs();
            storageTypes = block.GetStorageTypes();
            Assert.AssertThat(sortedLocs.Length, IS.Is(5));
            Assert.AssertThat(storageIDs.Length, IS.Is(5));
            Assert.AssertThat(storageTypes.Length, IS.Is(5));
            for (int i_1 = 0; i_1 < sortedLocs.Length; i_1++)
            {
                Assert.AssertThat(((DatanodeInfoWithStorage)sortedLocs[i_1]).GetStorageID(), IS.Is
                                      (storageIDs[i_1]));
                Assert.AssertThat(((DatanodeInfoWithStorage)sortedLocs[i_1]).GetStorageType(), IS.Is
                                      (storageTypes[i_1]));
            }
            // Ensure the local node is first.
            Assert.AssertThat(sortedLocs[0].GetIpAddr(), IS.Is(targetIp));
            // Ensure the two decommissioned DNs were moved to the end.
            Assert.AssertThat(sortedLocs[sortedLocs.Length - 1].GetAdminState(), IS.Is(DatanodeInfo.AdminStates
                                                                                       .Decommissioned));
            Assert.AssertThat(sortedLocs[sortedLocs.Length - 2].GetAdminState(), IS.Is(DatanodeInfo.AdminStates
                                                                                       .Decommissioned));
        }
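SortLocatedBlocks orders each block's replicas so the reader-local node comes first and decommissioned nodes sink to the end. A minimal comparator sketch of that ordering rule (illustrative only; the real DatanodeManager implementation also weighs full network distance):

        // Illustrative comparator for the ordering the assertions above verify:
        // reader-local node first, decommissioned nodes last. Not the real
        // implementation, which additionally sorts by network distance.
        private static void SortForReader(DatanodeInfo[] nodes, string readerIp)
        {
            Array.Sort(nodes, (a, b) =>
            {
                int decomA = a.IsDecommissioned() ? 1 : 0;
                int decomB = b.IsDecommissioned() ? 1 : 0;
                if (decomA != decomB)
                {
                    return decomA - decomB;   // decommissioned replicas sort last
                }
                int localA = a.GetIpAddr().Equals(readerIp) ? 0 : 1;
                int localB = b.GetIpAddr().Equals(readerIp) ? 0 : 1;
                return localA - localB;       // the reader-local replica sorts first
            });
        }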
Example #28
        public virtual void TestDecommissionStatus()
        {
            IPEndPoint addr   = new IPEndPoint("localhost", cluster.GetNameNodePort());
            DFSClient  client = new DFSClient(addr, conf);

            DatanodeInfo[] info = client.DatanodeReport(HdfsConstants.DatanodeReportType.Live
                                                        );
            NUnit.Framework.Assert.AreEqual("Number of Datanodes ", 2, info.Length);
            DistributedFileSystem fileSys = cluster.GetFileSystem();
            DFSAdmin admin    = new DFSAdmin(cluster.GetConfiguration(0));
            short    replicas = numDatanodes;
            //
            // Decommission one node. Verify the decommission status
            //
            Path file1 = new Path("decommission.dat");

            WriteFile(fileSys, file1, replicas);
            Path file2             = new Path("decommission1.dat");
            FSDataOutputStream st1 = WriteIncompleteFile(fileSys, file2, replicas);

            foreach (DataNode d in cluster.GetDataNodes())
            {
                DataNodeTestUtils.TriggerBlockReport(d);
            }
            FSNamesystem    fsn = cluster.GetNamesystem();
            DatanodeManager dm  = fsn.GetBlockManager().GetDatanodeManager();

            for (int iteration = 0; iteration < numDatanodes; iteration++)
            {
                string downnode = DecommissionNode(fsn, client, localFileSys, iteration);
                dm.RefreshNodes(conf);
                decommissionedNodes.AddItem(downnode);
                BlockManagerTestUtil.RecheckDecommissionState(dm);
                IList <DatanodeDescriptor> decommissioningNodes = dm.GetDecommissioningNodes();
                if (iteration == 0)
                {
                    NUnit.Framework.Assert.AreEqual(1, decommissioningNodes.Count);
                    DatanodeDescriptor decommNode = decommissioningNodes[0];
                    CheckDecommissionStatus(decommNode, 3, 0, 1);
                    CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 1), fileSys, admin
                                                    );
                }
                else
                {
                    NUnit.Framework.Assert.AreEqual(2, decommissioningNodes.Count);
                    DatanodeDescriptor decommNode1 = decommissioningNodes[0];
                    DatanodeDescriptor decommNode2 = decommissioningNodes[1];
                    // This one is still 3,3,1 since it passed over the UC block
                    // earlier, before node 2 was decommed
                    CheckDecommissionStatus(decommNode1, 3, 3, 1);
                    // This one is 4,4,2 since it has the full state
                    CheckDecommissionStatus(decommNode2, 4, 4, 2);
                    CheckDFSAdminDecommissionStatus(decommissioningNodes.SubList(0, 2), fileSys, admin
                                                    );
                }
            }
            // Call refreshNodes on FSNamesystem with empty exclude file.
            // This will remove the datanodes from decommissioning list and
            // make them available again.
            WriteConfigFile(localFileSys, excludeFile, null);
            dm.RefreshNodes(conf);
            st1.Close();
            CleanupFile(fileSys, file1);
            CleanupFile(fileSys, file2);
        }
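Both decommission tests drive cluster membership through WriteConfigFile, which is not shown in this excerpt. A plausible sketch, assuming the helper keeps the same shape as the Java original (one entry per line; passing null clears the file):

        // Hedged sketch of the WriteConfigFile helper used above; mirrors the Java
        // original's shape. Passing null for nodes leaves the file empty, which is
        // how the tests clear the exclude list.
        private static void WriteConfigFile(FileSystem fs, Path name, IList<string> nodes)
        {
            if (fs.Exists(name))
            {
                fs.Delete(name, true);
            }
            FSDataOutputStream stm = fs.Create(name);
            if (nodes != null)
            {
                foreach (string node in nodes)
                {
                    stm.WriteBytes(node + "\n");
                }
            }
            stm.Close();
        }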
Example #29
        /// <summary>
        /// Check if there are any expired heartbeats, and if so,
        /// whether any blocks have to be re-replicated.
        /// </summary>
        /// <remarks>
        /// Check if there are any expired heartbeats, and if so,
        /// whether any blocks have to be re-replicated.
        /// While removing dead datanodes, make sure that only one datanode is marked
        /// dead at a time within the synchronized section. Otherwise, a cascading
        /// effect causes more datanodes to be declared dead.
        /// Check if there is any failed storage and, if so,
        /// remove all the blocks on the storage. It also covers the following less
        /// common scenarios. After a DatanodeStorage is marked FAILED, it is still
        /// possible to receive an IBR for this storage.
        /// 1) A DN could deliver an IBR for failed storage due to its implementation.
        /// a) DN queues a pending IBR request.
        /// b) The storage of the block fails.
        /// c) DN first sends an HB; NN will mark the storage FAILED.
        /// d) DN then sends the pending IBR request.
        /// 2) SBN processes block requests from pendingDNMessages.
        /// It is possible to have messages in pendingDNMessages that refer
        /// to some failed storage.
        /// a) SBN receives an IBR and puts it in pendingDNMessages.
        /// b) The storage of the block fails.
        /// c) Edit log replay gets the IBR from pendingDNMessages.
        /// Alternatively, we can resolve these scenarios with the following approaches.
        /// A. Make sure DNs don't deliver IBRs for failed storage.
        /// B. Remove all blocks in PendingDataNodeMessages for the failed storage
        /// when we remove all blocks from BlocksMap for that storage.
        /// </remarks>
        internal virtual void HeartbeatCheck()
        {
            DatanodeManager dm = blockManager.GetDatanodeManager();

            // It's OK to check safe mode w/o taking the lock here, we re-check
            // for safe mode after taking the lock before removing a datanode.
            if (namesystem.IsInStartupSafeMode())
            {
                return;
            }
            bool allAlive = false;

            while (!allAlive)
            {
                // locate the first dead node.
                DatanodeID dead = null;
                // locate the first failed storage that isn't on a dead node.
                DatanodeStorageInfo failedStorage = null;
                // check the number of stale nodes
                int numOfStaleNodes    = 0;
                int numOfStaleStorages = 0;
                lock (this)
                {
                    foreach (DatanodeDescriptor d in datanodes)
                    {
                        if (dead == null && dm.IsDatanodeDead(d))
                        {
                            stats.IncrExpiredHeartbeats();
                            dead = d;
                        }
                        if (d.IsStale(dm.GetStaleInterval()))
                        {
                            numOfStaleNodes++;
                        }
                        DatanodeStorageInfo[] storageInfos = d.GetStorageInfos();
                        foreach (DatanodeStorageInfo storageInfo in storageInfos)
                        {
                            if (storageInfo.AreBlockContentsStale())
                            {
                                numOfStaleStorages++;
                            }
                            if (failedStorage == null && storageInfo.AreBlocksOnFailedStorage() && d != dead)
                            {
                                failedStorage = storageInfo;
                            }
                        }
                    }
                    // Set the number of stale nodes in the DatanodeManager
                    dm.SetNumStaleNodes(numOfStaleNodes);
                    dm.SetNumStaleStorages(numOfStaleStorages);
                }
                allAlive = dead == null && failedStorage == null;
                if (dead != null)
                {
                    // acquire the fsnamesystem lock, and then remove the dead node.
                    namesystem.WriteLock();
                    try
                    {
                        if (namesystem.IsInStartupSafeMode())
                        {
                            return;
                        }
                        lock (this)
                        {
                            dm.RemoveDeadDatanode(dead);
                        }
                    }
                    finally
                    {
                        namesystem.WriteUnlock();
                    }
                }
                if (failedStorage != null)
                {
                    // acquire the fsnamesystem lock, and remove blocks on the storage.
                    namesystem.WriteLock();
                    try
                    {
                        if (namesystem.IsInStartupSafeMode())
                        {
                            return;
                        }
                        lock (this)
                        {
                            blockManager.RemoveBlocksAssociatedTo(failedStorage);
                        }
                    }
                    finally
                    {
                        namesystem.WriteUnlock();
                    }
                }
            }
        }
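The loop above follows a detect-then-act pattern: scan under the monitor lock to find at most one dead node or failed storage, then reacquire the namesystem write lock to act, re-checking safe mode on each pass. A stripped-down skeleton of that pattern (names simplified; FindFirstDeadNode and RemoveDeadDatanode are hypothetical stand-ins for the scan and removal above):

            // Skeleton of the detect-then-act pattern used above (illustrative only).
            // At most one dead node is handled per pass, so removals cannot cascade
            // into mass expirations.
            while (true)
            {
                DatanodeID dead;
                lock (this)
                {
                    dead = FindFirstDeadNode();   // hypothetical scan helper
                }
                if (dead == null)
                {
                    break;                        // everything alive: done
                }
                namesystem.WriteLock();           // act outside the monitor scan
                try
                {
                    if (namesystem.IsInStartupSafeMode())
                    {
                        return;                   // re-check safe mode before mutating
                    }
                    lock (this)
                    {
                        RemoveDeadDatanode(dead); // hypothetical removal helper
                    }
                }
                finally
                {
                    namesystem.WriteUnlock();
                }
            }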
Example #30
        /// <summary>
        /// Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
        /// as dead before decommission has completed.
        /// </summary>
        /// <remarks>
        /// Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
        /// as dead before decommission has completed. That will allow DN to resume
        /// the replication process after it rejoins the cluster.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDecommissionStatusAfterDNRestart()
        {
            DistributedFileSystem fileSys = (DistributedFileSystem)cluster.GetFileSystem();
            // Create a file with one block. That block has one replica.
            Path f = new Path("decommission.dat");

            DFSTestUtil.CreateFile(fileSys, f, fileSize, fileSize, fileSize, (short)1, seed);
            // Find the DN that owns the only replica.
            RemoteIterator <LocatedFileStatus> fileList = fileSys.ListLocatedStatus(f);

            BlockLocation[] blockLocations = fileList.Next().GetBlockLocations();
            string          dnName         = blockLocations[0].GetNames()[0];
            // Decommission the DN.
            FSNamesystem    fsn = cluster.GetNamesystem();
            DatanodeManager dm  = fsn.GetBlockManager().GetDatanodeManager();

            DecommissionNode(fsn, localFileSys, dnName);
            dm.RefreshNodes(conf);
            // Stop the DN when decommission is in progress.
            // Given DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is set to 1 and the size
            // of the block, decommission will take much longer than the test timeout
            // to complete. So when StopDataNode is called, decommission should still
            // be in progress.
            MiniDFSCluster.DataNodeProperties dataNodeProperties = cluster.StopDataNode(dnName
                                                                                        );
            IList <DatanodeDescriptor> dead = new AList <DatanodeDescriptor>();

            while (true)
            {
                dm.FetchDatanodes(null, dead, false);
                if (dead.Count == 1)
                {
                    break;
                }
                Sharpen.Thread.Sleep(1000);
            }
            // Force removal of the dead node's blocks.
            BlockManagerTestUtil.CheckHeartbeat(fsn.GetBlockManager());
            // Force DatanodeManager to check decommission state.
            BlockManagerTestUtil.RecheckDecommissionState(dm);
            // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
            NUnit.Framework.Assert.IsTrue("the node should be DECOMMISSION_IN_PROGRESSS", dead
                                          [0].IsDecommissionInProgress());
            // Check DatanodeManager#getDecommissionNodes, make sure it returns
            // the node as decommissioning, even if it's dead
            IList <DatanodeDescriptor> decomlist = dm.GetDecommissioningNodes();

            NUnit.Framework.Assert.IsTrue("The node should be be decommissioning", decomlist.
                                          Count == 1);
            // Delete the under-replicated file, which should let the
            // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
            CleanupFile(fileSys, f);
            BlockManagerTestUtil.RecheckDecommissionState(dm);
            NUnit.Framework.Assert.IsTrue("the node should be decommissioned", dead[0].IsDecommissioned
                                              ());
            // Add the node back
            cluster.RestartDataNode(dataNodeProperties, true);
            cluster.WaitActive();
            // Call refreshNodes on FSNamesystem with empty exclude file.
            // This will remove the datanodes from decommissioning list and
            // make them available again.
            WriteConfigFile(localFileSys, excludeFile, null);
            dm.RefreshNodes(conf);
        }