/// <summary>Scan through the cached block map and reconcile each block's cache state.</summary>
 /// <remarks>
 /// Any blocks which are under-replicated should be assigned new Datanodes.
 /// Blocks that are over-replicated should be removed from Datanodes.
 /// A block that needs no cached replicas and has no pending cache or uncache
 /// work left is removed from the map.
 /// </remarks>
 private void RescanCachedBlockMap()
 {
     for (IEnumerator <CachedBlock> cbIter = cachedBlocks.GetEnumerator(); cbIter.HasNext();)
     {
         scannedBlocks++;
         CachedBlock cblock = cbIter.Next();
         IList <DatanodeDescriptor> pendingCached = cblock.GetDatanodes(
             DatanodeDescriptor.CachedBlocksList.Type.PendingCached);
         IList <DatanodeDescriptor> cached = cblock.GetDatanodes(
             DatanodeDescriptor.CachedBlocksList.Type.Cached);
         IList <DatanodeDescriptor> pendingUncached = cblock.GetDatanodes(
             DatanodeDescriptor.CachedBlocksList.Type.PendingUncached);

         // Remove nodes from PENDING_UNCACHED if they were actually uncached.
         for (IEnumerator <DatanodeDescriptor> iter = pendingUncached.GetEnumerator(); iter.HasNext();)
         {
             DatanodeDescriptor datanode = iter.Next();
             if (!cblock.IsInList(datanode.GetCached()))
             {
                 Log.Trace("Block {}: removing from PENDING_UNCACHED for node {} because the DataNode uncached it.",
                           cblock.GetBlockId(), datanode.GetDatanodeUuid());
                 datanode.GetPendingUncached().Remove(cblock);
                 iter.Remove();
             }
         }

         // A non-null reason means the block must not be cached at all, so the
         // number of needed replicas stays at zero.
         BlockInfoContiguous blockInfo = blockManager.GetStoredBlock(new Block(cblock.GetBlockId()));
         string reason       = FindReasonForNotCaching(cblock, blockInfo);
         int    neededCached = 0;
         if (reason != null)
         {
             Log.Trace("Block {}: can't cache block because it is {}", cblock.GetBlockId(), reason);
         }
         else
         {
             neededCached = cblock.GetReplication();
         }

         int numCached = cached.Count;
         if (numCached >= neededCached)
         {
             // If we have enough replicas, drop all pending cached.
             for (IEnumerator <DatanodeDescriptor> iter_1 = pendingCached.GetEnumerator(); iter_1.HasNext();)
             {
                 DatanodeDescriptor datanode = iter_1.Next();
                 datanode.GetPendingCached().Remove(cblock);
                 iter_1.Remove();
                 // The message previously lacked the space between "{}" and "because".
                 Log.Trace("Block {}: removing from PENDING_CACHED for node {} because we already have {} cached replicas and we only need {}",
                           cblock.GetBlockId(), datanode.GetDatanodeUuid(), numCached, neededCached);
             }
         }
         else
         {
             // If we don't have enough replicas, drop all pending uncached.
             for (IEnumerator <DatanodeDescriptor> iter_1 = pendingUncached.GetEnumerator(); iter_1.HasNext();)
             {
                 DatanodeDescriptor datanode = iter_1.Next();
                 datanode.GetPendingUncached().Remove(cblock);
                 iter_1.Remove();
                 Log.Trace("Block {}: removing from PENDING_UNCACHED for node {} because we only have {} cached replicas and we need {}",
                           cblock.GetBlockId(), datanode.GetDatanodeUuid(), numCached, neededCached);
             }
         }

         // Computed after the clean-up above, so pendingUncached.Count reflects
         // any entries that were just dropped.
         int neededUncached = numCached - (pendingUncached.Count + neededCached);
         if (neededUncached > 0)
         {
             AddNewPendingUncached(neededUncached, cblock, cached, pendingUncached);
         }
         else
         {
             int additionalCachedNeeded = neededCached - (numCached + pendingCached.Count);
             if (additionalCachedNeeded > 0)
             {
                 AddNewPendingCached(additionalCachedNeeded, cblock, cached, pendingCached);
             }
         }

         if ((neededCached == 0) && pendingUncached.IsEmpty() && pendingCached.IsEmpty())
         {
             // we have nothing more to do with this block.
             Log.Trace("Block {}: removing from cachedBlocks, since neededCached == 0, and pendingUncached and pendingCached are empty.",
                       cblock.GetBlockId());
             cbIter.Remove();
         }
     }
 }
        /// <summary>Add new entries to the PendingCached list.</summary>
        /// <remarks>
        /// Candidate DataNodes are taken from the stored block's replica locations.
        /// A candidate is skipped when it is decommissioned or decommissioning, holds
        /// a corrupt replica, is already in <paramref name="cached"/> or
        /// <paramref name="pendingCached"/>, or does not have enough effective cache
        /// capacity for the block.
        /// </remarks>
        /// <param name="neededCached">The number of replicas that need to be cached.</param>
        /// <param name="cachedBlock">The block which needs to be cached.</param>
        /// <param name="cached">A list of DataNodes currently caching the block.</param>
        /// <param name="pendingCached">
        /// A list of DataNodes that will soon cache the
        /// block.
        /// </param>
        private void AddNewPendingCached(int neededCached, CachedBlock cachedBlock, IList
                                         <DatanodeDescriptor> cached, IList <DatanodeDescriptor> pendingCached)
        {
            // To figure out which replicas can be cached, we consult the
            // blocksMap.  We don't want to try to cache a corrupt replica, though.
            BlockInfoContiguous blockInfo = blockManager.GetStoredBlock(new Block(cachedBlock.GetBlockId()));

            if (blockInfo == null)
            {
                Log.Debug("Block {}: can't add new cached replicas, because there is no record of this block on the NameNode.",
                          cachedBlock.GetBlockId());
                return;
            }
            if (!blockInfo.IsComplete())
            {
                Log.Debug("Block {}: can't cache this block, because it is not yet complete.",
                          cachedBlock.GetBlockId());
                return;
            }

            // Filter the list of replicas to only the valid targets
            IList <DatanodeDescriptor> possibilities = new List <DatanodeDescriptor>();
            int numReplicas = blockInfo.GetCapacity();
            ICollection <DatanodeDescriptor> corrupt = blockManager.GetCorruptReplicas(blockInfo);
            int outOfCapacity = 0;

            for (int i = 0; i < numReplicas; i++)
            {
                DatanodeDescriptor datanode = blockInfo.GetDatanode(i);
                if (datanode == null)
                {
                    continue;
                }
                if (datanode.IsDecommissioned() || datanode.IsDecommissionInProgress())
                {
                    continue;
                }
                if (corrupt != null && corrupt.Contains(datanode))
                {
                    continue;
                }
                if (pendingCached.Contains(datanode) || cached.Contains(datanode))
                {
                    continue;
                }

                // Pending cached blocks reduce the effective capacity; pending
                // uncached blocks add to it.
                long pendingCachedBytes   = SumStoredBlockBytes(datanode.GetPendingCached().GetEnumerator());
                long pendingUncachedBytes = SumStoredBlockBytes(datanode.GetPendingUncached().GetEnumerator());
                long pendingBytes         = pendingUncachedBytes - pendingCachedBytes;
                long pendingCapacity      = pendingBytes + datanode.GetCacheRemaining();
                if (pendingCapacity < blockInfo.GetNumBytes())
                {
                    // Message fixed: space added before "bytes" and the parenthesis is now closed.
                    // NOTE(review): the last placeholder is labelled "already cached" but is fed
                    // GetCacheRemaining() — confirm the intended wording.
                    Log.Trace("Block {}: DataNode {} is not a valid possibility because the block has size {}, but the DataNode only has {} bytes of cache remaining ({} pending bytes, {} already cached.)",
                              blockInfo.GetBlockId(), datanode.GetDatanodeUuid(), blockInfo.GetNumBytes(),
                              pendingCapacity, pendingBytes, datanode.GetCacheRemaining());
                    outOfCapacity++;
                    continue;
                }
                possibilities.AddItem(datanode);
            }

            IList <DatanodeDescriptor> chosen = ChooseDatanodesForCaching(possibilities, neededCached,
                                                                          blockManager.GetDatanodeManager().GetStaleInterval());

            foreach (DatanodeDescriptor datanode_1 in chosen)
            {
                Log.Trace("Block {}: added to PENDING_CACHED on DataNode {}", blockInfo.GetBlockId(),
                          datanode_1.GetDatanodeUuid());
                pendingCached.AddItem(datanode_1);
                bool added = datanode_1.GetPendingCached().AddItem(cachedBlock);
                System.Diagnostics.Debug.Assert(added);
            }

            // We were unable to satisfy the requested replication factor
            if (neededCached > chosen.Count)
            {
                Log.Debug("Block {}: we only have {} of {} cached replicas. {} DataNodes have insufficient cache capacity.",
                          blockInfo.GetBlockId(), (cachedBlock.GetReplication() - neededCached + chosen.Count),
                          cachedBlock.GetReplication(), outOfCapacity);
            }
        }

        /// <summary>
        /// Sums the byte sizes of the given cached blocks, skipping any block the
        /// block manager has no stored record for.
        /// </summary>
        /// <param name="blocks">Iterator over the cached blocks to total.</param>
        /// <returns>The total number of bytes of the blocks that were found.</returns>
        private long SumStoredBlockBytes(IEnumerator <CachedBlock> blocks)
        {
            long total = 0;
            while (blocks.HasNext())
            {
                CachedBlock         cBlock = blocks.Next();
                BlockInfoContiguous info   = blockManager.GetStoredBlock(new Block(cBlock.GetBlockId()));
                if (info != null)
                {
                    total += info.GetNumBytes();
                }
            }
            return total;
        }
Example #3
0
        /// <summary>
        /// Checks that excess and live replica counts for a one-block file are
        /// reported as expected while DataNodes are stopped and restarted.
        /// </summary>
        public virtual void TestNodeCount()
        {
            // Bring up a mini DFS cluster with ReplicationFactor DataNodes.
            Configuration          configuration = new HdfsConfiguration();
            MiniDFSCluster.Builder builder       = new MiniDFSCluster.Builder(configuration);
            MiniDFSCluster         cluster       = builder.NumDataNodes(ReplicationFactor).Build();

            try
            {
                FSNamesystem     namesystem = cluster.GetNamesystem();
                BlockManager     blockMgr   = namesystem.GetBlockManager();
                HeartbeatManager heartbeats = blockMgr.GetDatanodeManager().GetHeartbeatManager();
                FileSystem       fileSystem = cluster.GetFileSystem();

                // Write a single-block file and wait until it is fully replicated.
                Path testFile = new Path("/testfile");
                DFSTestUtil.CreateFile(fileSystem, testFile, 1L, ReplicationFactor, 1L);
                DFSTestUtil.WaitReplication(fileSystem, testFile, ReplicationFactor);
                ExtendedBlock block = DFSTestUtil.GetFirstBlock(fileSystem, testFile);

                // Remember the current DataNode descriptors before adding nodes.
                DatanodeDescriptor[] initialNodes = heartbeats.GetDatanodes();

                // Add two more DataNodes.
                cluster.StartDataNodes(configuration, 2, true, null, null);
                cluster.WaitActive();

                // Stop the first of the remembered DataNodes and have the
                // NameNode notice that it is dead.
                DatanodeDescriptor firstNode = initialNodes[0];
                MiniDFSCluster.DataNodeProperties stoppedProps = cluster.StopDataNode(firstNode.GetXferAddr());
                BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), firstNode.GetXferAddr());

                // The block gets re-replicated onto the remaining nodes.
                DFSTestUtil.WaitReplication(fileSystem, testFile, ReplicationFactor);

                // Bring the stopped DataNode back.
                cluster.RestartDataNode(stoppedProps);
                cluster.WaitActive();

                // Wait until an excess replica shows up (transient state).
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() == 0)
                {
                    CheckTimeout("excess replicas not detected");
                }

                // Pick the first storage whose DataNode does not hold this
                // block as an excess replica.
                DatanodeDescriptor nonExcessDN = null;
                foreach (DatanodeStorageInfo storage in blockMgr.blocksMap.GetStorages(block.GetLocalBlock()))
                {
                    DatanodeDescriptor  candidate = storage.GetDatanodeDescriptor();
                    ICollection <Block> excess    = blockMgr.excessReplicateMap[candidate.GetDatanodeUuid()];
                    if (excess != null && excess.Contains(block.GetLocalBlock()))
                    {
                        continue;
                    }
                    nonExcessDN = candidate;
                    break;
                }
                NUnit.Framework.Assert.IsTrue(nonExcessDN != null);

                // Stop that non-excess DataNode and have the NameNode notice
                // that it is dead.
                stoppedProps = cluster.StopDataNode(nonExcessDN.GetXferAddr());
                BlockManagerTestUtil.NoticeDeadDatanode(cluster.GetNameNode(), nonExcessDN.GetXferAddr());

                // Wait for the live replica count to return to ReplicationFactor.
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).LiveReplicas() != ReplicationFactor)
                {
                    CheckTimeout("live replica count not correct", 1000);
                }

                // Bring the DataNode that was just stopped back.
                cluster.RestartDataNode(stoppedProps);
                cluster.WaitActive();

                // Wait until exactly two excess replicas are reported (transient state).
                InitializeTimeout(Timeout);
                while (CountNodes(block.GetLocalBlock(), namesystem).ExcessReplicas() != 2)
                {
                    CheckTimeout("excess replica count not equal to 2");
                }
            }
            finally
            {
                cluster.Shutdown();
            }
        }