/// <summary>Scan through the cached block map.</summary>
/// <remarks>
/// Scan through the cached block map.
/// Any blocks which are under-replicated should be assigned new Datanodes.
/// Blocks that are over-replicated should be removed from Datanodes.
/// </remarks>
private void RescanCachedBlockMap()
{
    // Java-style iterator (HasNext/Next/Remove) so entries can be removed from
    // cachedBlocks while iterating; a foreach would not permit removal.
    for (IEnumerator<CachedBlock> cbIter = cachedBlocks.GetEnumerator(); cbIter.HasNext();)
    {
        scannedBlocks++;
        CachedBlock cblock = cbIter.Next();
        // Three per-state views of the datanodes associated with this block.
        // NOTE(review): these appear to be live views — removals below via
        // iter.Remove() are expected to affect the counts used later; confirm
        // against CachedBlock.GetDatanodes semantics.
        IList<DatanodeDescriptor> pendingCached = cblock.GetDatanodes(DatanodeDescriptor.CachedBlocksList.Type.PendingCached);
        IList<DatanodeDescriptor> cached = cblock.GetDatanodes(DatanodeDescriptor.CachedBlocksList.Type.Cached);
        IList<DatanodeDescriptor> pendingUncached = cblock.GetDatanodes(DatanodeDescriptor.CachedBlocksList.Type.PendingUncached);
        // Remove nodes from PENDING_UNCACHED if they were actually uncached.
        for (IEnumerator<DatanodeDescriptor> iter = pendingUncached.GetEnumerator(); iter.HasNext();)
        {
            DatanodeDescriptor datanode = iter.Next();
            if (!cblock.IsInList(datanode.GetCached()))
            {
                Log.Trace("Block {}: removing from PENDING_UNCACHED for node {} " + "because the DataNode uncached it."
                    , cblock.GetBlockId(), datanode.GetDatanodeUuid());
                // Remove from both sides of the relationship: the datanode's
                // pending-uncached list and (via iter) this block's list.
                datanode.GetPendingUncached().Remove(cblock);
                iter.Remove();
            }
        }
        BlockInfoContiguous blockInfo = blockManager.GetStoredBlock(new Block(cblock.GetBlockId()));
        // A non-null reason means this block is currently uncacheable;
        // neededCached stays 0 so all pending-cached entries get dropped below.
        string reason = FindReasonForNotCaching(cblock, blockInfo);
        int neededCached = 0;
        if (reason != null)
        {
            Log.Trace("Block {}: can't cache block because it is {}", cblock.GetBlockId(), reason);
        }
        else
        {
            neededCached = cblock.GetReplication();
        }
        int numCached = cached.Count;
        if (numCached >= neededCached)
        {
            // If we have enough replicas, drop all pending cached.
            for (IEnumerator<DatanodeDescriptor> iter_1 = pendingCached.GetEnumerator(); iter_1.HasNext();)
            {
                DatanodeDescriptor datanode = iter_1.Next();
                datanode.GetPendingCached().Remove(cblock);
                iter_1.Remove();
                Log.Trace("Block {}: removing from PENDING_CACHED for node {}" + "because we already have {} cached replicas and we only" + " need {}", cblock.GetBlockId(), datanode.GetDatanodeUuid(), numCached, neededCached
                    );
            }
        }
        if (numCached < neededCached)
        {
            // If we don't have enough replicas, drop all pending uncached.
            for (IEnumerator<DatanodeDescriptor> iter_1 = pendingUncached.GetEnumerator(); iter_1.HasNext();)
            {
                DatanodeDescriptor datanode = iter_1.Next();
                datanode.GetPendingUncached().Remove(cblock);
                iter_1.Remove();
                Log.Trace("Block {}: removing from PENDING_UNCACHED for node {} " + "because we only have {} cached replicas and we need " + "{}", cblock.GetBlockId(), datanode.GetDatanodeUuid(), numCached, neededCached
                    );
            }
        }
        // Counts below use the post-removal sizes of pendingUncached /
        // pendingCached. Exactly one of the two branches can schedule work:
        // either we are over-replicated (schedule uncaching) or possibly
        // under-replicated (schedule additional caching).
        int neededUncached = numCached - (pendingUncached.Count + neededCached);
        if (neededUncached > 0)
        {
            AddNewPendingUncached(neededUncached, cblock, cached, pendingUncached);
        }
        else
        {
            int additionalCachedNeeded = neededCached - (numCached + pendingCached.Count);
            if (additionalCachedNeeded > 0)
            {
                AddNewPendingCached(additionalCachedNeeded, cblock, cached, pendingCached);
            }
        }
        if ((neededCached == 0) && pendingUncached.IsEmpty() && pendingCached.IsEmpty())
        {
            // we have nothing more to do with this block.
            Log.Trace("Block {}: removing from cachedBlocks, since neededCached " + "== 0, and pendingUncached and pendingCached are empty."
                , cblock.GetBlockId());
            cbIter.Remove();
        }
    }
}
/// <summary>Apply a CacheDirective to a file.</summary>
/// <remarks>
/// Walks every COMPLETE block of <paramref name="file"/>, registering each in
/// the shared <c>cachedBlocks</c> map (or refreshing an existing entry's
/// replication/mark), and updates the directive's needed/cached byte and file
/// statistics. Skips the whole file when the owning pool's bytesNeeded already
/// exceeds its limit.
/// </remarks>
/// <param name="directive">The CacheDirective to apply.</param>
/// <param name="file">The file.</param>
private void RescanFile(CacheDirective directive, INodeFile file)
{
    BlockInfoContiguous[] blockInfos = file.GetBlocks();
    // Increment the "needed" statistics
    directive.AddFilesNeeded(1);
    // We don't cache UC blocks, don't add them to the total here
    long neededTotal = file.ComputeFileSizeNotIncludingLastUcBlock() * directive.GetReplication();
    directive.AddBytesNeeded(neededTotal);
    // The pool's bytesNeeded is incremented as we scan. If the demand
    // thus far plus the demand of this file would exceed the pool's limit,
    // do not cache this file.
    // (Note: AddBytesNeeded above has already been charged before this check.)
    CachePool pool = directive.GetPool();
    if (pool.GetBytesNeeded() > pool.GetLimit())
    {
        Log.Debug("Directive {}: not scanning file {} because " + "bytesNeeded for pool {} is {}, but the pool's limit is {}"
            , directive.GetId(), file.GetFullPathName(), pool.GetPoolName(), pool.GetBytesNeeded(), pool.GetLimit());
        return;
    }
    long cachedTotal = 0;
    foreach (BlockInfoContiguous blockInfo in blockInfos)
    {
        if (!blockInfo.GetBlockUCState().Equals(HdfsServerConstants.BlockUCState.Complete))
        {
            // We don't try to cache blocks that are under construction.
            Log.Trace("Directive {}: can't cache block {} because it is in state " + "{}, not COMPLETE."
                , directive.GetId(), blockInfo, blockInfo.GetBlockUCState());
            continue;
        }
        Block block = new Block(blockInfo.GetBlockId());
        // ncblock is a probe/candidate entry keyed by block id; cachedBlocks
        // returns the canonical existing entry if one is already present.
        CachedBlock ncblock = new CachedBlock(block.GetBlockId(), directive.GetReplication(), mark);
        CachedBlock ocblock = cachedBlocks.Get(ncblock);
        if (ocblock == null)
        {
            // First directive to reference this block during this scan.
            cachedBlocks.Put(ncblock);
            ocblock = ncblock;
        }
        else
        {
            // Update bytesUsed using the current replication levels.
            // Assumptions: we assume that all the blocks are the same length
            // on each datanode. We can assume this because we're only caching
            // blocks in state COMPLETE.
            // Note that if two directives are caching the same block(s), they will
            // both get them added to their bytesCached.
            IList<DatanodeDescriptor> cachedOn = ocblock.GetDatanodes(DatanodeDescriptor.CachedBlocksList.Type.Cached);
            // Credit at most GetReplication() replicas' worth of bytes even if
            // more datanodes happen to have the block cached.
            long cachedByBlock = Math.Min(cachedOn.Count, directive.GetReplication()) * blockInfo.GetNumBytes();
            cachedTotal += cachedByBlock;
            if ((mark != ocblock.GetMark()) || (ocblock.GetReplication() < directive.GetReplication()))
            {
                //
                // Overwrite the block's replication and mark in two cases:
                //
                // 1. If the mark on the CachedBlock is different from the mark for
                // this scan, that means the block hasn't been updated during this
                // scan, and we should overwrite whatever is there, since it is no
                // longer valid.
                //
                // 2. If the replication in the CachedBlock is less than what the
                // directive asks for, we want to increase the block's replication
                // field to what the directive asks for.
                //
                ocblock.SetReplicationAndMark(directive.GetReplication(), mark);
            }
        }
        Log.Trace("Directive {}: setting replication for block {} to {}", directive.GetId(), blockInfo, ocblock.GetReplication());
    }
    // Increment the "cached" statistics
    directive.AddBytesCached(cachedTotal);
    // A file counts as fully cached only when every needed byte is cached.
    if (cachedTotal == neededTotal)
    {
        directive.AddFilesCached(1);
    }
    Log.Debug("Directive {}: caching {}: {}/{} bytes", directive.GetId(), file.GetFullPathName(), cachedTotal, neededTotal);
}