/// <summary>
/// Picks the hosts that hold the largest share of a split's bytes.
/// </summary>
/// <remarks>
/// Contribution is aggregated per host and per rack, with rack locality
/// weighted the same as host locality: hosts on racks that contribute the
/// most bytes are preferred over hosts on racks that contribute less.
/// If the split lies entirely within its first block, that block's own
/// hosts and cached hosts are returned without any aggregation.
/// </remarks>
/// <param name="blkLocations">The list of block locations</param>
/// <param name="offset">
/// Offset at which the split starts; passed to <c>GetBlockIndex</c> to
/// locate the first block of the split.
/// </param>
/// <param name="splitSize">Number of bytes covered by the split.</param>
/// <param name="clusterMap">
/// Topology used to resolve topology paths to nodes. Paths it does not
/// know yet are added to it as new nodes.
/// </param>
/// <returns>
/// Two arrays: the hosts that contribute most to this split, and the hosts
/// that contribute most and have the data cached. The second array is
/// empty whenever the split extends past its first block.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private string[][] GetSplitHostsAndCachedHosts(BlockLocation[] blkLocations, long offset, long splitSize, NetworkTopology clusterMap)
{
    int firstBlock = GetBlockIndex(blkLocations, offset);
    long headBytes = blkLocations[firstBlock].GetOffset() + blkLocations[firstBlock].GetLength() - offset;

    // The split fits inside its first block: nothing to aggregate.
    if (headBytes >= splitSize)
    {
        return new string[][]
        {
            blkLocations[firstBlock].GetHosts(),
            blkLocations[firstBlock].GetCachedHosts()
        };
    }

    // Walk the following blocks until the whole split is accounted for,
    // remembering how many bytes the split takes from the last one.
    long remaining = splitSize - headBytes;
    int lastBlock = firstBlock;
    long tailBytes = headBytes;
    while (remaining > 0)
    {
        lastBlock++;
        tailBytes = Math.Min(remaining, blkLocations[lastBlock].GetLength());
        remaining -= tailBytes;
    }

    IDictionary<Node, FileInputFormat.NodeInfo> hostsMap = new IdentityHashMap<Node, FileInputFormat.NodeInfo>();
    IDictionary<Node, FileInputFormat.NodeInfo> racksMap = new IdentityHashMap<Node, FileInputFormat.NodeInfo>();
    string[] allTopos = new string[0];

    // Build the rack/host hierarchy and accumulate the bytes contributed
    // at each level. See TestGetSplitHosts.java
    for (int blockIdx = firstBlock; blockIdx <= lastBlock; blockIdx++)
    {
        // Only the first and last blocks can be partially covered.
        long blockBytes = blockIdx == firstBlock
            ? headBytes
            : (blockIdx == lastBlock ? tailBytes : blkLocations[blockIdx].GetLength());

        allTopos = blkLocations[blockIdx].GetTopologyPaths();
        if (allTopos.Length == 0)
        {
            // No topology information available: fall back to fake racks.
            allTopos = FakeRacks(blkLocations, blockIdx);
        }

        // NOTE: only a single level of hierarchy (rack/host) is handled
        // here; aggregating at further levels would be an extension.
        foreach (string topo in allTopos)
        {
            Node node = clusterMap.GetNode(topo);
            if (node == null)
            {
                node = new NodeBase(topo);
                clusterMap.Add(node);
            }

            FileInputFormat.NodeInfo hostInfo = hostsMap[node];
            FileInputFormat.NodeInfo rackInfo;
            if (hostInfo != null)
            {
                // Host seen before: reuse the entry recorded for its rack.
                rackInfo = racksMap[node.GetParent()];
            }
            else
            {
                // First sighting of this host: register it, and its rack
                // too if the rack has no entry yet.
                hostInfo = new FileInputFormat.NodeInfo(node);
                hostsMap[node] = hostInfo;

                Node rack = node.GetParent();
                rackInfo = racksMap[rack];
                if (rackInfo == null)
                {
                    rackInfo = new FileInputFormat.NodeInfo(rack);
                    racksMap[rack] = rackInfo;
                }
                rackInfo.AddLeaf(hostInfo);
            }

            hostInfo.AddValue(blockIdx, blockBytes);
            rackInfo.AddValue(blockIdx, blockBytes);
        }
    }

    // Cached hosts are not reported for splits spanning more than one block.
    return new string[][]
    {
        IdentifyHosts(allTopos.Length, racksMap),
        new string[0]
    };
}