Beispiel #1
0
 /// <summary>
 /// Adds the given node info to this instance's <c>leaves</c> collection.
 /// </summary>
 /// <param name="nodeInfo">The node info to add as a leaf.</param>
 internal virtual void AddLeaf(FileInputFormat.NodeInfo nodeInfo) => leaves.AddItem(nodeInfo);
Beispiel #2
0
        /// <summary>
        /// Identifies and returns the hosts that contribute the most bytes
        /// to a given split.
        /// </summary>
        /// <remarks>
        /// For calculating the contribution, rack locality is treated on par
        /// with host locality, so hosts from racks that contribute the most
        /// are preferred over hosts on racks that contribute less.
        /// </remarks>
        /// <param name="blkLocations">The list of block locations</param>
        /// <param name="offset">
        /// Start position of the split, expressed in the same units as
        /// <c>BlockLocation.GetOffset()</c>; used to locate the first block
        /// and to compute how many of its bytes belong to the split
        /// </param>
        /// <param name="splitSize">Number of bytes covered by the split</param>
        /// <param name="clusterMap">
        /// Topology used to resolve topology paths to nodes; a path that it
        /// cannot resolve is added to it as a new node
        /// </param>
        /// <returns>
        /// two arrays - one of hosts that contribute most to this split, and
        /// one of hosts that contribute most to this split that have the data
        /// cached on them. The second array is always empty when the split
        /// spans more than one block.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private string[][] GetSplitHostsAndCachedHosts(BlockLocation[] blkLocations, long
                                                       offset, long splitSize, NetworkTopology clusterMap)
        {
            int  startIndex       = GetBlockIndex(blkLocations, offset);
            long bytesInThisBlock = blkLocations[startIndex].GetOffset() + blkLocations[startIndex
                                    ].GetLength() - offset;

            // The first block covers the whole split: its hosts are the answer
            if (bytesInThisBlock >= splitSize)
            {
                return new string[][] { blkLocations[startIndex].GetHosts(), blkLocations[startIndex
                                        ].GetCachedHosts() };
            }
            long bytesInFirstBlock = bytesInThisBlock;
            int  index             = startIndex + 1;

            // Walk forward over the following blocks until the split is used up,
            // remembering how many bytes the last block contributes
            splitSize -= bytesInThisBlock;
            while (splitSize > 0)
            {
                bytesInThisBlock = Math.Min(splitSize, blkLocations[index++].GetLength());
                splitSize       -= bytesInThisBlock;
            }
            long bytesInLastBlock = bytesInThisBlock;
            int  endIndex         = index - 1;
            IDictionary <Node, FileInputFormat.NodeInfo> hostsMap = new IdentityHashMap <Node,
                                                                                         FileInputFormat.NodeInfo>();
            IDictionary <Node, FileInputFormat.NodeInfo> racksMap = new IdentityHashMap <Node,
                                                                                         FileInputFormat.NodeInfo>();

            string[] allTopos = new string[0];
            // Build the hierarchy and aggregate the contribution of
            // bytes at each level. See TestGetSplitHosts.java
            for (index = startIndex; index <= endIndex; index++)
            {
                // Establish the bytes in this block: the first and last blocks
                // may be only partially covered by the split
                if (index == startIndex)
                {
                    bytesInThisBlock = bytesInFirstBlock;
                }
                else if (index == endIndex)
                {
                    bytesInThisBlock = bytesInLastBlock;
                }
                else
                {
                    bytesInThisBlock = blkLocations[index].GetLength();
                }
                allTopos = blkLocations[index].GetTopologyPaths();
                // If no topology information is available, just
                // prefix a fakeRack
                if (allTopos.Length == 0)
                {
                    allTopos = FakeRacks(blkLocations, index);
                }
                // NOTE: This code currently works only for one level of
                // hierarchy (rack/host). However, it is relatively easy
                // to extend this to support aggregation at different
                // levels
                foreach (string topo in allTopos)
                {
                    Node node;
                    Node parentNode;
                    FileInputFormat.NodeInfo nodeInfo;
                    FileInputFormat.NodeInfo parentNodeInfo;
                    node = clusterMap.GetNode(topo);
                    if (node == null)
                    {
                        node = new NodeBase(topo);
                        clusterMap.Add(node);
                    }
                    // TryGetValue rather than the indexer: the IDictionary indexer
                    // getter is specified to throw KeyNotFoundException for a
                    // missing key, and a missing key is the expected case here
                    if (!hostsMap.TryGetValue(node, out nodeInfo) || nodeInfo == null)
                    {
                        // First time this host is seen: register it and attach it
                        // to its rack, creating the rack entry if necessary
                        nodeInfo       = new FileInputFormat.NodeInfo(node);
                        hostsMap[node] = nodeInfo;
                        parentNode     = node.GetParent();
                        if (!racksMap.TryGetValue(parentNode, out parentNodeInfo) || parentNodeInfo == null)
                        {
                            parentNodeInfo       = new FileInputFormat.NodeInfo(parentNode);
                            racksMap[parentNode] = parentNodeInfo;
                        }
                        parentNodeInfo.AddLeaf(nodeInfo);
                    }
                    else
                    {
                        // Known host: its rack entry was created when the host
                        // was first registered
                        parentNode = node.GetParent();
                        racksMap.TryGetValue(parentNode, out parentNodeInfo);
                    }
                    nodeInfo.AddValue(index, bytesInThisBlock);
                    parentNodeInfo.AddValue(index, bytesInThisBlock);
                }
            }
            // Cached hosts are not yet supported when the split spans more than
            // one block, so the second array is always empty on this path.
            // allTopos here holds the topology paths of the last block visited.
            return new string[][] { IdentifyHosts(allTopos.Length, racksMap), new string[0] };
        }