/// <summary>
/// Adds <paramref name="priority"/> to the <c>reReservations</c> collection while
/// holding the monitor of this instance.
/// </summary>
/// <param name="priority">The priority to record.</param>
protected internal virtual void AddReReservation(Priority priority)
{
    // NOTE(review): locks on the instance itself; presumably other members
    // synchronize on the same monitor, so the lock target is left unchanged.
    lock (this)
    {
        reReservations.AddItem(priority);
    }
}
/// <summary>
/// Groups blocks into input splits in three passes: node-local splits first,
/// then per-rack splits, and finally splits built from whatever overflowed the
/// rack pass.
/// </summary>
/// <remarks>
/// The method mutates its inputs: entries are removed from
/// <paramref name="blockToNodes"/> as blocks are assigned (and re-inserted for
/// node leftovers that are handed to the rack pass), and blocks are removed from
/// the per-node collections held in <paramref name="nodeToBlocks"/>.
/// </remarks>
/// <param name="nodeToBlocks">Node name mapped to the blocks listed for that node.</param>
/// <param name="blockToNodes">
/// Pool of blocks that have not been placed in a split yet; a block that is no
/// longer a key here is treated as already assigned.
/// </param>
/// <param name="rackToBlocks">Rack name mapped to the blocks listed for that rack.</param>
/// <param name="totLength">Starting value of the "size left" counter used by the node pass.</param>
/// <param name="maxSize">
/// Accumulated size at which a split is cut; <c>0</c> disables the maximum-size check.
/// </param>
/// <param name="minSizeNode">
/// Minimum accumulated size for turning a node's leftover blocks into a split;
/// <c>0</c> disables that step.
/// </param>
/// <param name="minSizeRack">
/// Minimum accumulated size for turning a rack's leftover blocks into a split;
/// <c>0</c> sends such leftovers to the overflow list instead.
/// </param>
/// <param name="splits">Split list handed to <c>AddCreatedSplit</c> for every split created.</param>
internal virtual void CreateSplits(
    IDictionary<string, ICollection<CombineFileInputFormat.OneBlockInfo>> nodeToBlocks,
    IDictionary<CombineFileInputFormat.OneBlockInfo, string[]> blockToNodes,
    IDictionary<string, IList<CombineFileInputFormat.OneBlockInfo>> rackToBlocks,
    long totLength,
    long maxSize,
    long minSizeNode,
    long minSizeRack,
    IList<InputSplit> splits)
{
    // Blocks accumulated for the split currently being built, and their total size.
    AList<CombineFileInputFormat.OneBlockInfo> pending = new AList<CombineFileInputFormat.OneBlockInfo>();
    long pendingSize = 0;

    int nodeCount = nodeToBlocks.Count;
    long sizeLeft = totLength;
    Multiset<string> splitsPerNode = HashMultiset.Create();
    ICollection<string> finishedNodes = new HashSet<string>();

    // ---------------------------------------------------------------------
    // Pass 1: node-local splits.
    // At most one split is cut per node on each sweep, and the sweep is
    // repeated, so that splits are spread across nodes. maxSize may be 0, in
    // which case no split is cut inside the block loop.
    // ---------------------------------------------------------------------
    while (true)
    {
        IEnumerator<KeyValuePair<string, ICollection<CombineFileInputFormat.OneBlockInfo>>> nodeIter =
            nodeToBlocks.GetEnumerator();
        while (nodeIter.HasNext())
        {
            KeyValuePair<string, ICollection<CombineFileInputFormat.OneBlockInfo>> nodeEntry = nodeIter.Next();
            string node = nodeEntry.Key;

            // Nodes already marked as completed are not visited again.
            if (completedNodesContains(finishedNodes, node))
            {
                continue;
            }

            ICollection<CombineFileInputFormat.OneBlockInfo> nodeBlocks = nodeEntry.Value;

            // Move blocks from the unassigned pool into 'pending'. Taking a block
            // out of blockToNodes keeps it from landing in two different splits.
            IEnumerator<CombineFileInputFormat.OneBlockInfo> blockIter = nodeBlocks.GetEnumerator();
            while (blockIter.HasNext())
            {
                CombineFileInputFormat.OneBlockInfo candidate = blockIter.Next();

                // Already assigned to some other split: drop it from this node.
                if (!blockToNodes.Contains(candidate))
                {
                    blockIter.Remove();
                    continue;
                }

                pending.AddItem(candidate);
                Sharpen.Collections.Remove(blockToNodes, candidate);
                pendingSize += candidate.length;

                // Reached the maximum split size: cut a split for this node.
                if (maxSize != 0 && pendingSize >= maxSize)
                {
                    AddCreatedSplit(splits, Sharpen.Collections.Singleton(node), pending);
                    sizeLeft -= pendingSize;
                    pendingSize = 0;
                    splitsPerNode.AddItem(node);

                    // Forget the blocks just used so they are not walked again.
                    nodeBlocks.RemoveAll(pending);
                    pending.Clear();

                    // One split per node per sweep; move on to the next node.
                    break;
                }
            }

            if (pending.Count == 0)
            {
                // Nothing in flight. If the node has no blocks left, everything
                // fit into node-local splits and the node is done; otherwise it
                // is visited again on the next sweep.
                if (nodeBlocks.Count == 0)
                {
                    finishedNodes.AddItem(node);
                }
                continue;
            }

            // The trailing blocks (or all of them when maxSize is 0) did not reach
            // maxSize, so this node is complete after handling them.
            if (minSizeNode != 0 && pendingSize >= minSizeNode && splitsPerNode.Count(node) == 0)
            {
                // No split exists on this node yet and the leftovers are big
                // enough: emit a smaller node-local split for parallelism.
                AddCreatedSplit(splits, Sharpen.Collections.Singleton(node), pending);
                sizeLeft -= pendingSize;
                splitsPerNode.AddItem(node);
                nodeBlocks.RemoveAll(pending);
            }
            else
            {
                // Return the leftovers to the pool so the rack pass can place them.
                foreach (CombineFileInputFormat.OneBlockInfo leftover in pending)
                {
                    blockToNodes[leftover] = leftover.hosts;
                }
            }

            pending.Clear();
            pendingSize = 0;
            finishedNodes.AddItem(node);
        }

        // Node-local assignment ends once every node is completed or no size is
        // left; the remainder is handled by rack-local assignment below.
        if (finishedNodes.Count == nodeCount || sizeLeft == 0)
        {
            // NOTE(review): the message is tagged "DEBUG:" but is logged at info level.
            Log.Info("DEBUG: Terminated node allocation with : CompletedNodes: " + finishedNodes
                .Count + ", size left: " + sizeLeft);
            break;
        }
    }

    // ---------------------------------------------------------------------
    // Pass 2: rack-level splits.
    // One split is built per rack per sweep. Rack leftovers below the minimum
    // size are parked in 'overflow' and combined after all racks are processed.
    // ---------------------------------------------------------------------
    AList<CombineFileInputFormat.OneBlockInfo> overflow = new AList<CombineFileInputFormat.OneBlockInfo>();
    ICollection<string> rackNames = new HashSet<string>();

    // NOTE(review): this loop only ends when blockToNodes is empty; it appears to
    // assume every pooled block is listed under some rack in rackToBlocks - confirm.
    while (blockToNodes.Count > 0)
    {
        IEnumerator<KeyValuePair<string, IList<CombineFileInputFormat.OneBlockInfo>>> rackIter =
            rackToBlocks.GetEnumerator();
        while (rackIter.HasNext())
        {
            KeyValuePair<string, IList<CombineFileInputFormat.OneBlockInfo>> rackEntry = rackIter.Next();
            rackNames.AddItem(rackEntry.Key);
            IList<CombineFileInputFormat.OneBlockInfo> rackBlocks = rackEntry.Value;

            // Collect still-unassigned blocks of this rack until maxSize is hit.
            bool emittedSplit = false;
            foreach (CombineFileInputFormat.OneBlockInfo candidate in rackBlocks)
            {
                if (!blockToNodes.Contains(candidate))
                {
                    continue;
                }

                pending.AddItem(candidate);
                Sharpen.Collections.Remove(blockToNodes, candidate);
                pendingSize += candidate.length;

                if (maxSize != 0 && pendingSize >= maxSize)
                {
                    AddCreatedSplit(splits, GetHosts(rackNames), pending);
                    emittedSplit = true;
                    break;
                }
            }

            // If no full-size split was cut, decide what to do with the leftovers.
            if (!emittedSplit && !pending.IsEmpty())
            {
                if (minSizeRack != 0 && pendingSize >= minSizeRack)
                {
                    // Large enough for a rack-level split of its own.
                    AddCreatedSplit(splits, GetHosts(rackNames), pending);
                }
                else
                {
                    // Too small; keep for the overflow pass.
                    Sharpen.Collections.AddAll(overflow, pending);
                }
            }

            // Reset the accumulators before moving to the next rack.
            pendingSize = 0;
            pending.Clear();
            rackNames.Clear();
        }
    }

    System.Diagnostics.Debug.Assert(blockToNodes.IsEmpty());
    System.Diagnostics.Debug.Assert(pendingSize == 0);
    System.Diagnostics.Debug.Assert(pending.IsEmpty());
    System.Diagnostics.Debug.Assert(rackNames.IsEmpty());

    // ---------------------------------------------------------------------
    // Pass 3: overflow blocks, combined across racks.
    // ---------------------------------------------------------------------
    foreach (CombineFileInputFormat.OneBlockInfo overflowBlock in overflow)
    {
        pending.AddItem(overflowBlock);
        pendingSize += overflowBlock.length;

        // An existing rack location may be added again here; that is acceptable.
        for (int rackIndex = 0; rackIndex < overflowBlock.racks.Length; rackIndex++)
        {
            rackNames.AddItem(overflowBlock.racks[rackIndex]);
        }

        if (maxSize != 0 && pendingSize >= maxSize)
        {
            AddCreatedSplit(splits, GetHosts(rackNames), pending);
            pendingSize = 0;
            pending.Clear();
            rackNames.Clear();
        }
    }

    // Whatever is still pending becomes the final split.
    if (!pending.IsEmpty())
    {
        AddCreatedSplit(splits, GetHosts(rackNames), pending);
    }
}