/// <summary>
/// Retrieve the map output of a single map task from the local filesystem
/// and send it to the merger.
/// </summary>
/// <param name="mapTaskId">the map attempt whose output should be copied.</param>
/// <returns>
/// true if the output was shuffled to the merger; false when the merger
/// returned WAIT (not an error — the caller should retry later).
/// </returns>
/// <exception cref="System.IO.IOException"/>
private bool CopyMapOutput(TaskAttemptID mapTaskId)
{
    // Figure out where the map task stored its output.
    Path mapOutputFileName = localMapFiles[mapTaskId].GetOutputFile();
    Path indexFileName = mapOutputFileName.Suffix(".index");
    // Read its index to determine the location of our split and its size.
    SpillRecord sr = new SpillRecord(indexFileName, job);
    IndexRecord ir = sr.GetIndex(reduce);
    long compressedLength = ir.partLength;
    long decompressedLength = ir.rawLength;
    // The recorded lengths include any crypto padding; strip it so the
    // merger reserves only the real payload size.
    compressedLength -= CryptoUtils.CryptoPadding(job);
    decompressedLength -= CryptoUtils.CryptoPadding(job);
    // Get the location for the map output - either in-memory or on-disk.
    MapOutput<K, V> mapOutput = merger.Reserve(mapTaskId, decompressedLength, id);
    // Check if we can shuffle *now* ...
    if (mapOutput == null)
    {
        // "localfetcher#" (not "fetcher#") for consistency with the other
        // log statements emitted by this local fetcher.
        Log.Info("localfetcher#" + id + " - MergeManager returned Status.WAIT ...");
        return false;
    }
    // Go!
    Log.Info("localfetcher#" + id + " about to shuffle output of map "
        + mapOutput.GetMapId() + " decomp: " + decompressedLength
        + " len: " + compressedLength + " to " + mapOutput.GetDescription());
    // Now read the file, seek to the appropriate section, and send it.
    FileSystem localFs = FileSystem.GetLocal(job).GetRaw();
    FSDataInputStream inStream = localFs.Open(mapOutputFileName);
    inStream = CryptoUtils.WrapIfNecessary(job, inStream);
    try
    {
        // Skip past any crypto padding to the start of our partition.
        inStream.Seek(ir.startOffset + CryptoUtils.CryptoPadding(job));
        mapOutput.Shuffle(Localhost, inStream, compressedLength, decompressedLength,
            metrics, reporter);
    }
    finally
    {
        try
        {
            inStream.Close();
        }
        catch (IOException ioe)
        {
            // Best-effort close: don't mask a shuffle exception that may
            // already be propagating.
            Log.Warn("IOException closing inputstream from map output: " + ioe.ToString());
        }
    }
    // Local copies involve no network transfer, so start/end times are 0.
    scheduler.CopySucceeded(mapTaskId, Localhost, compressedLength, 0, 0, mapOutput);
    return true;
}
/// <summary>
/// Read one shuffle header from <paramref name="input"/> and copy the
/// corresponding map output from the given host into the merger
/// (in-memory or on-disk).
/// </summary>
/// <param name="host">the host the map outputs are being fetched from.</param>
/// <param name="input">stream positioned at the next shuffle header.</param>
/// <param name="remaining">map attempts still to be fetched from this host.</param>
/// <param name="canRetry">whether a failed read may first go through the
/// timeout/retry check before being counted as a fetch failure.</param>
/// <returns>
/// null on a successful copy; <c>EmptyAttemptIdArray</c> when the caller
/// should stop without marking failures (merger WAIT, or a local error
/// already reported to the scheduler); otherwise the attempt id(s) to be
/// treated as failed — all of <paramref name="remaining"/> when the header
/// could not even be parsed.
/// </returns>
/// <exception cref="System.IO.IOException"/>
private TaskAttemptID[] CopyMapOutput(MapHost host, DataInputStream input,
    ICollection<TaskAttemptID> remaining, bool canRetry)
{
    MapOutput<K, V> mapOutput = null;
    TaskAttemptID mapId = null;
    long decompressedLength = -1;
    long compressedLength = -1;
    try
    {
        long startTime = Time.MonotonicNow();
        int forReduce = -1;
        // Read the shuffle header.
        try
        {
            ShuffleHeader header = new ShuffleHeader();
            header.ReadFields(input);
            mapId = TaskAttemptID.ForName(header.mapId);
            compressedLength = header.compressedLength;
            decompressedLength = header.uncompressedLength;
            forReduce = header.forReduce;
        }
        catch (ArgumentException e)
        {
            badIdErrs.Increment(1);
            Log.Warn("Invalid map id ", e);
            // Don't know which one was bad, so consider all of them as bad.
            return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
        }
        InputStream @is = input;
        // Wrap for decryption if encrypted shuffle is configured, then strip
        // the crypto padding from the header-reported lengths.
        @is = CryptoUtils.WrapIfNecessary(jobConf, @is, compressedLength);
        compressedLength -= CryptoUtils.CryptoPadding(jobConf);
        decompressedLength -= CryptoUtils.CryptoPadding(jobConf);
        // Do some basic sanity verification.
        if (!VerifySanity(compressedLength, decompressedLength, forReduce, remaining,
            mapId))
        {
            return(new TaskAttemptID[] { mapId });
        }
        if (Log.IsDebugEnabled())
        {
            Log.Debug("header: " + mapId + ", len: " + compressedLength + ", decomp len: "
                + decompressedLength);
        }
        // Get the location for the map output - either in-memory or on-disk.
        try
        {
            mapOutput = merger.Reserve(mapId, decompressedLength, id);
        }
        catch (IOException ioe)
        {
            // kill this reduce attempt
            ioErrs.Increment(1);
            scheduler.ReportLocalError(ioe);
            return(EmptyAttemptIdArray);
        }
        // Check if we can shuffle *now* ...
        if (mapOutput == null)
        {
            Log.Info("fetcher#" + id + " - MergeManager returned status WAIT ...");
            // Not an error but wait to process data.
            return(EmptyAttemptIdArray);
        }
        // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError
        // on decompression failures. Catching and re-throwing as IOException
        // to allow fetch failure logic to be processed.
        try
        {
            // Go!
            Log.Info("fetcher#" + id + " about to shuffle output of map "
                + mapOutput.GetMapId() + " decomp: " + decompressedLength + " len: "
                + compressedLength + " to " + mapOutput.GetDescription());
            mapOutput.Shuffle(host, @is, compressedLength, decompressedLength, metrics,
                reporter);
        }
        catch (InternalError e)
        {
            Log.Warn("Failed to shuffle for fetcher#" + id, e);
            throw new IOException(e);
        }
        // Inform the shuffle scheduler.
        long endTime = Time.MonotonicNow();
        // Reset retryStartTime as map task make progress if retried before.
        retryStartTime = 0;
        scheduler.CopySucceeded(mapId, host, compressedLength, startTime, endTime,
            mapOutput);
        // Note successful shuffle.
        remaining.Remove(mapId);
        metrics.SuccessFetch();
        return(null);
    }
    catch (IOException ioe)
    {
        // Release any reserved merge space so it can be reused.
        if (mapOutput != null)
        {
            mapOutput.Abort();
        }
        if (canRetry)
        {
            CheckTimeoutOrRetry(host, ioe);
        }
        ioErrs.Increment(1);
        if (mapId == null || mapOutput == null)
        {
            Log.Warn("fetcher#" + id + " failed to read map header" + mapId + " decomp: "
                + decompressedLength + ", " + compressedLength, ioe);
            if (mapId == null)
            {
                // Header never parsed: every remaining attempt is suspect.
                return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
            }
            else
            {
                return(new TaskAttemptID[] { mapId });
            }
        }
        Log.Warn("Failed to shuffle output of " + mapId + " from " + host.GetHostName(),
            ioe);
        // Inform the shuffle-scheduler.
        metrics.FailedFetch();
        return(new TaskAttemptID[] { mapId });
    }
}