Beispiel #1
0
        /// <summary>
        /// Retrieve the map output of a single map task
        /// and send it to the merger.
        /// </summary>
        /// <param name="mapTaskId">Attempt id of the map task whose locally stored output is copied.</param>
        /// <returns>
        /// <c>false</c> when the merger hands back no reservation (nothing is copied);
        /// <c>true</c> once the output has been shuffled and reported to the scheduler.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private bool CopyMapOutput(TaskAttemptID mapTaskId)
        {
            // Figure out where the map task stored its output.
            Path mapOutputFileName = localMapFiles[mapTaskId].GetOutputFile();
            Path indexFileName     = mapOutputFileName.Suffix(".index");
            // Read its index to determine the location of our split
            // and its size.
            SpillRecord sr = new SpillRecord(indexFileName, job);
            IndexRecord ir = sr.GetIndex(reduce);
            long        compressedLength   = ir.partLength;
            long        decompressedLength = ir.rawLength;

            // Both lengths recorded in the index include the crypto padding; strip it.
            compressedLength   -= CryptoUtils.CryptoPadding(job);
            decompressedLength -= CryptoUtils.CryptoPadding(job);
            // Get the location for the map output - either in-memory or on-disk
            MapOutput <K, V> mapOutput = merger.Reserve(mapTaskId, decompressedLength, id);

            // Check if we can shuffle *now* ...
            if (mapOutput == null)
            {
                Log.Info("fetcher#" + id + " - MergeManager returned Status.WAIT ...");
                return(false);
            }
            // Go!
            Log.Info("localfetcher#" + id + " about to shuffle output of map " + mapOutput.GetMapId
                         () + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput
                     .GetDescription());
            // now read the file, seek to the appropriate section, and send it.
            FileSystem        localFs  = FileSystem.GetLocal(job).GetRaw();
            FSDataInputStream inStream = localFs.Open(mapOutputFileName);

            try
            {
                // Wrap inside the try block: if wrapping throws, inStream still refers
                // to the raw stream opened above and the finally block closes it
                // instead of leaking the file handle.
                inStream = CryptoUtils.WrapIfNecessary(job, inStream);
                inStream.Seek(ir.startOffset + CryptoUtils.CryptoPadding(job));
                mapOutput.Shuffle(Localhost, inStream, compressedLength, decompressedLength, metrics
                                  , reporter);
            }
            finally
            {
                try
                {
                    inStream.Close();
                }
                catch (IOException ioe)
                {
                    // Best effort: a failure to close must not mask the shuffle outcome.
                    Log.Warn("IOException closing inputstream from map output: " + ioe.ToString());
                }
            }
            // Local copy: no transfer timing is measured, so start/end times are passed as 0.
            scheduler.CopySucceeded(mapTaskId, Localhost, compressedLength, 0, 0, mapOutput);
            return(true);
        }
Beispiel #2
0
        /// <summary>
        /// Read one shuffle header and its payload from <paramref name="input"/> and
        /// pass the payload to the location reserved by the merger.
        /// </summary>
        /// <param name="host">Host passed on to the shuffle and to the scheduler.</param>
        /// <param name="input">Stream from which the shuffle header is read.</param>
        /// <param name="remaining">Outstanding map attempts; the copied attempt is removed on success.</param>
        /// <param name="canRetry">When true, an I/O failure is handed to <c>CheckTimeoutOrRetry</c>.</param>
        /// <returns>
        /// <c>null</c> on success; <c>EmptyAttemptIdArray</c> when the merger asks to wait or
        /// the reservation fails locally; otherwise the attempt ids to be treated as failed.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private TaskAttemptID[] CopyMapOutput(MapHost host, DataInputStream input, ICollection<TaskAttemptID> remaining, bool canRetry)
        {
            TaskAttemptID    mapId     = null;
            MapOutput <K, V> mapOutput = null;
            long compressedLength   = -1;
            long decompressedLength = -1;

            try
            {
                long startTime = Time.MonotonicNow();
                int  forReduce = -1;

                // Step 1: parse the shuffle header.
                try
                {
                    ShuffleHeader shuffleHeader = new ShuffleHeader();
                    shuffleHeader.ReadFields(input);
                    mapId              = TaskAttemptID.ForName(shuffleHeader.mapId);
                    compressedLength   = shuffleHeader.compressedLength;
                    decompressedLength = shuffleHeader.uncompressedLength;
                    forReduce          = shuffleHeader.forReduce;
                }
                catch (ArgumentException badId)
                {
                    badIdErrs.Increment(1);
                    Log.Warn("Invalid map id ", badId);
                    // The offending attempt cannot be identified, so every remaining one is reported.
                    return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                }

                // Step 2: wrap the stream (typed as InputStream) and strip the crypto padding
                // from both lengths.
                InputStream payload = CryptoUtils.WrapIfNecessary(jobConf, (InputStream)input, compressedLength);
                compressedLength   -= CryptoUtils.CryptoPadding(jobConf);
                decompressedLength -= CryptoUtils.CryptoPadding(jobConf);

                // Step 3: basic sanity verification of the header values.
                bool headerIsSane = VerifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId);
                if (!headerIsSane)
                {
                    return(new TaskAttemptID[] { mapId });
                }

                if (Log.IsDebugEnabled())
                {
                    Log.Debug("header: " + mapId + ", len: " + compressedLength + ", decomp len: " + decompressedLength);
                }

                // Step 4: ask the merger for a destination - either in-memory or on-disk.
                try
                {
                    mapOutput = merger.Reserve(mapId, decompressedLength, id);
                }
                catch (IOException reserveFailure)
                {
                    // Local failure: count it and report it so this reduce attempt is killed.
                    ioErrs.Increment(1);
                    scheduler.ReportLocalError(reserveFailure);
                    return(EmptyAttemptIdArray);
                }

                if (mapOutput == null)
                {
                    // Not an error; the data simply has to be processed later.
                    Log.Info("fetcher#" + id + " - MergeManager returned status WAIT ...");
                    return(EmptyAttemptIdArray);
                }

                // Step 5: shuffle. The codecs (lz0, lz4, snappy, bz2, ...) signal decompression
                // failures with InternalError; it is rethrown as IOException so that the
                // fetch-failure handling below takes over.
                try
                {
                    Log.Info("fetcher#" + id + " about to shuffle output of map " + mapOutput.GetMapId()
                             + " decomp: " + decompressedLength + " len: " + compressedLength + " to "
                             + mapOutput.GetDescription());
                    mapOutput.Shuffle(host, payload, compressedLength, decompressedLength, metrics, reporter);
                }
                catch (InternalError codecFailure)
                {
                    Log.Warn("Failed to shuffle for fetcher#" + id, codecFailure);
                    throw new IOException(codecFailure);
                }

                // Step 6: record the successful copy.
                long endTime = Time.MonotonicNow();
                // Progress was made, so any retry window started earlier is reset.
                retryStartTime = 0;
                scheduler.CopySucceeded(mapId, host, compressedLength, startTime, endTime, mapOutput);
                remaining.Remove(mapId);
                metrics.SuccessFetch();
                return(null);
            }
            catch (IOException ioe)
            {
                if (mapOutput != null)
                {
                    mapOutput.Abort();
                }
                if (canRetry)
                {
                    CheckTimeoutOrRetry(host, ioe);
                }
                ioErrs.Increment(1);

                bool shuffleHadStarted = mapId != null && mapOutput != null;
                if (shuffleHadStarted)
                {
                    Log.Warn("Failed to shuffle output of " + mapId + " from " + host.GetHostName(), ioe);
                    // Inform the shuffle metrics of the failed fetch.
                    metrics.FailedFetch();
                    return(new TaskAttemptID[] { mapId });
                }

                // Failure happened before a destination was reserved.
                Log.Warn("fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength
                         + ", " + compressedLength, ioe);
                if (mapId != null)
                {
                    return(new TaskAttemptID[] { mapId });
                }
                // Header never yielded a map id: report every remaining attempt.
                return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
            }
        }