public virtual void TestSucceedAndFailedCopyMap <K, V>()
        {
            // Regression scenario for MAPREDUCE-6361: a successful copy from a host
            // that lands between HostFailed and CopyFailed must not make CopyFailed
            // blow up with a null reference.
            JobConf jobConf = new JobConf();
            jobConf.SetNumMapTasks(2);

            // Every collaborator required by the shuffle context is a mock.
            TaskUmbilicalProtocol umbilical     = Org.Mockito.Mockito.Mock <TaskUmbilicalProtocol>();
            Reporter              reporter      = Org.Mockito.Mockito.Mock <Reporter>();
            FileSystem            fileSystem    = Org.Mockito.Mockito.Mock <FileSystem>();
            Type                  combinerClass = jobConf.GetCombinerClass();

            // The cast is needed because the mock is created from the raw generic type.
            Task.CombineOutputCollector <K, V> combineCollector =
                (Task.CombineOutputCollector <K, V>)Org.Mockito.Mockito.Mock <Task.CombineOutputCollector>();
            TaskAttemptID     contextAttemptId = Org.Mockito.Mockito.Mock <TaskAttemptID>();
            LocalDirAllocator dirAllocator     = Org.Mockito.Mockito.Mock <LocalDirAllocator>();
            CompressionCodec  codec            = Org.Mockito.Mockito.Mock <CompressionCodec>();

            Counters.Counter counter         = Org.Mockito.Mockito.Mock <Counters.Counter>();
            TaskStatus       contextStatus   = Org.Mockito.Mockito.Mock <TaskStatus>();
            Progress         contextProgress = Org.Mockito.Mockito.Mock <Progress>();
            MapOutputFile    mapOutputFile   = Org.Mockito.Mockito.Mock <MapOutputFile>();

            Org.Apache.Hadoop.Mapred.Task task = Org.Mockito.Mockito.Mock <Org.Apache.Hadoop.Mapred.Task>();
            MapOutput <K, V> mapOutput = Org.Mockito.Mockito.Mock <MapOutput>();

            // The same mock counter and mock progress are deliberately reused for
            // every counter/progress slot of the context.
            ShuffleConsumerPlugin.Context <K, V> shuffleContext = new ShuffleConsumerPlugin.Context <K, V>(
                contextAttemptId, jobConf, fileSystem, umbilical, dirAllocator,
                reporter, codec, combinerClass, combineCollector,
                counter, counter, counter, counter, counter, counter,
                contextStatus, contextProgress, contextProgress, task, mapOutputFile, null);

            // The scheduler under test gets a real status and progress object.
            TaskStatus schedulerStatus   = new _TaskStatus_251();
            Progress   schedulerProgress = new Progress();
            ShuffleSchedulerImpl <K, V> scheduler = new ShuffleSchedulerImpl <K, V>(
                jobConf, schedulerStatus, null, null, schedulerProgress,
                shuffleContext.GetShuffledMapsCounter(),
                shuffleContext.GetReduceShuffleBytes(),
                shuffleContext.GetFailedShuffleCounter());

            MapHost       host1            = new MapHost("host1", null);
            TaskAttemptID failedAttemptID  = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
            TaskAttemptID succeedAttemptID = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);

            // Step 1: first half of the fetch-failure handling for failedAttemptID.
            scheduler.HostFailed(host1.GetHostName());

            // Step 2: a fetch of succeedAttemptID from the same host succeeds (500 MB).
            long bytes = 500L * 1024 * 1024;
            scheduler.CopySucceeded(succeedAttemptID, host1, bytes, 0, 500000, mapOutput);

            // Step 3: second half of the fetch-failure handling for failedAttemptID.
            // MAPREDUCE-6361: this call must complete without throwing.
            scheduler.CopyFailed(failedAttemptID, host1, true, false);
        }
Example #2
0
 /// <summary>
 /// Records a failed fetch of <paramref name="mapId"/> from <paramref name="host"/>:
 /// penalizes the host, bumps the per-attempt failure count, reports the failure
 /// to the reporter once the abort limit is reached, informs the application
 /// master / checks reducer health, and queues a penalty for the host.
 /// </summary>
 /// <param name="mapId">Map attempt whose output could not be copied.</param>
 /// <param name="host">Host the copy was attempted from.</param>
 /// <param name="readError">Forwarded unchanged to CheckAndInformMRAppMaster.</param>
 /// <param name="connectExcpt">Forwarded unchanged to CheckAndInformMRAppMaster.</param>
 public virtual void CopyFailed(TaskAttemptID mapId, MapHost host, bool readError,
                                bool connectExcpt)
 {
     lock (this)
     {
         host.Penalize();

         // Create the per-attempt failure counter on first failure, bump it otherwise.
         int failures;
         if (!failureCounts.Contains(mapId))
         {
             failureCounts[mapId] = new IntWritable(1);
             failures = 1;
         }
         else
         {
             IntWritable attemptFailures = failureCounts[mapId];
             attemptFailures.Set(attemptFailures.Get() + 1);
             failures = attemptFailures.Get();
         }

         // MAPREDUCE-6361: hostname could get cleanup from hostFailures in another
         // thread with copySucceeded.
         // In this case, add back hostname to hostFailures to get rid of NPE issue.
         string      hostname       = host.GetHostName();
         IntWritable recordedOnHost = hostFailures[hostname];
         if (recordedOnHost == null)
         {
             hostFailures[hostname] = new IntWritable(1);
         }

         // The host counts as failed once it has exceeded the allowed host failures.
         bool hostFail = hostFailures[hostname].Get() > GetMaxHostFailures();

         if (failures >= abortFailureLimit)
         {
             // Throw and immediately catch so the reported exception has been thrown.
             try
             {
                 throw new IOException(failures + " failures downloading " + mapId);
             }
             catch (IOException ie)
             {
                 reporter.ReportException(ie);
             }
         }

         CheckAndInformMRAppMaster(failures, mapId, readError, connectExcpt, hostFail);
         CheckReducerHealth();

         // The penalty grows geometrically with the failure count, capped at maxDelay.
         long delay = Math.Min((long)(InitialPenalty * Math.Pow(PenaltyGrowthRate, failures)), maxDelay);
         penalties.AddItem(new ShuffleSchedulerImpl.Penalty(host, delay));
         failedShuffleCounter.Increment(1);
     }
 }
Example #3
0
        /// <summary>
        /// Copies <paramref name="compressedLength"/> bytes of map output from
        /// <paramref name="input"/> to the <c>disk</c> stream in 64 KiB chunks, reading
        /// through an <c>IFileInputStream</c> via <c>ReadWithChecksum</c>, and records
        /// the compressed size on success.
        /// </summary>
        /// <param name="host">Source host; used only in the incomplete-output message.</param>
        /// <param name="input">Raw stream positioned at the start of the map output.</param>
        /// <param name="compressedLength">Number of bytes to read from the stream.</param>
        /// <param name="decompressedLength">Not used by this implementation.</param>
        /// <param name="metrics">Receives the byte count of every chunk read.</param>
        /// <param name="reporter">Pinged after every chunk.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Shuffle(MapHost host, InputStream input, long compressedLength
                                     , long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
        {
            input = new IFileInputStream(input, compressedLength, conf);
            IFileInputStream checksummedInput = (IFileInputStream)input;

            // Bytes of the map output that still have to be copied to local disk.
            long pending = compressedLength;

            try
            {
                const int ChunkSize = 64 * 1024;
                byte[]    chunk     = new byte[ChunkSize];
                while (pending > 0)
                {
                    int wanted = (int)Math.Min(pending, ChunkSize);
                    int got    = checksummedInput.ReadWithChecksum(chunk, 0, wanted);
                    if (got < 0)
                    {
                        throw new IOException("read past end of stream reading " + GetMapId());
                    }
                    disk.Write(chunk, 0, got);
                    pending -= got;
                    metrics.InputBytes(got);
                    reporter.Progress();
                }
                Log.Info("Read " + (compressedLength - pending) + " bytes from map-output for "
                         + GetMapId());
                disk.Close();
            }
            catch (IOException)
            {
                // Release both streams, then let the failure propagate unchanged.
                IOUtils.Cleanup(Log, input, disk);
                throw;
            }

            // Sanity check: everything announced by the header must have been consumed.
            if (pending != 0)
            {
                throw new IOException("Incomplete map output received for " + GetMapId() + " from "
                                      + host.GetHostName() + " (" + pending + " bytes missing of " + compressedLength
                                      + ")");
            }
            this.compressedSize = compressedLength;
        }
Example #4
0
        /// <summary>
        /// Decides whether a failed fetch should be retried. If the retry window
        /// (<c>fetchRetryTimeout</c> milliseconds, measured from the first failure
        /// recorded in <c>retryStartTime</c>) has not elapsed, the given exception is
        /// thrown so the caller starts a new round of retry. Otherwise the timeout is
        /// logged and the method returns normally so the caller can fail the fetch.
        /// </summary>
        /// <param name="host">Host the failed fetch was talking to (used for logging).</param>
        /// <param name="ioe">The failure that triggered the check; thrown when retrying.</param>
        /// <exception cref="System.IO.IOException">
        /// <paramref name="ioe"/> itself, while still inside the retry window.
        /// </exception>
        private void CheckTimeoutOrRetry(MapHost host, IOException ioe)
        {
            long currentTime = Time.MonotonicNow();

            // First failure of this retry round: start the clock.
            if (retryStartTime == 0)
            {
                retryStartTime = currentTime;
            }
            // Retry is not timeout, let's do retry with throwing an exception.
            if (currentTime - retryStartTime < this.fetchRetryTimeout)
            {
                Log.Warn("Shuffle output from " + host.GetHostName() + " failed, retry it.", ioe);
                throw ioe;
            }
            else
            {
                // timeout, prepare to be failed.
                // The leading space before "after" keeps the host and the word apart.
                Log.Warn("Timeout for copying MapOutput with retry on host " + host + " after " +
                         fetchRetryTimeout + " milliseconds.");
            }
        }
Example #5
0
        /// <summary>
        /// Connects to <paramref name="url"/> and returns a data stream over the
        /// connection's input. Returns <c>null</c> when the fetcher has been stopped
        /// (after calling AbortConnect) or when connecting failed with an
        /// IOException; in the failure case the host and every attempt in
        /// <paramref name="remaining"/> are reported to the scheduler as failed and
        /// the attempts are put back as known map outputs.
        /// </summary>
        /// <param name="host">Host being fetched from.</param>
        /// <param name="remaining">Map attempts still expected from this host.</param>
        /// <param name="url">Shuffle URL to connect to.</param>
        /// <returns>The opened stream, or <c>null</c> if no stream is available.</returns>
        private DataInputStream OpenShuffleUrl(MapHost host, ICollection <TaskAttemptID> remaining
                                               , Uri url)
        {
            try
            {
                SetupConnectionsWithRetry(host, remaining, url);
                if (!stopped)
                {
                    return(new DataInputStream(connection.GetInputStream()));
                }
                AbortConnect(host, remaining);
            }
            catch (IOException connectFailure)
            {
                bool connectExcpt = connectFailure is ConnectException;
                ioErrs.Increment(1);
                Log.Warn("Failed to connect to " + host + " with " + remaining.Count + " map outputs"
                         , connectFailure);

                // If connect did not succeed, just mark all the maps as failed,
                // indirectly penalizing the host
                scheduler.HostFailed(host.GetHostName());
                foreach (TaskAttemptID failedAttempt in remaining)
                {
                    scheduler.CopyFailed(failedAttempt, host, false, connectExcpt);
                }

                // Add back all the remaining maps, WITHOUT marking them as failed.
                // Kept as a separate pass so every CopyFailed call happens first.
                foreach (TaskAttemptID pendingAttempt in remaining)
                {
                    scheduler.PutBackKnownMapOutput(host, pendingAttempt);
                }
            }
            return(null);
        }
Example #6
0
 /// <summary>
 /// Records a successful copy of <paramref name="mapId"/> from <paramref name="host"/>.
 /// Failure bookkeeping for the attempt and for the host is always cleared. If the
 /// map index was not already marked finished, the output is committed, the map is
 /// marked finished, waiters on this object are notified when no maps remain, and
 /// the transfer statistics and status are updated.
 /// </summary>
 /// <param name="mapId">Map attempt whose output was copied.</param>
 /// <param name="host">Host the output was copied from.</param>
 /// <param name="bytes">Number of bytes transferred.</param>
 /// <param name="startMillis">Copy start time in milliseconds.</param>
 /// <param name="endMillis">Copy end time in milliseconds.</param>
 /// <param name="output">The fetched map output; committed on first success.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void CopySucceeded(TaskAttemptID mapId, MapHost host, long bytes,
                                   long startMillis, long endMillis, MapOutput <K, V> output)
 {
     lock (this)
     {
         // A success wipes the failure history of both the attempt and the host.
         Sharpen.Collections.Remove(failureCounts, mapId);
         Sharpen.Collections.Remove(hostFailures, host.GetHostName());

         int mapIndex = mapId.GetTaskID().GetId();
         if (finishedMaps[mapIndex])
         {
             // Output for this map index was already accepted; nothing more to do.
             return;
         }

         output.Commit();
         finishedMaps[mapIndex] = true;
         shuffledMapsCounter.Increment(1);
         remainingMaps--;
         if (remainingMaps == 0)
         {
             Sharpen.Runtime.NotifyAll(this);
         }

         // Status of this single copy; a zero duration is treated as 1 ms so the
         // rate computation never divides by zero.
         long elapsedMillis = endMillis - startMillis;
         if (elapsedMillis == 0)
         {
             elapsedMillis = 1;
         }
         float  bytesPerMs = (float)bytes / elapsedMillis;
         float  mbPerSec   = bytesPerMs * BytesPerMillisToMbs;
         string copyStatus = "copy task(" + mapId + " succeeded" + " at "
                             + mbpsFormat.Format(mbPerSec) + " MB/s)";

         // Aggregated status
         copyTimeTracker.Add(startMillis, endMillis);
         totalBytesShuffledTillNow += bytes;
         UpdateStatus(copyStatus);
         reduceShuffleBytes.Increment(bytes);
         lastProgressTime = Time.MonotonicNow();
         Log.Debug("map " + mapId + " done " + status.GetStateString());
     }
 }
Example #7
0
        /// <summary>
        /// Fetches a single map output from <paramref name="input"/>: reads the
        /// shuffle header, validates it, reserves a destination through the merger,
        /// shuffles the data into it and reports the success to the scheduler.
        /// </summary>
        /// <param name="host">Host the stream is connected to.</param>
        /// <param name="input">Stream positioned at the next shuffle header.</param>
        /// <param name="remaining">
        /// Attempts still expected from the host; the fetched attempt is removed on success.
        /// </param>
        /// <param name="canRetry">
        /// When true, an IOException is first handed to CheckTimeoutOrRetry, which
        /// throws it again while the retry window is still open.
        /// </param>
        /// <returns>
        /// <c>null</c> on success; <c>EmptyAttemptIdArray</c> when the reservation
        /// failed locally or the merger returned no output (WAIT); otherwise the
        /// attempt ids to treat as failed (all of <paramref name="remaining"/> when
        /// the failing attempt could not be identified).
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private TaskAttemptID[] CopyMapOutput(MapHost host, DataInputStream input, ICollection
                                              <TaskAttemptID> remaining, bool canRetry)
        {
            MapOutput <K, V> mapOutput          = null;
            TaskAttemptID    mapId              = null;
            long             decompressedLength = -1;
            long             compressedLength   = -1;

            try
            {
                long startTime = Time.MonotonicNow();
                int  forReduce = -1;
                //Read the shuffle header
                try
                {
                    ShuffleHeader header = new ShuffleHeader();
                    header.ReadFields(input);
                    mapId              = TaskAttemptID.ForName(header.mapId);
                    compressedLength   = header.compressedLength;
                    decompressedLength = header.uncompressedLength;
                    forReduce          = header.forReduce;
                }
                catch (ArgumentException e)
                {
                    badIdErrs.Increment(1);
                    Log.Warn("Invalid map id ", e);
                    //Don't know which one was bad, so consider all of them as bad
                    return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                }
                InputStream @is = input;
                @is = CryptoUtils.WrapIfNecessary(jobConf, @is, compressedLength);
                // Both lengths from the header are reduced by the crypto padding.
                compressedLength   -= CryptoUtils.CryptoPadding(jobConf);
                decompressedLength -= CryptoUtils.CryptoPadding(jobConf);
                // Do some basic sanity verification
                if (!VerifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId
                                  ))
                {
                    return(new TaskAttemptID[] { mapId });
                }
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("header: " + mapId + ", len: " + compressedLength + ", decomp len: " +
                              decompressedLength);
                }
                // Get the location for the map output - either in-memory or on-disk
                try
                {
                    mapOutput = merger.Reserve(mapId, decompressedLength, id);
                }
                catch (IOException ioe)
                {
                    // kill this reduce attempt
                    ioErrs.Increment(1);
                    scheduler.ReportLocalError(ioe);
                    return(EmptyAttemptIdArray);
                }
                // Check if we can shuffle *now* ...
                if (mapOutput == null)
                {
                    Log.Info("fetcher#" + id + " - MergeManager returned status WAIT ...");
                    //Not an error but wait to process data.
                    return(EmptyAttemptIdArray);
                }
                // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError
                // on decompression failures. Catching and re-throwing as IOException
                // to allow fetch failure logic to be processed
                try
                {
                    // Go!
                    Log.Info("fetcher#" + id + " about to shuffle output of map " + mapOutput.GetMapId
                                 () + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput
                             .GetDescription());
                    mapOutput.Shuffle(host, @is, compressedLength, decompressedLength, metrics, reporter
                                      );
                }
                catch (InternalError e)
                {
                    Log.Warn("Failed to shuffle for fetcher#" + id, e);
                    throw new IOException(e);
                }
                // Inform the shuffle scheduler
                long endTime = Time.MonotonicNow();
                // Reset retryStartTime as map task make progress if retried before.
                retryStartTime = 0;
                scheduler.CopySucceeded(mapId, host, compressedLength, startTime, endTime, mapOutput
                                        );
                // Note successful shuffle
                remaining.Remove(mapId);
                metrics.SuccessFetch();
                return(null);
            }
            catch (IOException ioe)
            {
                // Discard whatever was partially written for this attempt.
                if (mapOutput != null)
                {
                    mapOutput.Abort();
                }
                // Throws ioe again while the retry window is open; returns once it has expired.
                if (canRetry)
                {
                    CheckTimeoutOrRetry(host, ioe);
                }
                ioErrs.Increment(1);
                if (mapId == null || mapOutput == null)
                {
                    // The failure happened before any output was reserved. The space
                    // after "header" keeps the map id readable in the log line.
                    Log.Warn("fetcher#" + id + " failed to read map header " + mapId + " decomp: " + decompressedLength
                             + ", " + compressedLength, ioe);
                    if (mapId == null)
                    {
                        // The header never yielded an id, so every remaining attempt is suspect.
                        return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                    }
                    else
                    {
                        return(new TaskAttemptID[] { mapId });
                    }
                }
                Log.Warn("Failed to shuffle output of " + mapId + " from " + host.GetHostName(),
                         ioe);
                // Inform the shuffle-scheduler
                metrics.FailedFetch();
                return(new TaskAttemptID[] { mapId });
            }
        }
Example #8
0
        /// <summary>
        /// Fetches every map output the scheduler currently knows for
        /// <paramref name="host"/>. Outputs are copied one at a time over a single
        /// connection; when a copy throws an IOException the connection is torn down
        /// and reopened for the attempts still outstanding. Attempts reported as
        /// failed are forwarded to the scheduler, and whatever is left in the
        /// outstanding set is always handed back to the scheduler at the end.
        /// </summary>
        /// <param name="host">Host to fetch map outputs from.</param>
        protected internal virtual void CopyFromHost(MapHost host)
        {
            // A new host starts with a fresh retry window.
            retryStartTime = 0;

            // Completed maps on 'host'. Hosts with only 'OBSOLETE' maps show up
            // empty here, especially at the tail of large jobs.
            IList <TaskAttemptID> maps = scheduler.GetMapsForHost(host);
            if (maps.Count == 0)
            {
                return;
            }
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Fetcher " + id + " going to fetch from " + host + " for: " + maps);
            }

            // Attempts that still have to be fetched.
            ICollection <TaskAttemptID> remaining = new HashSet <TaskAttemptID>(maps);

            // Construct the url and connect
            DataInputStream input = OpenShuffleUrl(host, remaining, GetMapOutputURL(host, maps));
            if (input == null)
            {
                return;
            }

            try
            {
                // Keep copying until nothing is outstanding or a copy reports failed
                // attempts (a non-null result, possibly an empty array).
                TaskAttemptID[] failedTasks = null;
                while (!remaining.IsEmpty() && failedTasks == null)
                {
                    try
                    {
                        failedTasks = CopyMapOutput(host, input, remaining, fetchRetryEnabled);
                    }
                    catch (IOException)
                    {
                        // Setup connection again if disconnected by NM, asking only
                        // for the outputs that are still outstanding.
                        connection.Disconnect();
                        input = OpenShuffleUrl(host, remaining, GetMapOutputURL(host, remaining));
                        if (input == null)
                        {
                            return;
                        }
                    }
                }

                if (failedTasks == null)
                {
                    // Sanity check: a clean run must have consumed every expected output.
                    if (!remaining.IsEmpty())
                    {
                        throw new IOException("server didn't return all expected map outputs: " + remaining
                                              .Count + " left.");
                    }
                }
                else if (failedTasks.Length > 0)
                {
                    Log.Warn("copyMapOutput failed for tasks " + Arrays.ToString(failedTasks));
                    scheduler.HostFailed(host.GetHostName());
                    foreach (TaskAttemptID failedAttempt in failedTasks)
                    {
                        scheduler.CopyFailed(failedAttempt, host, true, false);
                    }
                }

                input.Close();
                // Cleared so the finally block does not clean the stream up again.
                input = null;
            }
            finally
            {
                if (input != null)
                {
                    IOUtils.Cleanup(Log, input);
                }
                // Whatever was not fetched goes back to the scheduler.
                foreach (TaskAttemptID unfetched in remaining)
                {
                    scheduler.PutBackKnownMapOutput(host, unfetched);
                }
            }
        }