示例#1
0
 /// <summary>
 /// Orders two map outputs. Outputs with the same <c>id</c> compare equal;
 /// otherwise the one with the smaller <c>size</c> sorts first, and a tie on
 /// <c>size</c> is broken by <c>id</c>.
 /// </summary>
 /// <param name="o1">Left-hand map output.</param>
 /// <param name="o2">Right-hand map output.</param>
 /// <returns>0 when the ids match, otherwise -1 or 1.</returns>
 public virtual int Compare(MapOutput <K, V> o1, MapOutput <K, V> o2)
 {
     // Matching ids short-circuit everything else, regardless of size.
     if (o1.id == o2.id)
     {
         return 0;
     }

     // Primary ordering key: size.
     if (o1.size < o2.size)
     {
         return -1;
     }
     if (o1.size > o2.size)
     {
         return 1;
     }

     // Sizes are tied and the ids are known to differ at this point.
     return o1.id < o2.id ? -1 : 1;
 }
        /// <summary>
        /// Drives a real <c>ShuffleSchedulerImpl</c> through a host failure, a successful
        /// copy and then a failed copy on the same host. There are no assertions: the test
        /// passes when no exception escapes (the scenario is tied to MAPREDUCE-6361).
        /// </summary>
        public virtual void TestSucceedAndFailedCopyMap <K, V>()
        {
            JobConf jobConf = new JobConf();
            jobConf.SetNumMapTasks(2);

            // Mocked collaborators needed to build the shuffle context.
            TaskUmbilicalProtocol umbilical = Org.Mockito.Mockito.Mock <TaskUmbilicalProtocol>();
            Reporter   taskReporter = Org.Mockito.Mockito.Mock <Reporter>();
            FileSystem fileSystem   = Org.Mockito.Mockito.Mock <FileSystem>();
            Type       combiner     = jobConf.GetCombinerClass();

            // The mock is created from the raw type and cast to the generic one.
            Task.CombineOutputCollector <K, V> collector =
                (Task.CombineOutputCollector <K, V>)Org.Mockito.Mockito.Mock <Task.CombineOutputCollector>();
            TaskAttemptID     contextAttemptId = Org.Mockito.Mockito.Mock <TaskAttemptID>();
            LocalDirAllocator dirAllocator     = Org.Mockito.Mockito.Mock <LocalDirAllocator>();
            CompressionCodec  codec            = Org.Mockito.Mockito.Mock <CompressionCodec>();

            Counters.Counter counter         = Org.Mockito.Mockito.Mock <Counters.Counter>();
            TaskStatus       contextStatus   = Org.Mockito.Mockito.Mock <TaskStatus>();
            Progress         contextProgress = Org.Mockito.Mockito.Mock <Progress>();
            MapOutputFile    outputFile      = Org.Mockito.Mockito.Mock <MapOutputFile>();

            Org.Apache.Hadoop.Mapred.Task task = Org.Mockito.Mockito.Mock <Org.Apache.Hadoop.Mapred.Task>();
            MapOutput <K, V> mapOutput = Org.Mockito.Mockito.Mock <MapOutput>();

            // The same mocked counter and progress instances are reused for every slot.
            ShuffleConsumerPlugin.Context <K, V> shuffleContext = new ShuffleConsumerPlugin.Context <K, V>(
                contextAttemptId, jobConf, fileSystem, umbilical, dirAllocator,
                taskReporter, codec, combiner, collector,
                counter, counter, counter, counter, counter, counter,
                contextStatus, contextProgress, contextProgress, task, outputFile, null);

            // The scheduler under test gets a concrete status and progress object.
            TaskStatus schedulerStatus   = new _TaskStatus_251();
            Progress   schedulerProgress = new Progress();
            ShuffleSchedulerImpl <K, V> shuffleScheduler = new ShuffleSchedulerImpl <K, V>(
                jobConf, schedulerStatus, null, null, schedulerProgress,
                shuffleContext.GetShuffledMapsCounter(),
                shuffleContext.GetReduceShuffleBytes(),
                shuffleContext.GetFailedShuffleCounter());

            MapHost       sourceHost     = new MapHost("host1", null);
            TaskAttemptID failingAttempt = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
            TaskAttemptID succeedingAttempt = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);

            // Step 1: the host is reported as failed (first half of the failing fetch).
            shuffleScheduler.HostFailed(sourceHost.GetHostName());

            // Step 2: a different attempt is fetched successfully from that same host.
            long copiedBytes = 500L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(succeedingAttempt, sourceHost, copiedBytes, 0, 500000, mapOutput);

            // Step 3: second half of the failing fetch; this call must not throw.
            shuffleScheduler.CopyFailed(failingAttempt, sourceHost, true, false);
        }
示例#3
0
        /// <summary>
        /// Retrieve the map output of a single map task
        /// and send it to the merger.
        /// </summary>
        /// <param name="mapTaskId">Attempt whose locally stored output is copied.</param>
        /// <returns>
        /// <c>true</c> once the output has been shuffled and reported to the scheduler;
        /// <c>false</c> when <c>merger.Reserve</c> returned <c>null</c>, i.e. nothing was copied.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private bool CopyMapOutput(TaskAttemptID mapTaskId)
        {
            // Figure out where the map task stored its output.
            Path mapOutputFileName = localMapFiles[mapTaskId].GetOutputFile();
            Path indexFileName     = mapOutputFileName.Suffix(".index");
            // Read its index to determine the location of our split
            // and its size.
            SpillRecord sr = new SpillRecord(indexFileName, job);
            IndexRecord ir = sr.GetIndex(reduce);
            long        compressedLength   = ir.partLength;
            long        decompressedLength = ir.rawLength;

            // Both recorded lengths include the crypto padding; strip it before use.
            compressedLength   -= CryptoUtils.CryptoPadding(job);
            decompressedLength -= CryptoUtils.CryptoPadding(job);
            // Get the location for the map output - either in-memory or on-disk
            MapOutput <K, V> mapOutput = merger.Reserve(mapTaskId, decompressedLength, id);

            // Check if we can shuffle *now* ...
            if (mapOutput == null)
            {
                Log.Info("fetcher#" + id + " - MergeManager returned Status.WAIT ...");
                return(false);
            }
            // Go!
            Log.Info("localfetcher#" + id + " about to shuffle output of map " + mapOutput.GetMapId
                         () + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput
                     .GetDescription());
            // now read the file, seek to the appropriate section, and send it.
            FileSystem        localFs  = FileSystem.GetLocal(job).GetRaw();
            FSDataInputStream inStream = localFs.Open(mapOutputFileName);

            try
            {
                // Wrap inside the try block: if wrapping throws, inStream still refers
                // to the raw stream and the finally block below closes it instead of
                // leaking the open file handle.
                inStream = CryptoUtils.WrapIfNecessary(job, inStream);
                inStream.Seek(ir.startOffset + CryptoUtils.CryptoPadding(job));
                mapOutput.Shuffle(Localhost, inStream, compressedLength, decompressedLength, metrics
                                  , reporter);
            }
            finally
            {
                try
                {
                    inStream.Close();
                }
                catch (IOException ioe)
                {
                    // A failure to close is logged only, so it cannot mask an
                    // exception already propagating out of the try block.
                    Log.Warn("IOException closing inputstream from map output: " + ioe.ToString());
                }
            }
            scheduler.CopySucceeded(mapTaskId, Localhost, compressedLength, 0, 0, mapOutput);
            return(true);
        }
示例#4
0
 /// <summary>
 /// Records a successful copy of one map output. Failure bookkeeping for the attempt
 /// and for the host is cleared. If the map index has not been marked finished yet,
 /// the output is committed, counters and the aggregated status are updated, and
 /// waiters on this object are notified once <c>remainingMaps</c> reaches zero.
 /// </summary>
 /// <param name="mapId">Attempt that was copied.</param>
 /// <param name="host">Host the output was copied from.</param>
 /// <param name="bytes">Number of bytes copied.</param>
 /// <param name="startMillis">Start time of the copy, in milliseconds.</param>
 /// <param name="endMillis">End time of the copy, in milliseconds.</param>
 /// <param name="output">Map output to commit.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void CopySucceeded(TaskAttemptID mapId, MapHost host, long bytes,
                                   long startMillis, long endMillis, MapOutput <K, V> output)
 {
     lock (this)
     {
         // A success wipes earlier failure records for this attempt and its host.
         Sharpen.Collections.Remove(failureCounts, mapId);
         Sharpen.Collections.Remove(hostFailures, host.GetHostName());

         int finishedSlot = mapId.GetTaskID().GetId();
         if (finishedMaps[finishedSlot])
         {
             // This map index is already flagged as finished; nothing else to record.
             return;
         }

         output.Commit();
         finishedMaps[finishedSlot] = true;
         shuffledMapsCounter.Increment(1);

         remainingMaps--;
         if (remainingMaps == 0)
         {
             Sharpen.Runtime.NotifyAll(this);
         }

         // Per-copy transfer rate; a zero-length interval is treated as 1 ms
         // so the division below is always defined.
         long elapsedMillis = endMillis - startMillis;
         if (elapsedMillis == 0)
         {
             elapsedMillis = 1;
         }
         float  rawRate    = (float)bytes / elapsedMillis;
         float  rateInMbs  = rawRate * BytesPerMillisToMbs;
         string copyReport = "copy task(" + mapId + " succeeded" + " at "
                             + mbpsFormat.Format(rateInMbs) + " MB/s)";

         // Fold this copy into the aggregated figures.
         copyTimeTracker.Add(startMillis, endMillis);
         totalBytesShuffledTillNow += bytes;
         UpdateStatus(copyReport);
         reduceShuffleBytes.Increment(bytes);
         lastProgressTime = Time.MonotonicNow();
         Log.Debug("map " + mapId + " done " + status.GetStateString());
     }
 }
示例#5
0
        /// <summary>
        /// Reads one shuffle header from <paramref name="input"/>, reserves a
        /// <c>MapOutput</c> from the merger and shuffles the data into it.
        /// </summary>
        /// <param name="host">Host the data is being fetched from.</param>
        /// <param name="input">Stream positioned at the next shuffle header.</param>
        /// <param name="remaining">
        /// Attempts still to be fetched; the copied attempt is removed from it on success.
        /// </param>
        /// <param name="canRetry">
        /// When <c>true</c>, an <c>IOException</c> is first handed to
        /// <c>CheckTimeoutOrRetry</c> before being treated as a failure.
        /// </param>
        /// <returns>
        /// <c>null</c> on success; <c>EmptyAttemptIdArray</c> when the reservation threw
        /// an <c>IOException</c> or returned <c>null</c>; otherwise the attempt ids to
        /// treat as failed (either just the current one, or every remaining one when the
        /// map id could not be determined).
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private TaskAttemptID[] CopyMapOutput(MapHost host, DataInputStream input, ICollection
                                              <TaskAttemptID> remaining, bool canRetry)
        {
            // Declared outside the try block so the IOException handler below can
            // tell how far processing got before the failure.
            MapOutput <K, V> mapOutput          = null;
            TaskAttemptID    mapId              = null;
            long             decompressedLength = -1;
            long             compressedLength   = -1;

            try
            {
                long startTime = Time.MonotonicNow();
                int  forReduce = -1;
                //Read the shuffle header
                try
                {
                    ShuffleHeader header = new ShuffleHeader();
                    header.ReadFields(input);
                    mapId              = TaskAttemptID.ForName(header.mapId);
                    compressedLength   = header.compressedLength;
                    decompressedLength = header.uncompressedLength;
                    forReduce          = header.forReduce;
                }
                catch (ArgumentException e)
                {
                    badIdErrs.Increment(1);
                    Log.Warn("Invalid map id ", e);
                    //Don't know which one was bad, so consider all of them as bad
                    return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                }
                InputStream @is = input;
                // The wrapper is given the length as read from the header; the crypto
                // padding is subtracted from both lengths only afterwards.
                @is = CryptoUtils.WrapIfNecessary(jobConf, @is, compressedLength);
                compressedLength   -= CryptoUtils.CryptoPadding(jobConf);
                decompressedLength -= CryptoUtils.CryptoPadding(jobConf);
                // Do some basic sanity verification
                if (!VerifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId
                                  ))
                {
                    return(new TaskAttemptID[] { mapId });
                }
                if (Log.IsDebugEnabled())
                {
                    Log.Debug("header: " + mapId + ", len: " + compressedLength + ", decomp len: " +
                              decompressedLength);
                }
                // Get the location for the map output - either in-memory or on-disk
                try
                {
                    mapOutput = merger.Reserve(mapId, decompressedLength, id);
                }
                catch (IOException ioe)
                {
                    // kill this reduce attempt
                    ioErrs.Increment(1);
                    scheduler.ReportLocalError(ioe);
                    return(EmptyAttemptIdArray);
                }
                // Check if we can shuffle *now* ...
                if (mapOutput == null)
                {
                    Log.Info("fetcher#" + id + " - MergeManager returned status WAIT ...");
                    //Not an error but wait to process data.
                    return(EmptyAttemptIdArray);
                }
                // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError
                // on decompression failures. Catching and re-throwing as IOException
                // to allow fetch failure logic to be processed
                try
                {
                    // Go!
                    Log.Info("fetcher#" + id + " about to shuffle output of map " + mapOutput.GetMapId
                                 () + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput
                             .GetDescription());
                    mapOutput.Shuffle(host, @is, compressedLength, decompressedLength, metrics, reporter
                                      );
                }
                catch (InternalError e)
                {
                    Log.Warn("Failed to shuffle for fetcher#" + id, e);
                    throw new IOException(e);
                }
                // Inform the shuffle scheduler
                long endTime = Time.MonotonicNow();
                // Reset retryStartTime as map task make progress if retried before.
                retryStartTime = 0;
                scheduler.CopySucceeded(mapId, host, compressedLength, startTime, endTime, mapOutput
                                        );
                // Note successful shuffle
                remaining.Remove(mapId);
                metrics.SuccessFetch();
                return(null);
            }
            catch (IOException ioe)
            {
                // Abort the reservation, if one was obtained before the failure.
                if (mapOutput != null)
                {
                    mapOutput.Abort();
                }
                if (canRetry)
                {
                    CheckTimeoutOrRetry(host, ioe);
                }
                ioErrs.Increment(1);
                // Either the header was never parsed (mapId == null) or the failure
                // happened before an output was reserved (mapOutput == null).
                if (mapId == null || mapOutput == null)
                {
                    Log.Warn("fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength
                             + ", " + compressedLength, ioe);
                    if (mapId == null)
                    {
                        // No id to blame, so every remaining attempt is returned.
                        return(Sharpen.Collections.ToArray(remaining, new TaskAttemptID[remaining.Count]));
                    }
                    else
                    {
                        return(new TaskAttemptID[] { mapId });
                    }
                }
                // Reaching here means the header was read and an output was reserved,
                // so the failure came from the shuffle or the steps after it.
                Log.Warn("Failed to shuffle output of " + mapId + " from " + host.GetHostName(),
                         ioe);
                // Inform the shuffle-scheduler
                metrics.FailedFetch();
                return(new TaskAttemptID[] { mapId });
            }
        }
示例#6
0
        /// <summary>
        /// Exercises in-memory merging on a stubbed merge manager: two reservations fit in
        /// memory, a third is refused, and committing the first two starts a merge. The
        /// cycle is then repeated while the first merge is still running, and the test
        /// checks the merge count after each barrier hand-off.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestMemoryMerge()
        {
            int     totalMemBytes = 10000;
            int     outputSize    = 7950;
            JobConf jobConf       = new JobConf();

            jobConf.SetFloat(MRJobConfig.ShuffleInputBufferPercent, 1.0f);
            jobConf.SetLong(MRJobConfig.ReduceMemoryTotalBytes, totalMemBytes);
            jobConf.SetFloat(MRJobConfig.ShuffleMemoryLimitPercent, 0.8f);
            jobConf.SetFloat(MRJobConfig.ShuffleMergePercent, 0.9f);

            TestMergeManager.TestExceptionReporter exceptionReporter =
                new TestMergeManager.TestExceptionReporter();
            // Two-party barriers that the stubbed manager and this test both wait on.
            CyclicBarrier mergeStarted  = new CyclicBarrier(2);
            CyclicBarrier mergeFinished = new CyclicBarrier(2);

            TestMergeManager.StubbedMergeManager manager =
                new TestMergeManager.StubbedMergeManager(jobConf, exceptionReporter, mergeStarted, mergeFinished);

            // Round 1: reserve two outputs that together trigger a merge once committed.
            MapOutput <Text, Text> reservedA = manager.Reserve(null, outputSize, 0);
            NUnit.Framework.Assert.IsTrue("Should be a memory merge", (reservedA is InMemoryMapOutput));
            InMemoryMapOutput <Text, Text> memoryA = (InMemoryMapOutput <Text, Text>)reservedA;
            FillOutput(memoryA);

            MapOutput <Text, Text> reservedB = manager.Reserve(null, outputSize, 0);
            NUnit.Framework.Assert.IsTrue("Should be a memory merge", (reservedB is InMemoryMapOutput));
            InMemoryMapOutput <Text, Text> memoryB = (InMemoryMapOutput <Text, Text>)reservedB;
            FillOutput(memoryB);

            // A third reservation is expected to come back null.
            MapOutput <Text, Text> refused = manager.Reserve(null, outputSize, 0);
            NUnit.Framework.Assert.AreEqual("Should be told to wait", null, refused);

            // Committing both outputs starts the first merge; wait until it has begun.
            memoryA.Commit();
            memoryB.Commit();
            mergeStarted.Await();
            NUnit.Framework.Assert.AreEqual(1, manager.GetNumMerges());

            // Round 2: same pattern while the first merge has not completed yet.
            reservedA = manager.Reserve(null, outputSize, 0);
            NUnit.Framework.Assert.IsTrue("Should be a memory merge", (reservedA is InMemoryMapOutput));
            memoryA = (InMemoryMapOutput <Text, Text>)reservedA;
            FillOutput(memoryA);

            reservedB = manager.Reserve(null, outputSize, 0);
            NUnit.Framework.Assert.IsTrue("Should be a memory merge", (reservedB is InMemoryMapOutput));
            memoryB = (InMemoryMapOutput <Text, Text>)reservedB;
            FillOutput(memoryB);

            // Again the next reservation must be null.
            refused = manager.Reserve(null, outputSize, 0);
            NUnit.Framework.Assert.AreEqual("Should be told to wait", null, refused);

            // Commit *before* the merge thread is released from the first merge.
            memoryA.Commit();
            memoryB.Commit();

            // Release the first merge.
            mergeFinished.Await();

            // Wait for the second merge to start and check the count.
            mergeStarted.Await();
            NUnit.Framework.Assert.AreEqual(2, manager.GetNumMerges());

            // Release the second merge; the count stays at two and no exception was reported.
            mergeFinished.Await();
            NUnit.Framework.Assert.AreEqual(2, manager.GetNumMerges());
            NUnit.Framework.Assert.AreEqual("exception reporter invoked", 0,
                                            exceptionReporter.GetNumExceptions());
        }
        /// <summary>
        /// Feeds ten successful copies with differently placed (disjoint, adjacent and
        /// overlapping) time intervals into a real <c>ShuffleSchedulerImpl</c> and, after
        /// each one, compares the progress text with the expected <c>CopyMessage</c>.
        /// </summary>
        public virtual void TestAggregatedTransferRate <K, V>()
        {
            JobConf jobConf = new JobConf();
            jobConf.SetNumMapTasks(10);

            // Mocked collaborators needed to build the shuffle context.
            TaskUmbilicalProtocol umbilical = Org.Mockito.Mockito.Mock <TaskUmbilicalProtocol>();
            Reporter   taskReporter = Org.Mockito.Mockito.Mock <Reporter>();
            FileSystem fileSystem   = Org.Mockito.Mockito.Mock <FileSystem>();
            Type       combiner     = jobConf.GetCombinerClass();

            // The mock is created from the raw type and cast to the generic one.
            Task.CombineOutputCollector <K, V> collector =
                (Task.CombineOutputCollector <K, V>)Org.Mockito.Mockito.Mock <Task.CombineOutputCollector>();
            TaskAttemptID     contextAttemptId = Org.Mockito.Mockito.Mock <TaskAttemptID>();
            LocalDirAllocator dirAllocator     = Org.Mockito.Mockito.Mock <LocalDirAllocator>();
            CompressionCodec  codec            = Org.Mockito.Mockito.Mock <CompressionCodec>();

            Counters.Counter counter         = Org.Mockito.Mockito.Mock <Counters.Counter>();
            TaskStatus       contextStatus   = Org.Mockito.Mockito.Mock <TaskStatus>();
            Progress         contextProgress = Org.Mockito.Mockito.Mock <Progress>();
            MapOutputFile    outputFile      = Org.Mockito.Mockito.Mock <MapOutputFile>();

            Org.Apache.Hadoop.Mapred.Task task = Org.Mockito.Mockito.Mock <Org.Apache.Hadoop.Mapred.Task>();
            MapOutput <K, V> mapOutput = Org.Mockito.Mockito.Mock <MapOutput>();

            // The same mocked counter and progress instances are reused for every slot.
            ShuffleConsumerPlugin.Context <K, V> shuffleContext = new ShuffleConsumerPlugin.Context <K, V>(
                contextAttemptId, jobConf, fileSystem, umbilical, dirAllocator,
                taskReporter, codec, combiner, collector,
                counter, counter, counter, counter, counter, counter,
                contextStatus, contextProgress, contextProgress, task, outputFile, null);

            // The scheduler under test reports into this concrete progress object.
            TaskStatus schedulerStatus   = new _TaskStatus_115();
            Progress   schedulerProgress = new Progress();
            ShuffleSchedulerImpl <K, V> shuffleScheduler = new ShuffleSchedulerImpl <K, V>(
                jobConf, schedulerStatus, null, null, schedulerProgress,
                shuffleContext.GetShuffledMapsCounter(),
                shuffleContext.GetReduceShuffleBytes(),
                shuffleContext.GetFailedShuffleCounter());

            // 1st interval: 40MB from 60s to 100s.
            TaskAttemptID attempt0 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 0), 0);
            long copiedBytes = 40L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt0, new MapHost(null, null), copiedBytes,
                                           60000, 100000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(1, 1, 1), schedulerProgress.ToString());

            // 2nd interval, entirely before the 1st: 50MB from 0s to 50s.
            TaskAttemptID attempt1 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 1), 1);
            copiedBytes = 50L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt1, new MapHost(null, null), copiedBytes,
                                           0, 50000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(2, 1, 1), schedulerProgress.ToString());

            // 3rd interval, overlapping both the 1st and the 2nd: 110MB from 25s to 80s.
            TaskAttemptID attempt2 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 2), 2);
            copiedBytes = 110L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt2, new MapHost(null, null), copiedBytes,
                                           25000, 80000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(3, 2, 2), schedulerProgress.ToString());

            // 4th interval, starting exactly where the 1st ends: 100MB from 100s to 300s.
            TaskAttemptID attempt3 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 3), 3);
            copiedBytes = 100L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt3, new MapHost(null, null), copiedBytes,
                                           100000, 300000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(4, 0.5, 1), schedulerProgress.ToString());

            // 5th interval, after the 4th with a gap: 50MB from 350s to 400s.
            TaskAttemptID attempt4 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 4), 4);
            copiedBytes = 50L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt4, new MapHost(null, null), copiedBytes,
                                           350000, 400000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(5, 1, 1), schedulerProgress.ToString());

            // 6th interval, after the 5th with a gap: 50MB from 450s to 500s.
            TaskAttemptID attempt5 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 5), 5);
            copiedBytes = 50L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt5, new MapHost(null, null), copiedBytes,
                                           450000, 500000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(6, 1, 1), schedulerProgress.ToString());

            // 7th interval, inside the gap between the 4th and the 5th: 20MB from 320s to 340s.
            TaskAttemptID attempt6 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 6), 6);
            copiedBytes = 20L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt6, new MapHost(null, null), copiedBytes,
                                           320000, 340000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(7, 1, 1), schedulerProgress.ToString());

            // 8th interval, spanning from the tail of the 4th, across the 7th, up to the
            // start of the 5th: 30MB from 290s to 350s.
            TaskAttemptID attempt7 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 7), 7);
            copiedBytes = 30L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt7, new MapHost(null, null), copiedBytes,
                                           290000, 350000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(8, 0.5, 1), schedulerProgress.ToString());

            // 9th interval, filling the gap between the 5th and the 6th: 50MB from 400s to 450s.
            TaskAttemptID attempt8 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 8), 8);
            copiedBytes = 50L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt8, new MapHost(null, null), copiedBytes,
                                           400000, 450000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(9, 1, 1), schedulerProgress.ToString());

            // 10th interval, covering every earlier one: 500MB from 0s to 500s.
            TaskAttemptID attempt9 = new TaskAttemptID(
                new TaskID(new JobID("test", 0), TaskType.Map, 9), 9);
            copiedBytes = 500L * 1024 * 1024;
            shuffleScheduler.CopySucceeded(attempt9, new MapHost(null, null), copiedBytes,
                                           0, 500000, mapOutput);
            NUnit.Framework.Assert.AreEqual(CopyMessage(10, 1, 2), schedulerProgress.ToString());
        }