Exemple #1
0
        /// <summary>
        /// Builds a fetcher with an explicit <paramref name="id"/>: stores the collaborators,
        /// looks up one counter per shuffle error category, reads the connect/read/retry
        /// settings from <paramref name="job"/>, names the instance <c>"fetcher#" + id</c>,
        /// marks it as a daemon, and (when SSL shuffle is enabled) makes sure the shared
        /// <c>sslFactory</c> has been initialised.
        /// </summary>
        /// <param name="job">Job configuration the timeouts, retry settings and SSL flag are read from.</param>
        /// <param name="reduceId">Reduce attempt; only its task id number is kept (in <c>reduce</c>).</param>
        /// <param name="scheduler">Stored in <c>this.scheduler</c>.</param>
        /// <param name="merger">Stored in <c>this.merger</c>.</param>
        /// <param name="reporter">Stored in <c>this.reporter</c> and used to obtain the error counters.</param>
        /// <param name="metrics">Stored in <c>this.metrics</c>.</param>
        /// <param name="exceptionReporter">Stored in <c>this.exceptionReporter</c>.</param>
        /// <param name="shuffleKey">Stored in <c>this.shuffleSecretKey</c>.</param>
        /// <param name="id">Numeric id of this fetcher; also used in its name.</param>
        /// <exception cref="RuntimeException">
        /// Wraps any exception thrown while initialising the SSL factory.
        /// </exception>
        internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl <K, V>
                         scheduler, MergeManager <K, V> merger, Reporter reporter, ShuffleClientMetrics metrics
                         , ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
        {
            this.jobConf           = job;
            this.reporter          = reporter;
            this.scheduler         = scheduler;
            this.merger            = merger;
            this.metrics           = metrics;
            this.exceptionReporter = exceptionReporter;
            this.id               = id;
            this.reduce           = reduceId.GetTaskID().GetId();
            this.shuffleSecretKey = shuffleKey;

            // One counter per shuffle error category, all in the same counter group.
            ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString
                                             ());
            wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongLength
                                                  .ToString());
            badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString
                                                ());
            wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongMap
                                               .ToString());
            connectionErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.Connection
                                                 .ToString());
            wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongReduce
                                                  .ToString());

            // Timeouts and retry behaviour come from the job configuration.
            this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout, DefaultStalledCopyTimeout
                                                );
            this.readTimeout        = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
            this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs, MRJobConfig
                                                 .DefaultShuffleFetchRetryIntervalMs);
            this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs, DefaultStalledCopyTimeout
                                                );

            // Unless set explicitly, fetch retry defaults to the NM-recovery setting.
            bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled
                                                             , YarnConfiguration.DefaultNmRecoveryEnabled);

            this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, shuffleFetchEnabledDefault
                                                    );
            SetName("fetcher#" + id);
            SetDaemon(true);

            // The SSL state is shared between instances, hence the type-wide lock.
            lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
            {
                sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey, MRConfig.ShuffleSslEnabledDefault
                                            );
                if (sslShuffle && sslFactory == null)
                {
                    // Initialise a local instance and publish it only on success. Assigning the
                    // shared field first would leave a destroyed factory behind when Init()
                    // fails, and later constructor calls would then skip initialisation.
                    SSLFactory factory = new SSLFactory(SSLFactory.Mode.Client, job);
                    try
                    {
                        factory.Init();
                    }
                    catch (Exception ex)
                    {
                        factory.Destroy();
                        throw new RuntimeException(ex);
                    }
                    sslFactory = factory;
                }
            }
        }
Exemple #2
0
 /// <summary>
 /// Creates a fetcher without an explicit id: <c>nextId</c> is pre-incremented and the
 /// resulting value is passed as the id to the constructor overload that takes one.
 /// All other arguments are forwarded unchanged.
 /// </summary>
 public Fetcher(
     JobConf job,
     TaskAttemptID reduceId,
     ShuffleSchedulerImpl<K, V> scheduler,
     MergeManager<K, V> merger,
     Reporter reporter,
     ShuffleClientMetrics metrics,
     ExceptionReporter exceptionReporter,
     SecretKey shuffleKey)
     : this(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter, shuffleKey, ++nextId)
 {
 }
Exemple #3
0
        /// <summary>
        /// Prepares the shared fixture: two job configurations (fetch retry disabled and
        /// enabled), a reduce attempt id, a secret key, Mockito mocks for the collaborators,
        /// and stubbed answers for the reporter's counters and the scheduler's maps-for-host
        /// lookup.
        /// </summary>
        public virtual void Setup()
        {
            Log.Info(">>>> " + name.GetMethodName());

            // Two configurations that differ only in the fetch-retry flag.
            job = new JobConf();
            job.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, false);
            jobWithRetry = new JobConf();
            jobWithRetry.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, true);

            id = TaskAttemptID.ForName("attempt_0_1_r_1_1");

            // Collaborators are mocked using the raw (non-generic) types.
            ss = Org.Mockito.Mockito.Mock<ShuffleSchedulerImpl>();
            mm = Org.Mockito.Mockito.Mock<MergeManagerImpl>();
            r = Org.Mockito.Mockito.Mock<Reporter>();
            metrics = Org.Mockito.Mockito.Mock<ShuffleClientMetrics>();
            except = Org.Mockito.Mockito.Mock<ExceptionReporter>();

            byte[] secretBytes = new byte[] { 0, 0, 0, 0 };
            key = JobTokenSecretManager.CreateSecretKey(secretBytes);

            connection = Org.Mockito.Mockito.Mock<HttpURLConnection>();
            allErrs = Org.Mockito.Mockito.Mock<Counters.Counter>();

            // Every counter lookup on the reporter yields the same mock counter.
            Org.Mockito.Mockito
                .When(r.GetCounter(Matchers.AnyString(), Matchers.AnyString()))
                .ThenReturn(allErrs);

            // The scheduler reports both map attempts for the test host.
            AList<TaskAttemptID> hostMaps = new AList<TaskAttemptID>(1);
            hostMaps.AddItem(map1ID);
            hostMaps.AddItem(map2ID);
            Org.Mockito.Mockito
                .When(ss.GetMapsForHost(host))
                .ThenReturn(hostMaps);
        }
Exemple #4
0
 //Used for status updates
 /// <summary>
 /// Initialises this instance from <paramref name="context"/>: copies the values the
 /// context exposes into fields, creates the shuffle client metrics and the scheduler,
 /// and finally obtains the merge manager from <c>CreateMergeManager</c>.
 /// </summary>
 /// <param name="context">Source of the reduce id, job configuration, reporter, counters
 /// and other values read below.</param>
 public virtual void Init(ShuffleConsumerPlugin.Context context)
 {
     this.context = context;

     // Plain copies of what the context exposes.
     this.reduceId = context.GetReduceId();
     this.jobConf = context.GetJobConf();
     this.umbilical = context.GetUmbilical();
     this.reporter = context.GetReporter();

     // Needs reduceId and jobConf, so it must follow the assignments above.
     this.metrics = new ShuffleClientMetrics(reduceId, jobConf);

     this.copyPhase = context.GetCopyPhase();
     this.taskStatus = context.GetStatus();
     this.reduceTask = context.GetReduceTask();
     this.localMapFiles = context.GetLocalMapFiles();

     // The scheduler receives this instance plus the three shuffle counters.
     scheduler = new ShuffleSchedulerImpl<K, V>(
         jobConf,
         taskStatus,
         reduceId,
         this,
         copyPhase,
         context.GetShuffledMapsCounter(),
         context.GetReduceShuffleBytes(),
         context.GetFailedShuffleCounter());

     // Kept last, as in the original ordering.
     merger = CreateMergeManager(context);
 }
Exemple #5
0
        /// <summary>
        /// Copies <paramref name="compressedLength"/> bytes of map output from
        /// <paramref name="input"/> to <c>disk</c> in chunks of at most 64 KiB, reading
        /// through an <c>IFileInputStream</c> via <c>ReadWithChecksum</c>. After every
        /// chunk the byte count is reported to <paramref name="metrics"/> and progress is
        /// signalled on <paramref name="reporter"/>. On success <c>compressedSize</c> is
        /// set to <paramref name="compressedLength"/>.
        /// </summary>
        /// <param name="host">Host the output comes from; only used in the error message.</param>
        /// <param name="input">Stream to read the map output from.</param>
        /// <param name="compressedLength">Number of bytes to copy.</param>
        /// <param name="decompressedLength">Not used by this implementation.</param>
        /// <param name="metrics">Receives the number of bytes read per chunk.</param>
        /// <param name="reporter">Receives a progress signal per chunk.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Shuffle(MapHost host, InputStream input, long compressedLength,
                                     long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
        {
            const int ChunkSize = 64 * 1024;

            IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, conf);
            input = checksumIn;

            // Bytes still to be copied to local disk.
            long remaining = compressedLength;

            try
            {
                byte[] chunk = new byte[ChunkSize];
                while (remaining > 0)
                {
                    int wanted = (int)Math.Min(remaining, ChunkSize);
                    int got = checksumIn.ReadWithChecksum(chunk, 0, wanted);
                    if (got < 0)
                    {
                        throw new IOException("read past end of stream reading " + GetMapId());
                    }

                    disk.Write(chunk, 0, got);
                    remaining -= got;
                    metrics.InputBytes(got);
                    reporter.Progress();
                }

                Log.Info("Read " + (compressedLength - remaining) + " bytes from map-output for "
                         + GetMapId());
                disk.Close();
            }
            catch (IOException)
            {
                // Release both streams, then let the original exception propagate.
                IOUtils.Cleanup(Log, input, disk);
                throw;
            }

            // Sanity check: everything announced must have been copied.
            if (remaining != 0)
            {
                throw new IOException("Incomplete map output received for " + GetMapId() + " from "
                                      + host.GetHostName() + " (" + remaining + " bytes missing of " + compressedLength
                                      + ")");
            }

            this.compressedSize = compressedLength;
        }
Exemple #6
0
        /// <summary>
        /// Fills the <c>memory</c> buffer completely from <paramref name="input"/>, reading
        /// through an <c>IFileInputStream</c> and, when <c>codec</c> is set, through a
        /// decompressing stream on top of it. Reports <c>memory.Length</c> bytes to
        /// <paramref name="metrics"/>, signals progress, and fails if the stream still
        /// yields data afterwards. The decompressor is handed back to <c>CodecPool</c> in
        /// all cases.
        /// </summary>
        /// <param name="host">Not used by this implementation.</param>
        /// <param name="input">Stream to read the map output from.</param>
        /// <param name="compressedLength">Length passed to the <c>IFileInputStream</c> wrapper.</param>
        /// <param name="decompressedLength">Not used by this implementation.</param>
        /// <param name="metrics">Receives the number of bytes read.</param>
        /// <param name="reporter">Receives a progress signal after the read.</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Shuffle(MapHost host, InputStream input, long compressedLength,
                                     long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
        {
            input = new IFileInputStream(input, compressedLength, conf);

            // Map outputs may be compressed; if so, read through the codec.
            if (codec != null)
            {
                decompressor.Reset();
                input = codec.CreateInputStream(input, decompressor);
            }

            try
            {
                int expected = memory.Length;
                IOUtils.ReadFully(input, memory, 0, expected);
                metrics.InputBytes(expected);
                reporter.Progress();
                Log.Info("Read " + expected + " bytes from map-output for " + GetMapId());

                // One more read must hit end-of-stream; anything else is surplus data.
                bool hasTrailingData = input.Read() >= 0;
                if (hasTrailingData)
                {
                    throw new IOException("Unexpected extra bytes from input stream for " + GetMapId(
                                              ));
                }
            }
            catch (IOException)
            {
                // Release the stream, then let the original exception propagate.
                IOUtils.Cleanup(Log, input);
                throw;
            }
            finally
            {
                CodecPool.ReturnDecompressor(decompressor);
            }
        }
Exemple #7
0
 /// <summary>
 /// Transfers one map output from <paramref name="input"/> into this object's storage.
 /// </summary>
 /// <param name="host">Host the map output is fetched from.</param>
 /// <param name="input">Stream supplying the map output.</param>
 /// <param name="compressedLength">Length of the map output as transferred.</param>
 /// <param name="decompressedLength">Length of the map output after decompression
 /// (presumably; confirm against implementations).</param>
 /// <param name="metrics">Shuffle client metrics available to the implementation.</param>
 /// <param name="reporter">Reporter available to the implementation.</param>
 /// <exception cref="System.IO.IOException"/>
 public abstract void Shuffle(
     MapHost host,
     InputStream input,
     long compressedLength,
     long decompressedLength,
     ShuffleClientMetrics metrics,
     Reporter reporter);
Exemple #8
0
 /// <summary>
 /// Creates a fetcher with an explicit <paramref name="id"/> and stores
 /// <paramref name="connection"/> in <c>this.connection</c>. Every other argument is
 /// forwarded unchanged to the base constructor.
 /// </summary>
 public FakeFetcher(
     JobConf job,
     TaskAttemptID reduceId,
     ShuffleSchedulerImpl<K, V> scheduler,
     MergeManagerImpl<K, V> merger,
     Reporter reporter,
     ShuffleClientMetrics metrics,
     ExceptionReporter exceptionReporter,
     SecretKey jobTokenSecret,
     HttpURLConnection connection,
     int id)
     : base(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter, jobTokenSecret, id)
 {
     this.connection = connection;
 }
Exemple #9
0
 /// <summary>
 /// Creates a local fetcher: forwards the shared arguments to the base constructor
 /// overload that takes no id, keeps <paramref name="job"/> and
 /// <paramref name="localMapFiles"/> in fields, then names the instance
 /// <c>"localfetcher#" + id</c> and marks it as a daemon.
 /// </summary>
 /// <param name="localMapFiles">Map from task attempt id to its <c>MapOutputFile</c>;
 /// stored in <c>this.localMapFiles</c>.</param>
 public LocalFetcher(
     JobConf job,
     TaskAttemptID reduceId,
     ShuffleSchedulerImpl<K, V> scheduler,
     MergeManager<K, V> merger,
     Reporter reporter,
     ShuffleClientMetrics metrics,
     ExceptionReporter exceptionReporter,
     SecretKey shuffleKey,
     IDictionary<TaskAttemptID, MapOutputFile> localMapFiles)
     : base(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter, shuffleKey)
 {
     this.job = job;
     this.localMapFiles = localMapFiles;

     // Replaces the name set by the base constructor; id is the value assigned there.
     SetName("localfetcher#" + id);
     SetDaemon(true);
 }