internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
    MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
{
    this.jobConf = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = id;
    this.reduce = reduceId.GetTaskID().GetId();
    this.shuffleSecretKey = shuffleKey;
    ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString());
    wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongLength.ToString());
    badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString());
    wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongMap.ToString());
    connectionErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.Connection.ToString());
    wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongReduce.ToString());
    this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout, DefaultStalledCopyTimeout);
    // Default read timeout (in milliseconds).
    this.readTimeout = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
    this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs,
        MRJobConfig.DefaultShuffleFetchRetryIntervalMs);
    this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs, DefaultStalledCopyTimeout);
    // Fetch retry defaults to on only when NM recovery is enabled.
    bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled,
        YarnConfiguration.DefaultNmRecoveryEnabled);
    this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, shuffleFetchEnabledDefault);
    SetName("fetcher#" + id);
    SetDaemon(true);
    lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
    {
        sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey, MRConfig.ShuffleSslEnabledDefault);
        if (sslShuffle && sslFactory == null)
        {
            sslFactory = new SSLFactory(SSLFactory.Mode.Client, job);
            try
            {
                sslFactory.Init();
            }
            catch (Exception ex)
            {
                sslFactory.Destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}
public Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
    MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey shuffleKey)
    : this(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter,
        shuffleKey, ++nextId)
{
}
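// Hedged usage sketch (not part of this class): Shuffle.Run typically creates a
// small pool of fetchers through the convenience constructor above and starts
// each daemon thread; 'numFetchers' and 'shuffleKey' are assumed local names,
// and 'this' stands in for the enclosing ExceptionReporter.
Fetcher<K, V>[] fetchers = new Fetcher<K, V>[numFetchers];
for (int i = 0; i < numFetchers; ++i)
{
    fetchers[i] = new Fetcher<K, V>(jobConf, reduceId, scheduler, merger, reporter,
        metrics, this, shuffleKey);
    fetchers[i].Start();
}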
public virtual void Setup()
{
    // mocked generics
    Log.Info(">>>> " + name.GetMethodName());
    job = new JobConf();
    job.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, false);
    jobWithRetry = new JobConf();
    jobWithRetry.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, true);
    id = TaskAttemptID.ForName("attempt_0_1_r_1_1");
    ss = Org.Mockito.Mockito.Mock<ShuffleSchedulerImpl>();
    mm = Org.Mockito.Mockito.Mock<MergeManagerImpl>();
    r = Org.Mockito.Mockito.Mock<Reporter>();
    metrics = Org.Mockito.Mockito.Mock<ShuffleClientMetrics>();
    except = Org.Mockito.Mockito.Mock<ExceptionReporter>();
    key = JobTokenSecretManager.CreateSecretKey(new byte[] { 0, 0, 0, 0 });
    connection = Org.Mockito.Mockito.Mock<HttpURLConnection>();
    allErrs = Org.Mockito.Mockito.Mock<Counters.Counter>();
    Org.Mockito.Mockito.When(r.GetCounter(Matchers.AnyString(), Matchers.AnyString())).ThenReturn(allErrs);
    AList<TaskAttemptID> maps = new AList<TaskAttemptID>(1);
    maps.AddItem(map1ID);
    maps.AddItem(map2ID);
    Org.Mockito.Mockito.When(ss.GetMapsForHost(host)).ThenReturn(maps);
}
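// Hedged example of how this fixture is typically driven (the method name
// CopyFromHost and the verification are illustrative, not copied from a
// specific test): run one copy attempt through a FakeFetcher wired to the
// mocked connection, then check that the shared error counter was bumped.
Fetcher<Text, Text> underTest = new FakeFetcher<Text, Text>(job, id, ss, mm, r,
    metrics, except, key, connection, 0);
underTest.CopyFromHost(host);
Org.Mockito.Mockito.Verify(allErrs).Increment(1);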
public virtual void Init(ShuffleConsumerPlugin.Context context)
{
    this.context = context;
    this.reduceId = context.GetReduceId();
    this.jobConf = context.GetJobConf();
    this.umbilical = context.GetUmbilical();
    this.reporter = context.GetReporter();
    this.metrics = new ShuffleClientMetrics(reduceId, jobConf);
    this.copyPhase = context.GetCopyPhase();
    this.taskStatus = context.GetStatus();
    this.reduceTask = context.GetReduceTask(); // used for status updates
    this.localMapFiles = context.GetLocalMapFiles();
    scheduler = new ShuffleSchedulerImpl<K, V>(jobConf, taskStatus, reduceId, this, copyPhase,
        context.GetShuffledMapsCounter(), context.GetReduceShuffleBytes(),
        context.GetFailedShuffleCounter());
    merger = CreateMergeManager(context);
}
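// Hedged lifecycle sketch (assumed from the ShuffleConsumerPlugin contract):
// the reduce task builds a Context, then calls Init, Run, and Close in that
// order; Run is expected to block until all map outputs are fetched and merged.
ShuffleConsumerPlugin<K, V> plugin = new Shuffle<K, V>();
plugin.Init(context);
RawKeyValueIterator rIter = plugin.Run();
plugin.Close();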
/// <exception cref="System.IO.IOException"/>
public override void Shuffle(MapHost host, InputStream input, long compressedLength,
    long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
{
    input = new IFileInputStream(input, compressedLength, conf);
    // Copy data to local-disk
    long bytesLeft = compressedLength;
    try
    {
        int BytesToRead = 64 * 1024;
        byte[] buf = new byte[BytesToRead];
        while (bytesLeft > 0)
        {
            int n = ((IFileInputStream)input).ReadWithChecksum(buf, 0,
                (int)Math.Min(bytesLeft, BytesToRead));
            if (n < 0)
            {
                throw new IOException("read past end of stream reading " + GetMapId());
            }
            disk.Write(buf, 0, n);
            bytesLeft -= n;
            metrics.InputBytes(n);
            reporter.Progress();
        }
        Log.Info("Read " + (compressedLength - bytesLeft) + " bytes from map-output for " + GetMapId());
        disk.Close();
    }
    catch (IOException)
    {
        // Close the streams and re-throw
        IOUtils.Cleanup(Log, input, disk);
        throw;
    }
    // Sanity check
    if (bytesLeft != 0)
    {
        throw new IOException("Incomplete map output received for " + GetMapId() + " from " +
            host.GetHostName() + " (" + bytesLeft + " bytes missing of " + compressedLength + ")");
    }
    this.compressedSize = compressedLength;
}
/// <exception cref="System.IO.IOException"/>
public override void Shuffle(MapHost host, InputStream input, long compressedLength,
    long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
{
    IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, conf);
    input = checksumIn;
    // Are map-outputs compressed?
    if (codec != null)
    {
        decompressor.Reset();
        input = codec.CreateInputStream(input, decompressor);
    }
    try
    {
        IOUtils.ReadFully(input, memory, 0, memory.Length);
        metrics.InputBytes(memory.Length);
        reporter.Progress();
        Log.Info("Read " + memory.Length + " bytes from map-output for " + GetMapId());
        // Verify that the stream is exhausted; trailing bytes indicate a corrupt transfer.
        if (input.Read() >= 0)
        {
            throw new IOException("Unexpected extra bytes from input stream for " + GetMapId());
        }
    }
    catch (IOException)
    {
        // Close the streams and re-throw
        IOUtils.Cleanup(Log, input);
        throw;
    }
    finally
    {
        CodecPool.ReturnDecompressor(decompressor);
    }
}
/// <exception cref="System.IO.IOException"/>
public abstract void Shuffle(MapHost host, InputStream input, long compressedLength,
    long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter);
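// Hedged sketch of the minimum a Shuffle override must do, using a hypothetical
// DiscardingMapOutput that drains the stream without keeping the data: consume
// exactly compressedLength bytes, feed the metrics, and keep the reporter
// alive. (Constructor and the remaining abstract members are omitted.)
internal class DiscardingMapOutput<K, V> : MapOutput<K, V>
{
    public override void Shuffle(MapHost host, InputStream input, long compressedLength,
        long decompressedLength, ShuffleClientMetrics metrics, Reporter reporter)
    {
        byte[] buf = new byte[64 * 1024];
        long bytesLeft = compressedLength;
        while (bytesLeft > 0)
        {
            int n = input.Read(buf, 0, (int)Math.Min(bytesLeft, buf.Length));
            if (n < 0)
            {
                throw new IOException("read past end of stream reading " + GetMapId());
            }
            bytesLeft -= n;
            metrics.InputBytes(n);
            reporter.Progress();
        }
    }
}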
public FakeFetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
    MergeManagerImpl<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey jobTokenSecret, HttpURLConnection connection,
    int id)
    : base(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter,
        jobTokenSecret, id)
{
    this.connection = connection;
}
public LocalFetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
    MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey shuffleKey,
    IDictionary<TaskAttemptID, MapOutputFile> localMapFiles)
    : base(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter, shuffleKey)
{
    this.job = job;
    this.localMapFiles = localMapFiles;
    SetName("localfetcher#" + id);
    SetDaemon(true);
}
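// Hedged wiring sketch (assumed, mirroring how Shuffle.Run distinguishes the
// two modes): when the map outputs live on this node, localMapFiles is non-null
// and a single LocalFetcher reads them straight from disk via those files,
// bypassing HTTP entirely; otherwise the HTTP fetcher pool shown earlier is used.
if (localMapFiles != null)
{
    LocalFetcher<K, V> localFetcher = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
        merger, reporter, metrics, this, shuffleKey, localMapFiles);
    localFetcher.Start();
}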