/// <summary>
/// Returns the Avro record describing this unsuccessful task attempt.
/// The record is built from this event's fields on the first call and the
/// same instance is returned on every later call.
/// </summary>
/// <returns>The cached <c>TaskAttemptUnsuccessfulCompletion</c> record.</returns>
public virtual object GetDatum()
{
    // Already materialised: hand back the cached record.
    if (datum != null)
    {
        return datum;
    }

    datum = new TaskAttemptUnsuccessfulCompletion();

    // Attempt identity and outcome.
    datum.taskid = new Utf8(attemptId.GetTaskID().ToString());
    datum.taskType = new Utf8(taskType.ToString());
    datum.attemptId = new Utf8(attemptId.ToString());
    datum.finishTime = finishTime;

    // Placement; the rack name is only written when one is known.
    datum.hostname = new Utf8(hostname);
    if (rackName != null)
    {
        datum.rackname = new Utf8(rackName);
    }
    datum.port = port;

    datum.error = new Utf8(error);
    datum.status = new Utf8(status);
    datum.counters = EventWriter.ToAvro(counters);

    // Per-split progress series extracted from allSplits.
    var wallclockSeries = ProgressSplitsBlock.ArrayGetWallclockTime(allSplits);
    datum.clockSplits = AvroArrayUtils.ToAvro(wallclockSeries);
    var cpuSeries = ProgressSplitsBlock.ArrayGetCPUTime(allSplits);
    datum.cpuUsages = AvroArrayUtils.ToAvro(cpuSeries);
    var virtualMemSeries = ProgressSplitsBlock.ArrayGetVMemKbytes(allSplits);
    datum.vMemKbytes = AvroArrayUtils.ToAvro(virtualMemSeries);
    var physicalMemSeries = ProgressSplitsBlock.ArrayGetPhysMemKbytes(allSplits);
    datum.physMemKbytes = AvroArrayUtils.ToAvro(physicalMemSeries);

    return datum;
}
/// <summary>
/// Deletes the committed output of every earlier attempt of the task that
/// <paramref name="context"/> belongs to (attempt ids 0 up to, but not
/// including, the current attempt id).
/// </summary>
/// <param name="context">Context of the task attempt currently running.</param>
/// <exception cref="System.InvalidOperationException">
/// Thrown when this instance's type does not carry the <c>Checkpointable</c> annotation.
/// </exception>
/// <exception cref="System.IO.IOException">
/// Thrown when an existing earlier-attempt path cannot be deleted.
/// </exception>
public virtual void CleanUpPartialOutputForTask(TaskAttemptContext context)
{
    // Defensive re-check: callers are expected to verify this already, but
    // mishandling it would produce inconsistent output, so check again here.
    bool annotated = this.GetType().IsAnnotationPresent(typeof(Checkpointable));
    if (!annotated)
    {
        throw new InvalidOperationException("Invoking cleanUpPartialOutputForTask() " + "from non @Preemptable class");
    }

    FileSystem fs = FsFor(GetTaskAttemptPath(context), context.GetConfiguration());
    Log.Info("cleanUpPartialOutputForTask: removing everything belonging to " + context.GetTaskAttemptID().GetTaskID() + " in: " + GetCommittedTaskPath(context).GetParent());

    TaskAttemptID currentAttempt = context.GetTaskAttemptID();
    TaskID owningTask = currentAttempt.GetTaskID();
    Path committedRoot = GetCommittedTaskPath(context).GetParent();

    // Remove any committed output left behind by earlier attempts.
    int attemptNumber = 0;
    while (attemptNumber < currentAttempt.GetId())
    {
        TaskAttemptID earlierAttempt = new TaskAttemptID(owningTask, attemptNumber);
        Path earlierOutput = new Path(committedRoot, earlierAttempt.ToString());
        if (fs.Exists(earlierOutput))
        {
            bool removed = fs.Delete(earlierOutput, true);
            if (!removed)
            {
                throw new IOException("Failed to delete " + earlierOutput);
            }
        }
        ++attemptNumber;
    }
}
/// <summary>
/// Creates a fetcher thread for the given reduce attempt.
/// Stores the collaborators, looks up the shuffle error counters from
/// <paramref name="reporter"/>, reads the connect/read/retry settings from
/// <paramref name="job"/>, names the thread <c>fetcher#&lt;id&gt;</c>, marks it
/// as a daemon and, when SSL shuffle is enabled and no factory exists yet,
/// initialises the client-side <c>SSLFactory</c>.
/// </summary>
/// <param name="job">Job configuration the shuffle settings are read from.</param>
/// <param name="reduceId">Reduce attempt; only its task id number is retained.</param>
/// <param name="scheduler">Shuffle scheduler stored on this fetcher.</param>
/// <param name="merger">Merge manager stored on this fetcher.</param>
/// <param name="reporter">Reporter used to obtain the shuffle error counters.</param>
/// <param name="metrics">Shuffle client metrics stored on this fetcher.</param>
/// <param name="exceptionReporter">Exception reporter stored on this fetcher.</param>
/// <param name="shuffleKey">Secret key stored as the shuffle secret.</param>
/// <param name="id">Numeric id of this fetcher, also used in the thread name.</param>
/// <exception cref="RuntimeException">
/// Wraps any exception raised while initialising the SSL factory.
/// </exception>
internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler, MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics, ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
{
    this.jobConf = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = id;
    this.reduce = reduceId.GetTaskID().GetId();
    this.shuffleSecretKey = shuffleKey;

    // One counter per shuffle error category, all in the same counter group.
    ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString());
    wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongLength.ToString());
    badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString());
    wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongMap.ToString());
    connectionErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.Connection.ToString());
    wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongReduce.ToString());

    // Timeouts and retry policy, each falling back to a default when unset.
    this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout, DefaultStalledCopyTimeout);
    this.readTimeout = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
    this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs, MRJobConfig.DefaultShuffleFetchRetryIntervalMs);
    this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs, DefaultStalledCopyTimeout);

    // Fetch retry defaults to whatever the NM-recovery setting is.
    bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled, YarnConfiguration.DefaultNmRecoveryEnabled);
    this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, shuffleFetchEnabledDefault);

    SetName("fetcher#" + id);
    SetDaemon(true);

    // NOTE(review): the type-level lock suggests sslShuffle/sslFactory are
    // shared between fetchers - confirm against the field declarations.
    lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
    {
        sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey, MRConfig.ShuffleSslEnabledDefault);
        if (sslShuffle && sslFactory == null)
        {
            // Initialise into a local and publish only on success. Assigning
            // the field first would leave a destroyed factory behind when
            // Init() fails, and a later constructor seeing a non-null field
            // would then skip initialisation and use the dead instance.
            SSLFactory factory = new SSLFactory(SSLFactory.Mode.Client, job);
            try
            {
                factory.Init();
            }
            catch (Exception ex)
            {
                factory.Destroy();
                throw new RuntimeException(ex);
            }
            sslFactory = factory;
        }
    }
}
/// <summary>
/// Sleeps for a short while and then emits <c>(value, 1)</c>.
/// When map speculation is switched on in the configuration, attempt 0 of
/// map task 0 sleeps for 10000 ms instead of 100 ms so that it becomes a
/// candidate for speculative execution; a re-launched attempt has a
/// different attempt id and therefore keeps the 100 ms sleep.
/// </summary>
/// <param name="key">Input key (unused).</param>
/// <param name="value">Input value, written out as the output key.</param>
/// <param name="context">Mapper context providing the attempt id, configuration and output.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
protected override void Map(object key, Text value, Mapper.Context context)
{
    TaskAttemptID taid = context.GetTaskAttemptID();
    Configuration conf = context.GetConfiguration();
    bool speculationUnderTest = conf.GetBoolean(MRJobConfig.MapSpeculative, false);

    // Only the very first attempt of the very first map task is slowed down.
    bool isDeliberatelySlow =
        taid.GetTaskType() == TaskType.Map
        && speculationUnderTest
        && taid.GetTaskID().GetId() == 0
        && taid.GetId() == 0;

    long sleepTime = isDeliberatelySlow ? 10000 : 100;

    try
    {
        Sharpen.Thread.Sleep(sleepTime);
    }
    catch (Exception)
    {
        // Ignore
    }

    context.Write(value, new IntWritable(1));
}
/// <summary>
/// Returns the Avro record describing this finished task attempt.
/// The record is built from this event's fields on the first call and the
/// same instance is returned on every later call.
/// </summary>
/// <returns>The cached <c>TaskAttemptFinished</c> record.</returns>
public virtual object GetDatum()
{
    // Already materialised: hand back the cached record.
    if (datum != null)
    {
        return datum;
    }

    datum = new TaskAttemptFinished();

    // Attempt identity and outcome.
    datum.taskid = new Utf8(attemptId.GetTaskID().ToString());
    datum.attemptId = new Utf8(attemptId.ToString());
    datum.taskType = new Utf8(taskType.ToString());
    datum.taskStatus = new Utf8(taskStatus);
    datum.finishTime = finishTime;

    // The rack name is only written when one is known.
    if (rackName != null)
    {
        datum.rackname = new Utf8(rackName);
    }
    datum.hostname = new Utf8(hostname);

    datum.state = new Utf8(state);
    datum.counters = EventWriter.ToAvro(counters);

    return datum;
}
/// <summary>
/// Builds the base map-output URI for a reduce attempt:
/// <c>&lt;url&gt;/mapOutput?job=&lt;job id&gt;&amp;reduce=&lt;reduce task number&gt;&amp;map=</c>.
/// A <c>/</c> is inserted after <paramref name="url"/> only when it does not
/// already end with one. The <c>map=</c> value is left empty.
/// </summary>
/// <param name="reduceId">Reduce attempt supplying the job id and task number.</param>
/// <param name="url">Base URL to append to.</param>
/// <returns>The URI created from the assembled string.</returns>
internal static URI GetBaseURI(TaskAttemptID reduceId, string url)
{
    StringBuilder target = new StringBuilder(url);

    bool endsWithSlash = url.EndsWith("/");
    if (!endsWithSlash)
    {
        target.Append("/");
    }

    target.Append("mapOutput?job=")
          .Append(reduceId.GetJobID())
          .Append("&reduce=")
          .Append(reduceId.GetTaskID().GetId())
          .Append("&map=");

    return URI.Create(target.ToString());
}
/// <summary>
/// Creates an event recording the start of a task attempt by copying the
/// supplied values into the event's <c>datum</c> record.
/// </summary>
/// <param name="attemptId">Id of the attempt</param>
/// <param name="taskType">Type of task</param>
/// <param name="startTime">Start time of the attempt</param>
/// <param name="trackerName">Name of the Task Tracker where attempt is running</param>
/// <param name="httpPort">The port number of the tracker</param>
/// <param name="shufflePort">The shuffle port number of the container</param>
/// <param name="containerId">The containerId for the task attempt.</param>
/// <param name="locality">The locality of the task attempt; not written when null</param>
/// <param name="avataar">The avataar of the task attempt; not written when null</param>
public TaskAttemptStartedEvent(
    TaskAttemptID attemptId,
    TaskType taskType,
    long startTime,
    string trackerName,
    int httpPort,
    int shufflePort,
    ContainerId containerId,
    string locality,
    string avataar)
{
    // Mandatory fields.
    datum.attemptId = new Utf8(attemptId.ToString());
    datum.taskid = new Utf8(attemptId.GetTaskID().ToString());
    datum.startTime = startTime;
    datum.taskType = new Utf8(taskType.ToString());
    datum.trackerName = new Utf8(trackerName);
    datum.httpPort = httpPort;
    datum.shufflePort = shufflePort;
    datum.containerId = new Utf8(containerId.ToString());

    // Optional fields are left untouched when no value is supplied.
    if (locality != null)
    {
        datum.locality = new Utf8(locality);
    }

    if (avataar != null)
    {
        datum.avataar = new Utf8(avataar);
    }
}
/// <summary>
/// Records that the output of <paramref name="mapId"/> was copied successfully.
/// Failure bookkeeping for the map and the host is always cleared. If the map
/// was not yet marked finished, the output is committed, the map is marked
/// finished, counters and the status string are updated, and waiters on this
/// object are notified once no maps remain.
/// </summary>
/// <param name="mapId">Map attempt whose output was copied.</param>
/// <param name="host">Host the output was fetched from.</param>
/// <param name="bytes">Number of bytes copied.</param>
/// <param name="startMillis">Copy start time in milliseconds.</param>
/// <param name="endMillis">Copy end time in milliseconds.</param>
/// <param name="output">The fetched map output to commit.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void CopySucceeded(TaskAttemptID mapId, MapHost host, long bytes, long startMillis, long endMillis, MapOutput<K, V> output)
{
    lock (this)
    {
        Sharpen.Collections.Remove(failureCounts, mapId);
        Sharpen.Collections.Remove(hostFailures, host.GetHostName());

        int mapIndex = mapId.GetTaskID().GetId();
        if (finishedMaps[mapIndex])
        {
            // Output of this map was already accounted for; nothing more to do.
            return;
        }

        output.Commit();
        finishedMaps[mapIndex] = true;
        shuffledMapsCounter.Increment(1);

        --remainingMaps;
        if (remainingMaps == 0)
        {
            Sharpen.Runtime.NotifyAll(this);
        }

        // update single copy task status
        long copyMillis = endMillis - startMillis;
        if (copyMillis == 0)
        {
            // Avoid a division by zero for copies that took under a millisecond.
            copyMillis = 1;
        }
        float bytesPerMilli = (float)bytes / copyMillis;
        float rateInMbs = bytesPerMilli * BytesPerMillisToMbs;
        string individualProgress = "copy task(" + mapId + " succeeded" + " at " + mbpsFormat.Format(rateInMbs) + " MB/s)";

        // update the aggregated status
        copyTimeTracker.Add(startMillis, endMillis);
        totalBytesShuffledTillNow += bytes;
        UpdateStatus(individualProgress);
        reduceShuffleBytes.Increment(bytes);
        lastProgressTime = Time.MonotonicNow();

        Log.Debug("map " + mapId + " done " + status.GetStateString());
    }
}
/// <summary>
/// Takes the known maps of <paramref name="host"/> (clearing them on the host)
/// and returns those that are neither obsolete nor finished, stopping once
/// <c>MaxMapsAtOnce</c> have been selected. Still-needed maps that come after
/// the cut-off are handed back to the host.
/// </summary>
/// <param name="host">Host whose known maps are to be assigned.</param>
/// <returns>The maps selected for fetching from the host.</returns>
public virtual IList<TaskAttemptID> GetMapsForHost(MapHost host)
{
    lock (this)
    {
        IList<TaskAttemptID> known = host.GetAndClearKnownMaps();
        IEnumerator<TaskAttemptID> cursor = known.GetEnumerator();
        IList<TaskAttemptID> selected = new AList<TaskAttemptID>();
        int includedMaps = 0;
        int totalSize = known.Count;

        // find the maps that we still need, up to the limit
        while (cursor.HasNext())
        {
            TaskAttemptID candidate = cursor.Next();
            if (obsoleteMaps.Contains(candidate) || finishedMaps[candidate.GetTaskID().GetId()])
            {
                continue;
            }
            selected.AddItem(candidate);
            includedMaps++;
            if (includedMaps >= MaxMapsAtOnce)
            {
                break;
            }
        }

        // put back the maps left after the limit; the same cursor is reused,
        // so this continues from where the first loop stopped
        while (cursor.HasNext())
        {
            TaskAttemptID leftover = cursor.Next();
            if (obsoleteMaps.Contains(leftover) || finishedMaps[leftover.GetTaskID().GetId()])
            {
                continue;
            }
            host.AddKnownMap(leftover);
        }

        Log.Info("assigned " + includedMaps + " of " + totalSize + " to " + host + " to " + Sharpen.Thread.CurrentThread().GetName());
        return selected;
    }
}
/// <summary>
/// Runs a 2-map/2-reduce sleep job with profiling enabled for task
/// <c>ProfiledTaskId</c> and verifies the profiler output found in the
/// task log directories.
/// With <paramref name="useDefault"/> set, the default profile parameters are
/// used; otherwise maps are profiled with hprof (to the profile log) and
/// reduces with <c>-Xprof</c> (to stdout).
/// The test returns without running when the MR app jar does not exist.
/// </summary>
/// <param name="useDefault">Whether to keep the default profiler parameters.</param>
/// <exception cref="System.Exception"/>
private void TestProfilerInternal(bool useDefault)
{
    if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
    {
        Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
        return;
    }

    SleepJob sleepJob = new SleepJob();
    JobConf sleepConf = new JobConf(mrCluster.GetConfig());
    sleepConf.SetProfileEnabled(true);
    sleepConf.SetProfileTaskRange(true, ProfiledTaskId.ToString());
    sleepConf.SetProfileTaskRange(false, ProfiledTaskId.ToString());
    if (!useDefault)
    {
        // use hprof for map to profile.out
        sleepConf.Set(MRJobConfig.TaskMapProfileParams, "-agentlib:hprof=cpu=times,heap=sites,force=n,thread=y,verbose=n," + "file=%s");
        // use Xprof for reduce to stdout
        sleepConf.Set(MRJobConfig.TaskReduceProfileParams, "-Xprof");
    }
    sleepJob.SetConf(sleepConf);

    // 2-map-2-reduce SleepJob
    Job job = sleepJob.CreateJob(2, 2, 500, 1, 500, 1);
    job.SetJarByClass(typeof(SleepJob));
    job.AddFileToClassPath(AppJar);
    // The AppMaster jar itself.
    job.WaitForCompletion(true);
    JobId jobId = TypeConverter.ToYarn(job.GetJobID());
    ApplicationId appID = jobId.GetAppId();

    // Poll once a second, for at most 60 seconds, until the application
    // reaches one of the terminal RM states.
    int pollElapsed = 0;
    while (true)
    {
        Sharpen.Thread.Sleep(1000);
        pollElapsed += 1000;
        if (TerminalRmAppStates.Contains(mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState()))
        {
            break;
        }
        if (pollElapsed >= 60000)
        {
            Log.Warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    NUnit.Framework.Assert.AreEqual(RMAppState.Finished, mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState());

    // Job finished, verify logs
    //
    Configuration nmConf = mrCluster.GetNodeManager(0).GetConfig();
    string appIdStr = appID.ToString();
    string appIdSuffix = Sharpen.Runtime.Substring(appIdStr, "application_".Length, appIdStr.Length);
    string containerGlob = "container_" + appIdSuffix + "_*_*";
    IDictionary<TaskAttemptID, Path> taLogDirs = new Dictionary<TaskAttemptID, Path>();
    Sharpen.Pattern taskPattern = Sharpen.Pattern.Compile(".*Task:(attempt_" + appIdSuffix + "_[rm]_" + "[0-9]+_[0-9]+).*");
    foreach (string logDir in nmConf.GetTrimmedStrings(YarnConfiguration.NmLogDirs))
    {
        // filter out MRAppMaster and create attemptId->logDir map
        //
        foreach (FileStatus fileStatus in localFs.GlobStatus(new Path(logDir + Path.Separator + appIdStr + Path.Separator + containerGlob + Path.Separator + TaskLog.LogName.Syslog)))
        {
            BufferedReader br = new BufferedReader(new InputStreamReader(localFs.Open(fileStatus.GetPath())));
            try
            {
                string line;
                while ((line = br.ReadLine()) != null)
                {
                    Matcher m = taskPattern.Matcher(line);
                    if (m.Matches())
                    {
                        // found Task done message
                        taLogDirs[TaskAttemptID.ForName(m.Group(1))] = fileStatus.GetPath().GetParent();
                        break;
                    }
                }
            }
            finally
            {
                // Close even when reading or parsing throws.
                br.Close();
            }
        }
    }
    NUnit.Framework.Assert.AreEqual(4, taLogDirs.Count);

    // all 4 attempts found
    foreach (KeyValuePair<TaskAttemptID, Path> dirEntry in taLogDirs)
    {
        TaskAttemptID tid = dirEntry.Key;
        Path profilePath = new Path(dirEntry.Value, TaskLog.LogName.Profile.ToString());
        Path stdoutPath = new Path(dirEntry.Value, TaskLog.LogName.Stdout.ToString());
        if (useDefault || tid.GetTaskType() == TaskType.Map)
        {
            if (tid.GetTaskID().GetId() == ProfiledTaskId)
            {
                // verify profile.out
                BufferedReader br = new BufferedReader(new InputStreamReader(localFs.Open(profilePath)));
                try
                {
                    string line = br.ReadLine();
                    NUnit.Framework.Assert.IsTrue("No hprof content found!", line != null && line.StartsWith("JAVA PROFILE"));
                }
                finally
                {
                    // Close even when the assertion above fails.
                    br.Close();
                }
                NUnit.Framework.Assert.AreEqual(0L, localFs.GetFileStatus(stdoutPath).GetLen());
            }
            else
            {
                NUnit.Framework.Assert.IsFalse("hprof file should not exist", localFs.Exists(profilePath));
            }
        }
        else
        {
            NUnit.Framework.Assert.IsFalse("hprof file should not exist", localFs.Exists(profilePath));
            if (tid.GetTaskID().GetId() == ProfiledTaskId)
            {
                // reducer is profiled with Xprof
                bool flatProfFound = false;
                BufferedReader br = new BufferedReader(new InputStreamReader(localFs.Open(stdoutPath)));
                try
                {
                    string line;
                    while ((line = br.ReadLine()) != null)
                    {
                        if (line.StartsWith("Flat profile"))
                        {
                            flatProfFound = true;
                            break;
                        }
                    }
                }
                finally
                {
                    // Close even when reading throws.
                    br.Close();
                }
                NUnit.Framework.Assert.IsTrue("Xprof flat profile not found!", flatProfFound);
            }
            else
            {
                NUnit.Framework.Assert.AreEqual(0L, localFs.GetFileStatus(stdoutPath).GetLen());
            }
        }
    }
}
/// <summary>
/// Merges the given in-memory map outputs into a single file on disk and
/// registers the result through <c>CloseOnDiskFile</c>.
/// Does nothing when <paramref name="inputs"/> is null or empty. When a
/// combiner class is configured the merged stream goes through the combiner;
/// otherwise it is written straight to the file. If an
/// <c>IOException</c> occurs the output file is deleted and the exception
/// is rethrown.
/// </summary>
/// <param name="inputs">In-memory map outputs to merge.</param>
/// <exception cref="System.IO.IOException"/>
public override void Merge(IList<InMemoryMapOutput<K, V>> inputs)
{
    if (inputs == null || inputs.Count == 0)
    {
        return;
    }

    // The output file is named after the first in-memory output in the list.
    // That name is guaranteed to be absent on disk right now, so an earlier
    // spill is never overwritten. The file is created up front because it is
    // not guaranteed to be present after the merge (empty files are deleted
    // as soon as they are seen in the merge method).
    TaskAttemptID firstMapId = inputs[0].GetMapId();
    TaskID firstMapTask = firstMapId.GetTaskID();

    IList<Merger.Segment<K, V>> segments = new AList<Merger.Segment<K, V>>();
    long mergeOutputSize = this._enclosing.CreateInMemorySegments(inputs, segments, 0);
    int segmentCount = segments.Count;

    Path outputPath = this._enclosing.mapOutputFile
        .GetInputFileForWrite(firstMapTask, mergeOutputSize)
        .Suffix(Org.Apache.Hadoop.Mapred.Task.MergedOutputPrefix);

    FSDataOutputStream outputStream = CryptoUtils.WrapIfNecessary(
        this._enclosing.jobConf,
        this._enclosing.rfs.Create(outputPath));

    IFile.Writer<K, V> writer = new IFile.Writer<K, V>(
        this._enclosing.jobConf,
        outputStream,
        (Type)this._enclosing.jobConf.GetMapOutputKeyClass(),
        (Type)this._enclosing.jobConf.GetMapOutputValueClass(),
        this._enclosing.codec,
        null,
        true);

    RawKeyValueIterator mergedIterator = null;
    MergeManagerImpl.CompressAwarePath compressAwarePath;
    try
    {
        MergeManagerImpl.Log.Info("Initiating in-memory merge with " + segmentCount + " segments...");

        mergedIterator = Merger.Merge(
            this._enclosing.jobConf,
            this._enclosing.rfs,
            (Type)this._enclosing.jobConf.GetMapOutputKeyClass(),
            (Type)this._enclosing.jobConf.GetMapOutputValueClass(),
            segments,
            segmentCount,
            new Path(this._enclosing.reduceId.ToString()),
            (RawComparator<K>)this._enclosing.jobConf.GetOutputKeyComparator(),
            this._enclosing.reporter,
            this._enclosing.spilledRecordsCounter,
            null,
            null);

        if (this._enclosing.combinerClass != null)
        {
            this._enclosing.combineCollector.SetWriter(writer);
            this._enclosing.CombineAndSpill(mergedIterator, this._enclosing.reduceCombineInputCounter);
        }
        else
        {
            Merger.WriteFile(mergedIterator, writer, this._enclosing.reporter, this._enclosing.jobConf);
        }
        writer.Close();

        compressAwarePath = new MergeManagerImpl.CompressAwarePath(
            outputPath,
            writer.GetRawLength(),
            writer.GetCompressedLength());

        MergeManagerImpl.Log.Info(this._enclosing.reduceId + " Merge of the " + segmentCount + " files in-memory complete." + " Local file is " + outputPath + " of size " + this._enclosing.localFS.GetFileStatus(outputPath).GetLen());
    }
    catch (IOException)
    {
        // Make sure the on-disk file created earlier does not survive a failed merge.
        this._enclosing.localFS.Delete(outputPath, true);
        throw;
    }

    // Note the output of the merge
    this._enclosing.CloseOnDiskFile(compressAwarePath);
}
/// <summary>
/// Creates an on-disk map output by delegating to the overload that takes an
/// explicit file system and output path. The file system passed on is the raw
/// form of the local file system for <paramref name="conf"/>, and the path is
/// the one <paramref name="mapOutputFile"/> returns for writing the input file
/// of the map task with the given <paramref name="size"/>.
/// </summary>
/// <param name="mapId">Map attempt whose output is stored.</param>
/// <param name="reduceId">Reduce attempt fetching the output.</param>
/// <param name="merger">Merge manager forwarded to the delegate constructor.</param>
/// <param name="size">Size of the map output.</param>
/// <param name="conf">Job configuration used to obtain the local file system.</param>
/// <param name="mapOutputFile">Supplies the path the output is written to.</param>
/// <param name="fetcher">Id of the fetcher, forwarded unchanged.</param>
/// <param name="primaryMapOutput">Forwarded unchanged to the delegate constructor.</param>
/// <exception cref="System.IO.IOException"/>
public OnDiskMapOutput(
    TaskAttemptID mapId,
    TaskAttemptID reduceId,
    MergeManagerImpl<K, V> merger,
    long size,
    JobConf conf,
    MapOutputFile mapOutputFile,
    int fetcher,
    bool primaryMapOutput)
    : this(
        mapId,
        reduceId,
        merger,
        size,
        conf,
        mapOutputFile,
        fetcher,
        primaryMapOutput,
        FileSystem.GetLocal(conf).GetRaw(),
        mapOutputFile.GetInputFileForWrite(mapId.GetTaskID(), size))
{
}