/// <summary>
/// Creates a fetcher: stores its collaborators, resolves the shuffle error counters from the
/// reporter, reads the connect/read timeouts and fetch-retry settings from the job, names the
/// thread <c>fetcher#id</c>, marks it as a daemon and, when SSL shuffle is enabled and no
/// factory exists yet, initialises the client-side <c>SSLFactory</c>.
/// </summary>
/// <param name="job">job configuration the shuffle settings are read from</param>
/// <param name="reduceId">reduce task attempt; the id of its task is kept in <c>reduce</c></param>
/// <param name="scheduler">shuffle scheduler stored on this fetcher</param>
/// <param name="merger">merge manager stored on this fetcher</param>
/// <param name="reporter">reporter used to look up the shuffle error counters</param>
/// <param name="metrics">shuffle client metrics stored on this fetcher</param>
/// <param name="exceptionReporter">exception reporter stored on this fetcher</param>
/// <param name="shuffleKey">secret key stored as <c>shuffleSecretKey</c></param>
/// <param name="id">fetcher id, also used in the thread name</param>
internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
    MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
{
    this.jobConf = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = id;
    this.reduce = reduceId.GetTaskID().GetId();
    this.shuffleSecretKey = shuffleKey;

    // One counter per shuffle error category, all in the same counter group.
    ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString());
    wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName,
        Fetcher.ShuffleErrors.WrongLength.ToString());
    badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString());
    wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName,
        Fetcher.ShuffleErrors.WrongMap.ToString());
    connectionErrs = reporter.GetCounter(ShuffleErrGrpName,
        Fetcher.ShuffleErrors.Connection.ToString());
    wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName,
        Fetcher.ShuffleErrors.WrongReduce.ToString());

    this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout,
        DefaultStalledCopyTimeout);
    this.readTimeout = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
    this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs,
        MRJobConfig.DefaultShuffleFetchRetryIntervalMs);
    this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs,
        DefaultStalledCopyTimeout);

    // Fetch retry defaults to whatever the NM-recovery setting is unless set explicitly.
    bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled,
        YarnConfiguration.DefaultNmRecoveryEnabled);
    this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled,
        shuffleFetchEnabledDefault);

    SetName("fetcher#" + id);
    SetDaemon(true);

    lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
    {
        sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey,
            MRConfig.ShuffleSslEnabledDefault);
        if (sslShuffle && sslFactory == null)
        {
            // Initialise into a local and publish only on success, so a failed Init() never
            // leaves a destroyed factory behind for the next "sslFactory == null" check.
            SSLFactory factory = new SSLFactory(SSLFactory.Mode.Client, job);
            try
            {
                factory.Init();
            }
            catch (Exception ex)
            {
                factory.Destroy();
                throw new RuntimeException(ex);
            }
            sslFactory = factory;
        }
    }
}
/// <summary>
/// Resets the flag directory: deletes the directory returned by <c>GetFlagDir</c> for the
/// configured "localFS" value (passing <c>true</c> as the second argument of
/// <c>Delete</c>) and then creates it again.
/// </summary>
/// <param name="conf">
/// configuration used to obtain the file system and the "localFS" flag (default <c>true</c>)
/// </param>
/// <exception cref="System.IO.IOException"/>
private static void CleanFlags(JobConf conf)
{
    FileSystem fileSystem = FileSystem.Get(conf);
    bool useLocalFs = conf.GetBoolean("localFS", true);

    fileSystem.Delete(GetFlagDir(useLocalFs), true);
    fileSystem.Mkdirs(GetFlagDir(useLocalFs));
}
/// <summary>
/// Builds the shuffle scheduler for one reduce attempt: creates and starts the referee,
/// sizes the bookkeeping from the number of map tasks of the job, stores the reporting
/// collaborators and counters, and reads the failure/retry limits from the job.
/// </summary>
/// <param name="job">job configuration (number of maps, failure and retry limits)</param>
/// <param name="status">task status stored on the scheduler</param>
/// <param name="reduceId">reduce task attempt this scheduler belongs to</param>
/// <param name="reporter">exception reporter stored on the scheduler</param>
/// <param name="progress">progress object stored on the scheduler</param>
/// <param name="shuffledMapsCounter">counter stored as <c>shuffledMapsCounter</c></param>
/// <param name="reduceShuffleBytes">counter stored as <c>reduceShuffleBytes</c></param>
/// <param name="failedShuffleCounter">counter stored as <c>failedShuffleCounter</c></param>
public ShuffleSchedulerImpl(JobConf job, TaskStatus status, TaskAttemptID reduceId,
    ExceptionReporter reporter, Progress progress, Counters.Counter shuffledMapsCounter,
    Counters.Counter reduceShuffleBytes, Counters.Counter failedShuffleCounter)
{
    referee = new ShuffleSchedulerImpl.Referee(this);

    // Collaborators and counters handed in by the caller.
    this.reporter = reporter;
    this.status = status;
    this.reduceId = reduceId;
    this.progress = progress;
    this.shuffledMapsCounter = shuffledMapsCounter;
    this.reduceShuffleBytes = reduceShuffleBytes;
    this.failedShuffleCounter = failedShuffleCounter;

    // Bookkeeping derived from the number of map tasks.
    totalMaps = job.GetNumMapTasks();
    remainingMaps = totalMaps;
    finishedMaps = new bool[remainingMaps];
    abortFailureLimit = Math.Max(30, totalMaps / 10);
    copyTimeTracker = new ShuffleSchedulerImpl.CopyTimeTracker();

    startTime = Time.MonotonicNow();
    lastProgressTime = startTime;

    referee.Start();

    // Limits assigned after the referee has been started, as before.
    maxFailedUniqueFetches = Math.Min(totalMaps, 5);
    maxFetchFailuresBeforeReporting = job.GetInt(MRJobConfig.ShuffleFetchFailures,
        ReportFailureLimit);
    reportReadErrorImmediately = job.GetBoolean(MRJobConfig.ShuffleNotifyReaderror, true);
    maxDelay = job.GetLong(MRJobConfig.MaxShuffleFetchRetryDelay,
        MRJobConfig.DefaultMaxShuffleFetchRetryDelay);
    maxHostFailures = job.GetInt(MRJobConfig.MaxShuffleFetchHostFailures,
        MRJobConfig.DefaultMaxShuffleFetchHostFailures);
}
/// <summary>
/// Checks the environment of the AM container built for a job that sets an admin and a user
/// LD_LIBRARY_PATH entry plus an admin user shell: LD_LIBRARY_PATH must be PWD, the admin
/// path and the user path joined by the class-path separator, and SHELL must be the
/// configured shell.
/// </summary>
public virtual void TestAMStandardEnv()
{
    string adminLibPath = "foo";
    string userLibPath = "bar";
    string userShell = "shell";

    JobConf jobConf = new JobConf();
    jobConf.Set(MRJobConfig.MrAmAdminUserEnv, "LD_LIBRARY_PATH=" + adminLibPath);
    jobConf.Set(MRJobConfig.MrAmEnv, "LD_LIBRARY_PATH=" + userLibPath);
    jobConf.Set(MRJobConfig.MapredAdminUserShell, userShell);

    YARNRunner yarnRunner = new YARNRunner(jobConf);
    ApplicationSubmissionContext appSubCtx = BuildSubmitContext(yarnRunner, jobConf);
    ContainerLaunchContext clc = appSubCtx.GetAMContainerSpec();
    IDictionary<string, string> env = clc.GetEnvironment();

    // make sure PWD is first in the lib path
    // TryGetValue leaves the value null for a missing key, so the IsNotNull check below
    // reports the problem instead of the indexer throwing KeyNotFoundException.
    string libPath;
    env.TryGetValue(ApplicationConstants.Environment.LdLibraryPath.ToString(), out libPath);
    NUnit.Framework.Assert.IsNotNull(libPath, "LD_LIBRARY_PATH not set");

    string cps = jobConf.GetBoolean(MRConfig.MapreduceAppSubmissionCrossPlatform,
        MRConfig.DefaultMapreduceAppSubmissionCrossPlatform)
        ? ApplicationConstants.ClassPathSeparator
        : FilePath.pathSeparator;
    string expectedLibPath =
        MRApps.CrossPlatformifyMREnv(conf, ApplicationConstants.Environment.Pwd)
        + cps + adminLibPath + cps + userLibPath;
    // NUnit order is (expected, actual, message); the message-first order compared the
    // message text against the expected value.
    NUnit.Framework.Assert.AreEqual(expectedLibPath, libPath, "Bad AM LD_LIBRARY_PATH setting");

    // make sure SHELL is set
    string shell;
    env.TryGetValue(ApplicationConstants.Environment.Shell.ToString(), out shell);
    NUnit.Framework.Assert.IsNotNull(shell, "SHELL not set");
    NUnit.Framework.Assert.AreEqual(userShell, shell, "Bad SHELL setting");
}
/// <summary>List input directories.</summary>
/// <remarks>
/// Subclasses may override to, e.g., select only files matching a regular expression.
/// The listing is filtered by the hidden-file filter combined with the job's own input path
/// filter (if any). When the configured number of listing threads is 1 the single-threaded
/// path is used, otherwise a <c>LocatedFileStatusFetcher</c> performs the listing.
/// </remarks>
/// <param name="job">the job to list input paths for</param>
/// <returns>array of FileStatus objects</returns>
/// <exception cref="System.IO.IOException">
/// if the job has no input paths, or if listing fails; a non-I/O failure of the fetcher is
/// wrapped with the original exception attached as the inner exception.
/// </exception>
protected internal virtual FileStatus[] ListStatus(JobConf job)
{
    Path[] dirs = GetInputPaths(job);
    if (dirs.Length == 0)
    {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);

    // Whether we need to recursively look into the directory structure
    bool recursive = job.GetBoolean(InputDirRecursive, false);

    // Build a MultiPathFilter from the hiddenFileFilter and the user-provided one (if any).
    IList<PathFilter> filters = new AList<PathFilter>();
    filters.AddItem(hiddenFileFilter);
    PathFilter jobFilter = GetInputPathFilter(job);
    if (jobFilter != null)
    {
        filters.AddItem(jobFilter);
    }
    PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

    FileStatus[] result;
    int numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads,
        FileInputFormat.DefaultListStatusNumThreads);
    StopWatch sw = new StopWatch().Start();
    if (numThreads == 1)
    {
        IList<FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter,
            recursive);
        result = Sharpen.Collections.ToArray(locatedFiles,
            new FileStatus[locatedFiles.Count]);
    }
    else
    {
        IEnumerable<FileStatus> locatedFiles = null;
        try
        {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                job, dirs, recursive, inputFilter, false);
            locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
        }
        catch (IOException)
        {
            // A genuine I/O failure must surface as-is, not be relabelled as an interrupt.
            throw;
        }
        catch (Exception ex)
        {
            // Keep the message callers may rely on, but preserve the cause for diagnosis.
            throw new IOException("Interrupted while getting file statuses", ex);
        }
        result = Iterables.ToArray<FileStatus>(locatedFiles);
    }
    sw.Stop();

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
    }
    Log.Info("Total input paths to process : " + result.Length);
    return result;
}
/// <summary>
/// Stores the job configuration, switches off the automatic increment of the reducer
/// processed-record counter and reads the skip-records flag.
/// </summary>
/// <param name="job">the job configuration</param>
public virtual void Configure(JobConf job)
{
    this.job = job;

    // Auto-increment is disabled because, for pipes, the number of processed records can be
    // equal to or lower than the number of input records.
    SkipBadRecords.SetAutoIncrReducerProcCount(job, false);

    bool skipRecords = job.GetBoolean(MRJobConfig.SkipRecords, false);
    skipping = skipRecords;
}
/// <summary>
/// Mapper-side check that the desired user environment is present: verifies the configured
/// task JVM options, that LD_LIBRARY_PATH contains the working directory, a series of
/// environment variables via <c>CheckEnv</c>, and that the job-local directory is set.
/// </summary>
/// <param name="job">the job configuration under test</param>
public override void Configure(JobConf job)
{
    // NUnit takes the message as the LAST argument; the message-first (JUnit) order made the
    // null checks vacuous and compared the message text with the expected value.
    bool oldConfigs = job.GetBoolean(OldConfigs, false);
    if (oldConfigs)
    {
        string javaOpts = job.Get(JobConf.MapredTaskJavaOpts);
        NUnit.Framework.Assert.IsNotNull(javaOpts, JobConf.MapredTaskJavaOpts + " is null!");
        NUnit.Framework.Assert.AreEqual(TaskOptsVal, javaOpts,
            JobConf.MapredTaskJavaOpts + " has value of: " + javaOpts);
    }
    else
    {
        string mapJavaOpts = job.Get(JobConf.MapredMapTaskJavaOpts);
        NUnit.Framework.Assert.IsNotNull(mapJavaOpts,
            JobConf.MapredMapTaskJavaOpts + " is null!");
        NUnit.Framework.Assert.AreEqual(MapOptsVal, mapJavaOpts,
            JobConf.MapredMapTaskJavaOpts + " has value of: " + mapJavaOpts);
    }

    string path = job.Get("path");

    // check if the pwd is there in LD_LIBRARY_PATH
    // A missing variable now fails the assertion instead of throwing on a null reference.
    string pwd = Runtime.Getenv("PWD");
    string ldLibraryPath = Runtime.Getenv("LD_LIBRARY_PATH");
    NUnit.Framework.Assert.IsTrue(
        pwd != null && ldLibraryPath != null && ldLibraryPath.Contains(pwd),
        "LD doesnt contain pwd");

    // check if X=$X:/abc works for LD_LIBRARY_PATH
    CheckEnv("LD_LIBRARY_PATH", "/tmp", "append");
    // check if X=y works for an already existing parameter
    CheckEnv("LANG", "en_us_8859_1", "noappend");
    // check if X=/tmp for a new env variable
    CheckEnv("MY_PATH", "/tmp", "noappend");
    // check if X=$X:/tmp works for a new env var and results into :/tmp
    CheckEnv("NEW_PATH", FilePath.pathSeparator + "/tmp", "noappend");

    // check if X=$(tt's X var):/tmp for an old env variable inherited from the tt
    if (Shell.Windows)
    {
        // On Windows, PATH is replaced one more time as part of default config
        // of "mapreduce.admin.user.env", i.e. on Windows,
        // "mapreduce.admin.user.env" is set to
        // "PATH=%PATH%;%HADOOP_COMMON_HOME%\\bin"
        string hadoopHome = Runtime.Getenv("HADOOP_COMMON_HOME");
        if (hadoopHome == null)
        {
            hadoopHome = string.Empty;
        }
        string hadoopLibLocation = hadoopHome + "\\bin";
        path += FilePath.pathSeparator + hadoopLibLocation;
        path += FilePath.pathSeparator + path;
    }
    CheckEnv("PATH", path + FilePath.pathSeparator + "/tmp", "noappend");

    string jobLocalDir = job.Get(MRJobConfig.JobLocalDir);
    NUnit.Framework.Assert.IsNotNull(jobLocalDir, MRJobConfig.JobLocalDir + " is null");
}
/// <summary>Run the map task.</summary>
/// <remarks>
/// Creates the <c>Application</c>, starts the map over its downlink and, when the job uses a
/// Java record reader, feeds every input record to the downlink before signalling end of
/// input. The method then waits for the application to finish; any exception raised while
/// feeding or waiting is handed to <c>Abort</c>, and <c>Cleanup</c> always runs.
/// </remarks>
/// <param name="input">the set of inputs</param>
/// <param name="output">the object to collect the outputs of the map</param>
/// <param name="reporter">the object to update with status</param>
/// <exception cref="System.IO.IOException"/>
public override void Run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
    Reporter reporter)
{
    Application<K1, V1, K2, V2> application = null;
    try
    {
        // The raw input is only passed through when neither the record reader nor the
        // mapper is implemented in Java.
        RecordReader<FloatWritable, NullWritable> fakeInput = null;
        if (!Submitter.GetIsJavaRecordReader(job) && !Submitter.GetIsJavaMapper(job))
        {
            fakeInput = (RecordReader<FloatWritable, NullWritable>)input;
        }
        application = new Application<K1, V1, K2, V2>(job, fakeInput, output, reporter,
            (Type)job.GetOutputKeyClass(), (Type)job.GetOutputValueClass());
    }
    catch (Exception ie)
    {
        throw new RuntimeException("interrupted", ie);
    }

    DownwardProtocol<K1, V1> downlink = application.GetDownlink();
    bool isJavaInput = Submitter.GetIsJavaRecordReader(job);
    downlink.RunMap(reporter.GetInputSplit(), job.GetNumReduceTasks(), isJavaInput);
    bool skipping = job.GetBoolean(MRJobConfig.SkipRecords, false);

    try
    {
        if (isJavaInput)
        {
            // One key/value instance pair is allocated and re-used for every record.
            K1 key = input.CreateKey();
            V1 value = input.CreateValue();
            downlink.SetInputTypes(key.GetType().FullName, value.GetType().FullName);

            while (input.Next(key, value))
            {
                // map pair to output
                downlink.MapItem(key, value);
                if (skipping)
                {
                    // In skip mode the streams are flushed after every record so that
                    // records around a bad one are not left sitting in a buffer.
                    downlink.Flush();
                }
            }
            downlink.EndOfInput();
        }
        application.WaitForFinish();
    }
    catch (Exception t)
    {
        application.Abort(t);
    }
    finally
    {
        application.Cleanup();
    }
}
/// <summary>
/// Calls <c>Chain.SetReducer</c> with the by-value argument set to <c>false</c> and checks
/// that "chain.reducer.byValue" reads back as <c>false</c> from the reducer configuration.
/// </summary>
public virtual void TestSetReducerWithReducerByValueAsFalse()
{
    JobConf jobConf = new JobConf();
    JobConf reducerConf = new JobConf();

    Chain.SetReducer(jobConf, typeof(TestChain.MyReducer), typeof(object), typeof(object),
        typeof(object), typeof(object), false, reducerConf);

    // Default of true so that a value that was never written makes the assertion fail.
    bool reduceByValue = reducerConf.GetBoolean("chain.reducer.byValue", true);

    // NUnit takes the message last; the message-first order compared the message string
    // with `false` and could never pass.
    NUnit.Framework.Assert.IsFalse(reduceByValue,
        "It should set chain.reducer.byValue as false "
        + "in reducerConf when we give value as false");
}
/// <summary>
/// Creates an empty file named <paramref name="flag"/> in the flag directory, after calling
/// <c>Fail</c> if <c>GetFlag</c> reports that the flag already exists.
/// </summary>
/// <param name="conf">
/// configuration used to obtain the file system and the "localFS" flag (default <c>true</c>)
/// </param>
/// <param name="flag">name of the flag file to create</param>
/// <exception cref="System.IO.IOException"/>
private static void WriteFlag(JobConf conf, string flag)
{
    FileSystem fileSystem = FileSystem.Get(conf);

    bool alreadyPresent = GetFlag(conf, flag);
    if (alreadyPresent)
    {
        Fail("Flag " + flag + " already exists");
    }

    bool useLocalFs = conf.GetBoolean("localFS", true);
    Path flagFile = new Path(GetFlagDir(useLocalFs), flag);

    // The stream is closed right after creation; no content is written.
    DataOutputStream stream = fileSystem.Create(flagFile);
    stream.Close();
}
/// <summary>Configures all the chain elements for the task.</summary>
/// <remarks>
/// Instantiates every configured chain mapper with its own element configuration and
/// records a key/value serialization for it, or <c>null</c> entries when the element is
/// not flagged as by-value. The optional chain reducer is handled the same way.
/// </remarks>
/// <param name="jobConf">chain job's JobConf.</param>
public virtual void Configure(JobConf jobConf)
{
    string prefix = GetPrefix(isMap);
    chainJobConf = jobConf;
    SerializationFactory serializationFactory = new SerializationFactory(chainJobConf);

    int mapperCount = jobConf.GetInt(prefix + ChainMapperSize, 0);
    for (int position = 0; position < mapperCount; position++)
    {
        Type mapperType = jobConf.GetClass<Mapper>(prefix + ChainMapperClass + position, null);
        JobConf mapperConf = new JobConf(
            GetChainElementConf(jobConf, prefix + ChainMapperConfig + position));
        Mapper mapper = ReflectionUtils.NewInstance(mapperType, mapperConf);
        mappers.AddItem(mapper);

        if (!mapperConf.GetBoolean(MapperByValue, true))
        {
            // Not by-value: placeholders keep the serialization lists aligned with mappers.
            mappersKeySerialization.AddItem(null);
            mappersValueSerialization.AddItem(null);
        }
        else
        {
            mappersKeySerialization.AddItem(serializationFactory.GetSerialization(
                mapperConf.GetClass(MapperOutputKeyClass, null)));
            mappersValueSerialization.AddItem(serializationFactory.GetSerialization(
                mapperConf.GetClass(MapperOutputValueClass, null)));
        }
    }

    Type reducerType = jobConf.GetClass<Reducer>(prefix + ChainReducerClass, null);
    if (reducerType == null)
    {
        // No reducer configured for this chain.
        return;
    }

    JobConf reducerConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainReducerConfig));
    reducer = ReflectionUtils.NewInstance(reducerType, reducerConf);

    if (!reducerConf.GetBoolean(ReducerByValue, true))
    {
        reducerKeySerialization = null;
        reducerValueSerialization = null;
    }
    else
    {
        reducerKeySerialization = serializationFactory.GetSerialization(
            reducerConf.GetClass(ReducerOutputKeyClass, null));
        reducerValueSerialization = serializationFactory.GetSerialization(
            reducerConf.GetClass(ReducerOutputValueClass, null));
    }
}
/// <summary>Helper function to generate a name that is unique for the task.</summary>
/// <remarks>
/// <p>The generated name can be used to create custom files from within the different
/// tasks of the job; the names for different tasks will not collide with each other.</p>
/// <p>The given name is postfixed with the task type, 'm' for maps, 'r' for reduces, and
/// the task partition number formatted with at least five integer digits and no grouping.
/// For example, given the name 'test' running on the first map of the job, the generated
/// name will be 'test-m-00000'.</p>
/// </remarks>
/// <param name="conf">the configuration for the job.</param>
/// <param name="name">the name to make unique.</param>
/// <returns>a unique name across all tasks of the job.</returns>
/// <exception cref="System.ArgumentException">
/// if the configuration carries no task partition (the value reads back as -1).
/// </exception>
public static string GetUniqueName(JobConf conf, string name)
{
    int partition = conf.GetInt(JobContext.TaskPartition, -1);
    if (partition == -1)
    {
        throw new ArgumentException("This method can only be called from within a Job");
    }

    string taskType;
    if (conf.GetBoolean(JobContext.TaskIsmap, JobContext.DefaultTaskIsmap))
    {
        taskType = "m";
    }
    else
    {
        taskType = "r";
    }

    NumberFormat partitionFormat = NumberFormat.GetInstance();
    partitionFormat.SetMinimumIntegerDigits(5);
    partitionFormat.SetGroupingUsed(false);

    return name + "-" + taskType + "-" + partitionFormat.Format(partition);
}
/// <summary>Tells whether job-setup and job-cleanup are needed for the job.</summary>
/// <returns>
/// the value of <c>MRJobConfig.SetupCleanupNeeded</c> in the configuration, or
/// <c>true</c> when it is not set
/// </returns>
public virtual bool GetJobSetupCleanupNeeded()
{
    bool setupCleanupNeeded = conf.GetBoolean(MRJobConfig.SetupCleanupNeeded, true);
    return setupCleanupNeeded;
}
/// <summary>Is the job output compressed?</summary>
/// <param name="conf">the <see cref="JobConf"/> to look in</param>
/// <returns>
/// <code>true</code> if the job output should be compressed, <code>false</code> otherwise
/// (also when the setting is absent)
/// </returns>
public static bool GetCompressOutput(JobConf conf)
{
    bool compress = conf.GetBoolean(FileOutputFormat.Compress, false);
    return compress;
}
/// <summary>Check whether the job is using a Java RecordReader.</summary>
/// <param name="conf">the configuration to check</param>
/// <returns>
/// the value of the <c>IsJavaRr</c> setting, or <c>false</c> when it is not set
/// </returns>
public static bool GetIsJavaRecordReader(JobConf conf)
{
    bool isJavaRecordReader =
        conf.GetBoolean(Org.Apache.Hadoop.Mapred.Pipes.Submitter.IsJavaRr, false);
    return isJavaRecordReader;
}
/// <summary>Returns if a named output is multiple.</summary>
/// <remarks>
/// <c>CheckNamedOutput</c> is invoked on the name before the setting is read.
/// NOTE(review): earlier documentation stated that an undefined named output yields
/// <code>false</code>; whether <c>CheckNamedOutput</c> rejects such names first is not
/// visible here — confirm.
/// </remarks>
/// <param name="conf">job conf</param>
/// <param name="namedOutput">named output</param>
/// <returns>
/// <code>true</code> if the named output is multi, <code>false</code> if it is single or
/// the multi setting for it is absent
/// </returns>
public static bool IsMultiNamedOutput(JobConf conf, string namedOutput)
{
    CheckNamedOutput(conf, namedOutput, false);

    string multiKey = MoPrefix + namedOutput + Multi;
    return conf.GetBoolean(multiKey, false);
}
/// <summary>
/// Reads the two exception flags from the job: "multithreaded.ioException" into
/// <c>ioEx</c> and "multithreaded.runtimeException" into <c>rtEx</c>; both default to
/// <c>false</c>.
/// </summary>
/// <param name="job">the job configuration</param>
public virtual void Configure(JobConf job)
{
    bool ioExceptionFlag = job.GetBoolean("multithreaded.ioException", false);
    bool runtimeExceptionFlag = job.GetBoolean("multithreaded.runtimeException", false);

    ioEx = ioExceptionFlag;
    rtEx = runtimeExceptionFlag;
}
/// <summary>Returns if the counters for the named outputs are enabled or not.</summary>
/// <remarks>
/// <p>
/// MultipleOutputs supports counters; by default they are disabled. The counters group is
/// the <see cref="MultipleOutputs"/> class name.
/// </p>
/// The names of the counters are the same as the named outputs. For multi named outputs
/// the name of the counter is the concatenation of the named output, an underscore '_' and
/// the multiname.
/// </remarks>
/// <param name="conf">job conf to read the counters setting from.</param>
/// <returns>TRUE if the counters are enabled, FALSE if they are disabled.</returns>
public static bool GetCountersEnabled(JobConf conf)
{
    bool countersEnabled = conf.GetBoolean(CountersEnabled, false);
    return countersEnabled;
}
/// <summary>Does the user want to keep the command file for debugging?</summary>
/// <remarks>
/// If this is true, pipes will write a copy of the command data to a file in the task
/// directory named "downlink.data", which may be used to run the C++ program under the
/// debugger. You probably also want to set JobConf.setKeepFailedTaskFiles(true) to keep
/// the entire directory from being deleted.
/// To run using the data file, set the environment variable "mapreduce.pipes.commandfile"
/// to point to the file.
/// </remarks>
/// <param name="conf">the configuration to check</param>
/// <returns>
/// will the framework save the command file? (<c>false</c> when the setting is absent)
/// </returns>
public static bool GetKeepCommandFile(JobConf conf)
{
    bool keepCommandFile = conf.GetBoolean(
        Org.Apache.Hadoop.Mapred.Pipes.Submitter.PreserveCommandfile, false);
    return keepCommandFile;
}
/// <summary>
/// Builds the merge manager: stores its collaborators, derives the memory limits and merge
/// thresholds from the job configuration, validates them, and starts the merger threads
/// (memory-to-memory only when enabled, in-memory and on-disk always).
/// </summary>
/// <param name="reduceId">reduce task attempt this manager belongs to</param>
/// <param name="jobConf">job configuration the limits and thresholds are read from</param>
/// <param name="localFS">
/// local file system; it is cast to <c>LocalFileSystem</c> to obtain the raw file system
/// </param>
/// <param name="localDirAllocator">local directory allocator stored on the manager</param>
/// <param name="reporter">reporter stored on the manager</param>
/// <param name="codec">compression codec stored on the manager</param>
/// <param name="combinerClass">combiner type stored on the manager</param>
/// <param name="combineCollector">combine output collector stored on the manager</param>
/// <param name="spilledRecordsCounter">counter stored on the manager</param>
/// <param name="reduceCombineInputCounter">counter stored on the manager</param>
/// <param name="mergedMapOutputsCounter">counter stored on the manager</param>
/// <param name="exceptionReporter">exception reporter stored on the manager</param>
/// <param name="mergePhase">progress object stored as <c>mergePhase</c></param>
/// <param name="mapOutputFile">map output file helper; it receives the job configuration</param>
/// <exception cref="System.ArgumentException">
/// if the input-buffer percent is outside [0, 1] or the single-shuffle memory limit
/// percent is outside (0, 1].
/// </exception>
public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
    LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec,
    Type combinerClass, Task.CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter, Counters.Counter reduceCombineInputCounter,
    Counters.Counter mergedMapOutputsCounter, ExceptionReporter exceptionReporter,
    Progress mergePhase, MapOutputFile mapOutputFile)
{
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.SetConf(jobConf);
    this.localFS = localFS;
    this.rfs = ((LocalFileSystem)localFS).GetRaw();

    float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent,
        MRJobConfig.DefaultShuffleInputBufferPercent);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
    {
        throw new ArgumentException("Invalid value for "
            + MRJobConfig.ShuffleInputBufferPercent + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes,
        Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
    this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);

    // Maximum percentage of the in-memory limit that a single shuffle can consume.
    float singleShuffleMemoryLimitPercent = jobConf.GetFloat(
        MRJobConfig.ShuffleMemoryLimitPercent, DefaultShuffleMemoryLimitPercent);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f)
    {
        throw new ArgumentException("Invalid value for "
            + MRJobConfig.ShuffleMemoryLimitPercent + ": " + singleShuffleMemoryLimitPercent);
    }

    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold,
        ioSortFactor);
    this.mergeThreshold = (long)(this.memoryLimit
        * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent, 0.90f));

    Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
        + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", "
        + "ioSortFactor=" + ioSortFactor + ", " + "memToMemMergeOutputsThreshold="
        + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold)
    {
        // A ", " separator keeps the first value from running into the next label.
        throw new RuntimeException("Invalid configuration: "
            + "maxSingleShuffleLimit should be less than mergeThreshold "
            + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
            + ", mergeThreshold: " + this.mergeThreshold);
    }

    bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);
    if (allowMemToMemMerge)
    {
        this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this,
            this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.Start();
    }
    else
    {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = CreateInMemoryMerger();
    this.inMemoryMerger.Start();

    this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
    this.onDiskMerger.Start();

    this.mergePhase = mergePhase;
}
// TODO later: add explicit "isUber()" checks of some sort
/// <summary>
/// Runs a sleep job with small log size limits and a fixed number of log backups for both
/// the AM and the tasks, waits for the application to reach a terminal state, then checks
/// the syslog files found under every node manager log directory: the number of syslog*
/// files per container and the minimum length of the second file after sorting.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void TestContainerRollingLog()
{
    if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
    {
        Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
        return;
    }

    SleepJob sleepJob = new SleepJob();
    JobConf sleepConf = new JobConf(mrCluster.GetConfig());
    sleepConf.Set(MRJobConfig.MapLogLevel, Level.All.ToString());
    long userLogKb = 4;
    sleepConf.SetLong(MRJobConfig.TaskUserlogLimit, userLogKb);
    sleepConf.SetInt(MRJobConfig.TaskLogBackups, 3);
    sleepConf.Set(MRJobConfig.MrAmLogLevel, Level.All.ToString());
    long amLogKb = 7;
    sleepConf.SetLong(MRJobConfig.MrAmLogKb, amLogKb);
    sleepConf.SetInt(MRJobConfig.MrAmLogBackups, 7);
    sleepJob.SetConf(sleepConf);

    Job job = sleepJob.CreateJob(1, 0, 1L, 100, 0L, 0);
    job.SetJarByClass(typeof(SleepJob));
    job.AddFileToClassPath(AppJar); // The AppMaster jar itself.
    job.WaitForCompletion(true);

    JobId jobId = TypeConverter.ToYarn(job.GetJobID());
    ApplicationId appID = jobId.GetAppId();

    // Poll once a second, for at most 60 seconds, until the RM reports a terminal state.
    int pollElapsed = 0;
    while (true)
    {
        Sharpen.Thread.Sleep(1000);
        pollElapsed += 1000;
        if (TerminalRmAppStates.Contains(
            mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState()))
        {
            break;
        }
        if (pollElapsed >= 60000)
        {
            Log.Warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    NUnit.Framework.Assert.AreEqual(RMAppState.Finished,
        mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState());

    // Job finished, verify logs
    string appIdStr = appID.ToString();
    string appIdSuffix = Sharpen.Runtime.Substring(appIdStr, "application_".Length,
        appIdStr.Length);
    string containerGlob = "container_" + appIdSuffix + "_*_*";
    string syslogGlob = appIdStr + Path.Separator + containerGlob + Path.Separator
        + TaskLog.LogName.Syslog;

    int numAppMasters = 0;
    int numMapTasks = 0;
    for (int i = 0; i < NumNodeMgrs; i++)
    {
        Configuration nmConf = mrCluster.GetNodeManager(i).GetConfig();
        foreach (string logDir in nmConf.GetTrimmedStrings(YarnConfiguration.NmLogDirs))
        {
            Path absSyslogGlob = new Path(logDir + Path.Separator + syslogGlob);
            Log.Info("Checking for glob: " + absSyslogGlob);
            FileStatus[] syslogs = localFs.GlobStatus(absSyslogGlob);
            foreach (FileStatus slog in syslogs)
            {
                bool foundAppMaster = job.IsUber();
                Path containerPathComponent = slog.GetPath().GetParent();
                if (!foundAppMaster)
                {
                    ContainerId cid = ConverterUtils.ToContainerId(
                        containerPathComponent.GetName());
                    foundAppMaster =
                        ((cid.GetContainerId() & ContainerId.ContainerIdBitmask) == 1);
                }

                FileStatus[] sysSiblings = localFs.GlobStatus(
                    new Path(containerPathComponent, TaskLog.LogName.Syslog + "*"));
                // sort to ensure for i > 0 sysSiblings[i] == "syslog.i"
                Arrays.Sort(sysSiblings);

                // The counts are compared with AreEqual: AreSame on two boxed ints is a
                // reference comparison and can never succeed. NUnit takes the message last.
                if (foundAppMaster)
                {
                    numAppMasters++;
                    NUnit.Framework.Assert.AreEqual(
                        sleepConf.GetInt(MRJobConfig.MrAmLogBackups, 0) + 1,
                        sysSiblings.Length,
                        "Unexpected number of AM sylog* files");
                    NUnit.Framework.Assert.IsTrue(
                        sysSiblings[1].GetLen() >= amLogKb * 1024,
                        "AM syslog.1 length kb should be >= " + amLogKb);
                }
                else
                {
                    numMapTasks++;
                    NUnit.Framework.Assert.AreEqual(
                        sleepConf.GetInt(MRJobConfig.TaskLogBackups, 0) + 1,
                        sysSiblings.Length,
                        "Unexpected number of MR task sylog* files");
                    NUnit.Framework.Assert.IsTrue(
                        sysSiblings[1].GetLen() >= userLogKb * 1024,
                        "MR syslog.1 length kb should be >= " + userLogKb);
                }
            }
        }
    }

    // Make sure we checked non-empty set
    NUnit.Framework.Assert.AreEqual(1, numAppMasters, "No AppMaster log found!");
    if (sleepConf.GetBoolean(MRJobConfig.JobUbertaskEnable, false))
    {
        NUnit.Framework.Assert.AreEqual(0, numMapTasks, "MapTask log with uber found!");
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(1, numMapTasks, "No MapTask log found!");
    }
}
/// <summary>
/// Tells whether a file named <paramref name="flag"/> exists in the flag directory.
/// </summary>
/// <param name="conf">
/// configuration used to obtain the file system and the "localFS" flag (default <c>true</c>)
/// </param>
/// <param name="flag">name of the flag file to look for</param>
/// <returns>the result of the file system's existence check for the flag path</returns>
/// <exception cref="System.IO.IOException"/>
private static bool GetFlag(JobConf conf, string flag)
{
    FileSystem fileSystem = FileSystem.Get(conf);
    bool useLocalFs = conf.GetBoolean("localFS", true);
    Path flagFile = new Path(GetFlagDir(useLocalFs), flag);
    return fileSystem.Exists(flagFile);
}
/// <summary>
/// Installs the job configuration through <c>SetConf</c> and reads the
/// "fs.test.fastCheck" flag (default <c>false</c>) into <c>fastCheck</c>.
/// </summary>
/// <param name="job">the job configuration</param>
public virtual void Configure(JobConf job)
{
    SetConf(job);

    bool fastCheckRequested = job.GetBoolean("fs.test.fastCheck", false);
    fastCheck = fastCheckRequested;
}