internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl<K, V> scheduler,
    MergeManager<K, V> merger, Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
{
    /* Default read timeout (in milliseconds) */
    // Initial value is 0, which means it hasn't retried yet.
    this.jobConf = job;
    this.reporter = reporter;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.exceptionReporter = exceptionReporter;
    this.id = id;
    this.reduce = reduceId.GetTaskID().GetId();
    this.shuffleSecretKey = shuffleKey;
    ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString());
    wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongLength.ToString());
    badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString());
    wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongMap.ToString());
    connectionErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.Connection.ToString());
    wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongReduce.ToString());
    this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout, DefaultStalledCopyTimeout);
    this.readTimeout = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
    this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs,
        MRJobConfig.DefaultShuffleFetchRetryIntervalMs);
    this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs, DefaultStalledCopyTimeout);
    // Fetch retry defaults to on whenever NodeManager recovery is enabled.
    bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled,
        YarnConfiguration.DefaultNmRecoveryEnabled);
    this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, shuffleFetchEnabledDefault);
    SetName("fetcher#" + id);
    SetDaemon(true);
    lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
    {
        sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey, MRConfig.ShuffleSslEnabledDefault);
        if (sslShuffle && sslFactory == null)
        {
            sslFactory = new SSLFactory(SSLFactory.Mode.Client, job);
            try
            {
                sslFactory.Init();
            }
            catch (Exception ex)
            {
                sslFactory.Destroy();
                throw new RuntimeException(ex);
            }
        }
    }
}
private static JobEndNotifier.JobEndStatusInfo CreateNotification(JobConf conf, JobStatus status)
{
    JobEndNotifier.JobEndStatusInfo notification = null;
    string uri = conf.GetJobEndNotificationURI();
    if (uri != null)
    {
        int retryAttempts = conf.GetInt(JobContext.MrJobEndRetryAttempts, 0);
        long retryInterval = conf.GetInt(JobContext.MrJobEndRetryInterval, 30000);
        int timeout = conf.GetInt(JobContext.MrJobEndNotificationTimeout,
            JobContext.DefaultMrJobEndNotificationTimeout);
        // Substitute the $jobId and $jobStatus placeholders in the notification URI.
        if (uri.Contains("$jobId"))
        {
            uri = uri.Replace("$jobId", ((JobID)status.GetJobID()).ToString());
        }
        if (uri.Contains("$jobStatus"))
        {
            string statusStr = (status.GetRunState() == JobStatus.Succeeded) ? "SUCCEEDED"
                : (status.GetRunState() == JobStatus.Failed) ? "FAILED" : "KILLED";
            uri = uri.Replace("$jobStatus", statusStr);
        }
        notification = new JobEndNotifier.JobEndStatusInfo(uri, retryAttempts, retryInterval, timeout);
    }
    return notification;
}
public ShuffleSchedulerImpl(JobConf job, TaskStatus status, TaskAttemptID reduceId,
    ExceptionReporter reporter, Progress progress, Counters.Counter shuffledMapsCounter,
    Counters.Counter reduceShuffleBytes, Counters.Counter failedShuffleCounter)
{
    referee = new ShuffleSchedulerImpl.Referee(this);
    totalMaps = job.GetNumMapTasks();
    // Abort once 10% of maps (but at least 30) have failed to shuffle.
    abortFailureLimit = Math.Max(30, totalMaps / 10);
    copyTimeTracker = new ShuffleSchedulerImpl.CopyTimeTracker();
    remainingMaps = totalMaps;
    finishedMaps = new bool[remainingMaps];
    this.reporter = reporter;
    this.status = status;
    this.reduceId = reduceId;
    this.progress = progress;
    this.shuffledMapsCounter = shuffledMapsCounter;
    this.reduceShuffleBytes = reduceShuffleBytes;
    this.failedShuffleCounter = failedShuffleCounter;
    this.startTime = Time.MonotonicNow();
    lastProgressTime = startTime;
    referee.Start();
    this.maxFailedUniqueFetches = Math.Min(totalMaps, 5);
    this.maxFetchFailuresBeforeReporting = job.GetInt(MRJobConfig.ShuffleFetchFailures, ReportFailureLimit);
    this.reportReadErrorImmediately = job.GetBoolean(MRJobConfig.ShuffleNotifyReaderror, true);
    this.maxDelay = job.GetLong(MRJobConfig.MaxShuffleFetchRetryDelay,
        MRJobConfig.DefaultMaxShuffleFetchRetryDelay);
    this.maxHostFailures = job.GetInt(MRJobConfig.MaxShuffleFetchHostFailures,
        MRJobConfig.DefaultMaxShuffleFetchHostFailures);
}
// Fill keys and values with ~1.5 blocks of data for the block-compressed SequenceFile case.
private static void FillBlocks(JobConf conf)
{
    Random r = new Random();
    long seed = conf.GetLong("filebench.seed", -1);
    if (seed > 0)
    {
        r.SetSeed(seed);
    }
    int keylen = conf.GetInt("filebench.key.words", 5);
    int vallen = conf.GetInt("filebench.val.words", 20);
    // 1.5x the compression block size, computed as (3 * blocksize) / 2.
    int acc = (3 * conf.GetInt("io.seqfile.compress.blocksize", 1000000)) >> 1;
    AList<string> k = new AList<string>();
    AList<string> v = new AList<string>();
    for (int i = 0; acc > 0; ++i)
    {
        string s = GenerateSentence(r, keylen);
        acc -= s.Length;
        k.AddItem(s);
        s = GenerateSentence(r, vallen);
        acc -= s.Length;
        v.AddItem(s);
    }
    keys = Sharpen.Collections.ToArray(k, new string[0]);
    values = Sharpen.Collections.ToArray(v, new string[0]);
}
public override void Configure(JobConf job)
{
    numBytesToWrite = job.GetLong("test.tmb.bytes_per_map", 128 * 1024 * 1024);
    minKeySize = job.GetInt("test.tmb.min_key", 10);
    keySizeRange = job.GetInt("test.tmb.max_key", 10) - minKeySize;
    minValueSize = job.GetInt("test.tmb.min_value", 10);
    valueSizeRange = job.GetInt("test.tmb.max_value", 10) - minValueSize;
}
public override void Configure(JobConf job)
{
    bytesToWrite = job.GetLong(RandomTextWriter.BytesPerMap, 1 * 1024 * 1024 * 1024);
    keymin = job.GetInt(RandomTextWriter.MinKey, 5);
    keymax = job.GetInt(RandomTextWriter.MaxKey, 10);
    valmin = job.GetInt(RandomTextWriter.MinValue, 5);
    valmax = job.GetInt(RandomTextWriter.MaxValue, 10);
}
/// <summary>When no input dir is specified, generate random data.</summary>
/// <exception cref="System.IO.IOException"/>
protected internal static void ConfRandom(JobConf job)
{
    // from RandomWriter
    job.SetInputFormat(typeof(GenericMRLoadGenerator.RandomInputFormat));
    job.SetMapperClass(typeof(GenericMRLoadGenerator.RandomMapOutput));
    ClusterStatus cluster = new JobClient(job).GetClusterStatus();
    int numMapsPerHost = job.GetInt(RandomTextWriter.MapsPerHost, 10);
    long numBytesToWritePerMap = job.GetLong(RandomTextWriter.BytesPerMap, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0)
    {
        throw new IOException("Cannot have " + RandomTextWriter.BytesPerMap + " set to 0");
    }
    long totalBytesToWrite = job.GetLong(RandomTextWriter.TotalBytes,
        numMapsPerHost * numBytesToWritePerMap * cluster.GetTaskTrackers());
    int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0)
    {
        numMaps = 1;
        job.SetLong(RandomTextWriter.BytesPerMap, totalBytesToWrite);
    }
    job.SetNumMapTasks(numMaps);
}
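// A quick worked example (hypothetical cluster numbers) of the map-sizing
// arithmetic above, with 10 task trackers and the defaults shown in the code
// (MapsPerHost = 10, BytesPerMap = 1 GB):
//   totalBytesToWrite = 10 hosts * 10 maps/host * 1 GB = 100 GB
//   numMaps           = 100 GB / 1 GB                  = 100 map tasks
// If TotalBytes were explicitly set below BytesPerMap (say 512 MB), numMaps
// would truncate to 0, so the fallback runs a single map and shrinks
// BytesPerMap to exactly totalBytesToWrite.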
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> private void TestFailAbortInternal(int version) { JobConf conf = new JobConf(); conf.Set(FileSystem.FsDefaultNameKey, "faildel:///"); conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem), typeof(FileSystem)); conf.Set(JobContext.TaskAttemptId, attempt); conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version); conf.SetInt(MRConstants.ApplicationAttemptId, 1); FileOutputFormat.SetOutputPath(conf, outDir); JobContext jContext = new JobContextImpl(conf, ((JobID)taskID.GetJobID())); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(); // do setup committer.SetupJob(jContext); committer.SetupTask(tContext); // write output FilePath jobTmpDir = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName + Path.Separator + conf.GetInt(MRConstants.ApplicationAttemptId, 0) + Path.Separator + FileOutputCommitter.TempDirName).ToString()); FilePath taskTmpDir = new FilePath(jobTmpDir, "_" + taskID); FilePath expectedFile = new FilePath(taskTmpDir, partFile); TextOutputFormat <object, object> theOutputFormat = new TextOutputFormat(); RecordWriter <object, object> theRecordWriter = theOutputFormat.GetRecordWriter(null , conf, expectedFile.GetAbsolutePath(), null); WriteOutput(theRecordWriter, tContext); // do abort Exception th = null; try { committer.AbortTask(tContext); } catch (IOException ie) { th = ie; } NUnit.Framework.Assert.IsNotNull(th); NUnit.Framework.Assert.IsTrue(th is IOException); NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed")); NUnit.Framework.Assert.IsTrue(expectedFile + " does not exists", expectedFile.Exists ()); th = null; try { committer.AbortJob(jContext, JobStatus.State.Failed); } catch (IOException ie) { th = ie; } NUnit.Framework.Assert.IsNotNull(th); NUnit.Framework.Assert.IsTrue(th is IOException); NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed")); NUnit.Framework.Assert.IsTrue("job temp dir does not exists", jobTmpDir.Exists()); FileUtil.FullyDelete(new FilePath(outDir.ToString())); }
// spilled map records, some records at the reduce
public virtual void Configure(JobConf conf)
{
    nMaps = conf.GetNumMapTasks();
    id = nMaps - conf.GetInt(JobContext.TaskPartition, -1) - 1;
    Arrays.Fill(b, 0, 4096, unchecked((byte)'V'));
    ((StringBuilder)fmt.Out()).Append(keyfmt);
}
/// <summary>List input directories.</summary>
/// <remarks>
/// List input directories.
/// Subclasses may override to, e.g., select only files matching a regular
/// expression.
/// </remarks>
/// <param name="job">the job to list input paths for</param>
/// <returns>array of FileStatus objects</returns>
/// <exception cref="System.IO.IOException">if zero items.</exception>
protected internal virtual FileStatus[] ListStatus(JobConf job)
{
    Path[] dirs = GetInputPaths(job);
    if (dirs.Length == 0)
    {
        throw new IOException("No input paths specified in job");
    }
    // get tokens for all the required FileSystems..
    TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);
    // Whether we need to look recursively into the directory structure
    bool recursive = job.GetBoolean(InputDirRecursive, false);
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user-provided one (if any).
    IList<PathFilter> filters = new AList<PathFilter>();
    filters.AddItem(hiddenFileFilter);
    PathFilter jobFilter = GetInputPathFilter(job);
    if (jobFilter != null)
    {
        filters.AddItem(jobFilter);
    }
    PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);
    FileStatus[] result;
    int numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads,
        FileInputFormat.DefaultListStatusNumThreads);
    StopWatch sw = new StopWatch().Start();
    if (numThreads == 1)
    {
        IList<FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter, recursive);
        result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count]);
    }
    else
    {
        IEnumerable<FileStatus> locatedFiles = null;
        try
        {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                job, dirs, recursive, inputFilter, false);
            locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
        }
        catch (Exception)
        {
            throw new IOException("Interrupted while getting file statuses");
        }
        result = Iterables.ToArray<FileStatus>(locatedFiles);
    }
    sw.Stop();
    if (Log.IsDebugEnabled())
    {
        Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
    }
    Log.Info("Total input paths to process : " + result.Length);
    return result;
}
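// A minimal sketch of the override suggested in the remarks above: a
// hypothetical TextInputFormat subclass that keeps only files whose names
// match a regular expression. The class name and pattern are illustrative,
// not part of the library; the Sharpen.Pattern matcher calls mirror the
// java.util.regex usage seen elsewhere in this codebase.
internal class LogFileInputFormat : TextInputFormat
{
    private static readonly Sharpen.Pattern NamePattern = Sharpen.Pattern.Compile(".*\\.log$");

    protected internal override FileStatus[] ListStatus(JobConf job)
    {
        AList<FileStatus> kept = new AList<FileStatus>();
        // Start from the base listing (hidden-file and user filters already applied).
        foreach (FileStatus stat in base.ListStatus(job))
        {
            if (NamePattern.Matcher(stat.GetPath().GetName()).Matches())
            {
                kept.AddItem(stat);
            }
        }
        return Sharpen.Collections.ToArray(kept, new FileStatus[kept.Count]);
    }
}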
/// <exception cref="System.IO.IOException"/> internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces , Path sortInput, Path sortOutput) { JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker)); jobConf.SetJobName("sortvalidate-record-checker"); jobConf.SetInputFormat(typeof(SequenceFileInputFormat)); jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat)); jobConf.SetOutputKeyClass(typeof(BytesWritable)); jobConf.SetOutputValueClass(typeof(IntWritable)); jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map)); jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce)); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.GetClusterStatus(); if (noMaps == -1) { noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10); } if (noReduces == -1) { noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9); string sortReduces = jobConf.Get(ReducesPerHost); if (sortReduces != null) { noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces); } } jobConf.SetNumMapTasks(noMaps); jobConf.SetNumReduceTasks(noReduces); FileInputFormat.SetInputPaths(jobConf, sortInput); FileInputFormat.AddInputPath(jobConf, sortOutput); Path outputPath = new Path("/tmp/sortvalidate/recordchecker"); FileSystem fs = FileSystem.Get(defaults); if (fs.Exists(outputPath)) { fs.Delete(outputPath, true); } FileOutputFormat.SetOutputPath(jobConf, outputPath); // Uncomment to run locally in a single process //job_conf.set(JTConfig.JT, "local"); Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf); System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces + " reduces."); DateTime startTime = new DateTime(); System.Console.Out.WriteLine("Job started: " + startTime); JobClient.RunJob(jobConf); DateTime end_time = new DateTime(); System.Console.Out.WriteLine("Job ended: " + end_time); System.Console.Out.WriteLine("The job took " + (end_time.GetTime() - startTime.GetTime ()) / 1000 + " seconds."); }
// Dummy input format that sends one record; the number of splits is numMapTasks.
public virtual InputSplit[] GetSplits(JobConf conf, int numSplits)
{
    numSplits = conf.GetInt("LG.numMapTasks", 1);
    InputSplit[] ret = new InputSplit[numSplits];
    for (int i = 0; i < numSplits; ++i)
    {
        ret[i] = new LoadGeneratorMR.EmptySplit();
    }
    return ret;
}
internal ShuffleClientMetrics(TaskAttemptID reduceId, JobConf jobConf)
{
    this.numCopiers = jobConf.GetInt(MRJobConfig.ShuffleParallelCopies, 5);
    MetricsContext metricsContext = MetricsUtil.GetContext("mapred");
    this.shuffleMetrics = MetricsUtil.CreateRecord(metricsContext, "shuffleInput");
    this.shuffleMetrics.SetTag("user", jobConf.GetUser());
    this.shuffleMetrics.SetTag("jobName", jobConf.GetJobName());
    this.shuffleMetrics.SetTag("jobId", reduceId.GetJobID().ToString());
    this.shuffleMetrics.SetTag("taskId", reduceId.ToString());
    this.shuffleMetrics.SetTag("sessionId", jobConf.GetSessionId());
    metricsContext.RegisterUpdater(this);
}
/// <summary>Configures all the chain elements for the task.</summary>
/// <param name="jobConf">chain job's JobConf.</param>
public virtual void Configure(JobConf jobConf)
{
    string prefix = GetPrefix(isMap);
    chainJobConf = jobConf;
    SerializationFactory serializationFactory = new SerializationFactory(chainJobConf);
    int numMappers = jobConf.GetInt(prefix + ChainMapperSize, 0);
    for (int i = 0; i < numMappers; i++)
    {
        Type mapperClass = jobConf.GetClass<Mapper>(prefix + ChainMapperClass + i, null);
        JobConf mConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainMapperConfig + i));
        Mapper mapper = ReflectionUtils.NewInstance(mapperClass, mConf);
        mappers.AddItem(mapper);
        if (mConf.GetBoolean(MapperByValue, true))
        {
            mappersKeySerialization.AddItem(serializationFactory.GetSerialization(
                mConf.GetClass(MapperOutputKeyClass, null)));
            mappersValueSerialization.AddItem(serializationFactory.GetSerialization(
                mConf.GetClass(MapperOutputValueClass, null)));
        }
        else
        {
            mappersKeySerialization.AddItem(null);
            mappersValueSerialization.AddItem(null);
        }
    }
    Type reducerClass = jobConf.GetClass<Reducer>(prefix + ChainReducerClass, null);
    if (reducerClass != null)
    {
        JobConf rConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainReducerConfig));
        reducer = ReflectionUtils.NewInstance(reducerClass, rConf);
        if (rConf.GetBoolean(ReducerByValue, true))
        {
            reducerKeySerialization = serializationFactory.GetSerialization(
                rConf.GetClass(ReducerOutputKeyClass, null));
            reducerValueSerialization = serializationFactory.GetSerialization(
                rConf.GetClass(ReducerOutputValueClass, null));
        }
        else
        {
            reducerKeySerialization = null;
            reducerValueSerialization = null;
        }
    }
}
/// <summary>Helper function to generate a name that is unique for the task.</summary>
/// <remarks>
/// Helper function to generate a name that is unique for the task.
/// <p>The generated name can be used to create custom files from within the
/// different tasks for the job; the names for different tasks will not collide
/// with each other.</p>
/// <p>The given name is suffixed with the task type, 'm' for maps, 'r' for
/// reduces, and the task partition number. For example, given the name 'test'
/// running on the first map of the job, the generated name will be
/// 'test-m-00000'.</p>
/// </remarks>
/// <param name="conf">the configuration for the job.</param>
/// <param name="name">the name to make unique.</param>
/// <returns>a unique name across all tasks of the job.</returns>
public static string GetUniqueName(JobConf conf, string name)
{
    int partition = conf.GetInt(JobContext.TaskPartition, -1);
    if (partition == -1)
    {
        throw new ArgumentException("This method can only be called from within a Job");
    }
    string taskType = conf.GetBoolean(JobContext.TaskIsmap, JobContext.DefaultTaskIsmap) ? "m" : "r";
    NumberFormat numberFormat = NumberFormat.GetInstance();
    numberFormat.SetMinimumIntegerDigits(5);
    numberFormat.SetGroupingUsed(false);
    return name + "-" + taskType + "-" + numberFormat.Format(partition);
}
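// A minimal usage sketch for GetUniqueName, assuming it is the classic
// FileOutputFormat helper; the two configuration values below are
// hypothetical here and would normally be set by the framework for a
// running task.
JobConf conf = new JobConf();
conf.SetInt(JobContext.TaskPartition, 3);
conf.SetBoolean(JobContext.TaskIsmap, true);
string unique = FileOutputFormat.GetUniqueName(conf, "test");
// unique == "test-m-00003"; with TaskPartition unset the call throws
// ArgumentException, since the name only makes sense inside a job.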
private static AList<ValueAggregatorDescriptor> GetAggregatorDescriptors(JobConf job)
{
    string advn = "aggregator.descriptor";
    int num = job.GetInt(advn + ".num", 0);
    AList<ValueAggregatorDescriptor> retv = new AList<ValueAggregatorDescriptor>(num);
    for (int i = 0; i < num; i++)
    {
        string spec = job.Get(advn + "." + i);
        ValueAggregatorDescriptor ad = GetValueAggregatorDescriptor(spec, job);
        if (ad != null)
        {
            retv.AddItem(ad);
        }
    }
    return retv;
}
public virtual void Configure(JobConf jobConf)
{
    int numberOfThreads = jobConf.GetInt(MultithreadedMapper.NumThreads, 10);
    if (Log.IsDebugEnabled())
    {
        Log.Debug("Configuring jobConf " + jobConf.GetJobName() + " to use " + numberOfThreads + " threads");
    }
    this.job = jobConf;
    // increment processed counter only if skipping feature is enabled
    this.incrProcCount = SkipBadRecords.GetMapperMaxSkipRecords(job) > 0
        && SkipBadRecords.GetAutoIncrMapperProcCount(job);
    this.mapper = ReflectionUtils.NewInstance(jobConf.GetMapperClass(), jobConf);
    // Create a thread pool of the configured size to execute the Mapper's
    // map method in parallel.
    executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 0L,
        TimeUnit.Milliseconds, new MultithreadedMapRunner.BlockingArrayQueue(numberOfThreads));
}
public virtual void Configure(JobConf conf)
{
    SetConf(conf);
    try
    {
        fs = FileSystem.Get(conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException("Cannot create file system.", e);
    }
    bufferSize = conf.GetInt("test.io.file.buffer.size", 4096);
    buffer = new byte[bufferSize];
    try
    {
        hostName = Sharpen.Runtime.GetLocalHost().GetHostName();
    }
    catch (Exception)
    {
        hostName = "localhost";
    }
}
/// <summary>Generate the output file name based on a given name and the input file name.
/// </summary>
/// <remarks>
/// Generate the output file name based on a given name and the input file name. If
/// the
/// <see cref="Org.Apache.Hadoop.Mapreduce.MRJobConfig.MapInputFile"/>
/// does not exist (i.e. this is not for a map-only job),
/// the given name is returned unchanged. If the config value for
/// "num.of.trailing.legs.to.use" is not set, or set to 0 or negative, the given
/// name is returned unchanged. Otherwise, return a file name consisting of the
/// N trailing legs of the input file name where N is the config value for
/// "num.of.trailing.legs.to.use".
/// </remarks>
/// <param name="job">the job config</param>
/// <param name="name">the output file name</param>
/// <returns>the output file name based on a given name and the input file name.</returns>
protected internal virtual string GetInputFileBasedOutputFileName(JobConf job, string name)
{
    string infilepath = job.Get(MRJobConfig.MapInputFile);
    if (infilepath == null)
    {
        // if the {@link JobContext#MAP_INPUT_FILE} does not exist,
        // then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.GetInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0)
    {
        return name;
    }
    // Walk up the input path, prepending one parent leg per iteration so the
    // result keeps the last N components of the input file's path.
    Path infile = new Path(infilepath);
    Path parent = infile.GetParent();
    string midName = infile.GetName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++)
    {
        if (parent == null)
        {
            break;
        }
        midName = parent.GetName();
        if (midName.Length == 0)
        {
            break;
        }
        parent = parent.GetParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.ToString();
}
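// A quick worked example of the trailing-legs logic above; "format" stands
// for a hypothetical instance of the enclosing output format, and the paths
// are made up:
JobConf job = new JobConf();
job.Set(MRJobConfig.MapInputFile, "/data/2015/03/part-00000");
job.SetInt("mapred.outputformat.numOfTrailingLegs", 3);
string outName = format.GetInputFileBasedOutputFileName(job, "ignored");
// outName == "2015/03/part-00000": the input file name plus two parent legs.
// With the key unset (or <= 0) the method returns "ignored" unchanged.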
public override void Configure(JobConf job)
{
    // 'key' == sortInput for sort-input; key == sortOutput for sort-output
    key = DeduceInputFile(job);
    if (key == sortOutput)
    {
        partitioner = new HashPartitioner<WritableComparable, Writable>();
        // Figure out the 'current' partition and no. of reduces of the 'sort'
        try
        {
            URI inputURI = new URI(job.Get(JobContext.MapInputFile));
            string inputFile = inputURI.GetPath();
            // part file is of the form part-r-xxxxx; skip past "part-r-" to the digits
            partition = Sharpen.Extensions.ValueOf(
                Sharpen.Runtime.Substring(inputFile, inputFile.LastIndexOf("part") + 7));
            noSortReducers = job.GetInt(SortReduces, -1);
        }
        catch (Exception e)
        {
            System.Console.Error.WriteLine("Caught: " + e);
            System.Environment.Exit(-1);
        }
    }
}
public override void Configure(JobConf job)
{
    pattern = Sharpen.Pattern.Compile(job.Get(RegexMapper.Pattern));
    group = job.GetInt(RegexMapper.Group, 0);
}
public virtual int GetPartition(Text key, Text value, int numPartitions)
{
    int keyValue = 0;
    try
    {
        keyValue = System.Convert.ToInt32(key.ToString());
    }
    catch (FormatException)
    {
        keyValue = 0;
    }
    // Spread 1-based line numbers evenly over the partitions.
    int partitionNumber = (numPartitions * (Math.Max(0, keyValue - 1)))
        / job.GetInt("mapred.test.num_lines", 10000);
    return partitionNumber;
}
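// A quick worked example of the partition arithmetic above, with
// numPartitions = 4 and "mapred.test.num_lines" left at its default of 10000:
//   key "1"     -> 4 * 0    / 10000 = partition 0
//   key "2500"  -> 4 * 2499 / 10000 = partition 0
//   key "2501"  -> 4 * 2500 / 10000 = partition 1
//   key "10000" -> 4 * 9999 / 10000 = partition 3
// Non-numeric keys fall back to keyValue = 0 and land in partition 0.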
/// <summary>
/// This launches a given namenode operation (<code>-operation</code>),
/// starting at a given time (<code>-startTime</code>).
/// </summary>
/// <remarks>
/// This launches a given namenode operation (<code>-operation</code>),
/// starting at a given time (<code>-startTime</code>). The files used
/// by the openRead, rename, and delete operations are the same files
/// created by the createWrite operation. Typically, the program
/// would be run four times, once for each operation in this order:
/// createWrite, openRead, rename, delete.
/// <pre>
/// Usage: nnbench
/// -operation <one of createWrite, openRead, rename, or delete>
/// -baseDir <base output/input DFS path>
/// -startTime <time to start, given in seconds from the epoch>
/// -numFiles <number of files to create, read, rename, or delete>
/// -blocksPerFile <number of blocks to create per file>
/// [-bytesPerBlock <number of bytes to write to each block, default is 1>]
/// [-bytesPerChecksum <value for io.bytes.per.checksum>]
/// </pre>
/// </remarks>
/// <param name="args">is an array of the program command line arguments</param>
/// <exception cref="System.IO.IOException">indicates a problem with test startup</exception>
public static void Main(string[] args)
{
    string version = "NameNodeBenchmark.0.3";
    System.Console.Out.WriteLine(version);
    int bytesPerChecksum = -1;
    string usage = "Usage: nnbench "
        + " -operation <one of createWrite, openRead, rename, or delete> "
        + " -baseDir <base output/input DFS path> "
        + " -startTime <time to start, given in seconds from the epoch> "
        + " -numFiles <number of files to create> "
        + " -blocksPerFile <number of blocks to create per file> "
        + " [-bytesPerBlock <number of bytes to write to each block, default is 1>] "
        + " [-bytesPerChecksum <value for io.bytes.per.checksum>]"
        + "\nNote: bytesPerBlock MUST be a multiple of bytesPerChecksum";
    string operation = null;
    // parse command line
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].Equals("-baseDir"))
        {
            baseDir = new Path(args[++i]);
        }
        else if (args[i].Equals("-numFiles"))
        {
            numFiles = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-blocksPerFile"))
        {
            blocksPerFile = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-bytesPerBlock"))
        {
            bytesPerBlock = long.Parse(args[++i]);
        }
        else if (args[i].Equals("-bytesPerChecksum"))
        {
            bytesPerChecksum = System.Convert.ToInt32(args[++i]);
        }
        else if (args[i].Equals("-startTime"))
        {
            startTime = long.Parse(args[++i]) * 1000;
        }
        else if (args[i].Equals("-operation"))
        {
            operation = args[++i];
        }
        else
        {
            System.Console.Out.WriteLine(usage);
            System.Environment.Exit(-1);
        }
    }
    bytesPerFile = bytesPerBlock * blocksPerFile;
    JobConf jobConf = new JobConf(new Configuration(), typeof(NNBench));
    if (bytesPerChecksum < 0)
    {
        // if it is not set on the command line
        bytesPerChecksum = jobConf.GetInt("io.bytes.per.checksum", 512);
    }
    jobConf.Set("io.bytes.per.checksum", Sharpen.Extensions.ToString(bytesPerChecksum));
    System.Console.Out.WriteLine("Inputs: ");
    System.Console.Out.WriteLine(" operation: " + operation);
    System.Console.Out.WriteLine(" baseDir: " + baseDir);
    System.Console.Out.WriteLine(" startTime: " + startTime);
    System.Console.Out.WriteLine(" numFiles: " + numFiles);
    System.Console.Out.WriteLine(" blocksPerFile: " + blocksPerFile);
    System.Console.Out.WriteLine(" bytesPerBlock: " + bytesPerBlock);
    System.Console.Out.WriteLine(" bytesPerChecksum: " + bytesPerChecksum);
    // verify args
    if (operation == null || baseDir == null || numFiles < 1 || blocksPerFile < 1
        || bytesPerBlock < 0 || bytesPerBlock % bytesPerChecksum != 0)
    {
        System.Console.Error.WriteLine(usage);
        System.Environment.Exit(-1);
    }
    fileSys = FileSystem.Get(jobConf);
    string uniqueId = Sharpen.Runtime.GetLocalHost().GetHostName();
    taskDir = new Path(baseDir, uniqueId);
    // initialize buffer used for writing/reading file
    buffer = new byte[(int)Math.Min(bytesPerFile, 32768L)];
    DateTime execTime;
    DateTime endTime;
    long duration;
    int exceptions = 0;
    // wait for coordinated start time
    Barrier();
    execTime = new DateTime();
    System.Console.Out.WriteLine("Job started: " + startTime);
    if (operation.Equals("createWrite"))
    {
        if (!fileSys.Mkdirs(taskDir))
        {
            throw new IOException("Mkdirs failed to create " + taskDir.ToString());
        }
        exceptions = CreateWrite();
    }
    else if (operation.Equals("openRead"))
    {
        exceptions = OpenRead();
    }
    else if (operation.Equals("rename"))
    {
        exceptions = Rename();
    }
    else if (operation.Equals("delete"))
    {
        exceptions = Delete();
    }
    else
    {
        System.Console.Error.WriteLine(usage);
        System.Environment.Exit(-1);
    }
    endTime = new DateTime();
    System.Console.Out.WriteLine("Job ended: " + endTime);
    duration = (endTime.GetTime() - execTime.GetTime()) / 1000;
    System.Console.Out.WriteLine("The " + operation + " job took " + duration + " seconds.");
    System.Console.Out.WriteLine("The job recorded " + exceptions + " exceptions.");
}
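// A typical benchmark sequence per the remarks above (hypothetical paths and
// epoch times; each run must use the same -baseDir so the later operations
// find the files written by createWrite):
//   nnbench -operation createWrite -baseDir /bench -startTime 1700000000 -numFiles 100 -blocksPerFile 2
//   nnbench -operation openRead    -baseDir /bench -startTime 1700000300 -numFiles 100 -blocksPerFile 2
//   nnbench -operation rename      -baseDir /bench -startTime 1700000600 -numFiles 100 -blocksPerFile 2
//   nnbench -operation delete      -baseDir /bench -startTime 1700000900 -numFiles 100 -blocksPerFile 2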
// TODO later: add explicit "isUber()" checks of some sort
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void TestContainerRollingLog()
{
    if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
    {
        Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
        return;
    }
    SleepJob sleepJob = new SleepJob();
    JobConf sleepConf = new JobConf(mrCluster.GetConfig());
    sleepConf.Set(MRJobConfig.MapLogLevel, Level.All.ToString());
    long userLogKb = 4;
    sleepConf.SetLong(MRJobConfig.TaskUserlogLimit, userLogKb);
    sleepConf.SetInt(MRJobConfig.TaskLogBackups, 3);
    sleepConf.Set(MRJobConfig.MrAmLogLevel, Level.All.ToString());
    long amLogKb = 7;
    sleepConf.SetLong(MRJobConfig.MrAmLogKb, amLogKb);
    sleepConf.SetInt(MRJobConfig.MrAmLogBackups, 7);
    sleepJob.SetConf(sleepConf);
    Job job = sleepJob.CreateJob(1, 0, 1L, 100, 0L, 0);
    job.SetJarByClass(typeof(SleepJob));
    job.AddFileToClassPath(AppJar);
    // The AppMaster jar itself.
    job.WaitForCompletion(true);
    JobId jobId = TypeConverter.ToYarn(job.GetJobID());
    ApplicationId appID = jobId.GetAppId();
    // Poll for up to 60 seconds until the application reaches a terminal state.
    int pollElapsed = 0;
    while (true)
    {
        Sharpen.Thread.Sleep(1000);
        pollElapsed += 1000;
        if (TerminalRmAppStates.Contains(
            mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState()))
        {
            break;
        }
        if (pollElapsed >= 60000)
        {
            Log.Warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    NUnit.Framework.Assert.AreEqual(RMAppState.Finished,
        mrCluster.GetResourceManager().GetRMContext().GetRMApps()[appID].GetState());
    // Job finished, verify logs
    string appIdStr = appID.ToString();
    string appIdSuffix = Sharpen.Runtime.Substring(appIdStr, "application_".Length, appIdStr.Length);
    string containerGlob = "container_" + appIdSuffix + "_*_*";
    string syslogGlob = appIdStr + Path.Separator + containerGlob + Path.Separator
        + TaskLog.LogName.Syslog;
    int numAppMasters = 0;
    int numMapTasks = 0;
    for (int i = 0; i < NumNodeMgrs; i++)
    {
        Configuration nmConf = mrCluster.GetNodeManager(i).GetConfig();
        foreach (string logDir in nmConf.GetTrimmedStrings(YarnConfiguration.NmLogDirs))
        {
            Path absSyslogGlob = new Path(logDir + Path.Separator + syslogGlob);
            Log.Info("Checking for glob: " + absSyslogGlob);
            FileStatus[] syslogs = localFs.GlobStatus(absSyslogGlob);
            foreach (FileStatus slog in syslogs)
            {
                bool foundAppMaster = job.IsUber();
                Path containerPathComponent = slog.GetPath().GetParent();
                if (!foundAppMaster)
                {
                    // Container 1 of the application is the AM unless the job ran uberized.
                    ContainerId cid = ConverterUtils.ToContainerId(containerPathComponent.GetName());
                    foundAppMaster = ((cid.GetContainerId() & ContainerId.ContainerIdBitmask) == 1);
                }
                FileStatus[] sysSiblings = localFs.GlobStatus(
                    new Path(containerPathComponent, TaskLog.LogName.Syslog + "*"));
                // sort to ensure for i > 0 sysSiblings[i] == "syslog.i"
                Arrays.Sort(sysSiblings);
                if (foundAppMaster)
                {
                    numAppMasters++;
                }
                else
                {
                    numMapTasks++;
                }
                if (foundAppMaster)
                {
                    NUnit.Framework.Assert.AreEqual("Unexpected number of AM syslog* files",
                        sleepConf.GetInt(MRJobConfig.MrAmLogBackups, 0) + 1, sysSiblings.Length);
                    NUnit.Framework.Assert.IsTrue("AM syslog.1 length kb should be >= " + amLogKb,
                        sysSiblings[1].GetLen() >= amLogKb * 1024);
                }
                else
                {
                    NUnit.Framework.Assert.AreEqual("Unexpected number of MR task syslog* files",
                        sleepConf.GetInt(MRJobConfig.TaskLogBackups, 0) + 1, sysSiblings.Length);
                    NUnit.Framework.Assert.IsTrue("MR syslog.1 length kb should be >= " + userLogKb,
                        sysSiblings[1].GetLen() >= userLogKb * 1024);
                }
            }
        }
    }
    // Make sure we checked a non-empty set
    NUnit.Framework.Assert.AreEqual("No AppMaster log found!", 1, numAppMasters);
    if (sleepConf.GetBoolean(MRJobConfig.JobUbertaskEnable, false))
    {
        NUnit.Framework.Assert.AreEqual("MapTask log with uber found!", 0, numMapTasks);
    }
    else
    {
        NUnit.Framework.Assert.AreEqual("No MapTask log found!", 1, numMapTasks);
    }
}
public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
    LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec,
    Type combinerClass, Task.CombineOutputCollector<K, V> combineCollector,
    Counters.Counter spilledRecordsCounter, Counters.Counter reduceCombineInputCounter,
    Counters.Counter mergedMapOutputsCounter, ExceptionReporter exceptionReporter,
    Progress mergePhase, MapOutputFile mapOutputFile)
{
    /* Maximum percentage of the in-memory limit that a single shuffle can consume */
    this.reduceId = reduceId;
    this.jobConf = jobConf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.reporter = reporter;
    this.codec = codec;
    this.combinerClass = combinerClass;
    this.combineCollector = combineCollector;
    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = mapOutputFile;
    this.mapOutputFile.SetConf(jobConf);
    this.localFS = localFS;
    this.rfs = ((LocalFileSystem)localFS).GetRaw();
    float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent,
        MRJobConfig.DefaultShuffleInputBufferPercent);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
    {
        throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleInputBufferPercent
            + ": " + maxInMemCopyUse);
    }
    // Allow unit tests to fix Runtime memory
    this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes,
        Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
    this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);
    float singleShuffleMemoryLimitPercent = jobConf.GetFloat(MRJobConfig.ShuffleMemoryLimitPercent,
        DefaultShuffleMemoryLimitPercent);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f)
    {
        throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleMemoryLimitPercent
            + ": " + singleShuffleMemoryLimitPercent);
    }
    usedMemory = 0L;
    commitMemory = 0L;
    this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
    this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold, ioSortFactor);
    this.mergeThreshold = (long)(this.memoryLimit
        * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent, 0.90f));
    Log.Info("MergeManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
        + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", "
        + "ioSortFactor=" + ioSortFactor + ", " + "memToMemMergeOutputsThreshold="
        + memToMemMergeOutputsThreshold);
    if (this.maxSingleShuffleLimit >= this.mergeThreshold)
    {
        throw new RuntimeException("Invalid configuration: "
            + "maxSingleShuffleLimit should be less than mergeThreshold. "
            + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
            + ", mergeThreshold: " + this.mergeThreshold);
    }
    bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);
    if (allowMemToMemMerge)
    {
        this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this,
            this, memToMemMergeOutputsThreshold);
        this.memToMemMerger.Start();
    }
    else
    {
        this.memToMemMerger = null;
    }
    this.inMemoryMerger = CreateInMemoryMerger();
    this.inMemoryMerger.Start();
    this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
    this.onDiskMerger.Start();
    this.mergePhase = mergePhase;
}
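// A worked sketch (hypothetical numbers, assuming the stock Hadoop defaults
// of 0.70 for ShuffleInputBufferPercent and 0.25 for ShuffleMemoryLimitPercent)
// of how the limits above interact for a 1024 MB task heap:
//   memoryLimit           = 1024 MB * 0.70 = ~717 MB
//   maxSingleShuffleLimit =  717 MB * 0.25 = ~179 MB
//   mergeThreshold        =  717 MB * 0.90 = ~645 MB
// Map outputs larger than maxSingleShuffleLimit are shuffled straight to
// disk, and in-memory merging starts once accumulated map outputs cross
// mergeThreshold; the constructor above rejects configurations where
// maxSingleShuffleLimit >= mergeThreshold.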
public IndexCache(JobConf conf)
{
    this.conf = conf;
    // Configured in MB; convert to bytes.
    totalMemoryAllowed = conf.GetInt(TTConfig.TtIndexCache, 10) * 1024 * 1024;
    Log.Info("IndexCache created with max memory = " + totalMemoryAllowed);
}
public virtual void Configure(JobConf conf)
{
    N = conf.GetInt("mapreduce.input.lineinputformat.linespermap", 1);
}
public virtual void Configure(JobConf job)
{
    srcs = job.GetInt("testdatamerge.sources", 0);
    NUnit.Framework.Assert.IsTrue("Invalid src count: " + srcs, srcs > 0);
}