Example #1
        internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl <K, V>
                         scheduler, MergeManager <K, V> merger, Reporter reporter, ShuffleClientMetrics metrics
                         , ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
        {
            /* Default read timeout (in milliseconds) */
            // Initial value is 0, which means it hasn't retried yet.
            this.jobConf           = job;
            this.reporter          = reporter;
            this.scheduler         = scheduler;
            this.merger            = merger;
            this.metrics           = metrics;
            this.exceptionReporter = exceptionReporter;
            this.id               = id;
            this.reduce           = reduceId.GetTaskID().GetId();
            this.shuffleSecretKey = shuffleKey;
            ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString
                                             ());
            wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongLength
                                                  .ToString());
            badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString
                                                ());
            wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongMap
                                               .ToString());
            connectionErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.Connection
                                                 .ToString());
            wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongReduce
                                                  .ToString());
            this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout, DefaultStalledCopyTimeout
                                                );
            this.readTimeout        = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
            this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs, MRJobConfig
                                                 .DefaultShuffleFetchRetryIntervalMs);
            this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs, DefaultStalledCopyTimeout
                                                );
            bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled
                                                             , YarnConfiguration.DefaultNmRecoveryEnabled);

            this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, shuffleFetchEnabledDefault
                                                    );
            SetName("fetcher#" + id);
            SetDaemon(true);
            lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
            {
                sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey, MRConfig.ShuffleSslEnabledDefault
                                            );
                if (sslShuffle && sslFactory == null)
                {
                    sslFactory = new SSLFactory(SSLFactory.Mode.Client, job);
                    try
                    {
                        sslFactory.Init();
                    }
                    catch (Exception ex)
                    {
                        sslFactory.Destroy();
                        throw new RuntimeException(ex);
                    }
                }
            }
        }
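
A minimal sketch of setting the fetch keys this constructor reads, assuming the driver-side JobConf setters mirror the getters used above; the numeric values are illustrative, not Hadoop defaults:

        // Hypothetical driver-side tuning of the keys consumed by Fetcher above.
        JobConf job = new JobConf();
        job.SetInt(MRJobConfig.ShuffleConnectTimeout, 180 * 1000);   // connect timeout, ms
        job.SetInt(MRJobConfig.ShuffleReadTimeout, 180 * 1000);      // read timeout, ms
        job.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, true);  // force retry on, regardless of NM recovery
        job.SetInt(MRJobConfig.ShuffleFetchRetryIntervalMs, 1000);   // retry every second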
        private static JobEndNotifier.JobEndStatusInfo CreateNotification(JobConf conf, JobStatus
                                                                          status)
        {
            JobEndNotifier.JobEndStatusInfo notification = null;
            string uri = conf.GetJobEndNotificationURI();

            if (uri != null)
            {
                int  retryAttempts = conf.GetInt(JobContext.MrJobEndRetryAttempts, 0);
                long retryInterval = conf.GetInt(JobContext.MrJobEndRetryInterval, 30000);
                int  timeout       = conf.GetInt(JobContext.MrJobEndNotificationTimeout, JobContext.DefaultMrJobEndNotificationTimeout
                                                 );
                if (uri.Contains("$jobId"))
                {
                    uri = uri.Replace("$jobId", ((JobID)status.GetJobID()).ToString());
                }
                if (uri.Contains("$jobStatus"))
                {
                    string statusStr = (status.GetRunState() == JobStatus.Succeeded) ? "SUCCEEDED" :
                                       (status.GetRunState() == JobStatus.Failed) ? "FAILED" : "KILLED";
                    uri = uri.Replace("$jobStatus", statusStr);
                }
                notification = new JobEndNotifier.JobEndStatusInfo(uri, retryAttempts, retryInterval
                                                                   , timeout);
            }
            return(notification);
        }
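
For context, a sketch of the configuration this method consumes; the endpoint URI is made up, the SetJobEndNotificationURI setter is assumed to mirror the getter used above, and $jobId/$jobStatus are the placeholders substituted by the method:

        // Hypothetical job setup for end-of-job notification.
        JobConf conf = new JobConf();
        conf.SetJobEndNotificationURI("http://example.com/notify?id=$jobId&status=$jobStatus");
        conf.SetInt(JobContext.MrJobEndRetryAttempts, 3);      // retry the callback 3 times
        conf.SetInt(JobContext.MrJobEndRetryInterval, 30000);  // 30 s between retries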
Example #3
 public ShuffleSchedulerImpl(JobConf job, TaskStatus status, TaskAttemptID reduceId
                             , ExceptionReporter reporter, Progress progress, Counters.Counter shuffledMapsCounter
                             , Counters.Counter reduceShuffleBytes, Counters.Counter failedShuffleCounter)
 {
     referee                   = new ShuffleSchedulerImpl.Referee(this);
     totalMaps                 = job.GetNumMapTasks();
     abortFailureLimit         = Math.Max(30, totalMaps / 10);
     copyTimeTracker           = new ShuffleSchedulerImpl.CopyTimeTracker();
     remainingMaps             = totalMaps;
     finishedMaps              = new bool[remainingMaps];
     this.reporter             = reporter;
     this.status               = status;
     this.reduceId             = reduceId;
     this.progress             = progress;
     this.shuffledMapsCounter  = shuffledMapsCounter;
     this.reduceShuffleBytes   = reduceShuffleBytes;
     this.failedShuffleCounter = failedShuffleCounter;
     this.startTime            = Time.MonotonicNow();
     lastProgressTime          = startTime;
     referee.Start();
     this.maxFailedUniqueFetches          = Math.Min(totalMaps, 5);
     this.maxFetchFailuresBeforeReporting = job.GetInt(MRJobConfig.ShuffleFetchFailures
                                                       , ReportFailureLimit);
     this.reportReadErrorImmediately = job.GetBoolean(MRJobConfig.ShuffleNotifyReaderror
                                                      , true);
     this.maxDelay = job.GetLong(MRJobConfig.MaxShuffleFetchRetryDelay, MRJobConfig.DefaultMaxShuffleFetchRetryDelay
                                 );
     this.maxHostFailures = job.GetInt(MRJobConfig.MaxShuffleFetchHostFailures, MRJobConfig
                                       .DefaultMaxShuffleFetchHostFailures);
 }
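
A quick worked example of the failure limits computed above (the map counts are assumptions):

     // totalMaps = 1000: abortFailureLimit = Math.Max(30, 1000 / 10) = 100,
     //                   maxFailedUniqueFetches = Math.Min(1000, 5) = 5.
     // totalMaps = 50:   abortFailureLimit = Math.Max(30, 50 / 10) = 30.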
Example #4
        // fill keys, values with ~1.5 blocks for block-compressed seq fill
        private static void FillBlocks(JobConf conf)
        {
            Random r    = new Random();
            long   seed = conf.GetLong("filebench.seed", -1);

            if (seed > 0)
            {
                r.SetSeed(seed);
            }
            int            keylen = conf.GetInt("filebench.key.words", 5);
            int            vallen = conf.GetInt("filebench.val.words", 20);
            int            acc    = (3 * conf.GetInt("io.seqfile.compress.blocksize", 1000000)) >> 1;
            AList <string> k      = new AList <string>();
            AList <string> v      = new AList <string>();

            for (int i = 0; acc > 0; ++i)
            {
                string s = GenerateSentence(r, keylen);
                acc -= s.Length;
                k.AddItem(s);
                s    = GenerateSentence(r, vallen);
                acc -= s.Length;
                v.AddItem(s);
            }
            keys   = Sharpen.Collections.ToArray(k, new string[0]);
            values = Sharpen.Collections.ToArray(v, new string[0]);
        }
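
The acc budget above works out to 1.5 compression blocks' worth of generated text:

        // With the fallback io.seqfile.compress.blocksize of 1,000,000 used above,
        // acc starts at (3 * 1000000) >> 1 = 1,500,000 characters, and key/value
        // sentences are generated until that budget is exhausted.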
 public override void Configure(JobConf job)
 {
     numBytesToWrite = job.GetLong("test.tmb.bytes_per_map", 128 * 1024 * 1024);
     minKeySize      = job.GetInt("test.tmb.min_key", 10);
     keySizeRange    = job.GetInt("test.tmb.max_key", 10) - minKeySize;
     minValueSize    = job.GetInt("test.tmb.min_value", 10);
     valueSizeRange  = job.GetInt("test.tmb.max_value", 10) - minValueSize;
 }
Example #6
 public override void Configure(JobConf job)
 {
     bytesToWrite = job.GetLong(RandomTextWriter.BytesPerMap, 1 * 1024 * 1024 * 1024);
     keymin       = job.GetInt(RandomTextWriter.MinKey, 5);
     keymax       = job.GetInt(RandomTextWriter.MaxKey, 10);
     valmin       = job.GetInt(RandomTextWriter.MinValue, 5);
     valmax       = job.GetInt(RandomTextWriter.MaxValue, 10);
 }
Example #7
        /// <summary>When no input dir is specified, generate random data.</summary>
        /// <exception cref="System.IO.IOException"/>
        protected internal static void ConfRandom(JobConf job)
        {
            // from RandomWriter
            job.SetInputFormat(typeof(GenericMRLoadGenerator.RandomInputFormat));
            job.SetMapperClass(typeof(GenericMRLoadGenerator.RandomMapOutput));
            ClusterStatus cluster               = new JobClient(job).GetClusterStatus();
            int           numMapsPerHost        = job.GetInt(RandomTextWriter.MapsPerHost, 10);
            long          numBytesToWritePerMap = job.GetLong(RandomTextWriter.BytesPerMap, 1 * 1024 *
                                                              1024 * 1024);

            if (numBytesToWritePerMap == 0)
            {
                throw new IOException("Cannot have " + RandomTextWriter.BytesPerMap + " set to 0"
                                      );
            }
            long totalBytesToWrite = job.GetLong(RandomTextWriter.TotalBytes, numMapsPerHost
                                                 * numBytesToWritePerMap * cluster.GetTaskTrackers());
            int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);

            if (numMaps == 0 && totalBytesToWrite > 0)
            {
                numMaps = 1;
                job.SetLong(RandomTextWriter.BytesPerMap, totalBytesToWrite);
            }
            job.SetNumMapTasks(numMaps);
        }
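
The map count reduces to totalBytesToWrite / numBytesToWritePerMap; a worked example under assumed cluster numbers:

        // 10 task trackers x 10 maps/host x 1 GB/map => totalBytesToWrite = 100 GB,
        // so ConfRandom above sets numMaps = 100 GB / 1 GB = 100 map tasks.
        long bytesPerMap = 1L * 1024 * 1024 * 1024;
        long totalBytes  = 10 * 10 * bytesPerMap;
        int  numMaps     = (int)(totalBytes / bytesPerMap);   // 100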
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void TestFailAbortInternal(int version)
        {
            JobConf conf = new JobConf();

            conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
            conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem),
                          typeof(FileSystem));
            conf.Set(JobContext.TaskAttemptId, attempt);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            conf.SetInt(MRConstants.ApplicationAttemptId, 1);
            FileOutputFormat.SetOutputPath(conf, outDir);
            JobContext          jContext  = new JobContextImpl(conf, ((JobID)taskID.GetJobID()));
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter();

            // do setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output
            FilePath jobTmpDir = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName
                                                       + Path.Separator + conf.GetInt(MRConstants.ApplicationAttemptId, 0) + Path.Separator
                                                       + FileOutputCommitter.TempDirName).ToString());
            FilePath taskTmpDir   = new FilePath(jobTmpDir, "_" + taskID);
            FilePath expectedFile = new FilePath(taskTmpDir, partFile);
            TextOutputFormat <object, object> theOutputFormat = new TextOutputFormat();
            RecordWriter <object, object>     theRecordWriter = theOutputFormat.GetRecordWriter(null
                                                                                                , conf, expectedFile.GetAbsolutePath(), null);

            WriteOutput(theRecordWriter, tContext);
            // do abort
            Exception th = null;

            try
            {
                committer.AbortTask(tContext);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue(expectedFile + " does not exist", expectedFile.Exists
                                              ());
            th = null;
            try
            {
                committer.AbortJob(jContext, JobStatus.State.Failed);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue("job temp dir does not exists", jobTmpDir.Exists());
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Example #9
 // spilled map records, some records at the reduce
 public virtual void Configure(JobConf conf)
 {
     nMaps = conf.GetNumMapTasks();
     id    = nMaps - conf.GetInt(JobContext.TaskPartition, -1) - 1;
     Arrays.Fill(b, 0, 4096, unchecked ((byte)'V'));
     ((StringBuilder)fmt.Out()).Append(keyfmt);
 }
        /// <summary>List input directories.</summary>
        /// <remarks>
        /// List input directories.
        /// Subclasses may override to, e.g., select only files matching a regular
        /// expression.
        /// </remarks>
        /// <param name="job">the job to list input paths for</param>
        /// <returns>array of FileStatus objects</returns>
        /// <exception cref="System.IO.IOException">if zero items.</exception>
        protected internal virtual FileStatus[] ListStatus(JobConf job)
        {
            Path[] dirs = GetInputPaths(job);
            if (dirs.Length == 0)
            {
                throw new IOException("No input paths specified in job");
            }
            // get tokens for all the required FileSystems..
            TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);
            // Whether we need to recursively look into the directory structure
            bool recursive = job.GetBoolean(InputDirRecursive, false);
            // creates a MultiPathFilter with the hiddenFileFilter and the
            // user provided one (if any).
            IList <PathFilter> filters = new AList <PathFilter>();

            filters.AddItem(hiddenFileFilter);
            PathFilter jobFilter = GetInputPathFilter(job);

            if (jobFilter != null)
            {
                filters.AddItem(jobFilter);
            }
            PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

            FileStatus[] result;
            int          numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads, FileInputFormat
                                                 .DefaultListStatusNumThreads);
            StopWatch sw = new StopWatch().Start();

            if (numThreads == 1)
            {
                IList <FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter,
                                                                           recursive);
                result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count
                                                     ]);
            }
            else
            {
                IEnumerable <FileStatus> locatedFiles = null;
                try
                {
                    LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                        job, dirs, recursive, inputFilter, false);
                    locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
                }
                catch (Exception)
                {
                    throw new IOException("Interrupted while getting file statuses");
                }
                result = Iterables.ToArray <FileStatus>(locatedFiles);
            }
            sw.Stop();
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
            }
            Log.Info("Total input paths to process : " + result.Length);
            return(result);
        }
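
A sketch of the two listing knobs this method reads, assuming the standard Hadoop key strings behind the InputDirRecursive and ListStatusNumThreads constants:

        // Hypothetical configuration of the listing behavior in ListStatus above.
        JobConf job = new JobConf();
        job.SetBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);  // recurse into subdirectories
        job.SetInt("mapreduce.input.fileinputformat.list-status.num-threads", 4);     // use the multi-threaded fetcher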
Example #11
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces
                                              , Path sortInput, Path sortOutput)
            {
                JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));

                jobConf.SetJobName("sortvalidate-record-checker");
                jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(BytesWritable));
                jobConf.SetOutputValueClass(typeof(IntWritable));
                jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
                jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
                JobClient     client  = new JobClient(jobConf);
                ClusterStatus cluster = client.GetClusterStatus();

                if (noMaps == -1)
                {
                    noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
                }
                if (noReduces == -1)
                {
                    noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
                    string sortReduces = jobConf.Get(ReducesPerHost);
                    if (sortReduces != null)
                    {
                        noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
                    }
                }
                jobConf.SetNumMapTasks(noMaps);
                jobConf.SetNumReduceTasks(noReduces);
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path       outputPath = new Path("/tmp/sortvalidate/recordchecker");
                FileSystem fs         = FileSystem.Get(defaults);

                if (fs.Exists(outputPath))
                {
                    fs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster
                                             .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths
                                             [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces
                                             + " reduces.");
                DateTime startTime = new DateTime();

                System.Console.Out.WriteLine("Job started: " + startTime);
                JobClient.RunJob(jobConf);
                DateTime end_time = new DateTime();

                System.Console.Out.WriteLine("Job ended: " + end_time);
                System.Console.Out.WriteLine("The job took " + (end_time.GetTime() - startTime.GetTime
                                                                    ()) / 1000 + " seconds.");
            }
Example #12
 // Dummy InputFormat that sends 1 record - the number of splits is numMapTasks
 public virtual InputSplit[] GetSplits(JobConf conf, int numSplits)
 {
     numSplits = conf.GetInt("LG.numMapTasks", 1);
     InputSplit[] ret = new InputSplit[numSplits];
     for (int i = 0; i < numSplits; ++i)
     {
         ret[i] = new LoadGeneratorMR.EmptySplit();
     }
     return(ret);
 }
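
A minimal usage sketch: GetSplits above ignores the framework's numSplits hint and reads this key instead (the value is an example):

     // Hypothetical: request 8 empty splits, i.e. 8 map tasks.
     JobConf conf = new JobConf();
     conf.SetInt("LG.numMapTasks", 8);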
        internal ShuffleClientMetrics(TaskAttemptID reduceId, JobConf jobConf)
        {
            this.numCopiers = jobConf.GetInt(MRJobConfig.ShuffleParallelCopies, 5);
            MetricsContext metricsContext = MetricsUtil.GetContext("mapred");

            this.shuffleMetrics = MetricsUtil.CreateRecord(metricsContext, "shuffleInput");
            this.shuffleMetrics.SetTag("user", jobConf.GetUser());
            this.shuffleMetrics.SetTag("jobName", jobConf.GetJobName());
            this.shuffleMetrics.SetTag("jobId", reduceId.GetJobID().ToString());
            this.shuffleMetrics.SetTag("taskId", reduceId.ToString());
            this.shuffleMetrics.SetTag("sessionId", jobConf.GetSessionId());
            metricsContext.RegisterUpdater(this);
        }
Example #14
        /// <summary>Configures all the chain elements for the task.</summary>
        /// <param name="jobConf">chain job's JobConf.</param>
        public virtual void Configure(JobConf jobConf)
        {
            string prefix = GetPrefix(isMap);

            chainJobConf = jobConf;
            SerializationFactory serializationFactory = new SerializationFactory(chainJobConf
                                                                                 );
            int index = jobConf.GetInt(prefix + ChainMapperSize, 0);

            for (int i = 0; i < index; i++)
            {
                Type    klass = jobConf.GetClass <Mapper>(prefix + ChainMapperClass + i, null);
                JobConf mConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainMapperConfig
                                                                + i));
                Mapper mapper = ReflectionUtils.NewInstance(klass, mConf);
                mappers.AddItem(mapper);
                if (mConf.GetBoolean(MapperByValue, true))
                {
                    mappersKeySerialization.AddItem(serializationFactory.GetSerialization(mConf.GetClass
                                                                                              (MapperOutputKeyClass, null)));
                    mappersValueSerialization.AddItem(serializationFactory.GetSerialization(mConf.GetClass
                                                                                                (MapperOutputValueClass, null)));
                }
                else
                {
                    mappersKeySerialization.AddItem(null);
                    mappersValueSerialization.AddItem(null);
                }
            }
            Type klass_1 = jobConf.GetClass <Reducer>(prefix + ChainReducerClass, null);

            if (klass_1 != null)
            {
                JobConf rConf = new JobConf(GetChainElementConf(jobConf, prefix + ChainReducerConfig
                                                                ));
                reducer = ReflectionUtils.NewInstance(klass_1, rConf);
                if (rConf.GetBoolean(ReducerByValue, true))
                {
                    reducerKeySerialization = serializationFactory.GetSerialization(rConf.GetClass(ReducerOutputKeyClass
                                                                                                   , null));
                    reducerValueSerialization = serializationFactory.GetSerialization(rConf.GetClass(
                                                                                          ReducerOutputValueClass, null));
                }
                else
                {
                    reducerKeySerialization   = null;
                    reducerValueSerialization = null;
                }
            }
        }
        /// <summary>Helper function to generate a name that is unique for the task.</summary>
        /// <remarks>
        /// Helper function to generate a name that is unique for the task.
        /// <p>The generated name can be used to create custom files from within the
        /// different tasks for the job, the names for different tasks will not collide
        /// with each other.</p>
        /// <p>The given name is postfixed with the task type, 'm' for maps, 'r' for
        /// reduces and the task partition number. For example, give a name 'test'
        /// running on the first map o the job the generated name will be
        /// 'test-m-00000'.</p>
        /// </remarks>
        /// <param name="conf">the configuration for the job.</param>
        /// <param name="name">the name to make unique.</param>
        /// <returns>a unique name accross all tasks of the job.</returns>
        public static string GetUniqueName(JobConf conf, string name)
        {
            int partition = conf.GetInt(JobContext.TaskPartition, -1);

            if (partition == -1)
            {
                throw new ArgumentException("This method can only be called from within a Job");
            }
            string taskType = conf.GetBoolean(JobContext.TaskIsmap, JobContext.DefaultTaskIsmap
                                              ) ? "m" : "r";
            NumberFormat numberFormat = NumberFormat.GetInstance();

            numberFormat.SetMinimumIntegerDigits(5);
            numberFormat.SetGroupingUsed(false);
            return(name + "-" + taskType + "-" + numberFormat.Format(partition));
        }
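
A usage sketch of the naming scheme the doc comment describes; the two conf values stand in for what the framework normally sets inside a running task:

        // Simulating the first map task of a job (values normally set by the framework).
        JobConf conf = new JobConf();
        conf.SetInt(JobContext.TaskPartition, 0);
        conf.SetBoolean(JobContext.TaskIsmap, true);
        string unique = GetUniqueName(conf, "test");   // "test-m-00000"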
        private static AList <ValueAggregatorDescriptor> GetAggregatorDescriptors(JobConf
                                                                                  job)
        {
            string advn = "aggregator.descriptor";
            int    num  = job.GetInt(advn + ".num", 0);
            AList <ValueAggregatorDescriptor> retv = new AList <ValueAggregatorDescriptor>(num);

            for (int i = 0; i < num; i++)
            {
                string spec = job.Get(advn + "." + i);
                ValueAggregatorDescriptor ad = GetValueAggregatorDescriptor(spec, job);
                if (ad != null)
                {
                    retv.AddItem(ad);
                }
            }
            return(retv);
        }
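
The convention read above is aggregator.descriptor.num plus numbered spec entries; a sketch, assuming the usual "UserDefined,&lt;class&gt;" spec format of the aggregate package and a made-up descriptor class:

        // Hypothetical setup matching the keys read by GetAggregatorDescriptors above.
        JobConf job = new JobConf();
        job.SetInt("aggregator.descriptor.num", 1);
        job.Set("aggregator.descriptor.0", "UserDefined,MyPackage.MyAggregatorDescriptor");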
        public virtual void Configure(JobConf jobConf)
        {
            int numberOfThreads = jobConf.GetInt(MultithreadedMapper.NumThreads, 10);

            if (Log.IsDebugEnabled())
            {
                Log.Debug("Configuring jobConf " + jobConf.GetJobName() + " to use " + numberOfThreads
                          + " threads");
            }
            this.job = jobConf;
            //increment processed counter only if skipping feature is enabled
            this.incrProcCount = SkipBadRecords.GetMapperMaxSkipRecords(job) > 0 && SkipBadRecords
                                 .GetAutoIncrMapperProcCount(job);
            this.mapper = ReflectionUtils.NewInstance(jobConf.GetMapperClass(), jobConf);
            // Creating a threadpool of the configured size to execute the Mapper
            // map method in parallel.
            executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 0L, TimeUnit
                                                     .Milliseconds, new MultithreadedMapRunner.BlockingArrayQueue(numberOfThreads));
        }
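
A minimal sketch of overriding the thread count read above (4 is an arbitrary choice):

        // Hypothetical: run each map task's mapper with 4 worker threads instead of 10.
        JobConf jobConf = new JobConf();
        jobConf.SetInt(MultithreadedMapper.NumThreads, 4);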
Example #18
 public virtual void Configure(JobConf conf)
 {
     SetConf(conf);
     try
     {
         fs = FileSystem.Get(conf);
     }
     catch (Exception e)
     {
         throw new RuntimeException("Cannot create file system.", e);
     }
     bufferSize = conf.GetInt("test.io.file.buffer.size", 4096);
     buffer     = new byte[bufferSize];
     try
     {
         hostName = Sharpen.Runtime.GetLocalHost().GetHostName();
     }
     catch (Exception)
     {
         hostName = "localhost";
     }
 }
        /// <summary>Generate the outfile name based on a given anme and the input file name.
        ///     </summary>
        /// <remarks>
        /// Generate the outfile name based on a given anme and the input file name. If
        /// the
        /// <see cref="Org.Apache.Hadoop.Mapreduce.MRJobConfig.MapInputFile"/>
        /// does not exists (i.e. this is not for a map only job),
        /// the given name is returned unchanged. If the config value for
        /// "num.of.trailing.legs.to.use" is not set, or set 0 or negative, the given
        /// name is returned unchanged. Otherwise, return a file name consisting of the
        /// N trailing legs of the input file name where N is the config value for
        /// "num.of.trailing.legs.to.use".
        /// </remarks>
        /// <param name="job">the job config</param>
        /// <param name="name">the output file name</param>
        /// <returns>the outfile name based on a given anme and the input file name.</returns>
        protected internal virtual string GetInputFileBasedOutputFileName(JobConf job, string
                                                                          name)
        {
            string infilepath = job.Get(MRJobConfig.MapInputFile);

            if (infilepath == null)
            {
                // if the {@link JobContext#MAP_INPUT_FILE} does not exist,
                // then return the given name
                return(name);
            }
            int numOfTrailingLegsToUse = job.GetInt("mapred.outputformat.numOfTrailingLegs",
                                                    0);

            if (numOfTrailingLegsToUse <= 0)
            {
                return(name);
            }
            Path   infile  = new Path(infilepath);
            Path   parent  = infile.GetParent();
            string midName = infile.GetName();
            Path   outPath = new Path(midName);

            for (int i = 1; i < numOfTrailingLegsToUse; i++)
            {
                if (parent == null)
                {
                    break;
                }
                midName = parent.GetName();
                if (midName.Length == 0)
                {
                    break;
                }
                parent  = parent.GetParent();
                outPath = new Path(midName, outPath);
            }
            return(outPath.ToString());
        }
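
A worked trace of the trailing-legs logic above, with a made-up input path:

        // With num.of.trailing.legs.to.use = 2 and map input /a/b/c/part-00000,
        // the loop keeps the last two legs of the input path.
        JobConf job = new JobConf();
        job.Set(MRJobConfig.MapInputFile, "/a/b/c/part-00000");
        job.SetInt("mapred.outputformat.numOfTrailingLegs", 2);
        // GetInputFileBasedOutputFileName(job, "out") returns "c/part-00000".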
Example #20
 public override void Configure(JobConf job)
 {
     // 'key' == sortInput for sort-input; key == sortOutput for sort-output
     key = DeduceInputFile(job);
     if (key == sortOutput)
     {
         partitioner = new HashPartitioner <WritableComparable, Writable>();
         // Figure the 'current' partition and no. of reduces of the 'sort'
         try
         {
             URI    inputURI  = new URI(job.Get(JobContext.MapInputFile));
             string inputFile = inputURI.GetPath();
             // part file is of the form part-r-xxxxx
             partition = Sharpen.Extensions.ValueOf(Sharpen.Runtime.Substring(inputFile, inputFile
                                                                              .LastIndexOf("part") + 7));
             noSortReducers = job.GetInt(SortReduces, -1);
         }
         catch (Exception e)
         {
             System.Console.Error.WriteLine("Caught: " + e);
             System.Environment.Exit(-1);
         }
     }
 }
Example #21
 public override void Configure(JobConf job)
 {
     pattern = Sharpen.Pattern.Compile(job.Get(RegexMapper.Pattern));
     group   = job.GetInt(RegexMapper.Group, 0);
 }
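
A sketch of driving the two keys read above; the pattern and group values are examples:

     // Hypothetical RegexMapper setup: emit the first capture group of each match.
     JobConf job = new JobConf();
     job.Set(RegexMapper.Pattern, "ERROR\\s+(\\d+)");  // regex to match
     job.SetInt(RegexMapper.Group, 1);                 // capture group to emit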
Example #22
            public virtual int GetPartition(Text key, Text value, int numPartitions)
            {
                int keyValue = 0;

                try
                {
                    keyValue = System.Convert.ToInt32(key.ToString());
                }
                catch (FormatException)
                {
                    keyValue = 0;
                }
                int partitionNumber = (numPartitions * (Math.Max(0, keyValue - 1))) / job.GetInt(
                    "mapred.test.num_lines", 10000);

                return(partitionNumber);
            }
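
A worked example of the partition arithmetic above, with numPartitions = 4 and mapred.test.num_lines left at its default of 10000:

                // key "1"     -> (4 * 0)    / 10000 = 0
                // key "5001"  -> (4 * 5000) / 10000 = 2
                // key "10000" -> (4 * 9999) / 10000 = 3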
Example #23
        /// <summary>
        /// This launches a given namenode operation (<code>-operation</code>),
        /// starting at a given time (<code>-startTime</code>).
        /// </summary>
        /// <remarks>
        /// This launches a given namenode operation (<code>-operation</code>),
        /// starting at a given time (<code>-startTime</code>).  The files used
        /// by the openRead, rename, and delete operations are the same files
        /// created by the createWrite operation.  Typically, the program
        /// would be run four times, once for each operation in this order:
        /// createWrite, openRead, rename, delete.
        /// <pre>
        /// Usage: nnbench
        /// -operation <one of createWrite, openRead, rename, or delete>
        /// -baseDir <base output/input DFS path>
        /// -startTime <time to start, given in seconds from the epoch>
        /// -numFiles <number of files to create, read, rename, or delete>
        /// -blocksPerFile <number of blocks to create per file>
        /// [-bytesPerBlock <number of bytes to write to each block, default is 1>]
        /// [-bytesPerChecksum <value for io.bytes.per.checksum>]
        /// </pre>
        /// </remarks>
        /// <param name="args">is an array of the program command line arguments</param>
        /// <exception cref="System.IO.IOException">indicates a problem with test startup</exception>
        public static void Main(string[] args)
        {
            string version = "NameNodeBenchmark.0.3";

            System.Console.Out.WriteLine(version);
            int    bytesPerChecksum = -1;
            string usage            = "Usage: nnbench " + "  -operation <one of createWrite, openRead, rename, or delete> "
                                      + "  -baseDir <base output/input DFS path> " + "  -startTime <time to start, given in seconds from the epoch> "
                                      + "  -numFiles <number of files to create> " + "  -blocksPerFile <number of blocks to create per file> "
                                      + "  [-bytesPerBlock <number of bytes to write to each block, default is 1>] "
                                      + "  [-bytesPerChecksum <value for io.bytes.per.checksum>]" + "Note: bytesPerBlock MUST be a multiple of bytesPerChecksum";
            string operation = null;

            for (int i = 0; i < args.Length; i++)
            {
                // parse command line
                if (args[i].Equals("-baseDir"))
                {
                    baseDir = new Path(args[++i]);
                }
                else
                {
                    if (args[i].Equals("-numFiles"))
                    {
                        numFiles = System.Convert.ToInt32(args[++i]);
                    }
                    else
                    {
                        if (args[i].Equals("-blocksPerFile"))
                        {
                            blocksPerFile = System.Convert.ToInt32(args[++i]);
                        }
                        else
                        {
                            if (args[i].Equals("-bytesPerBlock"))
                            {
                                bytesPerBlock = long.Parse(args[++i]);
                            }
                            else
                            {
                                if (args[i].Equals("-bytesPerChecksum"))
                                {
                                    bytesPerChecksum = System.Convert.ToInt32(args[++i]);
                                }
                                else
                                {
                                    if (args[i].Equals("-startTime"))
                                    {
                                        startTime = long.Parse(args[++i]) * 1000;
                                    }
                                    else
                                    {
                                        if (args[i].Equals("-operation"))
                                        {
                                            operation = args[++i];
                                        }
                                        else
                                        {
                                            System.Console.Out.WriteLine(usage);
                                            System.Environment.Exit(-1);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            bytesPerFile = bytesPerBlock * blocksPerFile;
            JobConf jobConf = new JobConf(new Configuration(), typeof(NNBench));

            if (bytesPerChecksum < 0)
            {
                // if it is not set in cmdline
                bytesPerChecksum = jobConf.GetInt("io.bytes.per.checksum", 512);
            }
            jobConf.Set("io.bytes.per.checksum", Sharpen.Extensions.ToString(bytesPerChecksum
                                                                             ));
            System.Console.Out.WriteLine("Inputs: ");
            System.Console.Out.WriteLine("   operation: " + operation);
            System.Console.Out.WriteLine("   baseDir: " + baseDir);
            System.Console.Out.WriteLine("   startTime: " + startTime);
            System.Console.Out.WriteLine("   numFiles: " + numFiles);
            System.Console.Out.WriteLine("   blocksPerFile: " + blocksPerFile);
            System.Console.Out.WriteLine("   bytesPerBlock: " + bytesPerBlock);
            System.Console.Out.WriteLine("   bytesPerChecksum: " + bytesPerChecksum);
            if (operation == null || baseDir == null || numFiles < 1 || blocksPerFile < 1 ||
                bytesPerBlock < 0 || bytesPerBlock % bytesPerChecksum != 0)
            {
                // verify args
                System.Console.Error.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            fileSys = FileSystem.Get(jobConf);
            string uniqueId = Sharpen.Runtime.GetLocalHost().GetHostName();

            taskDir = new Path(baseDir, uniqueId);
            // initialize buffer used for writing/reading file
            buffer = new byte[(int)Math.Min(bytesPerFile, 32768L)];
            DateTime execTime;
            DateTime endTime;
            long     duration;
            int      exceptions = 0;

            Barrier();
            // wait for coordinated start time
            execTime = new DateTime();
            System.Console.Out.WriteLine("Job started: " + startTime);
            if (operation.Equals("createWrite"))
            {
                if (!fileSys.Mkdirs(taskDir))
                {
                    throw new IOException("Mkdirs failed to create " + taskDir.ToString());
                }
                exceptions = CreateWrite();
            }
            else
            {
                if (operation.Equals("openRead"))
                {
                    exceptions = OpenRead();
                }
                else
                {
                    if (operation.Equals("rename"))
                    {
                        exceptions = Rename();
                    }
                    else
                    {
                        if (operation.Equals("delete"))
                        {
                            exceptions = Delete();
                        }
                        else
                        {
                            System.Console.Error.WriteLine(usage);
                            System.Environment.Exit(-1);
                        }
                    }
                }
            }
            endTime = new DateTime();
            System.Console.Out.WriteLine("Job ended: " + endTime);
            duration = (endTime.GetTime() - execTime.GetTime()) / 1000;
            System.Console.Out.WriteLine("The " + operation + " job took " + duration + " seconds."
                                         );
            System.Console.Out.WriteLine("The job recorded " + exceptions + " exceptions.");
        }
Example #24
        // TODO later:  add explicit "isUber()" checks of some sort
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public virtual void TestContainerRollingLog()
        {
            if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
            {
                Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test."
                         );
                return;
            }
            SleepJob sleepJob  = new SleepJob();
            JobConf  sleepConf = new JobConf(mrCluster.GetConfig());

            sleepConf.Set(MRJobConfig.MapLogLevel, Level.All.ToString());
            long userLogKb = 4;

            sleepConf.SetLong(MRJobConfig.TaskUserlogLimit, userLogKb);
            sleepConf.SetInt(MRJobConfig.TaskLogBackups, 3);
            sleepConf.Set(MRJobConfig.MrAmLogLevel, Level.All.ToString());
            long amLogKb = 7;

            sleepConf.SetLong(MRJobConfig.MrAmLogKb, amLogKb);
            sleepConf.SetInt(MRJobConfig.MrAmLogBackups, 7);
            sleepJob.SetConf(sleepConf);
            Job job = sleepJob.CreateJob(1, 0, 1L, 100, 0L, 0);

            job.SetJarByClass(typeof(SleepJob));
            job.AddFileToClassPath(AppJar);
            // The AppMaster jar itself.
            job.WaitForCompletion(true);
            JobId         jobId       = TypeConverter.ToYarn(job.GetJobID());
            ApplicationId appID       = jobId.GetAppId();
            int           pollElapsed = 0;

            while (true)
            {
                Sharpen.Thread.Sleep(1000);
                pollElapsed += 1000;
                if (TerminalRmAppStates.Contains(mrCluster.GetResourceManager().GetRMContext().GetRMApps
                                                     ()[appID].GetState()))
                {
                    break;
                }
                if (pollElapsed >= 60000)
                {
                    Log.Warn("application did not reach terminal state within 60 seconds");
                    break;
                }
            }
            NUnit.Framework.Assert.AreEqual(RMAppState.Finished, mrCluster.GetResourceManager
                                                ().GetRMContext().GetRMApps()[appID].GetState());
            // Job finished, verify logs
            //
            string appIdStr    = appID.ToString();
            string appIdSuffix = Sharpen.Runtime.Substring(appIdStr, "application_".Length, appIdStr
                                                           .Length);
            string containerGlob = "container_" + appIdSuffix + "_*_*";
            string syslogGlob    = appIdStr + Path.Separator + containerGlob + Path.Separator +
                                   TaskLog.LogName.Syslog;
            int numAppMasters = 0;
            int numMapTasks   = 0;

            for (int i = 0; i < NumNodeMgrs; i++)
            {
                Configuration nmConf = mrCluster.GetNodeManager(i).GetConfig();
                foreach (string logDir in nmConf.GetTrimmedStrings(YarnConfiguration.NmLogDirs))
                {
                    Path absSyslogGlob = new Path(logDir + Path.Separator + syslogGlob);
                    Log.Info("Checking for glob: " + absSyslogGlob);
                    FileStatus[] syslogs = localFs.GlobStatus(absSyslogGlob);
                    foreach (FileStatus slog in syslogs)
                    {
                        bool foundAppMaster         = job.IsUber();
                        Path containerPathComponent = slog.GetPath().GetParent();
                        if (!foundAppMaster)
                        {
                            ContainerId cid = ConverterUtils.ToContainerId(containerPathComponent.GetName());
                            foundAppMaster = ((cid.GetContainerId() & ContainerId.ContainerIdBitmask) == 1);
                        }
                        FileStatus[] sysSiblings = localFs.GlobStatus(new Path(containerPathComponent, TaskLog.LogName
                                                                               .Syslog + "*"));
                        // sort to ensure for i > 0 sysSiblings[i] == "syslog.i"
                        Arrays.Sort(sysSiblings);
                        if (foundAppMaster)
                        {
                            numAppMasters++;
                        }
                        else
                        {
                            numMapTasks++;
                        }
                        if (foundAppMaster)
                        {
                            NUnit.Framework.Assert.AreSame("Unexpected number of AM sylog* files", sleepConf.
                                                           GetInt(MRJobConfig.MrAmLogBackups, 0) + 1, sysSiblings.Length);
                            NUnit.Framework.Assert.IsTrue("AM syslog.1 length kb should be >= " + amLogKb, sysSiblings
                                                          [1].GetLen() >= amLogKb * 1024);
                        }
                        else
                        {
                            NUnit.Framework.Assert.AreSame("Unexpected number of MR task sylog* files", sleepConf
                                                           .GetInt(MRJobConfig.TaskLogBackups, 0) + 1, sysSiblings.Length);
                            NUnit.Framework.Assert.IsTrue("MR syslog.1 length kb should be >= " + userLogKb,
                                                          sysSiblings[1].GetLen() >= userLogKb * 1024);
                        }
                    }
                }
            }
            // Make sure we checked non-empty set
            //
            NUnit.Framework.Assert.AreEqual("No AppMaster log found!", 1, numAppMasters);
            if (sleepConf.GetBoolean(MRJobConfig.JobUbertaskEnable, false))
            {
                NUnit.Framework.Assert.AreEqual("MapTask log with uber found!", 0, numMapTasks);
            }
            else
            {
                NUnit.Framework.Assert.AreEqual("No MapTask log found!", 1, numMapTasks);
            }
        }
Example #25
        public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS
                                , LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec
                                , Type combinerClass, Task.CombineOutputCollector <K, V> combineCollector, Counters.Counter
                                spilledRecordsCounter, Counters.Counter reduceCombineInputCounter, Counters.Counter
                                mergedMapOutputsCounter, ExceptionReporter exceptionReporter, Progress mergePhase
                                , MapOutputFile mapOutputFile)
        {
            /* Maximum percentage of the in-memory limit that a single shuffle
             * can consume */
            this.reduceId                  = reduceId;
            this.jobConf                   = jobConf;
            this.localDirAllocator         = localDirAllocator;
            this.exceptionReporter         = exceptionReporter;
            this.reporter                  = reporter;
            this.codec                     = codec;
            this.combinerClass             = combinerClass;
            this.combineCollector          = combineCollector;
            this.reduceCombineInputCounter = reduceCombineInputCounter;
            this.spilledRecordsCounter     = spilledRecordsCounter;
            this.mergedMapOutputsCounter   = mergedMapOutputsCounter;
            this.mapOutputFile             = mapOutputFile;
            this.mapOutputFile.SetConf(jobConf);
            this.localFS = localFS;
            this.rfs     = ((LocalFileSystem)localFS).GetRaw();
            float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent, MRJobConfig
                                                     .DefaultShuffleInputBufferPercent);

            if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleInputBufferPercent
                                            + ": " + maxInMemCopyUse);
            }
            // Allow unit tests to fix Runtime memory
            this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes, Runtime
                                                      .GetRuntime().MaxMemory()) * maxInMemCopyUse);
            this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);
            float singleShuffleMemoryLimitPercent = jobConf.GetFloat(MRJobConfig.ShuffleMemoryLimitPercent
                                                                     , DefaultShuffleMemoryLimitPercent);

            if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent >
                1.0f)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleMemoryLimitPercent
                                            + ": " + singleShuffleMemoryLimitPercent);
            }
            usedMemory   = 0L;
            commitMemory = 0L;
            this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent
                                                );
            this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold
                                                                , ioSortFactor);
            this.mergeThreshold = (long)(this.memoryLimit * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent
                                                                             , 0.90f));
            Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
                     + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
                     + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold
                     );
            if (this.maxSingleShuffleLimit >= this.mergeThreshold)
            {
                throw new RuntimeException("Invalid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold "
                                           + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + "mergeThreshold: " +
                                           this.mergeThreshold);
            }
            bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false
                                                         );

            if (allowMemToMemMerge)
            {
                this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this,
                                                                                            this, memToMemMergeOutputsThreshold);
                this.memToMemMerger.Start();
            }
            else
            {
                this.memToMemMerger = null;
            }
            this.inMemoryMerger = CreateInMemoryMerger();
            this.inMemoryMerger.Start();
            this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
            this.onDiskMerger.Start();
            this.mergePhase = mergePhase;
        }
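
A worked example of the memory budget computed above; all figures are assumptions: 0.70 and 0.25 stand in for typical input-buffer and single-shuffle percentages, and 0.90 is the merge-percent fallback used in this constructor:

        long heap                  = 1L << 30;                      // assumed 1 GiB reduce heap
        long memoryLimit           = (long)(heap * 0.70f);          // ~752 MB available to shuffle
        long maxSingleShuffleLimit = (long)(memoryLimit * 0.25f);   // ~188 MB per fetched map output
        long mergeThreshold        = (long)(memoryLimit * 0.90f);   // merge once ~676 MB is buffered
        // maxSingleShuffleLimit < mergeThreshold, so the sanity check above passes.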
Example #26
 public IndexCache(JobConf conf)
 {
     this.conf          = conf;
     totalMemoryAllowed = conf.GetInt(TTConfig.TtIndexCache, 10) * 1024 * 1024;
     Log.Info("IndexCache created with max memory = " + totalMemoryAllowed);
 }
 public virtual void Configure(JobConf conf)
 {
     N = conf.GetInt("mapreduce.input.lineinputformat.linespermap", 1);
 }
Example #28
 public virtual void Configure(JobConf job)
 {
     srcs = job.GetInt("testdatamerge.sources", 0);
     NUnit.Framework.Assert.IsTrue("Invalid src count: " + srcs, srcs > 0);
 }