コード例 #1
0
ファイル: Fetcher.cs プロジェクト: orf53975/hadoop.net
        /// <summary>
        /// Creates a fetcher: stores its collaborators, resolves the shuffle error
        /// counters and timeout/retry settings from the job configuration, names
        /// itself "fetcher#id", marks itself as a daemon and, when SSL shuffle is
        /// enabled, makes sure the shared SSL factory is initialised.
        /// </summary>
        /// <param name="job">job configuration the settings are read from</param>
        /// <param name="reduceId">attempt id of the reduce task; its task id number is stored</param>
        /// <param name="scheduler">shuffle scheduler stored for later use</param>
        /// <param name="merger">merge manager stored for later use</param>
        /// <param name="reporter">reporter used to look up the shuffle error counters</param>
        /// <param name="metrics">shuffle client metrics stored for later use</param>
        /// <param name="exceptionReporter">exception reporter stored for later use</param>
        /// <param name="shuffleKey">secret key stored as the shuffle secret</param>
        /// <param name="id">numeric id of this fetcher, also used in its name</param>
        /// <exception cref="RuntimeException">if the SSL factory cannot be initialised</exception>
        internal Fetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl <K, V>
                         scheduler, MergeManager <K, V> merger, Reporter reporter, ShuffleClientMetrics metrics
                         , ExceptionReporter exceptionReporter, SecretKey shuffleKey, int id)
        {
            this.jobConf           = job;
            this.reporter          = reporter;
            this.scheduler         = scheduler;
            this.merger            = merger;
            this.metrics           = metrics;
            this.exceptionReporter = exceptionReporter;
            this.id               = id;
            this.reduce           = reduceId.GetTaskID().GetId();
            this.shuffleSecretKey = shuffleKey;

            // One counter per shuffle error category, all in the same counter group.
            ioErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.IoError.ToString
                                             ());
            wrongLengthErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongLength
                                                  .ToString());
            badIdErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.BadId.ToString
                                                ());
            wrongMapErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongMap
                                               .ToString());
            connectionErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.Connection
                                                 .ToString());
            wrongReduceErrs = reporter.GetCounter(ShuffleErrGrpName, Fetcher.ShuffleErrors.WrongReduce
                                                  .ToString());

            // Timeouts and retry behaviour, each falling back to the given default.
            this.connectionTimeout = job.GetInt(MRJobConfig.ShuffleConnectTimeout, DefaultStalledCopyTimeout
                                                );
            this.readTimeout        = job.GetInt(MRJobConfig.ShuffleReadTimeout, DefaultReadTimeout);
            this.fetchRetryInterval = job.GetInt(MRJobConfig.ShuffleFetchRetryIntervalMs, MRJobConfig
                                                 .DefaultShuffleFetchRetryIntervalMs);
            this.fetchRetryTimeout = job.GetInt(MRJobConfig.ShuffleFetchRetryTimeoutMs, DefaultStalledCopyTimeout
                                                );
            // The default for fetch retry follows the node manager recovery setting.
            bool shuffleFetchEnabledDefault = job.GetBoolean(YarnConfiguration.NmRecoveryEnabled
                                                             , YarnConfiguration.DefaultNmRecoveryEnabled);

            this.fetchRetryEnabled = job.GetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, shuffleFetchEnabledDefault
                                                    );
            SetName("fetcher#" + id);
            SetDaemon(true);
            lock (typeof(Org.Apache.Hadoop.Mapreduce.Task.Reduce.Fetcher))
            {
                sslShuffle = job.GetBoolean(MRConfig.ShuffleSslEnabledKey, MRConfig.ShuffleSslEnabledDefault
                                            );
                if (sslShuffle && sslFactory == null)
                {
                    // Build the factory in a local and publish it only once Init()
                    // succeeded. Otherwise a failed initialisation would leave a
                    // destroyed factory in the shared field and every later fetcher
                    // would skip initialisation and reuse it.
                    SSLFactory factory = new SSLFactory(SSLFactory.Mode.Client, job);
                    try
                    {
                        factory.Init();
                    }
                    catch (Exception ex)
                    {
                        factory.Destroy();
                        throw new RuntimeException(ex);
                    }
                    sslFactory = factory;
                }
            }
        }
コード例 #2
0
        /// <summary>Deletes the flag directory and then creates it again.</summary>
        /// <param name="conf">
        /// configuration used to obtain the file system; its "localFS" boolean
        /// (default true) is handed to GetFlagDir
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private static void CleanFlags(JobConf conf)
        {
            FileSystem fileSystem = FileSystem.Get(conf);
            bool       useLocalFs = conf.GetBoolean("localFS", true);

            // NOTE(review): the second Delete argument is presumably "recursive" - confirm.
            fileSystem.Delete(GetFlagDir(useLocalFs), true);
            fileSystem.Mkdirs(GetFlagDir(useLocalFs));
        }
コード例 #3
0
 /// <summary>
 /// Creates the shuffle scheduler: sizes its bookkeeping from the number of
 /// map tasks, stores the supplied counters and reporters, reads the fetch
 /// failure limits from the job configuration and finally starts the referee.
 /// </summary>
 /// <param name="job">job configuration the limits are read from</param>
 /// <param name="status">task status stored for later use</param>
 /// <param name="reduceId">attempt id of the reduce task</param>
 /// <param name="reporter">exception reporter stored for later use</param>
 /// <param name="progress">progress object stored for later use</param>
 /// <param name="shuffledMapsCounter">counter stored for later use</param>
 /// <param name="reduceShuffleBytes">counter stored for later use</param>
 /// <param name="failedShuffleCounter">counter stored for later use</param>
 public ShuffleSchedulerImpl(JobConf job, TaskStatus status, TaskAttemptID reduceId
                             , ExceptionReporter reporter, Progress progress, Counters.Counter shuffledMapsCounter
                             , Counters.Counter reduceShuffleBytes, Counters.Counter failedShuffleCounter)
 {
     referee                   = new ShuffleSchedulerImpl.Referee(this);
     totalMaps                 = job.GetNumMapTasks();
     abortFailureLimit         = Math.Max(30, totalMaps / 10);
     copyTimeTracker           = new ShuffleSchedulerImpl.CopyTimeTracker();
     remainingMaps             = totalMaps;
     finishedMaps              = new bool[remainingMaps];
     this.reporter             = reporter;
     this.status               = status;
     this.reduceId             = reduceId;
     this.progress             = progress;
     this.shuffledMapsCounter  = shuffledMapsCounter;
     this.reduceShuffleBytes   = reduceShuffleBytes;
     this.failedShuffleCounter = failedShuffleCounter;
     this.startTime            = Time.MonotonicNow();
     lastProgressTime          = startTime;
     this.maxFailedUniqueFetches          = Math.Min(totalMaps, 5);
     this.maxFetchFailuresBeforeReporting = job.GetInt(MRJobConfig.ShuffleFetchFailures
                                                       , ReportFailureLimit);
     this.reportReadErrorImmediately = job.GetBoolean(MRJobConfig.ShuffleNotifyReaderror
                                                      , true);
     this.maxDelay = job.GetLong(MRJobConfig.MaxShuffleFetchRetryDelay, MRJobConfig.DefaultMaxShuffleFetchRetryDelay
                                 );
     this.maxHostFailures = job.GetInt(MRJobConfig.MaxShuffleFetchHostFailures, MRJobConfig
                                       .DefaultMaxShuffleFetchHostFailures);
     // Start the referee last: it was handed `this`, so starting it before the
     // assignments above would let it run against a partially initialised
     // scheduler. NOTE(review): Referee is presumably a background thread - confirm.
     referee.Start();
 }
コード例 #4
0
        /// <summary>
        /// Builds a submission context from a job configuration that sets the admin
        /// and user LD_LIBRARY_PATH values and the admin user shell, then checks the
        /// LD_LIBRARY_PATH and SHELL entries of the AM container environment.
        /// </summary>
        public virtual void TestAMStandardEnv()
        {
            string  AdminLibPath = "foo";
            string  UserLibPath  = "bar";
            string  UserShell    = "shell";
            JobConf jobConf      = new JobConf();

            jobConf.Set(MRJobConfig.MrAmAdminUserEnv, "LD_LIBRARY_PATH=" + AdminLibPath);
            jobConf.Set(MRJobConfig.MrAmEnv, "LD_LIBRARY_PATH=" + UserLibPath);
            jobConf.Set(MRJobConfig.MapredAdminUserShell, UserShell);
            YARNRunner yarnRunner = new YARNRunner(jobConf);
            ApplicationSubmissionContext appSubCtx = BuildSubmitContext(yarnRunner, jobConf);
            // make sure PWD is first in the lib path
            ContainerLaunchContext       clc = appSubCtx.GetAMContainerSpec();
            IDictionary <string, string> env = clc.GetEnvironment();

            // TryGetValue leaves the variable null for a missing key, so the
            // assertion below reports the problem instead of the indexer throwing.
            string libPath;
            env.TryGetValue(ApplicationConstants.Environment.LdLibraryPath.ToString(), out libPath);

            // NUnit takes the value under test first and the message last.
            NUnit.Framework.Assert.IsNotNull(libPath, "LD_LIBRARY_PATH not set");
            string cps = jobConf.GetBoolean(MRConfig.MapreduceAppSubmissionCrossPlatform, MRConfig
                                            .DefaultMapreduceAppSubmissionCrossPlatform) ? ApplicationConstants.ClassPathSeparator
                                 : FilePath.pathSeparator;
            string expectedLibPath = MRApps.CrossPlatformifyMREnv(conf, ApplicationConstants.Environment.Pwd)
                                     + cps + AdminLibPath + cps + UserLibPath;

            NUnit.Framework.Assert.AreEqual(expectedLibPath, libPath, "Bad AM LD_LIBRARY_PATH setting");
            // make sure SHELL is set
            string shell;
            env.TryGetValue(ApplicationConstants.Environment.Shell.ToString(), out shell);

            NUnit.Framework.Assert.IsNotNull(shell, "SHELL not set");
            NUnit.Framework.Assert.AreEqual(UserShell, shell, "Bad SHELL setting");
        }
コード例 #5
0
        /// <summary>List input directories.</summary>
        /// <remarks>
        /// Subclasses may override to, e.g., select only files matching a regular
        /// expression. Listing is done on the calling thread when the configured
        /// thread count is 1 and through a <c>LocatedFileStatusFetcher</c> otherwise.
        /// </remarks>
        /// <param name="job">the job to list input paths for</param>
        /// <returns>array of FileStatus objects</returns>
        /// <exception cref="System.IO.IOException">
        /// if no input paths are configured, or if listing fails
        /// </exception>
        protected internal virtual FileStatus[] ListStatus(JobConf job)
        {
            Path[] dirs = GetInputPaths(job);
            if (dirs.Length == 0)
            {
                throw new IOException("No input paths specified in job");
            }
            // get tokens for all the required FileSystems..
            TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);
            // Whether we need to look recursively into the directory structure
            bool recursive = job.GetBoolean(InputDirRecursive, false);
            // creates a MultiPathFilter with the hiddenFileFilter and the
            // user provided one (if any).
            IList <PathFilter> filters = new AList <PathFilter>();

            filters.AddItem(hiddenFileFilter);
            PathFilter jobFilter = GetInputPathFilter(job);

            if (jobFilter != null)
            {
                filters.AddItem(jobFilter);
            }
            PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

            FileStatus[] result;
            int          numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads, FileInputFormat
                                                 .DefaultListStatusNumThreads);
            StopWatch sw = new StopWatch().Start();

            if (numThreads == 1)
            {
                IList <FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter,
                                                                           recursive);
                result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count
                                                     ]);
            }
            else
            {
                IEnumerable <FileStatus> locatedFiles;
                try
                {
                    LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                        job, dirs, recursive, inputFilter, false);
                    locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
                }
                catch (IOException)
                {
                    // A genuine I/O failure must not be relabelled as an interruption.
                    throw;
                }
                catch (Exception e)
                {
                    // Keep the original failure as the inner exception so it can be diagnosed.
                    throw new IOException("Interrupted while getting file statuses", e);
                }
                result = Iterables.ToArray <FileStatus>(locatedFiles);
            }
            sw.Stop();
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
            }
            Log.Info("Total input paths to process : " + result.Length);
            return(result);
        }
コード例 #6
0
ファイル: PipesReducer.cs プロジェクト: orf53975/hadoop.net
 /// <summary>
 /// Stores the job configuration, turns off automatic incrementing of the
 /// reducer processed-record counter and reads the skip-records flag.
 /// </summary>
 /// <param name="job">the job configuration</param>
 public virtual void Configure(JobConf job)
 {
     this.job = job;
     // With pipes the number of processed records may be equal to or lower than
     // the number of input records, so the counter is not advanced automatically.
     SkipBadRecords.SetAutoIncrReducerProcCount(job, false);
     bool skipRecordsEnabled = job.GetBoolean(MRJobConfig.SkipRecords, false);
     skipping = skipRecordsEnabled;
 }
コード例 #7
0
            /// <summary>
            /// Checks that the expected java options, environment variables and job
            /// local directory are visible to the task.
            /// </summary>
            /// <param name="job">the job configuration the expectations are read from</param>
            public override void Configure(JobConf job)
            {
                bool oldConfigs = job.GetBoolean(OldConfigs, false);

                // NUnit assertions take the value(s) under test first and the message last.
                if (oldConfigs)
                {
                    string javaOpts = job.Get(JobConf.MapredTaskJavaOpts);
                    NUnit.Framework.Assert.IsNotNull(javaOpts, JobConf.MapredTaskJavaOpts + " is null!");
                    NUnit.Framework.Assert.AreEqual(TaskOptsVal, javaOpts, JobConf.MapredTaskJavaOpts
                                                    + " has value of: " + javaOpts);
                }
                else
                {
                    string mapJavaOpts = job.Get(JobConf.MapredMapTaskJavaOpts);
                    NUnit.Framework.Assert.IsNotNull(mapJavaOpts, JobConf.MapredMapTaskJavaOpts + " is null!");
                    NUnit.Framework.Assert.AreEqual(MapOptsVal, mapJavaOpts, JobConf.MapredMapTaskJavaOpts
                                                    + " has value of: " + mapJavaOpts);
                }
                string path = job.Get("path");
                // check if the pwd is there in LD_LIBRARY_PATH
                string pwd = Runtime.Getenv("PWD");

                NUnit.Framework.Assert.IsTrue(Runtime.Getenv("LD_LIBRARY_PATH").Contains(pwd),
                                              "LD doesnt contain pwd");
                // check if X=$X:/abc works for LD_LIBRARY_PATH
                CheckEnv("LD_LIBRARY_PATH", "/tmp", "append");
                // check if X=y works for an already existing parameter
                CheckEnv("LANG", "en_us_8859_1", "noappend");
                // check if X=/tmp for a new env variable
                CheckEnv("MY_PATH", "/tmp", "noappend");
                // check if X=$X:/tmp works for a new env var and results into :/tmp
                CheckEnv("NEW_PATH", FilePath.pathSeparator + "/tmp", "noappend");
                // check if X=$(tt's X var):/tmp for an old env variable inherited from
                // the tt
                if (Shell.Windows)
                {
                    // On Windows, PATH is replaced one more time as part of default config
                    // of "mapreduce.admin.user.env", i.e. on Windows,
                    // "mapreduce.admin.user.env" is set to
                    // "PATH=%PATH%;%HADOOP_COMMON_HOME%\\bin"
                    string hadoopHome = Runtime.Getenv("HADOOP_COMMON_HOME");
                    if (hadoopHome == null)
                    {
                        hadoopHome = string.Empty;
                    }
                    string hadoopLibLocation = hadoopHome + "\\bin";
                    path += FilePath.pathSeparator + hadoopLibLocation;
                    path += FilePath.pathSeparator + path;
                }
                CheckEnv("PATH", path + FilePath.pathSeparator + "/tmp", "noappend");
                string jobLocalDir = job.Get(MRJobConfig.JobLocalDir);

                NUnit.Framework.Assert.IsNotNull(jobLocalDir, MRJobConfig.JobLocalDir + " is null");
            }
コード例 #8
0
ファイル: PipesMapRunner.cs プロジェクト: orf53975/hadoop.net
        /// <summary>Runs the map task through the pipes application.</summary>
        /// <remarks>
        /// When the job uses a Java record reader, every key/value pair read from
        /// <paramref name="input"/> is sent down the link; in either case the method
        /// then waits for the application to finish. Any failure during that phase
        /// is handed to the application's Abort, and Cleanup always runs.
        /// </remarks>
        /// <param name="input">the set of inputs</param>
        /// <param name="output">the object to collect the outputs of the map</param>
        /// <param name="reporter">the object to update with status</param>
        /// <exception cref="System.IO.IOException"/>
        public override void Run(RecordReader <K1, V1> input, OutputCollector <K2, V2> output
                                 , Reporter reporter)
        {
            Application <K1, V1, K2, V2> app = null;

            try
            {
                // The input is only handed to the application when neither the record
                // reader nor the mapper is implemented in Java; otherwise null is passed.
                RecordReader <FloatWritable, NullWritable> placeholderInput = null;
                if (!Submitter.GetIsJavaRecordReader(job) && !Submitter.GetIsJavaMapper(job))
                {
                    placeholderInput = (RecordReader <FloatWritable, NullWritable>)input;
                }
                app = new Application <K1, V1, K2, V2>(job, placeholderInput, output, reporter,
                                                       (Type)job.GetOutputKeyClass(),
                                                       (Type)job.GetOutputValueClass());
            }
            catch (Exception startupFailure)
            {
                throw new RuntimeException("interrupted", startupFailure);
            }
            DownwardProtocol <K1, V1> link = app.GetDownlink();
            bool javaRecordReader = Submitter.GetIsJavaRecordReader(job);

            link.RunMap(reporter.GetInputSplit(), job.GetNumReduceTasks(), javaRecordReader);
            bool flushEveryRecord = job.GetBoolean(MRJobConfig.SkipRecords, false);

            try
            {
                if (javaRecordReader)
                {
                    // a single key and value instance is recycled for every record
                    K1 currentKey   = input.CreateKey();
                    V1 currentValue = input.CreateValue();
                    link.SetInputTypes(currentKey.GetType().FullName, currentValue.GetType().FullName);
                    while (input.Next(currentKey, currentValue))
                    {
                        link.MapItem(currentKey, currentValue);
                        if (flushEveryRecord)
                        {
                            // in skip mode flush after each record so that records
                            // around a bad one are not held back in a buffer
                            link.Flush();
                        }
                    }
                    link.EndOfInput();
                }
                app.WaitForFinish();
            }
            catch (Exception failure)
            {
                app.Abort(failure);
            }
            finally
            {
                app.Cleanup();
            }
        }
コード例 #9
0
        /// <summary>
        /// Calls Chain.SetReducer with byValue = false and checks that
        /// "chain.reducer.byValue" reads back as false from the reducer configuration.
        /// </summary>
        public virtual void TestSetReducerWithReducerByValueAsFalse()
        {
            JobConf jobConf     = new JobConf();
            JobConf reducerConf = new JobConf();

            Chain.SetReducer(jobConf, typeof(TestChain.MyReducer), typeof(object), typeof(object
                                                                                          ), typeof(object), typeof(object), false, reducerConf);
            // Default of true so that an unset key cannot make the check pass by accident.
            bool reduceByValue = reducerConf.GetBoolean("chain.reducer.byValue", true);

            // NUnit order: expected, actual, message.
            NUnit.Framework.Assert.AreEqual(false, reduceByValue, "It should set chain.reducer.byValue as false " +
                                            "in reducerConf when we give value as false");
        }
コード例 #10
0
        /// <summary>
        /// Creates an empty file named <paramref name="flag"/> in the flag directory,
        /// calling Fail first when GetFlag reports that the flag is already there.
        /// </summary>
        /// <param name="conf">
        /// configuration used to obtain the file system; its "localFS" boolean
        /// (default true) is handed to GetFlagDir
        /// </param>
        /// <param name="flag">name of the flag file</param>
        /// <exception cref="System.IO.IOException"/>
        private static void WriteFlag(JobConf conf, string flag)
        {
            FileSystem fileSystem  = FileSystem.Get(conf);
            bool       flagPresent = GetFlag(conf, flag);

            if (flagPresent)
            {
                Fail("Flag " + flag + " already exists");
            }
            bool             useLocalFs = conf.GetBoolean("localFS", true);
            DataOutputStream flagStream = fileSystem.Create(new Path(GetFlagDir(useLocalFs), flag));

            // nothing is written; the stream is closed right away
            flagStream.Close();
        }
コード例 #11
0
ファイル: Chain.cs プロジェクト: orf53975/hadoop.net
        /// <summary>Configures all the chain elements for the task.</summary>
        /// <remarks>
        /// Instantiates every configured mapper, and the reducer when one is
        /// configured. For each element the output key/value serializations are
        /// looked up when its by-value flag is set; otherwise null is recorded.
        /// </remarks>
        /// <param name="jobConf">chain job's JobConf.</param>
        public virtual void Configure(JobConf jobConf)
        {
            string keyPrefix = GetPrefix(isMap);

            chainJobConf = jobConf;
            SerializationFactory serializations = new SerializationFactory(chainJobConf);
            int mapperCount = jobConf.GetInt(keyPrefix + ChainMapperSize, 0);

            for (int position = 0; position < mapperCount; position++)
            {
                Type    mapperType = jobConf.GetClass <Mapper>(keyPrefix + ChainMapperClass + position, null);
                JobConf mapperConf = new JobConf(GetChainElementConf(jobConf, keyPrefix + ChainMapperConfig
                                                                     + position));
                Mapper chainedMapper = ReflectionUtils.NewInstance(mapperType, mapperConf);
                mappers.AddItem(chainedMapper);
                if (!mapperConf.GetBoolean(MapperByValue, true))
                {
                    // by-reference element: no serialization is recorded
                    mappersKeySerialization.AddItem(null);
                    mappersValueSerialization.AddItem(null);
                    continue;
                }
                mappersKeySerialization.AddItem(serializations.GetSerialization(mapperConf.GetClass
                                                                                    (MapperOutputKeyClass, null)));
                mappersValueSerialization.AddItem(serializations.GetSerialization(mapperConf.GetClass
                                                                                      (MapperOutputValueClass, null)));
            }
            Type reducerType = jobConf.GetClass <Reducer>(keyPrefix + ChainReducerClass, null);

            if (reducerType == null)
            {
                // no reducer configured for this chain
                return;
            }
            JobConf reducerConf = new JobConf(GetChainElementConf(jobConf, keyPrefix + ChainReducerConfig
                                                                  ));
            reducer = ReflectionUtils.NewInstance(reducerType, reducerConf);
            if (!reducerConf.GetBoolean(ReducerByValue, true))
            {
                reducerKeySerialization   = null;
                reducerValueSerialization = null;
                return;
            }
            reducerKeySerialization = serializations.GetSerialization(reducerConf.GetClass(ReducerOutputKeyClass
                                                                                           , null));
            reducerValueSerialization = serializations.GetSerialization(reducerConf.GetClass(
                                                                            ReducerOutputValueClass, null));
        }
コード例 #12
0
        /// <summary>Helper function to generate a name that is unique for the task.</summary>
        /// <remarks>
        /// <p>The generated name can be used to create custom files from within the
        /// different tasks of the job; the names for different tasks will not collide
        /// with each other.</p>
        /// <p>The given name is postfixed with the task type, 'm' for maps, 'r' for
        /// reduces, and the task partition number. For example, given the name 'test'
        /// running on the first map of the job, the generated name will be
        /// 'test-m-00000'.</p>
        /// </remarks>
        /// <param name="conf">the configuration for the job.</param>
        /// <param name="name">the name to make unique.</param>
        /// <returns>a unique name across all tasks of the job.</returns>
        /// <exception cref="System.ArgumentException">
        /// if the configuration carries no task partition
        /// </exception>
        public static string GetUniqueName(JobConf conf, string name)
        {
            int taskPartition = conf.GetInt(JobContext.TaskPartition, -1);

            if (taskPartition == -1)
            {
                throw new ArgumentException("This method can only be called from within a Job");
            }
            string typeCode;
            if (conf.GetBoolean(JobContext.TaskIsmap, JobContext.DefaultTaskIsmap))
            {
                typeCode = "m";
            }
            else
            {
                typeCode = "r";
            }
            // at least five digits, without grouping separators
            NumberFormat partitionFormat = NumberFormat.GetInstance();

            partitionFormat.SetMinimumIntegerDigits(5);
            partitionFormat.SetGroupingUsed(false);
            return(name + "-" + typeCode + "-" + partitionFormat.Format(taskPartition));
        }
コード例 #13
0
ファイル: JobContextImpl.cs プロジェクト: orf53975/hadoop.net
 /// <summary>Reports whether job-setup and job-cleanup are needed for the job.</summary>
 /// <returns>
 /// the boolean stored under <c>MRJobConfig.SetupCleanupNeeded</c>, read with
 /// <c>true</c> as the default
 /// </returns>
 public virtual bool GetJobSetupCleanupNeeded()
 {
     bool setupCleanupNeeded = conf.GetBoolean(MRJobConfig.SetupCleanupNeeded, true);
     return setupCleanupNeeded;
 }
コード例 #14
0
 /// <summary>Tells whether the job output is to be compressed.</summary>
 /// <param name="conf">the <see cref="JobConf"/> to look in</param>
 /// <returns>
 /// the boolean stored under <c>FileOutputFormat.Compress</c>, read with
 /// <c>false</c> as the default
 /// </returns>
 public static bool GetCompressOutput(JobConf conf)
 {
     bool compressOutput = conf.GetBoolean(FileOutputFormat.Compress, false);
     return compressOutput;
 }
コード例 #15
0
ファイル: Submitter.cs プロジェクト: orf53975/hadoop.net
 /// <summary>Tells whether the job is using a Java RecordReader.</summary>
 /// <param name="conf">the configuration to check</param>
 /// <returns>
 /// the boolean stored under <c>Submitter.IsJavaRr</c>, read with <c>false</c>
 /// as the default
 /// </returns>
 public static bool GetIsJavaRecordReader(JobConf conf)
 {
     bool usesJavaRecordReader = conf.GetBoolean(Org.Apache.Hadoop.Mapred.Pipes.Submitter.IsJavaRr, false);
     return usesJavaRecordReader;
 }
コード例 #16
0
 /// <summary>Tells whether a named output is a multi named output.</summary>
 /// <remarks>
 /// The name is first passed to <c>CheckNamedOutput</c>; the flag is then read
 /// from the key <c>MoPrefix + namedOutput + Multi</c>.
 /// </remarks>
 /// <param name="conf">job conf</param>
 /// <param name="namedOutput">named output</param>
 /// <returns>
 /// <code>true</code> if the named output is multi, <code>false</code>
 /// if it is single. If the named output is not defined it returns
 /// <code>false</code>
 /// </returns>
 public static bool IsMultiNamedOutput(JobConf conf, string namedOutput)
 {
     CheckNamedOutput(conf, namedOutput, false);
     string multiFlagKey = MoPrefix + namedOutput + Multi;
     return conf.GetBoolean(multiFlagKey, false);
 }
コード例 #17
0
 /// <summary>
 /// Reads the "multithreaded.ioException" and "multithreaded.runtimeException"
 /// flags from the job configuration; both are read with false as the default.
 /// </summary>
 /// <param name="job">the job configuration</param>
 public virtual void Configure(JobConf job)
 {
     bool ioExceptionFlag = job.GetBoolean("multithreaded.ioException", false);
     ioEx = ioExceptionFlag;
     bool runtimeExceptionFlag = job.GetBoolean("multithreaded.runtimeException", false);
     rtEx = runtimeExceptionFlag;
 }
コード例 #18
0
 /// <summary>Tells whether the counters for the named outputs are enabled.</summary>
 /// <remarks>
 /// <p>
 /// MultipleOutputs supports counters; by default they are disabled.
 /// The counters group is the
 /// <see cref="MultipleOutputs"/>
 /// class name.
 /// </p>
 /// The names of the counters are the same as the named outputs. For multi
 /// named outputs the name of the counter is the concatenation of the named
 /// output, an underscore '_' and the multiname.
 /// </remarks>
 /// <param name="conf">job conf to read the setting from.</param>
 /// <returns>TRUE if the counters are enabled, FALSE if they are disabled.</returns>
 public static bool GetCountersEnabled(JobConf conf)
 {
     bool countersOn = conf.GetBoolean(CountersEnabled, false);
     return countersOn;
 }
コード例 #19
0
ファイル: Submitter.cs プロジェクト: orf53975/hadoop.net
 /// <summary>Does the user want to keep the command file for debugging?</summary>
 /// <remarks>
 /// If this is true, pipes will write a copy of the command data to a file in
 /// the task directory named "downlink.data", which may be used to run the C++
 /// program under the debugger. You probably also want to set
 /// JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
 /// being deleted.
 /// To run using the data file, set the environment variable
 /// "mapreduce.pipes.commandfile" to point to the file.
 /// </remarks>
 /// <param name="conf">the configuration to check</param>
 /// <returns>will the framework save the command file?</returns>
 public static bool GetKeepCommandFile(JobConf conf)
 {
     bool keepCommandFile = conf.GetBoolean(
         Org.Apache.Hadoop.Mapred.Pipes.Submitter.PreserveCommandfile, false);
     return keepCommandFile;
 }
コード例 #20
0
        /// <summary>
        /// Creates the merge manager: stores its collaborators, derives the memory
        /// limits and merge thresholds from the job configuration, validates them
        /// and starts the merger workers.
        /// </summary>
        /// <param name="reduceId">attempt id of the reduce task</param>
        /// <param name="jobConf">job configuration the limits are read from</param>
        /// <param name="localFS">local file system; cast to LocalFileSystem to obtain the raw file system</param>
        /// <param name="localDirAllocator">allocator stored for later use</param>
        /// <param name="reporter">reporter stored for later use</param>
        /// <param name="codec">compression codec stored for later use</param>
        /// <param name="combinerClass">combiner type stored for later use</param>
        /// <param name="combineCollector">combine output collector stored for later use</param>
        /// <param name="spilledRecordsCounter">counter stored for later use</param>
        /// <param name="reduceCombineInputCounter">counter stored for later use</param>
        /// <param name="mergedMapOutputsCounter">counter stored for later use</param>
        /// <param name="exceptionReporter">exception reporter stored for later use</param>
        /// <param name="mergePhase">progress object of the merge phase</param>
        /// <param name="mapOutputFile">map output file helper; receives the job configuration</param>
        /// <exception cref="System.ArgumentException">
        /// if the input buffer percent is outside [0, 1] or the single shuffle
        /// memory limit percent is outside (0, 1]
        /// </exception>
        /// <exception cref="RuntimeException">
        /// if the single shuffle limit is not below the merge threshold
        /// </exception>
        public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS
                                , LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec
                                , Type combinerClass, Task.CombineOutputCollector <K, V> combineCollector, Counters.Counter
                                spilledRecordsCounter, Counters.Counter reduceCombineInputCounter, Counters.Counter
                                mergedMapOutputsCounter, ExceptionReporter exceptionReporter, Progress mergePhase
                                , MapOutputFile mapOutputFile)
        {
            this.reduceId                  = reduceId;
            this.jobConf                   = jobConf;
            this.localDirAllocator         = localDirAllocator;
            this.exceptionReporter         = exceptionReporter;
            this.reporter                  = reporter;
            this.codec                     = codec;
            this.combinerClass             = combinerClass;
            this.combineCollector          = combineCollector;
            this.reduceCombineInputCounter = reduceCombineInputCounter;
            this.spilledRecordsCounter     = spilledRecordsCounter;
            this.mergedMapOutputsCounter   = mergedMapOutputsCounter;
            this.mapOutputFile             = mapOutputFile;
            this.mapOutputFile.SetConf(jobConf);
            this.localFS = localFS;
            this.rfs     = ((LocalFileSystem)localFS).GetRaw();
            // Assigned before any merger is started below, so that a merger which
            // was handed `this` cannot run while mergePhase is still unset.
            this.mergePhase = mergePhase;
            float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent, MRJobConfig
                                                     .DefaultShuffleInputBufferPercent);

            if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleInputBufferPercent
                                            + ": " + maxInMemCopyUse);
            }
            // Allow unit tests to fix Runtime memory
            this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes, Runtime
                                                      .GetRuntime().MaxMemory()) * maxInMemCopyUse);
            this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);
            // Maximum fraction of the in-memory limit that a single shuffle can consume
            float singleShuffleMemoryLimitPercent = jobConf.GetFloat(MRJobConfig.ShuffleMemoryLimitPercent
                                                                     , DefaultShuffleMemoryLimitPercent);

            if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent >
                1.0f)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleMemoryLimitPercent
                                            + ": " + singleShuffleMemoryLimitPercent);
            }
            usedMemory   = 0L;
            commitMemory = 0L;
            this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent
                                                );
            this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold
                                                                , ioSortFactor);
            this.mergeThreshold = (long)(this.memoryLimit * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent
                                                                             , 0.90f));
            Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
                     + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
                     + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold
                     );
            if (this.maxSingleShuffleLimit >= this.mergeThreshold)
            {
                // The two values are separated by ", " so they do not run together.
                throw new RuntimeException("Invalid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold "
                                           + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + ", mergeThreshold: " +
                                           this.mergeThreshold);
            }
            bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false
                                                         );

            if (allowMemToMemMerge)
            {
                this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this,
                                                                                            this, memToMemMergeOutputsThreshold);
                this.memToMemMerger.Start();
            }
            else
            {
                this.memToMemMerger = null;
            }
            this.inMemoryMerger = CreateInMemoryMerger();
            this.inMemoryMerger.Start();
            this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
            this.onDiskMerger.Start();
        }
コード例 #21
0
        // TODO later:  add explicit "isUber()" checks of some sort
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <summary>
        /// Runs a one-map sleep job with explicit log size/backup settings for the
        /// map task and the AppMaster, waits for the application to reach a terminal
        /// state, and then checks the <c>syslog*</c> files found in every container
        /// log directory of every node manager: their count must equal the configured
        /// number of backups plus one, and <c>syslog.1</c> must be at least as large
        /// as the configured limit.
        /// </summary>
        public virtual void TestContainerRollingLog()
        {
            // Nothing to run without the MR application jar.
            if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
            {
                Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test."
                         );
                return;
            }

            SleepJob sleepJob  = new SleepJob();
            JobConf  sleepConf = new JobConf(mrCluster.GetConfig());

            // Map task logging: log everything, with a 4 KB limit and 3 backups.
            long userLogKb = 4;
            sleepConf.Set(MRJobConfig.MapLogLevel, Level.All.ToString());
            sleepConf.SetLong(MRJobConfig.TaskUserlogLimit, userLogKb);
            sleepConf.SetInt(MRJobConfig.TaskLogBackups, 3);

            // AppMaster logging: log everything, with a 7 KB limit and 7 backups.
            long amLogKb = 7;
            sleepConf.Set(MRJobConfig.MrAmLogLevel, Level.All.ToString());
            sleepConf.SetLong(MRJobConfig.MrAmLogKb, amLogKb);
            sleepConf.SetInt(MRJobConfig.MrAmLogBackups, 7);
            sleepJob.SetConf(sleepConf);

            Job job = sleepJob.CreateJob(1, 0, 1L, 100, 0L, 0);

            job.SetJarByClass(typeof(SleepJob));
            job.AddFileToClassPath(AppJar);
            // The AppMaster jar itself.
            job.WaitForCompletion(true);

            JobId         jobId       = TypeConverter.ToYarn(job.GetJobID());
            ApplicationId appID       = jobId.GetAppId();
            int           pollElapsed = 0;

            // Poll once per second until the RM reports a terminal state, giving up
            // after 60 seconds (the assertion below then reports the failure).
            while (true)
            {
                Sharpen.Thread.Sleep(1000);
                pollElapsed += 1000;
                if (TerminalRmAppStates.Contains(mrCluster.GetResourceManager().GetRMContext().GetRMApps
                                                     ()[appID].GetState()))
                {
                    break;
                }
                if (pollElapsed >= 60000)
                {
                    Log.Warn("application did not reach terminal state within 60 seconds");
                    break;
                }
            }
            NUnit.Framework.Assert.AreEqual(RMAppState.Finished, mrCluster.GetResourceManager
                                                ().GetRMContext().GetRMApps()[appID].GetState());

            // Job finished, verify logs
            //
            string appIdStr    = appID.ToString();
            string appIdSuffix = Sharpen.Runtime.Substring(appIdStr, "application_".Length, appIdStr
                                                           .Length);
            string containerGlob = "container_" + appIdSuffix + "_*_*";
            string syslogGlob    = appIdStr + Path.Separator + containerGlob + Path.Separator +
                                   TaskLog.LogName.Syslog;
            int numAppMasters = 0;
            int numMapTasks   = 0;

            for (int i = 0; i < NumNodeMgrs; i++)
            {
                Configuration nmConf = mrCluster.GetNodeManager(i).GetConfig();
                foreach (string logDir in nmConf.GetTrimmedStrings(YarnConfiguration.NmLogDirs))
                {
                    Path absSyslogGlob = new Path(logDir + Path.Separator + syslogGlob);
                    Log.Info("Checking for glob: " + absSyslogGlob);
                    FileStatus[] syslogs = localFs.GlobStatus(absSyslogGlob);
                    foreach (FileStatus slog in syslogs)
                    {
                        // In uber mode every container log is treated as the AppMaster's;
                        // otherwise the container whose masked id equals 1 is the AppMaster.
                        bool foundAppMaster         = job.IsUber();
                        Path containerPathComponent = slog.GetPath().GetParent();
                        if (!foundAppMaster)
                        {
                            ContainerId cid = ConverterUtils.ToContainerId(containerPathComponent.GetName());
                            foundAppMaster = ((cid.GetContainerId() & ContainerId.ContainerIdBitmask) == 1);
                        }
                        FileStatus[] sysSiblings = localFs.GlobStatus(new Path(containerPathComponent, TaskLog.LogName
                                                                               .Syslog + "*"));
                        // sort to ensure for i > 0 sysSiblings[i] == "syslog.i"
                        Arrays.Sort(sysSiblings);

                        // The counts are compared with AreEqual, not AreSame: both operands
                        // are ints, which get boxed into distinct objects, so a reference
                        // identity check could never succeed.
                        if (foundAppMaster)
                        {
                            numAppMasters++;
                            NUnit.Framework.Assert.AreEqual("Unexpected number of AM sylog* files", sleepConf.
                                                            GetInt(MRJobConfig.MrAmLogBackups, 0) + 1, sysSiblings.Length);
                            NUnit.Framework.Assert.IsTrue("AM syslog.1 length kb should be >= " + amLogKb, sysSiblings
                                                          [1].GetLen() >= amLogKb * 1024);
                        }
                        else
                        {
                            numMapTasks++;
                            NUnit.Framework.Assert.AreEqual("Unexpected number of MR task sylog* files", sleepConf
                                                            .GetInt(MRJobConfig.TaskLogBackups, 0) + 1, sysSiblings.Length);
                            NUnit.Framework.Assert.IsTrue("MR syslog.1 length kb should be >= " + userLogKb,
                                                          sysSiblings[1].GetLen() >= userLogKb * 1024);
                        }
                    }
                }
            }
            // Make sure we checked non-empty set
            //
            NUnit.Framework.Assert.AreEqual("No AppMaster log found!", 1, numAppMasters);
            if (sleepConf.GetBoolean(MRJobConfig.JobUbertaskEnable, false))
            {
                NUnit.Framework.Assert.AreEqual("MapTask log with uber found!", 0, numMapTasks);
            }
            else
            {
                NUnit.Framework.Assert.AreEqual("No MapTask log found!", 1, numMapTasks);
            }
        }
コード例 #22
0
        /// <exception cref="System.IO.IOException"/>
        /// <summary>
        /// Reports whether an entry named <paramref name="flag"/> exists under the
        /// directory returned by <c>GetFlagDir</c>, using the file system obtained
        /// from <paramref name="conf"/>.
        /// </summary>
        /// <param name="conf">Job configuration; its "localFS" boolean (default true) is passed to <c>GetFlagDir</c>.</param>
        /// <param name="flag">Name of the flag entry to look for.</param>
        /// <returns>True when the flag path exists on the file system.</returns>
        private static bool GetFlag(JobConf conf, string flag)
        {
            FileSystem fileSystem = FileSystem.Get(conf);
            bool       useLocalFs = conf.GetBoolean("localFS", true);
            Path       flagPath   = new Path(GetFlagDir(useLocalFs), flag);

            return fileSystem.Exists(flagPath);
        }
コード例 #23
0
 /// <summary>
 /// Stores the job configuration via <c>SetConf</c> and caches the
 /// "fs.test.fastCheck" boolean setting (false when absent) in <c>fastCheck</c>.
 /// </summary>
 /// <param name="job">Job configuration to adopt and read the setting from.</param>
 public virtual void Configure(JobConf job)
 {
     SetConf(job);

     bool fastCheckRequested = job.GetBoolean("fs.test.fastCheck", false);
     fastCheck = fastCheckRequested;
 }