Example #1
        /// <summary>
        /// Ensure that M/R 1.x applications can get and set task virtual memory with
        /// old property names
        /// </summary>
        public virtual void TestDeprecatedPropertyNameForTaskVmem()
        {
            JobConf configuration = new JobConf();

            configuration.SetLong(JobConf.MapredJobMapMemoryMbProperty, 1024);
            configuration.SetLong(JobConf.MapredJobReduceMemoryMbProperty, 1024);
            NUnit.Framework.Assert.AreEqual(1024, configuration.GetMemoryForMapTask());
            NUnit.Framework.Assert.AreEqual(1024, configuration.GetMemoryForReduceTask());
            // Make sure new property names aren't broken by the old ones
            configuration.SetLong(JobConf.MapreduceJobMapMemoryMbProperty, 1025);
            configuration.SetLong(JobConf.MapreduceJobReduceMemoryMbProperty, 1025);
            NUnit.Framework.Assert.AreEqual(1025, configuration.GetMemoryForMapTask());
            NUnit.Framework.Assert.AreEqual(1025, configuration.GetMemoryForReduceTask());
            configuration.SetMemoryForMapTask(2048);
            configuration.SetMemoryForReduceTask(2048);
            NUnit.Framework.Assert.AreEqual(2048, configuration.GetLong(JobConf.MapredJobMapMemoryMbProperty, -1));
            NUnit.Framework.Assert.AreEqual(2048, configuration.GetLong(JobConf.MapredJobReduceMemoryMbProperty, -1));
            // Make sure new property names aren't broken by the old ones
            NUnit.Framework.Assert.AreEqual(2048, configuration.GetLong(JobConf.MapreduceJobMapMemoryMbProperty, -1));
            NUnit.Framework.Assert.AreEqual(2048, configuration.GetLong(JobConf.MapreduceJobReduceMemoryMbProperty, -1));
        }
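
Both rounds of assertions pass because JobConf keeps the deprecated mapred.* keys and their mapreduce.* replacements in sync: a write through either name is visible through both. A minimal sketch of the read side of such aliasing, assuming a plain dictionary-backed configuration (the helper below is hypothetical, not JobConf's actual implementation):

        // Hypothetical alias lookup, not JobConf's code: prefer the new key,
        // fall back to the deprecated one, then to the supplied default.
        // Assumes System.Collections.Generic for IDictionary.
        static long GetLongWithAlias(IDictionary<string, string> conf,
                                     string newKey, string deprecatedKey, long defaultValue)
        {
            if (conf.TryGetValue(newKey, out var raw) || conf.TryGetValue(deprecatedKey, out raw))
            {
                return long.Parse(raw);
            }
            return defaultValue;
        }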
Example #2
        /// <summary>When no input dir is specified, generate random data.</summary>
        /// <exception cref="System.IO.IOException"/>
        protected internal static void ConfRandom(JobConf job)
        {
            // from RandomWriter
            job.SetInputFormat(typeof(GenericMRLoadGenerator.RandomInputFormat));
            job.SetMapperClass(typeof(GenericMRLoadGenerator.RandomMapOutput));
            ClusterStatus cluster               = new JobClient(job).GetClusterStatus();
            int           numMapsPerHost        = job.GetInt(RandomTextWriter.MapsPerHost, 10);
            long          numBytesToWritePerMap = job.GetLong(RandomTextWriter.BytesPerMap, 1 * 1024 * 1024 * 1024);

            if (numBytesToWritePerMap == 0)
            {
                throw new IOException("Cannot have " + RandomTextWriter.BytesPerMap + " set to 0"
                                      );
            }
            long totalBytesToWrite = job.GetLong(RandomTextWriter.TotalBytes,
                                                 numMapsPerHost * numBytesToWritePerMap * cluster.GetTaskTrackers());
            int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);

            if (numMaps == 0 && totalBytesToWrite > 0)
            {
                numMaps = 1;
                job.SetLong(RandomTextWriter.BytesPerMap, totalBytesToWrite);
            }
            job.SetNumMapTasks(numMaps);
        }
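
Unwinding the arithmetic: totalBytesToWrite defaults to mapsPerHost * bytesPerMap * taskTrackers, and numMaps is the integer quotient totalBytesToWrite / bytesPerMap, bumped up to at least one map whenever there is data to write. A standalone sketch of just that computation (illustrative names, no Hadoop types):

        // Standalone sketch of ConfRandom's map-count math; names are illustrative.
        static int ComputeNumMaps(int mapsPerHost, long bytesPerMap, int taskTrackers)
        {
            long totalBytes = mapsPerHost * bytesPerMap * taskTrackers;
            int numMaps = (int)(totalBytes / bytesPerMap);
            // Mirror the guard above: never schedule zero maps for nonzero data.
            if (numMaps == 0 && totalBytes > 0)
            {
                numMaps = 1;
            }
            return numMaps;
        }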
Example #3
        // fill keys, values with ~1.5 blocks for block-compressed seq fill
        private static void FillBlocks(JobConf conf)
        {
            Random r    = new Random();
            long   seed = conf.GetLong("filebench.seed", -1);

            if (seed > 0)
            {
                r.SetSeed(seed);
            }
            int            keylen = conf.GetInt("filebench.key.words", 5);
            int            vallen = conf.GetInt("filebench.val.words", 20);
            int            acc    = (3 * conf.GetInt("io.seqfile.compress.blocksize", 1000000)) >> 1;
            AList <string> k      = new AList <string>();
            AList <string> v      = new AList <string>();

            for (int i = 0; acc > 0; ++i)
            {
                string s = GenerateSentence(r, keylen);
                acc -= s.Length;
                k.AddItem(s);
                s    = GenerateSentence(r, vallen);
                acc -= s.Length;
                v.AddItem(s);
            }
            keys   = Sharpen.Collections.ToArray(k, new string[0]);
            values = Sharpen.Collections.ToArray(v, new string[0]);
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestNumInputs()
        {
            JobConf job = new JobConf(conf);

            dfs = NewDFSCluster(job);
            FileSystem fs = dfs.GetFileSystem();

            System.Console.Out.WriteLine("FileSystem " + fs.GetUri());
            Path   inputDir     = new Path("/foo/");
            int    numFiles     = 10;
            string fileNameBase = "part-0000";

            for (int i = 0; i < numFiles; ++i)
            {
                CreateInputs(fs, inputDir, fileNameBase + i.ToString());
            }
            CreateInputs(fs, inputDir, "_meta");
            CreateInputs(fs, inputDir, "_temp");
            // split it using a file input format
            TextInputFormat.AddInputPath(job, inputDir);
            TextInputFormat inFormat = new TextInputFormat();

            inFormat.Configure(job);
            InputSplit[] splits = inFormat.GetSplits(job, 1);
            NUnit.Framework.Assert.AreEqual("Expected value of " + FileInputFormat.NumInputFiles
                                            , numFiles, job.GetLong(FileInputFormat.NumInputFiles, 0));
        }
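
The `_meta` and `_temp` files are created on purpose: FileInputFormat's default path filter skips names beginning with `_` or `.`, so the assertion expects exactly the ten `part-0000x` files. A one-line restatement of that convention (the standard Hadoop hidden-file rule, written out here as an assumption):

        // Assumed default input filter: paths starting with '_' or '.' are hidden.
        static bool IsVisibleInputFile(string name)
        {
            return !name.StartsWith("_") && !name.StartsWith(".");
        }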
Example #5
 public ShuffleSchedulerImpl(JobConf job, TaskStatus status, TaskAttemptID reduceId,
                             ExceptionReporter reporter, Progress progress, Counters.Counter shuffledMapsCounter,
                             Counters.Counter reduceShuffleBytes, Counters.Counter failedShuffleCounter)
 {
     referee                   = new ShuffleSchedulerImpl.Referee(this);
     totalMaps                 = job.GetNumMapTasks();
     abortFailureLimit         = Math.Max(30, totalMaps / 10);
     copyTimeTracker           = new ShuffleSchedulerImpl.CopyTimeTracker();
     remainingMaps             = totalMaps;
     finishedMaps              = new bool[remainingMaps];
     this.reporter             = reporter;
     this.status               = status;
     this.reduceId             = reduceId;
     this.progress             = progress;
     this.shuffledMapsCounter  = shuffledMapsCounter;
     this.reduceShuffleBytes   = reduceShuffleBytes;
     this.failedShuffleCounter = failedShuffleCounter;
     this.startTime            = Time.MonotonicNow();
     lastProgressTime          = startTime;
     referee.Start();
     this.maxFailedUniqueFetches          = Math.Min(totalMaps, 5);
     this.maxFetchFailuresBeforeReporting = job.GetInt(MRJobConfig.ShuffleFetchFailures, ReportFailureLimit);
     this.reportReadErrorImmediately = job.GetBoolean(MRJobConfig.ShuffleNotifyReaderror, true);
     this.maxDelay = job.GetLong(MRJobConfig.MaxShuffleFetchRetryDelay, MRJobConfig.DefaultMaxShuffleFetchRetryDelay);
     this.maxHostFailures = job.GetInt(MRJobConfig.MaxShuffleFetchHostFailures, MRJobConfig.DefaultMaxShuffleFetchHostFailures);
 }
Example #6
 public override void Configure(JobConf job)
 {
     numBytesToWrite = job.GetLong("test.tmb.bytes_per_map", 128 * 1024 * 1024);
     minKeySize      = job.GetInt("test.tmb.min_key", 10);
     keySizeRange    = job.GetInt("test.tmb.max_key", 10) - minKeySize;
     minValueSize    = job.GetInt("test.tmb.min_value", 10);
     valueSizeRange  = job.GetInt("test.tmb.max_value", 10) - minValueSize;
 }
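
The min/range pair read here is the usual way these generators pick a size per record: a fixed minimum plus a uniformly random extra up to the range. A hypothetical consumer of the fields set in Configure above (not part of the example):

 // Hypothetical usage: choose a key length uniformly in
 // [minKeySize, minKeySize + keySizeRange].
 private int NextKeyLength(Random random)
 {
     return minKeySize + (keySizeRange == 0 ? 0 : random.Next(keySizeRange + 1));
 }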
Example #7
 public override void Configure(JobConf job)
 {
     bytesToWrite = job.GetLong(RandomTextWriter.BytesPerMap, 1 * 1024 * 1024 * 1024);
     keymin       = job.GetInt(RandomTextWriter.MinKey, 5);
     keymax       = job.GetInt(RandomTextWriter.MaxKey, 10);
     valmin       = job.GetInt(RandomTextWriter.MinValue, 5);
     valmax       = job.GetInt(RandomTextWriter.MaxValue, 10);
 }
        /// <exception cref="System.Exception"/>
        public virtual void TestLocality()
        {
            JobConf job = new JobConf(conf);

            dfs = NewDFSCluster(job);
            FileSystem fs = dfs.GetFileSystem();

            System.Console.Out.WriteLine("FileSystem " + fs.GetUri());
            Path   inputDir = new Path("/foo/");
            string fileName = "part-0000";

            CreateInputs(fs, inputDir, fileName);
            // split it using a file input format
            TextInputFormat.AddInputPath(job, inputDir);
            TextInputFormat inFormat = new TextInputFormat();

            inFormat.Configure(job);
            InputSplit[] splits     = inFormat.GetSplits(job, 1);
            FileStatus   fileStatus = fs.GetFileStatus(new Path(inputDir, fileName));

            BlockLocation[] locations = fs.GetFileBlockLocations(fileStatus, 0, fileStatus.GetLen());
            System.Console.Out.WriteLine("Made splits");
            // make sure that each split is a block and the locations match
            for (int i = 0; i < splits.Length; ++i)
            {
                FileSplit fileSplit = (FileSplit)splits[i];
                System.Console.Out.WriteLine("File split: " + fileSplit);
                foreach (string h in fileSplit.GetLocations())
                {
                    System.Console.Out.WriteLine("Location: " + h);
                }
                System.Console.Out.WriteLine("Block: " + locations[i]);
                NUnit.Framework.Assert.AreEqual(locations[i].GetOffset(), fileSplit.GetStart());
                NUnit.Framework.Assert.AreEqual(locations[i].GetLength(), fileSplit.GetLength());
                string[] blockLocs = locations[i].GetHosts();
                string[] splitLocs = fileSplit.GetLocations();
                NUnit.Framework.Assert.AreEqual(2, blockLocs.Length);
                NUnit.Framework.Assert.AreEqual(2, splitLocs.Length);
                NUnit.Framework.Assert.IsTrue((blockLocs[0].Equals(splitLocs[0]) && blockLocs[1].Equals(splitLocs[1])) ||
                                              (blockLocs[1].Equals(splitLocs[0]) && blockLocs[0].Equals(splitLocs[1])));
            }
            NUnit.Framework.Assert.AreEqual("Expected value of " + FileInputFormat.NumInputFiles
                                            , 1, job.GetLong(FileInputFormat.NumInputFiles, 0));
        }
Example #9
        /// <exception cref="System.IO.IOException"/>
        internal static long WriteBench(JobConf conf)
        {
            // OutputFormat instantiation
            long filelen = conf.GetLong("filebench.file.bytes", 5L * 1024 * 1024 * 1024); // 5L: the 5 GB default overflows 32-bit int arithmetic

            Org.Apache.Hadoop.IO.Text key = new Org.Apache.Hadoop.IO.Text();
            Org.Apache.Hadoop.IO.Text val = new Org.Apache.Hadoop.IO.Text();
            string fn   = conf.Get("test.filebench.name", string.Empty);
            Path   outd = FileOutputFormat.GetOutputPath(conf);

            conf.Set("mapred.work.output.dir", outd.ToString());
            OutputFormat outf = conf.GetOutputFormat();
            RecordWriter<Org.Apache.Hadoop.IO.Text, Org.Apache.Hadoop.IO.Text> rw =
                outf.GetRecordWriter(outd.GetFileSystem(conf), conf, fn, Reporter.Null);

            try
            {
                long     acc   = 0L;
                DateTime start = DateTime.Now; // new DateTime() is 0001-01-01, not the current time
                for (int i = 0; acc < filelen; ++i)
                {
                    i %= keys.Length;
                    key.Set(keys[i]);
                    val.Set(values[i]);
                    rw.Write(key, val);
                    acc += keys[i].Length;
                    acc += values[i].Length;
                }
                DateTime end = DateTime.Now;
                return end.GetTime() - start.GetTime();
            }
            finally
            {
                rw.Close(Reporter.Null);
            }
        }
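
WriteBench returns elapsed milliseconds, so throughput falls out by dividing the bytes written by the elapsed time. A hedged usage sketch (assumes conf is a JobConf with an output path already set; the helper name is illustrative):

            // Hypothetical caller: turn WriteBench's elapsed ms into MB/s.
            internal static void ReportWriteThroughput(JobConf conf)
            {
                long elapsedMs = WriteBench(conf);
                long bytes = conf.GetLong("filebench.file.bytes", 5L * 1024 * 1024 * 1024);
                double seconds = Math.Max(1, elapsedMs) / 1000.0; // guard against a zero-length run
                double mbPerSec = (bytes / (1024.0 * 1024.0)) / seconds;
                System.Console.Out.WriteLine("write throughput: " + mbPerSec + " MB/s");
            }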
Example #10
 public override void Configure(JobConf conf)
 {
     loader = new TestJobCounters.MemoryLoader(conf.GetLong(TargetValue, -1));
 }
Example #11
 /// <summary>get the input file name.</summary>
 /// <param name="job">a job configuration object</param>
 public override void Configure(JobConf job)
 {
     base.Configure(job);
     maxNumItems = job.GetLong("aggregate.max.num.unique.values", long.MaxValue);
 }
Example #12
        public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
                                LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec,
                                Type combinerClass, Task.CombineOutputCollector<K, V> combineCollector,
                                Counters.Counter spilledRecordsCounter, Counters.Counter reduceCombineInputCounter,
                                Counters.Counter mergedMapOutputsCounter, ExceptionReporter exceptionReporter,
                                Progress mergePhase, MapOutputFile mapOutputFile)
        {
            /* Maximum percentage of the in-memory limit that a single shuffle can
             * consume*/
            this.reduceId                  = reduceId;
            this.jobConf                   = jobConf;
            this.localDirAllocator         = localDirAllocator;
            this.exceptionReporter         = exceptionReporter;
            this.reporter                  = reporter;
            this.codec                     = codec;
            this.combinerClass             = combinerClass;
            this.combineCollector          = combineCollector;
            this.reduceCombineInputCounter = reduceCombineInputCounter;
            this.spilledRecordsCounter     = spilledRecordsCounter;
            this.mergedMapOutputsCounter   = mergedMapOutputsCounter;
            this.mapOutputFile             = mapOutputFile;
            this.mapOutputFile.SetConf(jobConf);
            this.localFS = localFS;
            this.rfs     = ((LocalFileSystem)localFS).GetRaw();
            float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent,
                                                     MRJobConfig.DefaultShuffleInputBufferPercent);

            if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleInputBufferPercent
                                            + ": " + maxInMemCopyUse);
            }
            // Allow unit tests to fix Runtime memory
            this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes,
                                                      Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
            this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);
            float singleShuffleMemoryLimitPercent = jobConf.GetFloat(MRJobConfig.ShuffleMemoryLimitPercent,
                                                                     DefaultShuffleMemoryLimitPercent);

            if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleMemoryLimitPercent
                                            + ": " + singleShuffleMemoryLimitPercent);
            }
            usedMemory   = 0L;
            commitMemory = 0L;
            this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
            this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold, ioSortFactor);
            this.mergeThreshold = (long)(this.memoryLimit * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent, 0.90f));
            Log.Info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
                     + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
                     + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold
                     );
            if (this.maxSingleShuffleLimit >= this.mergeThreshold)
            {
                throw new RuntimeException("Invalid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold "
                                           + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + "mergeThreshold: " +
                                           this.mergeThreshold);
            }
            bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);

            if (allowMemToMemMerge)
            {
                this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this, this, memToMemMergeOutputsThreshold);
                this.memToMemMerger.Start();
            }
            else
            {
                this.memToMemMerger = null;
            }
            this.inMemoryMerger = CreateInMemoryMerger();
            this.inMemoryMerger.Start();
            this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
            this.onDiskMerger.Start();
            this.mergePhase = mergePhase;
        }
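
The constructor's memory arithmetic reduces to three numbers: memoryLimit is a fraction of the configured (or runtime) heap, and both the single-shuffle cap and the merge trigger are fractions of that limit; the final check rejects any configuration where one shuffle could outgrow the merge threshold. A standalone sketch of just that math (illustrative names, no Hadoop types):

        // Sketch of MergeManagerImpl's memory thresholds; names are illustrative.
        static void ComputeShuffleLimits(long heapBytes, float inputBufferPercent,
                                         float singleShuffleLimitPercent, float mergePercent)
        {
            long memoryLimit = (long)(heapBytes * inputBufferPercent);
            long maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleLimitPercent);
            long mergeThreshold = (long)(memoryLimit * mergePercent);
            // Same sanity check as the constructor: a single shuffle must stay
            // below the size that triggers a merge.
            if (maxSingleShuffleLimit >= mergeThreshold)
            {
                throw new ArgumentException("maxSingleShuffleLimit must be less than mergeThreshold");
            }
        }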
Example #13
 public override void Configure(JobConf conf)
 {
     // Mapper
     base.Configure(conf);
     skipSize = conf.GetLong("test.io.skip.size", 0);
 }
Example #14
        /// <summary>
        /// Splits files returned by
        /// <see cref="FileInputFormat{K, V}.ListStatus(JobConf)"/>
        /// when they're too big.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
        {
            StopWatch sw = new StopWatch().Start();

            FileStatus[] files = ListStatus(job);
            // Save the number of input files for metrics/loadgen
            job.SetLong(NumInputFiles, files.Length);
            long totalSize = 0;

            // compute total size
            foreach (FileStatus file in files)
            {
                // check we have valid files
                if (file.IsDirectory())
                {
                    throw new IOException("Not a file: " + file.GetPath());
                }
                totalSize += file.GetLen();
            }
            long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
            long minSize  = Math.Max(job.GetLong(FileInputFormat.SplitMinsize, 1), minSplitSize);
            // generate splits
            AList <FileSplit> splits     = new AList <FileSplit>(numSplits);
            NetworkTopology   clusterMap = new NetworkTopology();

            foreach (FileStatus file_1 in files)
            {
                Path path   = file_1.GetPath();
                long length = file_1.GetLen();
                if (length != 0)
                {
                    FileSystem      fs = path.GetFileSystem(job);
                    BlockLocation[] blkLocations;
                    if (file_1 is LocatedFileStatus)
                    {
                        blkLocations = ((LocatedFileStatus)file_1).GetBlockLocations();
                    }
                    else
                    {
                        blkLocations = fs.GetFileBlockLocations(file_1, 0, length);
                    }
                    if (IsSplitable(fs, path))
                    {
                        long blockSize      = file_1.GetBlockSize();
                        long splitSize      = ComputeSplitSize(goalSize, minSize, blockSize);
                        long bytesRemaining = length;
                        while (((double)bytesRemaining) / splitSize > SplitSlop)
                        {
                            string[][] splitHosts = GetSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining,
                                                                                splitSize, clusterMap);
                            splits.AddItem(MakeSplit(path, length - bytesRemaining, splitSize, splitHosts[0], splitHosts[1]));
                            bytesRemaining -= splitSize;
                        }
                        if (bytesRemaining != 0)
                        {
                            string[][] splitHosts = GetSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining,
                                                                                bytesRemaining, clusterMap);
                            splits.AddItem(MakeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts[0], splitHosts[1]));
                        }
                    }
                    else
                    {
                        string[][] splitHosts = GetSplitHostsAndCachedHosts(blkLocations, 0, length, clusterMap);
                        splits.AddItem(MakeSplit(path, 0, length, splitHosts[0], splitHosts[1]));
                    }
                }
                else
                {
                    //Create empty hosts array for zero length files
                    splits.AddItem(MakeSplit(path, 0, length, new string[0]));
                }
            }
            sw.Stop();
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Total # of splits generated by getSplits: " + splits.Count + ", TimeTaken: "
                          + sw.Now(TimeUnit.Milliseconds));
            }
            return Sharpen.Collections.ToArray(splits, new FileSplit[splits.Count]);
        }
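
The split size used in the loop comes from ComputeSplitSize; in stock Hadoop that helper clamps goalSize to the block size without going below minSize, and SplitSlop (1.1 upstream) lets the last chunk run up to 10% long before a new split is cut. The standard formula, restated here as an assumption:

        // Assumed to match Hadoop's FileInputFormat.computeSplitSize:
        // clamp the goal size to the block size, but never below minSize.
        protected static long ComputeSplitSize(long goalSize, long minSize, long blockSize)
        {
            return Math.Max(minSize, Math.Min(goalSize, blockSize));
        }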