Esempio n. 1
0
        /// <summary>
        /// Runs two in-memory merges (two map outputs each) on one merge manager,
        /// asserting that each merge adds one entry to <c>onDiskMapOutputs</c>, then
        /// hands the resulting on-disk files to the on-disk merger of a second
        /// manager and verifies the order of the merged keys and values.
        /// </summary>
        public virtual void TestInMemoryAndOnDiskMerger()
        {
            JobID jobId = new JobID("a", 0);
            TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0), 0);
            TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
            TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
            LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LocalDir);
            MergeManagerImpl <Text, Text> mergeManager = new MergeManagerImpl <Text, Text>(
                reduceId1, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
                new Progress(), new MROutputFiles());

            // write map outputs
            IDictionary <string, string> map1 = new SortedDictionary <string, string>();
            map1["apple"]  = "disgusting";
            map1["carrot"] = "delicious";
            IDictionary <string, string> map2 = new SortedDictionary <string, string>();
            // The second map output gets its own record so that the in-memory merge
            // below really interleaves two non-empty inputs.
            map2["banana"] = "pretty good";

            byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
            byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
            InMemoryMapOutput <Text, Text> mapOutput1 = new InMemoryMapOutput <Text, Text>(
                conf, mapId1, mergeManager, mapOutputBytes1.Length, null, true);
            InMemoryMapOutput <Text, Text> mapOutput2 = new InMemoryMapOutput <Text, Text>(
                conf, mapId2, mergeManager, mapOutputBytes2.Length, null, true);

            System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.Length);
            System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.Length);

            // create merger and run merge
            MergeThread <InMemoryMapOutput <Text, Text>, Text, Text> inMemoryMerger =
                mergeManager.CreateInMemoryMerger();
            IList <InMemoryMapOutput <Text, Text> > mapOutputs1 =
                new AList <InMemoryMapOutput <Text, Text> >();

            mapOutputs1.AddItem(mapOutput1);
            mapOutputs1.AddItem(mapOutput2);
            inMemoryMerger.Merge(mapOutputs1);
            NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

            TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3), 0);
            TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
            TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);

            // write map outputs
            IDictionary <string, string> map3 = new SortedDictionary <string, string>();
            map3["apple"]  = "awesome";
            map3["carrot"] = "amazing";
            IDictionary <string, string> map4 = new SortedDictionary <string, string>();
            map4["banana"] = "bla";

            byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
            byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
            InMemoryMapOutput <Text, Text> mapOutput3 = new InMemoryMapOutput <Text, Text>(
                conf, mapId3, mergeManager, mapOutputBytes3.Length, null, true);
            InMemoryMapOutput <Text, Text> mapOutput4 = new InMemoryMapOutput <Text, Text>(
                conf, mapId4, mergeManager, mapOutputBytes4.Length, null, true);

            System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.Length);
            System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.Length);

            // create a second in-memory merger on the same manager and run the merge
            MergeThread <InMemoryMapOutput <Text, Text>, Text, Text> inMemoryMerger2 =
                mergeManager.CreateInMemoryMerger();
            IList <InMemoryMapOutput <Text, Text> > mapOutputs2 =
                new AList <InMemoryMapOutput <Text, Text> >();

            mapOutputs2.AddItem(mapOutput3);
            mapOutputs2.AddItem(mapOutput4);
            inMemoryMerger2.Merge(mapOutputs2);
            NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);

            // Read back every on-disk output, accumulating keys/values across files
            // and remembering the paths for the on-disk merge further down.
            IList <MergeManagerImpl.CompressAwarePath> paths =
                new AList <MergeManagerImpl.CompressAwarePath>();
            IEnumerator <MergeManagerImpl.CompressAwarePath> iterator =
                mergeManager.onDiskMapOutputs.GetEnumerator();
            IList <string> keys   = new AList <string>();
            IList <string> values = new AList <string>();

            while (iterator.HasNext())
            {
                MergeManagerImpl.CompressAwarePath next = iterator.Next();
                ReadOnDiskMapOutput(conf, fs, next, keys, values);
                paths.AddItem(next);
            }
            // NUnit takes (expected, actual); keeping that order makes failure
            // messages report the right side as "expected".
            NUnit.Framework.Assert.AreEqual(
                Arrays.AsList("apple", "banana", "carrot", "apple", "banana", "carrot"), keys);
            NUnit.Framework.Assert.AreEqual(
                Arrays.AsList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"),
                values);
            mergeManager.Close();

            // A fresh manager (second reduce attempt) merges the two on-disk files into one.
            mergeManager = new MergeManagerImpl <Text, Text>(
                reduceId2, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
                new Progress(), new MROutputFiles());
            MergeThread <MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger =
                mergeManager.CreateOnDiskMerger();

            onDiskMerger.Merge(paths);
            NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

            keys   = new AList <string>();
            values = new AList <string>();
            ReadOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.GetEnumerator().Next(),
                                keys, values);
            NUnit.Framework.Assert.AreEqual(
                Arrays.AsList("apple", "apple", "banana", "banana", "carrot", "carrot"), keys);
            NUnit.Framework.Assert.AreEqual(
                Arrays.AsList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"),
                values);
            mergeManager.Close();

            // After Close() none of the manager's output collections may hold entries.
            NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
            NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
            NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
        }
Esempio n. 2
0
        /// <summary>
        /// Stores the collaborators, derives the memory limits and merge thresholds
        /// from <paramref name="jobConf"/>, validates them, and creates and starts
        /// the merger threads (in-memory, on-disk and, when enabled by
        /// <c>MRJobConfig.ReduceMemtomemEnabled</c>, memory-to-memory).
        /// </summary>
        /// <param name="reduceId">Stored in <c>reduceId</c>.</param>
        /// <param name="jobConf">Configuration all limits and thresholds are read from.</param>
        /// <param name="localFS">Local file system; it is cast to <c>LocalFileSystem</c> to obtain the raw file system.</param>
        /// <param name="localDirAllocator">Stored in <c>localDirAllocator</c>.</param>
        /// <param name="reporter">Stored in <c>reporter</c>.</param>
        /// <param name="codec">Stored in <c>codec</c>.</param>
        /// <param name="combinerClass">Stored in <c>combinerClass</c>.</param>
        /// <param name="combineCollector">Stored in <c>combineCollector</c>.</param>
        /// <param name="spilledRecordsCounter">Stored in <c>spilledRecordsCounter</c>.</param>
        /// <param name="reduceCombineInputCounter">Stored in <c>reduceCombineInputCounter</c>.</param>
        /// <param name="mergedMapOutputsCounter">Stored in <c>mergedMapOutputsCounter</c>.</param>
        /// <param name="exceptionReporter">Stored in <c>exceptionReporter</c>.</param>
        /// <param name="mergePhase">Stored in <c>mergePhase</c>.</param>
        /// <param name="mapOutputFile">Must not be null: <c>SetConf(jobConf)</c> is invoked on it here.</param>
        /// <exception cref="System.ArgumentException">
        /// <c>MRJobConfig.ShuffleInputBufferPercent</c> is outside [0, 1] or
        /// <c>MRJobConfig.ShuffleMemoryLimitPercent</c> is outside (0, 1].
        /// </exception>
        /// <exception cref="RuntimeException">
        /// The derived <c>maxSingleShuffleLimit</c> is not smaller than <c>mergeThreshold</c>.
        /// </exception>
        public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS
                                , LocalDirAllocator localDirAllocator, Reporter reporter, CompressionCodec codec
                                , Type combinerClass, Task.CombineOutputCollector <K, V> combineCollector, Counters.Counter
                                spilledRecordsCounter, Counters.Counter reduceCombineInputCounter, Counters.Counter
                                mergedMapOutputsCounter, ExceptionReporter exceptionReporter, Progress mergePhase
                                , MapOutputFile mapOutputFile)
        {
            this.reduceId                  = reduceId;
            this.jobConf                   = jobConf;
            this.localDirAllocator         = localDirAllocator;
            this.exceptionReporter         = exceptionReporter;
            this.reporter                  = reporter;
            this.codec                     = codec;
            this.combinerClass             = combinerClass;
            this.combineCollector          = combineCollector;
            this.reduceCombineInputCounter = reduceCombineInputCounter;
            this.spilledRecordsCounter     = spilledRecordsCounter;
            this.mergedMapOutputsCounter   = mergedMapOutputsCounter;
            this.mapOutputFile             = mapOutputFile;
            this.mapOutputFile.SetConf(jobConf);
            this.localFS = localFS;
            this.rfs     = ((LocalFileSystem)localFS).GetRaw();

            // Fraction of the total reduce memory that shuffle input may occupy.
            float maxInMemCopyUse = jobConf.GetFloat(MRJobConfig.ShuffleInputBufferPercent,
                                                     MRJobConfig.DefaultShuffleInputBufferPercent);

            if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleInputBufferPercent
                                            + ": " + maxInMemCopyUse);
            }
            // Allow unit tests to fix Runtime memory
            this.memoryLimit = (long)(jobConf.GetLong(MRJobConfig.ReduceMemoryTotalBytes,
                                                      Runtime.GetRuntime().MaxMemory()) * maxInMemCopyUse);
            this.ioSortFactor = jobConf.GetInt(MRJobConfig.IoSortFactor, 100);

            // Maximum percentage of the in-memory limit that a single shuffle can consume.
            float singleShuffleMemoryLimitPercent = jobConf.GetFloat(MRJobConfig.ShuffleMemoryLimitPercent,
                                                                     DefaultShuffleMemoryLimitPercent);

            if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f)
            {
                throw new ArgumentException("Invalid value for " + MRJobConfig.ShuffleMemoryLimitPercent
                                            + ": " + singleShuffleMemoryLimitPercent);
            }
            usedMemory   = 0L;
            commitMemory = 0L;
            this.maxSingleShuffleLimit = (long)(memoryLimit * singleShuffleMemoryLimitPercent);
            // The memory-to-memory threshold defaults to the sort factor read above.
            this.memToMemMergeOutputsThreshold = jobConf.GetInt(MRJobConfig.ReduceMemtomemThreshold,
                                                                ioSortFactor);
            this.mergeThreshold = (long)(this.memoryLimit * jobConf.GetFloat(MRJobConfig.ShuffleMergePercent,
                                                                             0.90f));
            Log.Info("MergerManager: memoryLimit=" + memoryLimit
                     + ", maxSingleShuffleLimit=" + maxSingleShuffleLimit
                     + ", mergeThreshold=" + mergeThreshold
                     + ", ioSortFactor=" + ioSortFactor
                     + ", memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
            if (this.maxSingleShuffleLimit >= this.mergeThreshold)
            {
                // A separator between the two values keeps the numbers readable in the message.
                throw new RuntimeException("Invalid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold "
                                           + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + ", mergeThreshold: " +
                                           this.mergeThreshold);
            }
            bool allowMemToMemMerge = jobConf.GetBoolean(MRJobConfig.ReduceMemtomemEnabled, false);

            if (allowMemToMemMerge)
            {
                this.memToMemMerger = new MergeManagerImpl.IntermediateMemoryToMemoryMerger(this,
                                                                                            this, memToMemMergeOutputsThreshold);
                this.memToMemMerger.Start();
            }
            else
            {
                this.memToMemMerger = null;
            }
            this.inMemoryMerger = CreateInMemoryMerger();
            this.inMemoryMerger.Start();
            this.onDiskMerger = new MergeManagerImpl.OnDiskMerger(this, this);
            this.onDiskMerger.Start();
            this.mergePhase = mergePhase;
        }
Esempio n. 3
0
        /// <summary>
        /// Checks that the on-disk merger picks up the configured sort factor as its
        /// merge factor, and that the batches queued in its <c>pendingToBeMerged</c>
        /// list hold between 1 and <c>SortFactor</c> inputs ordered by ascending
        /// compressed size.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.URISyntaxException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestOnDiskMerger()
        {
            const int SortFactor = 5;
            JobConf   jobConf    = new JobConf();

            jobConf.SetInt(MRJobConfig.IoSortFactor, SortFactor);
            MapOutputFile mapOutputFile = new MROutputFiles();
            FileSystem    fs            = FileSystem.GetLocal(jobConf);
            MergeManagerImpl <IntWritable, IntWritable> manager =
                new MergeManagerImpl <IntWritable, IntWritable>(
                    null, jobConf, fs, null, null, null, null, null, null, null, null,
                    null, null, mapOutputFile);
            // The on-disk merger is fed CompressAwarePath inputs (see CloseOnDiskFile
            // below), so it is read back as a MergeThread over that input type.
            MergeThread <MergeManagerImpl.CompressAwarePath, IntWritable, IntWritable> onDiskMerger =
                (MergeThread <MergeManagerImpl.CompressAwarePath, IntWritable, IntWritable>)
                Whitebox.GetInternalState(manager, "onDiskMerger");
            int mergeFactor = (int)Whitebox.GetInternalState(onDiskMerger, "mergeFactor");

            // make sure the io.sort.factor is set properly
            NUnit.Framework.Assert.AreEqual(SortFactor, mergeFactor);
            // Stop the onDiskMerger thread so that we can intercept the list of files
            // waiting to be merged.
            onDiskMerger.Suspend();
            // Send the list of fake files waiting to be merged
            Random rand = new Random();

            for (int i = 0; i < 2 * SortFactor; ++i)
            {
                Path path = new Path("somePath");
                MergeManagerImpl.CompressAwarePath cap =
                    new MergeManagerImpl.CompressAwarePath(path, 1L, rand.Next());
                manager.CloseOnDiskFile(cap);
            }
            // Check that the files pending to be merged are in sorted order.
            List <IList <MergeManagerImpl.CompressAwarePath> > pendingToBeMerged =
                (List <IList <MergeManagerImpl.CompressAwarePath> >)
                Whitebox.GetInternalState(onDiskMerger, "pendingToBeMerged");

            NUnit.Framework.Assert.IsTrue(pendingToBeMerged.Count > 0,
                                          "No inputs were added to list pending to merge");
            foreach (IList <MergeManagerImpl.CompressAwarePath> inputs in pendingToBeMerged)
            {
                // Checked once per batch, outside the pairwise loop, so that empty and
                // single-element batches are validated as well.
                NUnit.Framework.Assert.IsTrue(inputs.Count > 0 && inputs.Count <= SortFactor,
                                              "Not enough / too many inputs were going to be merged");
                for (int j = 1; j < inputs.Count; ++j)
                {
                    NUnit.Framework.Assert.IsTrue(
                        inputs[j].GetCompressedSize() >= inputs[j - 1].GetCompressedSize(),
                        "Inputs to be merged were not sorted according to size: ");
                }
            }
        }