Пример #1
0
 /// <summary>
 /// Creates a reader over an existing <see cref="RawKeyValueIterator"/>.
 /// </summary>
 /// <param name="_enclosing">
 /// Owning merge manager; its <c>spilledRecordsCounter</c> is forwarded to the base constructor.
 /// </param>
 /// <param name="kvIter">Iterator stored in the <c>kvIter</c> field.</param>
 /// <param name="size">Size value forwarded to the base constructor.</param>
 /// <exception cref="System.IO.IOException"/>
 public RawKVIteratorReader(MergeManagerImpl <K, V> _enclosing, RawKeyValueIterator
                            kvIter, long size)
     // "this" is not available inside a constructor initializer (CS0027), so the
     // counter is read from the constructor parameter rather than from the field.
     : base(null, null, size, null, _enclosing.spilledRecordsCounter)
 {
     this._enclosing = _enclosing;
     this.kvIter     = kvIter;
 }
Пример #2
0
        /// <summary>
        /// Prepares the fixture fields used by the tests: two job configurations that
        /// differ only in the shuffle fetch-retry flag, a reduce attempt id, a secret key,
        /// mocked collaborators, and a scheduler stub that returns two map attempts for
        /// <c>host</c>.
        /// </summary>
        public virtual void Setup()
        {
            Log.Info(">>>> " + name.GetMethodName());

            // One configuration with fetch retry disabled, one with it enabled.
            job = new JobConf();
            job.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, false);

            jobWithRetry = new JobConf();
            jobWithRetry.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, true);

            // Plain (non-mocked) fixture values.
            id  = TaskAttemptID.ForName("attempt_0_1_r_1_1");
            key = JobTokenSecretManager.CreateSecretKey(new byte[] { 0, 0, 0, 0 });

            // Mocked collaborators.
            ss         = Org.Mockito.Mockito.Mock <ShuffleSchedulerImpl>();
            mm         = Org.Mockito.Mockito.Mock <MergeManagerImpl>();
            r          = Org.Mockito.Mockito.Mock <Reporter>();
            metrics    = Org.Mockito.Mockito.Mock <ShuffleClientMetrics>();
            except     = Org.Mockito.Mockito.Mock <ExceptionReporter>();
            connection = Org.Mockito.Mockito.Mock <HttpURLConnection>();
            allErrs    = Org.Mockito.Mockito.Mock <Counters.Counter>();

            // Any counter requested from the reporter resolves to the shared mock counter.
            Org.Mockito.Mockito
                .When(r.GetCounter(Matchers.AnyString(), Matchers.AnyString()))
                .ThenReturn(allErrs);

            // The scheduler reports both map attempts for the host under test.
            AList <TaskAttemptID> hostMaps = new AList <TaskAttemptID>(1);
            hostMaps.AddItem(map1ID);
            hostMaps.AddItem(map2ID);
            Org.Mockito.Mockito
                .When(ss.GetMapsForHost(host))
                .ThenReturn(hostMaps);
        }
Пример #3
0
        /// <summary>
        /// Configures a merge manager with 8 GiB of reduce memory and checks that both
        /// <c>memoryLimit</c> and the value returned by <c>GetMaxInMemReduceLimit()</c>
        /// exceed <c>int.MaxValue</c>.
        /// </summary>
        public virtual void TestLargeMemoryLimits()
        {
            JobConf conf = new JobConf();

            // Xmx in production
            conf.SetLong(MRJobConfig.ReduceMemoryTotalBytes, 8L * 1024 * 1024 * 1024);
            // M1 = Xmx fraction for map outputs
            conf.SetFloat(MRJobConfig.ShuffleInputBufferPercent, 1.0f);
            // M2 = max M1 fraction for a single map output
            conf.SetFloat(MRJobConfig.ShuffleMemoryLimitPercent, 0.95f);
            // M3 = M1 fraction at which in memory merge is triggered
            conf.SetFloat(MRJobConfig.ShuffleMergePercent, 1.0f);
            // M4 = M1 fraction of map outputs remaining in memory for a reduce
            conf.SetFloat(MRJobConfig.ReduceInputBufferPercent, 1.0f);
            MergeManagerImpl <Text, Text> mgr = new MergeManagerImpl <Text, Text>(null, conf, Org.Mockito.Mockito.Mock
                                                                                  <LocalFileSystem>(), null, null, null, null, null, null, null, null, null, null,
                                                                                  new MROutputFiles());

            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(mgr.memoryLimit > int.MaxValue,
                                          "Large shuffle area unusable: " + mgr.memoryLimit);
            long maxInMemReduce = mgr.GetMaxInMemReduceLimit();

            NUnit.Framework.Assert.IsTrue(maxInMemReduce > int.MaxValue,
                                          "Large in-memory reduce area unusable: " + maxInMemReduce);
        }
Пример #4
0
 /// <summary>
 /// Initializes the merge thread's fields and starts it with an empty
 /// <c>pendingToBeMerged</c> list.
 /// </summary>
 /// <param name="manager">Merge manager stored in the <c>manager</c> field.</param>
 /// <param name="mergeFactor">Value stored in the <c>mergeFactor</c> field.</param>
 /// <param name="reporter">Exception reporter stored in the <c>reporter</c> field.</param>
 public MergeThread(
     MergeManagerImpl <K, V> manager,
     int mergeFactor,
     ExceptionReporter reporter)
 {
     this.manager     = manager;
     this.reporter    = reporter;
     this.mergeFactor = mergeFactor;

     // Nothing is queued for merging at construction time.
     this.pendingToBeMerged = new List <IList <T> >();
 }
Пример #5
0
 /// <summary>
 /// Creates the on-disk merger thread, names it and marks it as a daemon.
 /// </summary>
 /// <param name="_enclosing">
 /// Owning merge manager; its <c>ioSortFactor</c> and <c>exceptionReporter</c> are
 /// forwarded to the base constructor.
 /// </param>
 /// <param name="manager">Merge manager forwarded to the base constructor.</param>
 public OnDiskMerger(MergeManagerImpl <K, V> _enclosing, MergeManagerImpl <K, V> manager
                     )
     // "this" is not available inside a constructor initializer (CS0027), so the
     // values are read from the constructor parameter rather than from the field.
     : base(manager, _enclosing.ioSortFactor, _enclosing.exceptionReporter)
 {
     this._enclosing = _enclosing;
     this.SetName("OnDiskMerger - Thread to merge on-disk map-outputs");
     this.SetDaemon(true);
 }
Пример #6
0
 /// <summary>
 /// Creates the in-memory merger thread, names it and marks it as a daemon.
 /// <c>int.MaxValue</c> is passed as the base constructor's second argument.
 /// </summary>
 /// <param name="_enclosing">
 /// Owning merge manager; its <c>exceptionReporter</c> is forwarded to the base constructor.
 /// </param>
 /// <param name="manager">Merge manager forwarded to the base constructor.</param>
 public InMemoryMerger(MergeManagerImpl <K, V> _enclosing, MergeManagerImpl <K, V> manager
                       )
     // "this" is not available inside a constructor initializer (CS0027), so the
     // reporter is read from the constructor parameter rather than from the field.
     : base(manager, int.MaxValue, _enclosing.exceptionReporter)
 {
     this._enclosing = _enclosing;
     this.SetName("InMemoryMerger - Thread to merge in-memory shuffled map-outputs");
     this.SetDaemon(true);
 }
Пример #7
0
 /// <summary>
 /// Creates an on-disk map output by delegating to the overload that takes an explicit
 /// file system and output path. The file system is the raw local file system obtained
 /// from <paramref name="conf"/>; the output path is requested from
 /// <paramref name="mapOutputFile"/> for the map's task id and <paramref name="size"/>.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public OnDiskMapOutput(
     TaskAttemptID mapId,
     TaskAttemptID reduceId,
     MergeManagerImpl <K, V> merger,
     long size,
     JobConf conf,
     MapOutputFile mapOutputFile,
     int fetcher,
     bool primaryMapOutput)
     : this(
           mapId,
           reduceId,
           merger,
           size,
           conf,
           mapOutputFile,
           fetcher,
           primaryMapOutput,
           FileSystem.GetLocal(conf).GetRaw(),
           mapOutputFile.GetInputFileForWrite(mapId.GetTaskID(), size))
 {
 }
Пример #8
0
 /// <summary>
 /// Creates the intermediate memory-to-memory merger thread, names it and marks it
 /// as a daemon.
 /// </summary>
 /// <param name="_enclosing">
 /// Owning merge manager; its <c>exceptionReporter</c> is forwarded to the base constructor.
 /// </param>
 /// <param name="manager">Merge manager forwarded to the base constructor.</param>
 /// <param name="mergeFactor">Merge factor forwarded to the base constructor.</param>
 public IntermediateMemoryToMemoryMerger(MergeManagerImpl <K, V> _enclosing, MergeManagerImpl
                                         <K, V> manager, int mergeFactor)
     // "this" is not available inside a constructor initializer (CS0027), so the
     // reporter is read from the constructor parameter rather than from the field.
     : base(manager, mergeFactor, _enclosing.exceptionReporter)
 {
     this._enclosing = _enclosing;
     this.SetName("InMemoryMerger - Thread to do in-memory merge of in-memory " + "shuffled map-outputs"
                  );
     this.SetDaemon(true);
 }
Пример #9
0
 /// <summary>
 /// Creates an on-disk map output against an explicit file system and output path.
 /// A temporary path is derived from <paramref name="outputPath"/> and
 /// <paramref name="fetcher"/>, a file is created there on <paramref name="fs"/>, and
 /// the resulting stream is passed through <c>CryptoUtils.WrapIfNecessary</c> before
 /// being stored in <c>disk</c>.
 /// </summary>
 internal OnDiskMapOutput(
     TaskAttemptID mapId,
     TaskAttemptID reduceId,
     MergeManagerImpl <K, V> merger,
     long size,
     JobConf conf,
     MapOutputFile mapOutputFile,
     int fetcher,
     bool primaryMapOutput,
     FileSystem fs,
     Path outputPath)
     : base(mapId, size, primaryMapOutput)
 {
     // Plain field captures first.
     this.conf       = conf;
     this.merger     = merger;
     this.fs         = fs;
     this.outputPath = outputPath;

     // The temporary path must be known before the output stream can be opened on it.
     var fetcherTempPath = GetTempPath(outputPath, fetcher);
     tmpOutputPath = fetcherTempPath;
     disk          = CryptoUtils.WrapIfNecessary(conf, fs.Create(fetcherTempPath));
 }
Пример #10
0
 /// <summary>
 /// Creates a reader over a slice of an in-memory byte array. The base constructor and
 /// <c>memDataIn</c> both receive <c>length - start</c> as the number of bytes, so
 /// <paramref name="length"/> acts as the end offset of the slice.
 /// </summary>
 /// <param name="merger">Merge manager stored in the <c>merger</c> field.</param>
 /// <param name="taskAttemptId">Task attempt id stored in the <c>taskAttemptId</c> field.</param>
 /// <param name="data">Backing byte array stored in <c>buffer</c>.</param>
 /// <param name="start">Offset at which the slice begins.</param>
 /// <param name="length">Offset at which the slice ends.</param>
 /// <param name="conf">Configuration forwarded to the base constructor.</param>
 /// <exception cref="System.IO.IOException"/>
 public InMemoryReader(
     MergeManagerImpl <K, V> merger,
     TaskAttemptID taskAttemptId,
     byte[] data,
     int start,
     int length,
     Configuration conf)
     : base(conf, null, length - start, null, null)
 {
     this.merger        = merger;
     this.taskAttemptId = taskAttemptId;
     this.start         = start;
     this.length        = length;

     // fileLength comes from the base class and is narrowed to int here.
     buffer     = data;
     bufferSize = (int)fileLength;

     int sliceByteCount = length - start;
     memDataIn.Reset(data, start, sliceByteCount);
 }
Пример #11
0
        /// <summary>
        /// Checks that the on-disk merger picks up the configured sort factor and that each
        /// list queued in <c>pendingToBeMerged</c> holds between 1 and <c>SortFactor</c>
        /// entries ordered by non-decreasing compressed size.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Sharpen.URISyntaxException"/>
        /// <exception cref="System.Exception"/>
        public virtual void TestOnDiskMerger()
        {
            JobConf jobConf    = new JobConf();
            int     SortFactor = 5;

            jobConf.SetInt(MRJobConfig.IoSortFactor, SortFactor);
            MapOutputFile mapOutputFile = new MROutputFiles();
            FileSystem    fs            = FileSystem.GetLocal(jobConf);
            MergeManagerImpl <IntWritable, IntWritable> manager = new MergeManagerImpl <IntWritable
                                                                                        , IntWritable>(null, jobConf, fs, null, null, null, null, null, null, null, null
                                                                                                       , null, null, mapOutputFile);
            MergeThread <MapOutput <IntWritable, IntWritable>, IntWritable, IntWritable> onDiskMerger
                = (MergeThread <MapOutput <IntWritable, IntWritable>, IntWritable, IntWritable>)Whitebox
                  .GetInternalState(manager, "onDiskMerger");
            int mergeFactor = (int)Whitebox.GetInternalState(onDiskMerger, "mergeFactor");

            // make sure the io.sort.factor is set properly (expected value first, per NUnit)
            NUnit.Framework.Assert.AreEqual(SortFactor, mergeFactor);
            // Stop the onDiskMerger thread so that we can intercept the list of files
            // waiting to be merged.
            onDiskMerger.Suspend();
            //Send the list of fake files waiting to be merged
            Random rand = new Random();

            for (int i = 0; i < 2 * SortFactor; ++i)
            {
                Path path = new Path("somePath");
                // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
                MergeManagerImpl.CompressAwarePath cap = new MergeManagerImpl.CompressAwarePath(path
                                                                                                , 1L, rand.Next());
                manager.CloseOnDiskFile(cap);
            }
            //Check that the files pending to be merged are in sorted order.
            List <IList <MergeManagerImpl.CompressAwarePath> > pendingToBeMerged = (List <IList <MergeManagerImpl.CompressAwarePath
                                                                                                 > >)Whitebox.GetInternalState(onDiskMerger, "pendingToBeMerged");

            // NUnit takes the condition first and the failure message second.
            NUnit.Framework.Assert.IsTrue(pendingToBeMerged.Count > 0,
                                          "No inputs were added to list pending to merge");
            for (int i_1 = 0; i_1 < pendingToBeMerged.Count; ++i_1)
            {
                IList <MergeManagerImpl.CompressAwarePath> inputs = pendingToBeMerged[i_1];

                // The size check does not depend on j, so it runs once per list; this also
                // covers lists with fewer than two entries, which the inner loop never visits.
                NUnit.Framework.Assert.IsTrue(inputs.Count > 0 && inputs.Count <= SortFactor,
                                              "Not enough / too many inputs were going to be merged");
                for (int j = 1; j < inputs.Count; ++j)
                {
                    NUnit.Framework.Assert.IsTrue(inputs[j].GetCompressedSize() >= inputs[j - 1].GetCompressedSize(),
                                                  "Inputs to be merged were not sorted according to size: ");
                }
            }
        }
Пример #12
0
 /// <summary>
 /// Creates an in-memory map output backed by a <c>BoundedByteArrayOutputStream</c> of
 /// <paramref name="size"/> bytes, whose buffer is exposed through <c>memory</c>.
 /// </summary>
 /// <param name="conf">Configuration stored in the <c>conf</c> field.</param>
 /// <param name="mapId">Map attempt id forwarded to the base constructor.</param>
 /// <param name="merger">Merge manager stored in the <c>merger</c> field.</param>
 /// <param name="size">Buffer size; also forwarded (as a long) to the base constructor.</param>
 /// <param name="codec">Compression codec, or null when no decompressor is needed.</param>
 /// <param name="primaryMapOutput">Flag forwarded to the base constructor.</param>
 public InMemoryMapOutput(
     Configuration conf,
     TaskAttemptID mapId,
     MergeManagerImpl <K, V> merger,
     int size,
     CompressionCodec codec,
     bool primaryMapOutput)
     : base(mapId, (long)size, primaryMapOutput)
 {
     this.conf   = conf;
     this.merger = merger;
     this.codec  = codec;

     byteStream = new BoundedByteArrayOutputStream(size);
     memory     = byteStream.GetBuffer();

     // Decompression of map-outputs: a decompressor is taken from the pool only when a codec is supplied.
     decompressor = (codec == null) ? null : CodecPool.GetDecompressor(codec);
 }
Пример #13
0
        /// <summary>
        /// Runs two in-memory merges, each over two map outputs, and checks that each one
        /// adds an entry to <c>onDiskMapOutputs</c>. It then reads the on-disk outputs back,
        /// merges them with an on-disk merger created from a fresh merge manager, and checks
        /// the resulting keys and values. Finally it verifies that closing the manager
        /// leaves all three output collections empty.
        /// </summary>
        public virtual void TestInMemoryAndOnDiskMerger()
        {
            JobID         jobId     = new JobID("a", 0);
            TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0)
                                                        , 0);
            TaskAttemptID                 mapId1       = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
            TaskAttemptID                 mapId2       = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
            LocalDirAllocator             lda          = new LocalDirAllocator(MRConfig.LocalDir);
            MergeManagerImpl <Text, Text> mergeManager = new MergeManagerImpl <Text, Text>(reduceId1
                                                                                           , jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null, new
                                                                                           Progress(), new MROutputFiles());
            // write map outputs
            IDictionary <string, string> map1 = new SortedDictionary <string, string>();

            map1["apple"]  = "disgusting";
            map1["carrot"] = "delicious";
            IDictionary <string, string> map2 = new SortedDictionary <string, string>();

            // "banana" belongs to the second map output (mirrors map3/map4 below); it was
            // previously written into map1, leaving map2 empty.
            map2["banana"] = "pretty good";
            byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
            byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
            InMemoryMapOutput <Text, Text> mapOutput1 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId1, mergeManager, mapOutputBytes1.Length, null, true);
            InMemoryMapOutput <Text, Text> mapOutput2 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId2, mergeManager, mapOutputBytes2.Length, null, true);

            System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.
                              Length);
            System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.
                              Length);
            // create merger and run merge
            MergeThread <InMemoryMapOutput <Text, Text>, Text, Text> inMemoryMerger = mergeManager
                                                                                      .CreateInMemoryMerger();
            IList <InMemoryMapOutput <Text, Text> > mapOutputs1 = new AList <InMemoryMapOutput <Text
                                                                                                , Text> >();

            mapOutputs1.AddItem(mapOutput1);
            mapOutputs1.AddItem(mapOutput2);
            inMemoryMerger.Merge(mapOutputs1);
            NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);
            TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3)
                                                        , 0);
            TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
            TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);
            // write map outputs
            IDictionary <string, string> map3 = new SortedDictionary <string, string>();

            map3["apple"]  = "awesome";
            map3["carrot"] = "amazing";
            IDictionary <string, string> map4 = new SortedDictionary <string, string>();

            map4["banana"] = "bla";
            byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
            byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
            InMemoryMapOutput <Text, Text> mapOutput3 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId3, mergeManager, mapOutputBytes3.Length, null, true);
            InMemoryMapOutput <Text, Text> mapOutput4 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId4, mergeManager, mapOutputBytes4.Length, null, true);

            System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.
                              Length);
            System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.
                              Length);
            // create a second merger and run the merge
            MergeThread <InMemoryMapOutput <Text, Text>, Text, Text> inMemoryMerger2 = mergeManager
                                                                                       .CreateInMemoryMerger();
            IList <InMemoryMapOutput <Text, Text> > mapOutputs2 = new AList <InMemoryMapOutput <Text
                                                                                                , Text> >();

            mapOutputs2.AddItem(mapOutput3);
            mapOutputs2.AddItem(mapOutput4);
            inMemoryMerger2.Merge(mapOutputs2);
            NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);
            IList <MergeManagerImpl.CompressAwarePath> paths = new AList <MergeManagerImpl.CompressAwarePath
                                                                          >();
            IEnumerator <MergeManagerImpl.CompressAwarePath> iterator = mergeManager.onDiskMapOutputs
                                                                        .GetEnumerator();
            IList <string> keys   = new AList <string>();
            IList <string> values = new AList <string>();

            // IEnumerator<T> exposes MoveNext()/Current, not the Java-style HasNext()/Next().
            while (iterator.MoveNext())
            {
                MergeManagerImpl.CompressAwarePath next = iterator.Current;
                ReadOnDiskMapOutput(conf, fs, next, keys, values);
                paths.AddItem(next);
            }
            // Expected value first, per NUnit's AreEqual(expected, actual).
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("apple", "banana", "carrot",
                                                          "apple", "banana", "carrot"), keys);
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("awesome", "bla", "amazing"
                                                          , "disgusting", "pretty good", "delicious"), values);
            mergeManager.Close();
            mergeManager = new MergeManagerImpl <Text, Text>(reduceId2, jobConf, fs, lda, Reporter
                                                             .Null, null, null, null, null, null, null, null, new Progress(), new MROutputFiles
                                                                 ());
            MergeThread <MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger = mergeManager
                                                                                        .CreateOnDiskMerger();

            onDiskMerger.Merge(paths);
            NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);
            keys   = new AList <string>();
            values = new AList <string>();

            // Advance to the single merged output before reading it.
            IEnumerator <MergeManagerImpl.CompressAwarePath> mergedIterator = mergeManager.onDiskMapOutputs
                                                                              .GetEnumerator();
            NUnit.Framework.Assert.IsTrue(mergedIterator.MoveNext());
            ReadOnDiskMapOutput(conf, fs, mergedIterator.Current, keys, values);
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("apple", "apple", "banana", "banana"
                                                          , "carrot", "carrot"), keys);
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("awesome", "disgusting", "pretty good"
                                                          , "bla", "amazing", "delicious"), values);
            mergeManager.Close();
            NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
            NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
            NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
        }
Пример #14
0
 /// <summary>
 /// Creates a fetcher whose connection is supplied by the caller. Every other argument
 /// is forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="connection">Connection stored in the <c>connection</c> field.</param>
 public FakeFetcher(
     JobConf job,
     TaskAttemptID reduceId,
     ShuffleSchedulerImpl <K, V> scheduler,
     MergeManagerImpl <K, V> merger,
     Reporter reporter,
     ShuffleClientMetrics metrics,
     ExceptionReporter exceptionReporter,
     SecretKey jobTokenSecret,
     HttpURLConnection connection,
     int id)
     : base(
           job,
           reduceId,
           scheduler,
           merger,
           reporter,
           metrics,
           exceptionReporter,
           jobTokenSecret,
           id)
 {
     // The base constructor does not receive the connection; it is kept here.
     this.connection = connection;
 }
Пример #15
0
 /// <summary>
 /// Creates a merge thread for tests. <c>int.MaxValue</c> is passed as the base
 /// constructor's second argument and the <c>numMerges</c> counter starts at zero.
 /// </summary>
 /// <param name="mergeManager">Merge manager forwarded to the base constructor.</param>
 /// <param name="reporter">Exception reporter forwarded to the base constructor.</param>
 public TestMergeThread(
     MergeManagerImpl <Text, Text> mergeManager,
     ExceptionReporter reporter)
     : base(mergeManager, int.MaxValue, reporter)
 {
     // Counter starts at zero.
     numMerges = new AtomicInteger(0);
 }