Ejemplo n.º 1
0
        /// <summary>
        /// Removes map outputs from the front of <paramref name="inMemoryMapOutputs"/> and
        /// appends a segment for each one to <paramref name="inMemorySegments"/>, stopping
        /// once the bytes held by the outputs still in the list no longer exceed
        /// <paramref name="leaveBytes"/>.
        /// </summary>
        /// <param name="inMemoryMapOutputs">Map outputs to drain; consumed entries are removed from this list.</param>
        /// <param name="inMemorySegments">Receives one segment per consumed map output.</param>
        /// <param name="leaveBytes">Upper bound on the bytes allowed to stay in <paramref name="inMemoryMapOutputs"/>.</param>
        /// <returns>The total number of bytes of the map outputs that were turned into segments.</returns>
        /// <exception cref="System.IO.IOException"/>
        private long CreateInMemorySegments(IList <InMemoryMapOutput <K, V> > inMemoryMapOutputs
                                            , IList <Merger.Segment <K, V> > inMemorySegments, long leaveBytes)
        {
            // The pending byte count is summed from the list itself instead of being
            // taken from the RamManager, because files can be closed but not yet
            // present in inMemoryMapOutputs.
            long pendingBytes = 0L;

            foreach (InMemoryMapOutput <K, V> candidate in inMemoryMapOutputs)
            {
                pendingBytes += candidate.GetMemory().Length;
            }

            long convertedBytes = 0L;

            while (pendingBytes > leaveBytes)
            {
                // Always consume the oldest entry, i.e. the head of the list.
                InMemoryMapOutput <K, V> head = inMemoryMapOutputs.Remove(0);
                byte[] buffer = head.GetMemory();
                int    length = buffer.Length;

                convertedBytes += length;
                pendingBytes   -= length;

                IFile.Reader <K, V> segmentReader = new InMemoryReader <K, V>(
                    this, head.GetMapId(), buffer, 0, length, jobConf);

                // Only primary map outputs are given the merged-map-outputs counter.
                inMemorySegments.AddItem(new Merger.Segment <K, V>(
                                             segmentReader,
                                             true,
                                             (head.IsPrimaryMapOutput() ? mergedMapOutputsCounter : null)));
            }
            return(convertedBytes);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs two in-memory merges, each over two map outputs, and checks that every
        /// merge adds one entry to <c>onDiskMapOutputs</c>. It then merges the two
        /// resulting on-disk outputs with a fresh merge manager and checks the combined
        /// key/value sequence, and finally verifies that closing the manager leaves all
        /// of its output collections empty.
        /// </summary>
        public virtual void TestInMemoryAndOnDiskMerger()
        {
            JobID         jobId     = new JobID("a", 0);
            TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0)
                                                        , 0);
            TaskAttemptID                 mapId1       = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
            TaskAttemptID                 mapId2       = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
            LocalDirAllocator             lda          = new LocalDirAllocator(MRConfig.LocalDir);
            MergeManagerImpl <Text, Text> mergeManager = new MergeManagerImpl <Text, Text>(reduceId1
                                                                                           , jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null, new
                                                                                           Progress(), new MROutputFiles());
            // Write the first pair of map outputs.
            IDictionary <string, string> map1 = new SortedDictionary <string, string>();

            map1["apple"]  = "disgusting";
            map1["carrot"] = "delicious";
            IDictionary <string, string> map2 = new SortedDictionary <string, string>();

            // The entry belongs to map2 (it was previously written to map1, leaving
            // map2 empty), so the first merge really combines two non-empty outputs.
            map2["banana"] = "pretty good";
            byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
            byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
            InMemoryMapOutput <Text, Text> mapOutput1 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId1, mergeManager, mapOutputBytes1.Length, null, true);
            InMemoryMapOutput <Text, Text> mapOutput2 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId2, mergeManager, mapOutputBytes2.Length, null, true);

            // Copy the serialized bytes into the buffers owned by the in-memory outputs.
            System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.
                              Length);
            System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.
                              Length);
            // Create the first in-memory merger and run the merge.
            MergeThread <InMemoryMapOutput <Text, Text>, Text, Text> inMemoryMerger = mergeManager
                                                                                      .CreateInMemoryMerger();
            IList <InMemoryMapOutput <Text, Text> > mapOutputs1 = new AList <InMemoryMapOutput <Text
                                                                                                , Text> >();

            mapOutputs1.AddItem(mapOutput1);
            mapOutputs1.AddItem(mapOutput2);
            inMemoryMerger.Merge(mapOutputs1);
            NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);
            TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3)
                                                        , 0);
            TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
            TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);
            // Write the second pair of map outputs.
            IDictionary <string, string> map3 = new SortedDictionary <string, string>();

            map3["apple"]  = "awesome";
            map3["carrot"] = "amazing";
            IDictionary <string, string> map4 = new SortedDictionary <string, string>();

            map4["banana"] = "bla";
            byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
            byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
            InMemoryMapOutput <Text, Text> mapOutput3 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId3, mergeManager, mapOutputBytes3.Length, null, true);
            InMemoryMapOutput <Text, Text> mapOutput4 = new InMemoryMapOutput <Text, Text>(conf
                                                                                           , mapId4, mergeManager, mapOutputBytes4.Length, null, true);

            System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.
                              Length);
            System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.
                              Length);
            // Create the second in-memory merger and run the merge.
            MergeThread <InMemoryMapOutput <Text, Text>, Text, Text> inMemoryMerger2 = mergeManager
                                                                                       .CreateInMemoryMerger();
            IList <InMemoryMapOutput <Text, Text> > mapOutputs2 = new AList <InMemoryMapOutput <Text
                                                                                                , Text> >();

            mapOutputs2.AddItem(mapOutput3);
            mapOutputs2.AddItem(mapOutput4);
            inMemoryMerger2.Merge(mapOutputs2);
            NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);
            // Read both on-disk outputs back, remembering their paths for the on-disk merge.
            IList <MergeManagerImpl.CompressAwarePath> paths = new AList <MergeManagerImpl.CompressAwarePath
                                                                          >();
            IEnumerator <MergeManagerImpl.CompressAwarePath> iterator = mergeManager.onDiskMapOutputs
                                                                        .GetEnumerator();
            IList <string> keys   = new AList <string>();
            IList <string> values = new AList <string>();

            while (iterator.HasNext())
            {
                MergeManagerImpl.CompressAwarePath next = iterator.Next();
                ReadOnDiskMapOutput(conf, fs, next, keys, values);
                paths.AddItem(next);
            }
            // NUnit's argument order is (expected, actual).
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("apple", "banana", "carrot",
                                                          "apple", "banana", "carrot"), keys);
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("awesome", "bla", "amazing"
                                                          , "disgusting", "pretty good", "delicious"), values);
            mergeManager.Close();
            // Merge the two on-disk outputs with a fresh manager created for reduceId2.
            mergeManager = new MergeManagerImpl <Text, Text>(reduceId2, jobConf, fs, lda, Reporter
                                                             .Null, null, null, null, null, null, null, null, new Progress(), new MROutputFiles
                                                                 ());
            MergeThread <MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger = mergeManager
                                                                                        .CreateOnDiskMerger();

            onDiskMerger.Merge(paths);
            NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);
            keys   = new AList <string>();
            values = new AList <string>();
            ReadOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.GetEnumerator().Next(
                                    ), keys, values);
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("apple", "apple", "banana", "banana"
                                                          , "carrot", "carrot"), keys);
            NUnit.Framework.Assert.AreEqual(Arrays.AsList("awesome", "disgusting", "pretty good"
                                                          , "bla", "amazing", "delicious"), values);
            mergeManager.Close();
            // After closing, the manager must not hold any outputs.
            NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
            NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
            NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
        }