/// <summary>
/// Takes map outputs from the front (index 0) of
/// <paramref name="inMemoryMapOutputs"/>, wraps each one in an in-memory
/// reader and appends the resulting segment to
/// <paramref name="inMemorySegments"/>, until the bytes held by the outputs
/// still in the list no longer exceed <paramref name="leaveBytes"/>.
/// </summary>
/// <param name="inMemoryMapOutputs">Source list; converted entries are removed from it.</param>
/// <param name="inMemorySegments">Destination list that receives one segment per removed output.</param>
/// <param name="leaveBytes">Upper bound on the bytes allowed to stay behind in the source list.</param>
/// <returns>Sum of the buffer lengths of all outputs that were turned into segments.</returns>
/// <exception cref="System.IO.IOException"/>
private long CreateInMemorySegments(IList<InMemoryMapOutput<K, V>> inMemoryMapOutputs, IList<Merger.Segment<K, V>> inMemorySegments, long leaveBytes)
{
    // The outstanding byte count is recomputed from the list itself instead
    // of being taken from the RamManager, because a file can already be
    // closed without yet being present in inMemoryMapOutputs.
    long remainingBytes = 0L;
    foreach (InMemoryMapOutput<K, V> pending in inMemoryMapOutputs)
    {
        remainingBytes += pending.GetMemory().Length;
    }

    long convertedBytes = 0L;
    while (remainingBytes > leaveBytes)
    {
        // NOTE(review): Remove(0) is used here as "remove at index 0 and
        // return the element" (a helper from the Java port, not
        // ICollection<T>.Remove) - confirm against the project's extensions.
        InMemoryMapOutput<K, V> head = inMemoryMapOutputs.Remove(0);
        byte[] buffer = head.GetMemory();
        int length = buffer.Length;

        convertedBytes += length;
        remainingBytes -= length;

        IFile.Reader<K, V> reader = new InMemoryReader<K, V>(this, head.GetMapId(), buffer, 0, length, jobConf);

        // Only primary map outputs are given the merged-map-outputs counter;
        // every other output gets a null counter.
        inMemorySegments.AddItem(
            new Merger.Segment<K, V>(
                reader,
                true,
                (head.IsPrimaryMapOutput() ? mergedMapOutputsCounter : null)));
    }

    return convertedBytes;
}
// Exercises both merge stages end to end:
//   1. Two in-memory merges, each combining two in-memory map outputs into
//      one on-disk map output (so two on-disk outputs exist afterwards).
//   2. One on-disk merge, run by a fresh merge manager, that combines those
//      two on-disk outputs into a single key-ordered on-disk output.
// After the final Close() every collection tracked by the manager must be
// empty.
public virtual void TestInMemoryAndOnDiskMerger()
{
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LocalDir);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId1, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());

    // ---- first in-memory merge -------------------------------------------
    // write map outputs
    IDictionary<string, string> map1 = new SortedDictionary<string, string>();
    map1["apple"] = "disgusting";
    map1["carrot"] = "delicious";
    IDictionary<string, string> map2 = new SortedDictionary<string, string>();
    // "banana" goes into the second map output so that the merge really has
    // to interleave records coming from two non-empty inputs.
    map2["banana"] = "pretty good";
    byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
    byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(
        conf, mapId1, mergeManager, mapOutputBytes1.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(
        conf, mapId2, mergeManager, mapOutputBytes2.Length, null, true);
    System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.Length);
    System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.Length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs1 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.AddItem(mapOutput1);
    mapOutputs1.AddItem(mapOutput2);
    inMemoryMerger.Merge(mapOutputs1);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

    // ---- second in-memory merge ------------------------------------------
    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);

    // write map outputs
    IDictionary<string, string> map3 = new SortedDictionary<string, string>();
    map3["apple"] = "awesome";
    map3["carrot"] = "amazing";
    IDictionary<string, string> map4 = new SortedDictionary<string, string>();
    map4["banana"] = "bla";
    byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
    byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(
        conf, mapId3, mergeManager, mapOutputBytes3.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(
        conf, mapId4, mergeManager, mapOutputBytes4.Length, null, true);
    System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.Length);
    System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.Length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs2 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.AddItem(mapOutput3);
    mapOutputs2.AddItem(mapOutput4);
    inMemoryMerger2.Merge(mapOutputs2);
    NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);

    // ---- read back both on-disk outputs ----------------------------------
    // The paths are collected while reading so they can be handed to the
    // on-disk merger below.
    IList<MergeManagerImpl.CompressAwarePath> paths = new AList<MergeManagerImpl.CompressAwarePath>();
    IEnumerator<MergeManagerImpl.CompressAwarePath> iterator =
        mergeManager.onDiskMapOutputs.GetEnumerator();
    IList<string> keys = new AList<string>();
    IList<string> values = new AList<string>();
    while (iterator.HasNext())
    {
        MergeManagerImpl.CompressAwarePath next = iterator.Next();
        ReadOnDiskMapOutput(conf, fs, next, keys, values);
        paths.AddItem(next);
    }
    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right side as "expected".
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("apple", "banana", "carrot", "apple", "banana", "carrot"), keys);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"), values);
    mergeManager.Close();

    // ---- on-disk merge with a fresh manager ------------------------------
    mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId2, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());
    MergeThread<MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger =
        mergeManager.CreateOnDiskMerger();
    onDiskMerger.Merge(paths);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

    keys = new AList<string>();
    values = new AList<string>();
    ReadOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.GetEnumerator().Next(), keys, values);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("apple", "apple", "banana", "banana", "carrot", "carrot"), keys);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"), values);

    // ---- closing the manager must leave nothing tracked ------------------
    mergeManager.Close();
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
}