/// <summary>
/// Creates a reader that wraps the supplied raw key/value iterator.
/// </summary>
/// <param name="_enclosing">
/// Owning merge manager; its <c>spilledRecordsCounter</c> is forwarded to the base constructor.
/// </param>
/// <param name="kvIter">Iterator stored on this reader.</param>
/// <param name="size">Size value forwarded to the base constructor.</param>
/// <exception cref="System.IO.IOException"/>
public RawKVIteratorReader(MergeManagerImpl<K, V> _enclosing, RawKeyValueIterator kvIter, long size)
    : base(null, null, size, null, _enclosing.spilledRecordsCounter)
{
    // The base-constructor arguments read the parameter directly: `this` is not
    // accessible inside a constructor initializer, and the field would still be
    // unassigned at that point.
    this._enclosing = _enclosing;
    this.kvIter = kvIter;
}
/// <summary>
/// Test fixture initialisation: logs the current test name, builds the two job
/// configurations (fetch retry disabled / enabled), and creates the mocks and
/// stubbed answers shared by the tests.
/// </summary>
public virtual void Setup()
{
    Log.Info(">>>> " + name.GetMethodName());

    // Two configurations that differ only in the shuffle fetch-retry flag.
    job = new JobConf();
    job.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, false);
    jobWithRetry = new JobConf();
    jobWithRetry.SetBoolean(MRJobConfig.ShuffleFetchRetryEnabled, true);

    id = TaskAttemptID.ForName("attempt_0_1_r_1_1");
    key = JobTokenSecretManager.CreateSecretKey(new byte[] { 0, 0, 0, 0 });

    // mocked generics
    ss = Org.Mockito.Mockito.Mock<ShuffleSchedulerImpl>();
    mm = Org.Mockito.Mockito.Mock<MergeManagerImpl>();
    r = Org.Mockito.Mockito.Mock<Reporter>();
    metrics = Org.Mockito.Mockito.Mock<ShuffleClientMetrics>();
    except = Org.Mockito.Mockito.Mock<ExceptionReporter>();
    connection = Org.Mockito.Mockito.Mock<HttpURLConnection>();
    allErrs = Org.Mockito.Mockito.Mock<Counters.Counter>();

    // Any counter lookup on the reporter yields the shared mock counter.
    Org.Mockito.Mockito
        .When(r.GetCounter(Matchers.AnyString(), Matchers.AnyString()))
        .ThenReturn(allErrs);

    // The scheduler hands out both map attempts for the test host.
    AList<TaskAttemptID> hostMaps = new AList<TaskAttemptID>(1);
    hostMaps.AddItem(map1ID);
    hostMaps.AddItem(map2ID);
    Org.Mockito.Mockito.When(ss.GetMapsForHost(host)).ThenReturn(hostMaps);
}
/// <summary>
/// Configures an 8 GiB reduce memory budget with all shuffle fractions at or near
/// 1.0 and checks that both the merge manager's memory limit and its maximum
/// in-memory reduce limit exceed <c>int.MaxValue</c>.
/// </summary>
public virtual void TestLargeMemoryLimits()
{
    JobConf largeConf = new JobConf();

    // Xmx in production
    long totalReduceBytes = 8L * 1024 * 1024 * 1024;
    largeConf.SetLong(MRJobConfig.ReduceMemoryTotalBytes, totalReduceBytes);
    // M1 = Xmx fraction for map outputs
    largeConf.SetFloat(MRJobConfig.ShuffleInputBufferPercent, 1.0f);
    // M2 = max M1 fraction for a single map output
    largeConf.SetFloat(MRJobConfig.ShuffleMemoryLimitPercent, 0.95f);
    // M3 = M1 fraction at which in memory merge is triggered
    largeConf.SetFloat(MRJobConfig.ShuffleMergePercent, 1.0f);
    // M4 = M1 fraction of map outputs remaining in memory for a reduce
    largeConf.SetFloat(MRJobConfig.ReduceInputBufferPercent, 1.0f);

    MergeManagerImpl<Text, Text> manager = new MergeManagerImpl<Text, Text>(
        null, largeConf, Org.Mockito.Mockito.Mock<LocalFileSystem>(),
        null, null, null, null, null, null, null, null, null, null,
        new MROutputFiles());

    NUnit.Framework.Assert.IsTrue(
        "Large shuffle area unusable: " + manager.memoryLimit,
        manager.memoryLimit > int.MaxValue);

    long inMemReduceLimit = manager.GetMaxInMemReduceLimit();
    NUnit.Framework.Assert.IsTrue(
        "Large in-memory reduce area unusable: " + inMemReduceLimit,
        inMemReduceLimit > int.MaxValue);
}
/// <summary>
/// Initialises the merge thread's collaborators and starts it with an empty
/// list of pending merge batches.
/// </summary>
/// <param name="manager">Merge manager stored on this thread.</param>
/// <param name="mergeFactor">Merge factor stored on this thread.</param>
/// <param name="reporter">Exception reporter stored on this thread.</param>
public MergeThread(MergeManagerImpl<K, V> manager, int mergeFactor, ExceptionReporter reporter)
{
    this.manager = manager;
    this.mergeFactor = mergeFactor;
    this.reporter = reporter;
    this.pendingToBeMerged = new List<IList<T>>();
}
/// <summary>
/// Creates the daemon thread that merges on-disk map outputs.
/// </summary>
/// <param name="_enclosing">
/// Owning merge manager; supplies <c>ioSortFactor</c> and <c>exceptionReporter</c>
/// to the base constructor.
/// </param>
/// <param name="manager">Merge manager forwarded to the base constructor.</param>
public OnDiskMerger(MergeManagerImpl<K, V> _enclosing, MergeManagerImpl<K, V> manager)
    : base(manager, _enclosing.ioSortFactor, _enclosing.exceptionReporter)
{
    // The initializer above reads the parameter, not this._enclosing: `this` is
    // not accessible in a constructor initializer and the field is not yet set.
    this._enclosing = _enclosing;
    this.SetName("OnDiskMerger - Thread to merge on-disk map-outputs");
    this.SetDaemon(true);
}
/// <summary>
/// Creates the daemon thread that merges in-memory shuffled map outputs.
/// The merge factor passed to the base constructor is <c>int.MaxValue</c>.
/// </summary>
/// <param name="_enclosing">
/// Owning merge manager; supplies <c>exceptionReporter</c> to the base constructor.
/// </param>
/// <param name="manager">Merge manager forwarded to the base constructor.</param>
public InMemoryMerger(MergeManagerImpl<K, V> _enclosing, MergeManagerImpl<K, V> manager)
    : base(manager, int.MaxValue, _enclosing.exceptionReporter)
{
    // The initializer above reads the parameter, not this._enclosing: `this` is
    // not accessible in a constructor initializer and the field is not yet set.
    this._enclosing = _enclosing;
    this.SetName("InMemoryMerger - Thread to merge in-memory shuffled map-outputs");
    this.SetDaemon(true);
}
/// <summary>
/// Convenience constructor: delegates to the full constructor using the raw
/// local file system obtained from <paramref name="conf"/> and the input file
/// path that <paramref name="mapOutputFile"/> returns for this map task and size.
/// </summary>
/// <param name="mapId">Map attempt whose output is stored.</param>
/// <param name="reduceId">Reduce attempt, forwarded unchanged.</param>
/// <param name="merger">Merge manager, forwarded unchanged.</param>
/// <param name="size">Output size; also used when asking for the output path.</param>
/// <param name="conf">Job configuration; also used to obtain the local file system.</param>
/// <param name="mapOutputFile">Provider of the output file path.</param>
/// <param name="fetcher">Fetcher id, forwarded unchanged.</param>
/// <param name="primaryMapOutput">Forwarded unchanged.</param>
/// <exception cref="System.IO.IOException"/>
public OnDiskMapOutput(
    TaskAttemptID mapId,
    TaskAttemptID reduceId,
    MergeManagerImpl<K, V> merger,
    long size,
    JobConf conf,
    MapOutputFile mapOutputFile,
    int fetcher,
    bool primaryMapOutput)
    : this(
        mapId,
        reduceId,
        merger,
        size,
        conf,
        mapOutputFile,
        fetcher,
        primaryMapOutput,
        FileSystem.GetLocal(conf).GetRaw(),
        mapOutputFile.GetInputFileForWrite(mapId.GetTaskID(), size))
{
}
/// <summary>
/// Creates the daemon thread that merges in-memory shuffled map outputs into
/// other in-memory outputs.
/// </summary>
/// <param name="_enclosing">
/// Owning merge manager; supplies <c>exceptionReporter</c> to the base constructor.
/// </param>
/// <param name="manager">Merge manager forwarded to the base constructor.</param>
/// <param name="mergeFactor">Merge factor forwarded to the base constructor.</param>
public IntermediateMemoryToMemoryMerger(
    MergeManagerImpl<K, V> _enclosing,
    MergeManagerImpl<K, V> manager,
    int mergeFactor)
    : base(manager, mergeFactor, _enclosing.exceptionReporter)
{
    // The initializer above reads the parameter, not this._enclosing: `this` is
    // not accessible in a constructor initializer and the field is not yet set.
    this._enclosing = _enclosing;
    this.SetName("InMemoryMerger - Thread to do in-memory merge of in-memory " + "shuffled map-outputs");
    this.SetDaemon(true);
}
/// <summary>
/// Full constructor: records the collaborators, derives a temporary output path
/// from <paramref name="outputPath"/> and <paramref name="fetcher"/>, and opens
/// that temporary file on <paramref name="fs"/>, passing the stream through
/// <c>CryptoUtils.WrapIfNecessary</c>.
/// </summary>
/// <param name="mapId">Map attempt, forwarded to the base constructor.</param>
/// <param name="reduceId">Reduce attempt (not used in this constructor body).</param>
/// <param name="merger">Merge manager stored on this instance.</param>
/// <param name="size">Output size, forwarded to the base constructor.</param>
/// <param name="conf">Job configuration; stored and used when wrapping the stream.</param>
/// <param name="mapOutputFile">Not used in this constructor body.</param>
/// <param name="fetcher">Fetcher id used to derive the temporary path.</param>
/// <param name="primaryMapOutput">Forwarded to the base constructor.</param>
/// <param name="fs">File system on which the temporary file is created.</param>
/// <param name="outputPath">Final output path stored on this instance.</param>
internal OnDiskMapOutput(
    TaskAttemptID mapId,
    TaskAttemptID reduceId,
    MergeManagerImpl<K, V> merger,
    long size,
    JobConf conf,
    MapOutputFile mapOutputFile,
    int fetcher,
    bool primaryMapOutput,
    FileSystem fs,
    Path outputPath)
    : base(mapId, size, primaryMapOutput)
{
    // Plain field captures first.
    this.conf = conf;
    this.fs = fs;
    this.merger = merger;
    this.outputPath = outputPath;

    // Data is written to a per-fetcher temporary path.
    tmpOutputPath = GetTempPath(outputPath, fetcher);
    disk = CryptoUtils.WrapIfNecessary(conf, fs.Create(tmpOutputPath));
}
/// <summary>
/// Creates a reader over a slice of an in-memory byte array. The base
/// constructor and the input buffer are both given <c>length - start</c> as the
/// number of bytes, so <paramref name="length"/> acts as the end offset of the slice.
/// </summary>
/// <param name="merger">Merge manager stored on this reader.</param>
/// <param name="taskAttemptId">Task attempt stored on this reader.</param>
/// <param name="data">Backing byte array.</param>
/// <param name="start">Offset of the first byte to read.</param>
/// <param name="length">End offset used to compute the slice size.</param>
/// <param name="conf">Configuration forwarded to the base constructor.</param>
/// <exception cref="System.IO.IOException"/>
public InMemoryReader(
    MergeManagerImpl<K, V> merger,
    TaskAttemptID taskAttemptId,
    byte[] data,
    int start,
    int length,
    Configuration conf)
    : base(conf, null, length - start, null, null)
{
    this.merger = merger;
    this.taskAttemptId = taskAttemptId;
    this.start = start;
    this.length = length;

    buffer = data;
    bufferSize = (int)fileLength;

    int sliceSize = length - start;
    memDataIn.Reset(buffer, start, sliceSize);
}
/// <summary>
/// Verifies that the on-disk merger picks up the configured sort factor and
/// that every batch queued for merging is non-empty, no larger than the sort
/// factor, and ordered by ascending compressed size.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.URISyntaxException"/>
/// <exception cref="System.Exception"/>
public virtual void TestOnDiskMerger()
{
    JobConf jobConf = new JobConf();
    int SortFactor = 5;
    jobConf.SetInt(MRJobConfig.IoSortFactor, SortFactor);
    MapOutputFile mapOutputFile = new MROutputFiles();
    FileSystem fs = FileSystem.GetLocal(jobConf);
    MergeManagerImpl<IntWritable, IntWritable> manager = new MergeManagerImpl<IntWritable, IntWritable>(
        null, jobConf, fs, null, null, null, null, null, null, null, null, null, null, mapOutputFile);

    MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable> onDiskMerger =
        (MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable>)
        Whitebox.GetInternalState(manager, "onDiskMerger");
    int mergeFactor = (int)Whitebox.GetInternalState(onDiskMerger, "mergeFactor");

    // make sure the io.sort.factor is set properly (expected value first)
    NUnit.Framework.Assert.AreEqual(SortFactor, mergeFactor);

    // Stop the onDiskMerger thread so that we can intercept the list of files
    // waiting to be merged.
    onDiskMerger.Suspend();

    // Send the list of fake files waiting to be merged
    Random rand = new Random();
    for (int i = 0; i < 2 * SortFactor; ++i)
    {
        Path path = new Path("somePath");
        MergeManagerImpl.CompressAwarePath cap =
            new MergeManagerImpl.CompressAwarePath(path, 1L, rand.Next());
        manager.CloseOnDiskFile(cap);
    }

    // Check that the files pending to be merged are in sorted order.
    List<IList<MergeManagerImpl.CompressAwarePath>> pendingToBeMerged =
        (List<IList<MergeManagerImpl.CompressAwarePath>>)
        Whitebox.GetInternalState(onDiskMerger, "pendingToBeMerged");
    NUnit.Framework.Assert.IsTrue("No inputs were added to list pending to merge",
        pendingToBeMerged.Count > 0);

    for (int i = 0; i < pendingToBeMerged.Count; ++i)
    {
        IList<MergeManagerImpl.CompressAwarePath> inputs = pendingToBeMerged[i];

        // Checked once per batch, outside the pairwise loop: inside a loop that
        // starts at j = 1 this assertion would never run for batches holding
        // zero or one entry.
        NUnit.Framework.Assert.IsTrue("Not enough / too many inputs were going to be merged",
            inputs.Count > 0 && inputs.Count <= SortFactor);

        for (int j = 1; j < inputs.Count; ++j)
        {
            NUnit.Framework.Assert.IsTrue("Inputs to be merged were not sorted according to size: ",
                inputs[j].GetCompressedSize() >= inputs[j - 1].GetCompressedSize());
        }
    }
}
/// <summary>
/// Creates an in-memory map output backed by a bounded byte-array stream of
/// <paramref name="size"/> bytes. A decompressor is taken from
/// <c>CodecPool</c> only when a codec is supplied.
/// </summary>
/// <param name="conf">Configuration stored on this instance.</param>
/// <param name="mapId">Map attempt, forwarded to the base constructor.</param>
/// <param name="merger">Merge manager stored on this instance.</param>
/// <param name="size">Buffer size; also forwarded (as <c>long</c>) to the base constructor.</param>
/// <param name="codec">Compression codec, or <c>null</c> for none.</param>
/// <param name="primaryMapOutput">Forwarded to the base constructor.</param>
public InMemoryMapOutput(
    Configuration conf,
    TaskAttemptID mapId,
    MergeManagerImpl<K, V> merger,
    int size,
    CompressionCodec codec,
    bool primaryMapOutput)
    : base(mapId, (long)size, primaryMapOutput)
{
    this.conf = conf;
    this.merger = merger;
    this.codec = codec;

    byteStream = new BoundedByteArrayOutputStream(size);
    memory = byteStream.GetBuffer();

    // Decompression of map-outputs
    decompressor = codec != null ? CodecPool.GetDecompressor(codec) : null;
}
/// <summary>
/// Runs two in-memory merges (two map outputs each) and checks that each one
/// yields an on-disk output, then merges those on-disk outputs with an on-disk
/// merger and checks the combined key/value order. Finally verifies that
/// closing the manager leaves all of its output collections empty.
/// </summary>
public virtual void TestInMemoryAndOnDiskMerger()
{
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LocalDir);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId1, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());

    // write map outputs
    IDictionary<string, string> map1 = new SortedDictionary<string, string>();
    map1["apple"] = "disgusting";
    map1["carrot"] = "delicious";
    IDictionary<string, string> map2 = new SortedDictionary<string, string>();
    // "banana" belongs to the second map output; putting it in map1 would leave
    // map2 empty, so the merge would not combine two populated inputs.
    map2["banana"] = "pretty good";
    byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
    byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(
        conf, mapId1, mergeManager, mapOutputBytes1.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(
        conf, mapId2, mergeManager, mapOutputBytes2.Length, null, true);
    System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.Length);
    System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.Length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs1 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.AddItem(mapOutput1);
    mapOutputs1.AddItem(mapOutput2);
    inMemoryMerger.Merge(mapOutputs1);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);

    // write map outputs
    IDictionary<string, string> map3 = new SortedDictionary<string, string>();
    map3["apple"] = "awesome";
    map3["carrot"] = "amazing";
    IDictionary<string, string> map4 = new SortedDictionary<string, string>();
    map4["banana"] = "bla";
    byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
    byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(
        conf, mapId3, mergeManager, mapOutputBytes3.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(
        conf, mapId4, mergeManager, mapOutputBytes4.Length, null, true);
    System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.Length);
    System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.Length);

    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs2 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.AddItem(mapOutput3);
    mapOutputs2.AddItem(mapOutput4);
    inMemoryMerger2.Merge(mapOutputs2);
    NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);

    // Read back every on-disk output, remembering the paths for the on-disk merge.
    IList<MergeManagerImpl.CompressAwarePath> paths = new AList<MergeManagerImpl.CompressAwarePath>();
    IEnumerator<MergeManagerImpl.CompressAwarePath> iterator =
        mergeManager.onDiskMapOutputs.GetEnumerator();
    IList<string> keys = new AList<string>();
    IList<string> values = new AList<string>();
    while (iterator.HasNext())
    {
        MergeManagerImpl.CompressAwarePath next = iterator.Next();
        ReadOnDiskMapOutput(conf, fs, next, keys, values);
        paths.AddItem(next);
    }
    // Expected value first, so a failure reports expected/actual the right way round.
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("apple", "banana", "carrot", "apple", "banana", "carrot"), keys);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"), values);
    mergeManager.Close();

    // Merge the two on-disk outputs with a fresh manager.
    mergeManager = new MergeManagerImpl<Text, Text>(
        reduceId2, jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());
    MergeThread<MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger =
        mergeManager.CreateOnDiskMerger();
    onDiskMerger.Merge(paths);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);

    keys = new AList<string>();
    values = new AList<string>();
    ReadOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.GetEnumerator().Next(), keys, values);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("apple", "apple", "banana", "banana", "carrot", "carrot"), keys);
    NUnit.Framework.Assert.AreEqual(
        Arrays.AsList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"), values);
    mergeManager.Close();

    // After closing, the manager must hold no outputs of any kind.
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
}
/// <summary>
/// Test fetcher that passes every argument except <paramref name="connection"/>
/// to the base fetcher and keeps the supplied connection on this instance.
/// </summary>
/// <param name="job">Job configuration, forwarded to the base constructor.</param>
/// <param name="reduceId">Reduce attempt, forwarded to the base constructor.</param>
/// <param name="scheduler">Shuffle scheduler, forwarded to the base constructor.</param>
/// <param name="merger">Merge manager, forwarded to the base constructor.</param>
/// <param name="reporter">Reporter, forwarded to the base constructor.</param>
/// <param name="metrics">Shuffle client metrics, forwarded to the base constructor.</param>
/// <param name="exceptionReporter">Exception reporter, forwarded to the base constructor.</param>
/// <param name="jobTokenSecret">Job token secret, forwarded to the base constructor.</param>
/// <param name="connection">Connection stored on this instance.</param>
/// <param name="id">Fetcher id, forwarded to the base constructor.</param>
public FakeFetcher(
    JobConf job,
    TaskAttemptID reduceId,
    ShuffleSchedulerImpl<K, V> scheduler,
    MergeManagerImpl<K, V> merger,
    Reporter reporter,
    ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter,
    SecretKey jobTokenSecret,
    HttpURLConnection connection,
    int id)
    : base(
        job,
        reduceId,
        scheduler,
        merger,
        reporter,
        metrics,
        exceptionReporter,
        jobTokenSecret,
        id)
{
    this.connection = connection;
}
/// <summary>
/// Test merge thread: passes <c>int.MaxValue</c> as the merge factor to the
/// base constructor and starts its merge counter at zero.
/// </summary>
/// <param name="mergeManager">Merge manager forwarded to the base constructor.</param>
/// <param name="reporter">Exception reporter forwarded to the base constructor.</param>
public TestMergeThread(MergeManagerImpl<Text, Text> mergeManager, ExceptionReporter reporter)
    : base(mergeManager, int.MaxValue, reporter)
{
    this.numMerges = new AtomicInteger(0);
}