/// <summary>
/// Builds the iterator returned to the caller by merging the map outputs
/// that are still held in memory with the ones already on disk.
/// </summary>
/// <remarks>
/// Steps, in the order the code performs them:
/// 1. If there are in-memory outputs, <c>CreateInMemorySegments</c> is called
///    with the limit from <c>GetMaxInMemReduceLimit()</c> to fill a spill list.
///    When that list is non-empty and <c>ioSortFactor</c> exceeds the number
///    of on-disk outputs, the list is merged into one file which is appended
///    to <paramref name="onDiskMapOutputs"/>; otherwise the segments stay in
///    the list and are placed in front of the disk segments in step 3.
/// 2. Every on-disk output is wrapped in a segment and the segments are
///    sorted with <c>_IComparer_786</c> (ordering defined elsewhere).
/// 3. <c>CreateInMemorySegments</c> is called again with a limit of 0 to build
///    the final segment list; if any disk bytes exist, the disk merge result
///    is either returned directly (no final in-memory segments) or added to
///    the final list as one more segment before the last merge.
/// </remarks>
/// <param name="job">Job configuration; supplies the key/value classes, the key comparator and the keep-failed-task-files flag.</param>
/// <param name="fs">File system used to create, stat and delete the files touched here.</param>
/// <param name="inMemoryMapOutputs">Map outputs held in memory; handed to <c>CreateInMemorySegments</c> twice (presumably that helper removes what it converts - TODO confirm).</param>
/// <param name="onDiskMapOutputs">Map outputs on disk; may receive one extra entry for the spilled in-memory merge.</param>
/// <returns>The iterator over the merged key/value stream.</returns>
/// <exception cref="System.IO.IOException"/>
private RawKeyValueIterator FinalMerge(JobConf job, FileSystem fs, IList<InMemoryMapOutput<K, V>> inMemoryMapOutputs, IList<MergeManagerImpl.CompressAwarePath> onDiskMapOutputs)
{
    Log.Info("finalMerge called with " + inMemoryMapOutputs.Count + " in-memory map-outputs and " + onDiskMapOutputs.Count + " on-disk map-outputs");

    long memoryLimit = GetMaxInMemReduceLimit();

    // merge config params
    Type keyType = (Type)job.GetMapOutputKeyClass();
    Type valueType = (Type)job.GetMapOutputValueClass();
    bool keepInputs = job.GetKeepFailedTaskFiles();
    Path scratchDir = new Path(reduceId.ToString());
    RawComparator<K> keyComparator = (RawComparator<K>)job.GetOutputKeyComparator();

    // segments required to vacate memory
    IList<Merger.Segment<K, V>> spillSegments = new AList<Merger.Segment<K, V>>();
    long spillBytes = 0;
    bool mergePhaseFinished = false;

    if (inMemoryMapOutputs.Count > 0)
    {
        // Read the first map id before CreateInMemorySegments touches the list.
        TaskID firstMapId = inMemoryMapOutputs[0].GetMapId().GetTaskID();
        spillBytes = CreateInMemorySegments(inMemoryMapOutputs, spillSegments, memoryLimit);
        int spillSegmentCount = spillSegments.Count;

        bool mergeToSingleFile = spillSegmentCount > 0 && ioSortFactor > onDiskMapOutputs.Count;
        if (mergeToSingleFile)
        {
            // If we reach here, it implies that we have less than io.sort.factor
            // disk segments and this will be incremented by 1 (result of the
            // memory segments merge). Since this total would still be
            // <= io.sort.factor, we will not do any more intermediate merges,
            // the merge of all these disk segments would be directly fed to the
            // reduce method
            mergePhaseFinished = true;

            // must spill to disk, but can't retain in-mem for intermediate merge
            Path spillPath = mapOutputFile.GetInputFileForWrite(firstMapId, spillBytes)
                .Suffix(Org.Apache.Hadoop.Mapred.Task.MergedOutputPrefix);
            RawKeyValueIterator memoryMerge = Merger.Merge(job, fs, keyType, valueType,
                spillSegments, spillSegmentCount, scratchDir, keyComparator, reporter,
                spilledRecordsCounter, null, mergePhase);
            FSDataOutputStream spillStream = CryptoUtils.WrapIfNecessary(job, fs.Create(spillPath));
            IFile.Writer<K, V> spillWriter = new IFile.Writer<K, V>(job, spillStream, keyType,
                valueType, codec, null, true);
            try
            {
                Merger.WriteFile(memoryMerge, spillWriter, reporter, job);
                spillWriter.Close();
                // add to list of final disk outputs.
                onDiskMapOutputs.AddItem(new MergeManagerImpl.CompressAwarePath(spillPath,
                    spillWriter.GetRawLength(), spillWriter.GetCompressedLength()));
                // Cleared so the finally block does not close the writer a second time.
                spillWriter = null;
            }
            catch (IOException)
            {
                if (spillPath != null)
                {
                    try
                    {
                        fs.Delete(spillPath, true);
                    }
                    catch (IOException)
                    {
                        // NOTHING: the delete is best effort, the write failure is rethrown below
                    }
                }
                throw;
            }
            finally
            {
                if (spillWriter != null)
                {
                    spillWriter.Close();
                }
            }

            Log.Info("Merged " + spillSegmentCount + " segments, " + spillBytes + " bytes to disk to satisfy " + "reduce memory limit");
            spillBytes = 0;
            spillSegments.Clear();
        }
        else if (spillBytes != 0)
        {
            Log.Info("Keeping " + spillSegmentCount + " segments, " + spillBytes + " bytes in memory for " + "intermediate, on-disk merge");
        }
    }

    // segments on disk
    IList<Merger.Segment<K, V>> diskSegments = new AList<Merger.Segment<K, V>>();
    long onDiskBytes = spillBytes;
    long rawBytes = spillBytes;
    MergeManagerImpl.CompressAwarePath[] diskFiles = Sharpen.Collections.ToArray(onDiskMapOutputs,
        new MergeManagerImpl.CompressAwarePath[onDiskMapOutputs.Count]);
    for (int i = 0; i < diskFiles.Length; i++)
    {
        MergeManagerImpl.CompressAwarePath diskFile = diskFiles[i];
        long fileLength = fs.GetFileStatus(diskFile).GetLen();
        onDiskBytes += fileLength;

        // Use the recorded raw length when it is positive, otherwise the file size.
        if (diskFile.GetRawDataLength() > 0)
        {
            rawBytes += diskFile.GetRawDataLength();
        }
        else
        {
            rawBytes += fileLength;
        }

        Log.Debug("Disk file: " + diskFile + " Length is " + fileLength);

        // Files carrying the merged-output suffix get no counter.
        bool isMergedOutput = diskFile.ToString().EndsWith(Org.Apache.Hadoop.Mapred.Task.MergedOutputPrefix);
        diskSegments.AddItem(new Merger.Segment<K, V>(job, fs, diskFile, codec, keepInputs,
            (isMergedOutput ? null : mergedMapOutputsCounter), diskFile.GetRawDataLength()));
    }
    Log.Info("Merging " + diskFiles.Length + " files, " + onDiskBytes + " bytes from disk");
    diskSegments.Sort(new _IComparer_786());

    // build final list of segments from merged backed by disk + in-mem
    IList<Merger.Segment<K, V>> finalSegments = new AList<Merger.Segment<K, V>>();
    long inMemBytes = CreateInMemorySegments(inMemoryMapOutputs, finalSegments, 0);
    Log.Info("Merging " + finalSegments.Count + " segments, " + inMemBytes + " bytes from memory into reduce");

    if (onDiskBytes != 0)
    {
        int inMemSegmentCount = spillSegments.Count;
        diskSegments.AddRange(0, spillSegments);
        spillSegments.Clear();

        // Pass mergePhase only if there is a going to be intermediate
        // merges. See comment where mergePhaseFinished is being set
        Progress phaseForDiskMerge = null;
        if (!mergePhaseFinished)
        {
            phaseForDiskMerge = mergePhase;
        }

        RawKeyValueIterator diskMerge = Merger.Merge(job, fs, keyType, valueType, codec,
            diskSegments, ioSortFactor, inMemSegmentCount, scratchDir, keyComparator, reporter,
            false, spilledRecordsCounter, null, phaseForDiskMerge);
        diskSegments.Clear();

        if (finalSegments.Count == 0)
        {
            return diskMerge;
        }

        finalSegments.AddItem(new Merger.Segment<K, V>(
            new MergeManagerImpl.RawKVIteratorReader(this, diskMerge, onDiskBytes), true, rawBytes));
    }

    return Merger.Merge(job, fs, keyType, valueType, finalSegments, finalSegments.Count,
        scratchDir, keyComparator, reporter, spilledRecordsCounter, null, null);
}
/// <summary>
/// Tests the getters and setters of <c>JobConf</c>: for each property it
/// checks the value reported by a freshly constructed instance and, where a
/// setter is exercised, that the getter returns the value just set.
/// </summary>
/// <remarks>
/// All assertions follow NUnit's argument order: expected value first, actual
/// value second, optional failure message last.
/// </remarks>
public virtual void TestJobConf()
{
    JobConf conf = new JobConf();

    // test default value
    Sharpen.Pattern pattern = conf.GetJarUnpackPattern();
    NUnit.Framework.Assert.AreEqual(Sharpen.Pattern.Compile("(?:classes/|lib/).*").ToString(), pattern.ToString());

    // default value
    NUnit.Framework.Assert.IsFalse(conf.GetKeepFailedTaskFiles());
    conf.SetKeepFailedTaskFiles(true);
    NUnit.Framework.Assert.IsTrue(conf.GetKeepFailedTaskFiles());

    // default value
    NUnit.Framework.Assert.IsNull(conf.GetKeepTaskFilesPattern());
    conf.SetKeepTaskFilesPattern("123454");
    NUnit.Framework.Assert.AreEqual("123454", conf.GetKeepTaskFilesPattern());

    // default value
    NUnit.Framework.Assert.IsNotNull(conf.GetWorkingDirectory());
    conf.SetWorkingDirectory(new Path("test"));
    NUnit.Framework.Assert.IsTrue(conf.GetWorkingDirectory().ToString().EndsWith("test"));

    // default value
    NUnit.Framework.Assert.AreEqual(1, conf.GetNumTasksToExecutePerJvm());

    // default value
    NUnit.Framework.Assert.IsNull(conf.GetKeyFieldComparatorOption());
    conf.SetKeyFieldComparatorOptions("keySpec");
    NUnit.Framework.Assert.AreEqual("keySpec", conf.GetKeyFieldComparatorOption());

    // default value
    NUnit.Framework.Assert.IsFalse(conf.GetUseNewReducer());
    conf.SetUseNewReducer(true);
    NUnit.Framework.Assert.IsTrue(conf.GetUseNewReducer());

    // default: all three speculative-execution getters report true
    NUnit.Framework.Assert.IsTrue(conf.GetMapSpeculativeExecution());
    NUnit.Framework.Assert.IsTrue(conf.GetReduceSpeculativeExecution());
    NUnit.Framework.Assert.IsTrue(conf.GetSpeculativeExecution());
    // The combined getter is expected to stay true while the map flag is
    // still on, and to turn false only once both flags are off.
    conf.SetReduceSpeculativeExecution(false);
    NUnit.Framework.Assert.IsTrue(conf.GetSpeculativeExecution());
    conf.SetMapSpeculativeExecution(false);
    NUnit.Framework.Assert.IsFalse(conf.GetSpeculativeExecution());
    NUnit.Framework.Assert.IsFalse(conf.GetMapSpeculativeExecution());
    NUnit.Framework.Assert.IsFalse(conf.GetReduceSpeculativeExecution());

    conf.SetSessionId("ses");
    NUnit.Framework.Assert.AreEqual("ses", conf.GetSessionId());

    NUnit.Framework.Assert.AreEqual(3, conf.GetMaxTaskFailuresPerTracker());
    conf.SetMaxTaskFailuresPerTracker(2);
    NUnit.Framework.Assert.AreEqual(2, conf.GetMaxTaskFailuresPerTracker());

    NUnit.Framework.Assert.AreEqual(0, conf.GetMaxMapTaskFailuresPercent());
    conf.SetMaxMapTaskFailuresPercent(50);
    NUnit.Framework.Assert.AreEqual(50, conf.GetMaxMapTaskFailuresPercent());

    NUnit.Framework.Assert.AreEqual(0, conf.GetMaxReduceTaskFailuresPercent());
    conf.SetMaxReduceTaskFailuresPercent(70);
    NUnit.Framework.Assert.AreEqual(70, conf.GetMaxReduceTaskFailuresPercent());

    // by default
    NUnit.Framework.Assert.AreEqual(JobPriority.Normal.ToString(), conf.GetJobPriority().ToString());
    conf.SetJobPriority(JobPriority.High);
    NUnit.Framework.Assert.AreEqual(JobPriority.High.ToString(), conf.GetJobPriority().ToString());

    // default
    NUnit.Framework.Assert.IsNull(conf.GetJobSubmitHostName());
    conf.SetJobSubmitHostName("hostname");
    NUnit.Framework.Assert.AreEqual("hostname", conf.GetJobSubmitHostName());

    // default
    NUnit.Framework.Assert.IsNull(conf.GetJobSubmitHostAddress());
    conf.SetJobSubmitHostAddress("ww");
    NUnit.Framework.Assert.AreEqual("ww", conf.GetJobSubmitHostAddress());

    // default value
    NUnit.Framework.Assert.IsFalse(conf.GetProfileEnabled());
    conf.SetProfileEnabled(true);
    NUnit.Framework.Assert.IsTrue(conf.GetProfileEnabled());

    // default value: both ranges start out as "0-2"
    NUnit.Framework.Assert.AreEqual("0-2", conf.GetProfileTaskRange(true).ToString());
    NUnit.Framework.Assert.AreEqual("0-2", conf.GetProfileTaskRange(false).ToString());
    // Changing the range selected by 'true' must leave the 'false' range alone.
    conf.SetProfileTaskRange(true, "0-3");
    NUnit.Framework.Assert.AreEqual("0-2", conf.GetProfileTaskRange(false).ToString());
    NUnit.Framework.Assert.AreEqual("0-3", conf.GetProfileTaskRange(true).ToString());

    // default value
    NUnit.Framework.Assert.IsNull(conf.GetMapDebugScript());
    conf.SetMapDebugScript("mDbgScript");
    NUnit.Framework.Assert.AreEqual("mDbgScript", conf.GetMapDebugScript());

    // default value
    NUnit.Framework.Assert.IsNull(conf.GetReduceDebugScript());
    conf.SetReduceDebugScript("rDbgScript");
    NUnit.Framework.Assert.AreEqual("rDbgScript", conf.GetReduceDebugScript());

    // default value
    NUnit.Framework.Assert.IsNull(conf.GetJobLocalDir());

    NUnit.Framework.Assert.AreEqual("default", conf.GetQueueName());
    conf.SetQueueName("qname");
    NUnit.Framework.Assert.AreEqual("qname", conf.GetQueueName());

    conf.SetMemoryForMapTask(100 * 1000);
    NUnit.Framework.Assert.AreEqual(100 * 1000, conf.GetMemoryForMapTask());
    conf.SetMemoryForReduceTask(1000 * 1000);
    NUnit.Framework.Assert.AreEqual(1000 * 1000, conf.GetMemoryForReduceTask());

    NUnit.Framework.Assert.AreEqual(-1, conf.GetMaxPhysicalMemoryForTask());
    NUnit.Framework.Assert.AreEqual("The variable key is no longer used.", JobConf.DeprecatedString("key"));

    // make sure mapreduce.map|reduce.java.opts are not set by default
    // so that they won't override mapred.child.java.opts
    // NUnit takes the failure message as the LAST argument; passing it first
    // would compare the message text against null instead of checking the value.
    NUnit.Framework.Assert.IsNull(conf.Get(JobConf.MapredMapTaskJavaOpts), "mapreduce.map.java.opts should not be set by default");
    NUnit.Framework.Assert.IsNull(conf.Get(JobConf.MapredReduceTaskJavaOpts), "mapreduce.reduce.java.opts should not be set by default");
}