Example #1
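A Sharpen-converted C# port of Hadoop's MergeManagerImpl.FinalMerge: it reads its merge parameters from JobConf and combines in-memory and on-disk map outputs into a single RawKeyValueIterator that feeds the reduce phase.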
        /// <exception cref="System.IO.IOException"/>
        private RawKeyValueIterator FinalMerge(JobConf job, FileSystem fs,
                                               IList<InMemoryMapOutput<K, V>> inMemoryMapOutputs,
                                               IList<MergeManagerImpl.CompressAwarePath> onDiskMapOutputs)
        {
            Log.Info("finalMerge called with " + inMemoryMapOutputs.Count + " in-memory map-outputs and "
                     + onDiskMapOutputs.Count + " on-disk map-outputs");
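            // cap on the map-output bytes allowed to remain in memory once the reduce starts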
            long maxInMemReduce = GetMaxInMemReduceLimit();
            // merge config params
            Type keyClass   = (Type)job.GetMapOutputKeyClass();
            Type valueClass = (Type)job.GetMapOutputValueClass();
            bool keepInputs = job.GetKeepFailedTaskFiles();
            Path tmpDir     = new Path(reduceId.ToString());
            RawComparator<K> comparator = (RawComparator<K>)job.GetOutputKeyComparator();
            // segments required to vacate memory
            IList<Merger.Segment<K, V>> memDiskSegments = new AList<Merger.Segment<K, V>>();
            long inMemToDiskBytes   = 0;
            bool mergePhaseFinished = false;

            if (inMemoryMapOutputs.Count > 0)
            {
                TaskID mapId = inMemoryMapOutputs[0].GetMapId().GetTaskID();
                inMemToDiskBytes = CreateInMemorySegments(inMemoryMapOutputs, memDiskSegments, maxInMemReduce);
                int numMemDiskSegments = memDiskSegments.Count;
                if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.Count)
                {
                    // If we reach here, it implies that we have less than io.sort.factor
                    // disk segments and this will be incremented by 1 (result of the
                    // memory segments merge). Since this total would still be
                    // <= io.sort.factor, we will not do any more intermediate merges,
                    // the merge of all these disk segments would be directly fed to the
                    // reduce method
                    mergePhaseFinished = true;
                    // must spill to disk, but can't retain in-mem for intermediate merge
                    Path outputPath = mapOutputFile.GetInputFileForWrite(mapId, inMemToDiskBytes)
                                          .Suffix(Org.Apache.Hadoop.Mapred.Task.MergedOutputPrefix);
                    RawKeyValueIterator rIter = Merger.Merge(job, fs, keyClass, valueClass, memDiskSegments,
                                                             numMemDiskSegments, tmpDir, comparator, reporter,
                                                             spilledRecordsCounter, null, mergePhase);
                    FSDataOutputStream @out = CryptoUtils.WrapIfNecessary(job, fs.Create(outputPath));
                    IFile.Writer<K, V> writer = new IFile.Writer<K, V>(job, @out, keyClass, valueClass,
                                                                       codec, null, true);
                    try
                    {
                        Merger.WriteFile(rIter, writer, reporter, job);
                        writer.Close();
                        // add to list of final disk outputs.
                        onDiskMapOutputs.AddItem(new MergeManagerImpl.CompressAwarePath(outputPath,
                                                     writer.GetRawLength(), writer.GetCompressedLength()));
                        writer = null;
                    }
                    catch (IOException)
                    {
                        // the merge failed: remove the partial output before rethrowing
                        if (null != outputPath)
                        {
                            try
                            {
                                fs.Delete(outputPath, true);
                            }
                            catch (IOException)
                            {
                                // NOTHING
                            }
                        }
                        throw;
                    }
                    finally
                    {
                        if (null != writer)
                        {
                            writer.Close();
                        }
                    }
                    Log.Info("Merged " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes to disk to satisfy "
                             + "reduce memory limit");
                    inMemToDiskBytes = 0;
                    memDiskSegments.Clear();
                }
                else
                {
                    if (inMemToDiskBytes != 0)
                    {
                        Log.Info("Keeping " + numMemDiskSegments + " segments, " + inMemToDiskBytes + " bytes in memory for "
                                 + "intermediate, on-disk merge");
                    }
                }
            }
            // segments on disk
            IList<Merger.Segment<K, V>> diskSegments = new AList<Merger.Segment<K, V>>();
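            // both running totals start at inMemToDiskBytes: any segments spilled above now live on disk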
            long onDiskBytes = inMemToDiskBytes;
            long rawBytes    = inMemToDiskBytes;

            MergeManagerImpl.CompressAwarePath[] onDisk = Sharpen.Collections.ToArray(
                onDiskMapOutputs, new MergeManagerImpl.CompressAwarePath[onDiskMapOutputs.Count]);
            foreach (MergeManagerImpl.CompressAwarePath file in onDisk)
            {
                long fileLength = fs.GetFileStatus(file).GetLen();
                onDiskBytes += fileLength;
                rawBytes    += (file.GetRawDataLength() > 0) ? file.GetRawDataLength() : fileLength;
                Log.Debug("Disk file: " + file + " Length is " + fileLength);
                diskSegments.AddItem(new Merger.Segment<K, V>(job, fs, file, codec, keepInputs,
                    (file.ToString().EndsWith(Org.Apache.Hadoop.Mapred.Task.MergedOutputPrefix)
                        ? null : mergedMapOutputsCounter),
                    file.GetRawDataLength()));
            }
            Log.Info("Merging " + onDisk.Length + " files, " + onDiskBytes + " bytes from disk"
                     );
            diskSegments.Sort(new _IComparer_786());
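            // _IComparer_786 is the Sharpen-generated name for the upstream anonymous comparator,
            // which orders the disk segments by length, smallest first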
            // build final list of segments from merged backed by disk + in-mem
            IList<Merger.Segment<K, V>> finalSegments = new AList<Merger.Segment<K, V>>();
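            // leaveBytes == 0: every map output still held in memory becomes a segment for the final merge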
            long inMemBytes = CreateInMemorySegments(inMemoryMapOutputs, finalSegments, 0);

            Log.Info("Merging " + finalSegments.Count + " segments, " + inMemBytes + " bytes from memory into reduce"
                     );
            if (0 != onDiskBytes)
            {
                int numInMemSegments = memDiskSegments.Count;
                diskSegments.AddRange(0, memDiskSegments);
                memDiskSegments.Clear();
                // Pass mergePhase only if there is a going to be intermediate
                // merges. See comment where mergePhaseFinished is being set
                Progress            thisPhase = (mergePhaseFinished) ? null : mergePhase;
                RawKeyValueIterator diskMerge = Merger.Merge(job, fs, keyClass, valueClass, codec,
                                                             diskSegments, ioSortFactor, numInMemSegments,
                                                             tmpDir, comparator, reporter, false,
                                                             spilledRecordsCounter, null, thisPhase);
                diskSegments.Clear();
                if (0 == finalSegments.Count)
                {
                    return diskMerge;
                }
                finalSegments.AddItem(new Merger.Segment<K, V>(
                    new MergeManagerImpl.RawKVIteratorReader(this, diskMerge, onDiskBytes), true, rawBytes));
            }
            return Merger.Merge(job, fs, keyClass, valueClass, finalSegments, finalSegments.Count,
                                tmpDir, comparator, reporter, spilledRecordsCounter, null, null);
        }
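The JobConf parameters consulted above (key and value classes, output key comparator, keep-failed-task-files flag) are set on the submitting side. A minimal sketch of that setup, assuming the Sharpen port mirrors the Java JobConf setters such as SetMapOutputKeyClass and SetOutputKeyComparatorClass; the Writable types here are only illustrative:

        // Hypothetical submitting-side setup; it mirrors what FinalMerge reads back.
        JobConf job = new JobConf();
        job.SetMapOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
        job.SetMapOutputValueClass(typeof(Org.Apache.Hadoop.IO.IntWritable));
        // GetOutputKeyComparator() falls back to the comparator registered for the
        // key class when no comparator class is configured explicitly.
        job.SetOutputKeyComparatorClass(typeof(Org.Apache.Hadoop.IO.Text.Comparator));
        job.SetKeepFailedTaskFiles(false);   // the keepInputs flag read in FinalMerge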
Example #2
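A unit test from the same converted codebase that walks JobConf's getters and setters, asserting each documented default before overriding it.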
        /// <summary>test getters and setters of JobConf</summary>
        public virtual void TestJobConf()
        {
            JobConf conf = new JobConf();

            // test default value
            Sharpen.Pattern pattern = conf.GetJarUnpackPattern();
            NUnit.Framework.Assert.AreEqual(Sharpen.Pattern.Compile("(?:classes/|lib/).*").ToString(),
                                            pattern.ToString());
            // default value
            NUnit.Framework.Assert.IsFalse(conf.GetKeepFailedTaskFiles());
            conf.SetKeepFailedTaskFiles(true);
            NUnit.Framework.Assert.IsTrue(conf.GetKeepFailedTaskFiles());
            // default value
            NUnit.Framework.Assert.IsNull(conf.GetKeepTaskFilesPattern());
            conf.SetKeepTaskFilesPattern("123454");
            NUnit.Framework.Assert.AreEqual("123454", conf.GetKeepTaskFilesPattern());
            // default value
            NUnit.Framework.Assert.IsNotNull(conf.GetWorkingDirectory());
            conf.SetWorkingDirectory(new Path("test"));
            NUnit.Framework.Assert.IsTrue(conf.GetWorkingDirectory().ToString().EndsWith("test"));
            // default value
            NUnit.Framework.Assert.AreEqual(1, conf.GetNumTasksToExecutePerJvm());
            // default value
            NUnit.Framework.Assert.IsNull(conf.GetKeyFieldComparatorOption());
            conf.SetKeyFieldComparatorOptions("keySpec");
            NUnit.Framework.Assert.AreEqual("keySpec", conf.GetKeyFieldComparatorOption());
            // default value
            NUnit.Framework.Assert.IsFalse(conf.GetUseNewReducer());
            conf.SetUseNewReducer(true);
            NUnit.Framework.Assert.IsTrue(conf.GetUseNewReducer());
            // default
            NUnit.Framework.Assert.IsTrue(conf.GetMapSpeculativeExecution());
            NUnit.Framework.Assert.IsTrue(conf.GetReduceSpeculativeExecution());
            NUnit.Framework.Assert.IsTrue(conf.GetSpeculativeExecution());
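            // GetSpeculativeExecution() reports true while either the map or the reduce side
            // still has speculation enabled, as the next six asserts demonstrate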
            conf.SetReduceSpeculativeExecution(false);
            NUnit.Framework.Assert.IsTrue(conf.GetSpeculativeExecution());
            conf.SetMapSpeculativeExecution(false);
            NUnit.Framework.Assert.IsFalse(conf.GetSpeculativeExecution());
            NUnit.Framework.Assert.IsFalse(conf.GetMapSpeculativeExecution());
            NUnit.Framework.Assert.IsFalse(conf.GetReduceSpeculativeExecution());
            conf.SetSessionId("ses");
            NUnit.Framework.Assert.AreEqual("ses", conf.GetSessionId());
            NUnit.Framework.Assert.AreEqual(3, conf.GetMaxTaskFailuresPerTracker());
            conf.SetMaxTaskFailuresPerTracker(2);
            NUnit.Framework.Assert.AreEqual(2, conf.GetMaxTaskFailuresPerTracker());
            NUnit.Framework.Assert.AreEqual(0, conf.GetMaxMapTaskFailuresPercent());
            conf.SetMaxMapTaskFailuresPercent(50);
            NUnit.Framework.Assert.AreEqual(50, conf.GetMaxMapTaskFailuresPercent());
            NUnit.Framework.Assert.AreEqual(0, conf.GetMaxReduceTaskFailuresPercent());
            conf.SetMaxReduceTaskFailuresPercent(70);
            NUnit.Framework.Assert.AreEqual(70, conf.GetMaxReduceTaskFailuresPercent());
            // by default
            NUnit.Framework.Assert.AreEqual(JobPriority.Normal.ToString(), conf.GetJobPriority().ToString());
            conf.SetJobPriority(JobPriority.High);
            NUnit.Framework.Assert.AreEqual(JobPriority.High.ToString(), conf.GetJobPriority().ToString());
            NUnit.Framework.Assert.IsNull(conf.GetJobSubmitHostName());
            conf.SetJobSubmitHostName("hostname");
            NUnit.Framework.Assert.AreEqual("hostname", conf.GetJobSubmitHostName());
            // default
            NUnit.Framework.Assert.IsNull(conf.GetJobSubmitHostAddress());
            conf.SetJobSubmitHostAddress("ww");
            NUnit.Framework.Assert.AreEqual("ww", conf.GetJobSubmitHostAddress());
            // default value
            NUnit.Framework.Assert.IsFalse(conf.GetProfileEnabled());
            conf.SetProfileEnabled(true);
            NUnit.Framework.Assert.IsTrue(conf.GetProfileEnabled());
            // default value
            NUnit.Framework.Assert.AreEqual(conf.GetProfileTaskRange(true).ToString(), "0-2");
            NUnit.Framework.Assert.AreEqual(conf.GetProfileTaskRange(false).ToString(), "0-2");
            conf.SetProfileTaskRange(true, "0-3");
            NUnit.Framework.Assert.AreEqual(conf.GetProfileTaskRange(false).ToString(), "0-2");
            NUnit.Framework.Assert.AreEqual(conf.GetProfileTaskRange(true).ToString(), "0-3");
            // default value
            NUnit.Framework.Assert.IsNull(conf.GetMapDebugScript());
            conf.SetMapDebugScript("mDbgScript");
            NUnit.Framework.Assert.AreEqual("mDbgScript", conf.GetMapDebugScript());
            // default value
            NUnit.Framework.Assert.IsNull(conf.GetReduceDebugScript());
            conf.SetReduceDebugScript("rDbgScript");
            NUnit.Framework.Assert.AreEqual("rDbgScript", conf.GetReduceDebugScript());
            // default value
            NUnit.Framework.Assert.IsNull(conf.GetJobLocalDir());
            NUnit.Framework.Assert.AreEqual("default", conf.GetQueueName());
            conf.SetQueueName("qname");
            NUnit.Framework.Assert.AreEqual("qname", conf.GetQueueName());
            conf.SetMemoryForMapTask(100 * 1000);
            NUnit.Framework.Assert.AreEqual(100 * 1000, conf.GetMemoryForMapTask());
            conf.SetMemoryForReduceTask(1000 * 1000);
            NUnit.Framework.Assert.AreEqual(1000 * 1000, conf.GetMemoryForReduceTask());
            NUnit.Framework.Assert.AreEqual(-1, conf.GetMaxPhysicalMemoryForTask());
            NUnit.Framework.Assert.AreEqual("The variable key is no longer used.", JobConf.DeprecatedString
                                                ("key"));
            // make sure mapreduce.map|reduce.java.opts are not set by default
            // so that they won't override mapred.child.java.opts
            NUnit.Framework.Assert.AreEqual("mapreduce.map.java.opts should not be set by default"
                                            , null, conf.Get(JobConf.MapredMapTaskJavaOpts));
            NUnit.Framework.Assert.AreEqual("mapreduce.reduce.java.opts should not be set by default"
                                            , null, conf.Get(JobConf.MapredReduceTaskJavaOpts));
        }