public virtual void TestDefaultCollect()
        {
            // Checks the collector's progress reporting when the configuration is left
            // untouched: one Progress() call is expected for every
            // Task.DefaultCombineRecordsBeforeProgress collected records.
            Task.TaskReporter reporterMock = Org.Mockito.Mockito.Mock <Task.TaskReporter>();
            IFile.Writer <string, int> writerMock = Org.Mockito.Mockito.Mock <IFile.Writer>();
            Configuration defaultConf = new Configuration();

            coc = new Task.CombineOutputCollector <string, int>(outCounter, reporterMock, defaultConf);
            coc.SetWriter(writerMock);

            // Nothing has been collected yet, so no progress may have been reported.
            Org.Mockito.Mockito.Verify(reporterMock, Org.Mockito.Mockito.Never()).Progress();

            // Two full batches: after batch k the reporter must have been hit exactly k times.
            for (int batch = 1; batch <= 2; batch++)
            {
                int record = 0;
                while (record < Task.DefaultCombineRecordsBeforeProgress)
                {
                    coc.Collect("dummy", record);
                    record++;
                }
                Org.Mockito.Mockito.Verify(reporterMock, Org.Mockito.Mockito.Times(batch)).Progress();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the reduce phase with a new-API reducer: wraps the raw input iterator,
        /// instantiates the reducer class named by the task context, runs it against a
        /// tracking record writer and always closes that writer afterwards.
        /// </summary>
        /// <typeparam name="INKEY">Reducer input key type; <c>typeof(INKEY)</c> is passed to the reduce context as the key class.</typeparam>
        /// <typeparam name="INVALUE">Reducer input value type; <c>typeof(INVALUE)</c> is passed to the reduce context as the value class.</typeparam>
        /// <typeparam name="OUTKEY">Reducer output key type.</typeparam>
        /// <typeparam name="OUTVALUE">Reducer output value type.</typeparam>
        /// <param name="job">Job configuration; the skip-record flags are written into it before the reducer runs.</param>
        /// <param name="umbilical">Not referenced by this method.</param>
        /// <param name="reporter">Reporter handed to the iterator wrapper, the task context and the reduce context.</param>
        /// <param name="rIter">Raw key/value input of the reduce phase.</param>
        /// <param name="comparator">Comparator passed on to the reduce context.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        private void RunNewReducer <INKEY, INVALUE, OUTKEY, OUTVALUE>(JobConf job, TaskUmbilicalProtocol
                                                                      umbilical, Task.TaskReporter reporter, RawKeyValueIterator rIter, RawComparator
                                                                      <INKEY> comparator)
        {
            // The type parameters are declared with the exact (case-sensitive) spelling
            // used by the signature and body; a differently-cased declaration would leave
            // INKEY/INVALUE/OUTKEY/OUTVALUE undeclared.
            System.Type keyClass   = typeof(INKEY);
            System.Type valueClass = typeof(INVALUE);

            // wrap value iterator to report progress.
            rIter = new _RawKeyValueIterator_587(rIter, reporter);
            // make a task context so we can get the classes
            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job, GetTaskID(), reporter
                                                                        );
            // make a reducer
            Reducer <INKEY, INVALUE, OUTKEY, OUTVALUE> reducer = (Reducer <INKEY, INVALUE, OUTKEY
                                                                           , OUTVALUE>)ReflectionUtils.NewInstance(taskContext.GetReducerClass(), job);
            RecordWriter <OUTKEY, OUTVALUE> trackedRW = new ReduceTask.NewTrackingRecordWriter
                                                        <OUTKEY, OUTVALUE>(this, taskContext);

            // Publish the skipping state under both configuration keys before the
            // reduce context is created from the job.
            job.SetBoolean("mapred.skip.on", IsSkipping());
            job.SetBoolean(JobContext.SkipRecords, IsSkipping());
            Reducer.Context reducerContext = CreateReduceContext(reducer, job, GetTaskID(), rIter
                                                                 , reduceInputKeyCounter, reduceInputValueCounter, trackedRW, committer, reporter
                                                                 , comparator, keyClass, valueClass);
            try
            {
                reducer.Run(reducerContext);
            }
            finally
            {
                // Close the writer even when the reducer throws.
                trackedRW.Close(reducerContext);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Runs the reduce phase with an old-API reducer: instantiates the job's reducer
        /// class, feeds it one key group at a time through an output collector backed by
        /// a tracking record writer, then closes the reducer and the writer.
        /// </summary>
        /// <typeparam name="INKEY">Reducer input key type; <c>typeof(INKEY)</c> is passed to the values iterator as the key class.</typeparam>
        /// <typeparam name="INVALUE">Reducer input value type; <c>typeof(INVALUE)</c> is passed to the values iterator as the value class.</typeparam>
        /// <typeparam name="OUTKEY">Reducer output key type.</typeparam>
        /// <typeparam name="OUTVALUE">Reducer output value type.</typeparam>
        /// <param name="job">Job configuration supplying the reducer class, skip settings and grouping comparator.</param>
        /// <param name="umbilical">Passed to the skipping values iterator only.</param>
        /// <param name="reporter">Reporter handed to the writer, collector, iterator and reducer, and used for the processed-groups counter.</param>
        /// <param name="rIter">Raw key/value input of the reduce phase.</param>
        /// <param name="comparator">Comparator used by the skipping values iterator.</param>
        /// <exception cref="System.IO.IOException"/>
        private void RunOldReducer <INKEY, INVALUE, OUTKEY, OUTVALUE>(JobConf job, TaskUmbilicalProtocol
                                                                      umbilical, Task.TaskReporter reporter, RawKeyValueIterator rIter, RawComparator
                                                                      <INKEY> comparator)
        {
            // The type parameters are declared with the exact (case-sensitive) spelling
            // used by the signature and body; a differently-cased declaration would leave
            // INKEY/INVALUE/OUTKEY/OUTVALUE undeclared.
            System.Type keyClass   = typeof(INKEY);
            System.Type valueClass = typeof(INVALUE);
            Reducer <INKEY, INVALUE, OUTKEY, OUTVALUE> reducer = ReflectionUtils.NewInstance(job
                                                                                             .GetReducerClass(), job);
            // make output collector
            string finalName = GetOutputName(GetPartition());
            RecordWriter <OUTKEY, OUTVALUE> @out = new ReduceTask.OldTrackingRecordWriter <OUTKEY
                                                                                           , OUTVALUE>(this, job, reporter, finalName);
            // Second reference to the same writer: @out is nulled after a successful
            // close below, while the collector keeps using finalOut.
            RecordWriter <OUTKEY, OUTVALUE>    finalOut  = @out;
            OutputCollector <OUTKEY, OUTVALUE> collector = new _OutputCollector_419(finalOut,
                                                                                    reporter);

            // indicate that progress update needs to be sent
            // apply reduce function
            try
            {
                //increment processed counter only if skipping feature is enabled
                bool incrProcCount = SkipBadRecords.GetReducerMaxSkipGroups(job) > 0 && SkipBadRecords
                                     .GetAutoIncrReducerProcCount(job);
                // NOTE: the non-skipping iterator takes its comparator from the job's
                // output value grouping comparator, not from the comparator argument.
                ReduceTask.ReduceValuesIterator <INKEY, INVALUE> values = IsSkipping() ? new ReduceTask.SkippingReduceValuesIterator
                                                                          <INKEY, INVALUE>(this, rIter, comparator, keyClass, valueClass, job, reporter, umbilical
                                                                                           ) : new ReduceTask.ReduceValuesIterator <INKEY, INVALUE>(this, rIter, job.GetOutputValueGroupingComparator
                                                                                                                                                        (), keyClass, valueClass, job, reporter);
                values.InformReduceProgress();
                while (values.More())
                {
                    reduceInputKeyCounter.Increment(1);
                    reducer.Reduce(values.GetKey(), values, collector, reporter);
                    if (incrProcCount)
                    {
                        reporter.IncrCounter(SkipBadRecords.CounterGroup, SkipBadRecords.CounterReduceProcessedGroups
                                             , 1);
                    }
                    values.NextKey();
                    values.InformReduceProgress();
                }
                // Regular shutdown path: close, then null the references so the finally
                // block receives null for whatever was already closed (presumably a
                // no-op in the cleanup helpers; confirm against their implementations).
                reducer.Close();
                reducer = null;
                @out.Close(reporter);
                @out = null;
            }
            finally
            {
                // Cleanup for anything still open, i.e. when an exception was thrown
                // before the corresponding Close() above completed.
                IOUtils.Cleanup(Log, reducer);
                CloseQuietly(@out, reporter);
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Captures the map task, job configuration and reporter from <paramref name="context"/>
 /// and allocates one <c>ByteArrayOutputStream</c> plus one key/value writer per reduce task.
 /// </summary>
 /// <param name="context">Collector context supplying the map task, job configuration and reporter.</param>
 /// <exception cref="System.IO.IOException"/>
 /// <exception cref="System.TypeLoadException"/>
 public override void Init(MapOutputCollector.Context context)
 {
     mapTask  = context.GetMapTask();
     jobConf  = context.GetJobConf();
     reporter = context.GetReporter();

     // Everything below is derived from the job configuration captured above.
     numberOfPartitions = jobConf.GetNumReduceTasks();
     keyClass           = (Type)jobConf.GetMapOutputKeyClass();
     valueClass         = (Type)jobConf.GetMapOutputValueClass();

     outStreams    = new ByteArrayOutputStream[numberOfPartitions];
     recordWriters = new TestMerge.KeyValueWriter[numberOfPartitions];

     // Pair every partition index with its own stream and a writer on top of it.
     for (int partition = 0; partition < numberOfPartitions; partition++)
     {
         ByteArrayOutputStream partitionStream = new ByteArrayOutputStream();
         outStreams[partition]    = partitionStream;
         recordWriters[partition] = new TestMerge.KeyValueWriter <K, V>(jobConf, partitionStream,
                                                                        keyClass, valueClass);
     }
 }
Esempio n. 5
0
 /// <summary>
 /// Builds a values iterator for the enclosing reduce task that tracks skipped groups
 /// and records; it fetches the skip counters from <paramref name="reporter"/>, decides
 /// whether skipped records are to be written, obtains the skip-range iterator and
 /// finally calls <c>MayBeSkip()</c>.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public SkippingReduceValuesIterator(ReduceTask _enclosing, RawKeyValueIterator @in
                                     , RawComparator <KEY> comparator, Type keyClass, Type valClass, Configuration conf
                                     , Task.TaskReporter reporter, TaskUmbilicalProtocol umbilical)
     : base(_enclosing)
 {
     // Plain captures of the constructor arguments.
     this._enclosing = _enclosing;
     this.umbilical  = umbilical;
     this.reporter   = reporter;
     this.keyClass   = keyClass;
     this.valClass   = valClass;

     // Counters for skipped groups and skipped records, looked up in that order.
     Counters.Counter skippedGroups  = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedGroups);
     Counters.Counter skippedRecords = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedRecords);
     this.skipGroupCounter = skippedGroups;
     this.skipRecCounter   = skippedRecords;

     // Skipped records are written only if the task asks for it AND a skip output
     // path is configured; the path is not consulted when the task says no.
     bool taskWantsSkipOutput = this._enclosing.ToWriteSkipRecs();
     this.toWriteSkipRecs = taskWantsSkipOutput && SkipBadRecords.GetSkipOutputPath(conf) != null;

     this.skipIt = this._enclosing.GetSkipRanges().SkipRangeIterator();

     // Must run last: every field above is assigned before this call.
     this.MayBeSkip();
 }
Esempio n. 6
0
            /// <summary>
            /// Creates the underlying record writer for <paramref name="finalName"/> through the
            /// job's output format and adds to the file-output byte counter the change in
            /// <c>GetOutputBytes(fsStats)</c> measured around that creation.
            /// </summary>
            /// <exception cref="System.IO.IOException"/>
            public OldTrackingRecordWriter(ReduceTask reduce, JobConf job, Task.TaskReporter
                                           reporter, string finalName)
            {
                // Share the counters owned by the reduce task.
                reduceOutputCounter   = reduce.reduceOutputCounter;
                fileOutputByteCounter = reduce.fileOutputByteCounter;

                // Statistics are looked up only for FileOutputFormat-based jobs; otherwise null.
                fsStats = (job.GetOutputFormat() is FileOutputFormat)
                    ? GetFsStatistics(FileOutputFormat.GetOutputPath(job), job)
                    : null;

                FileSystem fileSystem = FileSystem.Get(job);

                // Sample the byte count before and after creating the real writer.
                long bytesBefore = GetOutputBytes(fsStats);
                real = job.GetOutputFormat().GetRecordWriter(fileSystem, job, finalName, reporter);
                long bytesAfter = GetOutputBytes(fsStats);

                fileOutputByteCounter.Increment(bytesAfter - bytesBefore);
            }
        public virtual void TestCustomCollect()
        {
            // Checks the collector's progress reporting when the number of records
            // between progress reports is configured explicitly (here: "2").
            Task.TaskReporter reporterMock = Org.Mockito.Mockito.Mock <Task.TaskReporter>();
            IFile.Writer <string, int> writerMock = Org.Mockito.Mockito.Mock <IFile.Writer>();

            Configuration customConf = new Configuration();
            customConf.Set(MRJobConfig.CombineRecordsBeforeProgress, "2");

            coc = new Task.CombineOutputCollector <string, int>(outCounter, reporterMock, customConf);
            coc.SetWriter(writerMock);

            // No records yet: no progress.
            Org.Mockito.Mockito.Verify(reporterMock, Org.Mockito.Mockito.Never()).Progress();

            // First record: still below the configured interval.
            coc.Collect("dummy", 1);
            Org.Mockito.Mockito.Verify(reporterMock, Org.Mockito.Mockito.Never()).Progress();

            // Second record reaches the interval: exactly one progress report.
            coc.Collect("dummy", 2);
            Org.Mockito.Mockito.Verify(reporterMock, Org.Mockito.Mockito.Times(1)).Progress();
        }
Esempio n. 8
0
        public virtual void TestProgressIsReportedIfInputASeriesOfEmptyFiles()
        {
            // Builds a combined split over three files written without content and checks
            // that the record reader reports progress once on Initialize and three times
            // in total after NextKeyValue() has returned false.
            const int fileCount = 3;

            JobConf jobConf = new JobConf();

            Path[]     splitPaths  = new Path[fileCount];
            FilePath[] localFiles  = new FilePath[fileCount];
            long[]     fileLengths = new long[fileCount];
            try
            {
                int index = 0;
                while (index < fileCount)
                {
                    FilePath dir = new FilePath(outDir.ToString());
                    dir.Mkdir();

                    // Create the file by opening a writer and closing it straight away.
                    localFiles[index] = new FilePath(dir, "testfile" + index);
                    FileWriter emptyWriter = new FileWriter(localFiles[index]);
                    emptyWriter.Flush();
                    emptyWriter.Close();

                    // The length recorded in the split is the file index, as in the original test.
                    fileLengths[index] = index;
                    splitPaths[index]  = new Path(outDir + "/testfile" + index);
                    index++;
                }

                CombineFileSplit split = new CombineFileSplit(splitPaths, fileLengths);
                TaskAttemptID attemptId = Org.Mockito.Mockito.Mock <TaskAttemptID>();
                Task.TaskReporter reporterMock = Org.Mockito.Mockito.Mock <Task.TaskReporter>();
                TaskAttemptContextImpl attemptContext = new TaskAttemptContextImpl(jobConf, attemptId,
                                                                                   reporterMock);
                CombineFileRecordReader reader = new CombineFileRecordReader(split, attemptContext,
                                                                             typeof(TestCombineFileRecordReader.TextRecordReaderWrapper));

                reader.Initialize(split, attemptContext);
                Org.Mockito.Mockito.Verify(reporterMock).Progress();

                NUnit.Framework.Assert.IsFalse(reader.NextKeyValue());
                Org.Mockito.Mockito.Verify(reporterMock, Org.Mockito.Mockito.Times(3)).Progress();
            }
            finally
            {
                // Remove the output directory regardless of the test outcome.
                FileUtil.FullyDelete(new FilePath(outDir.ToString()));
            }
        }
Esempio n. 9
0
 /// <summary>Bundles a map task with its job configuration and reporter.</summary>
 /// <param name="mapTask">Map task stored in this context.</param>
 /// <param name="jobConf">Job configuration stored in this context.</param>
 /// <param name="reporter">Task reporter stored in this context.</param>
 public Context(MapTask mapTask, JobConf jobConf, Task.TaskReporter reporter)
 {
     this.reporter = reporter;
     this.jobConf  = jobConf;
     this.mapTask  = mapTask;
 }
Esempio n. 10
0
 /// <summary>Stores the iterator to wrap and the reporter to use alongside it.</summary>
 /// <param name="rawIter">Underlying raw key/value iterator.</param>
 /// <param name="reporter">Task reporter kept for later use by this wrapper.</param>
 public _RawKeyValueIterator_587(RawKeyValueIterator rawIter, Task.TaskReporter reporter)
 {
     this.reporter = reporter;
     this.rawIter  = rawIter;
 }
Esempio n. 11
0
 /// <summary>Stores the record writer to collect into and the reporter to use alongside it.</summary>
 /// <param name="finalOut">Record writer kept as the collector's output.</param>
 /// <param name="reporter">Task reporter kept for later use by this collector.</param>
 public _OutputCollector_419(RecordWriter <OUTKEY, OUTVALUE> finalOut, Task.TaskReporter reporter)
 {
     this.reporter = reporter;
     this.finalOut = finalOut;
 }
Esempio n. 12
0
        /// <summary>
        /// Entry point of the reduce task: records the skipping flag in the job, registers
        /// the copy/sort/reduce phases, starts the reporter, handles the job-cleanup,
        /// job-setup and task-cleanup variants (each returns early), then runs the shuffle
        /// consumer plugin and dispatches to the new- or old-API reducer.
        /// </summary>
        /// <param name="job">Job configuration for this task.</param>
        /// <param name="umbilical">Protocol handle used to start the reporter, send status updates and finish the task.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public override void Run(JobConf job, TaskUmbilicalProtocol umbilical)
        {
            job.SetBoolean(JobContext.SkipRecords, IsSkipping());
            // Phases are registered only when IsMapOrReduce() holds; note that
            // sortPhase is used unconditionally further down.
            if (IsMapOrReduce())
            {
                copyPhase   = GetProgress().AddPhase("copy");
                sortPhase   = GetProgress().AddPhase("sort");
                reducePhase = GetProgress().AddPhase("reduce");
            }
            // start thread that will handle communication with parent
            Task.TaskReporter reporter = StartReporter(umbilical);
            bool useNewApi             = job.GetUseNewReducer();

            Initialize(job, GetJobID(), reporter, useNewApi);
            // check if it is a cleanupJobTask
            if (jobCleanup)
            {
                RunJobCleanupTask(umbilical, reporter);
                return;
            }
            // job-setup variant: run it and stop, no shuffle/reduce
            if (jobSetup)
            {
                RunJobSetupTask(umbilical, reporter);
                return;
            }
            // task-cleanup variant: run it and stop, no shuffle/reduce
            if (taskCleanup)
            {
                RunTaskCleanupTask(umbilical, reporter);
                return;
            }
            // Initialize the codec
            codec = InitCodec();
            RawKeyValueIterator   rIter = null;
            ShuffleConsumerPlugin shuffleConsumerPlugin = null;
            // NOTE(review): read from the conf member rather than the job argument — confirm both refer to the same configuration.
            Type combinerClass = conf.GetCombinerClass();

            // A combine collector is created only when a combiner class is configured.
            Task.CombineOutputCollector combineCollector = (null != combinerClass) ? new Task.CombineOutputCollector
                                                               (reduceCombineOutputCounter, reporter, conf) : null;
            // Plugin class looked up under MRConfig.ShuffleConsumerPlugin; typeof(Shuffle)
            // is presumably the fallback when the key is unset — confirm against GetClass.
            Type clazz = job.GetClass <ShuffleConsumerPlugin>(MRConfig.ShuffleConsumerPlugin,
                                                              typeof(Shuffle));

            shuffleConsumerPlugin = ReflectionUtils.NewInstance(clazz, job);
            Log.Info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
            // Hand the plugin everything it needs: task identity, file system, reporter,
            // codec, combiner pieces, counters, status and phase objects.
            ShuffleConsumerPlugin.Context shuffleContext = new ShuffleConsumerPlugin.Context(
                GetTaskID(), job, FileSystem.GetLocal(job), umbilical, base.lDirAlloc, reporter,
                codec, combinerClass, combineCollector, spilledRecordsCounter, reduceCombineInputCounter
                , shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter, mergedMapOutputsCounter
                , taskStatus, copyPhase, sortPhase, this, mapOutputFile, localMapFiles);
            shuffleConsumerPlugin.Init(shuffleContext);
            // The plugin's result is the raw iterator the reducer consumes.
            rIter = shuffleConsumerPlugin.Run();
            // free up the data structures
            mapOutputFilesOnDisk.Clear();
            sortPhase.Complete();
            // sort is complete
            SetPhase(TaskStatus.Phase.Reduce);
            StatusUpdate(umbilical);
            Type          keyClass   = job.GetMapOutputKeyClass();
            Type          valueClass = job.GetMapOutputValueClass();
            RawComparator comparator = job.GetOutputValueGroupingComparator();

            // Dispatch on the reducer API selected in the job configuration.
            if (useNewApi)
            {
                RunNewReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
            }
            else
            {
                RunOldReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
            }
            // NOTE(review): not in a finally block, so Close() and Done() are skipped
            // when the reducer throws — confirm that this is intended.
            shuffleConsumerPlugin.Close();
            Done(umbilical, reporter);
        }