Example No. 1
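A Commit() implementation for an on-disk map output: the temporary file is renamed to its final location, wrapped in a CompressAwarePath that records its raw and compressed sizes, and handed to the merge manager via CloseOnDiskFile.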
 /// <exception cref="System.IO.IOException"/>
 public override void Commit()
 {
     fs.Rename(tmpOutputPath, outputPath);
     MergeManagerImpl.CompressAwarePath compressAwarePath = new MergeManagerImpl.CompressAwarePath(
         outputPath, GetSize(), this.compressedSize);
     merger.CloseOnDiskFile(compressAwarePath);
 }
Example No. 2
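CloseOnDiskFile registers a finished on-disk map output under a lock and, once 2 * ioSortFactor - 1 files have accumulated, kicks off the on-disk merger.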
 public virtual void CloseOnDiskFile(MergeManagerImpl.CompressAwarePath file)
 {
     lock (this)
     {
         onDiskMapOutputs.AddItem(file);
         if (onDiskMapOutputs.Count >= (2 * ioSortFactor - 1))
         {
             onDiskMerger.StartMerge(onDiskMapOutputs);
         }
     }
 }
Example No. 3
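A test that pulls the onDiskMerger and its mergeFactor out of MergeManagerImpl via Whitebox, suspends the merger thread, feeds it 2 * SortFactor fake CompressAwarePath entries through CloseOnDiskFile, and then checks that each pending merge batch is non-empty, no larger than the sort factor, and sorted by compressed size.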
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.URISyntaxException"/>
/// <exception cref="System.Exception"/>
public virtual void TestOnDiskMerger()
{
    JobConf jobConf = new JobConf();
    int SortFactor = 5;
    jobConf.SetInt(MRJobConfig.IoSortFactor, SortFactor);
    MapOutputFile mapOutputFile = new MROutputFiles();
    FileSystem fs = FileSystem.GetLocal(jobConf);
    MergeManagerImpl<IntWritable, IntWritable> manager = new MergeManagerImpl<IntWritable, IntWritable>(
        null, jobConf, fs, null, null, null, null, null, null, null, null, null, null, mapOutputFile);
    MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable> onDiskMerger =
        (MergeThread<MapOutput<IntWritable, IntWritable>, IntWritable, IntWritable>)
        Whitebox.GetInternalState(manager, "onDiskMerger");
    int mergeFactor = (int)Whitebox.GetInternalState(onDiskMerger, "mergeFactor");
    // Make sure the io.sort.factor is set properly.
    NUnit.Framework.Assert.AreEqual(mergeFactor, SortFactor);
    // Stop the onDiskMerger thread so that we can intercept the list of files
    // waiting to be merged.
    onDiskMerger.Suspend();
    // Send the list of fake files waiting to be merged.
    Random rand = new Random();
    for (int i = 0; i < 2 * SortFactor; ++i)
    {
        Path path = new Path("somePath");
        MergeManagerImpl.CompressAwarePath cap =
            new MergeManagerImpl.CompressAwarePath(path, 1L, rand.Next());
        manager.CloseOnDiskFile(cap);
    }
    // Check that the files pending to be merged are in sorted order.
    List<IList<MergeManagerImpl.CompressAwarePath>> pendingToBeMerged =
        (List<IList<MergeManagerImpl.CompressAwarePath>>)
        Whitebox.GetInternalState(onDiskMerger, "pendingToBeMerged");
    NUnit.Framework.Assert.IsTrue("No inputs were added to list pending to merge",
                                  pendingToBeMerged.Count > 0);
    for (int i = 0; i < pendingToBeMerged.Count; ++i)
    {
        IList<MergeManagerImpl.CompressAwarePath> inputs = pendingToBeMerged[i];
        for (int j = 1; j < inputs.Count; ++j)
        {
            NUnit.Framework.Assert.IsTrue("Not enough / too many inputs were going to be merged",
                                          inputs.Count > 0 && inputs.Count <= SortFactor);
            NUnit.Framework.Assert.IsTrue("Inputs to be merged were not sorted according to size: ",
                                          inputs[j].GetCompressedSize() >= inputs[j - 1].GetCompressedSize());
        }
    }
}
Example No. 4
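CompareTo orders CompressAwarePath instances by compressed size and falls back to the base path comparison when sizes are equal, so distinct paths of the same size can still coexist in a TreeSet.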
public override int CompareTo(object obj)
{
    if (obj is MergeManagerImpl.CompressAwarePath)
    {
        MergeManagerImpl.CompressAwarePath compPath = (MergeManagerImpl.CompressAwarePath)obj;
        if (this.compressedSize < compPath.GetCompressedSize())
        {
            return -1;
        }
        else if (this.GetCompressedSize() > compPath.GetCompressedSize())
        {
            return 1;
        }
    }
    // Not returning 0 here so that objects with the same size (but
    // different paths) are still added to the TreeSet.
    return base.CompareTo(obj);
}
Example No. 5
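The in-memory merger's Merge method: it turns the in-memory map outputs into segments, merges them into a new IFile on disk (running the combiner if one is configured), and registers the resulting file with the merge manager through CloseOnDiskFile; on an IOException the partially written file is deleted before rethrowing.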
/// <exception cref="System.IO.IOException"/>
public override void Merge(IList<InMemoryMapOutput<K, V>> inputs)
{
    if (inputs == null || inputs.Count == 0)
    {
        return;
    }
    // Name this output file the same as the first file in the current list of
    // in-memory files (this is guaranteed to be absent on disk currently, so we
    // don't overwrite a previously created spill). Also, create the output file
    // now, since it is not guaranteed to be present after merge is called (we
    // delete empty files as soon as we see them in the merge method).
    // Figure out the mapId.
    TaskAttemptID mapId = inputs[0].GetMapId();
    TaskID mapTaskId = mapId.GetTaskID();
    IList<Merger.Segment<K, V>> inMemorySegments = new AList<Merger.Segment<K, V>>();
    long mergeOutputSize = this._enclosing.CreateInMemorySegments(inputs, inMemorySegments, 0);
    int noInMemorySegments = inMemorySegments.Count;
    Path outputPath = this._enclosing.mapOutputFile.GetInputFileForWrite(mapTaskId, mergeOutputSize)
        .Suffix(Org.Apache.Hadoop.Mapred.Task.MergedOutputPrefix);
    FSDataOutputStream @out = CryptoUtils.WrapIfNecessary(this._enclosing.jobConf,
                                                          this._enclosing.rfs.Create(outputPath));
    IFile.Writer<K, V> writer = new IFile.Writer<K, V>(this._enclosing.jobConf, @out,
        (Type)this._enclosing.jobConf.GetMapOutputKeyClass(),
        (Type)this._enclosing.jobConf.GetMapOutputValueClass(),
        this._enclosing.codec, null, true);
    RawKeyValueIterator rIter = null;
    MergeManagerImpl.CompressAwarePath compressAwarePath;
    try
    {
        MergeManagerImpl.Log.Info("Initiating in-memory merge with " + noInMemorySegments + " segments...");
        rIter = Merger.Merge(this._enclosing.jobConf, this._enclosing.rfs,
            (Type)this._enclosing.jobConf.GetMapOutputKeyClass(),
            (Type)this._enclosing.jobConf.GetMapOutputValueClass(),
            inMemorySegments, inMemorySegments.Count,
            new Path(this._enclosing.reduceId.ToString()),
            (RawComparator<K>)this._enclosing.jobConf.GetOutputKeyComparator(),
            this._enclosing.reporter, this._enclosing.spilledRecordsCounter, null, null);
        if (null == this._enclosing.combinerClass)
        {
            Merger.WriteFile(rIter, writer, this._enclosing.reporter, this._enclosing.jobConf);
        }
        else
        {
            this._enclosing.combineCollector.SetWriter(writer);
            this._enclosing.CombineAndSpill(rIter, this._enclosing.reduceCombineInputCounter);
        }
        writer.Close();
        compressAwarePath = new MergeManagerImpl.CompressAwarePath(outputPath,
            writer.GetRawLength(), writer.GetCompressedLength());
        MergeManagerImpl.Log.Info(this._enclosing.reduceId + " Merge of the " + noInMemorySegments
            + " files in-memory complete." + " Local file is " + outputPath + " of size "
            + this._enclosing.localFS.GetFileStatus(outputPath).GetLen());
    }
    catch (IOException)
    {
        // Make sure that we delete the on-disk file that we created earlier
        // when we invoked cloneFileAttributes.
        this._enclosing.localFS.Delete(outputPath, true);
        throw;
    }
    // Note the output of the merge.
    this._enclosing.CloseOnDiskFile(compressAwarePath);
}
Example No. 6
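An end-to-end test: two in-memory merges produce two on-disk files, their contents are verified, then those files are merged again by the on-disk merger, and the final output is checked to be sorted by key with no outstanding map outputs after Close().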
public virtual void TestInMemoryAndOnDiskMerger()
{
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LocalDir);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(reduceId1,
        jobConf, fs, lda, Reporter.Null, null, null, null, null, null, null, null,
        new Progress(), new MROutputFiles());
    // Write map outputs.
    IDictionary<string, string> map1 = new SortedDictionary<string, string>();
    map1["apple"] = "disgusting";
    map1["carrot"] = "delicious";
    IDictionary<string, string> map2 = new SortedDictionary<string, string>();
    // Note: the banana entry goes into map1 (map2 stays empty); the key/value
    // assertions further down expect exactly this layout.
    map1["banana"] = "pretty good";
    byte[] mapOutputBytes1 = WriteMapOutput(conf, map1);
    byte[] mapOutputBytes2 = WriteMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1,
        mergeManager, mapOutputBytes1.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2,
        mergeManager, mapOutputBytes2.Length, null, true);
    System.Array.Copy(mapOutputBytes1, 0, mapOutput1.GetMemory(), 0, mapOutputBytes1.Length);
    System.Array.Copy(mapOutputBytes2, 0, mapOutput2.GetMemory(), 0, mapOutputBytes2.Length);
    // Create merger and run merge.
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs1 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.AddItem(mapOutput1);
    mapOutputs1.AddItem(mapOutput2);
    inMemoryMerger.Merge(mapOutputs1);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);
    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.Reduce, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.Map, 5), 0);
    // Write map outputs.
    IDictionary<string, string> map3 = new SortedDictionary<string, string>();
    map3["apple"] = "awesome";
    map3["carrot"] = "amazing";
    IDictionary<string, string> map4 = new SortedDictionary<string, string>();
    map4["banana"] = "bla";
    byte[] mapOutputBytes3 = WriteMapOutput(conf, map3);
    byte[] mapOutputBytes4 = WriteMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3,
        mergeManager, mapOutputBytes3.Length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4,
        mergeManager, mapOutputBytes4.Length, null, true);
    System.Array.Copy(mapOutputBytes3, 0, mapOutput3.GetMemory(), 0, mapOutputBytes3.Length);
    System.Array.Copy(mapOutputBytes4, 0, mapOutput4.GetMemory(), 0, mapOutputBytes4.Length);
    // Create merger and run merge.
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 =
        mergeManager.CreateInMemoryMerger();
    IList<InMemoryMapOutput<Text, Text>> mapOutputs2 = new AList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.AddItem(mapOutput3);
    mapOutputs2.AddItem(mapOutput4);
    inMemoryMerger2.Merge(mapOutputs2);
    NUnit.Framework.Assert.AreEqual(2, mergeManager.onDiskMapOutputs.Count);
    IList<MergeManagerImpl.CompressAwarePath> paths = new AList<MergeManagerImpl.CompressAwarePath>();
    IEnumerator<MergeManagerImpl.CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.GetEnumerator();
    IList<string> keys = new AList<string>();
    IList<string> values = new AList<string>();
    while (iterator.HasNext())
    {
        MergeManagerImpl.CompressAwarePath next = iterator.Next();
        ReadOnDiskMapOutput(conf, fs, next, keys, values);
        paths.AddItem(next);
    }
    NUnit.Framework.Assert.AreEqual(keys, Arrays.AsList("apple", "banana", "carrot",
                                                        "apple", "banana", "carrot"));
    NUnit.Framework.Assert.AreEqual(values, Arrays.AsList("awesome", "bla", "amazing",
                                                          "disgusting", "pretty good", "delicious"));
    mergeManager.Close();
    mergeManager = new MergeManagerImpl<Text, Text>(reduceId2, jobConf, fs, lda, Reporter.Null,
        null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    MergeThread<MergeManagerImpl.CompressAwarePath, Text, Text> onDiskMerger =
        mergeManager.CreateOnDiskMerger();
    onDiskMerger.Merge(paths);
    NUnit.Framework.Assert.AreEqual(1, mergeManager.onDiskMapOutputs.Count);
    keys = new AList<string>();
    values = new AList<string>();
    ReadOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.GetEnumerator().Next(), keys, values);
    NUnit.Framework.Assert.AreEqual(keys, Arrays.AsList("apple", "apple", "banana", "banana",
                                                        "carrot", "carrot"));
    NUnit.Framework.Assert.AreEqual(values, Arrays.AsList("awesome", "disgusting", "pretty good",
                                                          "bla", "amazing", "delicious"));
    mergeManager.Close();
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.inMemoryMergedMapOutputs.Count);
    NUnit.Framework.Assert.AreEqual(0, mergeManager.onDiskMapOutputs.Count);
}