/// <summary>
/// Verifies that, with an untouched <c>Configuration</c>, the combine collector calls
/// <c>Progress()</c> on its reporter exactly once for every
/// <c>Task.DefaultCombineRecordsBeforeProgress</c> records collected.
/// </summary>
public virtual void TestDefaultCollect()
        {
            // Mocked collaborators
            Task.TaskReporter reporter = Org.Mockito.Mockito.Mock <Task.TaskReporter>();
            IFile.Writer <string, int> collectorWriter = Org.Mockito.Mockito.Mock <IFile.Writer>();
            Configuration defaults = new Configuration();

            coc = new Task.CombineOutputCollector <string, int>(outCounter, reporter, defaults);
            coc.SetWriter(collectorWriter);

            // Nothing has been collected yet, so no progress may have been reported.
            Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Never()).Progress();

            // Each full batch of records must add exactly one Progress() call.
            for (int batch = 1; batch <= 2; batch++)
            {
                for (int record = 0; record < Task.DefaultCombineRecordsBeforeProgress; record++)
                {
                    coc.Collect("dummy", record);
                }
                Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Times(batch)).Progress();
            }
        }
Пример #2
0
        /// <summary>
        /// Writes an IFile with no records through a gzip codec on the raw local file system,
        /// opens and closes a reader over it, and then compares the byte count returned by
        /// <c>ReadWithChecksum</c> with the length of the reader's checksum.
        /// </summary>
        public virtual void TestIFileReaderWithCodec()
        {
            Configuration conf = new Configuration();
            FileSystem local = FileSystem.GetLocal(conf);
            FileSystem rawFs = ((LocalFileSystem)local).GetRaw();
            Path dataPath = new Path(new Path("build/test.ifile"), "data");
            DefaultCodec gzip = new GzipCodec();
            gzip.SetConf(conf);

            // Produce the file: the writer is closed without appending anything.
            FSDataOutputStream output = rawFs.Create(dataPath);
            IFile.Writer <Text, Text> ifileWriter =
                new IFile.Writer <Text, Text>(conf, output, typeof(Text), typeof(Text), gzip, null);
            ifileWriter.Close();

            // Open it again for reading.
            FSDataInputStream input = rawFs.Open(dataPath);
            long fileLength = rawFs.GetFileStatus(dataPath).GetLen();
            IFile.Reader <Text, Text> ifileReader =
                new IFile.Reader <Text, Text>(conf, input, fileLength, gzip, null);
            ifileReader.Close();

            // Checksum check. NOTE(review): this reads from checksumIn after the reader was
            // closed - kept exactly as the test was written; confirm that is intended.
            byte[] buffer = new byte[100];
            int bytesRead = ifileReader.checksumIn.ReadWithChecksum(buffer, 0, buffer.Length);

            NUnit.Framework.Assert.AreEqual(bytesRead, ifileReader.checksumIn.GetChecksum().Length);
        }
Пример #3
0
 /// <summary>
 /// Closes the current writer, builds a segment from this instance's conf, fs and file,
 /// and appends that segment to the enclosing object's segment list.
 /// The writer field is reset to null afterwards.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 internal virtual void CreateInDiskSegment()
 {
     // A writer must exist before a disk segment can be created from it.
     System.Diagnostics.Debug.Assert(writer != null);
     writer.Close();

     Merger.Segment <K, V> diskSegment =
         new Merger.Segment <K, V>(conf, fs, file, null, true);
     writer = null;

     _enclosing.segmentList.AddItem(diskSegment);
     string message = "Disk Segment added to List. Size is " + _enclosing.segmentList.Count;
     BackupStore.Log.Debug(message);
 }
Пример #4
0
 /// <summary>
 /// Appends one key/value pair to the writer, creating the writer through
 /// <c>CreateSpillFile</c> first when none exists, and logs the current segment count.
 /// </summary>
 /// <param name="key">Key buffer handed to the writer.</param>
 /// <param name="value">Value buffer handed to the writer.</param>
 /// <exception cref="System.IO.IOException"/>
 internal virtual void Write(DataInputBuffer key, DataInputBuffer value)
 {
     if (writer == null)
     {
         // With spillNumber == 0, activate should have been called instead of
         // reaching this point.
         System.Diagnostics.Debug.Assert(spillNumber != 0);
         writer = CreateSpillFile();
     }

     writer.Append(key, value);

     string message = "ID: " + _enclosing.segmentList.Count + " WRITE TO DISK";
     BackupStore.Log.Debug(message);
 }
Пример #5
0
        /// <summary>
        /// Creates an <c>IFile.Writer</c> backed by a gzip codec on the raw local file system
        /// and closes it straight away, without appending any record.
        /// </summary>
        public virtual void TestIFileWriterWithCodec()
        {
            Configuration conf = new Configuration();
            FileSystem local = FileSystem.GetLocal(conf);
            FileSystem rawFs = ((LocalFileSystem)local).GetRaw();
            Path dataPath = new Path(new Path("build/test.ifile"), "data");

            DefaultCodec gzip = new GzipCodec();
            gzip.SetConf(conf);

            FSDataOutputStream output = rawFs.Create(dataPath);
            IFile.Writer <Text, Text> ifileWriter =
                new IFile.Writer <Text, Text>(conf, output, typeof(Text), typeof(Text), gzip, null);
            ifileWriter.Close();
        }
Пример #6
0
        /// <summary>
        /// Writes <paramref name="vals"/> to an IFile named "data.in" under
        /// <paramref name="tmpDir"/>, merges that single file, and walks the merged output
        /// with a <c>Task.ValuesIterator</c>, asserting that keys and values come back in the
        /// order they were written and that the merge reports a progress of 1.0 at the end.
        /// </summary>
        /// <param name="tmpDir">Directory holding the input file; also passed to the merge.</param>
        /// <param name="vals">Key/value pairs to write and then expect back, in order.</param>
        /// <param name="conf">Configuration used for the file system, writer, merge and iterator.</param>
        /// <param name="codec">Codec passed to both the writer and the merge.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void RunValueIterator(Path tmpDir, TestReduceTask.Pair[] vals, Configuration
                                             conf, CompressionCodec codec)
        {
            FileSystem local = FileSystem.GetLocal(conf);
            FileSystem rawFs = ((LocalFileSystem)local).GetRaw();
            Path inputPath = new Path(tmpDir, "data.in");

            // Write every pair to the input file.
            FSDataOutputStream output = rawFs.Create(inputPath);
            IFile.Writer <Text, Text> ifileWriter =
                new IFile.Writer <Text, Text>(conf, output, typeof(Text), typeof(Text), codec, null);
            foreach (TestReduceTask.Pair pair in vals)
            {
                ifileWriter.Append(new Text(pair.key), new Text(pair.value));
            }
            ifileWriter.Close();

            // Merge the single input file into a raw key/value iterator.
            Path[] inputs = new Path[] { inputPath };
            int sortFactor = conf.GetInt(JobContext.IoSortFactor, 100);
            RawKeyValueIterator rawItr = Merger.Merge <Text, Text>(
                conf, rawFs, codec, inputs, false, sortFactor, tmpDir,
                new Text.Comparator(), new TestReduceTask.NullProgress(), null, null, null);

            // WritableComparators are not generic
            Task.ValuesIterator valItr = new Task.ValuesIterator <Text, Text>(
                rawItr, WritableComparator.Get(typeof(Text)), typeof(Text), typeof(Text), conf,
                new TestReduceTask.NullProgress());

            int consumed = 0;
            while (valItr.More())
            {
                string groupKey = valItr.GetKey().ToString();

                // The group key must be the key of the next expected pair.
                NUnit.Framework.Assert.AreEqual(vals[consumed].key, groupKey);
                // Every key group must carry at least one value.
                NUnit.Framework.Assert.IsTrue(valItr.HasNext());

                for (; valItr.HasNext(); consumed++)
                {
                    string currentValue = valItr.Next().ToString();
                    // Value and key must both match the expected pair.
                    NUnit.Framework.Assert.AreEqual(vals[consumed].value, currentValue);
                    NUnit.Framework.Assert.AreEqual(vals[consumed].key, valItr.GetKey().ToString());
                }

                // The key must not have changed while its values were being consumed.
                NUnit.Framework.Assert.AreEqual(groupKey, valItr.GetKey().ToString());
                valItr.NextKey();
            }

            // Every pair was seen exactly once, and the merge reports full progress.
            NUnit.Framework.Assert.AreEqual(vals.Length, consumed);
            NUnit.Framework.Assert.AreEqual(1.0f, rawItr.GetProgress().Get());
        }
Пример #7
0
        /// <summary>
        /// Drains <paramref name="records"/> into <paramref name="writer"/>, calling
        /// <c>Progress()</c> on <paramref name="progressable"/> for the first record and then
        /// once every N records, where N is read from
        /// <c>JobContext.RecordsBeforeProgress</c> (10000 when not configured).
        /// </summary>
        /// <param name="records">Iterator supplying the key/value pairs to write.</param>
        /// <param name="writer">Destination that each pair is appended to.</param>
        /// <param name="progressable">Receiver of the periodic progress calls.</param>
        /// <param name="conf">Configuration the reporting interval is read from.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void WriteFile <K, V>(RawKeyValueIterator records, IFile.Writer <K, V
                                                                                       > writer, Progressable progressable, Configuration conf)
        {
            // NOTE(review): a configured interval of 0 makes the remainder below throw
            // DivideByZeroException - confirm callers never configure that.
            long reportInterval = conf.GetLong(JobContext.RecordsBeforeProgress, 10000);

            for (long written = 0; records.Next(); written++)
            {
                writer.Append(records.GetKey(), records.GetValue());

                // 'written' is the number of records appended before this one.
                if (written % reportInterval == 0)
                {
                    progressable.Progress();
                }
            }
        }
        /// <summary>
        /// Verifies that with <c>MRJobConfig.CombineRecordsBeforeProgress</c> set to "2" the
        /// combine collector reports no progress after the first collected record and
        /// exactly one <c>Progress()</c> call after the second.
        /// </summary>
        public virtual void TestCustomCollect()
        {
            // Mocked collaborators
            Task.TaskReporter reporter = Org.Mockito.Mockito.Mock <Task.TaskReporter>();
            IFile.Writer <string, int> collectorWriter = Org.Mockito.Mockito.Mock <IFile.Writer>();

            Configuration custom = new Configuration();
            custom.Set(MRJobConfig.CombineRecordsBeforeProgress, "2");

            coc = new Task.CombineOutputCollector <string, int>(outCounter, reporter, custom);
            coc.SetWriter(collectorWriter);

            // No records yet: no progress.
            Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Never()).Progress();

            // One record is still below the configured threshold.
            coc.Collect("dummy", 1);
            Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Never()).Progress();

            // The second record reaches the threshold and triggers a single progress call.
            coc.Collect("dummy", 2);
            Org.Mockito.Mockito.Verify(reporter, Org.Mockito.Mockito.Times(1)).Progress();
        }
Пример #9
0
 /// <summary>
 /// Marks this instance as active and assigns the writer from <c>CreateSpillFile</c>.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 internal virtual void Activate()
 {
     // The flag is set before the writer is created, so it stays set even if
     // CreateSpillFile throws.
     isActive = true;
     writer = CreateSpillFile();
 }
Пример #10
0
 /// <summary>
 /// Resets this instance: clears the active flag, drops the writer reference
 /// (without closing it) and sets the spill number back to zero.
 /// </summary>
 internal virtual void Reinitialize()
 {
     isActive = false;
     writer = null;
     spillNumber = 0;
 }
Пример #11
0
            /// <summary>
            /// Merges the segments held by this object in one or more passes and returns this
            /// object as the iterator over the final pass. Each non-final pass writes its
            /// merged output to an "intermediate.N" file under <paramref name="tmpDir"/> and
            /// inserts that file back into the segment list as a new segment.
            /// </summary>
            /// <param name="keyClass">Key type handed to the writer of intermediate files.</param>
            /// <param name="valueClass">Value type handed to the writer of intermediate files.</param>
            /// <param name="factor">Requested merge factor; the per-pass value comes from <c>GetPassFactor</c>.</param>
            /// <param name="inMem">Number of in-memory segments; they are assumed to be the first entries of the segment list.</param>
            /// <param name="tmpDir">Directory under which intermediate merge files are named.</param>
            /// <param name="readsCounter">Counter passed to each segment's <c>Init</c>.</param>
            /// <param name="writesCounter">Counter passed to the writer of intermediate files.</param>
            /// <param name="mergePhase">When non-null, replaces the progress object updated by this merge.</param>
            /// <returns>This object, once the remaining segments fit into a single pass.</returns>
            /// <exception cref="System.IO.IOException"/>
            internal virtual RawKeyValueIterator Merge(Type keyClass, Type valueClass, int factor
                                                       , int inMem, Path tmpDir, Counters.Counter readsCounter, Counters.Counter writesCounter
                                                       , Progress mergePhase)
            {
                Log.Info("Merging " + segments.Count + " sorted segments");

                /*
                 * If there are inMemory segments, then they come first in the segments
                 * list and then the sorted disk segments. Otherwise(if there are only
                 * disk segments), then they are sorted segments if there are more than
                 * factor segments in the segments list.
                 */
                int numSegments = segments.Count;
                int origFactor  = factor;
                int passNo      = 1;

                if (mergePhase != null)
                {
                    mergeProgress = mergePhase;
                }
                long totalBytes = ComputeBytesInMerges(factor, inMem);

                // progPerByte converts a processed-byte count into a progress fraction;
                // it is left untouched when there are no bytes to merge.
                if (totalBytes != 0)
                {
                    progPerByte = 1.0f / (float)totalBytes;
                }
                // One iteration per merge pass; the loop only exits through the return below.
                do
                {
                    //create the MergeStreams from the sorted map created in the constructor
                    //and dump the final output to a file
                    //get the factor for this pass of merge. We assume in-memory segments
                    //are the first entries in the segment list and that the pass factor
                    //doesn't apply to them
                    factor = GetPassFactor(factor, passNo, numSegments - inMem);
                    if (1 == passNo)
                    {
                        factor += inMem;
                    }
                    IList <Merger.Segment <K, V> > segmentsToMerge = new AList <Merger.Segment <K, V> >();
                    int  segmentsConsidered    = 0;
                    int  numSegmentsToConsider = factor;
                    long startBytes            = 0;
                    // starting bytes of segments of this merge
                    while (true)
                    {
                        //extract the smallest 'factor' number of segments
                        //Call cleanup on the empty segments (no key/value data)
                        IList <Merger.Segment <K, V> > mStream = GetSegmentDescriptors(numSegmentsToConsider
                                                                                       );
                        foreach (Merger.Segment <K, V> segment in mStream)
                        {
                            // Initialize the segment at the last possible moment;
                            // this helps in ensuring we don't use buffers until we need them
                            segment.Init(readsCounter);
                            // Measure how many bytes reading the first key consumed.
                            long startPos = segment.GetReader().bytesRead;
                            bool hasNext  = segment.NextRawKey();
                            long endPos   = segment.GetReader().bytesRead;
                            if (hasNext)
                            {
                                startBytes += endPos - startPos;
                                segmentsToMerge.AddItem(segment);
                                segmentsConsidered++;
                            }
                            else
                            {
                                //we ignore this segment for the merge
                                segment.Close();
                                numSegments--;
                            }
                        }
                        //if we have the desired number of segments
                        //or looked at all available segments, we break
                        if (segmentsConsidered == factor || segments.Count == 0)
                        {
                            break;
                        }
                        // Some segments were empty: ask only for as many as are still missing.
                        numSegmentsToConsider = factor - segmentsConsidered;
                    }
                    //feed the streams to the priority queue
                    Initialize(segmentsToMerge.Count);
                    Clear();
                    foreach (Merger.Segment <K, V> segment_1 in segmentsToMerge)
                    {
                        Put(segment_1);
                    }
                    //if we have lesser number of segments remaining, then just return the
                    //iterator, else do another single level merge
                    if (numSegments <= factor)
                    {
                        if (!includeFinalMerge)
                        {
                            // for reduce task
                            // Reset totalBytesProcessed and recalculate totalBytes from the
                            // remaining segments to track the progress of the final merge.
                            // Final merge is considered as the progress of the reducePhase,
                            // the 3rd phase of reduce task.
                            totalBytesProcessed = 0;
                            totalBytes          = 0;
                            for (int i = 0; i < segmentsToMerge.Count; i++)
                            {
                                totalBytes += segmentsToMerge[i].GetRawDataLength();
                            }
                        }
                        if (totalBytes != 0)
                        {
                            //being paranoid
                            progPerByte = 1.0f / (float)totalBytes;
                        }
                        totalBytesProcessed += startBytes;
                        // With nothing to merge, report the merge as complete.
                        if (totalBytes != 0)
                        {
                            mergeProgress.Set(totalBytesProcessed * progPerByte);
                        }
                        else
                        {
                            mergeProgress.Set(1.0f);
                        }
                        // Last pass and no segments left - we're done
                        Log.Info("Down to the last merge-pass, with " + numSegments + " segments left of total size: "
                                 + (totalBytes - totalBytesProcessed) + " bytes");
                        return(this);
                    }
                    else
                    {
                        Log.Info("Merging " + segmentsToMerge.Count + " intermediate segments out of a total of "
                                 + (segments.Count + segmentsToMerge.Count));
                        long bytesProcessedInPrevMerges = totalBytesProcessed;
                        totalBytesProcessed += startBytes;
                        //we want to spread the creation of temp files on multiple disks if
                        //available under the space constraints
                        long approxOutputSize = 0;
                        foreach (Merger.Segment <K, V> s in segmentsToMerge)
                        {
                            approxOutputSize += s.GetLength() + ChecksumFileSystem.GetApproxChkSumLength(s.GetLength
                                                                                                             ());
                        }
                        // The intermediate file name carries the pass number as its suffix.
                        Path tmpFilename = new Path(tmpDir, "intermediate").Suffix("." + passNo);
                        Path outputFile  = lDirAlloc.GetLocalPathForWrite(tmpFilename.ToString(), approxOutputSize
                                                                          , conf);
                        FSDataOutputStream @out = fs.Create(outputFile);
                        @out = CryptoUtils.WrapIfNecessary(conf, @out);
                        IFile.Writer <K, V> writer = new IFile.Writer <K, V>(conf, @out, keyClass, valueClass
                                                                             , codec, writesCounter, true);
                        // This object is itself the iterator over the segments queued above.
                        WriteFile(this, writer, reporter, conf);
                        writer.Close();
                        //we finished one single level merge; now clean up the priority
                        //queue
                        this.Close();
                        // Add the newly create segment to the list of segments to be merged
                        Merger.Segment <K, V> tempSegment = new Merger.Segment <K, V>(conf, fs, outputFile,
                                                                                      codec, false);
                        // Insert new merged segment into the sorted list
                        int pos = Sharpen.Collections.BinarySearch(segments, tempSegment, segmentComparator
                                                                   );
                        if (pos < 0)
                        {
                            // binary search failed. So position to be inserted at is -pos-1
                            pos = -pos - 1;
                        }
                        segments.Add(pos, tempSegment);
                        numSegments = segments.Count;
                        // Subtract the difference between expected size of new segment and
                        // actual size of new segment(Expected size of new segment is
                        // inputBytesOfThisMerge) from totalBytes. Expected size and actual
                        // size will match(almost) if combiner is not called in merge.
                        long inputBytesOfThisMerge = totalBytesProcessed - bytesProcessedInPrevMerges;
                        totalBytes -= inputBytesOfThisMerge - tempSegment.GetRawDataLength();
                        if (totalBytes != 0)
                        {
                            progPerByte = 1.0f / (float)totalBytes;
                        }
                        passNo++;
                    }
                    //we are worried about only the first pass merge factor. So reset the
                    //factor to what it originally was
                    factor = origFactor;
                }while (true);
            }