Пример #1
0
 /// <summary>
 /// Creates a segment covering all of <paramref name="file"/> and records an
 /// explicit raw data length for it.
 /// </summary>
 /// <remarks>
 /// Delegates to the eight-argument constructor overload with an offset of 0
 /// and a length of <c>fs.GetFileStatus(file).GetLen()</c>.
 /// </remarks>
 /// <param name="rawDataLength">Value stored in the <c>rawDataLength</c> field.</param>
 /// <exception cref="System.IO.IOException"/>
 public Segment(
     Configuration conf,
     FileSystem fs,
     Path file,
     CompressionCodec codec,
     bool preserve,
     Counters.Counter mergedMapOutputsCounter,
     long rawDataLength)
     : this(conf, fs, file, 0, fs.GetFileStatus(file).GetLen(), codec, preserve, mergedMapOutputsCounter)
 {
     this.rawDataLength = rawDataLength;
 }
Пример #2
0
 /// <summary>
 /// Convenience overload that forwards to the eight-argument <c>Merge</c>,
 /// passing 0 as the fourth argument (presumably the number of in-memory
 /// segments; confirm against that overload).
 /// </summary>
 /// <returns>Whatever the eight-argument overload returns.</returns>
 /// <exception cref="System.IO.IOException"/>
 public virtual RawKeyValueIterator Merge(
     Type keyClass,
     Type valueClass,
     int factor,
     Path tmpDir,
     Counters.Counter readsCounter,
     Counters.Counter writesCounter,
     Progress mergePhase)
 {
     return Merge(keyClass, valueClass, factor, 0, tmpDir, readsCounter, writesCounter, mergePhase);
 }
Пример #3
0
 /// <summary>Construct an IFile Reader over an already opened stream.</summary>
 /// <param name="conf">
 /// Configuration; when non-null it supplies <c>io.file.buffer.size</c>
 /// for the buffer size.
 /// </param>
 /// <param name="in">The input stream</param>
 /// <param name="length">
 /// Length of the data in the stream, including the checksum
 /// bytes.
 /// </param>
 /// <param name="codec">
 /// Compression codec, or null to read the checksummed stream directly.
 /// </param>
 /// <param name="readsCounter">Counter for records read from disk</param>
 /// <exception cref="System.IO.IOException"/>
 public Reader(Configuration conf, FSDataInputStream @in, long length,
               CompressionCodec codec, Counters.Counter readsCounter)
 {
     readRecordsCounter = readsCounter;
     checksumIn         = new IFileInputStream(@in, length, conf);

     // Pick the stream records are read from: decompressed when a codec and
     // a pooled decompressor are available, otherwise the checksummed stream.
     if (codec == null)
     {
         this.@in = checksumIn;
     }
     else
     {
         decompressor = CodecPool.GetDecompressor(codec);
         if (decompressor == null)
         {
             Log.Warn("Could not obtain decompressor from CodecPool");
             this.@in = checksumIn;
         }
         else
         {
             this.@in = codec.CreateInputStream(checksumIn, decompressor);
         }
     }

     this.fileLength = length;
     this.dataIn     = new DataInputStream(this.@in);

     if (conf != null)
     {
         bufferSize = conf.GetInt("io.file.buffer.size", DefaultBufferSize);
     }
 }
Пример #4
0
        /// <summary>
        /// Checks that a counter reports the value it was set to and that
        /// increments accumulate exactly.
        /// </summary>
        /// <remarks>
        /// Runs 100 rounds; each round sets a random initial value, applies 10
        /// random increments, then overwrites the value once more. The same
        /// ("foo", "bar") counter is looked up on every round.
        /// </remarks>
        public virtual void TestCounterValue()
        {
            Counters counters    = new Counters();
            int      NumberTests = 100;
            int      NumberInc   = 10;
            Random   rand        = new Random();

            for (int i = 0; i < NumberTests; i++)
            {
                long             initValue     = rand.Next();
                long             expectedValue = initValue;
                Counters.Counter counter       = counters.FindCounter("foo", "bar");
                counter.SetValue(initValue);
                // NUnit's argument order is (expected, actual, message); the
                // message must come last, not first as in JUnit.
                NUnit.Framework.Assert.AreEqual(expectedValue, counter.GetValue(),
                                                "Counter value is not initialized correctly");
                for (int j = 0; j < NumberInc; j++)
                {
                    int incValue = rand.Next();
                    counter.Increment(incValue);
                    expectedValue += incValue;
                    NUnit.Framework.Assert.AreEqual(expectedValue, counter.GetValue(),
                                                    "Counter value is not incremented correctly");
                }
                expectedValue = rand.Next();
                counter.SetValue(expectedValue);
                NUnit.Framework.Assert.AreEqual(expectedValue, counter.GetValue(),
                                                "Counter value is not set correctly");
            }
        }
Пример #5
0
 /// <summary>
 /// Bundles the collaborators handed to the constructor; every argument is
 /// stored unchanged in the field of the same name.
 /// </summary>
 public Context(
     TaskAttemptID reduceId,
     JobConf jobConf,
     FileSystem localFS,
     TaskUmbilicalProtocol umbilical,
     LocalDirAllocator localDirAllocator,
     Reporter reporter,
     CompressionCodec codec,
     Type combinerClass,
     Task.CombineOutputCollector <K, V> combineCollector,
     Counters.Counter spilledRecordsCounter,
     Counters.Counter reduceCombineInputCounter,
     Counters.Counter shuffledMapsCounter,
     Counters.Counter reduceShuffleBytes,
     Counters.Counter failedShuffleCounter,
     Counters.Counter mergedMapOutputsCounter,
     TaskStatus status,
     Progress copyPhase,
     Progress mergePhase,
     Task reduceTask,
     MapOutputFile mapOutputFile,
     IDictionary <TaskAttemptID, MapOutputFile> localMapFiles)
 {
     // Task identity and environment.
     this.reduceId          = reduceId;
     this.jobConf           = jobConf;
     this.localFS           = localFS;
     this.umbilical         = umbilical;
     this.localDirAllocator = localDirAllocator;
     this.reporter          = reporter;

     // Compression and combining.
     this.codec            = codec;
     this.combinerClass    = combinerClass;
     this.combineCollector = combineCollector;

     // Counters.
     this.spilledRecordsCounter     = spilledRecordsCounter;
     this.reduceCombineInputCounter = reduceCombineInputCounter;
     this.shuffledMapsCounter       = shuffledMapsCounter;
     this.reduceShuffleBytes        = reduceShuffleBytes;
     this.failedShuffleCounter      = failedShuffleCounter;
     this.mergedMapOutputsCounter   = mergedMapOutputsCounter;

     // Status and progress reporting.
     this.status     = status;
     this.copyPhase  = copyPhase;
     this.mergePhase = mergePhase;

     // Owning task and map-output locations.
     this.reduceTask    = reduceTask;
     this.mapOutputFile = mapOutputFile;
     this.localMapFiles = localMapFiles;
 }
Пример #6
0
 /// <summary>
 /// Creates a segment around an existing reader; the segment length is taken
 /// from <c>reader.GetLength()</c>.
 /// </summary>
 /// <param name="reader">Reader backing this segment; must not be null.</param>
 /// <param name="preserve">Stored in the <c>preserve</c> field.</param>
 /// <param name="mapOutputsCounter">Stored in the <c>mapOutputsCounter</c> field; may be null.</param>
 public Segment(IFile.Reader <K, V> reader, bool preserve, Counters.Counter mapOutputsCounter)
 {
     this.reader            = reader;
     this.preserve          = preserve;
     this.mapOutputsCounter = mapOutputsCounter;
     this.segmentLength     = reader.GetLength();
 }
Пример #7
0
 /// <summary>
 /// Builds a <c>Merger.MergeQueue</c> over <paramref name="segments"/> for
 /// <c>TaskType.Reduce</c> and runs its merge, passing
 /// <paramref name="inMemSegments"/> through.
 /// </summary>
 /// <returns>The iterator returned by the queue's <c>Merge</c> call.</returns>
 /// <exception cref="System.IO.IOException"/>
 public static RawKeyValueIterator Merge <K, V>(
     Configuration conf,
     FileSystem fs,
     IList <Merger.Segment <K, V> > segments,
     int mergeFactor,
     int inMemSegments,
     Path tmpDir,
     RawComparator <K> comparator,
     Progressable reporter,
     bool sortSegments,
     Counters.Counter readsCounter,
     Counters.Counter writesCounter,
     Progress mergePhase)
 {
     Merger.MergeQueue <K, V> queue = new Merger.MergeQueue <K, V>(
         conf, fs, segments, comparator, reporter, sortSegments, TaskType.Reduce);

     return queue.Merge(typeof(K), typeof(V), mergeFactor, inMemSegments, tmpDir,
                        readsCounter, writesCounter, mergePhase);
 }
Пример #8
0
 // Local directories
 /// <summary>
 /// Builds a <c>Merger.MergeQueue</c> over the given input files for
 /// <c>TaskType.Reduce</c> (with a null merged-map-outputs counter) and runs
 /// its merge.
 /// </summary>
 /// <returns>The iterator returned by the queue's <c>Merge</c> call.</returns>
 /// <exception cref="System.IO.IOException"/>
 public static RawKeyValueIterator Merge <K, V>(
     Configuration conf,
     FileSystem fs,
     CompressionCodec codec,
     Path[] inputs,
     bool deleteInputs,
     int mergeFactor,
     Path tmpDir,
     RawComparator <K> comparator,
     Progressable reporter,
     Counters.Counter readsCounter,
     Counters.Counter writesCounter,
     Progress mergePhase)
 {
     Merger.MergeQueue <K, V> queue = new Merger.MergeQueue <K, V>(
         conf, fs, inputs, deleteInputs, codec, comparator, reporter, null, TaskType.Reduce);

     return queue.Merge(typeof(K), typeof(V), mergeFactor, tmpDir,
                        readsCounter, writesCounter, mergePhase);
 }
Пример #9
0
 /// <summary>
 /// Creates a segment describing the byte range
 /// [<paramref name="segmentOffset"/>, +<paramref name="segmentLength"/>) of
 /// <paramref name="file"/>. Only stores its arguments; nothing is opened here.
 /// </summary>
 /// <param name="mergedMapOutputsCounter">Stored in the <c>mapOutputsCounter</c> field; may be null.</param>
 /// <exception cref="System.IO.IOException"/>
 public Segment(
     Configuration conf,
     FileSystem fs,
     Path file,
     long segmentOffset,
     long segmentLength,
     CompressionCodec codec,
     bool preserve,
     Counters.Counter mergedMapOutputsCounter)
 {
     // Where the data lives.
     this.conf          = conf;
     this.fs            = fs;
     this.file          = file;
     this.segmentOffset = segmentOffset;
     this.segmentLength = segmentLength;

     // How it is read and accounted for.
     this.codec             = codec;
     this.preserve          = preserve;
     this.mapOutputsCounter = mergedMapOutputsCounter;
 }
Пример #10
0
 /// <summary>
 /// Opens the segment's reader if it does not exist yet, then bumps
 /// <c>mapOutputsCounter</c> by one when that counter is set.
 /// </summary>
 /// <remarks>
 /// The counter is incremented on every call, whether or not a reader was
 /// created by this call.
 /// </remarks>
 /// <param name="readsCounter">Passed to the <c>IFile.Reader</c> created here.</param>
 /// <exception cref="System.IO.IOException"/>
 internal virtual void Init(Counters.Counter readsCounter)
 {
     if (reader == null)
     {
         // Position the raw stream before any wrapping is applied.
         FSDataInputStream rawStream = fs.Open(file);
         rawStream.Seek(segmentOffset);

         FSDataInputStream wrappedStream = CryptoUtils.WrapIfNecessary(conf, rawStream);
         reader = new IFile.Reader <K, V>(
             conf,
             wrappedStream,
             segmentLength - CryptoUtils.CryptoPadding(conf),
             codec,
             readsCounter);
     }

     if (mapOutputsCounter != null)
     {
         mapOutputsCounter.Increment(1);
     }
 }
Пример #11
0
 /// <summary>
 /// Two counters are equal when their name, display name and value all match.
 /// </summary>
 /// <remarks>
 /// Holds the lock on this instance for the whole comparison and additionally
 /// locks <paramref name="genericRight"/> while reading its state.
 /// </remarks>
 /// <param name="genericRight">Object to compare against.</param>
 /// <returns>
 /// False when <paramref name="genericRight"/> is not a <c>Counters.Counter</c>
 /// or any of the three compared properties differ.
 /// </returns>
 public override bool Equals(object genericRight)
 {
     lock (this)
     {
         if (!(genericRight is Counters.Counter))
         {
             return false;
         }

         lock (genericRight)
         {
             Counters.Counter other = (Counters.Counter)genericRight;

             if (!GetName().Equals(other.GetName()))
             {
                 return false;
             }
             if (!GetDisplayName().Equals(other.GetDisplayName()))
             {
                 return false;
             }
             return GetValue() == other.GetValue();
         }
     }
 }
Пример #12
0
 /// <summary>
 /// Sets up the skipping iterator: resolves the skipped-groups and
 /// skipped-records counters from <paramref name="reporter"/>, decides
 /// whether skipped records are written out, obtains the skip-range iterator
 /// from the enclosing task, and finally calls <c>MayBeSkip()</c>.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public SkippingReduceValuesIterator(
     ReduceTask _enclosing,
     RawKeyValueIterator @in,
     RawComparator <KEY> comparator,
     Type keyClass,
     Type valClass,
     Configuration conf,
     Task.TaskReporter reporter,
     TaskUmbilicalProtocol umbilical)
     : base(_enclosing)
 {
     this._enclosing = _enclosing;
     this.umbilical  = umbilical;

     this.skipGroupCounter = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedGroups);
     this.skipRecCounter   = (Counters.Counter)reporter.GetCounter(TaskCounter.ReduceSkippedRecords);

     // Skipped records are written only when the task asks for it AND a skip
     // output path is configured; the path lookup is skipped otherwise.
     bool taskWantsSkipRecs = this._enclosing.ToWriteSkipRecs();
     this.toWriteSkipRecs = taskWantsSkipRecs && SkipBadRecords.GetSkipOutputPath(conf) != null;

     this.keyClass = keyClass;
     this.valClass = valClass;
     this.reporter = reporter;
     this.skipIt   = this._enclosing.GetSkipRanges().SkipRangeIterator();

     this.MayBeSkip();
 }
Пример #13
0
            /// <summary>
            /// Creates the underlying record writer from the job's output format
            /// and adds to <c>fileOutputByteCounter</c> the change in
            /// <c>GetOutputBytes(fsStats)</c> observed across that creation.
            /// </summary>
            /// <remarks>
            /// File-system statistics are looked up only when the job's output
            /// format is a <c>FileOutputFormat</c>; otherwise <c>fsStats</c> is null.
            /// </remarks>
            /// <exception cref="System.IO.IOException"/>
            public OldTrackingRecordWriter(ReduceTask reduce, JobConf job,
                                           Task.TaskReporter reporter, string finalName)
            {
                this.reduceOutputCounter   = reduce.reduceOutputCounter;
                this.fileOutputByteCounter = reduce.fileOutputByteCounter;

                IList <FileSystem.Statistics> matchedStats =
                    job.GetOutputFormat() is FileOutputFormat
                        ? GetFsStatistics(FileOutputFormat.GetOutputPath(job), job)
                        : null;
                fsStats = matchedStats;

                FileSystem fs = FileSystem.Get(job);

                // Sample the byte count on both sides of writer creation.
                long bytesBefore = GetOutputBytes(fsStats);
                this.real = job.GetOutputFormat().GetRecordWriter(fs, job, finalName, reporter);
                long bytesAfter = GetOutputBytes(fsStats);

                fileOutputByteCounter.Increment(bytesAfter - bytesBefore);
            }
Пример #14
0
 /// <summary>
 /// Creates a writer that sends records through a checksumming stream and,
 /// when a codec and a pooled compressor are available, a compressing stream.
 /// </summary>
 /// <param name="conf">Used to build the <c>SerializationFactory</c> when <paramref name="keyClass"/> is non-null.</param>
 /// <param name="out">Destination stream; its current position is recorded in <c>start</c>.</param>
 /// <param name="keyClass">Key type; when null no serializers are created.</param>
 /// <param name="valueClass">Value type.</param>
 /// <param name="codec">Compression codec, or null for no compression.</param>
 /// <param name="writesCounter">Stored in <c>writtenRecordsCounter</c>.</param>
 /// <param name="ownOutputStream">Stored in <c>ownOutputStream</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 public Writer(
     Configuration conf,
     FSDataOutputStream @out,
     Type keyClass,
     Type valueClass,
     CompressionCodec codec,
     Counters.Counter writesCounter,
     bool ownOutputStream)
 {
     this.writtenRecordsCounter = writesCounter;
     this.checksumOut           = new IFileOutputStream(@out);
     this.rawOut                = @out;
     this.start                 = this.rawOut.GetPos();

     if (codec == null)
     {
         this.@out = new FSDataOutputStream(checksumOut, null);
     }
     else
     {
         this.compressor = CodecPool.GetCompressor(codec);
         if (this.compressor == null)
         {
             // No compressor available: fall back to the plain checksummed stream.
             Log.Warn("Could not obtain compressor from CodecPool");
             this.@out = new FSDataOutputStream(checksumOut, null);
         }
         else
         {
             this.compressor.Reset();
             this.compressedOut  = codec.CreateOutputStream(checksumOut, compressor);
             this.@out           = new FSDataOutputStream(this.compressedOut, null);
             this.compressOutput = true;
         }
     }

     this.keyClass   = keyClass;
     this.valueClass = valueClass;
     if (keyClass != null)
     {
         // Both serializers write into the shared buffer.
         SerializationFactory factory = new SerializationFactory(conf);
         this.keySerializer = factory.GetSerializer(keyClass);
         this.keySerializer.Open(buffer);
         this.valueSerializer = factory.GetSerializer(valueClass);
         this.valueSerializer.Open(buffer);
     }

     this.ownOutputStream = ownOutputStream;
 }
Пример #15
0
            /// <summary>
            /// Merges the queued segments in passes until at most <c>factor</c>
            /// segments remain, then returns this queue as the iterator over the
            /// final merge.
            /// </summary>
            /// <remarks>
            /// Every intermediate pass writes its merged output to a file whose name
            /// is derived from <c>tmpDir/intermediate.&lt;passNo&gt;</c> (resolved through
            /// <c>lDirAlloc</c>) and inserts a segment for that file back into the
            /// sorted segment list.
            /// </remarks>
            /// <param name="keyClass">Key type handed to the writer of intermediate files.</param>
            /// <param name="valueClass">Value type handed to the writer of intermediate files.</param>
            /// <param name="factor">Merge factor; adjusted per pass via <c>GetPassFactor</c>.</param>
            /// <param name="inMem">
            /// Number of in-memory segments; they are assumed to be the first entries
            /// of the segment list and are added on top of the first pass's factor.
            /// </param>
            /// <param name="tmpDir">Parent path used to name intermediate files.</param>
            /// <param name="readsCounter">Passed to <c>Segment.Init</c> for each segment opened.</param>
            /// <param name="writesCounter">Passed to the writer of intermediate files.</param>
            /// <param name="mergePhase">When non-null, replaces <c>mergeProgress</c>.</param>
            /// <returns>This queue, primed with the segments of the final pass.</returns>
            /// <exception cref="System.IO.IOException"/>
            internal virtual RawKeyValueIterator Merge(Type keyClass, Type valueClass, int factor
                                                       , int inMem, Path tmpDir, Counters.Counter readsCounter, Counters.Counter writesCounter
                                                       , Progress mergePhase)
            {
                Log.Info("Merging " + segments.Count + " sorted segments");

                /*
                 * If there are inMemory segments, then they come first in the segments
                 * list and then the sorted disk segments. Otherwise(if there are only
                 * disk segments), then they are sorted segments if there are more than
                 * factor segments in the segments list.
                 */
                int numSegments = segments.Count;
                int origFactor  = factor;
                int passNo      = 1;

                if (mergePhase != null)
                {
                    mergeProgress = mergePhase;
                }
                long totalBytes = ComputeBytesInMerges(factor, inMem);

                // Guard against division by zero when there is nothing to merge.
                if (totalBytes != 0)
                {
                    progPerByte = 1.0f / (float)totalBytes;
                }
                // One iteration per merge pass; the loop exits only through the
                // return in the "last pass" branch below.
                do
                {
                    //create the MergeStreams from the sorted map created in the constructor
                    //and dump the final output to a file
                    //get the factor for this pass of merge. We assume in-memory segments
                    //are the first entries in the segment list and that the pass factor
                    //doesn't apply to them
                    factor = GetPassFactor(factor, passNo, numSegments - inMem);
                    if (1 == passNo)
                    {
                        factor += inMem;
                    }
                    IList <Merger.Segment <K, V> > segmentsToMerge = new AList <Merger.Segment <K, V> >();
                    int  segmentsConsidered    = 0;
                    int  numSegmentsToConsider = factor;
                    // starting bytes of segments of this merge
                    long startBytes            = 0;
                    // Keep pulling candidates until 'factor' non-empty segments are
                    // collected or the segment list is exhausted.
                    while (true)
                    {
                        //extract the smallest 'factor' number of segments
                        //Call cleanup on the empty segments (no key/value data)
                        IList <Merger.Segment <K, V> > mStream = GetSegmentDescriptors(numSegmentsToConsider
                                                                                       );
                        foreach (Merger.Segment <K, V> segment in mStream)
                        {
                            // Initialize the segment at the last possible moment;
                            // this helps in ensuring we don't use buffers until we need them
                            segment.Init(readsCounter);
                            // Measure how many bytes reading the first key consumed.
                            long startPos = segment.GetReader().bytesRead;
                            bool hasNext  = segment.NextRawKey();
                            long endPos   = segment.GetReader().bytesRead;
                            if (hasNext)
                            {
                                startBytes += endPos - startPos;
                                segmentsToMerge.AddItem(segment);
                                segmentsConsidered++;
                            }
                            else
                            {
                                //we ignore this segment for the merge
                                segment.Close();
                                numSegments--;
                            }
                        }
                        //if we have the desired number of segments
                        //or looked at all available segments, we break
                        if (segmentsConsidered == factor || segments.Count == 0)
                        {
                            break;
                        }
                        // Ask only for as many segments as are still missing.
                        numSegmentsToConsider = factor - segmentsConsidered;
                    }
                    //feed the streams to the priority queue
                    Initialize(segmentsToMerge.Count);
                    Clear();
                    foreach (Merger.Segment <K, V> segment_1 in segmentsToMerge)
                    {
                        Put(segment_1);
                    }
                    //if we have lesser number of segments remaining, then just return the
                    //iterator, else do another single level merge
                    if (numSegments <= factor)
                    {
                        if (!includeFinalMerge)
                        {
                            // for reduce task
                            // Reset totalBytesProcessed and recalculate totalBytes from the
                            // remaining segments to track the progress of the final merge.
                            // Final merge is considered as the progress of the reducePhase,
                            // the 3rd phase of reduce task.
                            totalBytesProcessed = 0;
                            totalBytes          = 0;
                            for (int i = 0; i < segmentsToMerge.Count; i++)
                            {
                                totalBytes += segmentsToMerge[i].GetRawDataLength();
                            }
                        }
                        if (totalBytes != 0)
                        {
                            //being paranoid
                            progPerByte = 1.0f / (float)totalBytes;
                        }
                        totalBytesProcessed += startBytes;
                        // With nothing to process, report the merge as complete.
                        if (totalBytes != 0)
                        {
                            mergeProgress.Set(totalBytesProcessed * progPerByte);
                        }
                        else
                        {
                            mergeProgress.Set(1.0f);
                        }
                        // Last pass and no segments left - we're done
                        Log.Info("Down to the last merge-pass, with " + numSegments + " segments left of total size: "
                                 + (totalBytes - totalBytesProcessed) + " bytes");
                        return(this);
                    }
                    else
                    {
                        Log.Info("Merging " + segmentsToMerge.Count + " intermediate segments out of a total of "
                                 + (segments.Count + segmentsToMerge.Count));
                        long bytesProcessedInPrevMerges = totalBytesProcessed;
                        totalBytesProcessed += startBytes;
                        //we want to spread the creation of temp files on multiple disks if
                        //available under the space constraints
                        long approxOutputSize = 0;
                        foreach (Merger.Segment <K, V> s in segmentsToMerge)
                        {
                            approxOutputSize += s.GetLength() + ChecksumFileSystem.GetApproxChkSumLength(s.GetLength
                                                                                                             ());
                        }
                        Path tmpFilename = new Path(tmpDir, "intermediate").Suffix("." + passNo);
                        Path outputFile  = lDirAlloc.GetLocalPathForWrite(tmpFilename.ToString(), approxOutputSize
                                                                          , conf);
                        FSDataOutputStream @out = fs.Create(outputFile);
                        @out = CryptoUtils.WrapIfNecessary(conf, @out);
                        // The writer is created with ownOutputStream = true.
                        IFile.Writer <K, V> writer = new IFile.Writer <K, V>(conf, @out, keyClass, valueClass
                                                                             , codec, writesCounter, true);
                        WriteFile(this, writer, reporter, conf);
                        writer.Close();
                        //we finished one single level merge; now clean up the priority
                        //queue
                        this.Close();
                        // Add the newly create segment to the list of segments to be merged
                        Merger.Segment <K, V> tempSegment = new Merger.Segment <K, V>(conf, fs, outputFile,
                                                                                      codec, false);
                        // Insert new merged segment into the sorted list
                        int pos = Sharpen.Collections.BinarySearch(segments, tempSegment, segmentComparator
                                                                   );
                        if (pos < 0)
                        {
                            // binary search failed. So position to be inserted at is -pos-1
                            pos = -pos - 1;
                        }
                        segments.Add(pos, tempSegment);
                        numSegments = segments.Count;
                        // Subtract the difference between expected size of new segment and
                        // actual size of new segment(Expected size of new segment is
                        // inputBytesOfThisMerge) from totalBytes. Expected size and actual
                        // size will match(almost) if combiner is not called in merge.
                        long inputBytesOfThisMerge = totalBytesProcessed - bytesProcessedInPrevMerges;
                        totalBytes -= inputBytesOfThisMerge - tempSegment.GetRawDataLength();
                        if (totalBytes != 0)
                        {
                            progPerByte = 1.0f / (float)totalBytes;
                        }
                        passNo++;
                    }
                    //we are worried about only the first pass merge factor. So reset the
                    //factor to what it originally was
                    factor = origFactor;
                }while (true);
            }
Пример #16
0
 /// <summary>
 /// Creates a writer via the seven-argument constructor, passing
 /// <c>false</c> for <c>ownOutputStream</c>.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public Writer(
     Configuration conf,
     FSDataOutputStream @out,
     Type keyClass,
     Type valueClass,
     CompressionCodec codec,
     Counters.Counter writesCounter)
     : this(conf, @out, keyClass, valueClass, codec, writesCounter, false)
 {
 }
Пример #17
0
 /// <summary>
 /// Forwards to the <c>Merge</c> overload that takes explicit key/value
 /// types, supplying <c>typeof(K)</c>, <c>typeof(V)</c> and <c>false</c> for
 /// the argument that follows <paramref name="reporter"/>.
 /// </summary>
 /// <returns>Whatever the forwarded overload returns.</returns>
 /// <exception cref="System.IO.IOException"/>
 public static RawKeyValueIterator Merge <K, V>(
     Configuration conf,
     FileSystem fs,
     IList <Merger.Segment <K, V> > segments,
     int mergeFactor,
     Path tmpDir,
     RawComparator <K> comparator,
     Progressable reporter,
     Counters.Counter readsCounter,
     Counters.Counter writesCounter,
     Progress mergePhase)
 {
     return Merge(conf, fs, typeof(K), typeof(V), segments, mergeFactor, tmpDir, comparator,
                  reporter, false, readsCounter, writesCounter, mergePhase);
 }
Пример #18
0
 /// <summary>
 /// Builds a merge queue with one segment per input file and sorts the
 /// segments with <c>segmentComparator</c>.
 /// </summary>
 /// <param name="conf">Configuration passed to every segment.</param>
 /// <param name="fs">File system the inputs live on.</param>
 /// <param name="inputs">Files to merge; must not be null.</param>
 /// <param name="deleteInputs">
 /// When true the segments are created with <c>preserve = false</c>.
 /// </param>
 /// <param name="codec">Compression codec passed to every segment.</param>
 /// <param name="comparator">Stored in <c>comparator</c>.</param>
 /// <param name="reporter">Stored in <c>reporter</c>.</param>
 /// <param name="mergedMapOutputsCounter">
 /// Counter attached to each segment, except for files whose path string
 /// ends with <c>Task.MergedOutputPrefix</c>, which get a null counter.
 /// </param>
 /// <param name="taskType">
 /// For <c>TaskType.Map</c>, <c>ConsiderFinalMergeForProgress()</c> is called.
 /// </param>
 /// <exception cref="System.IO.IOException"/>
 public MergeQueue(Configuration conf, FileSystem fs, Path[] inputs, bool deleteInputs
                   , CompressionCodec codec, RawComparator <K> comparator, Progressable reporter, Counters.Counter
                   mergedMapOutputsCounter, TaskType taskType)
 {
     this.conf       = conf;
     this.fs         = fs;
     this.codec      = codec;
     this.comparator = comparator;
     this.reporter   = reporter;
     if (taskType == TaskType.Map)
     {
         ConsiderFinalMergeForProgress();
     }
     foreach (Path file in inputs)
     {
         Log.Debug("MergeQ: adding: " + file);
         // Path suffixes are identifiers, not linguistic text: compare
         // ordinally so the result does not depend on the current culture.
         bool isMergedOutput = file.ToString().EndsWith(Task.MergedOutputPrefix,
                                                        System.StringComparison.Ordinal);
         Counters.Counter segmentCounter = isMergedOutput ? null : mergedMapOutputsCounter;
         segments.AddItem(new Merger.Segment <K, V>(conf, fs, file, codec, !deleteInputs,
                                                    segmentCounter));
     }
     // Sort segments on file-lengths
     segments.Sort(segmentComparator);
 }
Пример #19
0
 /// <summary>
 /// Minimal constructor for subclasses: only records the counter used for
 /// records written to disk.
 /// </summary>
 /// <param name="writesCounter">Stored in <c>writtenRecordsCounter</c>.</param>
 protected internal Writer(Counters.Counter writesCounter)
 {
     this.writtenRecordsCounter = writesCounter;
 }
Пример #20
0
 /// <summary>
 /// Construct an IFile Reader for a file, opening it through
 /// <paramref name="fs"/> and using the file's reported length.
 /// </summary>
 /// <param name="conf">Configuration File</param>
 /// <param name="fs">FileSystem</param>
 /// <param name="file">
 /// Path of the file to be opened. This file should have
 /// checksum bytes for the data at the end of the file.
 /// </param>
 /// <param name="codec">codec</param>
 /// <param name="readsCounter">Counter for records read from disk</param>
 /// <exception cref="System.IO.IOException"/>
 public Reader(
     Configuration conf,
     FileSystem fs,
     Path file,
     CompressionCodec codec,
     Counters.Counter readsCounter)
     : this(conf, fs.Open(file), fs.GetFileStatus(file).GetLen(), codec, readsCounter)
 {
 }
Пример #21
0
 /// <summary>
 /// Compares this counter's wrapped <c>realCounter</c> with the counter
 /// underlying <paramref name="counter"/> using <c>Equals</c>.
 /// </summary>
 /// <param name="counter">Counter to compare against; must not be null.</param>
 /// <returns>The result of <c>realCounter.Equals(...)</c>.</returns>
 public virtual bool ContentEquals(Counters.Counter counter)
 {
     return realCounter.Equals(counter.GetUnderlyingCounter());
 }
 /// <summary>
 /// Placeholder override: always reports that the contents differ.
 /// </summary>
 /// <param name="counter">Ignored.</param>
 /// <returns>Always <c>false</c>.</returns>
 public override bool ContentEquals(Counters.Counter counter)
 {
     // TODO Auto-generated method stub
     // NOTE(review): no comparison is performed here; confirm whether the
     // enclosing class is meant to implement content equality.
     return false;
 }