Exemple #1
0
        /// <summary>
        /// Records the current read position as the point a later reset starts
        /// from, closing and discarding every segment in front of the active one.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public virtual void Mark()
        {
            // HasNext reads one KV pair in advance. If that read-ahead already
            // moved on to a new segment but the caller has not consumed the pair
            // yet, the mark still belongs to the previous segment, so step the
            // index back.
            if (nextKVOffset == 0)
            {
                System.Diagnostics.Debug.Assert((readSegmentIndex != 0));
                System.Diagnostics.Debug.Assert((currentKVOffset != 0));
                readSegmentIndex--;
            }
            // Drop the segments before the current active segment. The list is
            // trimmed from the head by index: IEnumerator<T> has no
            // HasNext/Next/Remove members and a collection must not be modified
            // while it is being enumerated.
            int dropped = 0;
            while (segmentList.Count > 0 && dropped != readSegmentIndex)
            {
                // Close before removing so a failing Close leaves the segment listed.
                segmentList[0].Close();
                segmentList.RemoveAt(0);
                dropped++;
                Log.Debug("Dropping a segment");
            }
            // firstSegmentOffset is the offset in the (now first) active segment
            // from where reading has to start on the next reset.
            firstSegmentOffset = currentKVOffset;
            readSegmentIndex   = 0;
            Log.Debug("Setting the FirsSegmentOffset to " + currentKVOffset);
        }
Exemple #2
0
 /// <summary>Orders two segments by ascending <c>GetLength()</c>.</summary>
 /// <param name="o1">Left-hand segment.</param>
 /// <param name="o2">Right-hand segment.</param>
 /// <returns>-1 if o1 is shorter, 1 if it is longer, 0 if both lengths match.</returns>
 public int Compare(Merger.Segment <K, V> o1, Merger.Segment <K, V> o2)
 {
     if (o1.GetLength() < o2.GetLength())
     {
         return -1;
     }
     // Not shorter: either longer or exactly the same length.
     return o1.GetLength() > o2.GetLength() ? 1 : 0;
 }
Exemple #3
0
 /// <summary>
 /// Closes the active writer, builds a segment over <c>this.file</c> and
 /// appends it to the enclosing store's segment list.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 internal virtual void CreateInDiskSegment()
 {
     System.Diagnostics.Debug.Assert(this.writer != null);
     this.writer.Close();
     Merger.Segment <K, V> diskSegment =
         new Merger.Segment <K, V>(this.conf, this.fs, this.file, null, true);
     // The writer has been closed above; forget it.
     this.writer = null;
     var segments = this._enclosing.segmentList;
     segments.AddItem(diskSegment);
     BackupStore.Log.Debug("Disk Segment added to List. Size is " + segments.Count);
 }
Exemple #4
0
            /// <summary>
            /// Advances to the next record of the merge, loading its key and value
            /// from the segment at the top of the priority queue and updating the
            /// merge progress by the number of bytes read.
            /// </summary>
            /// <returns>true if a record was loaded; false once the queue is empty.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual bool Next()
            {
                if (Size() == 0)
                {
                    ResetKeyValue();
                    return false;
                }
                // minSegment is null only on the first invocation, when the queue
                // is already ready for use. On every later invocation the segment
                // consumed last time has to be advanced (or dropped) first.
                if (minSegment != null)
                {
                    AdjustPriorityQueue(minSegment);
                    if (Size() == 0)
                    {
                        minSegment = null;
                        ResetKeyValue();
                        return false;
                    }
                }
                minSegment = Top();
                long bytesBefore = minSegment.GetReader().bytesRead;

                key = minSegment.GetKey();
                if (minSegment.InMemory())
                {
                    // Loading from an in-memory segment points "value" at that
                    // segment's own byte[].
                    minSegment.GetValue(value);
                }
                else
                {
                    // Disk values must not be read into the byte[] of an in-memory
                    // segment, as that would corrupt the segment's data. They go
                    // into the dedicated diskIFileValue buffer instead, and
                    // "value" is re-pointed at that buffer (which is reused for
                    // every disk segment).
                    minSegment.GetValue(diskIFileValue);
                    value.Reset(diskIFileValue.GetData(), diskIFileValue.GetLength());
                }
                long bytesAfter = minSegment.GetReader().bytesRead;

                totalBytesProcessed += bytesAfter - bytesBefore;
                mergeProgress.Set(totalBytesProcessed * progPerByte);
                return true;
            }
Exemple #5
0
            /// <summary>
            /// Advances <paramref name="reader"/> to its next raw key, accounts for
            /// the bytes read, and then either re-positions the top of the queue or,
            /// if the segment has no further key, pops and closes it.
            /// </summary>
            /// <param name="reader">The segment to advance.</param>
            /// <exception cref="System.IO.IOException"/>
            private void AdjustPriorityQueue(Merger.Segment <K, V> reader)
            {
                long bytesBefore = reader.GetReader().bytesRead;
                bool advanced    = reader.NextRawKey();
                long bytesAfter  = reader.GetReader().bytesRead;

                totalBytesProcessed += bytesAfter - bytesBefore;
                mergeProgress.Set(totalBytesProcessed * progPerByte);
                if (!advanced)
                {
                    // No further key in this segment: remove it from the queue.
                    Pop();
                    reader.Close();
                    return;
                }
                AdjustTop();
            }
Exemple #6
0
 /// <summary>
 /// Switches the store into reset mode and rewinds every segment so that the
 /// next read starts at <c>firstSegmentOffset</c> of the first segment.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Reset()
 {
     // The records written so far are turned into a new segment only when we
     // are not already in reset mode.
     if (!inReset)
     {
         if (fileCache.isActive)
         {
             fileCache.CreateInDiskSegment();
         }
         else
         {
             memCache.CreateInMemorySegment();
         }
     }
     inReset = true;
     // Put each segment back at the position the next read should begin from.
     for (int index = 0; index < segmentList.Count; index++)
     {
         Merger.Segment <K, V> segment = segmentList[index];
         bool isFirst = (index == 0);
         if (!segment.InMemory())
         {
             // Disk segment: close its reader; only the first segment gets a
             // reader re-created here.
             segment.CloseReader();
             if (isFirst)
             {
                 segment.ReinitReader(firstSegmentOffset);
                 segment.GetReader().DisableChecksumValidation();
             }
             continue;
         }
         // In-memory segment: only the first one resumes from a non-zero offset.
         int startOffset = 0;
         if (isFirst)
         {
             startOffset = firstSegmentOffset;
         }
         segment.GetReader().Reset(startOffset);
     }
     currentKVOffset  = firstSegmentOffset;
     nextKVOffset     = -1;
     readSegmentIndex = 0;
     hasMore          = false;
     lastSegmentEOF   = false;
     Log.Debug("Reset - First segment offset is " + firstSegmentOffset + " Segment List Size is "
               + segmentList.Count);
 }
Exemple #7
0
 /// <summary>
 /// Turns the current buffer into an in-memory segment and appends it to the
 /// enclosing store's segment list. If nothing was written to the block, the
 /// whole reservation is released and no segment is created.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 internal virtual void CreateInMemorySegment()
 {
     // Nothing was written in this block (the record was larger than the
     // allocated block size): give the reservation back and stop.
     if (this.usedSize == 0)
     {
         this.ramManager.Unreserve(this.blockSize);
         return;
     }
     // spaceAvailable is expected to have left room for the EOF markers.
     System.Diagnostics.Debug.Assert(
         (this.blockSize - this.usedSize) >= BackupStore.EofMarkerSize);
     WritableUtils.WriteVInt(this.dataOut, IFile.EofMarker);
     WritableUtils.WriteVInt(this.dataOut, IFile.EofMarker);
     this.usedSize += BackupStore.EofMarkerSize;
     // Release the part of the block that was reserved but never used.
     this.ramManager.Unreserve(this.blockSize - this.usedSize);
     IFile.Reader <K, V> memoryReader = new InMemoryReader <K, V>(
         null, (TaskAttemptID)this._enclosing.tid, this.dataOut.GetData(), 0, this.usedSize,
         this._enclosing.conf);
     Merger.Segment <K, V> memorySegment = new Merger.Segment <K, V>(memoryReader, false);
     var segments = this._enclosing.segmentList;
     segments.AddItem(memorySegment);
     BackupStore.Log.Debug("Added Memory Segment to List. List Size is " + segments.Count);
 }
Exemple #8
0
 /// <summary>
 /// Reports whether another key/value pair is available, reading it ahead
 /// into <c>currentKey</c>/<c>currentValue</c> and moving on to the next
 /// segment when the current one is exhausted.
 /// </summary>
 /// <returns>true if a pair has been read ahead; false after the last segment ended.</returns>
 /// <exception cref="System.IO.IOException">
 /// Thrown when the segment that is moved on to yields no key/value pair.
 /// </exception>
 public virtual bool HasNext()
 {
     if (lastSegmentEOF)
     {
         return false;
     }
     // HasNext may be called several times before next(); hasMore remembers
     // that a pair has already been read ahead so it is not read again. It is
     // cleared when the caller actually consumes the record in next().
     if (hasMore)
     {
         return true;
     }
     Merger.Segment <K, V> current = segmentList[readSegmentIndex];
     // Remember where this pair starts; it becomes currentKVOffset once the
     // caller consumes the record in next().
     nextKVOffset = (int)current.GetActualPosition();
     if (current.NextRawKey())
     {
         currentKey = current.GetKey();
         current.GetValue(currentValue);
         hasMore = true;
         return true;
     }
     // The current segment is exhausted.
     if (!current.InMemory())
     {
         current.CloseReader();
     }
     // If it was the last segment, flag the end of data and stop.
     bool onLastSegment = (readSegmentIndex == segmentList.Count - 1);
     if (onLastSegment)
     {
         nextKVOffset   = -1;
         lastSegmentEOF = true;
         return false;
     }
     nextKVOffset = 0;
     readSegmentIndex++;
     Merger.Segment <K, V> upcoming = segmentList[readSegmentIndex];
     // We may be moving from a memory segment to a disk segment. Re-point
     // currentValue at the disk buffer so that the in-memory segment buffer
     // is not corrupted. See HADOOP-5494
     if (!upcoming.InMemory())
     {
         currentValue.Reset(currentDiskValue.GetData(), currentDiskValue.GetLength());
         upcoming.Init(null);
     }
     if (!upcoming.NextRawKey())
     {
         throw new IOException("New segment did not have even one K/V");
     }
     currentKey = upcoming.GetKey();
     upcoming.GetValue(currentValue);
     hasMore = true;
     return true;
 }
Exemple #9
0
            /// <summary>
            /// Merges the sorted segments held in <c>segments</c>. While more segments
            /// remain than one pass can take, a pass merges a group of segments into an
            /// intermediate file that is inserted back into <c>segments</c>; once the
            /// remaining segments fit into a single pass, the priority queue is loaded
            /// with them and this object is returned as the iterator over that final pass.
            /// </summary>
            /// <param name="keyClass">Key type handed to the writer of intermediate files.</param>
            /// <param name="valueClass">Value type handed to the writer of intermediate files.</param>
            /// <param name="factor">Requested number of segments per pass; adjusted per pass via GetPassFactor.</param>
            /// <param name="inMem">Number of in-memory segments, assumed to be the first entries of the segment list.</param>
            /// <param name="tmpDir">Directory under which intermediate files are named.</param>
            /// <param name="readsCounter">Counter passed to each segment's Init.</param>
            /// <param name="writesCounter">Counter passed to the writer of intermediate files.</param>
            /// <param name="mergePhase">If non-null, replaces <c>mergeProgress</c> as the progress to update.</param>
            /// <returns>This instance, positioned for the last merge pass.</returns>
            /// <exception cref="System.IO.IOException"/>
            internal virtual RawKeyValueIterator Merge(Type keyClass, Type valueClass, int factor
                                                       , int inMem, Path tmpDir, Counters.Counter readsCounter, Counters.Counter writesCounter
                                                       , Progress mergePhase)
            {
                Log.Info("Merging " + segments.Count + " sorted segments");

                /*
                 * If there are inMemory segments, then they come first in the segments
                 * list and then the sorted disk segments. Otherwise(if there are only
                 * disk segments), then they are sorted segments if there are more than
                 * factor segments in the segments list.
                 */
                int numSegments = segments.Count;
                int origFactor  = factor;
                int passNo      = 1;

                if (mergePhase != null)
                {
                    mergeProgress = mergePhase;
                }
                long totalBytes = ComputeBytesInMerges(factor, inMem);

                // Guard against division by zero when there is nothing to merge.
                if (totalBytes != 0)
                {
                    progPerByte = 1.0f / (float)totalBytes;
                }
                // One iteration per merge pass; the loop is left only through the
                // return in the final-pass branch below.
                do
                {
                    //create the MergeStreams from the sorted map created in the constructor
                    //and dump the final output to a file
                    //get the factor for this pass of merge. We assume in-memory segments
                    //are the first entries in the segment list and that the pass factor
                    //doesn't apply to them
                    factor = GetPassFactor(factor, passNo, numSegments - inMem);
                    if (1 == passNo)
                    {
                        // In-memory segments are taken in addition to the pass factor
                        // on the first pass.
                        factor += inMem;
                    }
                    IList <Merger.Segment <K, V> > segmentsToMerge = new AList <Merger.Segment <K, V> >();
                    int  segmentsConsidered    = 0;
                    int  numSegmentsToConsider = factor;
                    // bytes already read from the segments of this merge while
                    // positioning each of them on its first key
                    long startBytes            = 0;
                    while (true)
                    {
                        //extract the smallest 'factor' number of segments
                        //Call cleanup on the empty segments (no key/value data)
                        IList <Merger.Segment <K, V> > mStream = GetSegmentDescriptors(numSegmentsToConsider
                                                                                       );
                        foreach (Merger.Segment <K, V> segment in mStream)
                        {
                            // Initialize the segment at the last possible moment;
                            // this helps in ensuring we don't use buffers until we need them
                            segment.Init(readsCounter);
                            long startPos = segment.GetReader().bytesRead;
                            bool hasNext  = segment.NextRawKey();
                            long endPos   = segment.GetReader().bytesRead;
                            if (hasNext)
                            {
                                startBytes += endPos - startPos;
                                segmentsToMerge.AddItem(segment);
                                segmentsConsidered++;
                            }
                            else
                            {
                                //the segment has no key/value data: we ignore this
                                //segment for the merge
                                segment.Close();
                                numSegments--;
                            }
                        }
                        //if we have the desired number of segments
                        //or looked at all available segments, we break
                        if (segmentsConsidered == factor || segments.Count == 0)
                        {
                            break;
                        }
                        // Some segments were empty: ask for as many more as are missing.
                        numSegmentsToConsider = factor - segmentsConsidered;
                    }
                    //feed the streams to the priority queue
                    Initialize(segmentsToMerge.Count);
                    Clear();
                    foreach (Merger.Segment <K, V> segment_1 in segmentsToMerge)
                    {
                        Put(segment_1);
                    }
                    //if we have lesser number of segments remaining, then just return the
                    //iterator, else do another single level merge
                    if (numSegments <= factor)
                    {
                        if (!includeFinalMerge)
                        {
                            // for reduce task
                            // Reset totalBytesProcessed and recalculate totalBytes from the
                            // remaining segments to track the progress of the final merge.
                            // Final merge is considered as the progress of the reducePhase,
                            // the 3rd phase of reduce task.
                            totalBytesProcessed = 0;
                            totalBytes          = 0;
                            for (int i = 0; i < segmentsToMerge.Count; i++)
                            {
                                totalBytes += segmentsToMerge[i].GetRawDataLength();
                            }
                        }
                        if (totalBytes != 0)
                        {
                            //being paranoid
                            progPerByte = 1.0f / (float)totalBytes;
                        }
                        totalBytesProcessed += startBytes;
                        if (totalBytes != 0)
                        {
                            mergeProgress.Set(totalBytesProcessed * progPerByte);
                        }
                        else
                        {
                            // Nothing to process: report the merge as complete.
                            mergeProgress.Set(1.0f);
                        }
                        // Last pass and no segments left - we're done
                        Log.Info("Down to the last merge-pass, with " + numSegments + " segments left of total size: "
                                 + (totalBytes - totalBytesProcessed) + " bytes");
                        return(this);
                    }
                    else
                    {
                        Log.Info("Merging " + segmentsToMerge.Count + " intermediate segments out of a total of "
                                 + (segments.Count + segmentsToMerge.Count));
                        // Remembered so the input size of this pass can be derived
                        // after the intermediate file has been written.
                        long bytesProcessedInPrevMerges = totalBytesProcessed;
                        totalBytesProcessed += startBytes;
                        //we want to spread the creation of temp files on multiple disks if
                        //available under the space constraints
                        long approxOutputSize = 0;
                        foreach (Merger.Segment <K, V> s in segmentsToMerge)
                        {
                            approxOutputSize += s.GetLength() + ChecksumFileSystem.GetApproxChkSumLength(s.GetLength
                                                                                                             ());
                        }
                        // The intermediate file is named "intermediate.<passNo>" under tmpDir.
                        Path tmpFilename = new Path(tmpDir, "intermediate").Suffix("." + passNo);
                        Path outputFile  = lDirAlloc.GetLocalPathForWrite(tmpFilename.ToString(), approxOutputSize
                                                                          , conf);
                        FSDataOutputStream @out = fs.Create(outputFile);
                        @out = CryptoUtils.WrapIfNecessary(conf, @out);
                        IFile.Writer <K, V> writer = new IFile.Writer <K, V>(conf, @out, keyClass, valueClass
                                                                             , codec, writesCounter, true);
                        // NOTE(review): writer is not closed if WriteFile throws - confirm
                        // whether callers rely on that.
                        WriteFile(this, writer, reporter, conf);
                        writer.Close();
                        //we finished one single level merge; now clean up the priority
                        //queue
                        this.Close();
                        // Add the newly create segment to the list of segments to be merged
                        Merger.Segment <K, V> tempSegment = new Merger.Segment <K, V>(conf, fs, outputFile,
                                                                                      codec, false);
                        // Insert new merged segment into the sorted list
                        int pos = Sharpen.Collections.BinarySearch(segments, tempSegment, segmentComparator
                                                                   );
                        if (pos < 0)
                        {
                            // binary search failed. So position to be inserted at is -pos-1
                            pos = -pos - 1;
                        }
                        segments.Add(pos, tempSegment);
                        numSegments = segments.Count;
                        // Subtract the difference between expected size of new segment and
                        // actual size of new segment(Expected size of new segment is
                        // inputBytesOfThisMerge) from totalBytes. Expected size and actual
                        // size will match(almost) if combiner is not called in merge.
                        long inputBytesOfThisMerge = totalBytesProcessed - bytesProcessedInPrevMerges;
                        totalBytes -= inputBytesOfThisMerge - tempSegment.GetRawDataLength();
                        if (totalBytes != 0)
                        {
                            progPerByte = 1.0f / (float)totalBytes;
                        }
                        passNo++;
                    }
                    //we are worried about only the first pass merge factor. So reset the
                    //factor to what it originally was
                    factor = origFactor;
                }while (true);
            }