Exemplo n.º 1
0
        /// <summary>
        /// Builds the forced-merge specification while honoring the maximum merge
        /// size and maximum merge docs limits. Segments are scanned from index
        /// <c>last - 1</c> down to 0. A segment that exceeds either limit is left
        /// out of every merge and closes the run of candidates to its right; a run
        /// is also closed as soon as it holds merge-factor segments.
        /// <para/>
        /// Because of these limits more than <paramref name="maxNumSegments"/>
        /// segments may remain in the index afterwards. Note that the parameter is
        /// not read anywhere in this method body.
        /// </summary>
        /// <param name="infos">The segments of the index.</param>
        /// <param name="maxNumSegments">Requested segment count (unused here).</param>
        /// <param name="last">Exclusive upper index of the segments to consider.</param>
        /// <returns>The selected merges, or <c>null</c> when none were selected.</returns>
        private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last)
        {
            MergeSpecification result = new MergeSpecification();
            IList<SegmentCommitInfo> allSegments = infos.AsList();

            // Walk right-to-left; [cursor + 1, last) is the run collected so far.
            int cursor = last - 1;
            for (; cursor >= 0; cursor--)
            {
                SegmentCommitInfo current = infos.Info(cursor);
                bool oversized = Size(current) > m_maxMergeSizeForForcedMerge || SizeDocs(current) > m_maxMergeDocs;

                if (oversized)
                {
                    if (IsVerbose)
                    {
                        Message("findForcedMergesSizeLimit: skip segment=" + current + ": size is > maxMergeSize (" + m_maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + m_maxMergeDocs + ")");
                    }

                    // The oversized segment itself is skipped. The segments to its
                    // right are merged when there are several of them, or when the
                    // only one there is not yet fully merged.
                    int pending = last - cursor - 1;
                    bool mergeRightSide = pending > 1 || (pending != 0 && !IsMerged(infos, infos.Info(cursor + 1)));
                    if (mergeRightSide)
                    {
                        result.Add(new OneMerge(allSegments.SubList(cursor + 1, last)));
                    }

                    last = cursor;
                    continue;
                }

                if (last - cursor == m_mergeFactor)
                {
                    // A full batch of merge-factor eligible segments: emit it.
                    result.Add(new OneMerge(allSegments.SubList(cursor, last)));
                    last = cursor;
                }
            }

            // Left-over segments at the front of the index: merge them unless the
            // only one left is already fully merged.
            if (last > 0)
            {
                // The scan stopped one position below index 0; step back onto the
                // first segment.
                cursor++;
                if (cursor + 1 < last || !IsMerged(infos, infos.Info(cursor)))
                {
                    result.Add(new OneMerge(allSegments.SubList(cursor, last)));
                }
            }

            if (result.Merges.Count == 0)
            {
                return null;
            }
            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Selects the merges needed to force-merge all deletes out of the index.
        /// Adjacent segments that carry deletions are grouped into one merge; a
        /// group is emitted when it reaches the merge factor, when a segment
        /// without deletions interrupts it, or when the end of the index is reached.
        /// </summary>
        /// <param name="segmentInfos">The segments of the index.</param>
        /// <returns>The merge specification; it may contain no merges but is never <c>null</c>.</returns>
        public override MergeSpecification FindForcedDeletesMerges(SegmentInfos segmentInfos)
        {
            var allSegments = segmentInfos.AsList();
            int segmentCount = allSegments.Count;

            if (IsVerbose)
            {
                Message("findForcedDeleteMerges: " + segmentCount + " segments");
            }

            var result = new MergeSpecification();
            // Index of the first segment of the current run of segments with
            // deletions, or -1 while no run is open.
            int runStart = -1;
            IndexWriter writer = m_writer.Get();

            Debug.Assert(writer != null);
            for (int pos = 0; pos < segmentCount; pos++)
            {
                SegmentCommitInfo current = segmentInfos.Info(pos);
                int deleted = writer.NumDeletedDocs(current);

                if (deleted <= 0)
                {
                    if (runStart == -1)
                    {
                        continue;
                    }

                    // A segment without deletions ends the run: merge what was
                    // collected even if it is fewer than mergeFactor segments.
                    if (IsVerbose)
                    {
                        Message("  add merge " + runStart + " to " + (pos - 1) + " inclusive");
                    }
                    result.Add(new OneMerge(allSegments.SubList(runStart, pos)));
                    runStart = -1;
                    continue;
                }

                if (IsVerbose)
                {
                    Message("  segment " + current.Info.Name + " has deletions");
                }

                if (runStart == -1)
                {
                    // This segment opens a new run.
                    runStart = pos;
                    continue;
                }

                if (pos - runStart != m_mergeFactor)
                {
                    continue;
                }

                // mergeFactor consecutive segments with deletions precede this
                // one: merge them now and start the next run at this segment.
                if (IsVerbose)
                {
                    Message("  add merge " + runStart + " to " + (pos - 1) + " inclusive");
                }
                result.Add(new OneMerge(allSegments.SubList(runStart, pos)));
                runStart = pos;
            }

            // A run that is still open extends to the last segment.
            if (runStart != -1)
            {
                if (IsVerbose)
                {
                    Message("  add merge " + runStart + " to " + (segmentCount - 1) + " inclusive");
                }
                result.Add(new OneMerge(allSegments.SubList(runStart, segmentCount)));
            }

            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Returns the merges necessary to <see cref="IndexWriter.ForceMerge(int)"/> the index.
        /// This method constrains the returned merges only by the
        /// <paramref name="maxNumSegments"/> parameter (no size limits are checked here).
        /// <para/>
        /// While enough segments remain, "full" merges of merge-factor segments are
        /// taken from the tail. Only when no full merge was selected is a single
        /// final merge added that brings the first <paramref name="last"/> segments
        /// down to <paramref name="maxNumSegments"/>.
        /// </summary>
        /// <param name="infos">The segments of the index.</param>
        /// <param name="maxNumSegments">The number of segments to merge down to.</param>
        /// <param name="last">Exclusive upper index of the segments to consider.</param>
        /// <returns>The selected merges, or <c>null</c> when none were selected.</returns>
        private MergeSpecification FindForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last)
        {
            var plan = new MergeSpecification();
            var allSegments = infos.AsList();

            // Enroll every full-size (mergeFactor segments) merge first so they
            // can potentially run concurrently.
            for (; last - maxNumSegments + 1 >= m_mergeFactor; last -= m_mergeFactor)
            {
                plan.Add(new OneMerge(allSegments.SubList(last - m_mergeFactor, last)));
            }

            // A final partial merge (fewer than mergeFactor segments) is only
            // added when no full merge is pending.
            if (plan.Merges.Count != 0)
            {
                return plan;
            }

            if (maxNumSegments == 1)
            {
                // Merging down to one segment leaves no choice: take everything,
                // unless the single remaining segment is already fully merged.
                if (last > 1 || !IsMerged(infos, infos.Info(0)))
                {
                    plan.Add(new OneMerge(allSegments.SubList(0, last)));
                }
            }
            else if (last > maxNumSegments)
            {
                // Pick a least-cost partial merge that does not make the index too
                // lopsided; always taking the tail could skew it over time.

                // Number of segments that must be merged to leave maxNumSegments.
                int windowLength = last - maxNumSegments + 1;
                // Number of possible starting positions for such a window.
                int windowCount = last - windowLength + 1;

                long cheapestSize = 0;
                int cheapestStart = 0;

                for (int first = 0; first < windowCount; first++)
                {
                    long windowSize = 0;
                    for (int offset = 0; offset < windowLength; offset++)
                    {
                        windowSize += Size(infos.Info(offset + first));
                    }

                    // The first window is always taken as the baseline. A later one
                    // replaces it only when it is smaller than twice the segment just
                    // before it and also smaller than the best window so far.
                    if (first != 0 && (windowSize >= 2 * Size(infos.Info(first - 1)) || windowSize >= cheapestSize))
                    {
                        continue;
                    }

                    cheapestStart = first;
                    cheapestSize = windowSize;
                }

                plan.Add(new OneMerge(allSegments.SubList(cheapestStart, cheapestStart + windowLength)));
            }

            return plan.Merges.Count == 0 ? null : plan;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Selects the merges that should run now for the given segments.
        /// <para/>
        /// The segments are sorted with <c>SegmentByteSizeDescending</c>. Segments
        /// whose size is at least half of <c>maxMergedSegmentBytes</c> are excluded
        /// from the byte budget and from merge selection. From the remaining bytes
        /// an allowed segment count is computed tier by tier. While the number of
        /// eligible segments is at or above that count, candidate merges are scored
        /// and the one with the lowest score is added to the result.
        /// </summary>
        /// <param name="mergeTrigger">The event that triggered the merge check; not read by this implementation.</param>
        /// <param name="infos">The segments currently in the index.</param>
        /// <returns>
        /// The selected merges, or <c>null</c> when <paramref name="infos"/> is empty
        /// or no merge was selected.
        /// </returns>
        public override MergeSpecification FindMerges(MergeTrigger mergeTrigger, SegmentInfos infos)
        {
            // Diagnostic output must not depend on the current thread culture
            // (e.g. ',' as decimal separator), so all numbers in messages are
            // formatted with the invariant culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            if (Verbose())
            {
                Message("findMerges: " + infos.Count + " segments");
            }
            if (infos.Count == 0)
            {
                return null;
            }
            ICollection<SegmentCommitInfo> merging = m_writer.Get().MergingSegments;
            ICollection<SegmentCommitInfo> toBeMerged = new JCG.HashSet<SegmentCommitInfo>();

            // Work on a sorted copy; by the comparer's name the largest segments
            // come first, which the "too big" scan below relies on.
            List<SegmentCommitInfo> infosSorted = new List<SegmentCommitInfo>(infos.AsList());

            infosSorted.Sort(new SegmentByteSizeDescending(this));

            // Compute total index bytes & print details about the index
            long totIndexBytes = 0;
            long minSegmentBytes = long.MaxValue;

            foreach (SegmentCommitInfo info in infosSorted)
            {
                long segBytes = Size(info);
                if (Verbose())
                {
                    string extra = merging.Contains(info) ? " [merging]" : "";
                    if (segBytes >= maxMergedSegmentBytes / 2.0)
                    {
                        extra += " [skip: too large]";
                    }
                    else if (segBytes < floorSegmentBytes)
                    {
                        extra += " [floored]";
                    }
                    Message("  seg=" + m_writer.Get().SegString(info) + " size=" + string.Format(invariant, "{0:0.000}", segBytes / 1024 / 1024.0) + " MB" + extra);
                }

                minSegmentBytes = Math.Min(segBytes, minSegmentBytes);
                // Accum total byte size
                totIndexBytes += segBytes;
            }

            // If we have too-large segments, grace them out
            // of the maxSegmentCount:
            int tooBigCount = 0;

            while (tooBigCount < infosSorted.Count && Size(infosSorted[tooBigCount]) >= maxMergedSegmentBytes / 2.0)
            {
                totIndexBytes -= Size(infosSorted[tooBigCount]);
                tooBigCount++;
            }

            minSegmentBytes = FloorSize(minSegmentBytes);

            // Compute max allowed segs in the index: each level may hold up to
            // segsPerTier segments of levelSize bytes, and the next level's
            // segment size is maxMergeAtOnce times larger.
            long levelSize = minSegmentBytes;
            long bytesLeft = totIndexBytes;
            double allowedSegCount = 0;

            while (true)
            {
                double segCountLevel = bytesLeft / (double)levelSize;
                if (segCountLevel < segsPerTier)
                {
                    // Last (partially filled) level.
                    allowedSegCount += Math.Ceiling(segCountLevel);
                    break;
                }
                allowedSegCount += segsPerTier;
                bytesLeft -= (long)(segsPerTier * levelSize);
                levelSize *= maxMergeAtOnce;
            }
            int allowedSegCountInt = (int)allowedSegCount;

            MergeSpecification spec = null;

            // Cycle to possibly select more than one merge:
            while (true)
            {
                long mergingBytes = 0;

                // Gather eligible segments for merging, ie segments
                // not already being merged and not already picked (by
                // prior iteration of this loop) for merging:
                IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
                for (int idx = tooBigCount; idx < infosSorted.Count; idx++)
                {
                    SegmentCommitInfo info = infosSorted[idx];
                    if (merging.Contains(info))
                    {
                        mergingBytes += info.GetSizeInBytes();
                    }
                    else if (!toBeMerged.Contains(info))
                    {
                        eligible.Add(info);
                    }
                }

                bool maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;

                if (Verbose())
                {
                    Message("  allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.Count + " (eligible count=" + eligible.Count + ") tooBigCount=" + tooBigCount);
                }

                if (eligible.Count == 0)
                {
                    return spec;
                }

                if (eligible.Count >= allowedSegCountInt)
                {
                    // OK we are over budget -- find best merge!
                    MergeScore bestScore = null;
                    IList<SegmentCommitInfo> best = null;
                    bool bestTooLarge = false;
                    long bestMergeBytes = 0;

                    // Consider all merge starts:
                    for (int startIdx = 0; startIdx <= eligible.Count - maxMergeAtOnce; startIdx++)
                    {
                        long totAfterMergeBytes = 0;

                        IList<SegmentCommitInfo> candidate = new List<SegmentCommitInfo>();
                        bool hitTooLarge = false;
                        for (int idx = startIdx; idx < eligible.Count && candidate.Count < maxMergeAtOnce; idx++)
                        {
                            SegmentCommitInfo info = eligible[idx];
                            long segBytes = Size(info);

                            if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes)
                            {
                                hitTooLarge = true;
                                // NOTE: we continue, so that we can try
                                // "packing" smaller segments into this merge
                                // to see if we can get closer to the max
                                // size; this in general is not perfect since
                                // this is really "bin packing" and we'd have
                                // to try different permutations.
                                continue;
                            }
                            candidate.Add(info);
                            totAfterMergeBytes += segBytes;
                        }

                        MergeScore score = Score(candidate, hitTooLarge, mergingBytes);
                        if (Verbose())
                        {
                            Message("  maybe=" + m_writer.Get().SegString(candidate) + " score=" + string.Format(invariant, "{0}", score.Score) + " " + score.Explanation + " tooLarge=" + hitTooLarge + " size=" + string.Format(invariant, "{0:0.000} MB", totAfterMergeBytes / 1024.0 / 1024.0));
                        }

                        // Lower scores win. If we are already running a max sized
                        // merge (maxMergeIsRunning), don't allow another max
                        // sized merge to kick off:
                        if ((bestScore == null || score.Score < bestScore.Score) && (!hitTooLarge || !maxMergeIsRunning))
                        {
                            best = candidate;
                            bestScore = score;
                            bestTooLarge = hitTooLarge;
                            bestMergeBytes = totAfterMergeBytes;
                        }
                    }

                    if (best != null)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        OneMerge merge = new OneMerge(best);
                        spec.Add(merge);
                        // Remember the picked segments so the next pass of the
                        // outer loop does not offer them again.
                        foreach (SegmentCommitInfo info in merge.Segments)
                        {
                            toBeMerged.Add(info);
                        }

                        if (Verbose())
                        {
                            Message("  add merge=" + m_writer.Get().SegString(merge.Segments) + " size=" + string.Format(invariant, "{0:0.000} MB", bestMergeBytes / 1024.0 / 1024.0) + " score=" + string.Format(invariant, "{0:0.000}", bestScore.Score) + " " + bestScore.Explanation + (bestTooLarge ? " [max merge]" : ""));
                        }
                    }
                    else
                    {
                        // No acceptable candidate was found.
                        return spec;
                    }
                }
                else
                {
                    // Within budget: nothing more to merge.
                    return spec;
                }
            }
        }