/// <summary>
/// Hands out a single merge covering <c>length</c> segments beginning at index
/// <c>start</c>, but only while the <c>doMerge</c> flag is set. The flag is
/// cleared once the merge has been produced.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this implementation.</param>
/// <param name="segmentInfos">Segments from which the view to merge is taken.</param>
/// <returns>A specification holding one merge, or <c>null</c> when no merge is pending.</returns>
public override MergeSpecification FindMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos)
{
    // No merge pending: nothing to report.
    if (!doMerge)
    {
        return null;
    }

    // LUCENENET: Converted end index to length
    var segmentsToMerge = segmentInfos.AsList().GetView(start, length);

    var spec = new MergeSpecification();
    spec.Add(new OneMerge(segmentsToMerge));

    // The request is consumed only after the merge was built successfully.
    doMerge = false;
    return spec;
}
/// <summary>
/// Produces one merge over the segment range [<c>Start</c>, <c>Start + Length</c>)
/// when <c>DoMerge</c> is set, and resets <c>DoMerge</c> afterwards.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this implementation.</param>
/// <param name="segmentInfos">Segments from which the sub-list to merge is taken.</param>
/// <returns>A specification holding one merge, or <c>null</c> when <c>DoMerge</c> is not set.</returns>
public override MergeSpecification FindMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos)
{
    if (!DoMerge)
    {
        return null;
    }

    int endExclusive = Start + Length;
    var range = segmentInfos.AsList().SubList(Start, endExclusive);

    var result = new MergeSpecification();
    result.Add(new OneMerge(range));

    // Clear the flag last, so a failure above leaves the request pending.
    DoMerge = false;
    return result;
}
/// <summary>
/// Builds the forced merges for the first <paramref name="last"/> segments while honoring
/// the maximum merge size and the maximum merge doc count. Segments exceeding either limit
/// are skipped and act as boundaries between merges. Because of those limits more than
/// <paramref name="maxNumSegments"/> segments may remain in the index; the method aims for
/// &lt;= that number but does not guarantee exactly that number.
/// </summary>
/// <param name="infos">The segments of the index.</param>
/// <param name="maxNumSegments">Requested segment count; not consulted by this size-limited variant.</param>
/// <param name="last">Exclusive upper bound of the segment range to consider.</param>
/// <returns>The merges to run, or <c>null</c> when no merge is needed.</returns>
private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last)
{
    MergeSpecification spec = new MergeSpecification();
    IList<SegmentCommitInfo> segments = infos.AsList();

    // Walk from the right-most candidate down to segment 0; 'last' always marks the
    // exclusive end of the run of segments that has not been assigned to a merge yet.
    for (int idx = last - 1; idx >= 0; idx--)
    {
        SegmentCommitInfo info = infos.Info(idx);
        bool tooLarge = Size(info) > m_maxMergeSizeForForcedMerge || SizeDocs(info) > m_maxMergeDocs;

        if (tooLarge)
        {
            if (IsVerbose)
            {
                Message("findForcedMergesSizeLimit: skip segment=" + info
                    + ": size is > maxMergeSize (" + m_maxMergeSizeForForcedMerge
                    + ") or sizeDocs is > maxMergeDocs (" + m_maxMergeDocs + ")");
            }

            // Number of pending segments to the right of the oversized one.
            int pendingRight = last - idx - 1;

            // Merge the pending run when it holds more than one segment, or when it
            // holds exactly one segment that is not already fully merged.
            if (pendingRight > 1 || (pendingRight != 0 && !IsMerged(infos, infos.Info(idx + 1))))
            {
                spec.Add(new OneMerge(segments.SubList(idx + 1, last)));
            }

            last = idx;
        }
        else if (last - idx == m_mergeFactor)
        {
            // A full group of mergeFactor eligible segments has been collected.
            spec.Add(new OneMerge(segments.SubList(idx, last)));
            last = idx;
        }
    }

    // Left-over segments at the front of the index: merge them unless there is just
    // one and it is already fully merged.
    if (last > 0)
    {
        bool severalLeft = last > 1;
        if (severalLeft || !IsMerged(infos, infos.Info(0)))
        {
            spec.Add(new OneMerge(segments.SubList(0, last)));
        }
    }

    if (spec.Merges.Count == 0)
    {
        return null;
    }
    return spec;
}
/// <summary>
/// Determines the merges required to force-merge all deletes out of the index.
/// Adjacent segments that carry deletions are grouped into merges of at most
/// mergeFactor segments each; a segment without deletions ends the current group.
/// </summary>
/// <param name="segmentInfos">The segments of the index.</param>
/// <returns>The merge specification; it contains no merges when no segment has deletions.</returns>
public override MergeSpecification FindForcedDeletesMerges(SegmentInfos segmentInfos)
{
    var segments = segmentInfos.AsList();
    int numSegments = segments.Count;

    if (IsVerbose)
    {
        Message("findForcedDeleteMerges: " + numSegments + " segments");
    }

    var spec = new MergeSpecification();
    IndexWriter w = m_writer.Get();
    Debug.Assert(w != null);

    // Index of the first segment of the current run of segments with deletions,
    // or -1 while no such run is open.
    int runStart = -1;

    for (int idx = 0; idx < numSegments; idx++)
    {
        SegmentCommitInfo info = segmentInfos.Info(idx);
        bool hasDeletions = w.NumDeletedDocs(info) > 0;

        if (!hasDeletions)
        {
            // A clean segment closes the open run (if any), even when the run is
            // shorter than mergeFactor.
            if (runStart != -1)
            {
                if (IsVerbose)
                {
                    Message(" add merge " + runStart + " to " + (idx - 1) + " inclusive");
                }
                spec.Add(new OneMerge(segments.SubList(runStart, idx)));
                runStart = -1;
            }
            continue;
        }

        if (IsVerbose)
        {
            Message(" segment " + info.Info.Name + " has deletions");
        }

        if (runStart == -1)
        {
            runStart = idx;
        }
        else if (idx - runStart == m_mergeFactor)
        {
            // mergeFactor consecutive segments with deletions: emit them as one merge
            // and open a new run at the current segment.
            if (IsVerbose)
            {
                Message(" add merge " + runStart + " to " + (idx - 1) + " inclusive");
            }
            spec.Add(new OneMerge(segments.SubList(runStart, idx)));
            runStart = idx;
        }
    }

    // A run that reaches the end of the index still has to be merged.
    if (runStart != -1)
    {
        if (IsVerbose)
        {
            Message(" add merge " + runStart + " to " + (numSegments - 1) + " inclusive");
        }
        spec.Add(new OneMerge(segments.SubList(runStart, numSegments)));
    }

    return spec;
}
/// <summary>
/// Returns the merges necessary to <see cref="IndexWriter.ForceMerge(int)"/> the index.
/// This method constrains the returned merges only by the
/// <paramref name="maxNumSegments"/> parameter, with the goal that exactly that number
/// of segments remains in the index.
/// </summary>
/// <param name="infos">The segments of the index.</param>
/// <param name="maxNumSegments">Number of segments that should remain.</param>
/// <param name="last">Exclusive upper bound of the segment range to consider.</param>
/// <returns>The merges to run, or <c>null</c> when no merge is needed.</returns>
private MergeSpecification FindForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last)
{
    var spec = new MergeSpecification();
    var segments = infos.AsList();

    // First, enroll all "full" merges (size mergeFactor), taken from the tail, so
    // they can potentially run concurrently.
    for (; last - maxNumSegments + 1 >= m_mergeFactor; last -= m_mergeFactor)
    {
        spec.Add(new OneMerge(segments.SubList(last - m_mergeFactor, last)));
    }

    // A final partial (< mergeFactor segments) merge is only added when no full
    // merge is pending.
    if (spec.Merges.Count != 0)
    {
        return spec;
    }

    if (maxNumSegments == 1)
    {
        // Merging down to a single segment: take everything, unless the lone
        // segment is already fully merged.
        bool needsMerge = last > 1 || !IsMerged(infos, infos.Info(0));
        if (needsMerge)
        {
            spec.Add(new OneMerge(segments.SubList(0, last)));
        }
    }
    else if (last > maxNumSegments)
    {
        // Pick a partial merge of least cost that does not make the index too
        // lopsided; always taking the partial tail could skew the index over time.

        // This many segments must be merged to leave maxNumSegments in the index.
        int mergeWidth = last - maxNumSegments + 1;
        int startLimit = last - mergeWidth + 1;

        long smallestBytes = 0;
        int chosenStart = 0;

        // Consider every possible starting point of the window.
        for (int begin = 0; begin < startLimit; begin++)
        {
            long windowBytes = 0;
            for (int offset = 0; offset < mergeWidth; offset++)
            {
                windowBytes += Size(infos.Info(offset + begin));
            }

            // The first window is the baseline; a later one wins only if it is smaller
            // than the best so far and less than twice the size of the segment before it.
            bool accept = begin == 0
                || (windowBytes < 2 * Size(infos.Info(begin - 1)) && windowBytes < smallestBytes);
            if (accept)
            {
                chosenStart = begin;
                smallestBytes = windowBytes;
            }
        }

        spec.Add(new OneMerge(segments.SubList(chosenStart, chosenStart + mergeWidth)));
    }

    if (spec.Merges.Count == 0)
    {
        return null;
    }
    return spec;
}
/// <summary>
/// Selects the merges to run for the given segments.
/// <para/>
/// Segments are sorted with <c>SegmentByteSizeDescending</c>. Segments whose size is at
/// least half of <c>MaxMergedSegmentBytes</c> are left out of the budget calculation and
/// of the merge candidates. An allowed segment count is then derived from the remaining
/// bytes, <c>SegsPerTier</c> and <c>MaxMergeAtOnce_Renamed</c>. While the number of eligible
/// segments reaches that budget, the candidate with the lowest score is added as a merge.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this implementation.</param>
/// <param name="infos">The segments of the index.</param>
/// <returns>
/// The selected merges, or <c>null</c> when the index has no segments or no merge was selected.
/// </returns>
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos infos)
{
    if (Verbose())
    {
        Message("findMerges: " + infos.Size() + " segments");
    }
    if (infos.Size() == 0)
    {
        return null;
    }

    ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
    ICollection<SegmentCommitInfo> toBeMerged = new HashSet<SegmentCommitInfo>();

    List<SegmentCommitInfo> infosSorted = new List<SegmentCommitInfo>(infos.AsList());
    infosSorted.Sort(new SegmentByteSizeDescending(this));

    // Compute total index bytes & print details about the index
    long totIndexBytes = 0;
    long minSegmentBytes = long.MaxValue;
    foreach (SegmentCommitInfo info in infosSorted)
    {
        long segBytes = Size(info);
        if (Verbose())
        {
            string extra = merging.Contains(info) ? " [merging]" : "";
            if (segBytes >= MaxMergedSegmentBytes / 2.0)
            {
                extra += " [skip: too large]";
            }
            else if (segBytes < FloorSegmentBytes)
            {
                extra += " [floored]";
            }
            Message(" seg=" + Writer.Get().SegString(info) + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00}", segBytes / 1024 / 1024.0) + " MB" + extra);
        }

        minSegmentBytes = Math.Min(segBytes, minSegmentBytes);
        // Accum total byte size
        totIndexBytes += segBytes;
    }

    // If we have too-large segments, grace them out
    // of the maxSegmentCount:
    int tooBigCount = 0;
    while (tooBigCount < infosSorted.Count && Size(infosSorted[tooBigCount]) >= MaxMergedSegmentBytes / 2.0)
    {
        totIndexBytes -= Size(infosSorted[tooBigCount]);
        tooBigCount++;
    }

    minSegmentBytes = FloorSize(minSegmentBytes);

    // Compute max allowed segs in the index
    long levelSize = minSegmentBytes;
    long bytesLeft = totIndexBytes;
    double allowedSegCount = 0;
    while (true)
    {
        double segCountLevel = bytesLeft / (double)levelSize;
        if (segCountLevel < SegsPerTier)
        {
            allowedSegCount += Math.Ceiling(segCountLevel);
            break;
        }
        allowedSegCount += SegsPerTier;
        bytesLeft -= (long)(SegsPerTier * levelSize);
        levelSize *= MaxMergeAtOnce_Renamed;
    }
    int allowedSegCountInt = (int)allowedSegCount;

    MergeSpecification spec = null;

    // Cycle to possibly select more than one merge:
    while (true)
    {
        long mergingBytes = 0;

        // Gather eligible segments for merging, ie segments
        // not already being merged and not already picked (by
        // prior iteration of this loop) for merging:
        IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
        for (int idx = tooBigCount; idx < infosSorted.Count; idx++)
        {
            SegmentCommitInfo info = infosSorted[idx];
            if (merging.Contains(info))
            {
                mergingBytes += info.SizeInBytes();
            }
            else if (!toBeMerged.Contains(info))
            {
                eligible.Add(info);
            }
        }

        bool maxMergeIsRunning = mergingBytes >= MaxMergedSegmentBytes;

        if (Verbose())
        {
            Message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.Count + " (eligible count=" + eligible.Count + ") tooBigCount=" + tooBigCount);
        }

        if (eligible.Count == 0)
        {
            return spec;
        }

        if (eligible.Count >= allowedSegCountInt)
        {
            // OK we are over budget -- find best merge!
            MergeScore bestScore = null;
            IList<SegmentCommitInfo> best = null;
            bool bestTooLarge = false;
            long bestMergeBytes = 0;

            // Consider all merge starts:
            for (int startIdx = 0; startIdx <= eligible.Count - MaxMergeAtOnce_Renamed; startIdx++)
            {
                long totAfterMergeBytes = 0;

                IList<SegmentCommitInfo> candidate = new List<SegmentCommitInfo>();
                bool hitTooLarge = false;
                for (int idx = startIdx; idx < eligible.Count && candidate.Count < MaxMergeAtOnce_Renamed; idx++)
                {
                    SegmentCommitInfo info = eligible[idx];
                    long segBytes = Size(info);

                    if (totAfterMergeBytes + segBytes > MaxMergedSegmentBytes)
                    {
                        hitTooLarge = true;
                        // NOTE: we continue, so that we can try
                        // "packing" smaller segments into this merge
                        // to see if we can get closer to the max
                        // size; this in general is not perfect since
                        // this is really "bin packing" and we'd have
                        // to try different permutations.
                        continue;
                    }
                    candidate.Add(info);
                    totAfterMergeBytes += segBytes;
                }

                MergeScore score = Score(candidate, hitTooLarge, mergingBytes);
                if (Verbose())
                {
                    // .NET composite formatting needs "{0:0.000}"; a printf-style "%.3f"
                    // would be printed literally and the value would be lost.
                    Message(" maybe=" + Writer.Get().SegString(candidate) + " score=" + score.Score + " " + score.Explanation + " tooLarge=" + hitTooLarge + " size=" + string.Format(CultureInfo.InvariantCulture, "{0:0.000} MB", totAfterMergeBytes / 1024.0 / 1024.0));
                }

                // If we are already running a max sized merge
                // (maxMergeIsRunning), don't allow another max
                // sized merge to kick off:
                if ((bestScore == null || score.Score < bestScore.Score) && (!hitTooLarge || !maxMergeIsRunning))
                {
                    best = candidate;
                    bestScore = score;
                    bestTooLarge = hitTooLarge;
                    bestMergeBytes = totAfterMergeBytes;
                }
            }

            if (best != null)
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                OneMerge merge = new OneMerge(best);
                spec.Add(merge);
                // Remember the picked segments so the next iteration skips them.
                foreach (SegmentCommitInfo info in merge.Segments)
                {
                    toBeMerged.Add(info);
                }

                if (Verbose())
                {
                    Message(" add merge=" + Writer.Get().SegString(merge.Segments) + " size=" + string.Format(CultureInfo.InvariantCulture, "{0:0.000} MB", bestMergeBytes / 1024.0 / 1024.0) + " score=" + string.Format(CultureInfo.InvariantCulture, "{0:0.000}", bestScore.Score) + " " + bestScore.Explanation + (bestTooLarge ? " [max merge]" : ""));
                }
            }
            else
            {
                return spec;
            }
        }
        else
        {
            return spec;
        }
    }
}
/// <summary>
/// Swaps the segments of this instance for those of <paramref name="other"/>
/// (through <c>RollbackSegmentInfos</c>) and adopts its last generation.
/// Generation, version and counter of this instance are meant to be kept, so that
/// future commits remain write once.
/// </summary>
/// <param name="other">The instance whose segments and last generation are taken over.</param>
internal void Replace(SegmentInfos other)
{
    var replacementSegments = other.AsList();
    RollbackSegmentInfos(replacementSegments);

    _lastGeneration = other._lastGeneration;
}
/// <summary>
/// Returns a single merge spanning <c>Length</c> segments starting at <c>Start</c>
/// if <c>DoMerge</c> is set, clearing <c>DoMerge</c> in the process.
/// </summary>
/// <param name="mergeTrigger">Not consulted by this implementation.</param>
/// <param name="segmentInfos">Segments from which the sub-list to merge is taken.</param>
/// <returns>A specification holding one merge, or <c>null</c> when <c>DoMerge</c> is not set.</returns>
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
{
    if (DoMerge)
    {
        var pending = new MergeSpecification();

        // SubList takes an exclusive end index, hence Start + Length.
        var slice = segmentInfos.AsList().SubList(Start, Start + Length);
        pending.Add(new OneMerge(slice));

        // The request is one-shot: reset it once the merge has been queued.
        DoMerge = false;
        return pending;
    }

    return null;
}