/// <summary>
/// Inspects the given segments and selects the merges that should run now.
/// Each segment is assigned a level (log of its size in base mergeFactor);
/// segments are then grouped, left to right, into bands of similar level and
/// every complete run of mergeFactor adjacent segments inside a band becomes
/// one merge. Several merges may be returned from a single call.
/// </summary>
/// <param name="infos">The segments to examine.</param>
/// <returns>
/// A <see cref="MergeSpecification"/> holding the selected merges, or
/// <c>null</c> when no merge was selected.
/// </returns>
public override MergeSpecification FindMerges(SegmentInfos infos)
{
    int segmentCount = infos.Count;
    if (Verbose())
    {
        Message("findMerges: " + segmentCount + " segments");
    }

    // Level of a segment = log(size) / log(mergeFactor).
    float[] segmentLevels = new float[segmentCount];
    float logMergeFactor = (float) System.Math.Log(mergeFactor);
    for (int idx = 0; idx < segmentCount; idx++)
    {
        long segmentSize = Size(infos.Info(idx));

        // Sizes below 1 are treated as 1 so the logarithm stays defined.
        segmentLevels[idx] = ((float) System.Math.Log(segmentSize < 1 ? 1 : segmentSize)) / logMergeFactor;
    }

    // Level that corresponds to minMergeSize; bands never straddle it.
    float floorLevel = (minMergeSize <= 0)
        ? (float) 0.0
        : (float) (System.Math.Log(minMergeSize) / logMergeFactor);

    MergeSpecification spec = null;
    int levelStart = 0;
    while (levelStart < segmentCount)
    {
        // Highest level among the segments that have not been banded yet.
        float topLevel = segmentLevels[levelStart];
        for (int idx = levelStart + 1; idx < segmentCount; idx++)
        {
            if (segmentLevels[idx] > topLevel)
            {
                topLevel = segmentLevels[idx];
            }
        }

        // Lower bound of the current band.
        float bottomLevel;
        if (topLevel < floorLevel)
        {
            // Everything that is left sits below the floor: take it all.
            bottomLevel = -1.0F;
        }
        else
        {
            bottomLevel = (float) (topLevel - LEVEL_LOG_SPAN);

            // Clamp so the band does not reach below the floor level.
            if (bottomLevel < floorLevel && topLevel >= floorLevel)
            {
                bottomLevel = floorLevel;
            }
        }

        // Walk in from the right to the last segment that belongs to this band.
        int levelLast = segmentCount - 1;
        for (; levelLast >= levelStart; levelLast--)
        {
            if (segmentLevels[levelLast] >= bottomLevel)
            {
                break;
            }
        }

        if (Verbose())
        {
            Message(" level " + bottomLevel + " to " + topLevel + ": " + (1 + levelLast - levelStart) + " segments");
        }

        // Slide a window of mergeFactor segments across the band; a trailing
        // partial window is left alone.
        int windowEnd = levelStart + mergeFactor;
        while (windowEnd <= 1 + levelLast)
        {
            // Every segment in the window is inspected, even after one has
            // already been found to exceed a limit.
            bool anyTooLarge = false;
            for (int idx = levelStart; idx < windowEnd; idx++)
            {
                SegmentInfo candidate = infos.Info(idx);
                if (Size(candidate) >= maxMergeSize || SizeDocs(candidate) >= maxMergeDocs)
                {
                    anyTooLarge = true;
                }
            }

            if (anyTooLarge)
            {
                if (Verbose())
                {
                    Message(" " + levelStart + " to " + windowEnd + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                }
            }
            else
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                if (Verbose())
                {
                    Message(" " + levelStart + " to " + windowEnd + ": add this merge");
                }
                spec.Add(MakeOneMerge(infos, infos.Range(levelStart, windowEnd)));
            }

            levelStart = windowEnd;
            windowEnd += mergeFactor;
        }

        // Continue with the first segment to the right of this band.
        levelStart = 1 + levelLast;
    }

    return spec;
}
/// <summary>
/// Selects the merges needed to expunge deletes: every run of adjacent
/// segments that have deleted documents is merged, with a run being cut
/// once it spans mergeFactor segments.
/// </summary>
/// <param name="segmentInfos">The segments to examine.</param>
/// <returns>
/// A <see cref="MergeSpecification"/> with the selected merges; it is always
/// non-null and may contain no merges.
/// </returns>
public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
{
    int segmentCount = segmentInfos.Count;
    if (Verbose())
    {
        Message("findMergesToExpungeDeletes: " + segmentCount + " segments");
    }

    MergeSpecification spec = new MergeSpecification();

    // Index of the first segment of the run currently being collected,
    // or -1 while no run is open.
    int runStart = -1;

    for (int idx = 0; idx < segmentCount; idx++)
    {
        SegmentInfo segment = segmentInfos.Info(idx);
        bool hasDeletes = writer.NumDeletedDocs(segment) > 0;

        if (hasDeletes && Verbose())
        {
            Message(" segment " + segment.name + " has deletions");
        }

        if (runStart == -1)
        {
            // No open run: a segment with deletes opens one, anything else is skipped.
            if (hasDeletes)
            {
                runStart = idx;
            }
            continue;
        }

        // A run is open. It is closed either because this segment has no
        // deletes, or because the run already spans mergeFactor segments.
        bool runIsFull = hasDeletes && (idx - runStart == mergeFactor);
        if (!hasDeletes || runIsFull)
        {
            if (Verbose())
            {
                Message(" add merge " + runStart + " to " + (idx - 1) + " inclusive");
            }
            spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(runStart, idx)));

            // A full run immediately starts the next one at this segment.
            runStart = hasDeletes ? idx : -1;
        }
    }

    // Flush a run that reaches the last segment.
    if (runStart != -1)
    {
        if (Verbose())
        {
            Message(" add merge " + runStart + " to " + (segmentCount - 1) + " inclusive");
        }
        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(runStart, segmentCount)));
    }

    return spec;
}
/// <summary>
/// Selects the merges needed to bring the index down to at most
/// <paramref name="maxNumSegments"/> segments. Whether the index already
/// counts as optimized is decided by <c>IsOptimized</c>. Full merges of
/// mergeFactor segments are preferred and several may be returned at once;
/// a single partial merge is chosen only when no full merge applies.
/// </summary>
/// <param name="infos">The segments currently in the index.</param>
/// <param name="maxNumSegments">Target segment count; asserted to be greater than zero.</param>
/// <param name="segmentsToOptimize">
/// Segments that take part in the optimize; segments to the right of the
/// rightmost one found here are ignored.
/// </param>
/// <returns>
/// The selected merges (possibly an empty specification), or <c>null</c>
/// when the index is already optimized or none of the segments is in
/// <paramref name="segmentsToOptimize"/>.
/// </returns>
public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
{
    System.Diagnostics.Debug.Assert(maxNumSegments > 0);

    if (IsOptimized(infos, maxNumSegments, segmentsToOptimize))
    {
        return null;
    }

    // Exclusive upper bound of the segments to consider: one past the
    // rightmost segment that is part of this optimize (stays 0 if none is).
    int last = 0;
    for (int idx = infos.Count - 1; idx >= 0; idx--)
    {
        if (segmentsToOptimize.Contains(infos.Info(idx)))
        {
            last = idx + 1;
            break;
        }
    }

    if (last <= 0)
    {
        return null;
    }

    MergeSpecification spec = new MergeSpecification();

    // Enroll every full-sized merge first, working from the right.
    while (last - maxNumSegments + 1 >= mergeFactor)
    {
        int fullMergeStart = last - mergeFactor;
        spec.Add(MakeOneMerge(infos, infos.Range(fullMergeStart, last)));
        last = fullMergeStart;
    }

    // A partial merge is only considered when no full merge was enrolled.
    if (spec.merges.Count != 0)
    {
        return spec;
    }

    if (maxNumSegments == 1)
    {
        // Collapsing to one segment: merge everything, unless the single
        // remaining segment is already optimized.
        if (last > 1 || !IsOptimized(infos.Info(0)))
        {
            spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
        }
        return spec;
    }

    if (last <= maxNumSegments)
    {
        return spec;
    }

    // Number of adjacent segments that must be merged to end up with
    // maxNumSegments segments.
    int finalMergeSize = last - maxNumSegments + 1;
    int candidateCount = last - finalMergeSize + 1;

    // Try every starting position. Position 0 is the baseline; a later
    // position wins only if its total size is below the best so far and
    // below twice the size of the segment immediately to its left.
    long bestSize = 0;
    int bestStart = 0;
    for (int candidate = 0; candidate < candidateCount; candidate++)
    {
        long windowSize = 0;
        for (int offset = 0; offset < finalMergeSize; offset++)
        {
            windowSize += Size(infos.Info(candidate + offset));
        }

        bool isBetter = candidate == 0
            || (windowSize < 2 * Size(infos.Info(candidate - 1)) && windowSize < bestSize);
        if (isBetter)
        {
            bestStart = candidate;
            bestSize = windowSize;
        }
    }

    spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
    return spec;
}
/// <summary>
/// Inspects the given segments and selects the merges that should run now.
/// Each segment is assigned a level (log of its size in base mergeFactor);
/// segments are then grouped, left to right, into bands of similar level and
/// every complete run of mergeFactor adjacent segments inside a band becomes
/// one merge. Several merges may be returned from a single call.
/// </summary>
/// <param name="infos">The segments to examine.</param>
/// <returns>
/// A <see cref="MergeSpecification"/> holding the selected merges, or
/// <c>null</c> when no merge was selected.
/// </returns>
public override MergeSpecification FindMerges(SegmentInfos infos)
{
    int segmentCount = infos.Count;
    if (Verbose())
    {
        Message("findMerges: " + segmentCount + " segments");
    }

    // Level of a segment = log(size) / log(mergeFactor).
    float[] segmentLevels = new float[segmentCount];
    float logMergeFactor = (float) System.Math.Log(mergeFactor);
    for (int idx = 0; idx < segmentCount; idx++)
    {
        long segmentSize = Size(infos.Info(idx));

        // Sizes below 1 are treated as 1 so the logarithm stays defined.
        segmentLevels[idx] = ((float) System.Math.Log(segmentSize < 1 ? 1 : segmentSize)) / logMergeFactor;
    }

    // Level that corresponds to minMergeSize; bands never straddle it.
    float floorLevel = (minMergeSize <= 0)
        ? (float) 0.0
        : (float) (System.Math.Log(minMergeSize) / logMergeFactor);

    MergeSpecification spec = null;
    int levelStart = 0;
    while (levelStart < segmentCount)
    {
        // Highest level among the segments that have not been banded yet.
        float topLevel = segmentLevels[levelStart];
        for (int idx = levelStart + 1; idx < segmentCount; idx++)
        {
            if (segmentLevels[idx] > topLevel)
            {
                topLevel = segmentLevels[idx];
            }
        }

        // Lower bound of the current band.
        float bottomLevel;
        if (topLevel < floorLevel)
        {
            // Everything that is left sits below the floor: take it all.
            bottomLevel = -1.0F;
        }
        else
        {
            bottomLevel = (float) (topLevel - LEVEL_LOG_SPAN);

            // Clamp so the band does not reach below the floor level.
            if (bottomLevel < floorLevel && topLevel >= floorLevel)
            {
                bottomLevel = floorLevel;
            }
        }

        // Walk in from the right to the last segment that belongs to this band.
        int levelLast = segmentCount - 1;
        for (; levelLast >= levelStart; levelLast--)
        {
            if (segmentLevels[levelLast] >= bottomLevel)
            {
                break;
            }
        }

        if (Verbose())
        {
            Message(" level " + bottomLevel + " to " + topLevel + ": " + (1 + levelLast - levelStart) + " segments");
        }

        // Slide a window of mergeFactor segments across the band; a trailing
        // partial window is left alone.
        int windowEnd = levelStart + mergeFactor;
        while (windowEnd <= 1 + levelLast)
        {
            // Every segment in the window is inspected, even after one has
            // already been found to exceed a limit.
            bool anyTooLarge = false;
            for (int idx = levelStart; idx < windowEnd; idx++)
            {
                SegmentInfo candidate = infos.Info(idx);
                if (Size(candidate) >= maxMergeSize || SizeDocs(candidate) >= maxMergeDocs)
                {
                    anyTooLarge = true;
                }
            }

            if (anyTooLarge)
            {
                if (Verbose())
                {
                    Message(" " + levelStart + " to " + windowEnd + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                }
            }
            else
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                if (Verbose())
                {
                    Message(" " + levelStart + " to " + windowEnd + ": add this merge");
                }
                spec.Add(MakeOneMerge(infos, infos.Range(levelStart, windowEnd)));
            }

            levelStart = windowEnd;
            windowEnd += mergeFactor;
        }

        // Continue with the first segment to the right of this band.
        levelStart = 1 + levelLast;
    }

    return spec;
}
/// <summary>
/// Selects the merges needed to expunge deletes: every run of adjacent
/// segments that have deleted documents is merged, with a run being cut
/// once it spans mergeFactor segments.
/// </summary>
/// <param name="segmentInfos">The segments to examine.</param>
/// <returns>
/// A <see cref="MergeSpecification"/> with the selected merges; it is always
/// non-null and may contain no merges.
/// </returns>
public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
{
    int segmentCount = segmentInfos.Count;
    if (Verbose())
    {
        Message("findMergesToExpungeDeletes: " + segmentCount + " segments");
    }

    MergeSpecification spec = new MergeSpecification();

    // Index of the first segment of the run currently being collected,
    // or -1 while no run is open.
    int runStart = -1;

    for (int idx = 0; idx < segmentCount; idx++)
    {
        SegmentInfo segment = segmentInfos.Info(idx);
        bool hasDeletes = writer.NumDeletedDocs(segment) > 0;

        if (hasDeletes && Verbose())
        {
            Message(" segment " + segment.name + " has deletions");
        }

        if (runStart == -1)
        {
            // No open run: a segment with deletes opens one, anything else is skipped.
            if (hasDeletes)
            {
                runStart = idx;
            }
            continue;
        }

        // A run is open. It is closed either because this segment has no
        // deletes, or because the run already spans mergeFactor segments.
        bool runIsFull = hasDeletes && (idx - runStart == mergeFactor);
        if (!hasDeletes || runIsFull)
        {
            if (Verbose())
            {
                Message(" add merge " + runStart + " to " + (idx - 1) + " inclusive");
            }
            spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(runStart, idx)));

            // A full run immediately starts the next one at this segment.
            runStart = hasDeletes ? idx : -1;
        }
    }

    // Flush a run that reaches the last segment.
    if (runStart != -1)
    {
        if (Verbose())
        {
            Message(" add merge " + runStart + " to " + (segmentCount - 1) + " inclusive");
        }
        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(runStart, segmentCount)));
    }

    return spec;
}
/// <summary>
/// Selects the merges needed to bring the index down to at most
/// <paramref name="maxNumSegments"/> segments. Whether the index already
/// counts as optimized is decided by <c>IsOptimized</c>. Full merges of
/// mergeFactor segments are preferred and several may be returned at once;
/// a single partial merge is chosen only when no full merge applies.
/// </summary>
/// <param name="infos">The segments currently in the index.</param>
/// <param name="maxNumSegments">Target segment count; asserted to be greater than zero.</param>
/// <param name="segmentsToOptimize">
/// Segments that take part in the optimize; segments to the right of the
/// rightmost one found here are ignored.
/// </param>
/// <returns>
/// The selected merges (possibly an empty specification), or <c>null</c>
/// when the index is already optimized or none of the segments is in
/// <paramref name="segmentsToOptimize"/>.
/// </returns>
public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
{
    System.Diagnostics.Debug.Assert(maxNumSegments > 0);

    if (IsOptimized(infos, maxNumSegments, segmentsToOptimize))
    {
        return null;
    }

    // Exclusive upper bound of the segments to consider: one past the
    // rightmost segment that is part of this optimize (stays 0 if none is).
    int last = 0;
    for (int idx = infos.Count - 1; idx >= 0; idx--)
    {
        if (segmentsToOptimize.Contains(infos.Info(idx)))
        {
            last = idx + 1;
            break;
        }
    }

    if (last <= 0)
    {
        return null;
    }

    MergeSpecification spec = new MergeSpecification();

    // Enroll every full-sized merge first, working from the right.
    while (last - maxNumSegments + 1 >= mergeFactor)
    {
        int fullMergeStart = last - mergeFactor;
        spec.Add(MakeOneMerge(infos, infos.Range(fullMergeStart, last)));
        last = fullMergeStart;
    }

    // A partial merge is only considered when no full merge was enrolled.
    if (spec.merges.Count != 0)
    {
        return spec;
    }

    if (maxNumSegments == 1)
    {
        // Collapsing to one segment: merge everything, unless the single
        // remaining segment is already optimized.
        if (last > 1 || !IsOptimized(infos.Info(0)))
        {
            spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
        }
        return spec;
    }

    if (last <= maxNumSegments)
    {
        return spec;
    }

    // Number of adjacent segments that must be merged to end up with
    // maxNumSegments segments.
    int finalMergeSize = last - maxNumSegments + 1;
    int candidateCount = last - finalMergeSize + 1;

    // Try every starting position. Position 0 is the baseline; a later
    // position wins only if its total size is below the best so far and
    // below twice the size of the segment immediately to its left.
    long bestSize = 0;
    int bestStart = 0;
    for (int candidate = 0; candidate < candidateCount; candidate++)
    {
        long windowSize = 0;
        for (int offset = 0; offset < finalMergeSize; offset++)
        {
            windowSize += Size(infos.Info(candidate + offset));
        }

        bool isBetter = candidate == 0
            || (windowSize < 2 * Size(infos.Info(candidate - 1)) && windowSize < bestSize);
        if (isBetter)
        {
            bestStart = candidate;
            bestSize = windowSize;
        }
    }

    spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
    return spec;
}