Exemplo n.º 1
0
        /// <summary>Checks if any merges are now necessary and returns a
        /// {@link MergePolicy.MergeSpecification} if so.  A merge
        /// is necessary when there are more than {@link
        /// #setMergeFactor} segments at a given level.  When
        /// multiple levels have too many segments, this method
        /// will return multiple merges, allowing the {@link
        /// MergeScheduler} to use concurrency.
        /// </summary>
        public override MergeSpecification FindMerges(SegmentInfos infos)
        {
            int numSegments = infos.Count;

            if (Verbose())
            {
                Message("findMerges: " + numSegments + " segments");
            }

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            float[] levels = new float[numSegments];
            float   norm   = (float)System.Math.Log(mergeFactor);

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                long        size = Size(info);

                // Floor tiny segments
                if (size < 1)
                {
                    size = 1;
                }
                levels[i] = (float)System.Math.Log(size) / norm;
            }

            float levelFloor;

            if (minMergeSize <= 0)
            {
                levelFloor = (float)0.0;
            }
            else
            {
                levelFloor = (float)(System.Math.Log(minMergeSize) / norm);
            }

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
            // LEVEL_LOG_SPAN of the max size, or, who has such as
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int start = 0;

            while (start < numSegments)
            {
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start];
                for (int i = 1 + start; i < numSegments; i++)
                {
                    float level = levels[i];
                    if (level > maxLevel)
                    {
                        maxLevel = level;
                    }
                }

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                if (maxLevel < levelFloor)
                {
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                }
                else
                {
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    {
                        levelBottom = levelFloor;
                    }
                }

                int upto = numSegments - 1;
                while (upto >= start)
                {
                    if (levels[upto] >= levelBottom)
                    {
                        break;
                    }
                    upto--;
                }
                if (Verbose())
                {
                    Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
                }

                // Finally, record all merges that are viable at this level:
                int end = start + mergeFactor;
                while (end <= 1 + upto)
                {
                    bool anyTooLarge = false;
                    for (int i = start; i < end; i++)
                    {
                        SegmentInfo info = infos.Info(i);
                        anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
                    }

                    if (!anyTooLarge)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        if (Verbose())
                        {
                            Message("    " + start + " to " + end + ": add this merge");
                        }
                        spec.Add(new OneMerge(infos.Range(start, end), useCompoundFile));
                    }
                    else if (Verbose())
                    {
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                    }

                    start = end;
                    end   = start + mergeFactor;
                }

                start = 1 + upto;
            }

            return(spec);
        }
Exemplo n.º 2
0
        /// <summary> Finds merges necessary to expunge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
        {
            int numSegments = segmentInfos.Count;

            if (Verbose())
            {
                Message("findMergesToExpungeDeletes: " + numSegments + " segments");
            }

            MergeSpecification spec       = new MergeSpecification();
            int firstSegmentWithDeletions = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info     = segmentInfos.Info(i);
                int         delCount = writer.NumDeletedDocs(info);
                if (delCount > 0)
                {
                    if (Verbose())
                    {
                        Message("  segment " + info.name + " has deletions");
                    }
                    if (firstSegmentWithDeletions == -1)
                    {
                        firstSegmentWithDeletions = i;
                    }
                    else if (i - firstSegmentWithDeletions == mergeFactor)
                    {
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        if (Verbose())
                        {
                            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        }
                        spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                        firstSegmentWithDeletions = i;
                    }
                }
                else if (firstSegmentWithDeletions != -1)
                {
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    if (Verbose())
                    {
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    }
                    spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                    firstSegmentWithDeletions = -1;
                }
            }

            if (firstSegmentWithDeletions != -1)
            {
                if (Verbose())
                {
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                }
                spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, numSegments), useCompoundFile));
            }

            return(spec);
        }
Exemplo n.º 3
0
        /// <summary>Returns the merges necessary to optimize the index.
        /// This merge policy defines "optimized" to mean only one
        /// segment in the index, where that segment has no
        /// deletions pending nor separate norms, and it is in
        /// compound file format if the current useCompoundFile
        /// setting is true.  This method returns multiple merges
        /// (mergeFactor at a time) so the {@link MergeScheduler}
        /// in use may make use of concurrency.
        /// </summary>
        public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            MergeSpecification spec;

            System.Diagnostics.Debug.Assert(maxNumSegments > 0);

            if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
            {
                // Find the newest (rightmost) segment that needs to
                // be optimized (other segments may have been flushed
                // since optimize started):
                int last = infos.Count;
                while (last > 0)
                {
                    SegmentInfo info = infos.Info(--last);
                    if (segmentsToOptimize.Contains(info))
                    {
                        last++;
                        break;
                    }
                }

                if (last > 0)
                {
                    spec = new MergeSpecification();

                    // First, enroll all "full" merges (size
                    // mergeFactor) to potentially be run concurrently:
                    while (last - maxNumSegments + 1 >= mergeFactor)
                    {
                        spec.Add(new OneMerge(infos.Range(last - mergeFactor, last), useCompoundFile));
                        last -= mergeFactor;
                    }

                    // Only if there are no full merges pending do we
                    // add a final partial (< mergeFactor segments) merge:
                    if (0 == spec.merges.Count)
                    {
                        if (maxNumSegments == 1)
                        {
                            // Since we must optimize down to 1 segment, the
                            // choice is simple:
                            if (last > 1 || !IsOptimized(infos.Info(0)))
                            {
                                spec.Add(new OneMerge(infos.Range(0, last), useCompoundFile));
                            }
                        }
                        else if (last > maxNumSegments)
                        {
                            // Take care to pick a partial merge that is
                            // least cost, but does not make the index too
                            // lopsided.  If we always just picked the
                            // partial tail then we could produce a highly
                            // lopsided index over time:

                            // We must merge this many segments to leave
                            // maxNumSegments in the index (from when
                            // optimize was first kicked off):
                            int finalMergeSize = last - maxNumSegments + 1;

                            // Consider all possible starting points:
                            long bestSize  = 0;
                            int  bestStart = 0;

                            for (int i = 0; i < last - finalMergeSize + 1; i++)
                            {
                                long sumSize = 0;
                                for (int j = 0; j < finalMergeSize; j++)
                                {
                                    sumSize += Size(infos.Info(j + i));
                                }
                                if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                                {
                                    bestStart = i;
                                    bestSize  = sumSize;
                                }
                            }

                            spec.Add(new OneMerge(infos.Range(bestStart, bestStart + finalMergeSize), useCompoundFile));
                        }
                    }
                }
                else
                {
                    spec = null;
                }
            }
            else
            {
                spec = null;
            }

            return(spec);
        }
Exemplo n.º 4
0
		/// <summary>Checks if any merges are now necessary and returns a
		/// <see cref="MergePolicy.MergeSpecification" /> if so.  A merge
		/// is necessary when there are more than <see cref="MergeFactor" />
		/// segments at a given level.  When
		/// multiple levels have too many segments, this method
		/// will return multiple merges, allowing the <see cref="MergeScheduler" />
		/// to use concurrency. 
		/// </summary>
		public override MergeSpecification FindMerges(SegmentInfos infos)
		{
			
			int numSegments = infos.Count;
			if (Verbose())
				Message("findMerges: " + numSegments + " segments");
			
			// Compute levels, which is just log (base mergeFactor)
			// of the size of each segment
			float[] levels = new float[numSegments];
			float norm = (float) System.Math.Log(mergeFactor);
			
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				long size = Size(info);
				
				// Floor tiny segments
				if (size < 1)
					size = 1;
				levels[i] = (float) System.Math.Log(size) / norm;
			}
			
			float levelFloor;
			if (minMergeSize <= 0)
				levelFloor = (float) 0.0;
			else
			{
				levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
			}
			
			// Now, we quantize the log values into levels.  The
			// first level is any segment whose log size is within
			// LEVEL_LOG_SPAN of the max size, or, who has such as
			// segment "to the right".  Then, we find the max of all
			// other segments and use that to define the next level
			// segment, etc.
			
			MergeSpecification spec = null;
			
			int start = 0;
			while (start < numSegments)
			{
				
				// Find max level of all segments not already
				// quantized.
				float maxLevel = levels[start];
				for (int i = 1 + start; i < numSegments; i++)
				{
					float level = levels[i];
					if (level > maxLevel)
						maxLevel = level;
				}
				
				// Now search backwards for the rightmost segment that
				// falls into this level:
				float levelBottom;
				if (maxLevel < levelFloor)
				// All remaining segments fall into the min level
					levelBottom = - 1.0F;
				else
				{
					levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
					
					// Force a boundary at the level floor
					if (levelBottom < levelFloor && maxLevel >= levelFloor)
						levelBottom = levelFloor;
				}
				
				int upto = numSegments - 1;
				while (upto >= start)
				{
					if (levels[upto] >= levelBottom)
					{
						break;
					}
					upto--;
				}
				if (Verbose())
					Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
				
				// Finally, record all merges that are viable at this level:
				int end = start + mergeFactor;
				while (end <= 1 + upto)
				{
					bool anyTooLarge = false;
					for (int i = start; i < end; i++)
					{
						SegmentInfo info = infos.Info(i);
						anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
					}
					
					if (!anyTooLarge)
					{
						if (spec == null)
							spec = new MergeSpecification();
						if (Verbose())
							Message("    " + start + " to " + end + ": add this merge");
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
					}
					else if (Verbose())
						Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
					
					start = end;
					end = start + mergeFactor;
				}
				
				start = 1 + upto;
			}
			
			return spec;
		}
Exemplo n.º 5
0
		/// <summary> Finds merges necessary to expunge all deletes from the
		/// index.  We simply merge adjacent segments that have
		/// deletes, up to mergeFactor at a time.
		/// </summary>
		public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
		{
			int numSegments = segmentInfos.Count;
			
			if (Verbose())
				Message("findMergesToExpungeDeletes: " + numSegments + " segments");
			
			MergeSpecification spec = new MergeSpecification();
			int firstSegmentWithDeletions = - 1;
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = segmentInfos.Info(i);
				int delCount = writer.NumDeletedDocs(info);
				if (delCount > 0)
				{
					if (Verbose())
						Message("  segment " + info.name + " has deletions");
					if (firstSegmentWithDeletions == - 1)
						firstSegmentWithDeletions = i;
					else if (i - firstSegmentWithDeletions == mergeFactor)
					{
						// We've seen mergeFactor segments in a row with
						// deletions, so force a merge now:
						if (Verbose())
							Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
						firstSegmentWithDeletions = i;
					}
				}
				else if (firstSegmentWithDeletions != - 1)
				{
					// End of a sequence of segments with deletions, so,
					// merge those past segments even if it's fewer than
					// mergeFactor segments
					if (Verbose())
						Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
					firstSegmentWithDeletions = - 1;
				}
			}
			
			if (firstSegmentWithDeletions != - 1)
			{
				if (Verbose())
					Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
			}
			
			return spec;
		}
Exemplo n.º 6
0
		/// <summary>Returns the merges necessary to optimize the index.
		/// This merge policy defines "optimized" to mean only one
		/// segment in the index, where that segment has no
		/// deletions pending nor separate norms, and it is in
		/// compound file format if the current useCompoundFile
		/// setting is true.  This method returns multiple merges
		/// (mergeFactor at a time) so the <see cref="MergeScheduler" />
		/// in use may make use of concurrency. 
		/// </summary>
		public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
		{
			MergeSpecification spec;
			
			System.Diagnostics.Debug.Assert(maxNumSegments > 0);
			
			if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
			{
				
				// Find the newest (rightmost) segment that needs to
				// be optimized (other segments may have been flushed
				// since optimize started):
				int last = infos.Count;
				while (last > 0)
				{
					SegmentInfo info = infos.Info(--last);
					if (segmentsToOptimize.Contains(info))
					{
						last++;
						break;
					}
				}
				
				if (last > 0)
				{
					
					spec = new MergeSpecification();
					
					// First, enroll all "full" merges (size
					// mergeFactor) to potentially be run concurrently:
					while (last - maxNumSegments + 1 >= mergeFactor)
					{
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
						last -= mergeFactor;
					}
					
					// Only if there are no full merges pending do we
					// add a final partial (< mergeFactor segments) merge:
					if (0 == spec.merges.Count)
					{
						if (maxNumSegments == 1)
						{
							
							// Since we must optimize down to 1 segment, the
							// choice is simple:
							if (last > 1 || !IsOptimized(infos.Info(0)))
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
						}
						else if (last > maxNumSegments)
						{
							
							// Take care to pick a partial merge that is
							// least cost, but does not make the index too
							// lopsided.  If we always just picked the
							// partial tail then we could produce a highly
							// lopsided index over time:
							
							// We must merge this many segments to leave
							// maxNumSegments in the index (from when
							// optimize was first kicked off):
							int finalMergeSize = last - maxNumSegments + 1;
							
							// Consider all possible starting points:
							long bestSize = 0;
							int bestStart = 0;
							
							for (int i = 0; i < last - finalMergeSize + 1; i++)
							{
								long sumSize = 0;
								for (int j = 0; j < finalMergeSize; j++)
									sumSize += Size(infos.Info(j + i));
								if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
								{
									bestStart = i;
									bestSize = sumSize;
								}
							}

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
						}
					}
				}
				else
					spec = null;
			}
			else
				spec = null;
			
			return spec;
		}