Example #1
        internal int docShift;                                 // total # deleted docs that were compacted by this merge

        public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
        {
            this.docMaps = docMaps;
            SegmentInfo firstSegment = merge.segments.Info(0);
            int         i            = 0;

            while (true)
            {
                SegmentInfo info = infos.Info(i);
                if (info.Equals(firstSegment))
                {
                    break;
                }
                minDocID += info.docCount;
                i++;
            }

            int numDocs = 0;

            for (int j = 0; j < docMaps.Length; i++, j++)
            {
                numDocs += infos.Info(i).docCount;
                System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
            }
            maxDocID = minDocID + numDocs;

            starts    = new int[docMaps.Length];
            newStarts = new int[docMaps.Length];

            starts[0]    = minDocID;
            newStarts[0] = minDocID;
            for (i = 1; i < docMaps.Length; i++)
            {
                int lastDocCount = merge.segments.Info(i - 1).docCount;
                starts[i]    = starts[i - 1] + lastDocCount;
                newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
            }
            docShift = numDocs - mergedDocCount;

            // There are rare cases when docShift is 0.  It happens
            // if you try to delete a docID that's out of bounds,
            // because the SegmentReader still allocates deletedDocs
            // and pretends it has deletions ... so we can't make
            // this assert here
            // assert docShift > 0;

            // Make sure it all adds up:
            System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts[docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts[docMaps.Length - 1]));
        }
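
A minimal sketch (not the verbatim Lucene.NET method) of how the arrays built above are consumed: docIDs before the merged range pass through unchanged, docIDs after it shift down by docShift, and docIDs inside it are located by binary search over starts and translated through the per-segment docMaps (a null map meaning that segment had no deletions):

        // Hedged sketch: maps a pre-merge docID to its post-merge value using the
        // arrays the constructor just built.  docMaps[seg][offset] is assumed to
        // give the within-segment position after deleted docs are squeezed out.
        internal int Remap(int oldDocID)
        {
            if (oldDocID < minDocID)
                return oldDocID;            // before the merged range: unchanged
            if (oldDocID >= maxDocID)
                return oldDocID - docShift; // after the merged range: shifted down

            // Find the owning segment: the largest seg with starts[seg] <= oldDocID.
            int lo = 0, hi = docMaps.Length - 1;
            while (lo < hi)
            {
                int mid = (lo + hi + 1) / 2; // upper midpoint so 'lo' always advances
                if (starts[mid] <= oldDocID) lo = mid; else hi = mid - 1;
            }

            int offset = oldDocID - starts[lo];
            return docMaps[lo] == null
                ? newStarts[lo] + offset               // no deletions in this segment
                : newStarts[lo] + docMaps[lo][offset]; // remap around deleted docs
        }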
Example #2
            protected internal override object DoBody(string segmentFileName)
            {
                var sis = new SegmentInfos();

                sis.Read(directory, segmentFileName);
                var readers = new SegmentReader[sis.Count];

                for (int i = sis.Count - 1; i >= 0; i--)
                {
                    System.IO.IOException prior = null;
                    bool success = false;
                    try
                    {
                        readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
                        success    = true;
                    }
                    catch (System.IO.IOException ex)
                    {
                        prior = ex;
                    }
                    finally
                    {
                        if (!success)
                        {
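                            // DisposeWhileHandlingException disposes whatever readers were
                            // opened so far and, when 'prior' is non-null, rethrows it.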
                            IOUtils.DisposeWhileHandlingException(prior, readers);
                        }
                    }
                }
                return(new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false));
            }
Example #3
        internal int[] starts; // used for binary search of mapped docID

        #endregion Fields

        #region Constructors

        public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
        {
            this.docMaps = docMaps;
            SegmentInfo firstSegment = merge.segments.Info(0);
            int i = 0;
            while (true)
            {
                SegmentInfo info = infos.Info(i);
                if (info.Equals(firstSegment))
                    break;
                minDocID += info.docCount;
                i++;
            }

            int numDocs = 0;
            for (int j = 0; j < docMaps.Length; i++, j++)
            {
                numDocs += infos.Info(i).docCount;
                System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
            }
            maxDocID = minDocID + numDocs;

            starts = new int[docMaps.Length];
            newStarts = new int[docMaps.Length];

            starts[0] = minDocID;
            newStarts[0] = minDocID;
            for (i = 1; i < docMaps.Length; i++)
            {
                int lastDocCount = merge.segments.Info(i - 1).docCount;
                starts[i] = starts[i - 1] + lastDocCount;
                newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
            }
            docShift = numDocs - mergedDocCount;

            // There are rare cases when docShift is 0.  It happens
            // if you try to delete a docID that's out of bounds,
            // because the SegmentReader still allocates deletedDocs
            // and pretends it has deletions ... so we can't make
            // this assert here
            // assert docShift > 0;

            // Make sure it all adds up:
            System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts[docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts[docMaps.Length - 1]));
        }
Example #4
        /// <summary>
        /// Returns the merges necessary to merge the index, taking the max merge
        /// size or max merge docs into consideration. This method attempts to respect
        /// the <paramref name="maxNumSegments"/> parameter; however, due to size
        /// constraints, more than that number of segments may remain in the
        /// index. Also, this method does not guarantee that exactly
        /// <paramref name="maxNumSegments"/> segments will remain; it may leave &lt;= that number.
        /// </summary>
        private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last)
        {
            MergeSpecification        spec     = new MergeSpecification();
            IList <SegmentCommitInfo> segments = infos.AsList();

            int start = last - 1;

            while (start >= 0)
            {
                SegmentCommitInfo info = infos.Info(start);
                if (Size(info) > m_maxMergeSizeForForcedMerge || SizeDocs(info) > m_maxMergeDocs)
                {
                    if (IsVerbose)
                    {
                        Message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + m_maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + m_maxMergeDocs + ")");
                    }
                    // need to skip that segment + add a merge for the 'right' segments,
                    // unless there is only 1 which is merged.
                    if (last - start - 1 > 1 || (start != last - 1 && !IsMerged(infos, infos.Info(start + 1))))
                    {
                        // there is more than 1 segment to the right of
                        // this one, or a mergeable single segment.
                        spec.Add(new OneMerge(segments.SubList(start + 1, last)));
                    }
                    last = start;
                }
                else if (last - start == m_mergeFactor)
                {
                    // mergeFactor eligible segments were found, add them as a merge.
                    spec.Add(new OneMerge(segments.SubList(start, last)));
                    last = start;
                }
                --start;
            }

            // Add any left-over segments, unless there is just 1
            // already fully merged
            if (last > 0 && (++start + 1 < last || !IsMerged(infos, infos.Info(start))))
            {
                spec.Add(new OneMerge(segments.SubList(start, last)));
            }

            return(spec.Merges.Count == 0 ? null : spec);
        }
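
Note that SubList(from, to) here is assumed to mirror Java's half-open List.subList from the Lucene.NET support library, so the upper bound is exclusive. A short trace with hypothetical sizes:

        // Hypothetical: segments s0..s9 (last = 10), m_mergeFactor = 4.
        // If s6 exceeds m_maxMergeSizeForForcedMerge, the scan emits
        // SubList(7, 10) => { s7, s8, s9 } (index 7 inclusive, 10 exclusive),
        // sets last = 6, and keeps walking left from s5.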
Example #5
        private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            int         numSegments   = infos.Count;
            int         numToOptimize = 0;
            SegmentInfo optimizeInfo  = null;

            for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                if (segmentsToOptimize.Contains(info))
                {
                    numToOptimize++;
                    optimizeInfo = info;
                }
            }

            return(numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)));
        }
Example #6
        /// <summary>
        /// Returns <c>true</c> if the number of segments eligible for
        /// merging is less than or equal to the specified
        /// <paramref name="maxNumSegments"/>.
        /// </summary>
        protected virtual bool IsMerged(SegmentInfos infos, int maxNumSegments, IDictionary <SegmentCommitInfo, bool?> segmentsToMerge)
        {
            int numSegments             = infos.Count;
            int numToMerge              = 0;
            SegmentCommitInfo mergeInfo = null;
            bool segmentIsOriginal      = false;

            for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                bool? isOriginal;
                segmentsToMerge.TryGetValue(info, out isOriginal);
                if (isOriginal != null)
                {
                    segmentIsOriginal = isOriginal.Value;
                    numToMerge++;
                    mergeInfo = info;
                }
            }

            return(numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || IsMerged(infos, mergeInfo)));
        }
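
The nullable-bool lookup above distinguishes "never requested" from "requested but not an original segment"; a tiny self-contained illustration of the pattern:

        var requested = new System.Collections.Generic.Dictionary<string, bool?>
        {
            ["_0"] = true,   // requested, original
            ["_1"] = false,  // requested, not original
        };
        requested.TryGetValue("_9", out bool? a); // a == null  -> never requested
        requested.TryGetValue("_1", out bool? b); // b == false -> requested, not original
        requested.TryGetValue("_0", out bool? c); // c == true  -> requested, original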
Example #7
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Count;

            IList <SegmentReader> readers = new List <SegmentReader>();
            Directory             dir     = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int          infosUpto    = 0;
            bool         success      = false;

            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    Debug.Assert(info.Info.Dir == dir);
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return(result);
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
#pragma warning disable 168
                        catch (Exception th)
#pragma warning restore 168
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
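
This internal method backs the public near-real-time API; a minimal usage sketch in the Lucene.NET 4.x flavor (the index path and analyzer choice are assumptions):

        using Lucene.Net.Analysis.Standard;
        using Lucene.Net.Index;
        using Lucene.Net.Store;
        using Lucene.Net.Util;

        var dir = FSDirectory.Open("/path/to/index"); // hypothetical path
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48,
                                           new StandardAnalyzer(LuceneVersion.LUCENE_48));
        var writer = new IndexWriter(dir, config);

        // Near-real-time reader: sees the writer's uncommitted changes.
        DirectoryReader reader = DirectoryReader.Open(writer, applyAllDeletes: true);

        // Later: cheap refresh that reuses unchanged sub-readers; null means no change.
        DirectoryReader changed = DirectoryReader.OpenIfChanged(reader);
        if (changed != null)
        {
            reader.Dispose();
            reader = changed;
        }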
Example #8
        /// <summary>
        /// This constructor is only used for <see cref="DoOpenIfChanged(SegmentInfos)"/> </summary>
        private static DirectoryReader Open(Directory directory, SegmentInfos infos, IList <IndexReader> oldReaders, int termInfosIndexDivisor) // LUCENENET: Changed from AtomicReader to IndexReader to eliminate casting from the 1 place this is called from
        {
            // we put the old SegmentReaders in a map, that allows us
            // to lookup a reader using its segment name
            IDictionary <string, int?> segmentReaders = new Dictionary <string, int?>();

            if (oldReaders != null)
            {
                // create a Map SegmentName->SegmentReader
                for (int i = 0, c = oldReaders.Count; i < c; i++)
                {
                    SegmentReader sr = (SegmentReader)oldReaders[i];
                    segmentReaders[sr.SegmentName] = i;
                }
            }

            SegmentReader[] newReaders = new SegmentReader[infos.Count];

            // remember which readers are shared between the old and the re-opened
            // DirectoryReader - we have to incRef those readers
            bool[] readerShared = new bool[infos.Count];

            for (int i = infos.Count - 1; i >= 0; i--)
            {
                // find SegmentReader for this segment
                int? oldReaderIndex;
                segmentReaders.TryGetValue(infos.Info(i).Info.Name, out oldReaderIndex);
                if (oldReaderIndex == null)
                {
                    // this is a new segment, no old SegmentReader can be reused
                    newReaders[i] = null;
                }
                else
                {
                    // there is an old reader for this segment - we'll try to reopen it
                    newReaders[i] = (SegmentReader)oldReaders[(int)oldReaderIndex];
                }

                bool      success = false;
                Exception prior   = null;
                try
                {
                    SegmentReader newReader;
                    if (newReaders[i] == null || infos.Info(i).Info.UseCompoundFile != newReaders[i].SegmentInfo.Info.UseCompoundFile)
                    {
                        // this is a new reader; in case we hit an exception we can close it safely
                        newReader       = new SegmentReader(infos.Info(i), termInfosIndexDivisor, IOContext.READ);
                        readerShared[i] = false;
                        newReaders[i]   = newReader;
                    }
                    else
                    {
                        if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen && newReaders[i].SegmentInfo.FieldInfosGen == infos.Info(i).FieldInfosGen)
                        {
                            // No change; this reader will be shared between
                            // the old and the new one, so we must incRef
                            // it:
                            readerShared[i] = true;
                            newReaders[i].IncRef();
                        }
                        else
                        {
                            // there are changes to the reader, either liveDocs or DV updates
                            readerShared[i] = false;
                            // Steal the ref returned by SegmentReader ctor:
                            Debug.Assert(infos.Info(i).Info.Dir == newReaders[i].SegmentInfo.Info.Dir);
                            Debug.Assert(infos.Info(i).HasDeletions || infos.Info(i).HasFieldUpdates);
                            if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen)
                            {
                                // only DV updates
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i], newReaders[i].LiveDocs, newReaders[i].NumDocs);
                            }
                            else
                            {
                                // both DV and liveDocs have changed
                                newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i]);
                            }
                        }
                    }
                    success = true;
                }
                catch (Exception ex)
                {
                    prior = ex;
                }
                finally
                {
                    if (!success)
                    {
                        for (i++; i < infos.Count; i++)
                        {
                            if (newReaders[i] != null)
                            {
                                try
                                {
                                    if (!readerShared[i])
                                    {
                                        // this is a new subReader that is not used by the old one,
                                        // we can close it
                                        newReaders[i].Dispose();
                                    }
                                    else
                                    {
                                        // this subReader is also used by the old reader, so instead
                                        // closing we must decRef it
                                        newReaders[i].DecRef();
                                    }
                                }
                                catch (Exception t)
                                {
                                    if (prior == null)
                                    {
                                        prior = t;
                                    }
                                }
                            }
                        }
                    }
                    // throw the first exception
                    IOUtils.ReThrow(prior);
                }
            }
            return(new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false));
        }
Example #9
 /// <summary>This constructor is only used for <see cref="Reopen()" /> </summary>
 internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
                          IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
 {
     this.internalDirectory = directory;
     this.readOnly = readOnly;
     this.segmentInfos = infos;
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(infos.Files(directory, true));
     }
     
     // we put the old SegmentReaders in a map, that allows us
     // to lookup a reader using its segment name
     IDictionary<string, int> segmentReaders = new HashMap<string, int>();
     
     if (oldReaders != null)
     {
         // create a Map SegmentName->SegmentReader
         for (int i = 0; i < oldReaders.Length; i++)
         {
             segmentReaders[oldReaders[i].SegmentName] = i;
         }
     }
     
     var newReaders = new SegmentReader[infos.Count];
     
     // remember which readers are shared between the old and the re-opened
     // DirectoryReader - we have to incRef those readers
     var readerShared = new bool[infos.Count];
     
     for (int i = infos.Count - 1; i >= 0; i--)
     {
         // find SegmentReader for this segment
         if (!segmentReaders.ContainsKey(infos.Info(i).name))
         {
             // this is a new segment, no old SegmentReader can be reused
             newReaders[i] = null;
         }
         else
         {
             // there is an old reader for this segment - we'll try to reopen it
             newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
         }
         
         bool success = false;
         try
         {
             SegmentReader newReader;
             if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
             {
                 
                 // We should never see a totally new segment during cloning
                 System.Diagnostics.Debug.Assert(!doClone);
                 
                 // this is a new reader; in case we hit an exception we can close it safely
                 newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
             }
             else
             {
                 newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
             }
             if (newReader == newReaders[i])
             {
                 // this reader will be shared between the old and the new one,
                 // so we must incRef it
                 readerShared[i] = true;
                 newReader.IncRef();
             }
             else
             {
                 readerShared[i] = false;
                 newReaders[i] = newReader;
             }
             success = true;
         }
         finally
         {
             if (!success)
             {
                 for (i++; i < infos.Count; i++)
                 {
                     if (newReaders[i] != null)
                     {
                         try
                         {
                             if (!readerShared[i])
                             {
                                 // this is a new subReader that is not used by the old one,
                                 // we can close it
                                 newReaders[i].Close();
                             }
                             else
                             {
                                 // this subReader is also used by the old reader, so instead
                                 // closing we must decRef it
                                 newReaders[i].DecRef();
                             }
                         }
                         catch (System.IO.IOException)
                         {
                             // keep going - we want to clean up as much as possible
                         }
                     }
                 }
             }
         }
     }
     
     // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
     Initialize(newReaders);
     
     // try to copy unchanged norms from the old normsCache to the new one
     if (oldNormsCache != null)
     {
         foreach(var entry in oldNormsCache)
         {
             String field = entry.Key;
             if (!HasNorms(field))
             {
                 continue;
             }
             
             byte[] oldBytes = entry.Value;
             
             var bytes = new byte[MaxDoc];
             
             for (int i = 0; i < subReaders.Length; i++)
             {
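                  // NOTE: the support HashMap returns default(int) == 0 for a missing
                  // key rather than throwing, so this eager lookup is safe; the
                  // ContainsKey check below guards against using a bogus index.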
                 int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];
                 
                 // this SegmentReader was not re-opened, we can copy all of its norms 
                 if (segmentReaders.ContainsKey(subReaders[i].SegmentName) &&
                      (oldReaders[oldReaderIndex] == subReaders[i]
                        || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
                 {
                     // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                     // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                     // which is synchronized
                     Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                 }
                 else
                 {
                     subReaders[i].Norms(field, bytes, starts[i]);
                 }
             }
             
             normsCache[field] = bytes; // update cache
         }
     }
 }
Example #10
        /// <summary>
        /// Finds merges necessary to force-merge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        public override MergeSpecification FindForcedDeletesMerges(SegmentInfos segmentInfos)
        {
            var segments    = segmentInfos.AsList();
            int numSegments = segments.Count;

            if (IsVerbose)
            {
                Message("findForcedDeleteMerges: " + numSegments + " segments");
            }

            var         spec = new MergeSpecification();
            int         firstSegmentWithDeletions = -1;
            IndexWriter w = m_writer.Get();

            Debug.Assert(w != null);
            for (int i = 0; i < numSegments; i++)
            {
                SegmentCommitInfo info = segmentInfos.Info(i);
                int delCount           = w.NumDeletedDocs(info);
                if (delCount > 0)
                {
                    if (IsVerbose)
                    {
                        Message("  segment " + info.Info.Name + " has deletions");
                    }
                    if (firstSegmentWithDeletions == -1)
                    {
                        firstSegmentWithDeletions = i;
                    }
                    else if (i - firstSegmentWithDeletions == m_mergeFactor)
                    {
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        if (IsVerbose)
                        {
                            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        }
                        spec.Add(new OneMerge(segments.SubList(firstSegmentWithDeletions, i)));
                        firstSegmentWithDeletions = i;
                    }
                }
                else if (firstSegmentWithDeletions != -1)
                {
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    if (IsVerbose)
                    {
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    }
                    spec.Add(new OneMerge(segments.SubList(firstSegmentWithDeletions, i)));
                    firstSegmentWithDeletions = -1;
                }
            }

            if (firstSegmentWithDeletions != -1)
            {
                if (IsVerbose)
                {
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                }
                spec.Add(new OneMerge(segments.SubList(firstSegmentWithDeletions, numSegments)));
            }

            return(spec);
        }
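
A short trace of the windowing logic (deletion pattern and mergeFactor are hypothetical):

        // m_mergeFactor = 3, deletions in segments 0, 1, 2, 3 and 6 of 8:
        //   i = 3: window 0..2 is mergeFactor wide -> OneMerge(segments.SubList(0, 3)), window restarts at 3
        //   i = 4: segment 4 has no deletions      -> OneMerge(segments.SubList(3, 4)), window closes
        //   after the loop: trailing window        -> OneMerge(segments.SubList(6, 7))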
Example #11
        /// <summary>
        /// Checks if any merges are now necessary and returns a
        /// <see cref="MergePolicy.MergeSpecification"/> if so.  A merge
        /// is necessary when there are more than
        /// <see cref="MergeFactor"/> segments at a given level.  When
        /// multiple levels have too many segments, this method
        /// will return multiple merges, allowing the
        /// <see cref="MergeScheduler"/> to use concurrency.
        /// </summary>
        public override MergeSpecification FindMerges(MergeTrigger mergeTrigger, SegmentInfos infos)
        {
            int numSegments = infos.Count;

            if (IsVerbose)
            {
                Message("findMerges: " + numSegments + " segments");
            }

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            IList <SegmentInfoAndLevel> levels = new List <SegmentInfoAndLevel>();
            var norm = (float)Math.Log(m_mergeFactor);

            ICollection <SegmentCommitInfo> mergingSegments = m_writer.Get().MergingSegments;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                long size = Size(info);

                // Floor tiny segments
                if (size < 1)
                {
                    size = 1;
                }

                SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float)Math.Log(size) / norm, i);
                levels.Add(infoLevel);

                if (IsVerbose)
                {
                    long   segBytes = SizeBytes(info);
                    string extra    = mergingSegments.Contains(info) ? " [merging]" : "";
                    if (size >= m_maxMergeSize)
                    {
                        extra += " [skip: too large]";
                    }
                    Message("seg=" + m_writer.Get().SegString(info) + " level=" + infoLevel.level + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00} MB", segBytes / 1024 / 1024.0) + extra);
                }
            }

            float levelFloor;

            if (m_minMergeSize <= 0)
            {
                levelFloor = (float)0.0;
            }
            else
            {
                levelFloor = (float)(Math.Log(m_minMergeSize) / norm);
            }

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
            // LEVEL_LOG_SPAN of the max size, or that has such a
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int numMergeableSegments = levels.Count;

            int start = 0;

            while (start < numMergeableSegments)
            {
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start].level;
                for (int i = 1 + start; i < numMergeableSegments; i++)
                {
                    float level = levels[i].level;
                    if (level > maxLevel)
                    {
                        maxLevel = level;
                    }
                }

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                if (maxLevel <= levelFloor)
                {
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                }
                else
                {
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    {
                        levelBottom = levelFloor;
                    }
                }

                int upto = numMergeableSegments - 1;
                while (upto >= start)
                {
                    if (levels[upto].level >= levelBottom)
                    {
                        break;
                    }
                    upto--;
                }
                if (IsVerbose)
                {
                    Message("  level " + levelBottom.ToString("0.0") + " to " + maxLevel.ToString("0.0") + ": " + (1 + upto - start) + " segments");
                }

                // Finally, record all merges that are viable at this level:
                int end = start + m_mergeFactor;
                while (end <= 1 + upto)
                {
                    bool anyTooLarge = false;
                    bool anyMerging  = false;
                    for (int i = start; i < end; i++)
                    {
                        SegmentCommitInfo info = levels[i].info;
                        anyTooLarge |= (Size(info) >= m_maxMergeSize || SizeDocs(info) >= m_maxMergeDocs);
                        if (mergingSegments.Contains(info))
                        {
                            anyMerging = true;
                            break;
                        }
                    }

                    if (anyMerging)
                    {
                        // skip
                    }
                    else if (!anyTooLarge)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        IList <SegmentCommitInfo> mergeInfos = new List <SegmentCommitInfo>();
                        for (int i = start; i < end; i++)
                        {
                            mergeInfos.Add(levels[i].info);
                            Debug.Assert(infos.Contains(levels[i].info));
                        }
                        if (IsVerbose)
                        {
                            Message("  add merge=" + m_writer.Get().SegString(mergeInfos) + " start=" + start + " end=" + end);
                        }
                        spec.Add(new OneMerge(mergeInfos));
                    }
                    else if (IsVerbose)
                    {
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                    }

                    start = end;
                    end   = start + m_mergeFactor;
                }

                start = 1 + upto;
            }

            return(spec);
        }
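
To make the level arithmetic concrete (segment sizes are hypothetical; LEVEL_LOG_SPAN is 0.75 in LogMergePolicy):

        // With m_mergeFactor = 10, norm = ln(10), so level = log10(size):
        //   size 1,000,000 -> level 6.00;  size 300,000 -> level ~5.48;  size 1,000 -> level 3.00
        // The top tier spans levels [6.00 - 0.75, 6.00], so the 1,000,000- and
        // 300,000-unit segments quantize into the same tier, while the 1,000-unit
        // segment is handled in a later, lower tier (or at the levelFloor).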
Example #12
 // Used by near real-time search
 internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
 {
     this.internalDirectory = writer.Directory;
     this.readOnly = true;
     segmentInfos = infos;
     segmentInfosStart = (SegmentInfos) infos.Clone();
     this.termInfosIndexDivisor = termInfosIndexDivisor;
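      // NOTE: readOnly was hard-coded to true just above, so the sync'd-files
      // check below can never run in this constructor; it mirrors the other
      // DirectoryReader constructors.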
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(infos.Files(internalDirectory, true));
     }
     
     // IndexWriter synchronizes externally before calling
     // us, which ensures infos will not change; so there's
     // no need to process segments in reverse order
     int numSegments = infos.Count;
     var readers = new SegmentReader[numSegments];
     Directory dir = writer.Directory;
     int upto = 0;
     
     for (int i = 0; i < numSegments; i++)
     {
         bool success = false;
         try
         {
             SegmentInfo info = infos.Info(i);
             if (info.dir == dir)
             {
                 readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
             }
             success = true;
         }
         finally
         {
             if (!success)
             {
                 // Close all readers we had opened:
                 for (upto--; upto >= 0; upto--)
                 {
                     try
                     {
                         readers[upto].Close();
                     }
                     catch (System.Exception)
                     {
                         // keep going - we want to clean up as much as possible
                     }
                 }
             }
         }
     }
     
     this.writer = writer;
     
     if (upto < readers.Length)
     {
         // This means some segments were in a foreign Directory
         var newReaders = new SegmentReader[upto];
         Array.Copy(readers, 0, newReaders, 0, upto);
         readers = newReaders;
     }
     
     Initialize(readers);
 }
Example #13
		/// <summary>Checks if any merges are now necessary and returns a
		/// <see cref="MergePolicy.MergeSpecification" /> if so.  A merge
		/// is necessary when there are more than <see cref="MergeFactor" />
		/// segments at a given level.  When
		/// multiple levels have too many segments, this method
		/// will return multiple merges, allowing the <see cref="MergeScheduler" />
		/// to use concurrency. 
		/// </summary>
		public override MergeSpecification FindMerges(SegmentInfos infos)
		{
			
			int numSegments = infos.Count;
			if (Verbose())
				Message("findMerges: " + numSegments + " segments");
			
			// Compute levels, which is just log (base mergeFactor)
			// of the size of each segment
			float[] levels = new float[numSegments];
			float norm = (float) System.Math.Log(mergeFactor);
			
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				long size = Size(info);
				
				// Floor tiny segments
				if (size < 1)
					size = 1;
				levels[i] = (float) System.Math.Log(size) / norm;
			}
			
			float levelFloor;
			if (minMergeSize <= 0)
				levelFloor = (float) 0.0;
			else
			{
				levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
			}
			
			// Now, we quantize the log values into levels.  The
			// first level is any segment whose log size is within
			// LEVEL_LOG_SPAN of the max size, or that has such a
			// segment "to the right".  Then, we find the max of all
			// other segments and use that to define the next level
			// segment, etc.
			
			MergeSpecification spec = null;
			
			int start = 0;
			while (start < numSegments)
			{
				
				// Find max level of all segments not already
				// quantized.
				float maxLevel = levels[start];
				for (int i = 1 + start; i < numSegments; i++)
				{
					float level = levels[i];
					if (level > maxLevel)
						maxLevel = level;
				}
				
				// Now search backwards for the rightmost segment that
				// falls into this level:
				float levelBottom;
				if (maxLevel < levelFloor)
					// All remaining segments fall into the min level
					levelBottom = -1.0F;
				else
				{
					levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
					
					// Force a boundary at the level floor
					if (levelBottom < levelFloor && maxLevel >= levelFloor)
						levelBottom = levelFloor;
				}
				
				int upto = numSegments - 1;
				while (upto >= start)
				{
					if (levels[upto] >= levelBottom)
					{
						break;
					}
					upto--;
				}
				if (Verbose())
					Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
				
				// Finally, record all merges that are viable at this level:
				int end = start + mergeFactor;
				while (end <= 1 + upto)
				{
					bool anyTooLarge = false;
					for (int i = start; i < end; i++)
					{
						SegmentInfo info = infos.Info(i);
						anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
					}
					
					if (!anyTooLarge)
					{
						if (spec == null)
							spec = new MergeSpecification();
						if (Verbose())
							Message("    " + start + " to " + end + ": add this merge");
                        spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
					}
					else if (Verbose())
						Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
					
					start = end;
					end = start + mergeFactor;
				}
				
				start = 1 + upto;
			}
			
			return spec;
		}
Example #14
		/// <summary> Merges all segments from an array of indexes into this
		/// index.
		/// 
		/// <p/>This may be used to parallelize batch indexing.  A large document
		/// collection can be broken into sub-collections.  Each sub-collection can be
		/// indexed in parallel, on a different thread, process or machine.  The
		/// complete index can then be created by merging sub-collection indexes
		/// with this method.
		/// 
		/// <p/><b>NOTE:</b> the index in each Directory must not be
		/// changed (opened by a writer) while this method is
		/// running.  This method does not acquire a write lock in
		/// each input Directory, so it is up to the caller to
		/// enforce this.
		/// 
		/// <p/><b>NOTE:</b> while this is running, any attempts to
		/// add or delete documents (with another thread) will be
		/// paused until this method completes.
		/// 
		/// <p/>This method is transactional in how Exceptions are
		/// handled: it does not commit a new segments_N file until
		/// all indexes are added.  This means if an Exception
		/// occurs (for example disk full), then either no indexes
		/// will have been added or they all will have been.<p/>
		/// 
		/// <p/>Note that this requires temporary free space in the
		/// Directory up to 2X the sum of all input indexes
		/// (including the starting index).  If readers/searchers
		/// are open against the starting index, then temporary
		/// free space required will be higher by the size of the
		/// starting index (see <see cref="Optimize()" /> for details).
		/// <p/>
		/// 
		/// <p/>Once this completes, the final size of the index
		/// will be less than the sum of all input index sizes
		/// (including the starting index).  It could be quite a
		/// bit smaller (if there were many pending deletes) or
		/// just slightly smaller.<p/>
		/// 
		/// <p/>
		/// This requires this index not be among those to be added.
		/// 
		/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryException
		/// you should immediately close the writer.  See <a
		/// href="#OOME">above</a> for details.<p/>
		/// 
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		public virtual void  AddIndexesNoOptimize(params Directory[] dirs)
		{
			
			EnsureOpen();
			
			NoDupDirs(dirs);
			
			// Do not allow add docs or deletes while we are running:
			docWriter.PauseAllThreads();
			
			try
			{
				if (infoStream != null)
					Message("flush at addIndexesNoOptimize");
				Flush(true, false, true);
				
				bool success = false;
				
				StartTransaction(false);
				
				try
				{
					
					int docCount = 0;
					lock (this)
					{
						EnsureOpen();
						
						for (int i = 0; i < dirs.Length; i++)
						{
							if (directory == dirs[i])
							{
								// cannot add this index: segments may be deleted in merge before added
								throw new System.ArgumentException("Cannot add this index to itself");
							}
							
							SegmentInfos sis = new SegmentInfos(); // read infos from dir
							sis.Read(dirs[i]);
							for (int j = 0; j < sis.Count; j++)
							{
								SegmentInfo info = sis.Info(j);
								System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
								docCount += info.docCount;
								segmentInfos.Add(info); // add each info
							}
						}
					}
					
					// Notify DocumentsWriter that the flushed count just increased
					docWriter.UpdateFlushedDocCount(docCount);
					
					MaybeMerge();
					
					EnsureOpen();
					
					// If after merging there remain segments in the index
					// that are in a different directory, just copy these
					// over into our index.  This is necessary (before
					// finishing the transaction) to avoid leaving the
					// index in an unusable (inconsistent) state.
					ResolveExternalSegments();
					
					EnsureOpen();
					
					success = true;
				}
				finally
				{
					if (success)
					{
						CommitTransaction();
					}
					else
					{
						RollbackTransaction();
					}
				}
			}
			catch (System.OutOfMemoryException oom)
			{
				HandleOOM(oom, "addIndexesNoOptimize");
			}
			finally
			{
				if (docWriter != null)
				{
					docWriter.ResumeAllThreads();
				}
			}
		}
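
A hedged usage sketch for the method above (Lucene.NET 3.x-era API; all paths are illustrative):

        using Lucene.Net.Analysis.Standard;
        using Lucene.Net.Index;
        using Lucene.Net.Store;

        var target = new IndexWriter(
            FSDirectory.Open(new System.IO.DirectoryInfo("/path/main")),
            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
            IndexWriter.MaxFieldLength.UNLIMITED);

        var part1 = FSDirectory.Open(new System.IO.DirectoryInfo("/path/part1"));
        var part2 = FSDirectory.Open(new System.IO.DirectoryInfo("/path/part2"));

        target.AddIndexesNoOptimize(part1, part2); // transactional: all or nothing
        target.Commit();
        target.Close();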
Example #15
 /// <summary>Construct reading the named set of readers. </summary>
 internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
 {
     internalDirectory = directory;
     this.readOnly = readOnly;
     this.segmentInfos = sis;
     this.deletionPolicy = deletionPolicy;
     this.termInfosIndexDivisor = termInfosIndexDivisor;
     
     if (!readOnly)
     {
         // We assume that this segments_N was previously
         // properly sync'd:
         synced.UnionWith(sis.Files(directory, true));
     }
     
     // To reduce the chance of hitting FileNotFound
     // (and having to retry), we open segments in
     // reverse because IndexWriter merges & deletes
     // the newest segments first.
     
     var readers = new SegmentReader[sis.Count];
     for (int i = sis.Count - 1; i >= 0; i--)
     {
         bool success = false;
         try
         {
             readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
             success = true;
         }
         finally
         {
             if (!success)
             {
                 // Close all readers we had opened:
                 for (i++; i < sis.Count; i++)
                 {
                     try
                     {
                         readers[i].Close();
                     }
                     catch (System.Exception)
                     {
                         // keep going - we want to clean up as much as possible
                     }
                 }
             }
         }
     }
     
     Initialize(readers);
 }
Example #16
        /// <summary> Finds merges necessary to expunge all deletes from the
        /// index.  We simply merge adjacent segments that have
        /// deletes, up to mergeFactor at a time.
        /// </summary>
        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
        {
            int numSegments = segmentInfos.Count;

            if (Verbose())
            {
                Message("findMergesToExpungeDeletes: " + numSegments + " segments");
            }

            MergeSpecification spec       = new MergeSpecification();
            int firstSegmentWithDeletions = -1;

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info     = segmentInfos.Info(i);
                int         delCount = writer.NumDeletedDocs(info);
                if (delCount > 0)
                {
                    if (Verbose())
                    {
                        Message("  segment " + info.name + " has deletions");
                    }
                    if (firstSegmentWithDeletions == -1)
                    {
                        firstSegmentWithDeletions = i;
                    }
                    else if (i - firstSegmentWithDeletions == mergeFactor)
                    {
                        // We've seen mergeFactor segments in a row with
                        // deletions, so force a merge now:
                        if (Verbose())
                        {
                            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        }
                        spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                        firstSegmentWithDeletions = i;
                    }
                }
                else if (firstSegmentWithDeletions != -1)
                {
                    // End of a sequence of segments with deletions, so,
                    // merge those past segments even if it's fewer than
                    // mergeFactor segments
                    if (Verbose())
                    {
                        Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    }
                    spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                    firstSegmentWithDeletions = -1;
                }
            }

            if (firstSegmentWithDeletions != -1)
            {
                if (Verbose())
                {
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                }
                spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, numSegments), useCompoundFile));
            }

            return(spec);
        }
Example #17
        /// <summary>Checks if any merges are now necessary and returns a
        /// <see cref="MergePolicy.MergeSpecification" /> if so.  A merge
        /// is necessary when there are more than <see cref="MergeFactor" />
        /// segments at a given level.  When multiple levels have too many
        /// segments, this method will return multiple merges, allowing the
        /// <see cref="MergeScheduler" /> to use concurrency.
        /// </summary>
        public override MergeSpecification FindMerges(SegmentInfos infos)
        {
            int numSegments = infos.Count;

            if (Verbose())
            {
                Message("findMerges: " + numSegments + " segments");
            }

            // Compute levels, which is just log (base mergeFactor)
            // of the size of each segment
            float[] levels = new float[numSegments];
            float   norm   = (float)System.Math.Log(mergeFactor);

            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                long        size = Size(info);

                // Floor tiny segments
                if (size < 1)
                {
                    size = 1;
                }
                levels[i] = (float)System.Math.Log(size) / norm;
            }

            float levelFloor;

            if (minMergeSize <= 0)
            {
                levelFloor = (float)0.0;
            }
            else
            {
                levelFloor = (float)(System.Math.Log(minMergeSize) / norm);
            }

            // Now, we quantize the log values into levels.  The
            // first level is any segment whose log size is within
            // LEVEL_LOG_SPAN of the max size, or that has such a
            // segment "to the right".  Then, we find the max of all
            // other segments and use that to define the next level
            // segment, etc.

            MergeSpecification spec = null;

            int start = 0;

            while (start < numSegments)
            {
                // Find max level of all segments not already
                // quantized.
                float maxLevel = levels[start];
                for (int i = 1 + start; i < numSegments; i++)
                {
                    float level = levels[i];
                    if (level > maxLevel)
                    {
                        maxLevel = level;
                    }
                }

                // Now search backwards for the rightmost segment that
                // falls into this level:
                float levelBottom;
                if (maxLevel < levelFloor)
                {
                    // All remaining segments fall into the min level
                    levelBottom = -1.0F;
                }
                else
                {
                    levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

                    // Force a boundary at the level floor
                    if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    {
                        levelBottom = levelFloor;
                    }
                }

                int upto = numSegments - 1;
                while (upto >= start)
                {
                    if (levels[upto] >= levelBottom)
                    {
                        break;
                    }
                    upto--;
                }
                if (Verbose())
                {
                    Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
                }

                // Finally, record all merges that are viable at this level:
                int end = start + mergeFactor;
                while (end <= 1 + upto)
                {
                    bool anyTooLarge = false;
                    for (int i = start; i < end; i++)
                    {
                        SegmentInfo info = infos.Info(i);
                        anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
                    }

                    if (!anyTooLarge)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        if (Verbose())
                        {
                            Message("    " + start + " to " + end + ": add this merge");
                        }
                        spec.Add(new OneMerge(infos.Range(start, end), useCompoundFile));
                    }
                    else if (Verbose())
                    {
                        Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
                    }

                    start = end;
                    end   = start + mergeFactor;
                }

                start = 1 + upto;
            }

            return(spec);
        }
Example #18
        /// <summary>Returns the merges necessary to optimize the index.
        /// This merge policy defines "optimized" to mean only one
        /// segment in the index, where that segment has no
        /// deletions pending nor separate norms, and it is in
        /// compound file format if the current useCompoundFile
        /// setting is true.  This method returns multiple merges
        /// (mergeFactor at a time) so the <see cref="MergeScheduler" />
        /// in use may make use of concurrency.
        /// </summary>
        public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
        {
            MergeSpecification spec;

            System.Diagnostics.Debug.Assert(maxNumSegments > 0);

            if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
            {
                // Find the newest (rightmost) segment that needs to
                // be optimized (other segments may have been flushed
                // since optimize started):
                int last = infos.Count;
                while (last > 0)
                {
                    SegmentInfo info = infos.Info(--last);
                    if (segmentsToOptimize.Contains(info))
                    {
                        last++;
                        break;
                    }
                }

                if (last > 0)
                {
                    spec = new MergeSpecification();

                    // First, enroll all "full" merges (size
                    // mergeFactor) to potentially be run concurrently:
                    while (last - maxNumSegments + 1 >= mergeFactor)
                    {
                        spec.Add(new OneMerge(infos.Range(last - mergeFactor, last), useCompoundFile));
                        last -= mergeFactor;
                    }

                    // Only if there are no full merges pending do we
                    // add a final partial (< mergeFactor segments) merge:
                    if (0 == spec.merges.Count)
                    {
                        if (maxNumSegments == 1)
                        {
                            // Since we must optimize down to 1 segment, the
                            // choice is simple:
                            if (last > 1 || !IsOptimized(infos.Info(0)))
                            {
                                spec.Add(new OneMerge(infos.Range(0, last), useCompoundFile));
                            }
                        }
                        else if (last > maxNumSegments)
                        {
                            // Take care to pick a partial merge that is
                            // least cost, but does not make the index too
                            // lopsided.  If we always just picked the
                            // partial tail then we could produce a highly
                            // lopsided index over time:

                            // We must merge this many segments to leave
                            // maxNumSegments in the index (from when
                            // optimize was first kicked off):
                            int finalMergeSize = last - maxNumSegments + 1;

                            // Consider all possible starting points:
                            long bestSize  = 0;
                            int  bestStart = 0;

                            for (int i = 0; i < last - finalMergeSize + 1; i++)
                            {
                                long sumSize = 0;
                                for (int j = 0; j < finalMergeSize; j++)
                                {
                                    sumSize += Size(infos.Info(j + i));
                                }
                                if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                                {
                                    bestStart = i;
                                    bestSize  = sumSize;
                                }
                            }

                            spec.Add(new OneMerge(infos.Range(bestStart, bestStart + finalMergeSize), useCompoundFile));
                        }
                    }
                }
                else
                {
                    spec = null;
                }
            }
            else
            {
                spec = null;
            }

            return(spec);
        }
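
The "least cost, not lopsided" selection in the partial-merge branch reduces to one linear scan. A minimal standalone sketch of just that scan, over raw sizes and with hypothetical names (not the Lucene.NET API):

static class PartialMergeSketch
{
    // Slide a window of finalMergeSize segments, keep the smallest total,
    // and reject any window more than twice the size of the segment just
    // before it, which is the 2x check that prevents a lopsided index.
    public static int PickBestStart(long[] sizes, int finalMergeSize)
    {
        long bestSize = 0;
        int bestStart = 0;
        for (int i = 0; i + finalMergeSize <= sizes.Length; i++)
        {
            long sumSize = 0;
            for (int j = 0; j < finalMergeSize; j++)
            {
                sumSize += sizes[i + j];
            }
            if (i == 0 || (sumSize < 2 * sizes[i - 1] && sumSize < bestSize))
            {
                bestStart = i;
                bestSize = sumSize;
            }
        }
        return bestStart;
    }
}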
Example #19
		private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
		{
			int numSegments = infos.Count;
			int numToOptimize = 0;
			SegmentInfo optimizeInfo = null;
			for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
			{
				SegmentInfo info = infos.Info(i);
				if (segmentsToOptimize.Contains(info))
				{
					numToOptimize++;
					optimizeInfo = info;
				}
			}
			
			return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
		}
Example #20
		/// <summary>Returns the merges necessary to optimize the index.
		/// This merge policy defines "optimized" to mean only one
		/// segment in the index, where that segment has no
		/// deletions pending nor separate norms, and it is in
		/// compound file format if the current useCompoundFile
		/// setting is true.  This method returns multiple merges
		/// (mergeFactor at a time) so the <see cref="MergeScheduler" />
		/// in use may make use of concurrency. 
		/// </summary>
		public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
		{
			MergeSpecification spec;
			
			System.Diagnostics.Debug.Assert(maxNumSegments > 0);
			
			if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
			{
				
				// Find the newest (rightmost) segment that needs to
				// be optimized (other segments may have been flushed
				// since optimize started):
				int last = infos.Count;
				while (last > 0)
				{
					SegmentInfo info = infos.Info(--last);
					if (segmentsToOptimize.Contains(info))
					{
						last++;
						break;
					}
				}
				
				if (last > 0)
				{
					
					spec = new MergeSpecification();
					
					// First, enroll all "full" merges (size
					// mergeFactor) to potentially be run concurrently:
					while (last - maxNumSegments + 1 >= mergeFactor)
					{
                        spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
						last -= mergeFactor;
					}
					
					// Only if there are no full merges pending do we
					// add a final partial (< mergeFactor segments) merge:
					if (0 == spec.merges.Count)
					{
						if (maxNumSegments == 1)
						{
							
							// Since we must optimize down to 1 segment, the
							// choice is simple:
							if (last > 1 || !IsOptimized(infos.Info(0)))
                                spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
						}
						else if (last > maxNumSegments)
						{
							
							// Take care to pick a partial merge that is
							// least cost, but does not make the index too
							// lopsided.  If we always just picked the
							// partial tail then we could produce a highly
							// lopsided index over time:
							
							// We must merge this many segments to leave
							// maxNumSegments in the index (from when
							// optimize was first kicked off):
							int finalMergeSize = last - maxNumSegments + 1;
							
							// Consider all possible starting points:
							long bestSize = 0;
							int bestStart = 0;
							
							for (int i = 0; i < last - finalMergeSize + 1; i++)
							{
								long sumSize = 0;
								for (int j = 0; j < finalMergeSize; j++)
									sumSize += Size(infos.Info(j + i));
								if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
								{
									bestStart = i;
									bestSize = sumSize;
								}
							}

                            spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
						}
					}
				}
				else
					spec = null;
			}
			else
				spec = null;
			
			return spec;
		}
Example #21
		/// <summary>Returns a <see cref="Status" /> instance detailing
		/// the state of the index.
		/// 
		/// <p/>As this method checks every byte in the specified
		/// segments, on a large index it can take quite a long
		/// time to run.
		/// 
		/// <p/><b>WARNING</b>: make sure
		/// you only call this when the index is not opened by any
		/// writer.
		/// </summary>
		/// <param name="onlySegments">list of specific segment names to check</param>
		public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
		{
            System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
			SegmentInfos sis = new SegmentInfos();
			Status result = new Status();
			result.dir = dir;
			try
			{
				sis.Read(dir);
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not read any segments file in directory");
				result.missingSegments = true;
				if (infoStream != null)
					infoStream.WriteLine(t.StackTrace);
				return result;
			}
			
			int numSegments = sis.Count;
			var segmentsFileName = sis.GetCurrentSegmentFileName();
			IndexInput input = null;
			try
			{
				input = dir.OpenInput(segmentsFileName);
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not open segments file in directory");
				if (infoStream != null)
					infoStream.WriteLine(t.StackTrace);
				result.cantOpenSegments = true;
				return result;
			}
			int format = 0;
			try
			{
				format = input.ReadInt();
			}
			catch (System.Exception t)
			{
				Msg("ERROR: could not read segment file version in directory");
				if (infoStream != null)
					infoStream.WriteLine(t.StackTrace);
				result.missingSegmentVersion = true;
				return result;
			}
			finally
			{
				if (input != null)
					input.Close();
			}
			
			System.String sFormat = "";
			bool skip = false;
			
			if (format == SegmentInfos.FORMAT)
				sFormat = "FORMAT [Lucene Pre-2.1]";
			else if (format == SegmentInfos.FORMAT_LOCKLESS)
				sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
			else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
				sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
			else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
				sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
			else
			{
				if (format == SegmentInfos.FORMAT_CHECKSUM)
					sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
				else if (format == SegmentInfos.FORMAT_DEL_COUNT)
					sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
				else if (format == SegmentInfos.FORMAT_HAS_PROX)
					sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
				else if (format == SegmentInfos.FORMAT_USER_DATA)
					sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
				else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
					sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
				else if (format < SegmentInfos.CURRENT_FORMAT)
				{
					sFormat = "int=" + format + " [newer version of Lucene than this tool]";
					skip = true;
				}
				else
				{
					sFormat = format + " [Lucene 1.3 or prior]";
				}
			}
			
			result.segmentsFileName = segmentsFileName;
			result.numSegments = numSegments;
			result.segmentFormat = sFormat;
			result.userData = sis.UserData;
			System.String userDataString;
			if (sis.UserData.Count > 0)
			{
				userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
			}
			else
			{
				userDataString = "";
			}
			
			Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
			
			if (onlySegments != null)
			{
				result.partial = true;
				if (infoStream != null)
					infoStream.Write("\nChecking only these segments:");
                foreach(string s in onlySegments)
				{
					if (infoStream != null)
					{
						infoStream.Write(" " + s);
					}
				}
                result.segmentsChecked.AddRange(onlySegments);
                Msg(":");
			}
			
			if (skip)
			{
				Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
				result.toolOutOfDate = true;
				return result;
			}
			
			
			result.newSegments = (SegmentInfos) sis.Clone();
			result.newSegments.Clear();
			
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = sis.Info(i);
				if (onlySegments != null && !onlySegments.Contains(info.name))
					continue;
				var segInfoStat = new Status.SegmentInfoStatus();
				result.segmentInfos.Add(segInfoStat);
				Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
				segInfoStat.name = info.name;
				segInfoStat.docCount = info.docCount;
				
				int toLoseDocCount = info.docCount;
				
				SegmentReader reader = null;
				
				try
				{
					Msg("    compound=" + info.GetUseCompoundFile());
					segInfoStat.compound = info.GetUseCompoundFile();
					Msg("    hasProx=" + info.HasProx);
					segInfoStat.hasProx = info.HasProx;
					Msg("    numFiles=" + info.Files().Count);
					segInfoStat.numFiles = info.Files().Count;
					Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
					segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
                    IDictionary<string, string> diagnostics = info.Diagnostics;
					segInfoStat.diagnostics = diagnostics;
					if (diagnostics.Count > 0)
					{
						Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
					}
					
					int docStoreOffset = info.DocStoreOffset;
					if (docStoreOffset != - 1)
					{
						Msg("    docStoreOffset=" + docStoreOffset);
						segInfoStat.docStoreOffset = docStoreOffset;
						Msg("    docStoreSegment=" + info.DocStoreSegment);
						segInfoStat.docStoreSegment = info.DocStoreSegment;
						Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
						segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
					}
					System.String delFileName = info.GetDelFileName();
					if (delFileName == null)
					{
						Msg("    no deletions");
						segInfoStat.hasDeletions = false;
					}
					else
					{
						Msg("    has deletions [delFileName=" + delFileName + "]");
						segInfoStat.hasDeletions = true;
						segInfoStat.deletionsFileName = delFileName;
					}
					if (infoStream != null)
						infoStream.Write("    test: open reader.........");
					reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
					
					segInfoStat.openReaderPassed = true;
					
					int numDocs = reader.NumDocs();
					toLoseDocCount = numDocs;
					if (reader.HasDeletions)
					{
						if (reader.deletedDocs.Count() != info.GetDelCount())
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
						}
						if (reader.deletedDocs.Count() > reader.MaxDoc)
						{
							throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
						}
						if (info.docCount - numDocs != info.GetDelCount())
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
						}
						segInfoStat.numDeleted = info.docCount - numDocs;
						Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
					}
					else
					{
						if (info.GetDelCount() != 0)
						{
							throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
						}
						Msg("OK");
					}
					if (reader.MaxDoc != info.docCount)
						throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
					
					// Test getFieldNames()
					if (infoStream != null)
					{
						infoStream.Write("    test: fields..............");
					}
                    ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
					Msg("OK [" + fieldNames.Count + " fields]");
					segInfoStat.numFields = fieldNames.Count;
					
					// Test Field Norms
					segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
					
					// Test the Term Index
					segInfoStat.termIndexStatus = TestTermIndex(info, reader);
					
					// Test Stored Fields
					segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
					
					// Test Term Vectors
					segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
					
					// Rethrow the first exception we encountered
					//  This will cause stats for failed segments to be incremented properly
					if (segInfoStat.fieldNormStatus.error != null)
					{
						throw new SystemException("Field Norm test failed");
					}
					else if (segInfoStat.termIndexStatus.error != null)
					{
						throw new SystemException("Term Index test failed");
					}
					else if (segInfoStat.storedFieldStatus.error != null)
					{
						throw new SystemException("Stored Field test failed");
					}
					else if (segInfoStat.termVectorStatus.error != null)
					{
						throw new System.SystemException("Term Vector test failed");
					}
					
					Msg("");
				}
				catch (System.Exception t)
				{
					Msg("FAILED");
					const string comment = "fixIndex() would remove reference to this segment";
					Msg("    WARNING: " + comment + "; full exception:");
					if (infoStream != null)
						infoStream.WriteLine(t.StackTrace);
					Msg("");
					result.totLoseDocCount += toLoseDocCount;
					result.numBadSegments++;
					continue;
				}
				finally
				{
					if (reader != null)
						reader.Close();
				}
				
				// Keeper
				result.newSegments.Add((SegmentInfo)info.Clone());
			}
			
			if (0 == result.numBadSegments)
			{
				result.clean = true;
				Msg("No problems were detected with this index.\n");
			}
			else
				Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
			
			return result;
		}
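
A typical invocation of this checker, sketched against the Lucene.NET 3.x surface shown above (status field names are taken from the snippet; the constructor signature and directory path are assumptions):

// Sketch only: assumes the Lucene.NET 3.x CheckIndex API as used above.
var dir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
var checker = new CheckIndex(dir);
CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null); // null: check every segment
if (status.clean)
    Console.WriteLine("No problems detected.");
else
    Console.WriteLine(status.numBadSegments + " broken segment(s); " +
                      status.totLoseDocCount + " doc(s) would be lost by a fix");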
Example #22
        /// <summary>
        /// Returns the merges necessary to <see cref="IndexWriter.ForceMerge(int)"/> the index. This method constrains
        /// the returned merges only by the <paramref name="maxNumSegments"/> parameter, and
        /// guarantees that exactly that number of segments will remain in the index.
        /// </summary>
        private MergeSpecification FindForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last)
        {
            var spec     = new MergeSpecification();
            var segments = infos.AsList();

            // First, enroll all "full" merges (size
            // mergeFactor) to potentially be run concurrently:
            while (last - maxNumSegments + 1 >= m_mergeFactor)
            {
                spec.Add(new OneMerge(segments.SubList(last - m_mergeFactor, last)));
                last -= m_mergeFactor;
            }

            // Only if there are no full merges pending do we
            // add a final partial (< mergeFactor segments) merge:
            if (0 == spec.Merges.Count)
            {
                if (maxNumSegments == 1)
                {
                    // Since we must merge down to 1 segment, the
                    // choice is simple:
                    if (last > 1 || !IsMerged(infos, infos.Info(0)))
                    {
                        spec.Add(new OneMerge(segments.SubList(0, last)));
                    }
                }
                else if (last > maxNumSegments)
                {
                    // Take care to pick a partial merge that is
                    // least cost, but does not make the index too
                    // lopsided.  If we always just picked the
                    // partial tail then we could produce a highly
                    // lopsided index over time:

                    // We must merge this many segments to leave
                    // maxNumSegments in the index (from when
                    // forceMerge was first kicked off):
                    int finalMergeSize = last - maxNumSegments + 1;

                    // Consider all possible starting points:
                    long bestSize  = 0;
                    int  bestStart = 0;

                    for (int i = 0; i < last - finalMergeSize + 1; i++)
                    {
                        long sumSize = 0;
                        for (int j = 0; j < finalMergeSize; j++)
                        {
                            sumSize += Size(infos.Info(j + i));
                        }
                        if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                        {
                            bestStart = i;
                            bestSize  = sumSize;
                        }
                    }

                    spec.Add(new OneMerge(segments.SubList(bestStart, bestStart + finalMergeSize)));
                }
            }
            return(spec.Merges.Count == 0 ? null : spec);
        }
Example #23
		internal bool ApplyDeletes(SegmentInfos infos)
		{
			lock (this)
			{
				if (!HasDeletes())
					return false;
				
				if (infoStream != null)
					Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
				
				int infosEnd = infos.Count;
				
				int docStart = 0;
				bool any = false;
				for (int i = 0; i < infosEnd; i++)
				{
					
					// Make sure we never attempt to apply deletes to
					// segment in external dir
					System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
					
					SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
					try
					{
						any |= ApplyDeletes(reader, docStart);
						docStart += reader.MaxDoc;
					}
					finally
					{
						writer.readerPool.Release(reader);
					}
				}
				
				deletesFlushed.Clear();
				
				return any;
			}
		}
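
The docStart arithmetic above gives each segment a contiguous slice of the index-wide docID space (a running prefix sum of MaxDoc values). A standalone sketch of the same mapping, with hypothetical names:

using System;

static class DocIdMappingSketch
{
    // Buffered deletes carry index-wide docIDs; each segment owns the range
    // [docStart, docStart + maxDoc) and subtracts docStart to get its local
    // docID, exactly as the docStart accumulator does above.
    public static (int Segment, int LocalDoc) ToSegmentLocal(int globalDoc, int[] maxDocs)
    {
        int docStart = 0;
        for (int i = 0; i < maxDocs.Length; i++)
        {
            if (globalDoc < docStart + maxDocs[i])
            {
                return (i, globalDoc - docStart);
            }
            docStart += maxDocs[i];
        }
        throw new ArgumentOutOfRangeException(nameof(globalDoc));
    }
}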
Example #24
        // LUCENENET TODO: Get rid of the nullable in IDictionary<SegmentCommitInfo, bool?>, if possible
        /// <summary>
        /// Returns the merges necessary to merge the index down
        /// to a specified number of segments.
        /// this respects the <see cref="m_maxMergeSizeForForcedMerge"/> setting.
        /// By default, and assuming <c>maxNumSegments=1</c>, only
        /// one segment will be left in the index, where that segment
        /// has no deletions pending nor separate norms, and it is in
        /// compound file format if the current useCompoundFile
        /// setting is <c>true</c>.  This method returns multiple merges
        /// (mergeFactor at a time) so the <see cref="MergeScheduler"/>
        /// in use may make use of concurrency.
        /// </summary>
        public override MergeSpecification FindForcedMerges(SegmentInfos infos, int maxNumSegments, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
        {
            Debug.Assert(maxNumSegments > 0);
            if (IsVerbose)
            {
                Message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge=" + Arrays.ToString(segmentsToMerge));
            }

            // If the segments are already merged (e.g. there's only 1 segment), or
            // there are fewer than maxNumSegments segments, there is nothing to do:
            if (IsMerged(infos, maxNumSegments, segmentsToMerge))
            {
                if (IsVerbose)
                {
                    Message("already merged; skip");
                }
                return(null);
            }

            // Find the newest (rightmost) segment that needs to
            // be merged (other segments may have been flushed
            // since merging started):
            int last = infos.Count;

            while (last > 0)
            {
                SegmentCommitInfo info = infos.Info(--last);
                if (segmentsToMerge.ContainsKey(info))
                {
                    last++;
                    break;
                }
            }

            if (last == 0)
            {
                if (IsVerbose)
                {
                    Message("last == 0; skip");
                }
                return(null);
            }

            // There is only one segment already, and it is merged
            if (maxNumSegments == 1 && last == 1 && IsMerged(infos, infos.Info(0)))
            {
                if (IsVerbose)
                {
                    Message("already 1 seg; skip");
                }
                return(null);
            }

            // Check if there are any segments above the threshold
            bool anyTooLarge = false;

            for (int i = 0; i < last; i++)
            {
                SegmentCommitInfo info = infos.Info(i);
                if (Size(info) > m_maxMergeSizeForForcedMerge || SizeDocs(info) > m_maxMergeDocs)
                {
                    anyTooLarge = true;
                    break;
                }
            }

            if (anyTooLarge)
            {
                return(FindForcedMergesSizeLimit(infos, maxNumSegments, last));
            }
            else
            {
                return(FindForcedMergesMaxNumSegments(infos, maxNumSegments, last));
            }
        }
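
In normal use this policy is exercised indirectly through IndexWriter.ForceMerge. A hedged configuration sketch against the Lucene.NET 4.8 surface (type and member names assumed from that version):

// Sketch only: assumes Lucene.Net 4.8 (namespaces Lucene.Net.Analysis.Standard,
// Lucene.Net.Index, Lucene.Net.Store, Lucene.Net.Util).
using (var dir = new RAMDirectory())
using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
{
    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
    {
        MergePolicy = new LogByteSizeMergePolicy()
    };
    using (var writer = new IndexWriter(dir, config))
    {
        // ... add documents here ...
        writer.ForceMerge(1); // drives FindForcedMerges with maxNumSegments = 1
        writer.Commit();
    }
}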
Example #25
		private void  SetRollbackSegmentInfos(SegmentInfos infos)
		{
			lock (this)
			{
				rollbackSegmentInfos = (SegmentInfos) infos.Clone();
				System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
				rollbackSegments = new HashMap<SegmentInfo, int?>();
				int size = rollbackSegmentInfos.Count;
				for (int i = 0; i < size; i++)
					rollbackSegments[rollbackSegmentInfos.Info(i)] = i;
			}
		}
Example #26
		/// <summary> Finds merges necessary to expunge all deletes from the
		/// index.  We simply merge adjacent segments that have
		/// deletes, up to mergeFactor at a time.
		/// </summary>
		public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
		{
			int numSegments = segmentInfos.Count;
			
			if (Verbose())
				Message("findMergesToExpungeDeletes: " + numSegments + " segments");
			
			MergeSpecification spec = new MergeSpecification();
			int firstSegmentWithDeletions = - 1;
			for (int i = 0; i < numSegments; i++)
			{
				SegmentInfo info = segmentInfos.Info(i);
				int delCount = writer.NumDeletedDocs(info);
				if (delCount > 0)
				{
					if (Verbose())
						Message("  segment " + info.name + " has deletions");
					if (firstSegmentWithDeletions == - 1)
						firstSegmentWithDeletions = i;
					else if (i - firstSegmentWithDeletions == mergeFactor)
					{
						// We've seen mergeFactor segments in a row with
						// deletions, so force a merge now:
						if (Verbose())
							Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
						firstSegmentWithDeletions = i;
					}
				}
				else if (firstSegmentWithDeletions != - 1)
				{
					// End of a sequence of segments with deletions, so,
					// merge those past segments even if it's fewer than
					// mergeFactor segments
					if (Verbose())
						Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                    spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
					firstSegmentWithDeletions = - 1;
				}
			}
			
			if (firstSegmentWithDeletions != - 1)
			{
				if (Verbose())
					Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
			}
			
			return spec;
		}
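
The run detection above can be shown standalone: emit a merge every mergeFactor consecutive segments with deletions, and flush whatever run remains at the end. A sketch with hypothetical names (not the Lucene.NET API):

using System.Collections.Generic;

static class ExpungeRunSketch
{
    // Walk the segments; for each maximal run of segments with deletions,
    // emit half-open ranges of at most mergeFactor segments, matching the
    // firstSegmentWithDeletions bookkeeping above.
    public static List<(int Start, int End)> Group(bool[] hasDeletions, int mergeFactor)
    {
        var merges = new List<(int, int)>();
        int first = -1;
        for (int i = 0; i < hasDeletions.Length; i++)
        {
            if (hasDeletions[i])
            {
                if (first == -1)
                {
                    first = i;
                }
                else if (i - first == mergeFactor)
                {
                    merges.Add((first, i)); // a full run of mergeFactor segments
                    first = i;
                }
            }
            else if (first != -1)
            {
                merges.Add((first, i)); // run ended early; merge it anyway
                first = -1;
            }
        }
        if (first != -1)
        {
            merges.Add((first, hasDeletions.Length));
        }
        return merges;
    }
}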
Example #27
		private System.String SegString(SegmentInfos infos)
		{
			lock (this)
			{
				System.Text.StringBuilder buffer = new System.Text.StringBuilder();
				int count = infos.Count;
				for (int i = 0; i < count; i++)
				{
					if (i > 0)
					{
						buffer.Append(' ');
					}
					SegmentInfo info = infos.Info(i);
					buffer.Append(info.SegString(directory));
					if (info.dir != directory)
						buffer.Append("**");
				}
				return buffer.ToString();
			}
		}
Example #28
		internal ReaderCommit(SegmentInfos infos, Directory dir)
		{
			segmentsFileName = infos.GetCurrentSegmentFileName();
			this.dir = dir;
			userData = infos.UserData;
			files = infos.Files(dir, true);
			version = infos.Version;
			generation = infos.Generation;
			isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
		}