Code Example #1
        protected internal override long Size(SegmentCommitInfo info)
        {
            int hourOfDay = Calendar.Hour;
            if (hourOfDay < 6 || hourOfDay > 20 || Random.Next(23) == 5)
            {
                // it's 5 o'clock somewhere
                Drink.Drink_e[] values = Enum.GetValues(typeof(Drink.Drink_e)).Cast<Drink.Drink_e>().ToArray();
                // pick a random drink during the day (any value, including the last one)
                Drink.Drink_e drink = values[Random.Next(values.Length)];
                return (long)drink * info.SizeInBytes();
            }

            return info.SizeInBytes();
        }
Code Example #2
        protected internal override long Size(SegmentCommitInfo info)
        {
            int hourOfDay = Calendar.Hour;

            if (hourOfDay < 6 || hourOfDay > 20 || Random.Next(23) == 5)
            {
                // it's 5 o'clock somewhere
                Drink.Drink_e[] values = Enum.GetValues(typeof(Drink.Drink_e)).Cast<Drink.Drink_e>().ToArray();
                // pick a random drink during the day (any value, including the last one)
                Drink.Drink_e drink = values[Random.Next(values.Length)];
                return (long)drink * info.SizeInBytes();
            }

            return info.SizeInBytes();
        }
Code Example #3
 /// <summary>
 /// Return the byte size of the provided <seealso cref="SegmentCommitInfo"/>,
 /// pro-rated by percentage of non-deleted documents if
 /// <c>setCalibrateSizeByDeletes</c> is set.
 /// </summary>
 protected internal virtual long SizeBytes(SegmentCommitInfo info)
 {
     if (CalibrateSizeByDeletes_Renamed)
     {
         return base.Size(info);
     }
     return info.SizeInBytes();
 }
Code Example #4
        /// <summary>
        /// Return the byte size of the provided <seealso cref="SegmentCommitInfo"/>,
        /// pro-rated by the percentage of non-deleted documents.
        /// </summary>
        protected internal virtual long Size(SegmentCommitInfo info)
        {
            long   byteSize = info.SizeInBytes();
            int    delCount = Writer.Get().NumDeletedDocs(info);
            double delRatio = (info.Info.DocCount <= 0 ? 0.0f : ((float)delCount / (float)info.Info.DocCount));

            Debug.Assert(delRatio <= 1.0);
            return info.Info.DocCount <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio));
        }
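
The pro-rating above can be read in isolation: the reported size is scaled down by the fraction of deleted documents. A minimal sketch of that calculation, with plain parameters standing in for the values pulled from SegmentCommitInfo and IndexWriter (the helper name is hypothetical, not Lucene.NET API):

    // Sketch of the delete pro-rating used by Size(SegmentCommitInfo).
    // byteSize, delCount and docCount stand in for info.SizeInBytes(),
    // Writer.Get().NumDeletedDocs(info) and info.Info.DocCount.
    static long ProRatedSize(long byteSize, int delCount, int docCount)
    {
        if (docCount <= 0)
        {
            return byteSize; // no documents: report the raw size
        }
        double delRatio = (double)delCount / docCount;
        return (long)(byteSize * (1.0 - delRatio));
    }

    // e.g. ProRatedSize(100000000L, 250000, 1000000) returns 75000000.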
Code Example #5
        public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos infos)
        {
            if (Verbose())
            {
                Message("findMerges: " + infos.Size() + " segments");
            }
            if (infos.Size() == 0)
            {
                return null;
            }
            ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
            ICollection<SegmentCommitInfo> toBeMerged = new HashSet<SegmentCommitInfo>();

            List<SegmentCommitInfo> infosSorted = new List<SegmentCommitInfo>(infos.AsList());

            infosSorted.Sort(new SegmentByteSizeDescending(this));

            // Compute total index bytes & print details about the index
            long totIndexBytes   = 0;
            long minSegmentBytes = long.MaxValue;

            foreach (SegmentCommitInfo info in infosSorted)
            {
                long segBytes = Size(info);
                if (Verbose())
                {
                    string extra = merging.Contains(info) ? " [merging]" : "";
                    if (segBytes >= MaxMergedSegmentBytes / 2.0)
                    {
                        extra += " [skip: too large]";
                    }
                    else if (segBytes < FloorSegmentBytes)
                    {
                        extra += " [floored]";
                    }
                    Message("  seg=" + Writer.Get().SegString(info) + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00}", segBytes / 1024 / 1024.0) + " MB" + extra);
                }

                minSegmentBytes = Math.Min(segBytes, minSegmentBytes);
                // Accum total byte size
                totIndexBytes += segBytes;
            }

            // If we have too-large segments, grace them out
            // of the maxSegmentCount:
            int tooBigCount = 0;

            while (tooBigCount < infosSorted.Count && Size(infosSorted[tooBigCount]) >= MaxMergedSegmentBytes / 2.0)
            {
                totIndexBytes -= Size(infosSorted[tooBigCount]);
                tooBigCount++;
            }

            minSegmentBytes = FloorSize(minSegmentBytes);

            // Compute max allowed segs in the index
            long   levelSize       = minSegmentBytes;
            long   bytesLeft       = totIndexBytes;
            double allowedSegCount = 0;

            while (true)
            {
                double segCountLevel = bytesLeft / (double)levelSize;
                if (segCountLevel < SegsPerTier)
                {
                    allowedSegCount += Math.Ceiling(segCountLevel);
                    break;
                }
                allowedSegCount += SegsPerTier;
                bytesLeft       -= (long)(SegsPerTier * levelSize);
                levelSize       *= MaxMergeAtOnce_Renamed;
            }
            int allowedSegCountInt = (int)allowedSegCount;

            MergeSpecification spec = null;

            // Cycle to possibly select more than one merge:
            while (true)
            {
                long mergingBytes = 0;

                // Gather eligible segments for merging, ie segments
                // not already being merged and not already picked (by
                // prior iteration of this loop) for merging:
                IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
                for (int idx = tooBigCount; idx < infosSorted.Count; idx++)
                {
                    SegmentCommitInfo info = infosSorted[idx];
                    if (merging.Contains(info))
                    {
                        mergingBytes += info.SizeInBytes();
                    }
                    else if (!toBeMerged.Contains(info))
                    {
                        eligible.Add(info);
                    }
                }

                bool maxMergeIsRunning = mergingBytes >= MaxMergedSegmentBytes;

                if (Verbose())
                {
                    Message("  allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.Count + " (eligible count=" + eligible.Count + ") tooBigCount=" + tooBigCount);
                }

                if (eligible.Count == 0)
                {
                    return spec;
                }

                if (eligible.Count >= allowedSegCountInt)
                {
                    // OK we are over budget -- find best merge!
                    MergeScore bestScore = null;
                    IList<SegmentCommitInfo> best = null;
                    bool bestTooLarge = false;
                    long bestMergeBytes = 0;

                    // Consider all merge starts:
                    for (int startIdx = 0; startIdx <= eligible.Count - MaxMergeAtOnce_Renamed; startIdx++)
                    {
                        long totAfterMergeBytes = 0;

                        IList<SegmentCommitInfo> candidate = new List<SegmentCommitInfo>();
                        bool hitTooLarge = false;
                        for (int idx = startIdx; idx < eligible.Count && candidate.Count < MaxMergeAtOnce_Renamed; idx++)
                        {
                            SegmentCommitInfo info = eligible[idx];
                            long segBytes          = Size(info);

                            if (totAfterMergeBytes + segBytes > MaxMergedSegmentBytes)
                            {
                                hitTooLarge = true;
                                // NOTE: we continue, so that we can try
                                // "packing" smaller segments into this merge
                                // to see if we can get closer to the max
                                // size; this in general is not perfect since
                                // this is really "bin packing" and we'd have
                                // to try different permutations.
                                continue;
                            }
                            candidate.Add(info);
                            totAfterMergeBytes += segBytes;
                        }

                        MergeScore score = Score(candidate, hitTooLarge, mergingBytes);
                        if (Verbose())
                        {
                            Message("  maybe=" + Writer.Get().SegString(candidate) + " score=" + score.Score + " " + score.Explanation + " tooLarge=" + hitTooLarge + " size=" + string.Format(CultureInfo.InvariantCulture, "%.3f MB", totAfterMergeBytes / 1024.0 / 1024.0));
                        }

                        // If we are already running a max sized merge
                        // (maxMergeIsRunning), don't allow another max
                        // sized merge to kick off:
                        if ((bestScore == null || score.Score < bestScore.Score) && (!hitTooLarge || !maxMergeIsRunning))
                        {
                            best           = candidate;
                            bestScore      = score;
                            bestTooLarge   = hitTooLarge;
                            bestMergeBytes = totAfterMergeBytes;
                        }
                    }

                    if (best != null)
                    {
                        if (spec == null)
                        {
                            spec = new MergeSpecification();
                        }
                        OneMerge merge = new OneMerge(best);
                        spec.Add(merge);
                        foreach (SegmentCommitInfo info in merge.Segments)
                        {
                            toBeMerged.Add(info);
                        }

                        if (Verbose())
                        {
                            Message("  add merge=" + Writer.Get().SegString(merge.Segments) + " size=" + string.Format(CultureInfo.InvariantCulture, "%.3f MB", bestMergeBytes / 1024.0 / 1024.0) + " score=" + string.Format(CultureInfo.InvariantCulture, "%.3f", bestScore.Score) + " " + bestScore.Explanation + (bestTooLarge ? " [max merge]" : ""));
                        }
                    }
                    else
                    {
                        return spec;
                    }
                }
                else
                {
                    return spec;
                }
            }
        }
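
The budget computation near the top of FindMerges can also be read on its own: starting from the (floored) smallest segment size, each tier is allowed SegsPerTier segments, and each higher tier's segments are MaxMergeAtOnce times larger. A standalone sketch under those assumptions (parameter names are placeholders for the policy's fields; TieredMergePolicy defaults both knobs to 10):

    // Sketch of the allowed-segment-count loop from FindMerges.
    // totalBytes     : total index size, excluding the too-large segments
    // minBytes       : floored size of the smallest segment
    // segsPerTier    : budgeted segments per tier
    // maxMergeAtOnce : size growth factor between tiers
    static int AllowedSegmentCount(long totalBytes, long minBytes, double segsPerTier, int maxMergeAtOnce)
    {
        long levelSize = minBytes;
        long bytesLeft = totalBytes;
        double allowed = 0;
        while (true)
        {
            double segCountLevel = bytesLeft / (double)levelSize;
            if (segCountLevel < segsPerTier)
            {
                allowed += Math.Ceiling(segCountLevel); // partially filled last tier
                break;
            }
            allowed += segsPerTier;
            bytesLeft -= (long)(segsPerTier * levelSize);
            levelSize *= maxMergeAtOnce;
        }
        return (int)allowed;
    }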
Code Example #6
        /// <summary>
        /// Flush all pending docs to a new segment </summary>
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
            SegmentInfo_Renamed.DocCount = numDocsInRAM;
            SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
            double startMBUsed = BytesUsed() / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-by-docID only
            // happens when an exception is hit processing that
            // doc, e.g. if the analyzer has some problem w/ the text):
            if (PendingUpdates.DocIDs.Count > 0)
            {
                flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in PendingUpdates.DocIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
                PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                PendingUpdates.DocIDs.Clear();
            }

            if (Aborting)
            {
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (InfoStream.IsEnabled("DWPT"))
            {
                InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                Consumer.Flush(flushState);
                PendingUpdates.Terms.Clear();
                SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
                    InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
                    InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
                    InfoStream.Message("DWPT", "flushed codec=" + Codec);
                }

                BufferedUpdates segmentDeletes;
                if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
                {
                    PendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = PendingUpdates;
                }

                if (InfoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
                    InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
                }

                Debug.Assert(SegmentInfo_Renamed != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                if (!success)
                {
                    Abort(FilesToDelete);
                }
            }
        }
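
The delete-by-docID handling at the start of Flush amounts to allocating a live-docs structure with every document live and clearing the bit for each buffered docID. A minimal sketch, with a plain bool array standing in for the codec's MutableBits (the helper and its parameters are illustrative only):

    // Sketch: apply buffered delete-by-docID entries to fresh live docs.
    static bool[] ApplyDocIdDeletes(int numDocsInRam, IEnumerable<int> deletedDocIds)
    {
        bool[] liveDocs = new bool[numDocsInRam];
        for (int i = 0; i < liveDocs.Length; i++)
        {
            liveDocs[i] = true; // every flushed doc starts out live
        }
        foreach (int docId in deletedDocIds)
        {
            liveDocs[docId] = false; // doc was deleted while still buffered in RAM
        }
        return liveDocs;
    }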
Code Example #7
File: MergePolicy.cs  Project: paulirwin/lucene.net
 /// <summary>
 /// Return the byte size of the provided <seealso cref="SegmentCommitInfo"/>,
 /// pro-rated by the percentage of non-deleted documents.
 /// </summary>
 protected internal virtual long Size(SegmentCommitInfo info)
 {
     long byteSize = info.SizeInBytes();
     int delCount = Writer.Get().NumDeletedDocs(info);
     double delRatio = (info.Info.DocCount <= 0 ? 0.0f : ((float)delCount / (float)info.Info.DocCount));
     Debug.Assert(delRatio <= 1.0);
     return info.Info.DocCount <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio));
 }
Code Example #8
        /// <summary>
        /// Seals the <seealso cref="SegmentInfo"/> for the new flushed segment and persists
        /// the deleted documents <seealso cref="MutableBits"/>.
        /// </summary>
        internal virtual void SealFlushedSegment(FlushedSegment flushedSegment)
        {
            Debug.Assert(flushedSegment != null);

            SegmentCommitInfo newSegment = flushedSegment.SegmentInfo;

            IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH);

            IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.SizeInBytes()));

            bool success = false;

            try
            {
                if (IndexWriterConfig.UseCompoundFile)
                {
                    CollectionsHelper.AddAll(FilesToDelete, IndexWriter.CreateCompoundFile(InfoStream, Directory, MergeState.CheckAbort.NONE, newSegment.Info, context));
                    newSegment.Info.UseCompoundFile = true;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                Codec.SegmentInfoFormat().SegmentInfoWriter.Write(Directory, newSegment.Info, flushedSegment.FieldInfos, context);

                // TODO: ideally we would freeze newSegment here!!
                // because any changes after writing the .si will be
                // lost...

                // Must write deleted docs after the CFS so we don't
                // slurp the del file into CFS:
                if (flushedSegment.LiveDocs != null)
                {
                    int delCount = flushedSegment.DelCount;
                    Debug.Assert(delCount > 0);
                    if (InfoStream.IsEnabled("DWPT"))
                    {
                        InfoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.SegmentInfo.DelGen);
                    }

                    // TODO: we should prune the segment if it's 100%
                    // deleted... but merge will also catch it.

                    // TODO: in the NRT case it'd be better to hand
                    // this del vector over to the
                    // shortly-to-be-opened SegmentReader and let it
                    // carry the changes; there's no reason to use
                    // filesystem as intermediary here.

                    SegmentCommitInfo info  = flushedSegment.SegmentInfo;
                    Codec             codec = info.Info.Codec;
                    codec.LiveDocsFormat().WriteLiveDocs(flushedSegment.LiveDocs, Directory, info, delCount, context);
                    newSegment.DelCount = delCount;
                    newSegment.AdvanceDelGen();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    if (InfoStream.IsEnabled("DWPT"))
                    {
                        InfoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name);
                    }
                }
            }
        }
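
Both SealFlushedSegment and Flush rely on the same cleanup idiom: a success flag that only becomes true when the body completes, checked in finally so partially written state is removed on any exception. The pattern in isolation, with hypothetical delegates standing in for the real work (a sketch, not Lucene.NET API):

    // Sketch of the success-flag cleanup idiom used by Flush/SealFlushedSegment.
    static void RunWithCleanup(Action work, Action cleanupOnFailure)
    {
        bool success = false;
        try
        {
            work();          // e.g. write CFS, .si and live docs
            success = true;  // only reached if nothing threw
        }
        finally
        {
            if (!success)
            {
                cleanupOnFailure(); // e.g. Abort(FilesToDelete)
            }
        }
    }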
Code Example #9
        /// <summary>
        /// Flush all pending docs to a new segment </summary>
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
            SegmentInfo_Renamed.DocCount = numDocsInRAM;
            SegmentWriteState flushState  = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
            double            startMBUsed = BytesUsed() / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-by-docID only
            // happens when an exception is hit processing that
            // doc, e.g. if the analyzer has some problem w/ the text):
            if (PendingUpdates.DocIDs.Count > 0)
            {
                flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in PendingUpdates.DocIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
                PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                PendingUpdates.DocIDs.Clear();
            }

            if (Aborting)
            {
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (InfoStream.IsEnabled("DWPT"))
            {
                InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                Consumer.Flush(flushState);
                PendingUpdates.Terms.Clear();
                SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
                    InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
                    InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
                    InfoStream.Message("DWPT", "flushed codec=" + Codec);
                }

                BufferedUpdates segmentDeletes;
                if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
                {
                    PendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = PendingUpdates;
                }

                if (InfoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
                    InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
                }

                Debug.Assert(SegmentInfo_Renamed != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                if (!success)
                {
                    Abort(FilesToDelete);
                }
            }
        }
Code Example #10
File: IndexSplitter.cs  Project: wwb/lucenenet
 public virtual void ListSegments()
 {
     for (int x = 0; x < infos.Size(); x++)
     {
         SegmentCommitInfo info    = infos.Info(x);
         string            sizeStr = string.Format(CultureInfo.InvariantCulture, "{0:###,###.###}", info.SizeInBytes());
         Console.WriteLine(info.Info.Name + " " + sizeStr);
     }
 }
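
The size string above uses a custom numeric format; a quick illustration of what it produces, using only plain .NET formatting (no Lucene types involved):

    // "{0:###,###.###}" with the invariant culture groups thousands with commas
    // and omits trailing decimals, so a 123456789-byte segment prints as below.
    string sizeStr = string.Format(CultureInfo.InvariantCulture, "{0:###,###.###}", 123456789L);
    // sizeStr == "123,456,789"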