// Note: just like in the codec APIs, the Directory 'dir' is NOT the same as segmentInfo.dir!
 public SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
 {
     // validate incoming readers
     if (validate)
     {
         foreach (AtomicReader reader in readers)
         {
             reader.CheckIntegrity();
         }
     }
     MergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     Directory = dir;
     this.TermIndexInterval = termIndexInterval;
     this.Codec = segmentInfo.Codec;
     this.Context = context;
     this.FieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
     MergeState.SegmentInfo.DocCount = SetDocMaps();
 }
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingStoredFieldsReader matchingFieldsReader = null;
                if (matchingSegmentReader != null)
                {
                    StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
                    if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader)
                    {
                        matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader;
                    }
                }

                int maxDoc = reader.MaxDoc();
                Bits liveDocs = reader.LiveDocs;

                if (matchingFieldsReader == null ||
                    matchingFieldsReader.Version != VERSION_CURRENT || // reader version is not the same as the writer version
                    matchingFieldsReader.CompressionMode != CompressionMode ||
                    matchingFieldsReader.ChunkSize != ChunkSize) // the way data is decompressed depends on the chunk size
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Document doc = reader.Document(i);
                        AddDocument(doc, mergeState.FieldInfos);
                        ++docCount;
                        mergeState.checkAbort.Work(300);
                    }
                }
                else
                {
                    int docID = NextLiveDoc(0, liveDocs, maxDoc);
                    if (docID < maxDoc)
                    {
                        // not all docs were deleted
                        CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID);
                        int[] startOffsets = new int[0];
                        do
                        {
                            // go to the next chunk that contains docID
                            it.Next(docID);
                            // transform lengths into offsets
                            if (startOffsets.Length < it.ChunkDocs)
                            {
                                startOffsets = new int[ArrayUtil.Oversize(it.ChunkDocs, 4)];
                            }
                            for (int i = 1; i < it.ChunkDocs; ++i)
                            {
                                startOffsets[i] = startOffsets[i - 1] + it.Lengths[i - 1];
                            }

                            if (NumBufferedDocs == 0 && // starting a new chunk
                                startOffsets[it.ChunkDocs - 1] < ChunkSize && // chunk is small enough
                                startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1] >= ChunkSize && // chunk is large enough
                                NextDeletedDoc(it.DocBase, liveDocs, it.DocBase + it.ChunkDocs) == it.DocBase + it.ChunkDocs) // no deletion in the chunk
                            {
                                Debug.Assert(docID == it.DocBase);

                                // no need to decompress, just copy data
                                IndexWriter.WriteIndex(it.ChunkDocs, FieldsStream.FilePointer);
                                WriteHeader(this.DocBase, it.ChunkDocs, it.NumStoredFields, it.Lengths);
                                it.CopyCompressedData(FieldsStream);
                                this.DocBase += it.ChunkDocs;
                                docID = NextLiveDoc(it.DocBase + it.ChunkDocs, liveDocs, maxDoc);
                                docCount += it.ChunkDocs;
                                mergeState.checkAbort.Work(300 * it.ChunkDocs);
                            }
                            else
                            {
                                // decompress
                                it.Decompress();
                                if (startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1] != it.Bytes.Length)
                                {
                                    throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.ChunkDocs - 1] + it.Lengths[it.ChunkDocs - 1]) + ", got " + it.Bytes.Length);
                                }
                                // copy non-deleted docs
                                for (; docID < it.DocBase + it.ChunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc))
                                {
                                    int diff = docID - it.DocBase;
                                    StartDocument(it.NumStoredFields[diff]);
                                    BufferedDocs.WriteBytes(it.Bytes.Bytes, it.Bytes.Offset + startOffsets[diff], it.Lengths[diff]);
                                    FinishDocument();
                                    ++docCount;
                                    mergeState.checkAbort.Work(300);
                                }
                            }
                        } while (docID < maxDoc);

                        it.CheckIntegrity();
                    }
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }
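        // Standalone illustration, not part of the codec API: the chunk handling above first turns
        // per-document lengths into start offsets with a prefix sum, then bulk-copies the compressed
        // chunk only when it is "clean" (nothing buffered, no deletions, and the chunk is the right
        // size). All values below are made-up examples.
        private static void ChunkCopyIllustration()
        {
            int chunkSize = 16;                          // assumed writer chunk size
            int[] lengths = { 5, 7, 6 };                 // serialized length of each doc in the chunk
            int[] startOffsets = new int[lengths.Length];
            for (int i = 1; i < lengths.Length; i++)
            {
                startOffsets[i] = startOffsets[i - 1] + lengths[i - 1]; // 0, 5, 12
            }

            int last = lengths.Length - 1;
            bool noBufferedDocs = true;                  // corresponds to NumBufferedDocs == 0 above
            bool noDeletionsInChunk = true;              // every doc in the chunk is live
            bool canBulkCopy = noBufferedDocs
                && startOffsets[last] < chunkSize                  // chunk is small enough
                && startOffsets[last] + lengths[last] >= chunkSize // chunk is large enough
                && noDeletionsInChunk;
            Console.WriteLine("bulk copy eligible: " + canBulkCopy); // true here: 12 < 16 <= 18
        }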
        /// <summary>
        /// Default merge impl: append documents, mapping around
        ///  deletes
        /// </summary>
        public virtual TermStats Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, DocsEnum postings, FixedBitSet visitedDocs)
        {
            int df = 0;
            long totTF = 0;

            if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
            {
                while (true)
                {
                    int doc = postings.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    this.StartDoc(doc, -1);
                    this.FinishDoc();
                    df++;
                }
                totTF = -1;
            }
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
            {
                while (true)
                {
                    int doc = postings.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    int freq = postings.Freq();
                    this.StartDoc(doc, freq);
                    this.FinishDoc();
                    df++;
                    totTF += freq;
                }
            }
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                var postingsEnum = (DocsAndPositionsEnum)postings;
                while (true)
                {
                    int doc = postingsEnum.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    int freq = postingsEnum.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
                    for (int i = 0; i < freq; i++)
                    {
                        int position = postingsEnum.NextPosition();
                        BytesRef payload = postingsEnum.Payload;
                        this.AddPosition(position, payload, -1, -1);
                    }
                    this.FinishDoc();
                    df++;
                }
            }
            else
            {
                Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                var postingsEnum = (DocsAndPositionsEnum)postings;
                while (true)
                {
                    int doc = postingsEnum.NextDoc();
                    if (doc == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                    visitedDocs.Set(doc);
                    int freq = postingsEnum.Freq();
                    this.StartDoc(doc, freq);
                    totTF += freq;
                    for (int i = 0; i < freq; i++)
                    {
                        int position = postingsEnum.NextPosition();
                        BytesRef payload = postingsEnum.Payload;
                        this.AddPosition(position, payload, postingsEnum.StartOffset(), postingsEnum.EndOffset());
                    }
                    this.FinishDoc();
                    df++;
                }
            }
            return new TermStats(df, indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : totTF);
        }
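        // Standalone illustration, not library code: what the TermStats returned above summarizes for
        // a single term. The frequencies are made-up example values.
        private static void TermStatsIllustration()
        {
            int[] freqs = { 3, 1, 4 }; // term frequency in each live document containing the term
            int df = freqs.Length;     // document frequency
            long totTF = 0;            // total term frequency across those documents
            foreach (int f in freqs)
            {
                totTF += f;
            }
            // With IndexOptions.DOCS_ONLY frequencies are not recorded, which is why the method above
            // reports totTF as -1 in that case.
            Console.WriteLine("df=" + df + " totTF=" + totTF); // df=3 totTF=8
        }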
// Example #4
        private bool CommitMerge(MergePolicy.OneMerge merge, MergeState mergeState)
        {
            lock (this)
            {
                Debug.Assert(TestPoint("startCommitMerge"));

                if (HitOOM)
                {
                    throw new InvalidOperationException("this writer hit an OutOfMemoryError; cannot complete merge");
                }

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "commitMerge: " + SegString(merge.Segments) + " index=" + SegString());
                }

                Debug.Assert(merge.RegisterDone);

                // If merge was explicitly aborted, or, if rollback() or
                // rollbackTransaction() had been called since our merge
                // started (which results in an unqualified
                // deleter.refresh() call that will remove any index
                // file that current segments does not reference), we
                // abort this merge
                if (merge.Aborted)
                {
                    if (infoStream.IsEnabled("IW"))
                    {
                        infoStream.Message("IW", "commitMerge: skip: it was aborted");
                    }
                    // In case we opened and pooled a reader for this
                    // segment, drop it now.  This ensures that we close
                    // the reader before trying to delete any of its
                    // files.  This is not a very big deal, since this
                    // reader will never be used by any NRT reader, and
                    // another thread is currently running close(false)
                    // so it will be dropped shortly anyway, but not
                    // doing this makes MockDirWrapper angry in
                    // TestNRTThreads (LUCENE-5434):
                    readerPool.Drop(merge.Info_Renamed);
                    Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                    return false;
                }

                ReadersAndUpdates mergedUpdates = merge.Info_Renamed.Info.DocCount == 0 ? null : CommitMergedDeletesAndUpdates(merge, mergeState);
                //    System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMerge: mergedDeletes=" + mergedDeletes);

                // If the doc store we are using has been closed and
                // is now in compound format (but wasn't when we
                // started), then we will switch to the compound
                // format as well:

                Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed));

                bool allDeleted = merge.Segments.Count == 0 || merge.Info_Renamed.Info.DocCount == 0 || (mergedUpdates != null && mergedUpdates.PendingDeleteCount == merge.Info_Renamed.Info.DocCount);

                if (infoStream.IsEnabled("IW"))
                {
                    if (allDeleted)
                    {
                        infoStream.Message("IW", "merged segment " + merge.Info_Renamed + " is 100% deleted" + (KeepFullyDeletedSegments_Renamed ? "" : "; skipping insert"));
                    }
                }

                bool dropSegment = allDeleted && !KeepFullyDeletedSegments_Renamed;

                // If we merged no segments then we better be dropping
                // the new segment:
                Debug.Assert(merge.Segments.Count > 0 || dropSegment);

                Debug.Assert(merge.Info_Renamed.Info.DocCount != 0 || KeepFullyDeletedSegments_Renamed || dropSegment);

                if (mergedUpdates != null)
                {
                    bool success = false;
                    try
                    {
                        if (dropSegment)
                        {
                            mergedUpdates.DropChanges();
                        }
                        // Pass false for assertInfoLive because the merged
                        // segment is not yet live (only below do we commit it
                        // to the segmentInfos):
                        readerPool.Release(mergedUpdates, false);
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            mergedUpdates.DropChanges();
                            readerPool.Drop(merge.Info_Renamed);
                        }
                    }
                }

                // Must do this after readerPool.release, in case an
                // exception is hit e.g. writing the live docs for the
                // merge segment, in which case we need to abort the
                // merge:
                segmentInfos.ApplyMergeChanges(merge, dropSegment);

                if (dropSegment)
                {
                    Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed));
                    readerPool.Drop(merge.Info_Renamed);
                    Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                }

                bool success_ = false;
                try
                {
                    // Must close before checkpoint, otherwise IFD won't be
                    // able to delete the held-open files from the merge
                    // readers:
                    CloseMergeReaders(merge, false);
                    success_ = true;
                }
                finally
                {
                    // Must note the change to segmentInfos so any commits
                    // in-flight don't lose it (IFD will incRef/protect the
                    // new files we created):
                    if (success_)
                    {
                        Checkpoint();
                    }
                    else
                    {
                        try
                        {
                            Checkpoint();
                        }
                        catch (Exception)
                        {
                            // Ignore so we keep throwing original exception.
                        }
                    }
                }

                Deleter.DeletePendingFiles();

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "after commitMerge: " + SegString());
                }

                if (merge.MaxNumSegments != -1 && !dropSegment)
                {
                    // cascade the forceMerge:
                    if (!SegmentsToMerge.ContainsKey(merge.Info_Renamed))
                    {
                        SegmentsToMerge[merge.Info_Renamed] = false;
                    }
                }

                return true;
            }
        }
// Example #5
        /// <summary>
        /// Does the actual (time-consuming) work of the merge,
        ///  but without holding synchronized lock on IndexWriter
        ///  instance
        /// </summary>
        private int MergeMiddle(MergePolicy.OneMerge merge)
        {
            merge.CheckAborted(directory);

            string mergedName = merge.Info_Renamed.Info.Name;

            IList<SegmentCommitInfo> sourceSegments = merge.Segments;

            IOContext context = new IOContext(merge.MergeInfo);

            MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory);
            TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);

            if (infoStream.IsEnabled("IW"))
            {
                infoStream.Message("IW", "merging " + SegString(merge.Segments));
            }

            merge.Readers = new List<SegmentReader>();

            // this is try/finally to make sure merger's readers are
            // closed:
            bool success = false;
            try
            {
                int segUpto = 0;
                while (segUpto < sourceSegments.Count)
                {
                    SegmentCommitInfo info = sourceSegments[segUpto];

                    // Hold onto the "live" reader; we will use this to
                    // commit merged deletes
                    ReadersAndUpdates rld = readerPool.Get(info, true);

                    // Carefully pull the most recent live docs and reader
                    SegmentReader reader;
                    Bits liveDocs;
                    int delCount;

                    lock (this)
                    {
                        // Must sync to ensure BufferedDeletesStream cannot change liveDocs,
                        // pendingDeleteCount and field updates while we pull a copy:
                        reader = rld.GetReaderForMerge(context);
                        liveDocs = rld.ReadOnlyLiveDocs;
                        delCount = rld.PendingDeleteCount + info.DelCount;

                        Debug.Assert(reader != null);
                        Debug.Assert(rld.VerifyDocCounts());

                        if (infoStream.IsEnabled("IW"))
                        {
                            if (rld.PendingDeleteCount != 0)
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount);
                            }
                            else if (info.DelCount != 0)
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount);
                            }
                            else
                            {
                                infoStream.Message("IW", "seg=" + SegString(info) + " no deletes");
                            }
                        }
                    }

                    // Deletes might have happened after we pulled the merge reader and
                    // before we got a read-only copy of the segment's actual live docs
                    // (taking pending deletes into account). In that case we need to
                    // make a new reader with updated live docs and del count.
                    if (reader.NumDeletedDocs() != delCount)
                    {
                        // fix the reader's live docs and del count
                        Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies

                        SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount);
                        bool released = false;
                        try
                        {
                            rld.Release(reader);
                            released = true;
                        }
                        finally
                        {
                            if (!released)
                            {
                                newReader.DecRef();
                            }
                        }

                        reader = newReader;
                    }

                    merge.Readers.Add(reader);
                    Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount);
                    segUpto++;
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());

                // we pass merge.getMergeReaders() instead of merge.readers to allow the
                // OneMerge to return a view over the actual segments to merge
                SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

                merge.CheckAborted(directory);

                // this is where all the work happens:
                MergeState mergeState;
                bool success3 = false;
                try
                {
                    if (!merger.ShouldMerge())
                    {
                        // would result in a 0 document segment: nothing to merge!
                        mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort);
                    }
                    else
                    {
                        mergeState = merger.Merge();
                    }
                    success3 = true;
                }
                finally
                {
                    if (!success3)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(merge.Info_Renamed.Info.Name);
                        }
                    }
                }
                Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info);
                merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles);

                // Record which codec was used to write the segment

                if (infoStream.IsEnabled("IW"))
                {
                    if (merge.Info_Renamed.Info.DocCount == 0)
                    {
                        infoStream.Message("IW", "merge away fully deleted segments");
                    }
                    else
                    {
                        infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs"));
                    }
                }

                // Very important to do this before opening the reader
                // because codec must know if prox was written for
                // this segment:
                //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
                bool useCompoundFile;
                lock (this) // Guard segmentInfos
                {
                    useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed);
                }

                if (useCompoundFile)
                {
                    success = false;

                    ICollection<string> filesToRemove = merge.Info_Renamed.Files();

                    try
                    {
                        filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context);
                        success = true;
                    }
                    catch (System.IO.IOException ioe)
                    {
                        lock (this)
                        {
                            if (merge.Aborted)
                            {
                                // this can happen if rollback or close(false)
                                // is called -- fall through to logic below to
                                // remove the partially created CFS:
                            }
                            else
                            {
                                HandleMergeException(ioe, merge);
                            }
                        }
                    }
                    catch (Exception t)
                    {
                        HandleMergeException(t, merge);
                    }
                    finally
                    {
                        if (!success)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "hit exception creating compound file during merge");
                            }

                            lock (this)
                            {
                                Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                                Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                                Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                            }
                        }
                    }

                    // So that, if we hit exc in deleteNewFiles (next)
                    // or in commitMerge (later), we close the
                    // per-segment readers in the finally clause below:
                    success = false;

                    lock (this)
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        Deleter.DeleteNewFiles(filesToRemove);

                        if (merge.Aborted)
                        {
                            if (infoStream.IsEnabled("IW"))
                            {
                                infoStream.Message("IW", "abort merge after building CFS");
                            }
                            Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION));
                            Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
                            return 0;
                        }
                    }

                    merge.Info_Renamed.Info.UseCompoundFile = true;
                }
                else
                {
                    // So that, if we hit exc in commitMerge (later),
                    // we close the per-segment readers in the finally
                    // clause below:
                    success = false;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                bool success2 = false;
                try
                {
                    Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context);
                    success2 = true;
                }
                finally
                {
                    if (!success2)
                    {
                        lock (this)
                        {
                            Deleter.DeleteNewFiles(merge.Info_Renamed.Files());
                        }
                    }
                }

                // TODO: ideally we would freeze merge.info here!!
                // because any changes after writing the .si will be
                // lost...

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0));
                }

                IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer;
                if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0)
                {
                    ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true);
                    SegmentReader sr = rld.GetReader(IOContext.READ);
                    try
                    {
                        mergedSegmentWarmer.Warm(sr);
                    }
                    finally
                    {
                        lock (this)
                        {
                            rld.Release(sr);
                            readerPool.Release(rld);
                        }
                    }
                }

                // Force READ context because we merge deletes onto
                // this reader:
                if (!CommitMerge(merge, mergeState))
                {
                    // commitMerge will return false if this merge was
                    // aborted
                    return 0;
                }

                success = true;
            }
            finally
            {
                // Readers are already closed in commitMerge if we didn't hit
                // an exc:
                if (!success)
                {
                    CloseMergeReaders(merge, true);
                }
            }

            return merge.Info_Renamed.Info.DocCount;
        }
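        // Summary of the MergeMiddle flow above, for orientation:
        //   1. pull a merge-time SegmentReader (with a stable live-docs snapshot) for every source segment;
        //   2. run SegmentMerger against a TrackingDirectoryWrapper so the newly created files are recorded;
        //   3. optionally repackage the new segment into a compound file (CFS), honoring aborts;
        //   4. write the .si through the codec's SegmentInfoFormat (after the CFS decision, so the flag is correct);
        //   5. optionally warm the merged segment, then CommitMerge carries late deletes/updates over to the
        //      new segment and publishes it; on failure the merge readers are closed and the new files deleted.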
// Example #6
 private void MaybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, MergedDeletesAndUpdates holder, string[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc)
 {
     int newDoc = -1;
     for (int idx = 0; idx < mergingFields.Length; idx++)
     {
         DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx];
         if (updatesIter.Doc() == curDoc) // document has an update
         {
             if (holder.MergedDeletesAndUpdates_Renamed == null)
             {
                 holder.Init(readerPool, merge, mergeState, false);
             }
             if (newDoc == -1) // map once per all field updates, but only if there are any updates
             {
                 newDoc = holder.DocMap.Map(docUpto);
             }
             DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx];
             dvUpdates.Add(newDoc, updatesIter.Value());
             updatesIter.NextDoc(); // advance to next document
         }
         else
         {
             Debug.Assert(updatesIter.Doc() > curDoc, "field=" + mergingFields[idx] + " updateDoc=" + updatesIter.Doc() + " curDoc=" + curDoc);
         }
     }
 }
// Example #7
        /// <summary>
        /// Carefully merges deletes and updates for the segments we just merged. this
        /// is tricky because, although merging will clear all deletes (compacts the
        /// documents) and compact all the updates, new deletes and updates may have
        /// been flushed to the segments since the merge was started. this method
        /// "carries over" such new deletes and updates onto the newly merged segment,
        /// and saves the resulting deletes and updates files (incrementing the delete
        /// and DV generations for merge.info). If no deletes were flushed, no new
        /// deletes file is saved.
        /// </summary>
        private ReadersAndUpdates CommitMergedDeletesAndUpdates(MergePolicy.OneMerge merge, MergeState mergeState)
        {
            lock (this)
            {
                Debug.Assert(TestPoint("startCommitMergeDeletes"));

                IList<SegmentCommitInfo> sourceSegments = merge.Segments;

                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "commitMergeDeletes " + SegString(merge.Segments));
                }

                // Carefully merge deletes that occurred after we
                // started merging:
                int docUpto = 0;
                long minGen = long.MaxValue;

                // Lazy init (only when we find a delete to carry over):
                MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates();
                DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container();

                for (int i = 0; i < sourceSegments.Count; i++)
                {
                    SegmentCommitInfo info = sourceSegments[i];
                    minGen = Math.Min(info.BufferedDeletesGen, minGen);
                    int docCount = info.Info.DocCount;
                    Bits prevLiveDocs = merge.Readers[i].LiveDocs;
                    ReadersAndUpdates rld = readerPool.Get(info, false);
                    // We hold a ref so it should still be in the pool:
                    Debug.Assert(rld != null, "seg=" + info.Info.Name);
                    Bits currentLiveDocs = rld.LiveDocs;
                    IDictionary<string, DocValuesFieldUpdates> mergingFieldUpdates = rld.MergingFieldUpdates;
                    string[] mergingFields;
                    DocValuesFieldUpdates[] dvFieldUpdates;
                    DocValuesFieldUpdates.Iterator[] updatesIters;
                    if (mergingFieldUpdates.Count == 0)
                    {
                        mergingFields = null;
                        updatesIters = null;
                        dvFieldUpdates = null;
                    }
                    else
                    {
                        mergingFields = new string[mergingFieldUpdates.Count];
                        dvFieldUpdates = new DocValuesFieldUpdates[mergingFieldUpdates.Count];
                        updatesIters = new DocValuesFieldUpdates.Iterator[mergingFieldUpdates.Count];
                        int idx = 0;
                        foreach (KeyValuePair<string, DocValuesFieldUpdates> e in mergingFieldUpdates)
                        {
                            string field = e.Key;
                            DocValuesFieldUpdates updates = e.Value;
                            mergingFields[idx] = field;
                            dvFieldUpdates[idx] = mergedDVUpdates.GetUpdates(field, updates.Type);
                            if (dvFieldUpdates[idx] == null)
                            {
                                dvFieldUpdates[idx] = mergedDVUpdates.NewUpdates(field, updates.Type, mergeState.SegmentInfo.DocCount);
                            }
                            updatesIters[idx] = updates.GetIterator();
                            updatesIters[idx].NextDoc(); // advance to first update doc
                            ++idx;
                        }
                    }
                    //      System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: info=" + info + ", mergingUpdates=" + mergingUpdates);

                    if (prevLiveDocs != null)
                    {
                        // If we had deletions on starting the merge we must
                        // still have deletions now:
                        Debug.Assert(currentLiveDocs != null);
                        Debug.Assert(prevLiveDocs.Length() == docCount);
                        Debug.Assert(currentLiveDocs.Length() == docCount);

                        // There were deletes on this segment when the merge
                        // started.  The merge has collapsed away those
                        // deletes, but, if new deletes were flushed since
                        // the merge started, we must now carefully keep any
                        // newly flushed deletes but mapping them to the new
                        // docIDs.

                        // Since we copy-on-write, if any new deletes were
                        // applied after merging has started, we can just
                        // check if the before/after liveDocs have changed.
                        // If so, we must carefully merge the liveDocs one
                        // doc at a time:
                        if (currentLiveDocs != prevLiveDocs)
                        {
                            // this means this segment received new deletes
                            // since we started the merge, so we
                            // must merge them:
                            for (int j = 0; j < docCount; j++)
                            {
                                if (!prevLiveDocs.Get(j))
                                {
                                    Debug.Assert(!currentLiveDocs.Get(j));
                                }
                                else
                                {
                                    if (!currentLiveDocs.Get(j))
                                    {
                                        if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                                        {
                                            holder.Init(readerPool, merge, mergeState, true);
                                        }
                                        holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                                        if (mergingFields != null) // advance all iters beyond the deleted document
                                        {
                                            SkipDeletedDoc(updatesIters, j);
                                        }
                                    }
                                    else if (mergingFields != null)
                                    {
                                        MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                                    }
                                    docUpto++;
                                }
                            }
                        }
                        else if (mergingFields != null)
                        {
                            // need to check each non-deleted document if it has any updates
                            for (int j = 0; j < docCount; j++)
                            {
                                if (prevLiveDocs.Get(j))
                                {
                                    // document isn't deleted, check if any of the fields have an update to it
                                    MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                                    // advance docUpto for every non-deleted document
                                    docUpto++;
                                }
                                else
                                {
                                    // advance all iters beyond the deleted document
                                    SkipDeletedDoc(updatesIters, j);
                                }
                            }
                        }
                        else
                        {
                            docUpto += info.Info.DocCount - info.DelCount - rld.PendingDeleteCount;
                        }
                    }
                    else if (currentLiveDocs != null)
                    {
                        Debug.Assert(currentLiveDocs.Length() == docCount);
                        // this segment had no deletes before but now it
                        // does:
                        for (int j = 0; j < docCount; j++)
                        {
                            if (!currentLiveDocs.Get(j))
                            {
                                if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs)
                                {
                                    holder.Init(readerPool, merge, mergeState, true);
                                }
                                holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto));
                                if (mergingFields != null) // advance all iters beyond the deleted document
                                {
                                    SkipDeletedDoc(updatesIters, j);
                                }
                            }
                            else if (mergingFields != null)
                            {
                                MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            }
                            docUpto++;
                        }
                    }
                    else if (mergingFields != null)
                    {
                        // no deletions before or after, but there were updates
                        for (int j = 0; j < docCount; j++)
                        {
                            MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j);
                            // advance docUpto for every non-deleted document
                            docUpto++;
                        }
                    }
                    else
                    {
                        // No deletes or updates before or after
                        docUpto += info.Info.DocCount;
                    }
                }

                Debug.Assert(docUpto == merge.Info_Renamed.Info.DocCount);

                if (mergedDVUpdates.Any())
                {
                    //      System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates);
                    bool success = false;
                    try
                    {
                        // if any error occurs while writing the field updates we should release
                        // the info, otherwise it stays in the pool but is considered not "live"
                        // which later causes false exceptions in pool.dropAll().
                        // NOTE: currently this is the only place which throws a true
                        // IOException. If this ever changes, we need to extend that try/finally
                        // block to the rest of the method too.
                        holder.MergedDeletesAndUpdates_Renamed.WriteFieldUpdates(directory, mergedDVUpdates);
                        success = true;
                    }
                    finally
                    {
                        if (!success)
                        {
                            holder.MergedDeletesAndUpdates_Renamed.DropChanges();
                            readerPool.Drop(merge.Info_Renamed);
                        }
                    }
                }

                if (infoStream.IsEnabled("IW"))
                {
                    if (holder.MergedDeletesAndUpdates_Renamed == null)
                    {
                        infoStream.Message("IW", "no new deletes or field updates since merge started");
                    }
                    else
                    {
                        string msg = holder.MergedDeletesAndUpdates_Renamed.PendingDeleteCount + " new deletes";
                        if (mergedDVUpdates.Any())
                        {
                            msg += " and " + mergedDVUpdates.Size() + " new field updates";
                        }
                        msg += " since merge started";
                        infoStream.Message("IW", msg);
                    }
                }

                merge.Info_Renamed.BufferedDeletesGen = minGen;

                return holder.MergedDeletesAndUpdates_Renamed;
            }
        }
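        // Simplified illustration (plain arrays, not the IndexWriter API) of the carry-over logic in
        // CommitMergedDeletesAndUpdates above, for a single source segment: docs deleted before the
        // merge are already compacted away, so only deletions that arrived while the merge ran (live
        // in prevLiveDocs, dead in currentLiveDocs) must be re-applied to the merged segment via the
        // doc map. All values are made-up examples.
        private static void CarryOverDeletesIllustration()
        {
            bool[] prevLiveDocs    = { true, false, true, true };   // live docs when the merge started
            bool[] currentLiveDocs = { true, false, false, true };  // live docs now; doc 2 was deleted mid-merge
            var newlyDeletedMergedDocs = new List<int>();

            int docUpto = 0;                                        // position in the "natural" merged order
            for (int j = 0; j < prevLiveDocs.Length; j++)
            {
                if (!prevLiveDocs[j])
                {
                    continue;                                       // deleted before the merge: never made it into the merged segment
                }
                if (!currentLiveDocs[j])
                {
                    // deleted while merging: map the natural merged doc ID to the actual one
                    newlyDeletedMergedDocs.Add(MapDoc(docUpto));
                }
                docUpto++;                                          // every doc that survived the original merge advances the counter
            }
            // newlyDeletedMergedDocs now holds { 1 }: original doc 2 became merged doc 1.
        }

        // Hypothetical stand-in for holder.DocMap.Map(docUpto); the real map comes from OneMerge.GetDocMap.
        private static int MapDoc(int naturalDocId)
        {
            return naturalDocId;                                    // identity mapping for a natural (non-reordering) merge
        }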
// Example #8
 internal void Init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, bool initWritableLiveDocs)
 {
     if (MergedDeletesAndUpdates_Renamed == null)
     {
         MergedDeletesAndUpdates_Renamed = readerPool.Get(merge.Info_Renamed, true);
         DocMap = merge.GetDocMap(mergeState);
         Debug.Assert(DocMap.IsConsistent(merge.Info_Renamed.Info.DocCount));
     }
     if (initWritableLiveDocs && !InitializedWritableLiveDocs)
     {
         MergedDeletesAndUpdates_Renamed.InitWritableLiveDocs();
         this.InitializedWritableLiveDocs = true;
     }
 }
// Example #9
 /// <summary>
 /// Expert: If <seealso cref="#getMergeReaders()"/> reorders document IDs, this method
 ///  must be overridden to return a mapping from the <i>natural</i> doc ID
 ///  (the doc ID that would result from a natural merge) to the actual doc
 ///  ID. this mapping is used to apply deletions that happened during the
 ///  merge to the new segment.
 /// </summary>
 public virtual DocMap GetDocMap(MergeState mergeState)
 {
     return new DocMapAnonymousInnerClassHelper(this);
 }
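 // A hedged sketch of what DocMapAnonymousInnerClassHelper presumably implements for a natural
 // (non-reordering) merge: an identity mapping, so a deletion that happens during the merge lands
 // on the same doc ID in the merged segment. The base class and member names here are assumptions
 // about the port, not confirmed API.
 private class IdentityDocMapSketch : DocMap
 {
     public override int Map(int docID)
     {
         return docID; // natural doc ID == actual doc ID when documents are not reordered
     }
 }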
 private static bool Equals(MergeState.DocMap map1, MergeState.DocMap map2)
 {
     if (map1.MaxDoc != map2.MaxDoc)
     {
         return false;
     }
     for (int i = 0; i < map1.MaxDoc; ++i)
     {
         if (map1.Get(i) != map2.Get(i))
         {
             return false;
         }
     }
     return true;
 }
// Example #12
        public virtual void TestMerge()
        {
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);

            SegmentMerger merger     = new SegmentMerger(Arrays.AsList <AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true);
            MergeState    mergeState = merger.Merge();
            int           docsMerged = mergeState.SegmentInfo.DocCount;

            Assert.IsTrue(docsMerged == 2);
            //Should be able to open a new SegmentReader against the new directory
            SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

            Assert.IsTrue(mergedReader != null);
            Assert.IsTrue(mergedReader.NumDocs == 2);
            Document newDoc1 = mergedReader.Document(0);

            Assert.IsTrue(newDoc1 != null);
            //There are 2 unstored fields on the document
            Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);
            Document newDoc2 = mergedReader.Document(1);

            Assert.IsTrue(newDoc2 != null);
            Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);

            DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);

            Assert.IsTrue(termDocs != null);
            Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            int tvCount = 0;

            foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
            {
                if (fieldInfo.HasVectors)
                {
                    tvCount++;
                }
            }

            //System.out.println("stored size: " + stored.Size());
            Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");

            Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);

            Assert.IsNotNull(vector);
            Assert.AreEqual(3, vector.Count);
            TermsEnum termsEnum = vector.GetIterator(null);

            int i = 0;

            while (termsEnum.Next() != null)
            {
                string term = termsEnum.Term.Utf8ToString();
                int    freq = (int)termsEnum.TotalTermFreq;
                //System.out.println("Term: " + term + " Freq: " + freq);
                Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, StringComparison.Ordinal) != -1);
                Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
                i++;
            }

            TestSegmentReader.CheckNorms(mergedReader);
            mergedReader.Dispose();
        }