private void Initialize(SegmentInfo si)
{
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = Directory();
    if (Directory().FileExists(segment + ".cfs"))
    {
        cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
        cfsDir = cfsReader;
    }

    // No compound file exists - use the multi-file format
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
        deletedDocs = new BitVector(Directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.OpenInput(segment + ".frq");
    proxStream = cfsDir.OpenInput(segment + ".prx");
    OpenNorms(cfsDir);

    if (fieldInfos.HasVectors())
    {
        // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
}
protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc)
{
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
}
public SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;
    this.freqStream = (IndexInput) parent.freqStream.Clone();
    this.deletedDocs = parent.deletedDocs;
    this.skipInterval = parent.tis.GetSkipInterval();
}
protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc())
{
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
}
protected internal AllTermDocs(SegmentReader parent)
{
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
    this.maxDoc = parent.MaxDoc();
}
protected internal override void DoDelete(int docNum)
{
    if (deletedDocs == null)
        deletedDocs = new BitVector(MaxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.Set(docNum);
}
/*protected internal*/
public SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;
    this.freqStream = (IndexInput) parent.core.freqStream.Clone();
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
    this.skipInterval = parent.core.GetTermsReader().SkipInterval;
    this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels;
}
public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;
    this.freqStream = (IndexInput) parent.core.freqStream.Clone();
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
    this.skipInterval = parent.core.GetTermsReader().GetSkipInterval();
    this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels();
}
protected internal override void DoDelete(int docNum)
{
    if (deletedDocs == null)
    {
        deletedDocs = new BitVector(MaxDoc());
    }
    deletedDocsDirty = true;
    undeleteAll = false;
    if (!deletedDocs.GetAndSet(docNum))
    {
        pendingDeleteCount++;
    }
}
private void LoadDeletedDocs()
{
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), si.GetDelFileName());

        // Verify # deletes does not exceed maxDoc for this segment:
        if (deletedDocs.Count() > MaxDoc())
        {
            throw new CorruptIndexException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
        }
    }
}
private void LoadDeletedDocs()
{
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), si.GetDelFileName());

        System.Diagnostics.Debug.Assert(si.GetDelCount() == deletedDocs.Count(),
            "delete count mismatch: info=" + si.GetDelCount() + " vs BitVector=" + deletedDocs.Count());

        // Verify # deletes does not exceed maxDoc for this segment:
        System.Diagnostics.Debug.Assert(si.GetDelCount() <= MaxDoc(),
            "number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
    }
    else
    {
        System.Diagnostics.Debug.Assert(si.GetDelCount() == 0);
    }
}
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(merge.registerDone);

        if (hitOOM)
            return false;

        if (infoStream != null)
            Message("CommitMerge: " + merge.SegString(directory));

        // If merge was explicitly aborted, or, if abort() or
        // rollbackTransaction() had been called since our merge
        // started (which results in an unqualified
        // deleter.refresh() call that will remove any index
        // file that current segments does not reference), we
        // abort this merge
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");

            System.Diagnostics.Debug.Assert(merge.increfDone);
            DecrefMergeSegments(merge);
            deleter.Refresh(merge.info.name);
            return false;
        }

        bool success = false;
        int start;

        try
        {
            SegmentInfos sourceSegmentsClone = merge.segmentsClone;
            SegmentInfos sourceSegments = merge.segments;

            start = EnsureContiguousMerge(merge);
            if (infoStream != null)
                Message("commitMerge " + merge.SegString(directory));

            // Carefully merge deletes that occurred after we
            // started merging:

            BitVector deletes = null;
            int docUpto = 0;

            int numSegmentsToMerge = sourceSegments.Count;
            for (int i = 0; i < numSegmentsToMerge; i++)
            {
                SegmentInfo previousInfo = sourceSegmentsClone.Info(i);
                SegmentInfo currentInfo = sourceSegments.Info(i);

                System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount);

                int docCount = currentInfo.docCount;

                if (previousInfo.HasDeletions())
                {
                    // There were deletes on this segment when the merge
                    // started.  The merge has collapsed away those
                    // deletes, but, if new deletes were flushed since
                    // the merge started, we must now carefully keep any
                    // newly flushed deletes, mapping them to the new
                    // docIDs.

                    System.Diagnostics.Debug.Assert(currentInfo.HasDeletions());

                    // Load deletes present @ start of merge, for this segment:
                    BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName());

                    if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName()))
                    {
                        // This means this segment has had new deletes
                        // committed since we started the merge, so we
                        // must merge them:
                        if (deletes == null)
                            deletes = new BitVector(merge.info.docCount);

                        BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName());
                        for (int j = 0; j < docCount; j++)
                        {
                            if (previousDeletes.Get(j))
                                System.Diagnostics.Debug.Assert(currentDeletes.Get(j));
                            else
                            {
                                if (currentDeletes.Get(j))
                                    deletes.Set(docUpto);
                                docUpto++;
                            }
                        }
                    }
                    else
                        docUpto += docCount - previousDeletes.Count();
                }
                else if (currentInfo.HasDeletions())
                {
                    // This segment had no deletes before but now it
                    // does:
                    if (deletes == null)
                        deletes = new BitVector(merge.info.docCount);
                    BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName());
                    for (int j = 0; j < docCount; j++)
                    {
                        if (currentDeletes.Get(j))
                            deletes.Set(docUpto);
                        docUpto++;
                    }
                }
                else
                    // No deletes before or after
                    docUpto += currentInfo.docCount;

                merge.CheckAborted(directory);
            }

            if (deletes != null)
            {
                merge.info.AdvanceDelGen();
                deletes.Write(directory, merge.info.GetDelFileName());
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                if (infoStream != null)
                    Message("hit exception creating merged deletes file");
                deleter.Refresh(merge.info.name);
            }
        }

        // Simple optimization: if the doc store we are using
        // has been closed and is now in compound format (but
        // wasn't when we started), then we will switch to the
        // compound format as well:
        System.String mergeDocStoreSegment = merge.info.GetDocStoreSegment();
        if (mergeDocStoreSegment != null && !merge.info.GetDocStoreIsCompoundFile())
        {
            int size = segmentInfos.Count;
            for (int i = 0; i < size; i++)
            {
                SegmentInfo info = segmentInfos.Info(i);
                System.String docStoreSegment = info.GetDocStoreSegment();
                if (docStoreSegment != null && docStoreSegment.Equals(mergeDocStoreSegment) && info.GetDocStoreIsCompoundFile())
                {
                    merge.info.SetDocStoreIsCompoundFile(true);
                    break;
                }
            }
        }

        success = false;
        SegmentInfos rollback = null;
        try
        {
            rollback = (SegmentInfos) segmentInfos.Clone();

            // Remove the merged segments and insert the new merged
            // segment in their place:
            ((System.Collections.ArrayList) segmentInfos).RemoveRange(start, merge.segments.Count);
            segmentInfos.Insert(start, merge.info);
            Checkpoint();
            success = true;
        }
        finally
        {
            if (!success && rollback != null)
            {
                if (infoStream != null)
                    Message("hit exception when checkpointing after merge");
                segmentInfos.Clear();
                segmentInfos.AddRange(rollback);
                DeletePartialSegmentsFile();
                deleter.Refresh(merge.info.name);
            }
        }

        if (merge.optimize)
            segmentsToOptimize.Add(merge.info, merge.info);

        // Must checkpoint before decrefing so any newly
        // referenced files in the new merge.info are incref'd
        // first:
        deleter.Checkpoint(segmentInfos, autoCommit);

        DecrefMergeSegments(merge);

        return true;
    }
}
private void LoadDeletedDocs()
{
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), si.GetDelFileName());
        deletedDocsRef = new Ref();
        System.Diagnostics.Debug.Assert(CheckDeletedCounts());
    }
    else
        System.Diagnostics.Debug.Assert(si.GetDelCount() == 0);
}
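// The Ref type used above is not defined in this section. The following is a
// minimal sketch consistent with how it is used here (a new Ref starts with
// one holder; RefCount/IncRef/DecRef are synchronized). This is an
// illustrative assumption, not the verbatim Lucene.Net class.
internal class Ref
{
    private int refCount = 1; // a fresh Ref represents a single holder

    public int RefCount()
    {
        lock (this)
        {
            return refCount;
        }
    }

    public void IncRef()
    {
        lock (this)
        {
            System.Diagnostics.Debug.Assert(refCount > 0);
            refCount++;
        }
    }

    public void DecRef()
    {
        lock (this)
        {
            System.Diagnostics.Debug.Assert(refCount > 0);
            refCount--;
        }
    }
}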
private void Initialize(SegmentInfo si)
{
    segment = si.name;
    this.si = si;

    bool success = false;

    try
    {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
            cfsDir = cfsReader;
        }

        // No compound file exists - use the multi-file format
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
        fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

        // Verify two sources of "maxDoc" agree:
        if (fieldsReader.Size() != si.docCount)
        {
            throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos);

        // NOTE: the bitvector is stored using the regular directory, not cfs
        if (HasDeletions(si))
        {
            deletedDocs = new BitVector(Directory(), si.GetDelFileName());

            // Verify # deletes does not exceed maxDoc for this segment:
            if (deletedDocs.Count() > MaxDoc())
            {
                throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
            }
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq");
        proxStream = cfsDir.OpenInput(segment + ".prx");
        OpenNorms(cfsDir);

        if (fieldInfos.HasVectors())
        {
            // open term vector files only as needed
            termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
        }
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above.  In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}
/// <summary> Clones the deletedDocs BitVector.  May be overridden by subclasses. New and experimental.</summary>
/// <param name="bv">BitVector to clone</param>
/// <returns> New BitVector</returns>
protected internal virtual BitVector CloneDeletedDocs(BitVector bv)
{
    return (BitVector) bv.Clone();
}
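// Because CloneDeletedDocs is virtual, a subclass can substitute its own
// copying strategy for the clone performed when deletes are copied on
// write. A contrived sketch; the subclass name and counter are
// hypothetical, for illustration only:
internal class InstrumentedSegmentReader : SegmentReader
{
    internal int cloneCount;

    protected internal override BitVector CloneDeletedDocs(BitVector bv)
    {
        cloneCount++; // record that a clone of the deleted-docs vector happened
        return base.CloneDeletedDocs(bv);
    }
}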
protected internal override void DoClose()
{
    bool hasReferencedReader = (referencedSegmentReader != null);

    termVectorsLocal.Close();
    //termVectorsLocal.Clear();

    if (hasReferencedReader)
    {
        referencedSegmentReader.DecRefReaderNotNorms();
        referencedSegmentReader = null;
    }

    deletedDocs = null;

    // close the single norms stream
    if (singleNormStream != null)
    {
        // we can close this stream, even if the norms
        // are shared, because every reader has its own
        // singleNormStream
        singleNormStream.Close();
        singleNormStream = null;
    }

    // re-opened SegmentReaders have their own instance of FieldsReader
    if (fieldsReader != null)
    {
        fieldsReader.Close();
    }

    if (!hasReferencedReader)
    {
        // close everything, nothing is shared anymore with other readers
        if (tis != null)
        {
            tis.Close();
        }
        if (freqStream != null)
            freqStream.Close();
        if (proxStream != null)
            proxStream.Close();
        if (termVectorsReaderOrig != null)
            termVectorsReaderOrig.Close();
        if (cfsReader != null)
            cfsReader.Close();
        if (storeCFSReader != null)
            storeCFSReader.Close();

        // maybe close directory
        base.DoClose();
    }
}
protected internal override void DoClose()
{
    termVectorsLocal.Close();
    fieldsReaderLocal.Close();

    if (deletedDocs != null)
    {
        deletedDocsRef.DecRef();
        // null so if an app hangs on to us we still free most ram
        deletedDocs = null;
    }

    System.Collections.IEnumerator it = norms.Values.GetEnumerator();
    while (it.MoveNext())
    {
        ((Norm) it.Current).DecRef();
    }

    if (core != null)
    {
        core.DecRef();
    }
}
protected internal override void DoUndeleteAll()
{
    deletedDocs = null;
    deletedDocsDirty = false;
    undeleteAll = true;
}
public int GetDelCount()
{
    if (delCount == -1)
    {
        if (HasDeletions())
        {
            System.String delFileName = GetDelFileName();
            delCount = new BitVector(dir, delFileName).Count();
        }
        else
            delCount = 0;
    }
    System.Diagnostics.Debug.Assert(delCount <= docCount);

    return delCount;
}
protected internal override void DoDelete(int docNum)
{
    if (deletedDocs == null)
    {
        deletedDocs = new BitVector(MaxDoc());
        deletedDocsRef = new Ref();
    }
    // If more than one SegmentReader holds a reference to this
    // deletedDocs BitVector, copy on write: decRef the current
    // deletedDocsRef, clone the BitVector, and create a new
    // deletedDocsRef for the clone
    if (deletedDocsRef.RefCount() > 1)
    {
        Ref oldRef = deletedDocsRef;
        deletedDocs = CloneDeletedDocs(deletedDocs);
        deletedDocsRef = new Ref();
        oldRef.DecRef();
    }
    deletedDocsDirty = true;
    if (!deletedDocs.GetAndSet(docNum))
        pendingDeleteCount++;
}
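// The guard above implements copy-on-write: a reader that shares its
// deletedDocs BitVector with another (re-opened) reader clones the vector
// before the first mutation, so the sharing reader's view stays frozen.
// A standalone sketch of the same idea, using bool[] in place of BitVector
// and the Ref sketched earlier; all names here are illustrative only:
internal class CopyOnWriteDeletes
{
    private bool[] bits;
    private Ref bitsRef = new Ref();

    public CopyOnWriteDeletes(int maxDoc)
    {
        bits = new bool[maxDoc];
    }

    // Hand the current bits to another holder (e.g. a re-opened reader).
    public bool[] Share()
    {
        bitsRef.IncRef();
        return bits;
    }

    public void Delete(int docNum)
    {
        if (bitsRef.RefCount() > 1)
        {
            // Another holder still sees the old bits: clone before writing.
            Ref oldRef = bitsRef;
            bits = (bool[]) bits.Clone();
            bitsRef = new Ref();
            oldRef.DecRef();
        }
        bits[docNum] = true;
    }
}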
protected internal override void DoUndeleteAll()
{
    deletedDocsDirty = false;
    if (deletedDocs != null)
    {
        System.Diagnostics.Debug.Assert(deletedDocsRef != null);
        deletedDocsRef.DecRef();
        deletedDocs = null;
        deletedDocsRef = null;
        pendingDeleteCount = 0;
        si.ClearDelGen();
        si.SetDelCount(0);
    }
    else
    {
        System.Diagnostics.Debug.Assert(deletedDocsRef == null);
        System.Diagnostics.Debug.Assert(pendingDeleteCount == 0);
    }
}
private void LoadDeletedDocs()
{
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (HasDeletions(si))
    {
        deletedDocs = new BitVector(Directory(), si.GetDelFileName());
        deletedDocsRef = new Ref();

        System.Diagnostics.Debug.Assert(si.GetDelCount() == deletedDocs.Count(),
            "delete count mismatch: info=" + si.GetDelCount() + " vs BitVector=" + deletedDocs.Count());

        // Verify # deletes does not exceed maxDoc for this
        // segment:
        System.Diagnostics.Debug.Assert(si.GetDelCount() <= MaxDoc(),
            "number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
    }
    else
        System.Diagnostics.Debug.Assert(si.GetDelCount() == 0);
}
/** Carefully merges deletes for the segments we just
 *  merged.  This is tricky because, although merging will
 *  clear all deletes (compacts the documents), new
 *  deletes may have been flushed to the segments since
 *  the merge was started.  This method "carries over"
 *  such new deletes onto the newly merged segment, and
 *  saves the resulting deletes file (incrementing the
 *  delete generation for merge.info).  If no deletes were
 *  flushed, no new deletes file is saved. */
private void CommitMergedDeletes(MergePolicy.OneMerge merge)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));

        SegmentInfos sourceSegmentsClone = merge.segmentsClone;
        SegmentInfos sourceSegments = merge.segments;

        if (infoStream != null)
            Message("commitMergeDeletes " + merge.SegString(directory));

        // Carefully merge deletes that occurred after we
        // started merging:

        BitVector deletes = null;
        int docUpto = 0;
        int delCount = 0;

        int numSegmentsToMerge = sourceSegments.Count;
        for (int i = 0; i < numSegmentsToMerge; i++)
        {
            SegmentInfo previousInfo = sourceSegmentsClone.Info(i);
            SegmentInfo currentInfo = sourceSegments.Info(i);

            System.Diagnostics.Debug.Assert(currentInfo.docCount == previousInfo.docCount);

            int docCount = currentInfo.docCount;

            if (previousInfo.HasDeletions())
            {
                // There were deletes on this segment when the merge
                // started.  The merge has collapsed away those
                // deletes, but, if new deletes were flushed since
                // the merge started, we must now carefully keep any
                // newly flushed deletes, mapping them to the new
                // docIDs.

                System.Diagnostics.Debug.Assert(currentInfo.HasDeletions());

                // Load deletes present @ start of merge, for this segment:
                BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.GetDelFileName());

                if (!currentInfo.GetDelFileName().Equals(previousInfo.GetDelFileName()))
                {
                    // This means this segment has had new deletes
                    // committed since we started the merge, so we
                    // must merge them:
                    if (deletes == null)
                        deletes = new BitVector(merge.info.docCount);

                    BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.GetDelFileName());
                    for (int j = 0; j < docCount; j++)
                    {
                        if (previousDeletes.Get(j))
                            System.Diagnostics.Debug.Assert(currentDeletes.Get(j));
                        else
                        {
                            if (currentDeletes.Get(j))
                            {
                                deletes.Set(docUpto);
                                delCount++;
                            }
                            docUpto++;
                        }
                    }
                }
                else
                    docUpto += docCount - previousDeletes.Count();
            }
            else if (currentInfo.HasDeletions())
            {
                // This segment had no deletes before but now it
                // does:
                if (deletes == null)
                    deletes = new BitVector(merge.info.docCount);
                BitVector currentDeletes = new BitVector(directory, currentInfo.GetDelFileName());
                for (int j = 0; j < docCount; j++)
                {
                    if (currentDeletes.Get(j))
                    {
                        deletes.Set(docUpto);
                        delCount++;
                    }
                    docUpto++;
                }
            }
            else
                // No deletes before or after
                docUpto += currentInfo.docCount;
        }

        if (deletes != null)
        {
            merge.info.AdvanceDelGen();
            if (infoStream != null)
                Message("commit merge deletes to " + merge.info.GetDelFileName());
            deletes.Write(directory, merge.info.GetDelFileName());
            merge.info.SetDelCount(delCount);
            System.Diagnostics.Debug.Assert(delCount == deletes.Count());
        }
    }
}
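// The carry-over loop above maps pre-merge docIDs to merged docIDs by
// skipping documents that were already deleted when the merge started
// (the merge compacted those away, so they receive no new docID). The
// same index arithmetic on plain arrays, as a standalone illustrative
// sketch; the method and parameter names are invented:
static void CarryOverDeletes(bool[] previousDeletes, bool[] currentDeletes, bool[] mergedDeletes, ref int docUpto)
{
    for (int j = 0; j < currentDeletes.Length; j++)
    {
        if (previousDeletes[j])
        {
            // Deleted before the merge started: this doc was dropped by
            // the merge and has no merged docID; it must still be deleted.
            System.Diagnostics.Debug.Assert(currentDeletes[j]);
        }
        else
        {
            if (currentDeletes[j])
                mergedDeletes[docUpto] = true; // newly flushed delete, carried over
            docUpto++; // doc survived the merge and received the next docID
        }
    }
}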