protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc())
{
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
}
public /*protected internal*/ SegmentTermDocs(SegmentReader parent)
{
    this.parent = parent;
    this.freqStream = (IndexInput) parent.core.freqStream.Clone();
    lock (parent)
    {
        this.deletedDocs = parent.deletedDocs;
    }
    this.skipInterval = parent.core.GetTermsReader().GetSkipInterval();
    this.maxSkipLevels = parent.core.GetTermsReader().GetMaxSkipLevels();
}
internal MySegmentTermDocs(SegmentReader p) : base(p)
{
}
public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
{
    InitBlock(enclosingInstance);
    this.in_Renamed = in_Renamed;
    this.number = number;
    this.normSeek = normSeek;
}
private void InitBlock(SegmentReader enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}
public FieldsReaderLocal(SegmentReader enclosingInstance)
{
    InitBlock(enclosingInstance);
}
/// <summary>Carefully merges deletes for the segments we just
/// merged. This is tricky because, although merging will
/// clear all deletes (compacts the documents), new
/// deletes may have been flushed to the segments since
/// the merge was started. This method "carries over"
/// such new deletes onto the newly merged segment, and
/// saves the resulting deletes file (incrementing the
/// delete generation for merge.info). If no deletes were
/// flushed, no new deletes file is saved.
/// </summary>
private void CommitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMergeDeletes"));

        SegmentInfos sourceSegments = merge.segments;

        if (infoStream != null)
            Message("commitMergeDeletes " + merge.SegString(directory));

        // Carefully merge deletes that occurred after we
        // started merging:
        int docUpto = 0;
        int delCount = 0;

        for (int i = 0; i < sourceSegments.Count; i++)
        {
            SegmentInfo info = sourceSegments.Info(i);
            int docCount = info.docCount;
            SegmentReader previousReader = merge.readersClone[i];
            SegmentReader currentReader = merge.readers[i];
            if (previousReader.HasDeletions())
            {
                // There were deletes on this segment when the merge
                // started. The merge has collapsed away those
                // deletes, but, if new deletes were flushed since
                // the merge started, we must now carefully keep any
                // newly flushed deletes, mapping them to the new
                // docIDs.

                if (currentReader.NumDeletedDocs() > previousReader.NumDeletedDocs())
                {
                    // This means this segment has had new deletes
                    // committed since we started the merge, so we
                    // must merge them:
                    for (int j = 0; j < docCount; j++)
                    {
                        if (previousReader.IsDeleted(j))
                        {
                            System.Diagnostics.Debug.Assert(currentReader.IsDeleted(j));
                        }
                        else
                        {
                            if (currentReader.IsDeleted(j))
                            {
                                mergeReader.DoDelete(docUpto);
                                delCount++;
                            }
                            docUpto++;
                        }
                    }
                }
                else
                {
                    docUpto += docCount - previousReader.NumDeletedDocs();
                }
            }
            else if (currentReader.HasDeletions())
            {
                // This segment had no deletes before but now it
                // does:
                for (int j = 0; j < docCount; j++)
                {
                    if (currentReader.IsDeleted(j))
                    {
                        mergeReader.DoDelete(docUpto);
                        delCount++;
                    }
                    docUpto++;
                }
            }
            else
            {
                // No deletes before or after
                docUpto += info.docCount;
            }
        }

        System.Diagnostics.Debug.Assert(mergeReader.NumDeletedDocs() == delCount);

        mergeReader.hasChanges = delCount > 0;
    }
}
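
// Illustration only (not part of Lucene.NET): the carry-over logic above,
// isolated from IndexWriter state. This hypothetical helper assumes, like
// CommitMergedDeletes, that the merge compacted away every document that was
// already deleted when the merge started, so only documents that survived
// the merge consume merged docIDs.
private static System.Collections.Generic.List<int> CarryOverDeletes(bool[] deletedAtMergeStart, bool[] deletedNow, int mergedDocBase)
{
    System.Collections.Generic.List<int> toDelete = new System.Collections.Generic.List<int>();
    int docUpto = mergedDocBase; // first merged docID assigned to this segment
    for (int j = 0; j < deletedAtMergeStart.Length; j++)
    {
        if (deletedAtMergeStart[j])
            continue; // compacted away by the merge; consumes no merged docID
        if (deletedNow[j])
            toDelete.Add(docUpto); // newly flushed delete: map it to the merged docID
        docUpto++; // only surviving documents receive merged docIDs
    }
    return toDelete;
}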
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache, bool doClone, int termInfosIndexDivisor)
    : base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor)
{
}
/// <summary> Test stored fields for a segment.</summary>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.StoredFieldStatus status = new Status.StoredFieldStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount
        if (status.docCount != reader.NumDocs())
        {
            throw new System.SystemException("docCount=" + status.docCount + " but saw " + reader.NumDocs() + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
// Used by near real-time search
internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
{
    this.directory = writer.GetDirectory();
    this.readOnly = true;
    segmentInfos = infos;
    segmentInfosStart = (SegmentInfos) infos.Clone();
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true));
    }

    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    int numSegments = infos.Count;
    SegmentReader[] readers = new SegmentReader[numSegments];
    Directory dir = writer.GetDirectory();
    int upto = 0;

    for (int i = 0; i < numSegments; i++)
    {
        bool success = false;
        try
        {
            SegmentInfo info = infos.Info(i);
            if (info.dir == dir)
            {
                readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (upto--; upto >= 0; upto--)
                {
                    try
                    {
                        readers[upto].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    this.writer = writer;

    if (upto < readers.Length)
    {
        // This means some segments were in a foreign Directory
        SegmentReader[] newReaders = new SegmentReader[upto];
        Array.Copy(readers, 0, newReaders, 0, upto);
        readers = newReaders;
    }

    Initialize(readers);
}
/// <summary>Construct reading the named set of readers. </summary>
internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    this.directory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = sis;
    this.deletionPolicy = deletionPolicy;
    this.termInfosIndexDivisor = termInfosIndexDivisor;

    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        SupportClass.CollectionsHelper.AddAllIfNotContains(synced, sis.Files(directory, true));
    }

    // To reduce the chance of hitting FileNotFound
    // (and having to retry), we open segments in
    // reverse because IndexWriter merges & deletes
    // the newest segments first.
    SegmentReader[] readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        bool success = false;
        try
        {
            readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (i++; i < sis.Count; i++)
                {
                    try
                    {
                        readers[i].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    Initialize(readers);
}
internal SegmentTermPositions(SegmentReader p) : base(p)
{
    this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
}
/// <summary> Release the segment reader (i.e. decRef it and close if there
/// are no more references).
/// </summary>
/// <param name="sr">
/// </param>
/// <throws> IOException </throws>
public virtual void Release(SegmentReader sr, bool drop)
{
    lock (this)
    {
        bool pooled = readerMap.Contains(sr.GetSegmentInfo());

        System.Diagnostics.Debug.Assert(!pooled || readerMap[sr.GetSegmentInfo()] == sr);

        // Drop caller's ref; for an external reader (not
        // pooled), this decRef will close it
        sr.DecRef();

        if (pooled && (drop || (!Enclosing_Instance.poolReaders && sr.GetRefCount() == 1)))
        {
            // We invoke deleter.checkpoint below, so we must be
            // sync'd on IW if there are changes:

            // TODO: java 5
            // assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

            // Discard (don't save) changes when we are dropping
            // the reader; this is used only on the sub-readers
            // after a successful merge.
            sr.hasChanges &= !drop;

            bool hasChanges = sr.hasChanges;

            // Drop our ref -- this will commit any pending
            // changes to the dir
            sr.Close();

            // We are the last ref to this reader; since we're
            // not pooling readers, we release it:
            readerMap.Remove(sr.GetSegmentInfo());

            if (hasChanges)
            {
                // Must checkpoint w/ deleter, because this
                // segment reader will have created new _X_N.del
                // file.
                enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false);
            }
        }
    }
}
/// <summary> Release the segment reader (i.e. decRef it and close if there
/// are no more references).
/// </summary>
/// <param name="sr">
/// </param>
/// <throws> IOException </throws>
public virtual void Release(SegmentReader sr)
{
    lock (this)
    {
        Release(sr, false);
    }
}
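
// Illustration only (not part of Lucene.NET): the acquire/release discipline
// the pool expects from callers. This hypothetical method assumes the pool's
// Get(SegmentInfo, bool) incRefs the returned reader on the caller's behalf,
// so every Get must be balanced by exactly one Release.
private int CountDocsViaPool(SegmentInfo info)
{
    SegmentReader sr = Get(info, false); // pool incRefs the reader for us
    try
    {
        return sr.NumDocs(); // safe to use while we hold the ref
    }
    finally
    {
        Release(sr); // decRef; the reader closes once its refcount reaches zero
    }
}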
/* FIXME if we want to support non-contiguous segment merges */
private bool CommitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(TestPoint("startCommitMerge"));

        if (hitOOM)
        {
            throw new System.SystemException("this writer hit an OutOfMemoryError; cannot complete merge");
        }

        if (infoStream != null)
            Message("commitMerge: " + merge.SegString(directory) + " index=" + SegString());

        System.Diagnostics.Debug.Assert(merge.registerDone);

        // If merge was explicitly aborted, or, if rollback() or
        // rollbackTransaction() had been called since our merge
        // started (which results in an unqualified
        // deleter.refresh() call that will remove any index
        // file that current segments does not reference), we
        // abort this merge
        if (merge.IsAborted())
        {
            if (infoStream != null)
                Message("commitMerge: skipping merge " + merge.SegString(directory) + ": it was aborted");

            return false;
        }

        int start = EnsureContiguousMerge(merge);

        CommitMergedDeletes(merge, mergedReader);
        docWriter.RemapDeletes(segmentInfos, merger.GetDocMaps(), merger.GetDelCounts(), merge, mergedDocCount);

        // If the doc store we are using has been closed and
        // is now in compound format (but wasn't when we
        // started), then we will switch to the compound
        // format as well:
        SetMergeDocStoreIsCompoundFile(merge);

        merge.info.SetHasProx(merger.HasProx());

        ((System.Collections.IList) ((System.Collections.ArrayList) segmentInfos).GetRange(start, merge.segments.Count)).Clear();
        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(merge.info));
        segmentInfos.Insert(start, merge.info);

        CloseMergeReaders(merge, false);

        // Must note the change to segmentInfos so any commits
        // in-flight don't lose it:
        Checkpoint();

        // If the merged segments had pending changes, clear
        // them so that they don't bother writing them to
        // disk, updating SegmentInfo, etc.:
        readerPool.Clear(merge.segments);

        if (merge.optimize)
        {
            // cascade the optimize:
            segmentsToOptimize[merge.info] = merge.info;
        }
        return true;
    }
}
/// <summary> Test field norms.</summary>
private Status.FieldNormStatus TestFieldNorms(System.Collections.Generic.ICollection<string> fieldNames, SegmentReader reader)
{
    Status.FieldNormStatus status = new Status.FieldNormStatus();

    try
    {
        // Test Field Norms
        if (infoStream != null)
        {
            infoStream.Write(" test: field norms.........");
        }

        System.Collections.IEnumerator it = fieldNames.GetEnumerator();
        byte[] b = new byte[reader.MaxDoc()];
        while (it.MoveNext())
        {
            System.String fieldName = (System.String) it.Current;
            if (reader.HasNorms(fieldName))
            {
                reader.Norms(fieldName, b, 0);
                ++status.totFields;
            }
        }

        Msg("OK [" + status.totFields + " fields]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
/// <summary> Test the term index.</summary>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }

                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
/// <summary>This constructor is only used for {@link #Reopen()} </summary>
internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
{
    this.directory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = infos;
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true));
    }

    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    System.Collections.IDictionary segmentReaders = new System.Collections.Hashtable();

    if (oldReaders != null)
    {
        // create a Map SegmentName->SegmentReader
        for (int i = 0; i < oldReaders.Length; i++)
        {
            segmentReaders[oldReaders[i].GetSegmentName()] = (System.Int32) i;
        }
    }

    SegmentReader[] newReaders = new SegmentReader[infos.Count];

    // remember which readers are shared between the old and the re-opened
    // DirectoryReader - we have to incRef those readers
    bool[] readerShared = new bool[infos.Count];

    for (int i = infos.Count - 1; i >= 0; i--)
    {
        // find SegmentReader for this segment
        int? oldReaderIndex = (int?) segmentReaders[infos.Info(i).name];
        if (oldReaderIndex.HasValue == false)
        {
            // this is a new segment, no old SegmentReader can be reused
            newReaders[i] = null;
        }
        else
        {
            // there is an old reader for this segment - we'll try to reopen it
            newReaders[i] = oldReaders[oldReaderIndex.Value];
        }

        bool success = false;
        try
        {
            SegmentReader newReader;
            if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile())
            {
                // We should never see a totally new segment during cloning
                System.Diagnostics.Debug.Assert(!doClone);

                // this is a new reader; in case we hit an exception we can close it safely
                newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
            }
            else
            {
                newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
            }
            if (newReader == newReaders[i])
            {
                // this reader will be shared between the old and the new one,
                // so we must incRef it
                readerShared[i] = true;
                newReader.IncRef();
            }
            else
            {
                readerShared[i] = false;
                newReaders[i] = newReader;
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                for (i++; i < infos.Count; i++)
                {
                    if (newReaders[i] != null)
                    {
                        try
                        {
                            if (!readerShared[i])
                            {
                                // this is a new subReader that is not used by the old one,
                                // we can close it
                                newReaders[i].Close();
                            }
                            else
                            {
                                // this subReader is also used by the old reader, so instead
                                // of closing we must decRef it
                                newReaders[i].DecRef();
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                }
            }
        }
    }

    // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
    Initialize(newReaders);

    // try to copy unchanged norms from the old normsCache to the new one
    if (oldNormsCache != null)
    {
        System.Collections.IEnumerator it = new System.Collections.Hashtable(oldNormsCache).GetEnumerator();
        while (it.MoveNext())
        {
            System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
            System.String field = (System.String) entry.Key;
            if (!HasNorms(field))
            {
                continue;
            }

            byte[] oldBytes = (byte[]) entry.Value;
            byte[] bytes = new byte[MaxDoc()];

            for (int i = 0; i < subReaders.Length; i++)
            {
                int? oldReaderIndex = (int?) segmentReaders[subReaders[i].GetSegmentName()];

                // this SegmentReader was not re-opened, we can copy all of its norms
                if (oldReaderIndex.HasValue && (oldReaders[oldReaderIndex.Value] == subReaders[i] || oldReaders[oldReaderIndex.Value].norms[field] == subReaders[i].norms[field]))
                {
                    // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                    // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                    // which is synchronized
                    Array.Copy(oldBytes, oldStarts[oldReaderIndex.Value], bytes, starts[i], starts[i + 1] - starts[i]);
                }
                else
                {
                    subReaders[i].Norms(field, bytes, starts[i]);
                }
            }

            normsCache[field] = bytes; // update cache
        }
    }
}
/// <summary> Test term vectors for a segment.</summary>
private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.TermVectorStatus status = new Status.TermVectorStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: term vectors........");
        }

        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
                if (tfv != null)
                {
                    status.totVectors += tfv.Length;
                }
            }
        }

        Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
private void Initialize(SegmentReader[] subReaders)
{
    this.subReaders = subReaders;
    starts = new int[subReaders.Length + 1]; // build starts array
    for (int i = 0; i < subReaders.Length; i++)
    {
        starts[i] = maxDoc;
        maxDoc += subReaders[i].MaxDoc(); // compute maxDocs

        if (subReaders[i].HasDeletions())
            hasDeletions = true;
    }
    starts[subReaders.Length] = maxDoc;

    if (!readOnly)
    {
        maxIndexVersion = SegmentInfos.ReadCurrentVersion(directory);
    }
}
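
// Illustration only (not part of Lucene.NET): how the starts[] array built
// above is typically consumed. Sub-reader i owns the composite docIDs
// [starts[i], starts[i+1]), so a binary search recovers the owning
// sub-reader for any composite docID. This hypothetical helper mirrors the
// lookup DirectoryReader performs, without copying its exact code.
private static int SubReaderIndex(int docID, int[] starts)
{
    int lo = 0;
    int hi = starts.Length - 2; // index of the last sub-reader
    while (lo <= hi)
    {
        int mid = (lo + hi) >> 1;
        if (docID < starts[mid])
            hi = mid - 1;
        else if (docID >= starts[mid + 1])
            lo = mid + 1;
        else
            return mid; // starts[mid] <= docID < starts[mid + 1]
    }
    return lo; // not reached for docIDs in [0, maxDoc)
}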
internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
{
    segment = si.name;
    this.readBufferSize = readBufferSize;
    this.dir = dir;

    bool success = false;

    try
    {
        Directory dir0 = dir;
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            dir0 = cfsReader;
        }
        cfsDir = dir0;

        fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);

        this.termsIndexDivisor = termsIndexDivisor;
        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        if (termsIndexDivisor == -1)
        {
            tisNoIndex = reader;
        }
        else
        {
            tis = reader;
            tisNoIndex = null;
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);

        if (fieldInfos.HasProx())
        {
            proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
        }
        else
        {
            proxStream = null;
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }

    // Must assign this at the end -- if we hit an
    // exception above core, we don't want to attempt to
    // purge the FieldCache (will hit NPE because core is
    // not assigned yet).
    this.origInstance = origInstance;
}
private DirectoryOwningReader(IndexReader in_Renamed, SegmentReader.Ref ref_Renamed)
    : base(in_Renamed)
{
    this.ref_Renamed = ref_Renamed;
    ref_Renamed.IncRef();
}