/// <summary>
/// Decides whether a single segment already counts as optimized: the
/// writer reports no deleted docs for it, it has no separate norms, it
/// lives in the writer's directory, and it either matches the
/// useCompoundFile setting or noCFSRatio is below 1.0.
/// </summary>
/// <param name="info">The segment to inspect.</param>
/// <returns>true when every condition above holds; otherwise false.</returns>
private bool IsOptimized(SegmentInfo info)
{
    if (writer.NumDeletedDocs(info) > 0)
    {
        return false;
    }

    if (info.HasSeparateNorms())
    {
        return false;
    }

    if (info.dir != writer.GetDirectory())
    {
        return false;
    }

    return info.GetUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0;
}
/// <summary>
/// Answers with the value of the useCompoundFile field; neither argument
/// is consulted.
/// </summary>
/// <param name="infos">Unused by this implementation.</param>
/// <param name="info">Unused by this implementation.</param>
/// <returns>The current value of useCompoundFile.</returns>
public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
{
    bool compound = useCompoundFile;
    return compound;
}
/// <summary>
/// Measures a segment by its document count. When calibrateSizeByDeletes
/// is set, the number of deleted docs reported by the writer is
/// subtracted from info.docCount.
/// </summary>
/// <param name="info">The segment to measure.</param>
/// <returns>The (optionally delete-adjusted) document count as a long.</returns>
protected internal virtual long SizeDocs(SegmentInfo info)
{
    if (!calibrateSizeByDeletes)
    {
        return info.docCount;
    }

    // Widen before subtracting so the arithmetic is done in 64 bits.
    long deleted = writer.NumDeletedDocs(info);
    return info.docCount - deleted;
}
/// <summary>
/// Test stored fields for a segment: loads the stored document for every
/// non-deleted doc id below info.docCount and verifies that the number of
/// documents visited equals reader.NumDocs().
/// </summary>
/// <param name="info">Segment being checked; supplies the doc id range.</param>
/// <param name="reader">Open reader over that segment.</param>
/// <param name="format">Number format used for the summary message.</param>
/// <returns>
/// A status whose counters are filled in; when any step throws, the
/// exception is stored in its error field instead of propagating.
/// </returns>
private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.StoredFieldStatus status = new Status.StoredFieldStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: stored fields.......");
        }

        // Scan stored fields for all documents
        for (int j = 0; j < info.docCount; ++j)
        {
            if (!reader.IsDeleted(j))
            {
                status.docCount++;
                Document doc = reader.Document(j);
                status.totFields += doc.GetFields().Count;
            }
        }

        // Validate docCount
        int expectedDocCount = reader.NumDocs();
        if (status.docCount != expectedDocCount)
        {
            // Report the reader's count next to the count actually seen;
            // previously both sides of the message printed status.docCount.
            throw new System.SystemException("docCount=" + expectedDocCount + " but saw " + status.docCount + " undeleted docs");
        }

        Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
    }
    catch (System.Exception e)
    {
        // Record the failure on the status so the caller can decide what to do.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
/// <summary>
/// Overwrites this instance's state with the values held by
/// <paramref name="src"/>, after first calling ClearFiles().
/// </summary>
/// <remarks>
/// NOTE(review): docStoreSegment, hasProx and diagnostics are not assigned
/// here although Clone() copies them - confirm whether that is intended.
/// </remarks>
/// <param name="src">The segment info to copy from.</param>
internal void Reset(SegmentInfo src)
{
    ClearFiles();

    // Identity and location
    name = src.name;
    docCount = src.docCount;
    dir = src.dir;

    // File-format flags
    preLockless = src.preLockless;
    isCompoundFile = src.isCompoundFile;
    hasSingleNormFile = src.hasSingleNormFile;

    // Deletions
    delGen = src.delGen;
    delCount = src.delCount;

    // Shared doc store
    docStoreOffset = src.docStoreOffset;
    docStoreIsCompoundFile = src.docStoreIsCompoundFile;

    // Norm generations get their own array so the two instances do not alias.
    long[] copiedNormGen = null;
    if (src.normGen != null)
    {
        copiedNormGen = new long[src.normGen.Length];
        Array.Copy(src.normGen, 0, copiedNormGen, 0, src.normGen.Length);
    }
    normGen = copiedNormGen;
}
/// <summary>
/// Convenience overload: forwards to the six-argument Get with
/// readOnly = false and the segment's own directory (si.dir).
/// </summary>
/// <param name="si">Segment to open.</param>
/// <param name="readBufferSize">Passed through unchanged.</param>
/// <param name="doOpenStores">Passed through unchanged.</param>
/// <param name="termInfosIndexDivisor">Passed through unchanged.</param>
/// <returns>The reader produced by the forwarded call.</returns>
internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
{
    const bool readOnly = false;
    return Get(readOnly, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
}
/// <summary>
/// Produces a reader bound to <paramref name="si"/> that shares this
/// reader's core. Returns this instance unchanged when norms and deletions
/// are both up to date, no clone was requested, and both this reader and
/// the requested one are read-only.
/// </summary>
/// <param name="si">Segment info the returned reader will be bound to.</param>
/// <param name="doClone">
/// When true a new reader is always created; existing deleted docs are
/// shared and every existing norm is cloned into it.
/// </param>
/// <param name="openReadOnly">Selects READONLY_IMPL over IMPL for the new reader.</param>
/// <returns>This reader, or a newly created reader that holds a reference on the shared core.</returns>
internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly)
{
    lock (this)
    {
        // Deletions are up to date when both infos agree on whether deletions
        // exist and, if they do, name the same deletions file.
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
        bool normsUpToDate = true;

        // Per-field flag: true when that field's norm file name differs
        // between the current and the incoming segment info.
        bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()];
        int fieldCount = core.fieldInfos.Size();
        for (int i = 0; i < fieldCount; i++)
        {
            if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
            {
                normsUpToDate = false;
                fieldNormsChanged[i] = true;
            }
        }

        // if we're cloning we need to run through the reopenSegment logic
        // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
        if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly)
        {
            return this;
        }

        // When cloning, the incoming SegmentInfos should not
        // have any changes in it:
        System.Diagnostics.Debug.Assert(!doClone ||(normsUpToDate && deletionsUpToDate));

        // clone reader
        SegmentReader clone;
        try
        {
            if (openReadOnly)
                clone = (SegmentReader) System.Activator.CreateInstance(READONLY_IMPL);
            else
                clone = (SegmentReader) System.Activator.CreateInstance(IMPL);
        }
        catch (System.Exception e)
        {
            throw new System.SystemException("cannot load SegmentReader class: " + e, e);
        }

        bool success = false;
        try
        {
            // The new reader shares the core, so take a reference before handing it over.
            core.IncRef();
            clone.core = core;
            clone.readOnly = openReadOnly;
            clone.si = si;
            clone.readBufferSize = readBufferSize;

            if (!openReadOnly && hasChanges)
            {
                // My pending changes transfer to the new reader
                clone.pendingDeleteCount = pendingDeleteCount;
                clone.deletedDocsDirty = deletedDocsDirty;
                clone.normsDirty = normsDirty;
                clone.hasChanges = hasChanges;
                hasChanges = false;
            }

            if (doClone)
            {
                if (deletedDocs != null)
                {
                    // Share the deleted docs with the clone and count the extra reference.
                    deletedDocsRef.IncRef();
                    clone.deletedDocs = deletedDocs;
                    clone.deletedDocsRef = deletedDocsRef;
                }
            }
            else
            {
                if (!deletionsUpToDate)
                {
                    // load deleted docs
                    System.Diagnostics.Debug.Assert(clone.deletedDocs == null);
                    clone.LoadDeletedDocs();
                }
                else if (deletedDocs != null)
                {
                    // Deletions unchanged: share the existing deleted docs.
                    deletedDocsRef.IncRef();
                    clone.deletedDocs = deletedDocs;
                    clone.deletedDocsRef = deletedDocsRef;
                }
            }

            clone.SetDisableFakeNorms(GetDisableFakeNorms());
            clone.norms = new System.Collections.Hashtable();

            // Clone norms
            for (int i = 0; i < fieldNormsChanged.Length; i++)
            {
                // Clone unchanged norms to the cloned reader
                if (doClone || !fieldNormsChanged[i])
                {
                    System.String curField = core.fieldInfos.FieldInfo(i).name;
                    Norm norm = (Norm) this.norms[curField];
                    if (norm != null)
                        clone.norms[curField] = norm.Clone();
                }
            }

            // If we are not cloning, then this will open anew
            // any norms that have changed:
            clone.OpenNorms(si.GetUseCompoundFile()?core.GetCFSReader():Directory(), readBufferSize);

            success = true;
        }
        finally
        {
            if (!success)
            {
                // An exception occurred during reopen, we have to decRef the norms
                // that we incRef'ed already and close singleNormsStream and FieldsReader
                clone.DecRef();
            }
        }

        return clone;
    }
}
/// <summary>
/// Opens the per-segment readers held by this core: the compound file
/// reader (only when si.GetUseCompoundFile() is true), the field infos,
/// the term infos reader, the freq stream and, when fieldInfos.HasProx()
/// is true, the prox stream. If any step throws, DecRef() is called
/// before the exception propagates.
/// </summary>
/// <param name="origInstance">Stored in this.origInstance once everything else has succeeded.</param>
/// <param name="dir">Directory the segment files are opened from.</param>
/// <param name="si">Segment info; supplies the segment name and the compound-file flag.</param>
/// <param name="readBufferSize">Buffer size passed to every reader and input opened here.</param>
/// <param name="termsIndexDivisor">
/// Passed to the TermInfosReader; when it is -1 the reader is kept in
/// tisNoIndex instead of tis.
/// </param>
internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor)
{
    segment = si.name;
    this.readBufferSize = readBufferSize;
    this.dir = dir;

    bool success = false;

    try
    {
        // dir0 ends up as the directory all remaining files are read from:
        // the compound file reader when one is opened, otherwise dir itself.
        Directory dir0 = dir;
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            dir0 = cfsReader;
        }
        cfsDir = dir0;

        fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);

        this.termsIndexDivisor = termsIndexDivisor;
        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
        if (termsIndexDivisor == - 1)
        {
            tisNoIndex = reader;
        }
        else
        {
            tis = reader;
            tisNoIndex = null;
        }

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);

        if (fieldInfos.HasProx())
        {
            proxStream = cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
        }
        else
        {
            proxStream = null;
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            // Release whatever was opened before the failure.
            DecRef();
        }
    }

    // Must assign this at the end -- if we hit an
    // exception above core, we don't want to attempt to
    // purge the FieldCache (will hit NPE because core is
    // not assigned yet).
    this.origInstance = origInstance;
}
/// <summary>
/// Static helper that asks the given segment info whether it has deletions.
/// </summary>
/// <param name="si">Segment info to query.</param>
/// <returns>Whatever si.HasDeletions() reports.</returns>
internal static bool HasDeletions(SegmentInfo si)
{
    // Don't call ensureOpen() here (it could affect performance)
    bool anyDeletions = si.HasDeletions();
    return anyDeletions;
}
/// <summary>
/// Test the term index. Walks every term and its postings and verifies
/// that doc ids are strictly increasing and below reader.MaxDoc(), that
/// each freq is positive, that positions are at least -1 and never
/// decrease within a document, and that each term's docFreq equals the
/// number of docs seen plus the number of deleted docs for that term.
/// </summary>
/// <param name="info">Segment being checked (not read by this method).</param>
/// <param name="reader">Open reader over the segment.</param>
/// <returns>
/// A status with term, term/doc and position totals; when a check fails
/// the exception is stored in its error field instead of propagating.
/// </returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = -1;
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }

                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = -1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < -1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }

                    // Remember this position so the ordering check above
                    // compares against the previous one; without this update
                    // lastPos stays at -1 and the check can never fail.
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        // Record the failure on the status so the caller can decide what to do.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return(status);
}
/// <summary>
/// Flush all pending docs to a new segment. If anything between closing
/// the doc store and DoAfterFlush() throws, Abort() is called before the
/// exception propagates.
/// </summary>
/// <param name="closeDocStore">
/// When true, CloseDocStore() is called first and
/// flushState.numDocsInStore is reset to 0.
/// </param>
/// <returns>flushState.numDocs, the number of documents in the flushed segment.</returns>
internal int Flush(bool closeDocStore)
{
    lock (this)
    {
        // Preconditions checked in debug builds only.
        System.Diagnostics.Debug.Assert(AllThreadsIdle());

        System.Diagnostics.Debug.Assert(numDocsInRAM > 0);

        System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
        System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
        System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);

        InitFlushState(false);

        docStoreOffset = numDocsInStore;

        if (infoStream != null)
            Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);

        bool success = false;

        try
        {
            if (closeDocStore)
            {
                System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
                System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
                CloseDocStore();
                flushState.numDocsInStore = 0;
            }

            // Collect each thread state's consumer (keyed by itself) and hand
            // the set to the top-level consumer for flushing.
            System.Collections.Hashtable threads = new System.Collections.Hashtable();
            for (int i = 0; i < threadStates.Length; i++)
                threads[threadStates[i].consumer] = threadStates[i].consumer;
            consumer.Flush(threads, flushState);

            if (infoStream != null)
            {
                // Diagnostic output only: compare RAM used with the flushed size.
                SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
                long newSegmentSize = si.SizeInBytes();
                // NOTE(review): the last argument is already multiplied by 100.0 and
                // the "%" custom format specifier scales by 100 again - confirm the
                // printed new/old ratio is what is intended.
                System.String message = System.String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}", new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
                Message(message);
            }

            flushedDocCount += flushState.numDocs;

            DoAfterFlush();

            success = true;
        }
        finally
        {
            if (!success)
            {
                Abort();
            }
        }

        System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);

        return flushState.numDocs;
    }
}
/// <summary>
/// Returns a <see cref="Status"/> instance detailing the state of the index.
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer.
/// </summary>
/// <param name="onlySegments">
/// list of specific segment names to check; when null every segment is checked
/// </param>
/// <returns>
/// The populated status. It is returned early, with the matching flag
/// set, when the segments file cannot be read or opened, when its format
/// cannot be read, or when the format is newer than this tool knows.
/// </returns>
public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        return(result);
    }

    int numSegments = sis.Count;
    System.String segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        result.cantOpenSegments = true;
        return(result);
    }

    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null)
        {
            infoStream.WriteLine(t.StackTrace);
        }
        result.missingSegmentVersion = true;
        return(result);
    }
    finally
    {
        if (input != null)
        {
            input.Close();
        }
    }

    System.String sFormat = "";
    bool skip = false;

    // Map the format id to a readable label. The FORMAT test must be part
    // of the same chain as the others: as a standalone "if" its label was
    // always overwritten by one of the branches that followed.
    if (format == SegmentInfos.FORMAT)
    {
        sFormat = "FORMAT [Lucene Pre-2.1]";
    }
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
    {
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    }
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
    {
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    }
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
    {
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    }
    else
    {
        if (format == SegmentInfos.FORMAT_CHECKSUM)
        {
            sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_DEL_COUNT)
        {
            sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_HAS_PROX)
        {
            sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
        }
        else if (format == SegmentInfos.FORMAT_USER_DATA)
        {
            sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
        }
        else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        {
            sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
        }
        else if (format < SegmentInfos.CURRENT_FORMAT)
        {
            sFormat = "int=" + format + " [newer version of Lucene than this tool]";
            skip = true;
        }
        else
        {
            sFormat = format + " [Lucene 1.3 or prior]";
        }
    }

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.GetUserData();
    System.String userDataString;
    if (sis.GetUserData().Count > 0)
    {
        userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData());
    }
    else
    {
        userDataString = "";
    }

    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

    if (onlySegments != null)
    {
        result.partial = true;
        if (infoStream != null)
        {
            infoStream.Write("\nChecking only these segments:");
        }
        // First pass prints the requested names, second pass records them.
        System.Collections.IEnumerator it = onlySegments.GetEnumerator();
        while (it.MoveNext())
        {
            if (infoStream != null)
            {
                infoStream.Write(" " + it.Current);
            }
        }
        System.Collections.IEnumerator e = onlySegments.GetEnumerator();
        while (e.MoveNext() == true)
        {
            result.segmentsChecked.Add(e.Current);
        }
        Msg(":");
    }

    if (skip)
    {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return(result);
    }

    // Start from an empty copy; every segment that passes is added back.
    result.newSegments = (SegmentInfos)sis.Clone();
    result.newSegments.Clear();

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name))
        {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;

        // Number of docs counted as lost if this segment fails; narrowed to
        // the live doc count once the reader has been opened.
        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            Msg(" compound=" + info.GetUseCompoundFile());
            segInfoStat.compound = info.GetUseCompoundFile();
            Msg(" hasProx=" + info.GetHasProx());
            segInfoStat.hasProx = info.GetHasProx();
            Msg(" numFiles=" + info.Files().Count);
            segInfoStat.numFiles = info.Files().Count;
            Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
            System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics();
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0)
            {
                Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics));
            }

            int docStoreOffset = info.GetDocStoreOffset();
            if (docStoreOffset != -1)
            {
                Msg(" docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg(" docStoreSegment=" + info.GetDocStoreSegment());
                segInfoStat.docStoreSegment = info.GetDocStoreSegment();
                Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
                segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
            }
            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                Msg(" no deletions");
                segInfoStat.hasDeletions = false;
            }
            else
            {
                Msg(" has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }
            if (infoStream != null)
            {
                infoStream.Write(" test: open reader.........");
            }
            reader = SegmentReader.Get(info);

            segInfoStat.openReaderPassed = true;

            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions())
            {
                if (reader.deletedDocs.Count() != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (reader.deletedDocs.Count() > reader.MaxDoc())
                {
                    throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                }
                if (info.docCount - numDocs != info.GetDelCount())
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            }
            else
            {
                if (info.GetDelCount() != 0)
                {
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                }
                Msg("OK");
            }
            if (reader.MaxDoc() != info.docCount)
            {
                throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount);
            }

            // Test getFieldNames()
            if (infoStream != null)
            {
                infoStream.Write(" test: fields..............");
            }
            System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;

            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);

            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

            // Rethrow the first exception we encountered
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null)
            {
                throw new System.SystemException("Field Norm test failed");
            }
            else if (segInfoStat.termIndexStatus.error != null)
            {
                throw new System.SystemException("Term Index test failed");
            }
            else if (segInfoStat.storedFieldStatus.error != null)
            {
                throw new System.SystemException("Stored Field test failed");
            }
            else if (segInfoStat.termVectorStatus.error != null)
            {
                throw new System.SystemException("Term Vector test failed");
            }

            Msg("");
        }
        catch (System.Exception t)
        {
            Msg("FAILED");
            System.String comment;
            comment = "fixIndex() would remove reference to this segment";
            Msg(" WARNING: " + comment + "; full exception:");
            if (infoStream != null)
            {
                infoStream.WriteLine(t.StackTrace);
            }
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }

        // Keeper
        result.newSegments.Add(info.Clone());
    }

    if (0 == result.numBadSegments)
    {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    }
    else
    {
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
    }

    return(result);
}
/// <summary>
/// Returns true if a newly flushed (not from merge)
/// segment should use the compound file format.
/// </summary>
/// <param name="segments">The segment infos passed in by the caller.</param>
/// <param name="newSegment">The newly flushed segment the decision is about.</param>
/// <returns>true when the new segment should be written as a compound file.</returns>
public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
/// <summary>
/// Reports the number of deleted docs for a segment. When the reader pool
/// already holds a reader for it, that reader is asked and then released
/// back to the pool; otherwise the segment info's own delete count is used.
/// </summary>
/// <param name="info">Segment to query.</param>
/// <returns>The deleted-document count.</returns>
public virtual int NumDeletedDocs(SegmentInfo info)
{
    SegmentReader pooled = readerPool.GetIfExists(info);
    if (pooled == null)
    {
        return info.GetDelCount();
    }

    try
    {
        return pooled.NumDeletedDocs();
    }
    finally
    {
        // GetIfExists handed us a reference; give it back.
        readerPool.Release(pooled);
    }
}
/// <summary>
/// Looks up the reader stored in readerMap for the given segment. When one
/// is found its reference count is incremented before it is returned.
/// </summary>
/// <param name="info">Segment whose reader is wanted.</param>
/// <returns>The stored reader, or null when readerMap holds none for this segment.</returns>
public virtual SegmentReader GetIfExists(SegmentInfo info)
{
    lock (this)
    {
        SegmentReader pooled = (SegmentReader) readerMap[info];
        if (pooled == null)
        {
            return null;
        }

        // Returns a ref
        pooled.IncRef();
        return pooled;
    }
}
/// <summary>
/// Static helper that asks the given segment info whether it uses the
/// compound file format.
/// </summary>
/// <param name="si">Segment info to query.</param>
/// <returns>Whatever si.GetUseCompoundFile() reports.</returns>
internal static bool UsesCompoundFile(SegmentInfo si)
{
    bool compound = si.GetUseCompoundFile();
    return compound;
}
/// <summary>
/// Convenience overload: forwards to the six-argument Get using the
/// segment's own directory (si.dir), BufferedIndexInput.BUFFER_SIZE as the
/// read buffer size, and doOpenStores = true.
/// </summary>
/// <param name="readOnly">Passed through unchanged.</param>
/// <param name="si">Segment to open.</param>
/// <param name="termInfosIndexDivisor">Passed through unchanged.</param>
/// <returns>The reader produced by the forwarded call.</returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor)
{
    const bool doOpenStores = true;
    return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, doOpenStores, termInfosIndexDivisor);
}
/// <summary>
/// Static helper that asks the given segment info whether it has separate
/// norms.
/// </summary>
/// <param name="si">Segment info to query.</param>
/// <returns>Whatever si.HasSeparateNorms() reports.</returns>
internal static bool HasSeparateNorms(SegmentInfo si)
{
    bool separateNorms = si.HasSeparateNorms();
    return separateNorms;
}
/// <summary>
/// Creates a SegmentReader (READONLY_IMPL when <paramref name="readOnly"/>
/// is true, IMPL otherwise), attaches a new CoreReaders to it, optionally
/// opens the doc stores, then loads deleted docs and opens norms. If any
/// of those steps throws, DoClose() is called on the partially built
/// reader before the exception propagates.
/// </summary>
/// <param name="readOnly">Selects the reader implementation and is stored on the reader.</param>
/// <param name="dir">Directory handed to the CoreReaders constructor.</param>
/// <param name="si">Segment to open.</param>
/// <param name="readBufferSize">Buffer size used for the core and the norms.</param>
/// <param name="doOpenStores">When true, the core's doc stores are opened as well.</param>
/// <param name="termInfosIndexDivisor">Handed to the CoreReaders constructor.</param>
/// <returns>The fully initialised reader.</returns>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor)
{
    SegmentReader created;
    try
    {
        created = (SegmentReader) System.Activator.CreateInstance(readOnly ? READONLY_IMPL : IMPL);
    }
    catch (System.Exception e)
    {
        throw new System.SystemException("cannot load SegmentReader class: " + e, e);
    }

    created.readOnly = readOnly;
    created.si = si;
    created.readBufferSize = readBufferSize;

    bool opened = false;
    try
    {
        created.core = new CoreReaders(created, dir, si, readBufferSize, termInfosIndexDivisor);
        if (doOpenStores)
        {
            created.core.OpenDocStores(si);
        }
        created.LoadDeletedDocs();
        created.OpenNorms(created.core.cfsDir, readBufferSize);
        opened = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!opened)
        {
            created.DoClose();
        }
    }

    return created;
}
/// <summary>
/// Replaces the segment info this instance refers to.
/// </summary>
/// <param name="info">The segment info to store in the si field.</param>
internal virtual void SetSegmentInfo(SegmentInfo info)
{
    si = info;
}
/// <summary>
/// Test the term index. Walks every term and its postings and verifies
/// that doc ids are strictly increasing and below reader.MaxDoc(), that
/// each freq is positive, that positions are at least -1 and never
/// decrease within a document, and that each term's docFreq equals the
/// number of docs seen plus the number of deleted docs for that term.
/// </summary>
/// <param name="info">Segment being checked (not read by this method).</param>
/// <param name="reader">Open reader over the segment.</param>
/// <returns>
/// A status with term, term/doc and position totals; when a check fails
/// the exception is stored in its error field instead of propagating.
/// </returns>
private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
{
    Status.TermIndexStatus status = new Status.TermIndexStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: terms, freq, prox...");
        }

        TermEnum termEnum = reader.Terms();
        TermPositions termPositions = reader.TermPositions();

        // Used only to count up # deleted docs for this term
        MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

        int maxDoc = reader.MaxDoc();

        while (termEnum.Next())
        {
            status.termCount++;
            Term term = termEnum.Term();
            int docFreq = termEnum.DocFreq();
            termPositions.Seek(term);
            int lastDoc = - 1;
            int freq0 = 0;
            status.totFreq += docFreq;
            while (termPositions.Next())
            {
                freq0++;
                int doc = termPositions.Doc();
                int freq = termPositions.Freq();
                if (doc <= lastDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                }
                if (doc >= maxDoc)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
                }

                lastDoc = doc;
                if (freq <= 0)
                {
                    throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
                }

                int lastPos = - 1;
                status.totPos += freq;
                for (int j = 0; j < freq; j++)
                {
                    int pos = termPositions.NextPosition();
                    if (pos < - 1)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                    }
                    if (pos < lastPos)
                    {
                        throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                    }

                    // Remember this position so the ordering check above
                    // compares against the previous one; without this update
                    // lastPos stays at -1 and the check can never fail.
                    lastPos = pos;
                }
            }

            // Now count how many deleted docs occurred in
            // this term:
            int delCount;
            if (reader.HasDeletions())
            {
                myTermDocs.Seek(term);
                while (myTermDocs.Next())
                {
                }
                delCount = myTermDocs.delCount;
            }
            else
            {
                delCount = 0;
            }

            if (freq0 + delCount != docFreq)
            {
                throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
            }
        }

        Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
    }
    catch (System.Exception e)
    {
        // Record the failure on the status so the caller can decide what to do.
        Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }

    return status;
}
/// <summary>
/// Snapshots the state needed to undo a commit: a clone of the segment
/// info, the change/dirty flags, the pending delete count, and each
/// norm's dirty flag (saved into that norm's rollbackDirty).
/// </summary>
internal virtual void StartCommit()
{
    rollbackSegmentInfo = (SegmentInfo) si.Clone();
    rollbackHasChanges = hasChanges;
    rollbackDeletedDocsDirty = deletedDocsDirty;
    rollbackNormsDirty = normsDirty;
    rollbackPendingDeleteCount = pendingDeleteCount;

    foreach (Norm norm in norms.Values)
    {
        norm.rollbackDirty = norm.dirty;
    }
}
/// <summary>
/// Test term vectors for a segment: fetches the term frequency vectors of
/// every non-deleted doc id below info.docCount and totals how many
/// vectors were returned.
/// </summary>
/// <param name="info">Segment being checked; supplies the doc id range.</param>
/// <param name="reader">Open reader over that segment.</param>
/// <param name="format">Number format used for the summary message.</param>
/// <returns>
/// A status with the counters filled in; when any step throws, the
/// exception is stored in its error field instead of propagating.
/// </returns>
private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
{
    Status.TermVectorStatus result = new Status.TermVectorStatus();

    try
    {
        if (infoStream != null)
        {
            infoStream.Write(" test: term vectors........");
        }

        for (int docId = 0; docId < info.docCount; ++docId)
        {
            if (reader.IsDeleted(docId))
            {
                continue;
            }

            result.docCount++;
            TermFreqVector[] vectors = reader.GetTermFreqVectors(docId);
            if (vectors != null)
            {
                result.totVectors += vectors.Length;
            }
        }

        float averagePerDoc = ((float) result.totVectors) / result.docCount;
        Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { result.totVectors, averagePerDoc }));
    }
    catch (System.Exception failure)
    {
        Msg("ERROR [" + System.Convert.ToString(failure.Message) + "]");
        result.error = failure;
        if (infoStream != null)
        {
            infoStream.WriteLine(failure.StackTrace);
        }
    }

    return result;
}
/// <summary>
/// Creates the TermInfosReader held in tis if it has not been created
/// yet, reading from the compound file reader when the segment uses the
/// compound format and from dir otherwise.
/// </summary>
/// <remarks>
/// NOTE: only called from IndexWriter when a near real-time reader is
/// opened, or applyDeletes is run, sharing a segment that's still being
/// merged. This method is not fully thread safe, and relies on the
/// synchronization in IndexWriter.
/// </remarks>
/// <param name="si">Segment info; consulted for the compound-file flag.</param>
/// <param name="termsIndexDivisor">Passed to the TermInfosReader constructor.</param>
internal void LoadTermsIndex(SegmentInfo si, int termsIndexDivisor)
{
    lock (this)
    {
        if (tis != null)
        {
            // Already loaded; nothing to do.
            return;
        }

        Directory source = dir;
        if (si.GetUseCompoundFile())
        {
            // In some cases, we were originally opened when CFS
            // was not used, but then we are asked to open the
            // terms reader with index, the segment has switched
            // to CFS
            if (cfsReader == null)
            {
                cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            }
            source = cfsReader;
        }

        tis = new TermInfosReader(source, segment, fieldInfos, readBufferSize, termsIndexDivisor);
    }
}
/// <summary>
/// Builds a new SegmentInfo with the same name, doc count and directory
/// and copies this instance's remaining state into it. The diagnostics
/// map, the normGen array and the files list are duplicated rather than
/// shared.
/// </summary>
/// <returns>The copy, typed as System.Object.</returns>
public System.Object Clone()
{
    SegmentInfo copy = new SegmentInfo(name, docCount, dir);

    // Simple scalar state
    copy.isCompoundFile = isCompoundFile;
    copy.delGen = delGen;
    copy.delCount = delCount;
    copy.hasProx = hasProx;
    copy.preLockless = preLockless;
    copy.hasSingleNormFile = hasSingleNormFile;

    if (this.diagnostics != null)
    {
        copy.diagnostics = new System.Collections.Generic.Dictionary<string, string>();
        foreach (System.Collections.Generic.KeyValuePair<string, string> entry in diagnostics)
        {
            copy.diagnostics.Add(entry.Key, entry.Value);
        }
    }

    if (normGen != null)
    {
        copy.normGen = new long[normGen.Length];
        normGen.CopyTo(copy.normGen, 0);
    }

    // Shared doc store settings
    copy.docStoreOffset = docStoreOffset;
    copy.docStoreSegment = docStoreSegment;
    copy.docStoreIsCompoundFile = docStoreIsCompoundFile;

    if (this.files != null)
    {
        copy.files = new System.Collections.Generic.List<string>();
        foreach (string fileName in files)
        {
            copy.files.Add(fileName);
        }
    }

    return copy;
}
/// <summary>
/// Opens the stored-fields reader (and, when fieldInfos.HasVectors() is
/// true, the term-vectors reader) for this segment if that has not been
/// done yet. The files are read from a shared doc store when
/// si.GetDocStoreOffset() is not -1, otherwise from the segment's
/// compound file or plain directory.
/// </summary>
/// <param name="si">Segment info describing where the doc stores live.</param>
/// <exception cref="CorruptIndexException">
/// Thrown when the segment has no shared doc store and the fields reader's
/// size differs from si.docCount.
/// </exception>
internal void OpenDocStores(SegmentInfo si)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(si.name.Equals(segment));

        if (fieldsReaderOrig != null)
        {
            // Doc stores are already open.
            return;
        }

        Directory storeDir;
        if (si.GetDocStoreOffset() != - 1)
        {
            if (si.GetDocStoreIsCompoundFile())
            {
                System.Diagnostics.Debug.Assert(storeCFSReader == null);
                storeCFSReader = new CompoundFileReader(dir, si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                storeDir = storeCFSReader;
            }
            else
            {
                storeDir = dir;
            }
        }
        else if (si.GetUseCompoundFile())
        {
            // In some cases, we were originally opened when CFS
            // was not used, but then we are asked to open doc
            // stores after the segment has switched to CFS
            if (cfsReader == null)
            {
                cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            }
            storeDir = cfsReader;
        }
        else
        {
            storeDir = dir;
        }
        System.Diagnostics.Debug.Assert(storeDir != null);

        System.String storesSegment = si.GetDocStoreOffset() != - 1 ? si.GetDocStoreSegment() : segment;

        fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

        // Verify two sources of "maxDoc" agree:
        if (si.GetDocStoreOffset() == - 1 && fieldsReaderOrig.Size() != si.docCount)
        {
            throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount);
        }

        if (fieldInfos.HasVectors())
        {
            // open term vector files only as needed
            termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }
    }
}
/// <summary>
/// Measures the size of a segment in whatever unit the concrete
/// implementation uses.
/// </summary>
/// <param name="info">The segment to measure.</param>
/// <returns>The segment's size.</returns>
protected internal abstract long Size(SegmentInfo info);
/// <summary>
/// Flush all pending changes to the next generation separate norms file:
/// advances the norm generation for this field, writes the bytes to the
/// resulting file name, and clears the dirty flag. If the write does not
/// complete, deletion of the new file is attempted (failures of that
/// deletion are ignored) and the original exception propagates.
/// </summary>
/// <param name="si">Segment info whose norm generation is advanced.</param>
public void ReWrite(SegmentInfo si)
{
    System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0), "refCount=" + refCount + " origNorm=" + origNorm);

    // NOTE: norms are re-written in regular directory, not cfs
    si.AdvanceNormGen(this.number);
    string targetFile = si.GetNormFileName(this.number);
    IndexOutput output = enclosingInstance.Directory().CreateOutput(targetFile);

    bool written = false;
    try
    {
        try
        {
            output.WriteBytes(bytes, enclosingInstance.MaxDoc());
        }
        finally
        {
            output.Close();
        }
        written = true;
    }
    finally
    {
        if (!written)
        {
            try
            {
                enclosingInstance.Directory().DeleteFile(targetFile);
            }
            catch (Exception)
            {
                // suppress this so we keep throwing the
                // original exception
            }
        }
    }

    this.dirty = false;
}
/// <summary>
/// Measures a segment by its size in bytes. When calibrateSizeByDeletes
/// is set and the segment has a positive doc count, the byte size is
/// scaled down by the fraction of its documents that are deleted.
/// </summary>
/// <param name="info">The segment to measure.</param>
/// <returns>The (optionally delete-adjusted) byte size.</returns>
protected internal virtual long SizeBytes(SegmentInfo info)
{
    long byteSize = info.SizeInBytes();
    if (!calibrateSizeByDeletes)
    {
        return byteSize;
    }

    int delCount = writer.NumDeletedDocs(info);
    if (info.docCount <= 0)
    {
        // No documents: there is no ratio to apply.
        return byteSize;
    }

    float delRatio = (float) delCount / (float) info.docCount;
    return (long) (byteSize * (1.0f - delRatio));
}
/// <summary>
/// Convenience overload: forwards to the six-argument Get with
/// readOnly = false, the segment's own directory (si.dir),
/// BufferedIndexInput.BUFFER_SIZE, doOpenStores = true and
/// IndexReader.DEFAULT_TERMS_INDEX_DIVISOR.
/// </summary>
/// <param name="si">Segment to open.</param>
/// <returns>The reader produced by the forwarded call.</returns>
public static SegmentReader Get(SegmentInfo si)
{
    const bool readOnly = false;
    const bool doOpenStores = true;
    return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, doOpenStores, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
/// <summary>
/// Measures a segment by delegating to SizeDocs.
/// </summary>
/// <param name="info">The segment to measure.</param>
/// <returns>The value returned by SizeDocs(info).</returns>
protected internal override long Size(SegmentInfo info)
{
    long docBasedSize = SizeDocs(info);
    return docBasedSize;
}
/// <summary>
/// Obtain a SegmentReader from the readerPool. The reader must be
/// returned by calling Release(SegmentReader).
/// </summary>
/// <remarks>
/// A reader already stored in readerMap is reused, opening its doc stores
/// and loading its terms index on demand. Otherwise a new reader is opened
/// and stored in readerMap only when the segment lives in the enclosing
/// instance's directory. The extra reference handed to the caller is
/// likewise only taken for segments in that directory.
/// </remarks>
/// <param name="info">Segment whose reader is wanted.</param>
/// <param name="doOpenStores">Whether the reader's doc stores must be open.</param>
/// <param name="readBufferSize">
/// Requested buffer size; replaced by BufferedIndexInput.BUFFER_SIZE when
/// the enclosing instance has poolReaders set.
/// </param>
/// <param name="termsIndexDivisor">
/// Terms index divisor; when it is not -1 and the reused reader has no
/// terms index loaded, the index is loaded with this divisor.
/// </param>
/// <returns>The reader for the segment.</returns>
/// <throws> IOException </throws>
public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor)
{
    lock (this)
    {
        if (Enclosing_Instance.poolReaders)
        {
            readBufferSize = BufferedIndexInput.BUFFER_SIZE;
        }

        SegmentReader pooled = (SegmentReader) readerMap[info];
        if (pooled != null)
        {
            if (doOpenStores)
            {
                pooled.OpenDocStores();
            }
            if (termsIndexDivisor != - 1 && !pooled.TermsIndexLoaded())
            {
                // If this reader was originally opened because we
                // needed to merge it, we didn't load the terms
                // index. But now, if the caller wants the terms
                // index (eg because it's doing deletes, or an NRT
                // reader is being opened) we ask the reader to
                // load its terms index.
                pooled.LoadTermsIndex(termsIndexDivisor);
            }
        }
        else
        {
            // TODO: we may want to avoid doing this while
            // synchronized
            // Returns a ref, which we xfer to readerMap:
            pooled = SegmentReader.Get(info, readBufferSize, doOpenStores, termsIndexDivisor);

            if (info.dir == enclosingInstance.directory)
            {
                // Only pool if reader is not external
                readerMap[info] = pooled;
            }
        }

        // Return a ref to our caller
        if (info.dir == enclosingInstance.directory)
        {
            // Only incRef if we pooled (reader is not external)
            pooled.IncRef();
        }

        return pooled;
    }
}