/// <summary> Returns the size of the provided segment as defined by this merge policy (for example its byte size or its document count); FindMerges uses this value to assign merge levels. </summary> abstract protected internal long Size(SegmentInfo info);
/// <summary> Obtain a SegmentReader from the readerPool. The reader /// must be returned by calling <see cref="Release(SegmentReader)" /> /// </summary> /// <seealso cref="Release(SegmentReader)"> /// </seealso> /// <param name="info"> /// </param> /// <param name="doOpenStores"> /// </param> /// <throws> IOException </throws> public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores) { lock (this) { return Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, enclosingInstance.readerTermsIndexDivisor); } }
// Returns a ref public virtual SegmentReader GetIfExists(SegmentInfo info) { lock (this) { SegmentReader sr = readerMap[info]; if (sr != null) { sr.IncRef(); } return sr; } }
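// Illustrative sketch (not part of the original source): the borrow/return discipline the pool expects.
// Get and GetIfExists hand back a reader holding an extra ref, which the caller must give back with
// Release. Assumes this runs inside IndexWriter, where readerPool and info are already in scope.
SegmentReader sr = readerPool.Get(info, false); // take a ref, without opening doc stores
try
{
    int liveDocs = sr.NumDocs(); // work with the pooled reader
}
finally
{
    readerPool.Release(sr); // always balance the ref taken by Get
}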
// Returns the live SegmentInfo instance from segmentInfos that equals the given info, or the given info itself if it is no longer part of segmentInfos public virtual SegmentInfo MapToLive(SegmentInfo info) { lock (this) { int idx = Enclosing_Instance.segmentInfos.IndexOf(info); if (idx != - 1) { info = Enclosing_Instance.segmentInfos[idx]; } return info; } }
private void SetDiagnostics(SegmentInfo info, System.String source, IDictionary<string, string> details) { IDictionary<string, string> diagnostics = new Dictionary<string,string>(); diagnostics["source"] = source; diagnostics["lucene.version"] = Constants.LUCENE_VERSION; diagnostics["os"] = Constants.OS_NAME + ""; diagnostics["os.arch"] = Constants.OS_ARCH + ""; diagnostics["os.version"] = Constants.OS_VERSION + ""; diagnostics["java.version"] = Constants.JAVA_VERSION + ""; diagnostics["java.vendor"] = Constants.JAVA_VENDOR + ""; if (details != null) { //System.Collections.ArrayList keys = new System.Collections.ArrayList(details.Keys); //System.Collections.ArrayList values = new System.Collections.ArrayList(details.Values); foreach (string key in details.Keys) { diagnostics[key] = details[key]; } } info.Diagnostics = diagnostics; }
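// Illustrative sketch (assumption, not from the original file): reading the diagnostics written above
// back off a segment, for example to see which operation ("flush", "merge", "addIndexes(...)") produced
// it. info is assumed to be a SegmentInfo obtained elsewhere.
IDictionary<string, string> diag = info.Diagnostics;
string source;
if (diag != null && diag.TryGetValue("source", out source))
{
    System.Console.WriteLine("segment " + info.name + " was produced by: " + source);
}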
/// <summary> Returns true if a newly flushed (not from merge) /// segment should use the compound file format. /// </summary> public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
// TODO: this method should not have to be entirely // synchronized, ie, merges should be allowed to commit // even while a flush is happening private bool DoFlushInternal(bool flushDocStores, bool flushDeletes) { lock (this) { if (hitOOM) { throw new System.SystemException("this writer hit an OutOfMemoryError; cannot flush"); } EnsureOpen(false); System.Diagnostics.Debug.Assert(TestPoint("startDoFlush")); DoBeforeFlush(); flushCount++; // If we are flushing because too many deletes // accumulated, then we should apply the deletes to free // RAM: flushDeletes |= docWriter.DoApplyDeletes(); // Make sure no threads are actively adding a document. // Returns true if docWriter is currently aborting, in // which case we skip flushing this segment if (infoStream != null) { Message("flush: now pause all indexing threads"); } if (docWriter.PauseAllThreads()) { docWriter.ResumeAllThreads(); return false; } try { SegmentInfo newSegment = null; int numDocs = docWriter.NumDocsInRAM; // Always flush docs if there are any bool flushDocs = numDocs > 0; System.String docStoreSegment = docWriter.DocStoreSegment; System.Diagnostics.Debug.Assert(docStoreSegment != null || numDocs == 0, "dss=" + docStoreSegment + " numDocs=" + numDocs); if (docStoreSegment == null) flushDocStores = false; int docStoreOffset = docWriter.DocStoreOffset; bool docStoreIsCompoundFile = false; if (infoStream != null) { Message(" flush: segment=" + docWriter.Segment + " docStoreSegment=" + docWriter.DocStoreSegment + " docStoreOffset=" + docStoreOffset + " flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + " numDocs=" + numDocs + " numBufDelTerms=" + docWriter.GetNumBufferedDeleteTerms()); Message(" index before flush " + SegString()); } // Check if the doc stores must be separately flushed // because other segments, besides the one we are about // to flush, reference it if (flushDocStores && (!flushDocs || !docWriter.Segment.Equals(docWriter.DocStoreSegment))) { // We must separately flush the doc store if (infoStream != null) Message(" flush shared docStore segment " + docStoreSegment); docStoreIsCompoundFile = FlushDocStores(); flushDocStores = false; } System.String segment = docWriter.Segment; // If we are flushing docs, segment must not be null: System.Diagnostics.Debug.Assert(segment != null || !flushDocs); if (flushDocs) { bool success = false; int flushedDocCount; try { flushedDocCount = docWriter.Flush(flushDocStores); if (infoStream != null) { Message("flushedFiles=" + docWriter.GetFlushedFiles()); } success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception flushing segment " + segment); deleter.Refresh(segment); } } if (0 == docStoreOffset && flushDocStores) { // This means we are flushing private doc stores // with this segment, so it will not be shared // with other segments System.Diagnostics.Debug.Assert(docStoreSegment != null); System.Diagnostics.Debug.Assert(docStoreSegment.Equals(segment)); docStoreOffset = - 1; docStoreIsCompoundFile = false; docStoreSegment = null; } // Create new SegmentInfo, but do not add to our // segmentInfos until deletes are flushed // successfully. 
newSegment = new SegmentInfo(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.HasProx()); SetDiagnostics(newSegment, "flush"); } docWriter.PushDeletes(); if (flushDocs) { segmentInfos.Add(newSegment); Checkpoint(); } if (flushDocs && mergePolicy.UseCompoundFile(segmentInfos, newSegment)) { // Now build compound file bool success = false; try { docWriter.CreateCompoundFile(segment); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file for newly flushed segment " + segment); deleter.DeleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); } } newSegment.SetUseCompoundFile(true); Checkpoint(); } if (flushDeletes) { ApplyDeletes(); } if (flushDocs) Checkpoint(); DoAfterFlush(); return flushDocs; } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "doFlush"); // never hit return false; } finally { docWriter.ResumeAllThreads(); } } }
public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info) { return useCompoundFile; }
/// <summary>Returns a <see cref="Status" /> instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(List <string> onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } return(result); } int numSegments = sis.Count; var segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.cantOpenSegments = true; return(result); } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } result.missingSegmentVersion = true; return(result); } finally { if (input != null) { input.Close(); } } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) { sFormat = "FORMAT [Lucene Pre-2.1]"; } if (format == SegmentInfos.FORMAT_LOCKLESS) { sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; } else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) { sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; } else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) { sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; } else { if (format == SegmentInfos.FORMAT_CHECKSUM) { sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_DEL_COUNT) { sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_HAS_PROX) { sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; } else if (format == SegmentInfos.FORMAT_USER_DATA) { sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; } else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) { sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; } else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.UserData; System.String userDataString; if (sis.UserData.Count > 0) { userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); } foreach (string s in onlySegments) { if (infoStream != null) { infoStream.Write(" " + s); } } result.segmentsChecked.AddRange(onlySegments); Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
result.toolOutOfDate = true; return(result); } result.newSegments = (SegmentInfos)sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) { continue; } var segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.HasProx); segInfoStat.hasProx = info.HasProx; Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); IDictionary <string, string> diagnostics = info.Diagnostics; segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.DocStoreOffset; if (docStoreOffset != -1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.DocStoreSegment); segInfoStat.docStoreSegment = info.DocStoreSegment; Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile); segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile; } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc) { throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc != info.docCount) { throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount); } // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } ICollection <string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields =
fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); const string comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) { infoStream.WriteLine(t.StackTrace); } Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) { reader.Close(); } } // Keeper result.newSegments.Add((SegmentInfo)info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else { Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); } return(result); }
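// Illustrative usage sketch (not part of the original file). Assumptions: the surrounding class is the
// Lucene.Net CheckIndex tool, its constructor takes the Directory to inspect, and FSDirectory.Open plus
// the path below are placeholders for however the index Directory is actually obtained.
Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
CheckIndex checker = new CheckIndex(dir);
Status status = checker.CheckIndex_Renamed_Method(null); // null = check every segment
if (status.clean)
{
    System.Console.WriteLine("index is healthy");
}
else
{
    System.Console.WriteLine(status.numBadSegments + " broken segments; " + status.totLoseDocCount + " documents would be lost by a repair");
}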
/// <summary> Test the term index.</summary> private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader) { var status = new Status.TermIndexStatus(); try { if (infoStream != null) { infoStream.Write(" test: terms, freq, prox..."); } TermEnum termEnum = reader.Terms(); TermPositions termPositions = reader.TermPositions(); // Used only to count up # deleted docs for this term var myTermDocs = new MySegmentTermDocs(reader); int maxDoc = reader.MaxDoc; while (termEnum.Next()) { status.termCount++; Term term = termEnum.Term; int docFreq = termEnum.DocFreq(); termPositions.Seek(term); int lastDoc = -1; int freq0 = 0; status.totFreq += docFreq; while (termPositions.Next()) { freq0++; int doc = termPositions.Doc; int freq = termPositions.Freq; if (doc <= lastDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); } if (doc >= maxDoc) { throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); } lastDoc = doc; if (freq <= 0) { throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); } int lastPos = -1; status.totPos += freq; for (int j = 0; j < freq; j++) { int pos = termPositions.NextPosition(); if (pos < -1) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); } if (pos < lastPos) { throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } lastPos = pos; } } // Now count how many deleted docs occurred in // this term: int delCount; if (reader.HasDeletions) { myTermDocs.Seek(term); while (myTermDocs.Next()) { } delCount = myTermDocs.delCount; } else { delCount = 0; } if (freq0 + delCount != docFreq) { throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); } } Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return(status); }
/// <summary>Checks if any merges are now necessary and returns a /// <see cref="MergePolicy.MergeSpecification" /> if so. A merge /// is necessary when there are more than <see cref="MergeFactor" /> /// segments at a given level. When /// multiple levels have too many segments, this method /// will return multiple merges, allowing the <see cref="MergeScheduler" /> /// to use concurrency. /// </summary> public override MergeSpecification FindMerges(SegmentInfos infos) { int numSegments = infos.Count; if (Verbose()) { Message("findMerges: " + numSegments + " segments"); } // Compute levels, which is just log (base mergeFactor) // of the size of each segment float[] levels = new float[numSegments]; float norm = (float)System.Math.Log(mergeFactor); for (int i = 0; i < numSegments; i++) { SegmentInfo info = infos.Info(i); long size = Size(info); // Floor tiny segments if (size < 1) { size = 1; } levels[i] = (float)System.Math.Log(size) / norm; } float levelFloor; if (minMergeSize <= 0) { levelFloor = (float)0.0; } else { levelFloor = (float)(System.Math.Log(minMergeSize) / norm); } // Now, we quantize the log values into levels. The // first level is any segment whose log size is within // LEVEL_LOG_SPAN of the max size, or, who has such as // segment "to the right". Then, we find the max of all // other segments and use that to define the next level // segment, etc. MergeSpecification spec = null; int start = 0; while (start < numSegments) { // Find max level of all segments not already // quantized. float maxLevel = levels[start]; for (int i = 1 + start; i < numSegments; i++) { float level = levels[i]; if (level > maxLevel) { maxLevel = level; } } // Now search backwards for the rightmost segment that // falls into this level: float levelBottom; if (maxLevel < levelFloor) { // All remaining segments fall into the min level levelBottom = -1.0F; } else { levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN); // Force a boundary at the level floor if (levelBottom < levelFloor && maxLevel >= levelFloor) { levelBottom = levelFloor; } } int upto = numSegments - 1; while (upto >= start) { if (levels[upto] >= levelBottom) { break; } upto--; } if (Verbose()) { Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments"); } // Finally, record all merges that are viable at this level: int end = start + mergeFactor; while (end <= 1 + upto) { bool anyTooLarge = false; for (int i = start; i < end; i++) { SegmentInfo info = infos.Info(i); anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs); } if (!anyTooLarge) { if (spec == null) { spec = new MergeSpecification(); } if (Verbose()) { Message(" " + start + " to " + end + ": add this merge"); } spec.Add(MakeOneMerge(infos, infos.Range(start, end))); } else if (Verbose()) { Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); } start = end; end = start + mergeFactor; } start = 1 + upto; } return(spec); }
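// Worked sketch of the level quantization above (illustrative only). With mergeFactor = 10 the level is
// effectively log10 of the segment size, so segments of 1 KB, 10 KB and 100 KB land near levels 3, 4
// and 5, and only a run of mergeFactor same-level segments becomes a merge.
int mergeFactor = 10;
long[] sizes = { 1024, 10 * 1024, 100 * 1024 };
float norm = (float) System.Math.Log(mergeFactor);
foreach (long size in sizes)
{
    float level = (float) System.Math.Log(System.Math.Max(1, size)) / norm;
    System.Console.WriteLine("size=" + size + " level=" + level);
}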
/// <summary> Finds merges necessary to expunge all deletes from the /// index. We simply merge adjacent segments that have /// deletes, up to mergeFactor at a time. /// </summary> public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos) { int numSegments = segmentInfos.Count; if (Verbose()) { Message("findMergesToExpungeDeletes: " + numSegments + " segments"); } MergeSpecification spec = new MergeSpecification(); int firstSegmentWithDeletions = -1; for (int i = 0; i < numSegments; i++) { SegmentInfo info = segmentInfos.Info(i); int delCount = writer.NumDeletedDocs(info); if (delCount > 0) { if (Verbose()) { Message(" segment " + info.name + " has deletions"); } if (firstSegmentWithDeletions == -1) { firstSegmentWithDeletions = i; } else if (i - firstSegmentWithDeletions == mergeFactor) { // We've seen mergeFactor segments in a row with // deletions, so force a merge now: if (Verbose()) { Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); } spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = i; } } else if (firstSegmentWithDeletions != -1) { // End of a sequence of segments with deletions, so, // merge those past segments even if it's fewer than // mergeFactor segments if (Verbose()) { Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); } spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = -1; } } if (firstSegmentWithDeletions != -1) { if (Verbose()) { Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive"); } spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments))); } return(spec); }
/// <summary>Returns the merges necessary to optimize the index. /// This merge policy defines "optimized" to mean only one /// segment in the index, where that segment has no /// deletions pending nor separate norms, and it is in /// compound file format if the current useCompoundFile /// setting is true. This method returns multiple merges /// (mergeFactor at a time) so the <see cref="MergeScheduler" /> /// in use may make use of concurrency. /// </summary> public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet <SegmentInfo> segmentsToOptimize) { MergeSpecification spec; System.Diagnostics.Debug.Assert(maxNumSegments > 0); if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed // since optimize started): int last = infos.Count; while (last > 0) { SegmentInfo info = infos.Info(--last); if (segmentsToOptimize.Contains(info)) { last++; break; } } if (last > 0) { spec = new MergeSpecification(); // First, enroll all "full" merges (size // mergeFactor) to potentially be run concurrently: while (last - maxNumSegments + 1 >= mergeFactor) { spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last))); last -= mergeFactor; } // Only if there are no full merges pending do we // add a final partial (< mergeFactor segments) merge: if (0 == spec.merges.Count) { if (maxNumSegments == 1) { // Since we must optimize down to 1 segment, the // choice is simple: if (last > 1 || !IsOptimized(infos.Info(0))) { spec.Add(MakeOneMerge(infos, infos.Range(0, last))); } } else if (last > maxNumSegments) { // Take care to pick a partial merge that is // least cost, but does not make the index too // lopsided. If we always just picked the // partial tail then we could produce a highly // lopsided index over time: // We must merge this many segments to leave // maxNumSegments in the index (from when // optimize was first kicked off): int finalMergeSize = last - maxNumSegments + 1; // Consider all possible starting points: long bestSize = 0; int bestStart = 0; for (int i = 0; i < last - finalMergeSize + 1; i++) { long sumSize = 0; for (int j = 0; j < finalMergeSize; j++) { sumSize += Size(infos.Info(j + i)); } if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize)) { bestStart = i; bestSize = sumSize; } } spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize))); } } } else { spec = null; } } else { spec = null; } return(spec); }
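// Illustrative sketch (assumption about the calling side, which is not shown here): FindMergesForOptimize
// is driven by IndexWriter.Optimize. Asking for at most 5 segments makes the policy above cascade
// mergeFactor-sized merges first and finish with a single cheapest partial merge.
writer.Optimize(5);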
/// <summary> Test term vectors for a segment.</summary> private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format) { var status = new Status.TermVectorStatus(); try { if (infoStream != null) { infoStream.Write(" test: term vectors........"); } for (int j = 0; j < info.docCount; ++j) { if (!reader.IsDeleted(j)) { status.docCount++; ITermFreqVector[] tfv = reader.GetTermFreqVectors(j); if (tfv != null) { status.totVectors += tfv.Length; } } } Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) })); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return status; }
protected internal virtual long SizeDocs(SegmentInfo info) { if (internalCalibrateSizeByDeletes) { int delCount = writer.NumDeletedDocs(info); return (info.docCount - (long) delCount); } else { return info.docCount; } }
/// <summary>Flush all pending docs to a new segment </summary> internal int Flush(bool closeDocStore) { lock (this) { System.Diagnostics.Debug.Assert(AllThreadsIdle()); System.Diagnostics.Debug.Assert(numDocsInRAM > 0); System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM); System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0); System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0); InitFlushState(false); docStoreOffset = numDocsInStore; if (infoStream != null) Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM); bool success = false; try { if (closeDocStore) { System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null); System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName)); CloseDocStore(); flushState.numDocsInStore = 0; } ICollection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>(); for (int i = 0; i < threadStates.Length; i++) threads.Add(threadStates[i].consumer); consumer.Flush(threads, flushState); if (infoStream != null) { SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory); long newSegmentSize = si.SizeInBytes(); System.String message = System.String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}", new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) }); Message(message); } flushedDocCount += flushState.numDocs; DoAfterFlush(); success = true; } finally { if (!success) { Abort(); } } System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0); return flushState.numDocs; } }
protected internal virtual long SizeBytes(SegmentInfo info) { long byteSize = info.SizeInBytes(); if (internalCalibrateSizeByDeletes) { int delCount = writer.NumDeletedDocs(info); float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount)); return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio))); } else { return byteSize; } }
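// Sketch of how a concrete policy is expected to satisfy the abstract Size(SegmentInfo) declared earlier
// (an assumption based on the two helpers above; the actual subclasses are not shown in this file):
// a byte-size based policy delegates to SizeBytes, a document-count based one to SizeDocs.
protected internal override long Size(SegmentInfo info)
{
    return SizeBytes(info); // or: return SizeDocs(info);
}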
/// <summary>Merges the provided indexes into this index. /// <p/>After this completes, the index is optimized. <p/> /// <p/>The provided IndexReaders are not closed.<p/> /// /// <p/><b>NOTE:</b> while this is running, any attempts to /// add or delete documents (with another thread) will be /// paused until this method completes. /// /// <p/>See <see cref="AddIndexesNoOptimize(Directory[])" /> for /// details on transactional semantics, temporary free /// space required in the Directory, and non-CFS segments /// on an Exception.<p/> /// /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError /// you should immediately close the writer. See <a /// href="#OOME">above</a> for details.<p/> /// /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public virtual void AddIndexes(params IndexReader[] readers) { EnsureOpen(); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); // We must pre-acquire a read lock here (and upgrade to // write lock in startTransaction below) so that no // other addIndexes is allowed to start up after we have // flushed & optimized but before we then start our // transaction. This is because the merging below // requires that only one segment is present in the // index: AcquireRead(); try { SegmentInfo info = null; System.String mergedName = null; SegmentMerger merger = null; bool success = false; try { Flush(true, false, true); Optimize(); // start with zero or 1 seg success = true; } finally { // Take care to release the read lock if we hit an // exception before starting the transaction if (!success) ReleaseRead(); } // true means we already have a read lock; if this // call hits an exception it will release the write // lock: StartTransaction(true); try { mergedName = NewSegmentName(); merger = new SegmentMerger(this, mergedName, null); SegmentReader sReader = null; lock (this) { if (segmentInfos.Count == 1) { // add existing index, if any sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, - 1); } } success = false; try { if (sReader != null) merger.Add(sReader); for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); int docCount = merger.Merge(); // merge 'em lock (this) { segmentInfos.Clear(); // pop old infos & add new info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false, merger.HasProx()); SetDiagnostics(info, "addIndexes(params IndexReader[])"); segmentInfos.Add(info); } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); success = true; } finally { if (sReader != null) { readerPool.Release(sReader); } } } finally { if (!success) { if (infoStream != null) Message("hit exception in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } if (mergePolicy is LogMergePolicy && UseCompoundFile) { IList<string> files = null; lock (this) { // Must incRef our files so that if another thread // is running merge/optimize, it doesn't delete our // segment's files before we have a change to // finish making the compound file. 
if (segmentInfos.Contains(info)) { files = info.Files(); deleter.IncRef(files); } } if (files != null) { success = false; StartTransaction(false); try { merger.CreateCompoundFile(mergedName + ".cfs"); lock (this) { info.SetUseCompoundFile(true); } success = true; } finally { lock (this) { deleter.DecRef(files); } if (!success) { if (infoStream != null) Message("hit exception building compound file in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(params IndexReader[])"); } finally { if (docWriter != null) { docWriter.ResumeAllThreads(); } } }
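// Illustrative usage sketch (not part of the original file). Assumes writer is an open IndexWriter and
// otherDir1/otherDir2 are Directory instances holding other indexes; the readers are closed here and the
// writer is committed/closed by its owner.
IndexReader r1 = IndexReader.Open(otherDir1, true);
IndexReader r2 = IndexReader.Open(otherDir2, true);
try
{
    writer.AddIndexes(r1, r2); // pauses adds/deletes, merges everything in, leaves the index optimized
}
finally
{
    r1.Close();
    r2.Close();
}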
/// <summary>Returns true if this single info is optimized (has no /// pending norms or deletes, is in the same dir as the /// writer, and matches the current compound file setting). /// </summary> private bool IsOptimized(SegmentInfo info) { bool hasDeletions = writer.NumDeletedDocs(info) > 0; return !hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory && (info.GetUseCompoundFile() == useCompoundFile || internalNoCFSRatio < 1.0); }
// used only by asserts public virtual bool InfoIsLive(SegmentInfo info) { lock (this) { int idx = Enclosing_Instance.segmentInfos.IndexOf(info); System.Diagnostics.Debug.Assert(idx != -1); System.Diagnostics.Debug.Assert(Enclosing_Instance.segmentInfos[idx] == info); return true; } }
/// <summary> Copy everything from src SegmentInfo into our instance.</summary> internal void Reset(SegmentInfo src) { ClearFiles(); name = src.name; docCount = src.docCount; dir = src.dir; preLockless = src.preLockless; delGen = src.delGen; docStoreOffset = src.docStoreOffset; docStoreIsCompoundFile = src.docStoreIsCompoundFile; if (src.normGen == null) { normGen = null; } else { normGen = new long[src.normGen.Length]; Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length); } isCompoundFile = src.isCompoundFile; hasSingleNormFile = src.hasSingleNormFile; delCount = src.delCount; }
private void SetDiagnostics(SegmentInfo info, System.String source) { SetDiagnostics(info, source, null); }
public System.Object Clone() { SegmentInfo si = new SegmentInfo(name, docCount, dir); si.isCompoundFile = isCompoundFile; si.delGen = delGen; si.delCount = delCount; si.hasProx = hasProx; si.preLockless = preLockless; si.hasSingleNormFile = hasSingleNormFile; if (this.diagnostics != null) { si.diagnostics = new System.Collections.Generic.Dictionary<string, string>(); foreach (string o in diagnostics.Keys) { si.diagnostics.Add(o, diagnostics[o]); } } if (normGen != null) { si.normGen = new long[normGen.Length]; normGen.CopyTo(si.normGen, 0); } si.docStoreOffset = docStoreOffset; si.docStoreSegment = docStoreSegment; si.docStoreIsCompoundFile = docStoreIsCompoundFile; if (this.files != null) { si.files = new System.Collections.Generic.List<string>(); foreach (string file in files) { si.files.Add(file); } } return si; }
/// <summary> Returns a ref to a clone. NOTE: this clone is not /// enrolled in the pool, so you should simply close() /// it when you're done (ie, do not call release()). /// </summary> public virtual SegmentReader GetReadOnlyClone(SegmentInfo info, bool doOpenStores, int termInfosIndexDivisor) { lock (this) { SegmentReader sr = Get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor); try { return (SegmentReader) sr.Clone(true); } finally { sr.DecRef(); } } }
/// <summary> Obtain a SegmentReader from the readerPool. The reader /// must be returned by calling <see cref="Release(SegmentReader)" /> /// /// </summary> /// <seealso cref="Release(SegmentReader)"> /// </seealso> /// <param name="info"> /// </param> /// <param name="doOpenStores"> /// </param> /// <param name="readBufferSize"> /// </param> /// <param name="termsIndexDivisor"> /// </param> /// <throws> IOException </throws> public virtual SegmentReader Get(SegmentInfo info, bool doOpenStores, int readBufferSize, int termsIndexDivisor) { lock (this) { if (Enclosing_Instance.poolReaders) { readBufferSize = BufferedIndexInput.BUFFER_SIZE; } SegmentReader sr = readerMap[info]; if (sr == null) { // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: sr = SegmentReader.Get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor); if (info.dir == enclosingInstance.directory) { // Only pool if reader is not external readerMap[info]=sr; } } else { if (doOpenStores) { sr.OpenDocStores(); } if (termsIndexDivisor != - 1 && !sr.TermsIndexLoaded()) { // If this reader was originally opened because we // needed to merge it, we didn't load the terms // index. But now, if the caller wants the terms // index (eg because it's doing deletes, or an NRT // reader is being opened) we ask the reader to // load its terms index. sr.LoadTermsIndex(termsIndexDivisor); } } // Return a ref to our caller if (info.dir == enclosingInstance.directory) { // Only incRef if we pooled (reader is not external) sr.IncRef(); } return sr; } }
/// <summary> Test stored fields for a segment.</summary> private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format) { var status = new Status.StoredFieldStatus(); try { if (infoStream != null) { infoStream.Write(" test: stored fields......."); } // Scan stored fields for all documents for (int j = 0; j < info.docCount; ++j) { if (!reader.IsDeleted(j)) { status.docCount++; Document.Document doc = reader.Document(j); status.totFields += doc.GetFields().Count; } } // Validate docCount if (status.docCount != reader.NumDocs()) { throw new System.SystemException("docCount=" + status.docCount + " but saw " + reader.NumDocs() + " undeleted docs"); } Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) })); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return status; }
/// <summary> Obtain the number of deleted docs for a pooled reader. /// If the reader isn't being pooled, the segmentInfo's /// delCount is returned. /// </summary> public virtual int NumDeletedDocs(SegmentInfo info) { SegmentReader reader = readerPool.GetIfExists(info); try { if (reader != null) { return reader.NumDeletedDocs; } else { return info.GetDelCount(); } } finally { if (reader != null) { readerPool.Release(reader); } } }
public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info) { return(useCompoundFile); }