public /*protected internal*/ override System.Object DoBody(System.String segmentFileName) { SegmentInfos infos = new SegmentInfos(); infos.Read(directory, segmentFileName); if (readOnly) return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor); else return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor); }
internal int docShift; // total # deleted docs that were compacted by this merge public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) { this.docMaps = docMaps; SegmentInfo firstSegment = merge.segments.Info(0); int i = 0; while (true) { SegmentInfo info = infos.Info(i); if (info.Equals(firstSegment)) break; minDocID += info.docCount; i++; } int numDocs = 0; for (int j = 0; j < docMaps.Length; i++, j++) { numDocs += infos.Info(i).docCount; System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); } maxDocID = minDocID + numDocs; starts = new int[docMaps.Length]; newStarts = new int[docMaps.Length]; starts[0] = minDocID; newStarts[0] = minDocID; for (i = 1; i < docMaps.Length; i++) { int lastDocCount = merge.segments.Info(i - 1).docCount; starts[i] = starts[i - 1] + lastDocCount; newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; } docShift = numDocs - mergedDocCount; // There are rare cases when docShift is 0. It happens // if you try to delete a docID that's out of bounds, // because the SegmentReader still allocates deletedDocs // and pretends it has deletions ... so we can't make // this assert here // assert docShift > 0; // Make sure it all adds up: System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1])); }
/// <summary>This constructor is only used for {@link #Reopen()} </summary> internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor) { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = infos; this.termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously // properly sync'd: SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true)); } // we put the old SegmentReaders in a map, that allows us // to lookup a reader using its segment name System.Collections.IDictionary segmentReaders = new System.Collections.Hashtable(); if (oldReaders != null) { // create a Map SegmentName->SegmentReader for (int i = 0; i < oldReaders.Length; i++) { segmentReaders[oldReaders[i].GetSegmentName()] = (System.Int32) i; } } SegmentReader[] newReaders = new SegmentReader[infos.Count]; // remember which readers are shared between the old and the re-opened // DirectoryReader - we have to incRef those readers bool[] readerShared = new bool[infos.Count]; for (int i = infos.Count - 1; i >= 0; i--) { // find SegmentReader for this segment int? oldReaderIndex = (int?)segmentReaders[infos.Info(i).name]; if (oldReaderIndex.HasValue == false) { // this is a new segment, no old SegmentReader can be reused newReaders[i] = null; } else { // there is an old reader for this segment - we'll try to reopen it newReaders[i] = oldReaders[oldReaderIndex.Value]; } bool success = false; try { SegmentReader newReader; if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile()) { // We should never see a totally new segment during cloning System.Diagnostics.Debug.Assert(!doClone); // this is a new reader; in case we hit an exception we can close it safely newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor); } else { newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly); } if (newReader == newReaders[i]) { // this reader will be shared between the old and the new one, // so we must incRef it readerShared[i] = true; newReader.IncRef(); } else { readerShared[i] = false; newReaders[i] = newReader; } success = true; } finally { if (!success) { for (i++; i < infos.Count; i++) { if (newReaders[i] != null) { try { if (!readerShared[i]) { // this is a new subReader that is not used by the old one, // we can close it newReaders[i].Close(); } else { // this subReader is also used by the old reader, so instead // closing we must decRef it newReaders[i].DecRef(); } } catch (System.IO.IOException ignore) { // keep going - we want to clean up as much as possible } } } } } } // initialize the readers to calculate maxDoc before we try to reuse the old normsCache Initialize(newReaders); // try to copy unchanged norms from the old normsCache to the new one if (oldNormsCache != null) { System.Collections.IEnumerator it = new System.Collections.Hashtable(oldNormsCache).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.String field = (System.String) entry.Key; if (!HasNorms(field)) { continue; } byte[] oldBytes = (byte[]) entry.Value; byte[] bytes = new byte[MaxDoc()]; for (int i = 0; i < subReaders.Length; i++) { int? oldReaderIndex = (int?)segmentReaders[subReaders[i].GetSegmentName()]; // this SegmentReader was not re-opened, we can copy all of its norms if (oldReaderIndex.HasValue && (oldReaders[oldReaderIndex.Value] == subReaders[i] || oldReaders[oldReaderIndex.Value].norms[field] == subReaders[i].norms[field])) { // we don't have to synchronize here: either this constructor is called from a SegmentReader, // in which case no old norms cache is present, or it is called from MultiReader.reopen(), // which is synchronized Array.Copy(oldBytes, oldStarts[oldReaderIndex.Value], bytes, starts[i], starts[i + 1] - starts[i]); } else { subReaders[i].Norms(field, bytes, starts[i]); } } normsCache[field] = bytes; // update cache } } }
internal void DecRef(SegmentInfos segmentInfos) { System.Collections.Generic.IEnumerator<string> it = segmentInfos.Files(directory, false).GetEnumerator(); while (it.MoveNext()) { DecRef(it.Current); } }
/// <summary> For definition of "check point" see IndexWriter comments: /// "Clarification: Check Points (and commits)". /// /// Writer calls this when it has made a "consistent /// change" to the index, meaning new files are written to /// the index and the in-memory SegmentInfos have been /// modified to point to those files. /// /// This may or may not be a commit (segments_N may or may /// not have been written). /// /// We simply incref the files referenced by the new /// SegmentInfos and decref the files we had previously /// seen (if any). /// /// If this is a commit, we also call the policy to give it /// a chance to remove other commits. If any commits are /// removed, we decref their files as well. /// </summary> public void Checkpoint(SegmentInfos segmentInfos, bool isCommit) { if (infoStream != null) { Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable // files (because they were in use, on Windows): DeletePendingFiles(); // Incref the files: IncRef(segmentInfos, isCommit); if (isCommit) { // Append to our commits list: commits.Add(new CommitPoint(this, commitsToDelete, directory, segmentInfos)); // Tell policy so it can remove commits: policy.OnCommit(commits); // Decref files for commits that were deleted by the policy: DeleteCommits(); } else { System.Collections.Generic.IList<string> docWriterFiles; if (docWriter != null) { docWriterFiles = docWriter.OpenFiles(); if (docWriterFiles != null) // We must incRef these files before decRef'ing // last files to make sure we don't accidentally // delete them: IncRef(docWriterFiles); } else docWriterFiles = null; // DecRef old files from the last checkpoint, if any: int size = lastFiles.Count; if (size > 0) { for (int i = 0; i < size; i++) DecRef(lastFiles[i]); lastFiles.Clear(); } // Save files so we can decr on next checkpoint/commit: foreach (string fname in segmentInfos.Files(directory, false)) { lastFiles.Add(fname); } if (docWriterFiles != null) { foreach (string fname in docWriterFiles) { lastFiles.Add(fname); } } } }
protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) { bool doCFS; if (!useCompoundFile) { doCFS = false; } else if (noCFSRatio == 1.0) { doCFS = true; } else { long totSize = 0; for (int i = 0; i < infos.Count; i++) { totSize += Size(infos.Info(i)); } long mergeSize = 0; for (int i = 0; i < infosToMerge.Count; i++) { mergeSize += Size(infosToMerge.Info(i)); } doCFS = mergeSize <= noCFSRatio * totSize; } return new OneMerge(infosToMerge, doCFS); }
/// <summary> Finds merges necessary to expunge all deletes from the /// index. We simply merge adjacent segments that have /// deletes, up to mergeFactor at a time. /// </summary> public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos) { int numSegments = segmentInfos.Count; if (Verbose()) Message("findMergesToExpungeDeletes: " + numSegments + " segments"); MergeSpecification spec = new MergeSpecification(); int firstSegmentWithDeletions = - 1; for (int i = 0; i < numSegments; i++) { SegmentInfo info = segmentInfos.Info(i); int delCount = writer.NumDeletedDocs(info); if (delCount > 0) { if (Verbose()) Message(" segment " + info.name + " has deletions"); if (firstSegmentWithDeletions == - 1) firstSegmentWithDeletions = i; else if (i - firstSegmentWithDeletions == mergeFactor) { // We've seen mergeFactor segments in a row with // deletions, so force a merge now: if (Verbose()) Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = i; } } else if (firstSegmentWithDeletions != - 1) { // End of a sequence of segments with deletions, so, // merge those past segments even if it's fewer than // mergeFactor segments if (Verbose()) Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = - 1; } } if (firstSegmentWithDeletions != - 1) { if (Verbose()) Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive"); spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments))); } return spec; }
private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize) { int numSegments = infos.Count; int numToOptimize = 0; SegmentInfo optimizeInfo = null; for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) { SegmentInfo info = infos.Info(i); if (segmentsToOptimize.Contains(info)) { numToOptimize++; optimizeInfo = info; } } return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)); }
// Javadoc inherited public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info) { return useCompoundFile; }
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache, bool doClone, int termInfosIndexDivisor):base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) { }
internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor):base(directory, sis, deletionPolicy, true, termInfosIndexDivisor) { }
/// <summary>Returns a {@link Status} instance detailing /// the state of the index. /// /// </summary> /// <param name="onlySegments">list of specific segment names to check /// /// <p/>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p/><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. /// </param> public virtual Status CheckIndex_Renamed_Method(System.Collections.IList onlySegments) { System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.Read(dir); } catch (System.Exception t) { Msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) infoStream.WriteLine(t.StackTrace); return result; } int numSegments = sis.Count; System.String segmentsFileName = sis.GetCurrentSegmentFileName(); IndexInput input = null; try { input = dir.OpenInput(segmentsFileName); } catch (System.Exception t) { Msg("ERROR: could not open segments file in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.cantOpenSegments = true; return result; } int format = 0; try { format = input.ReadInt(); } catch (System.Exception t) { Msg("ERROR: could not read segment file version in directory"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); result.missingSegmentVersion = true; return result; } finally { if (input != null) input.Close(); } System.String sFormat = ""; bool skip = false; if (format == SegmentInfos.FORMAT) sFormat = "FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos.FORMAT_LOCKLESS) sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else { if (format == SegmentInfos.FORMAT_CHECKSUM) sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_DEL_COUNT) sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_HAS_PROX) sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_USER_DATA) sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; result.userData = sis.GetUserData(); System.String userDataString; if (sis.GetUserData().Count > 0) { userDataString = " userData=" + SupportClass.CollectionsHelper.CollectionToString(sis.GetUserData()); } else { userDataString = ""; } Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; if (infoStream != null) infoStream.Write("\nChecking only these segments:"); System.Collections.IEnumerator it = onlySegments.GetEnumerator(); while (it.MoveNext()) { if (infoStream != null) { infoStream.Write(" " + it.Current); } } System.Collections.IEnumerator e = onlySegments.GetEnumerator(); while (e.MoveNext() == true) { result.segmentsChecked.Add(e.Current); } Msg(":"); } if (skip) { Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return result; } result.newSegments = (SegmentInfos) sis.Clone(); result.newSegments.Clear(); for (int i = 0; i < numSegments; i++) { SegmentInfo info = sis.Info(i); if (onlySegments != null && !onlySegments.Contains(info.name)) continue; Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.Add(segInfoStat); Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount; int toLoseDocCount = info.docCount; SegmentReader reader = null; try { Msg(" compound=" + info.GetUseCompoundFile()); segInfoStat.compound = info.GetUseCompoundFile(); Msg(" hasProx=" + info.GetHasProx()); segInfoStat.hasProx = info.GetHasProx(); Msg(" numFiles=" + info.Files().Count); segInfoStat.numFiles = info.Files().Count; Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); System.Collections.Generic.IDictionary<string, string> diagnostics = info.GetDiagnostics(); segInfoStat.diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(" diagnostics = " + SupportClass.CollectionsHelper.CollectionToString(diagnostics)); } int docStoreOffset = info.GetDocStoreOffset(); if (docStoreOffset != - 1) { Msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; Msg(" docStoreSegment=" + info.GetDocStoreSegment()); segInfoStat.docStoreSegment = info.GetDocStoreSegment(); Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile(); } System.String delFileName = info.GetDelFileName(); if (delFileName == null) { Msg(" no deletions"); segInfoStat.hasDeletions = false; } else { Msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) infoStream.Write(" test: open reader........."); reader = SegmentReader.Get(info); segInfoStat.openReaderPassed = true; int numDocs = reader.NumDocs(); toLoseDocCount = numDocs; if (reader.HasDeletions()) { if (reader.deletedDocs.Count() != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (reader.deletedDocs.Count() > reader.MaxDoc()) { throw new System.SystemException("too many deleted docs: maxDoc()=" + reader.MaxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); } if (info.docCount - numDocs != info.GetDelCount()) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.GetDelCount() != 0) { throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); } Msg("OK"); } if (reader.MaxDoc() != info.docCount) throw new System.SystemException("SegmentReader.maxDoc() " + reader.MaxDoc() + " != SegmentInfos.docCount " + info.docCount); // Test getFieldNames() if (infoStream != null) { infoStream.Write(" test: fields.............."); } System.Collections.Generic.ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); Msg("OK [" + fieldNames.Count + " fields]"); segInfoStat.numFields = fieldNames.Count; // Test Field Norms segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); // Test the Term Index segInfoStat.termIndexStatus = TestTermIndex(info, reader); // Test Stored Fields segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); // Test Term Vectors segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); // Rethrow the first exception we encountered // This will cause stats for failed segments to be incremented properly if (segInfoStat.fieldNormStatus.error != null) { throw new System.SystemException("Field Norm test failed"); } else if (segInfoStat.termIndexStatus.error != null) { throw new System.SystemException("Term Index test failed"); } else if (segInfoStat.storedFieldStatus.error != null) { throw new System.SystemException("Stored Field test failed"); } else if (segInfoStat.termVectorStatus.error != null) { throw new System.SystemException("Term Vector test failed"); } Msg(""); } catch (System.Exception t) { Msg("FAILED"); System.String comment; comment = "fixIndex() would remove reference to this segment"; Msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) infoStream.WriteLine(t.StackTrace); Msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) reader.Close(); } // Keeper result.newSegments.Add(info.Clone()); } if (0 == result.numBadSegments) { result.clean = true; Msg("No problems were detected with this index.\n"); } else Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); return result; }
public /*protected internal*/ override System.Object DoBody(System.String segmentFileName) { SegmentInfos infos = new SegmentInfos(); infos.Read(directory, segmentFileName); return Enclosing_Instance.DoReopen(infos, false, openReadOnly); }
private DirectoryReader DoReopen(SegmentInfos infos, bool doClone, bool openReadOnly) { lock (this) { DirectoryReader reader; if (openReadOnly) { reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); } else { reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor); } reader.SetDisableFakeNorms(GetDisableFakeNorms()); return reader; } }
public override object DoBody(string segmentFileName) { SegmentInfos infos = new SegmentInfos(); infos.Read(this.dir, segmentFileName); return enclosingInstance.DoReopen(infos, false, openReadOnly); }
/// <summary> Returns a new SegmentInfos containg the SegmentInfo /// instances in the specified range first (inclusive) to /// last (exclusive), so total number of segments returned /// is last-first. /// </summary> public SegmentInfos Range(int first, int last) { SegmentInfos infos = new SegmentInfos(); infos.AddRange((System.Collections.IList) ((System.Collections.ArrayList) this).GetRange(first, last - first)); return infos; }
// Carry over generation numbers from another SegmentInfos internal void UpdateGeneration(SegmentInfos other) { lastGeneration = other.lastGeneration; generation = other.generation; version = other.version; }
internal ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor):base(writer, infos, termInfosIndexDivisor) { }
// Javadoc inherited public override bool UseCompoundDocStore(SegmentInfos infos) { return useCompoundDocStore; }
/// <summary>Replaces all segments in this instance, but keeps /// generation, version, counter so that future commits /// remain write once. /// </summary> internal void Replace(SegmentInfos other) { Clear(); AddRange(other); lastGeneration = other.lastGeneration; }
/// <summary>Returns the merges necessary to optimize the index. /// This merge policy defines "optimized" to mean only one /// segment in the index, where that segment has no /// deletions pending nor separate norms, and it is in /// compound file format if the current useCompoundFile /// setting is true. This method returns multiple merges /// (mergeFactor at a time) so the {@link MergeScheduler} /// in use may make use of concurrency. /// </summary> public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize) { MergeSpecification spec; System.Diagnostics.Debug.Assert(maxNumSegments > 0); if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed // since optimize started): int last = infos.Count; while (last > 0) { SegmentInfo info = infos.Info(--last); if (segmentsToOptimize.Contains(info)) { last++; break; } } if (last > 0) { spec = new MergeSpecification(); // First, enroll all "full" merges (size // mergeFactor) to potentially be run concurrently: while (last - maxNumSegments + 1 >= mergeFactor) { spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last))); last -= mergeFactor; } // Only if there are no full merges pending do we // add a final partial (< mergeFactor segments) merge: if (0 == spec.merges.Count) { if (maxNumSegments == 1) { // Since we must optimize down to 1 segment, the // choice is simple: if (last > 1 || !IsOptimized(infos.Info(0))) spec.Add(MakeOneMerge(infos, infos.Range(0, last))); } else if (last > maxNumSegments) { // Take care to pick a partial merge that is // least cost, but does not make the index too // lopsided. If we always just picked the // partial tail then we could produce a highly // lopsided index over time: // We must merge this many segments to leave // maxNumSegments in the index (from when // optimize was first kicked off): int finalMergeSize = last - maxNumSegments + 1; // Consider all possible starting points: long bestSize = 0; int bestStart = 0; for (int i = 0; i < last - finalMergeSize + 1; i++) { long sumSize = 0; for (int j = 0; j < finalMergeSize; j++) sumSize += Size(infos.Info(j + i)); if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize)) { bestStart = i; bestSize = sumSize; } } spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize))); } } } else spec = null; } else spec = null; return spec; }
private void InitBlock(SegmentInfos enclosingInstance) { this.enclosingInstance = enclosingInstance; }
/// <summary>Checks if any merges are now necessary and returns a /// {@link MergePolicy.MergeSpecification} if so. A merge /// is necessary when there are more than {@link /// #setMergeFactor} segments at a given level. When /// multiple levels have too many segments, this method /// will return multiple merges, allowing the {@link /// MergeScheduler} to use concurrency. /// </summary> public override MergeSpecification FindMerges(SegmentInfos infos) { int numSegments = infos.Count; if (Verbose()) Message("findMerges: " + numSegments + " segments"); // Compute levels, which is just log (base mergeFactor) // of the size of each segment float[] levels = new float[numSegments]; float norm = (float) System.Math.Log(mergeFactor); for (int i = 0; i < numSegments; i++) { SegmentInfo info = infos.Info(i); long size = Size(info); // Floor tiny segments if (size < 1) size = 1; levels[i] = (float) System.Math.Log(size) / norm; } float levelFloor; if (minMergeSize <= 0) levelFloor = (float) 0.0; else { levelFloor = (float) (System.Math.Log(minMergeSize) / norm); } // Now, we quantize the log values into levels. The // first level is any segment whose log size is within // LEVEL_LOG_SPAN of the max size, or, who has such as // segment "to the right". Then, we find the max of all // other segments and use that to define the next level // segment, etc. MergeSpecification spec = null; int start = 0; while (start < numSegments) { // Find max level of all segments not already // quantized. float maxLevel = levels[start]; for (int i = 1 + start; i < numSegments; i++) { float level = levels[i]; if (level > maxLevel) maxLevel = level; } // Now search backwards for the rightmost segment that // falls into this level: float levelBottom; if (maxLevel < levelFloor) // All remaining segments fall into the min level levelBottom = - 1.0F; else { levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN); // Force a boundary at the level floor if (levelBottom < levelFloor && maxLevel >= levelFloor) levelBottom = levelFloor; } int upto = numSegments - 1; while (upto >= start) { if (levels[upto] >= levelBottom) { break; } upto--; } if (Verbose()) Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments"); // Finally, record all merges that are viable at this level: int end = start + mergeFactor; while (end <= 1 + upto) { bool anyTooLarge = false; for (int i = start; i < end; i++) { SegmentInfo info = infos.Info(i); anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs); } if (!anyTooLarge) { if (spec == null) spec = new MergeSpecification(); if (Verbose()) Message(" " + start + " to " + end + ": add this merge"); spec.Add(MakeOneMerge(infos, infos.Range(start, end))); } else if (Verbose()) Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); start = end; end = start + mergeFactor; } start = 1 + upto; } return spec; }
/// <summary> Returns a copy of this instance, also copying each /// SegmentInfo. /// </summary> public override System.Object Clone() { SegmentInfos sis = new SegmentInfos(); for (int i = 0; i < this.Count; i++) { sis.Add(((SegmentInfo) this[i]).Clone()); } sis.counter = this.counter; sis.generation = this.generation; sis.lastGeneration = this.lastGeneration; // sis.pendingSegnOutput = this.pendingSegnOutput; // {{Aroush-2.9}} needed? sis.userData = new System.Collections.Generic.Dictionary<string, string>(userData); sis.version = this.version; return sis; }
/// <summary> Initialize the deleter: find all previous commits in /// the Directory, incref the files they reference, call /// the policy to let it delete commits. This will remove /// any files not referenced by any of the commits. /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, System.Collections.Generic.Dictionary<string, string> synced) { this.docWriter = docWriter; this.infoStream = infoStream; this.synced = synced; if (infoStream != null) { Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy); } this.policy = policy; this.directory = directory; // First pass: walk the files and initialize our ref // counts: long currentGen = segmentInfos.GetGeneration(); IndexFileNameFilter filter = IndexFileNameFilter.GetFilter(); System.String[] files = directory.ListAll(); CommitPoint currentCommitPoint = null; for (int i = 0; i < files.Length; i++) { System.String fileName = files[i]; if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN)) { // Add this file to refCounts with initial count 0: GetRefCount(fileName); if (fileName.StartsWith(IndexFileNames.SEGMENTS)) { // This is a commit (segments or segments_N), and // it's valid (<= the max gen). Load it, then // incref all files it refers to: if (infoStream != null) { Message("init: load commit \"" + fileName + "\""); } SegmentInfos sis = new SegmentInfos(); try { sis.Read(directory, fileName); } catch (System.IO.FileNotFoundException e) { // LUCENE-948: on NFS (and maybe others), if // you have writers switching back and forth // between machines, it's very likely that the // dir listing will be stale and will claim a // file segments_X exists when in fact it // doesn't. So, we catch this and handle it // as if the file does not exist if (infoStream != null) { Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point"); } sis = null; } catch (System.IO.IOException e) { if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen) { throw e; } else { // Most likely we are opening an index that // has an aborted "future" commit, so suppress // exc in this case sis = null; } } if (sis != null) { CommitPoint commitPoint = new CommitPoint(this,commitsToDelete, directory, sis); if (sis.GetGeneration() == segmentInfos.GetGeneration()) { currentCommitPoint = commitPoint; } commits.Add(commitPoint); IncRef(sis, true); if (lastSegmentInfos == null || sis.GetGeneration() > lastSegmentInfos.GetGeneration()) { lastSegmentInfos = sis; } } } } } if (currentCommitPoint == null) { // We did not in fact see the segments_N file // corresponding to the segmentInfos that was passed // in. Yet, it must exist, because our caller holds // the write lock. This can happen when the directory // listing was stale (eg when index accessed via NFS // client with stale directory listing cache). So we // try now to explicitly open this commit point: SegmentInfos sis = new SegmentInfos(); try { sis.Read(directory, segmentInfos.GetCurrentSegmentFileName()); } catch (System.IO.IOException e) { throw new CorruptIndexException("failed to locate current segments_N file"); } if (infoStream != null) Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName()); currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis); commits.Add(currentCommitPoint); IncRef(sis, true); } // We keep commits list in sorted order (oldest to newest): commits.Sort(); // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. System.Collections.Generic.IEnumerator<System.Collections.Generic.KeyValuePair<System.String, RefCount>> it = refCounts.GetEnumerator(); while (it.MoveNext()) { System.String fileName = (System.String) it.Current.Key; RefCount rc = (RefCount) refCounts[fileName]; if (0 == rc.count) { if (infoStream != null) { Message("init: removing unreferenced file \"" + fileName + "\""); } DeleteFile(fileName); } } // Finally, give policy a chance to remove things on // startup: policy.OnInit(commits); // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit Checkpoint(segmentInfos, false); startingCommitDeleted = currentCommitPoint.IsDeleted(); DeleteCommits(); }
/// <summary> Current version number from segments file.</summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public static long ReadCurrentVersion(Directory directory) { // Fully read the segments file: this ensures that it's // completely written so that if // IndexWriter.prepareCommit has been called (but not // yet commit), then the reader will still see itself as // current: SegmentInfos sis = new SegmentInfos(); sis.Read(directory); return sis.version; //return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).Run()); //DIGY: AnonymousClassFindSegmentsFile1 can safely be deleted }
internal void IncRef(SegmentInfos segmentInfos, bool isCommit) { // If this is a commit point, also incRef the // segments_N file: System.Collections.Generic.IEnumerator<string> it = segmentInfos.Files(directory, isCommit).GetEnumerator(); while (it.MoveNext()) { IncRef(it.Current); } }
/// <summary> Returns userData from latest segments file</summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public static System.Collections.Generic.IDictionary<string, string> ReadCurrentUserData(Directory directory) { SegmentInfos sis = new SegmentInfos(); sis.Read(directory); return sis.GetUserData(); }
public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos) { InitBlock(enclosingInstance); this.directory = directory; this.commitsToDelete = commitsToDelete; userData = segmentInfos.GetUserData(); segmentsFileName = segmentInfos.GetCurrentSegmentFileName(); version = segmentInfos.GetVersion(); generation = segmentInfos.GetGeneration(); files = segmentInfos.Files(directory, true); gen = segmentInfos.GetGeneration(); isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions(); System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory)); }
internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Mono.Lucene.Net.Store.Directory Param1):base(Param1) { InitBlock(enclosingInstance); }