public Info ( int i ) : Lucene.Net.Index.SegmentCommitInfo

| Parameter | Type |
| --------- | ---- |
| i | int |
| Result | Lucene.Net.Index.SegmentCommitInfo |
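A minimal usage sketch before the examples: given an open Lucene.Net.Store.Directory holding a committed index (the `dir` variable here is hypothetical), the current segment table can be read and walked with Info(i). The 4.x-style API is assumed, where Info(i) returns a SegmentCommitInfo wrapping a SegmentInfo; in the older examples below it returns SegmentInfo directly.

// Hedged sketch (Lucene.Net 4.x-style API assumed; `dir` is a hypothetical open Directory).
SegmentInfos infos = new SegmentInfos();
infos.Read(dir);                      // load the current segments_N file
for (int i = 0; i < infos.Count; i++) // Info(i) is valid for 0 <= i < Count
{
    SegmentCommitInfo info = infos.Info(i);
    Console.WriteLine(info.Info.Name + " docs=" + info.Info.DocCount);
}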
protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos)
{
    lock (this)
    {
        DirectoryIndexReader newReader;
        if (infos.Count == 1)
        {
            SegmentInfo si = infos.Info(0);
            if (segment.Equals(si.name) && si.GetUseCompoundFile() == this.si.GetUseCompoundFile())
            {
                newReader = ReopenSegment(si);
            }
            else
            {
                // segment not referenced anymore, reopen not possible
                // or segment format changed
                newReader = SegmentReader.Get(readOnly, infos, infos.Info(0), false);
            }
        }
        else
        {
            if (readOnly)
            {
                return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] { this }, null, null);
            }
            else
            {
                return new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] { this }, null, null, false);
            }
        }
        return newReader;
    }
}
public virtual void ListSegments()
{
    for (int x = 0; x < infos.Size(); x++)
    {
        SegmentCommitInfo info = infos.Info(x);
        string sizeStr = string.Format(CultureInfo.InvariantCulture, "{0:###,###.###}", info.SizeInBytes());
        Console.WriteLine(info.Info.Name + " " + sizeStr);
    }
}
public virtual void TestAddIndexes()
{
    Directory dir1 = NewDirectory();
    Directory dir2 = NewDirectory();
    IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    Document d1 = new Document();
    d1.Add(new TextField("f1", "first field", Field.Store.YES));
    d1.Add(new TextField("f2", "second field", Field.Store.YES));
    writer.AddDocument(d1);
    writer.Dispose();
    writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    Document d2 = new Document();
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.StoreTermVectors = true;
    d2.Add(new TextField("f2", "second field", Field.Store.YES));
    d2.Add(new Field("f1", "first field", customType2));
    d2.Add(new TextField("f3", "third field", Field.Store.YES));
    d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
    writer.AddDocument(d2);
    writer.Dispose();
    writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    writer.AddIndexes(dir2);
    writer.Dispose();
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir1);
    Assert.AreEqual(2, sis.Count);
    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
    // make sure the ordering of the "external" segment is preserved
    Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
    Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
    Assert.AreEqual("f4", fis2.FieldInfo(3).Name);
    dir1.Dispose();
    dir2.Dispose();
}
public override System.Object DoBody()
{
    SegmentInfos infos = new SegmentInfos();
    infos.Read(directory);
    if (infos.Count == 1)
    {
        // index is optimized
        return SegmentReader.Get(infos, infos.Info(0), closeDirectory);
    }
    IndexReader[] readers = new IndexReader[infos.Count];
    for (int i = 0; i < infos.Count; i++)
        readers[i] = SegmentReader.Get(infos.Info(i));
    return new MultiReader(directory, infos, closeDirectory, readers);
}
internal int docShift; // total # deleted docs that were compacted by this merge

public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
{
    this.docMaps = docMaps;
    SegmentInfo firstSegment = merge.segments.Info(0);
    int i = 0;
    while (true)
    {
        SegmentInfo info = infos.Info(i);
        if (info.Equals(firstSegment))
        {
            break;
        }
        minDocID += info.docCount;
        i++;
    }
    int numDocs = 0;
    for (int j = 0; j < docMaps.Length; i++, j++)
    {
        numDocs += infos.Info(i).docCount;
        System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
    }
    maxDocID = minDocID + numDocs;
    starts = new int[docMaps.Length];
    newStarts = new int[docMaps.Length];
    starts[0] = minDocID;
    newStarts[0] = minDocID;
    for (i = 1; i < docMaps.Length; i++)
    {
        int lastDocCount = merge.segments.Info(i - 1).docCount;
        starts[i] = starts[i - 1] + lastDocCount;
        newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
    }
    docShift = numDocs - mergedDocCount;
    // There are rare cases when docShift is 0. It happens
    // if you try to delete a docID that's out of bounds,
    // because the SegmentReader still allocates deletedDocs
    // and pretends it has deletions ... so we can't make
    // this assert here
    // assert docShift > 0;
    // Make sure it all adds up:
    System.Diagnostics.Debug.Assert(docShift == maxDocID - (newStarts[docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts[docMaps.Length - 1]));
}
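For intuition, a made-up worked example of the bookkeeping above: suppose the merge covers two segments with docCounts {10, 20} and delCounts {2, 5}, and 100 documents precede the first of them, so minDocID = 100 and maxDocID = 130. Then starts = {100, 110} and newStarts = {100, 108}, and if the merged segment ends up holding mergedDocCount = 23 live documents, docShift = 30 - 23 = 7, i.e. every docID at or beyond maxDocID moves down by 7 after the merge.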
protected internal override object DoBody(System.String segmentFileName)
{
    SegmentInfos infos = new SegmentInfos();
    infos.Read(directory, segmentFileName);
    DirectoryIndexReader reader;
    if (infos.Count == 1)
    {
        // index is optimized
        reader = SegmentReader.Get(readOnly, infos, infos.Info(0), closeDirectory);
    }
    else if (readOnly)
    {
        reader = new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory);
    }
    else
    {
        reader = new MultiSegmentReader(directory, infos, closeDirectory, false);
    }
    reader.SetDeletionPolicy(deletionPolicy);
    return reader;
}
protected internal override object DoBody(string segmentFileName)
{
    var sis = new SegmentInfos();
    sis.Read(directory, segmentFileName);
    var readers = new SegmentReader[sis.Size()];
    for (int i = sis.Size() - 1; i >= 0; i--)
    {
        System.IO.IOException prior = null;
        bool success = false;
        try
        {
            readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
            success = true;
        }
        catch (System.IO.IOException ex)
        {
            prior = ex;
        }
        finally
        {
            if (!success)
            {
                IOUtils.CloseWhileHandlingException(prior, readers);
            }
        }
    }
    return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
}
public virtual void TestSingleMergeableSegment()
{
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 3);
    AddDocs(writer, 5);
    AddDocs(writer, 3);
    // delete the last document, so that the last segment is merged.
    writer.DeleteDocuments(new Term("id", "10"));
    writer.Dispose();
    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 3;
    conf.SetMergePolicy(lmp);
    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();
    // Verify that the last segment does not have deletions.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(3, sis.Count);
    Assert.IsFalse(sis.Info(2).HasDeletions);
}
internal virtual String SegString(Directory dir)
{
    var b = new System.Text.StringBuilder();
    int numSegments = segments.Count;
    for (int i = 0; i < numSegments; i++)
    {
        if (i > 0)
        {
            b.Append(' ');
        }
        b.Append(segments.Info(i).SegString(dir));
    }
    if (info != null)
    {
        b.Append(" into ").Append(info.name);
    }
    if (optimize)
    {
        b.Append(" [optimize]");
    }
    if (mergeDocStores)
    {
        b.Append(" [mergeDocStores]");
    }
    return b.ToString();
}
public virtual void TestByteSizeLimit()
{
    // tests that the max merge size constraint is applied during forceMerge.
    Directory dir = new RAMDirectory();
    // Prepare an index w/ several small segments and a large one.
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    const int numSegments = 15;
    for (int i = 0; i < numSegments; i++)
    {
        int numDocs = i == 7 ? 30 : 1;
        AddDocs(writer, numDocs);
    }
    writer.Dispose();
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    double min = sis.Info(0).SizeInBytes();
    conf = NewWriterConfig();
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
    conf.SetMergePolicy(lmp);
    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();
    // Should only be 3 segments in the index, because one of them exceeds the size limit
    sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(3, sis.Size());
}
private int[] starts; // 1st docno for each segment
#endregion Fields

#region Constructors
/// <summary>Construct reading the named set of readers. </summary>
internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory, bool readOnly)
    : base(directory, sis, closeDirectory, readOnly)
{
    // To reduce the chance of hitting FileNotFound
    // (and having to retry), we open segments in
    // reverse because IndexWriter merges & deletes
    // the newest segments first.
    SegmentReader[] readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        try
        {
            readers[i] = SegmentReader.Get(readOnly, sis.Info(i));
        }
        catch (System.IO.IOException e)
        {
            // Close all readers we had opened:
            for (i++; i < sis.Count; i++)
            {
                try
                {
                    readers[i].Close();
                }
                catch (System.IO.IOException)
                {
                    // keep going - we want to clean up as much as possible
                }
            }
            throw e;
        }
    }
    Initialize(readers);
}
/// <summary>Merges all segments from an array of indexes into this index.
///
/// <p>This may be used to parallelize batch indexing. A large document
/// collection can be broken into sub-collections. Each sub-collection can be
/// indexed in parallel, on a different thread, process or machine. The
/// complete index can then be created by merging sub-collection indexes
/// with this method.
///
/// <p>After this completes, the index is optimized.
/// </summary>
public virtual void AddIndexes(Directory[] dirs)
{
    lock (this)
    {
        Optimize(); // start with zero or 1 seg
        int start = segmentInfos.Count;
        for (int i = 0; i < dirs.Length; i++)
        {
            SegmentInfos sis = new SegmentInfos(); // read infos from dir
            sis.Read(dirs[i]);
            for (int j = 0; j < sis.Count; j++)
            {
                segmentInfos.Add(sis.Info(j)); // add each info
            }
        }
        // merge newly added segments in log(n) passes
        while (segmentInfos.Count > start + mergeFactor)
        {
            for (int base_Renamed = start; base_Renamed < segmentInfos.Count; base_Renamed++)
            {
                int end = System.Math.Min(segmentInfos.Count, base_Renamed + mergeFactor);
                if (end - base_Renamed > 1)
                {
                    MergeSegments(base_Renamed, end);
                }
            }
        }
        Optimize(); // final cleanup
    }
}
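A hedged sketch of the parallel-indexing pattern that summary describes, against the legacy 2.x-style API this example comes from (`subDir1`, `subDir2`, and `mainDir` are hypothetical directories, e.g. each filled by a separate worker thread or machine):

// Hypothetical sketch: combine independently built sub-indexes into one index.
Directory[] subIndexes = new Directory[] { subDir1, subDir2 };
IndexWriter merged = new IndexWriter(mainDir, new StandardAnalyzer(), true); // create = true
merged.AddIndexes(subIndexes); // optimizes, appends each sub-index's segment infos, optimizes again
merged.Close();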
public virtual void TestSingleMergeableTooLargeSegment()
{
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 5, true);
    // delete the last document
    writer.DeleteDocuments(new Term("id", "4"));
    writer.Dispose();
    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 2;
    conf.SetMergePolicy(lmp);
    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();
    // Verify that the too-large segment was not merged and still has deletions.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(1, sis.Size());
    Assert.IsTrue(sis.Info(0).HasDeletions());
}
protected internal override object DoBody(string segmentFileName)
{
    var sis = new SegmentInfos();
    sis.Read(directory, segmentFileName);
    var readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        System.IO.IOException prior = null;
        bool success = false;
        try
        {
            readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
            success = true;
        }
        catch (System.IO.IOException ex)
        {
            prior = ex;
        }
        finally
        {
            if (!success)
            {
                IOUtils.DisposeWhileHandlingException(prior, readers);
            }
        }
    }
    return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
}
protected internal override object DoBody(string segmentFileName)
{
    var sis = new SegmentInfos();
    sis.Read(directory, segmentFileName);
    var readers = new SegmentReader[sis.Count];
    // LUCENENET: Ported over changes from 4.8.1 to this method
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        //IOException prior = null; // LUCENENET: Not used
        bool success = false;
        try
        {
            readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
            success = true;
        }
        finally
        {
            if (!success)
            {
                IOUtils.DisposeWhileHandlingException(readers);
            }
        }
    }
    return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
}
protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
{
    bool doCFS;
    if (!useCompoundFile)
    {
        doCFS = false;
    }
    else if (noCFSRatio == 1.0)
    {
        doCFS = true;
    }
    else
    {
        long totSize = 0;
        for (int i = 0; i < infos.Count; i++)
        {
            totSize += Size(infos.Info(i));
        }
        long mergeSize = 0;
        for (int i = 0; i < infosToMerge.Count; i++)
        {
            mergeSize += Size(infosToMerge.Info(i));
        }
        doCFS = mergeSize <= noCFSRatio * totSize;
    }
    return new OneMerge(infosToMerge, doCFS);
}
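In the ratio test above, the merged segment gets the compound file format only when the merge's input size is at most noCFSRatio of the total index size. For example, with noCFSRatio = 0.1, a merge whose inputs add up to more than 10% of the whole index is written as non-compound files even though useCompoundFile is enabled.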
public virtual void TestAddIndexes()
{
    Directory dir1 = NewDirectory();
    Directory dir2 = NewDirectory();
    IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    Document d1 = new Document();
    d1.Add(new TextField("f1", "first field", Field.Store.YES));
    d1.Add(new TextField("f2", "second field", Field.Store.YES));
    writer.AddDocument(d1);
    writer.Dispose();
    writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    Document d2 = new Document();
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.StoreTermVectors = true;
    d2.Add(new TextField("f2", "second field", Field.Store.YES));
    d2.Add(new Field("f1", "first field", customType2));
    d2.Add(new TextField("f3", "third field", Field.Store.YES));
    d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
    writer.AddDocument(d2);
    writer.Dispose();
    writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    writer.AddIndexes(dir2);
    writer.Dispose();
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir1);
    Assert.AreEqual(2, sis.Size());
    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
    // make sure the ordering of the "external" segment is preserved
    Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
    Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
    Assert.AreEqual("f4", fis2.FieldInfo(3).Name);
    dir1.Dispose();
    dir2.Dispose();
}
/// <summary>
/// Returns the merges necessary to merge the index, taking the max merge
/// size or max merge docs into consideration. This method attempts to respect
/// the <paramref name="maxNumSegments"/> parameter, however it might be, due to size
/// constraints, that more than that number of segments will remain in the
/// index. Also, this method does not guarantee that exactly
/// <paramref name="maxNumSegments"/> will remain, but &lt;= that number.
/// </summary>
private MergeSpecification FindForcedMergesSizeLimit(SegmentInfos infos, int maxNumSegments, int last)
{
    MergeSpecification spec = new MergeSpecification();
    IList<SegmentCommitInfo> segments = infos.AsList();
    int start = last - 1;
    while (start >= 0)
    {
        SegmentCommitInfo info = infos.Info(start);
        if (Size(info) > m_maxMergeSizeForForcedMerge || SizeDocs(info) > m_maxMergeDocs)
        {
            if (IsVerbose)
            {
                Message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + m_maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + m_maxMergeDocs + ")");
            }
            // need to skip that segment + add a merge for the 'right' segments,
            // unless there is only 1 which is merged.
            if (last - start - 1 > 1 || (start != last - 1 && !IsMerged(infos, infos.Info(start + 1))))
            {
                // there is more than 1 segment to the right of
                // this one, or a mergeable single segment.
                spec.Add(new OneMerge(segments.SubList(start + 1, last)));
            }
            last = start;
        }
        else if (last - start == m_mergeFactor)
        {
            // mergeFactor eligible segments were found, add them as a merge.
            spec.Add(new OneMerge(segments.SubList(start, last)));
            last = start;
        }
        --start;
    }
    // Add any left-over segments, unless there is just 1
    // already fully merged
    if (last > 0 && (++start + 1 < last || !IsMerged(infos, infos.Info(start))))
    {
        spec.Add(new OneMerge(segments.SubList(start, last)));
    }
    return spec.Merges.Count == 0 ? null : spec;
}
internal ReaderCommit(SegmentInfos infos, Directory dir)
{
    segmentsFileName = infos.GetCurrentSegmentFileName();
    this.dir = dir;
    int size = infos.Count;
    files = new List<string>(size);
    files.Add(segmentsFileName);
    for (int i = 0; i < size; i++)
    {
        SegmentInfo info = infos.Info(i);
        if (info.dir == dir)
        {
            SupportClass.CollectionsSupport.AddAll(info.Files(), files);
        }
    }
    version = infos.GetVersion();
    generation = infos.GetGeneration();
    isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
}
public override System.Object DoBody(System.String segmentFileName)
{
    SegmentInfos infos = new SegmentInfos();
    infos.Read(directory, segmentFileName);
    if (infos.Count == 1)
    {
        // index is optimized
        return SegmentReader.Get(infos, infos.Info(0), closeDirectory);
    }
    else
    {
        // To reduce the chance of hitting FileNotFound
        // (and having to retry), we open segments in
        // reverse because IndexWriter merges & deletes
        // the newest segments first.
        IndexReader[] readers = new IndexReader[infos.Count];
        for (int i = infos.Count - 1; i >= 0; i--)
        {
            try
            {
                readers[i] = SegmentReader.Get(infos.Info(i));
            }
            catch (System.IO.IOException e)
            {
                // Close all readers we had opened:
                for (i++; i < infos.Count; i++)
                {
                    readers[i].Close();
                }
                throw e;
            }
        }
        return new MultiReader(directory, infos, closeDirectory, readers);
    }
}
internal void DecRef(SegmentInfos segmentInfos)
{
    int size = segmentInfos.Count;
    for (int i = 0; i < size; i++)
    {
        SegmentInfo segmentInfo = segmentInfos.Info(i);
        if (segmentInfo.dir == directory)
        {
            DecRef(segmentInfo.Files());
        }
    }
}
public CommitPoint(IndexFileDeleter enclosingInstance, ICollection<IndexCommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos)
{
    InitBlock(enclosingInstance);
    this.directory = directory;
    this.commitsToDelete = commitsToDelete;
    segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
    version = segmentInfos.GetVersion();
    generation = segmentInfos.GetGeneration();
    int size = segmentInfos.Count;
    files = new List<string>(size);
    files.Add(segmentsFileName);
    gen = segmentInfos.GetGeneration();
    for (int i = 0; i < size; i++)
    {
        SegmentInfo segmentInfo = segmentInfos.Info(i);
        if (segmentInfo.dir == Enclosing_Instance.directory)
        {
            SupportClass.CollectionsSupport.AddAll(segmentInfo.Files(), files);
        }
    }
    isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();
}
/// <summary>
/// Finds merges necessary to expunge all deletes from the
/// index. We simply merge adjacent segments that have
/// deletes, up to mergeFactor at a time.
/// </summary>
public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos, IndexWriter writer)
{
    this.writer = writer;
    int numSegments = segmentInfos.Count;
    Message("findMergesToExpungeDeletes: " + numSegments + " segments");
    MergeSpecification spec = new MergeSpecification();
    int firstSegmentWithDeletions = -1;
    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = segmentInfos.Info(i);
        if (info.HasDeletions())
        {
            Message("  segment " + info.name + " has deletions");
            if (firstSegmentWithDeletions == -1)
            {
                firstSegmentWithDeletions = i;
            }
            else if (i - firstSegmentWithDeletions == mergeFactor)
            {
                // We've seen mergeFactor segments in a row with
                // deletions, so force a merge now:
                Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
                firstSegmentWithDeletions = i;
            }
        }
        else if (firstSegmentWithDeletions != -1)
        {
            // End of a sequence of segments with deletions, so,
            // merge those past segments even if it's fewer than
            // mergeFactor segments
            Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
            spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, i), useCompoundFile));
            firstSegmentWithDeletions = -1;
        }
    }
    if (firstSegmentWithDeletions != -1)
    {
        Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
        spec.Add(new OneMerge(segmentInfos.Range(firstSegmentWithDeletions, numSegments), useCompoundFile));
    }
    return spec;
}
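A hedged sketch of how this policy is typically triggered: in the Lucene(.Net) 2.4-3.x line this example comes from, IndexWriter exposes an ExpungeDeletes call that consults the policy's FindMergesToExpungeDeletes (`writer` here is assumed to be an open writer on an index with deletions):

// Hypothetical trigger: after deletions, ask the writer to expunge them.
writer.DeleteDocuments(new Term("id", "42"));
writer.ExpungeDeletes(); // runs the merges this policy returns
writer.Close();          // commit the result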
/// <summary>Returns the number of documents currently in this index. </summary>
public virtual int DocCount()
{
    lock (this)
    {
        int count = 0;
        for (int i = 0; i < segmentInfos.Count; i++)
        {
            SegmentInfo si = segmentInfos.Info(i);
            count += si.docCount;
        }
        return count;
    }
}
public CommitPoint(IndexFileDeleter enclosingInstance, System.Collections.ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos)
{
    InitBlock(enclosingInstance);
    this.directory = directory;
    this.commitsToDelete = commitsToDelete;
    userData = segmentInfos.GetUserData();
    segmentsFileName = segmentInfos.GetCurrentSegmentFileName();
    version = segmentInfos.GetVersion();
    generation = segmentInfos.GetGeneration();
    files = segmentInfos.Files(directory, true);
    gen = segmentInfos.GetGeneration();
    isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions();
    System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory));
}
protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos)
{
    lock (this)
    {
        if (infos.Count == 1)
        {
            // The index has only one segment now, so we can't refresh the MultiSegmentReader.
            // Return a new SegmentReader instead
            SegmentReader newReader = SegmentReader.Get(infos, infos.Info(0), false);
            return newReader;
        }
        else
        {
            return new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache);
        }
    }
}
/// <summary> Rolls back state to just before the commit (this is
/// called by commit() if there is some exception while
/// committing).
/// </summary>
internal virtual void RollbackCommit()
{
    if (directoryOwner)
    {
        for (int i = 0; i < segmentInfos.Count; i++)
        {
            // Rollback each segmentInfo. Because the
            // SegmentReader holds a reference to the
            // SegmentInfo we can't [easily] just replace
            // segmentInfos, so we reset it in place instead:
            segmentInfos.Info(i).Reset(rollbackSegmentInfos.Info(i));
        }
        rollbackSegmentInfos = null;
    }
    hasChanges = rollbackHasChanges;
}
private bool IsOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Dictionary<SegmentInfo, SegmentInfo> segmentsToOptimize)
{
    int numSegments = infos.Count;
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        if (segmentsToOptimize.ContainsKey(info))
        {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(writer, optimizeInfo));
}
private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize, IState state)
{
    int numSegments = infos.Count;
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        if (segmentsToOptimize.Contains(info))
        {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo, state));
}
private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
{
    int numSegments = infos.Count;
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        if (segmentsToOptimize.Contains(info))
        {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
}
internal void IncRef(SegmentInfos segmentInfos, bool isCommit)
{
    int size = segmentInfos.Count;
    for (int i = 0; i < size; i++)
    {
        SegmentInfo segmentInfo = segmentInfos.Info(i);
        if (segmentInfo.dir == directory)
        {
            IncRef(segmentInfo.Files());
        }
    }
    if (isCommit)
    {
        // Since this is a commit point, also incref its
        // segments_N file:
        GetRefCount(segmentInfos.GetCurrentSegmentFileName()).IncRef();
    }
}
protected internal override System.Object DoBody(System.String segmentFileName)
{
    SegmentInfos infos = new SegmentInfos();
    infos.Read(directory, segmentFileName);
    DirectoryIndexReader reader;
    if (infos.Count == 1)
    {
        // index is optimized
        reader = SegmentReader.Get(infos, infos.Info(0), closeDirectory);
    }
    else
    {
        reader = new MultiSegmentReader(directory, infos, closeDirectory);
    }
    reader.SetDeletionPolicy(deletionPolicy);
    return reader;
}
internal virtual void Init(Directory directory, SegmentInfos segmentInfos, bool closeDirectory, bool readOnly)
{
    this.directory = directory;
    this.segmentInfos = segmentInfos;
    this.closeDirectory = closeDirectory;
    this.readOnly = readOnly;
    if (!readOnly && segmentInfos != null)
    {
        // we assume that this segments_N was properly sync'd prior
        for (int i = 0; i < segmentInfos.Count; i++)
        {
            SegmentInfo info = segmentInfos.Info(i);
            IList<string> files = info.Files();
            for (int j = 0; j < files.Count; j++)
            {
                synced[files[j]] = files[j];
            }
        }
    }
}
/// <summary>
/// Returns <c>true</c> if the number of segments eligible for
/// merging is less than or equal to the specified
/// <paramref name="maxNumSegments"/>.
/// </summary>
protected virtual bool IsMerged(SegmentInfos infos, int maxNumSegments, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
{
    int numSegments = infos.Count;
    int numToMerge = 0;
    SegmentCommitInfo mergeInfo = null;
    bool segmentIsOriginal = false;
    for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++)
    {
        SegmentCommitInfo info = infos.Info(i);
        bool? isOriginal;
        segmentsToMerge.TryGetValue(info, out isOriginal);
        if (isOriginal != null)
        {
            segmentIsOriginal = isOriginal.Value;
            numToMerge++;
            mergeInfo = info;
        }
    }
    return numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || IsMerged(infos, mergeInfo));
}
/// <summary>This constructor is only used for <see cref="Reopen()" /> </summary>
internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
{
    this.internalDirectory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = infos;
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        synced.UnionWith(infos.Files(directory, true));
    }
    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    IDictionary<string, int> segmentReaders = new HashMap<string, int>();
    if (oldReaders != null)
    {
        // create a Map SegmentName->SegmentReader
        for (int i = 0; i < oldReaders.Length; i++)
        {
            segmentReaders[oldReaders[i].SegmentName] = i;
        }
    }
    var newReaders = new SegmentReader[infos.Count];
    // remember which readers are shared between the old and the re-opened
    // DirectoryReader - we have to incRef those readers
    var readerShared = new bool[infos.Count];
    for (int i = infos.Count - 1; i >= 0; i--)
    {
        // find SegmentReader for this segment
        if (!segmentReaders.ContainsKey(infos.Info(i).name))
        {
            // this is a new segment, no old SegmentReader can be reused
            newReaders[i] = null;
        }
        else
        {
            // there is an old reader for this segment - we'll try to reopen it
            newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
        }
        bool success = false;
        try
        {
            SegmentReader newReader;
            if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
            {
                // We should never see a totally new segment during cloning
                System.Diagnostics.Debug.Assert(!doClone);
                // this is a new reader; in case we hit an exception we can close it safely
                newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
            }
            else
            {
                newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
            }
            if (newReader == newReaders[i])
            {
                // this reader will be shared between the old and the new one,
                // so we must incRef it
                readerShared[i] = true;
                newReader.IncRef();
            }
            else
            {
                readerShared[i] = false;
                newReaders[i] = newReader;
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                for (i++; i < infos.Count; i++)
                {
                    if (newReaders[i] != null)
                    {
                        try
                        {
                            if (!readerShared[i])
                            {
                                // this is a new subReader that is not used by the old one,
                                // we can close it
                                newReaders[i].Close();
                            }
                            else
                            {
                                // this subReader is also used by the old reader, so instead
                                // of closing we must decRef it
                                newReaders[i].DecRef();
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                }
            }
        }
    }
    // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
    Initialize(newReaders);
    // try to copy unchanged norms from the old normsCache to the new one
    if (oldNormsCache != null)
    {
        foreach (var entry in oldNormsCache)
        {
            String field = entry.Key;
            if (!HasNorms(field))
            {
                continue;
            }
            byte[] oldBytes = entry.Value;
            var bytes = new byte[MaxDoc];
            for (int i = 0; i < subReaders.Length; i++)
            {
                int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];
                // this SegmentReader was not re-opened, we can copy all of its norms
                if (segmentReaders.ContainsKey(subReaders[i].SegmentName) && (oldReaders[oldReaderIndex] == subReaders[i] || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
                {
                    // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                    // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                    // which is synchronized
                    Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                }
                else
                {
                    subReaders[i].Norms(field, bytes, starts[i]);
                }
            }
            normsCache[field] = bytes; // update cache
        }
    }
}
internal bool ApplyDeletes(SegmentInfos infos)
{
    lock (this)
    {
        if (!HasDeletes())
            return false;
        if (infoStream != null)
            Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + infos.Count + " segments.");
        int infosEnd = infos.Count;
        int docStart = 0;
        bool any = false;
        for (int i = 0; i < infosEnd; i++)
        {
            // Make sure we never attempt to apply deletes to
            // segment in external dir
            System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
            SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
            try
            {
                any |= ApplyDeletes(reader, docStart);
                docStart += reader.MaxDoc();
            }
            finally
            {
                writer.readerPool.Release(reader);
            }
        }
        deletesFlushed.Clear();
        return any;
    }
}
/// <summary>
/// Returns a <seealso cref="Status"/> instance detailing
/// the state of the index.
/// </summary>
/// <param name="onlySegments"> list of specific segment names to check
///
/// <p>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p><b>WARNING</b>: make sure
/// you only call this when the index is not opened by any
/// writer. </param>
public virtual Status DoCheckIndex(IList<string> onlySegments)
{
    NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.Dir = Dir;
    try
    {
        sis.Read(Dir);
    }
    catch (Exception t)
    {
        Msg(infoStream, "ERROR: could not read any segments file in directory");
        result.MissingSegments = true;
        if (infoStream != null)
        {
            // LUCENENET NOTE: Some tests rely on the error type being in
            // the message. We can't get the error type with StackTrace, we
            // need ToString() for that.
            infoStream.WriteLine(t.ToString());
            //infoStream.WriteLine(t.StackTrace);
        }
        return result;
    }
    // find the oldest and newest segment versions
    string oldest = Convert.ToString(int.MaxValue), newest = Convert.ToString(int.MinValue);
    string oldSegs = null;
    bool foundNonNullVersion = false;
    IComparer<string> versionComparator = StringHelper.VersionComparator;
    foreach (SegmentCommitInfo si in sis.Segments)
    {
        string version = si.Info.Version;
        if (version == null)
        {
            // pre-3.1 segment
            oldSegs = "pre-3.1";
        }
        else
        {
            foundNonNullVersion = true;
            if (versionComparator.Compare(version, oldest) < 0)
            {
                oldest = version;
            }
            if (versionComparator.Compare(version, newest) > 0)
            {
                newest = version;
            }
        }
    }
    int numSegments = sis.Size();
    string segmentsFileName = sis.SegmentsFileName;
    // note: we only read the format byte (required preamble) here!
    IndexInput input = null;
    try
    {
        input = Dir.OpenInput(segmentsFileName, IOContext.READONCE);
    }
    catch (Exception t)
    {
        Msg(infoStream, "ERROR: could not open segments file in directory");
        if (infoStream != null)
        {
            // LUCENENET NOTE: Some tests rely on the error type being in
            // the message. We can't get the error type with StackTrace, we
            // need ToString() for that.
            infoStream.WriteLine(t.ToString());
            //infoStream.WriteLine(t.StackTrace);
        }
        result.CantOpenSegments = true;
        return result;
    }
    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (Exception t)
    {
        Msg(infoStream, "ERROR: could not read segment file version in directory");
        if (infoStream != null)
        {
            // LUCENENET NOTE: Some tests rely on the error type being in
            // the message. We can't get the error type with StackTrace, we
            // need ToString() for that.
            infoStream.WriteLine(t.ToString());
            //infoStream.WriteLine(t.StackTrace);
        }
        result.MissingSegmentVersion = true;
        return result;
    }
    finally
    {
        if (input != null)
        {
            input.Dispose();
        }
    }
    string sFormat = "";
    bool skip = false;
    result.SegmentsFileName = segmentsFileName;
    result.NumSegments = numSegments;
    result.UserData = sis.UserData;
    string userDataString;
    if (sis.UserData.Count > 0)
    {
        userDataString = " userData=" + sis.UserData;
    }
    else
    {
        userDataString = "";
    }
    string versionString = null;
    if (oldSegs != null)
    {
        if (foundNonNullVersion)
        {
            versionString = "versions=[" + oldSegs + " .. " + newest + "]";
        }
        else
        {
            versionString = "version=" + oldSegs;
        }
    }
    else
    {
        versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
    }
    Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);
    if (onlySegments != null)
    {
        result.Partial = true;
        if (infoStream != null)
        {
            infoStream.Write("\nChecking only these segments:");
            foreach (string s in onlySegments)
            {
                infoStream.Write(" " + s);
            }
        }
        result.SegmentsChecked.AddRange(onlySegments);
        Msg(infoStream, ":");
    }
    if (skip)
    {
        Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.ToolOutOfDate = true;
        return result;
    }
    result.NewSegments = (SegmentInfos)sis.Clone();
    result.NewSegments.Clear();
    result.MaxSegmentName = -1;
    for (int i = 0; i < numSegments; i++)
    {
        SegmentCommitInfo info = sis.Info(i);
        int segmentName = 0;
        try
        {
            segmentName = int.Parse /*Convert.ToInt32*/(info.Info.Name.Substring(1));
        }
        catch
        {
        }
        if (segmentName > result.MaxSegmentName)
        {
            result.MaxSegmentName = segmentName;
        }
        if (onlySegments != null && !onlySegments.Contains(info.Info.Name))
        {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.SegmentInfos.Add(segInfoStat);
        Msg(infoStream, "  " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount);
        segInfoStat.Name = info.Info.Name;
        segInfoStat.DocCount = info.Info.DocCount;
        string version = info.Info.Version;
        if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0)
        {
            throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount);
        }
        int toLoseDocCount = info.Info.DocCount;
        AtomicReader reader = null;
        try
        {
            Codec codec = info.Info.Codec;
            Msg(infoStream, "    codec=" + codec);
            segInfoStat.Codec = codec;
            Msg(infoStream, "    compound=" + info.Info.UseCompoundFile);
            segInfoStat.Compound = info.Info.UseCompoundFile;
            Msg(infoStream, "    numFiles=" + info.Files().Count);
            segInfoStat.NumFiles = info.Files().Count;
            segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
            if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null)
            {
                // don't print size in bytes if its a 3.0 segment with shared docstores
                Msg(infoStream, "    size (MB)=" + segInfoStat.SizeMB.ToString(nf));
            }
            IDictionary<string, string> diagnostics = info.Info.Diagnostics;
            segInfoStat.Diagnostics = diagnostics;
            if (diagnostics.Count > 0)
            {
                Msg(infoStream, "    diagnostics = " + diagnostics);
            }
            if (!info.HasDeletions())
            {
                Msg(infoStream, "    no deletions");
                segInfoStat.HasDeletions = false;
            }
            else
            {
                Msg(infoStream, "    has deletions [delGen=" + info.DelGen + "]");
                segInfoStat.HasDeletions = true;
                segInfoStat.DeletionsGen = info.DelGen;
            }
            if (infoStream != null)
            {
                infoStream.Write("    test: open reader.........");
            }
            reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
            Msg(infoStream, "OK");
            segInfoStat.OpenReaderPassed = true;
            if (infoStream != null)
            {
                infoStream.Write("    test: check integrity.....");
            }
            reader.CheckIntegrity();
            Msg(infoStream, "OK");
            if (infoStream != null)
            {
                infoStream.Write("    test: check live docs.....");
            }
            int numDocs = reader.NumDocs;
            toLoseDocCount = numDocs;
            if (reader.HasDeletions)
            {
                if (reader.NumDocs != info.Info.DocCount - info.DelCount)
                {
                    throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs);
                }
                if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc)
                {
                    throw new Exception("too many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs));
                }
                if (info.Info.DocCount - numDocs != info.DelCount)
                {
                    throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                }
                Bits liveDocs = reader.LiveDocs;
                if (liveDocs == null)
                {
                    throw new Exception("segment should have deletions, but liveDocs is null");
                }
                else
                {
                    int numLive = 0;
                    for (int j = 0; j < liveDocs.Length(); j++)
                    {
                        if (liveDocs.Get(j))
                        {
                            numLive++;
                        }
                    }
                    if (numLive != numDocs)
                    {
                        throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
                    }
                }
                segInfoStat.NumDeleted = info.Info.DocCount - numDocs;
                Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]");
            }
            else
            {
                if (info.DelCount != 0)
                {
                    throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs));
                }
                Bits liveDocs = reader.LiveDocs;
                if (liveDocs != null)
                {
                    // its ok for it to be non-null here, as long as none are set right?
                    for (int j = 0; j < liveDocs.Length(); j++)
                    {
                        if (!liveDocs.Get(j))
                        {
                            throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
                        }
                    }
                }
                Msg(infoStream, "OK");
            }
            if (reader.MaxDoc != info.Info.DocCount)
            {
                throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount);
            }
            // Test getFieldInfos()
            if (infoStream != null)
            {
                infoStream.Write("    test: fields..............");
            }
            FieldInfos fieldInfos = reader.FieldInfos;
            Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]");
            segInfoStat.NumFields = fieldInfos.Size();
            // Test Field Norms
            segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream);
            // Test the Term Index
            segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose);
            // Test Stored Fields
            segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream);
            // Test Term Vectors
            segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed);
            segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream);
            // Rethrow the first exception we encountered
            // this will cause stats for failed segments to be incremented properly
            if (segInfoStat.FieldNormStatus.Error != null)
            {
                throw new Exception("Field Norm test failed");
            }
            else if (segInfoStat.TermIndexStatus.Error != null)
            {
                throw new Exception("Term Index test failed");
            }
            else if (segInfoStat.StoredFieldStatus.Error != null)
            {
                throw new Exception("Stored Field test failed");
            }
            else if (segInfoStat.TermVectorStatus.Error != null)
            {
                throw new Exception("Term Vector test failed");
            }
            else if (segInfoStat.DocValuesStatus.Error != null)
            {
                throw new Exception("DocValues test failed");
            }
            Msg(infoStream, "");
        }
        catch (Exception t)
        {
            Msg(infoStream, "FAILED");
            string comment;
            comment = "fixIndex() would remove reference to this segment";
            Msg(infoStream, "    WARNING: " + comment + "; full exception:");
            if (infoStream != null)
            {
                // LUCENENET NOTE: Some tests rely on the error type being in
                // the message. We can't get the error type with StackTrace, we
                // need ToString() for that.
                infoStream.WriteLine(t.ToString());
                //infoStream.WriteLine(t.StackTrace);
            }
            Msg(infoStream, "");
            result.TotLoseDocCount += toLoseDocCount;
            result.NumBadSegments++;
            continue;
        }
        finally
        {
            if (reader != null)
            {
                reader.Dispose();
            }
        }
        // Keeper
        result.NewSegments.Add((SegmentCommitInfo)info.Clone());
    }
    if (0 == result.NumBadSegments)
    {
        result.Clean = true;
    }
    else
    {
        Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected");
    }
    if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter)))
    {
        result.Clean = false;
        result.NewSegments.Counter = result.MaxSegmentName + 1;
        Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName);
    }
    if (result.Clean)
    {
        Msg(infoStream, "No problems were detected with this index.\n");
    }
    return result;
}
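A hedged driver for the checker above (Lucene.Net 4.8-style API assumed; `dir` is a hypothetical Directory holding the index to inspect, and the parameterless DoCheckIndex overload is assumed to check all segments):

// Hypothetical sketch: run CheckIndex and inspect the Status it returns.
CheckIndex checker = new CheckIndex(dir);
CheckIndex.Status status = checker.DoCheckIndex(); // walks every segment via sis.Info(i)
if (!status.Clean)
{
    Console.WriteLine(status.NumBadSegments + " broken segments, " + status.TotLoseDocCount + " documents would be lost");
}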
/// <summary> Merges all segments from an array of indexes into this
/// index.
///
/// <p/>This may be used to parallelize batch indexing. A large document
/// collection can be broken into sub-collections. Each sub-collection can be
/// indexed in parallel, on a different thread, process or machine. The
/// complete index can then be created by merging sub-collection indexes
/// with this method.
///
/// <p/><b>NOTE:</b> the index in each Directory must not be
/// changed (opened by a writer) while this method is
/// running. This method does not acquire a write lock in
/// each input Directory, so it is up to the caller to
/// enforce this.
///
/// <p/><b>NOTE:</b> while this is running, any attempts to
/// add or delete documents (with another thread) will be
/// paused until this method completes.
///
/// <p/>This method is transactional in how Exceptions are
/// handled: it does not commit a new segments_N file until
/// all indexes are added. This means if an Exception
/// occurs (for example disk full), then either no indexes
/// will have been added or they all will have been.<p/>
///
/// <p/>Note that this requires temporary free space in the
/// Directory up to 2X the sum of all input indexes
/// (including the starting index). If readers/searchers
/// are open against the starting index, then temporary
/// free space required will be higher by the size of the
/// starting index (see {@link #Optimize()} for details).
/// <p/>
///
/// <p/>Once this completes, the final size of the index
/// will be less than the sum of all input index sizes
/// (including the starting index). It could be quite a
/// bit smaller (if there were many pending deletes) or
/// just slightly smaller.<p/>
///
/// <p/>
/// This requires this index not be among those to be added.
///
/// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError
/// you should immediately close the writer. See <a
/// href="#OOME">above</a> for details.<p/>
///
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void AddIndexesNoOptimize(Directory[] dirs)
{
    EnsureOpen();
    NoDupDirs(dirs);
    // Do not allow add docs or deletes while we are running:
    docWriter.PauseAllThreads();
    try
    {
        if (infoStream != null)
            Message("flush at addIndexesNoOptimize");
        Flush(true, false, true);
        bool success = false;
        StartTransaction(false);
        try
        {
            int docCount = 0;
            lock (this)
            {
                EnsureOpen();
                for (int i = 0; i < dirs.Length; i++)
                {
                    if (directory == dirs[i])
                    {
                        // cannot add this index: segments may be deleted in merge before added
                        throw new System.ArgumentException("Cannot add this index to itself");
                    }
                    SegmentInfos sis = new SegmentInfos(); // read infos from dir
                    sis.Read(dirs[i]);
                    for (int j = 0; j < sis.Count; j++)
                    {
                        SegmentInfo info = sis.Info(j);
                        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info), "dup info dir=" + info.dir + " name=" + info.name);
                        docCount += info.docCount;
                        segmentInfos.Add(info); // add each info
                    }
                }
            }
            // Notify DocumentsWriter that the flushed count just increased
            docWriter.UpdateFlushedDocCount(docCount);
            MaybeMerge();
            EnsureOpen();
            // If after merging there remain segments in the index
            // that are in a different directory, just copy these
            // over into our index. This is necessary (before
            // finishing the transaction) to avoid leaving the
            // index in an unusable (inconsistent) state.
            ResolveExternalSegments();
            EnsureOpen();
            success = true;
        }
        finally
        {
            if (success)
            {
                CommitTransaction();
            }
            else
            {
                RollbackTransaction();
            }
        }
    }
    catch (System.OutOfMemoryException oom)
    {
        HandleOOM(oom, "addIndexesNoOptimize");
    }
    finally
    {
        if (docWriter != null)
        {
            docWriter.ResumeAllThreads();
        }
    }
}
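A hedged usage sketch for this transactional variant (2.3-3.x-era API; `writer` and `otherDir` are hypothetical):

// Hypothetical sketch: copy another index's segments in without the optimize passes.
writer.AddIndexesNoOptimize(new Directory[] { otherDir });
writer.Close(); // per the summary, either all of otherDir's segments were added, or none were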
/// <summary>This constructor is only used for {@link #Reopen()} </summary>
internal MultiSegmentReader(Directory directory, SegmentInfos infos, bool closeDirectory, SegmentReader[] oldReaders, int[] oldStarts, Dictionary<string, byte[]> oldNormsCache, bool readOnly)
    : base(directory, infos, closeDirectory, readOnly)
{
    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    Dictionary<string, int> segmentReaders = new Dictionary<string, int>();
    if (oldReaders != null)
    {
        // create a Map SegmentName->SegmentReader
        for (int i = 0; i < oldReaders.Length; i++)
        {
            segmentReaders[oldReaders[i].GetSegmentName()] = i;
        }
    }
    SegmentReader[] newReaders = new SegmentReader[infos.Count];
    // remember which readers are shared between the old and the re-opened
    // MultiSegmentReader - we have to incRef those readers
    bool[] readerShared = new bool[infos.Count];
    for (int i = infos.Count - 1; i >= 0; i--)
    {
        // find SegmentReader for this segment
        int oldReaderIndex;
        if (!segmentReaders.ContainsKey(infos.Info(i).name))
        {
            // this is a new segment, no old SegmentReader can be reused
            newReaders[i] = null;
        }
        else
        {
            oldReaderIndex = segmentReaders[infos.Info(i).name];
            // there is an old reader for this segment - we'll try to reopen it
            newReaders[i] = oldReaders[oldReaderIndex];
        }
        bool success = false;
        try
        {
            SegmentReader newReader;
            if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile())
            {
                // this is a new reader; in case we hit an exception we can close it safely
                newReader = SegmentReader.Get(readOnly, infos.Info(i));
            }
            else
            {
                newReader = (SegmentReader)newReaders[i].ReopenSegment(infos.Info(i));
            }
            if (newReader == newReaders[i])
            {
                // this reader will be shared between the old and the new one,
                // so we must incRef it
                readerShared[i] = true;
                newReader.IncRef();
            }
            else
            {
                readerShared[i] = false;
                newReaders[i] = newReader;
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                for (i++; i < infos.Count; i++)
                {
                    if (newReaders[i] != null)
                    {
                        try
                        {
                            if (!readerShared[i])
                            {
                                // this is a new subReader that is not used by the old one,
                                // we can close it
                                newReaders[i].Close();
                            }
                            else
                            {
                                // this subReader is also used by the old reader, so instead
                                // of closing we must decRef it
                                newReaders[i].DecRef();
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // keep going - we want to clean up as much as possible
                        }
                    }
                }
            }
        }
    }
    // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
    Initialize(newReaders);
    // try to copy unchanged norms from the old normsCache to the new one
    if (oldNormsCache != null)
    {
        IEnumerator<KeyValuePair<string, byte[]>> it = oldNormsCache.GetEnumerator();
        while (it.MoveNext())
        {
            KeyValuePair<string, byte[]> entry = it.Current;
            string field = entry.Key;
            if (!HasNorms(field))
            {
                continue;
            }
            byte[] oldBytes = entry.Value;
            byte[] bytes = new byte[MaxDoc()];
            for (int i = 0; i < subReaders.Length; i++)
            {
                if (segmentReaders.ContainsKey(subReaders[i].GetSegmentName()))
                {
                    int oldReaderIndex = segmentReaders[subReaders[i].GetSegmentName()];
                    // this SegmentReader was not re-opened, we can copy all of its norms
                    if (oldReaders[oldReaderIndex] == subReaders[i] || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field])
                    {
                        // we don't have to synchronize here: either this constructor is called from a SegmentReader,
                        // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
                        // which is synchronized
                        System.Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
                    }
                    else
                    {
                        subReaders[i].Norms(field, bytes, starts[i]);
                    }
                }
                else
                {
                    subReaders[i].Norms(field, bytes, starts[i]);
                }
            }
            normsCache[field] = bytes; // update cache
        }
    }
}
internal ReaderCommit(SegmentInfos infos, Directory dir)
{
    segmentsFileName = infos.GetCurrentSegmentFileName();
    this.dir = dir;
    userData = infos.UserData;
    files = infos.Files(dir, true);
    version = infos.Version;
    generation = infos.Generation;
    isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
}
private void SetRollbackSegmentInfos(SegmentInfos infos)
{
    lock (this)
    {
        rollbackSegmentInfos = (SegmentInfos)infos.Clone();
        System.Diagnostics.Debug.Assert(!rollbackSegmentInfos.HasExternalSegments(directory));
        rollbackSegments = new System.Collections.Hashtable();
        int size = rollbackSegmentInfos.Count;
        for (int i = 0; i < size; i++)
            rollbackSegments[rollbackSegmentInfos.Info(i)] = (System.Int32)i;
    }
}
private System.String SegString(SegmentInfos infos)
{
    lock (this)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        int count = infos.Count;
        for (int i = 0; i < count; i++)
        {
            if (i > 0)
            {
                buffer.Append(' ');
            }
            SegmentInfo info = infos.Info(i);
            buffer.Append(info.SegString(directory));
            if (info.dir != directory)
                buffer.Append("**");
        }
        return buffer.ToString();
    }
}
/// <summary>Forcefully clear changes for the specified segments,
/// and remove from the pool. This is called on successful merge.
/// </summary>
internal virtual void Clear(SegmentInfos infos)
{
    lock (this)
    {
        if (infos == null)
        {
            System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator();
            while (iter.MoveNext())
            {
                System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry)iter.Current;
                ((SegmentReader)ent.Value).hasChanges = false;
            }
        }
        else
        {
            int numSegments = infos.Count;
            for (int i = 0; i < numSegments; i++)
            {
                SegmentInfo info = infos.Info(i);
                if (readerMap.Contains(info))
                {
                    ((SegmentReader)readerMap[info]).hasChanges = false;
                }
            }
        }
    }
}
// Used by near real-time search
internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
{
    this.internalDirectory = writer.Directory;
    this.readOnly = true;
    segmentInfos = infos;
    segmentInfosStart = (SegmentInfos)infos.Clone();
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        synced.UnionWith(infos.Files(internalDirectory, true));
    }
    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    int numSegments = infos.Count;
    var readers = new SegmentReader[numSegments];
    Directory dir = writer.Directory;
    int upto = 0;
    for (int i = 0; i < numSegments; i++)
    {
        bool success = false;
        try
        {
            SegmentInfo info = infos.Info(i);
            if (info.dir == dir)
            {
                readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (upto--; upto >= 0; upto--)
                {
                    try
                    {
                        readers[upto].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }
    this.writer = writer;
    if (upto < readers.Length)
    {
        // This means some segments were in a foreign Directory
        var newReaders = new SegmentReader[upto];
        Array.Copy(readers, 0, newReaders, 0, upto);
        readers = newReaders;
    }
    Initialize(readers);
}
/// <summary>Construct reading the named set of readers. </summary>
internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
{
    internalDirectory = directory;
    this.readOnly = readOnly;
    this.segmentInfos = sis;
    this.deletionPolicy = deletionPolicy;
    this.termInfosIndexDivisor = termInfosIndexDivisor;

    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        synced.UnionWith(sis.Files(directory, true));
    }

    // To reduce the chance of hitting FileNotFound
    // (and having to retry), we open segments in
    // reverse because IndexWriter merges & deletes
    // the newest segments first.
    var readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        bool success = false;
        try
        {
            readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (i++; i < sis.Count; i++)
                {
                    try
                    {
                        readers[i].Close();
                    }
                    catch (System.Exception)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }

    Initialize(readers);
}
/// <summary>Returns a <see cref="Status" /> instance detailing
/// the state of the index.
///
/// <p/>As this method checks every byte in the specified
/// segments, on a large index it can take quite a long
/// time to run.
///
/// <p/><b>WARNING</b>: make sure you only call this when the
/// index is not opened by any writer.
/// </summary>
/// <param name="onlySegments">list of specific segment names to check</param>
public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
{
    System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
    try
    {
        sis.Read(dir);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null)
            infoStream.WriteLine(t.StackTrace);
        return result;
    }

    int numSegments = sis.Count;
    var segmentsFileName = sis.GetCurrentSegmentFileName();
    IndexInput input = null;
    try
    {
        input = dir.OpenInput(segmentsFileName);
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not open segments file in directory");
        if (infoStream != null)
            infoStream.WriteLine(t.StackTrace);
        result.cantOpenSegments = true;
        return result;
    }
    int format = 0;
    try
    {
        format = input.ReadInt();
    }
    catch (System.Exception t)
    {
        Msg("ERROR: could not read segment file version in directory");
        if (infoStream != null)
            infoStream.WriteLine(t.StackTrace);
        result.missingSegmentVersion = true;
        return result;
    }
    finally
    {
        if (input != null)
            input.Close();
    }

    System.String sFormat = "";
    bool skip = false;

    if (format == SegmentInfos.FORMAT)
        sFormat = "FORMAT [Lucene Pre-2.1]";
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
        sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
        sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
        sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    else if (format == SegmentInfos.FORMAT_CHECKSUM)
        sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_DEL_COUNT)
        sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_HAS_PROX)
        sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_USER_DATA)
        sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
    else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
        sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
    else if (format < SegmentInfos.CURRENT_FORMAT)
    {
        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
        skip = true;
    }
    else
        sFormat = format + " [Lucene 1.3 or prior]";

    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;
    result.userData = sis.UserData;
    System.String userDataString;
    if (sis.UserData.Count > 0)
        userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
    else
        userDataString = "";

    Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

    if (onlySegments != null)
    {
        result.partial = true;
        if (infoStream != null)
            infoStream.Write("\nChecking only these segments:");
        foreach (string s in onlySegments)
        {
            if (infoStream != null)
                infoStream.Write(" " + s);
        }
        result.segmentsChecked.AddRange(onlySegments);
        Msg(":");
    }

    if (skip)
    {
        Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
        result.toolOutOfDate = true;
        return result;
    }

    result.newSegments = (SegmentInfos) sis.Clone();
    result.newSegments.Clear();

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = sis.Info(i);
        if (onlySegments != null && !onlySegments.Contains(info.name))
            continue;
        var segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.Add(segInfoStat);
        Msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
        segInfoStat.name = info.name;
        segInfoStat.docCount = info.docCount;

        int toLoseDocCount = info.docCount;

        SegmentReader reader = null;

        try
        {
            Msg("    compound=" + info.GetUseCompoundFile());
            segInfoStat.compound = info.GetUseCompoundFile();
            Msg("    hasProx=" + info.HasProx);
            segInfoStat.hasProx = info.HasProx;
            Msg("    numFiles=" + info.Files().Count);
            segInfoStat.numFiles = info.Files().Count;
            Msg(System.String.Format(nf, "    size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
            segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
            IDictionary<string, string> diagnostics = info.Diagnostics;
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.Count > 0)
                Msg("    diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));

            int docStoreOffset = info.DocStoreOffset;
            if (docStoreOffset != -1)
            {
                Msg("    docStoreOffset=" + docStoreOffset);
                segInfoStat.docStoreOffset = docStoreOffset;
                Msg("    docStoreSegment=" + info.DocStoreSegment);
                segInfoStat.docStoreSegment = info.DocStoreSegment;
                Msg("    docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
                segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
            }
            System.String delFileName = info.GetDelFileName();
            if (delFileName == null)
            {
                Msg("    no deletions");
                segInfoStat.hasDeletions = false;
            }
            else
            {
                Msg("    has deletions [delFileName=" + delFileName + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsFileName = delFileName;
            }
            if (infoStream != null)
                infoStream.Write("    test: open reader.........");
            reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            segInfoStat.openReaderPassed = true;

            int numDocs = reader.NumDocs();
            toLoseDocCount = numDocs;
            if (reader.HasDeletions)
            {
                if (reader.deletedDocs.Count() != info.GetDelCount())
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                if (reader.deletedDocs.Count() > reader.MaxDoc)
                    throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
                if (info.docCount - numDocs != info.GetDelCount())
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                segInfoStat.numDeleted = info.docCount - numDocs;
                Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
            }
            else
            {
                if (info.GetDelCount() != 0)
                    throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
                Msg("OK");
            }
            if (reader.MaxDoc != info.docCount)
                throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);

            // Test getFieldNames()
            if (infoStream != null)
                infoStream.Write("    test: fields..............");
            ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
            Msg("OK [" + fieldNames.Count + " fields]");
            segInfoStat.numFields = fieldNames.Count;

            // Test Field Norms
            segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);

            // Test the Term Index
            segInfoStat.termIndexStatus = TestTermIndex(info, reader);

            // Test Stored Fields
            segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);

            // Test Term Vectors
            segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);

            // Rethrow the first exception we encountered.
            // This will cause stats for failed segments to be incremented properly
            if (segInfoStat.fieldNormStatus.error != null)
                throw new System.SystemException("Field Norm test failed");
            else if (segInfoStat.termIndexStatus.error != null)
                throw new System.SystemException("Term Index test failed");
            else if (segInfoStat.storedFieldStatus.error != null)
                throw new System.SystemException("Stored Field test failed");
            else if (segInfoStat.termVectorStatus.error != null)
                throw new System.SystemException("Term Vector test failed");

            Msg("");
        }
        catch (System.Exception t)
        {
            Msg("FAILED");
            const string comment = "fixIndex() would remove reference to this segment";
            Msg("    WARNING: " + comment + "; full exception:");
            if (infoStream != null)
                infoStream.WriteLine(t.StackTrace);
            Msg("");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        }
        finally
        {
            if (reader != null)
                reader.Close();
        }

        // Keeper
        result.newSegments.Add((SegmentInfo) info.Clone());
    }

    if (0 == result.numBadSegments)
    {
        result.clean = true;
        Msg("No problems were detected with this index.\n");
    }
    else
        Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

    return result;
}
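For reference, a typical driver for the checker above could look like the following. This is only a sketch: the CheckIndex(Directory) constructor is assumed from the Lucene.Net 2.9-era API and is not shown on this page; the Status fields (clean, numBadSegments, totLoseDocCount) are taken from the snippet itself.

// Hedged sketch: run the checker over a whole index and inspect the result.
// new CheckIndex(dir) is an assumption; passing null checks every segment.
CheckIndex checker = new CheckIndex(dir);
CheckIndex.Status status = checker.CheckIndex_Renamed_Method(null);
if (!status.clean)
    System.Console.WriteLine(status.numBadSegments + " bad segments, " + status.totLoseDocCount + " docs at risk");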
/// <summary>Returns the merges necessary to optimize the index.
/// This merge policy defines "optimized" to mean only one
/// segment in the index, where that segment has no
/// deletions pending nor separate norms, and it is in
/// compound file format if the current useCompoundFile
/// setting is true. This method returns multiple merges
/// (mergeFactor at a time) so the <see cref="MergeScheduler" />
/// in use may make use of concurrency.
/// </summary>
public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
{
    MergeSpecification spec;

    System.Diagnostics.Debug.Assert(maxNumSegments > 0);

    if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
    {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.Count;
        while (last > 0)
        {
            SegmentInfo info = infos.Info(--last);
            if (segmentsToOptimize.Contains(info))
            {
                last++;
                break;
            }
        }

        if (last > 0)
        {
            spec = new MergeSpecification();

            // First, enroll all "full" merges (size
            // mergeFactor) to potentially be run concurrently:
            while (last - maxNumSegments + 1 >= mergeFactor)
            {
                spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
                last -= mergeFactor;
            }

            // Only if there are no full merges pending do we
            // add a final partial (< mergeFactor segments) merge:
            if (0 == spec.merges.Count)
            {
                if (maxNumSegments == 1)
                {
                    // Since we must optimize down to 1 segment, the
                    // choice is simple:
                    if (last > 1 || !IsOptimized(infos.Info(0)))
                        spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
                }
                else if (last > maxNumSegments)
                {
                    // Take care to pick a partial merge that is
                    // least cost, but does not make the index too
                    // lopsided. If we always just picked the
                    // partial tail then we could produce a highly
                    // lopsided index over time:

                    // We must merge this many segments to leave
                    // maxNumSegments in the index (from when
                    // optimize was first kicked off):
                    int finalMergeSize = last - maxNumSegments + 1;

                    // Consider all possible starting points:
                    long bestSize = 0;
                    int bestStart = 0;

                    for (int i = 0; i < last - finalMergeSize + 1; i++)
                    {
                        long sumSize = 0;
                        for (int j = 0; j < finalMergeSize; j++)
                            sumSize += Size(infos.Info(j + i));
                        if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
                        {
                            bestStart = i;
                            bestSize = sumSize;
                        }
                    }

                    spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
                }
            }
        }
        else
            spec = null;
    }
    else
        spec = null;

    return spec;
}
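The partial-merge selection above is a sliding-window minimization: every contiguous window of finalMergeSize segments is summed, and a window only wins if it is both cheaper than the best so far and less than twice the size of its left neighbor (the lopsidedness guard). A self-contained toy version, with made-up sizes, shows the guard in action:

// Hedged toy version of the window search in FindMergesForOptimize.
// The sizes array is hypothetical, purely for illustration.
long[] sizes = { 500, 480, 40, 35, 30, 25 };
int finalMergeSize = 3;
long bestSize = 0;
int bestStart = 0;
for (int i = 0; i + finalMergeSize <= sizes.Length; i++)
{
    long sumSize = 0;
    for (int j = 0; j < finalMergeSize; j++)
        sumSize += sizes[i + j];
    // Accept a cheaper window, but not one dwarfed by its left neighbor:
    if (i == 0 || (sumSize < 2 * sizes[i - 1] && sumSize < bestSize))
    {
        bestStart = i;
        bestSize = sumSize;
    }
}
// bestStart == 2 here: the tail window {35, 30, 25} sums to 90, which is
// more than twice its left neighbor (40), so the guard rejects it in
// favor of {40, 35, 30} (sum 105).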
private bool IsOptimized(SegmentInfos infos, int maxNumSegments, System.Collections.Hashtable segmentsToOptimize)
{
    int numSegments = infos.Count;
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        if (segmentsToOptimize.Contains(info))
        {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
}
/// <summary>Finds merges necessary to expunge all deletes from the
/// index. We simply merge adjacent segments that have
/// deletes, up to mergeFactor at a time.
/// </summary>
public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
{
    int numSegments = segmentInfos.Count;

    if (Verbose())
        Message("findMergesToExpungeDeletes: " + numSegments + " segments");

    MergeSpecification spec = new MergeSpecification();
    int firstSegmentWithDeletions = -1;
    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = segmentInfos.Info(i);
        int delCount = writer.NumDeletedDocs(info);
        if (delCount > 0)
        {
            if (Verbose())
                Message("  segment " + info.name + " has deletions");
            if (firstSegmentWithDeletions == -1)
                firstSegmentWithDeletions = i;
            else if (i - firstSegmentWithDeletions == mergeFactor)
            {
                // We've seen mergeFactor segments in a row with
                // deletions, so force a merge now:
                if (Verbose())
                    Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
                spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
                firstSegmentWithDeletions = i;
            }
        }
        else if (firstSegmentWithDeletions != -1)
        {
            // End of a sequence of segments with deletions, so,
            // merge those past segments even if it's fewer than
            // mergeFactor segments
            if (Verbose())
                Message("  add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
            spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
            firstSegmentWithDeletions = -1;
        }
    }

    if (firstSegmentWithDeletions != -1)
    {
        if (Verbose())
            Message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
        spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
    }

    return spec;
}
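The loop above partitions the index into runs of adjacent segments that carry deletions, cutting a run whenever it reaches mergeFactor segments. With hypothetical delete counts, a stripped-down version of the bookkeeping produces the same half-open ranges as segmentInfos.Range(a, b):

// Hedged toy version of the run detection in FindMergesToExpungeDeletes.
// delCounts is made up; each int[] { a, b } mirrors Range(a, b).
int[] delCounts = { 0, 3, 1, 2, 0, 5 };
int mergeFactor = 2;
List<int[]> ranges = new List<int[]>();
int first = -1;
for (int i = 0; i < delCounts.Length; i++)
{
    if (delCounts[i] > 0)
    {
        if (first == -1)
            first = i;
        else if (i - first == mergeFactor)
        {
            ranges.Add(new[] { first, i }); // run reached mergeFactor: cut here
            first = i;
        }
    }
    else if (first != -1)
    {
        ranges.Add(new[] { first, i });     // run ended before reaching mergeFactor
        first = -1;
    }
}
if (first != -1)
    ranges.Add(new[] { first, delCounts.Length });
// ranges: [1,3), [3,4), [5,6) - segments 1-2 merge together, then 3, then 5.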
public virtual void AddIndexes(Directory[] dirs)
{
    EnsureOpen();

    NoDupDirs(dirs);

    // Do not allow add docs or deletes while we are running:
    docWriter.PauseAllThreads();

    try
    {
        if (infoStream != null)
            Message("flush at addIndexes");
        Flush(true, false, true);

        bool success = false;

        StartTransaction(false);

        try
        {
            int docCount = 0;
            lock (this)
            {
                EnsureOpen();
                for (int i = 0; i < dirs.Length; i++)
                {
                    SegmentInfos sis = new SegmentInfos(); // read infos from dir
                    sis.Read(dirs[i]);
                    for (int j = 0; j < sis.Count; j++)
                    {
                        SegmentInfo info = sis.Info(j);
                        docCount += info.docCount;
                        System.Diagnostics.Debug.Assert(!segmentInfos.Contains(info));
                        segmentInfos.Add(info); // add each info
                    }
                }
            }

            // Notify DocumentsWriter that the flushed count just increased
            docWriter.UpdateFlushedDocCount(docCount);

            Optimize();

            success = true;
        }
        finally
        {
            if (success)
                CommitTransaction();
            else
                RollbackTransaction();
        }
    }
    catch (System.OutOfMemoryException oom)
    {
        HandleOOM(oom, "addIndexes(Directory[])");
    }
    finally
    {
        if (docWriter != null)
            docWriter.ResumeAllThreads();
    }
}
internal bool ApplyDeletes(SegmentInfos infos)
{
    lock (this)
    {
        if (!HasDeletes())
            return false;

        if (infoStream != null)
            Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + infos.Count + " segments.");

        int infosEnd = infos.Count;

        int docStart = 0;
        bool any = false;
        for (int i = 0; i < infosEnd; i++)
        {
            IndexReader reader = SegmentReader.Get(infos.Info(i), false);
            bool success = false;
            try
            {
                any |= ApplyDeletes(reader, docStart);
                docStart += reader.MaxDoc();
                success = true;
            }
            finally
            {
                if (reader != null)
                {
                    try
                    {
                        if (success)
                            reader.DoCommit();
                    }
                    finally
                    {
                        reader.DoClose();
                    }
                }
            }
        }

        deletesFlushed.Clear();
        return any;
    }
}
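Note how docStart turns each segment-local document ID into an index-wide one: segment i's documents occupy the global range [docStart, docStart + MaxDoc), where docStart is the sum of the MaxDoc of all earlier segments. A tiny helper capturing that arithmetic (the helper and its inputs are hypothetical):

// Hedged helper mirroring the docStart bookkeeping in ApplyDeletes.
// maxDocs holds each segment's MaxDoc, in segment order.
int GlobalDocID(int[] maxDocs, int segment, int localDocID)
{
    int docStart = 0;
    for (int i = 0; i < segment; i++)
        docStart += maxDocs[i];     // documents of earlier segments come first
    return docStart + localDocID;
}
// Example: GlobalDocID(new[] { 100, 50, 80 }, 2, 7) == 157.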
/// <summary>Checks if any merges are now necessary and returns a
/// <see cref="MergePolicy.MergeSpecification" /> if so. A merge
/// is necessary when there are more than mergeFactor
/// segments at a given level. When multiple levels have too
/// many segments, this method will return multiple merges,
/// allowing the <see cref="MergeScheduler" /> to use concurrency.
/// </summary>
public override MergeSpecification FindMerges(SegmentInfos infos)
{
    int numSegments = infos.Count;
    if (Verbose())
        Message("findMerges: " + numSegments + " segments");

    // Compute levels, which is just log (base mergeFactor)
    // of the size of each segment
    float[] levels = new float[numSegments];
    float norm = (float) System.Math.Log(mergeFactor);

    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        long size = Size(info);

        // Floor tiny segments
        if (size < 1)
            size = 1;
        levels[i] = (float) System.Math.Log(size) / norm;
    }

    float levelFloor;
    if (minMergeSize <= 0)
        levelFloor = (float) 0.0;
    else
        levelFloor = (float) (System.Math.Log(minMergeSize) / norm);

    // Now, we quantize the log values into levels. The
    // first level is any segment whose log size is within
    // LEVEL_LOG_SPAN of the max size, or which has such a
    // segment "to the right". Then, we find the max of all
    // other segments and use that to define the next level
    // segment, etc.

    MergeSpecification spec = null;

    int start = 0;
    while (start < numSegments)
    {
        // Find max level of all segments not already
        // quantized.
        float maxLevel = levels[start];
        for (int i = 1 + start; i < numSegments; i++)
        {
            float level = levels[i];
            if (level > maxLevel)
                maxLevel = level;
        }

        // Now search backwards for the rightmost segment that
        // falls into this level:
        float levelBottom;
        if (maxLevel < levelFloor)
            // All remaining segments fall into the min level
            levelBottom = -1.0F;
        else
        {
            levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);

            // Force a boundary at the level floor
            if (levelBottom < levelFloor && maxLevel >= levelFloor)
                levelBottom = levelFloor;
        }

        int upto = numSegments - 1;
        while (upto >= start)
        {
            if (levels[upto] >= levelBottom)
                break;
            upto--;
        }
        if (Verbose())
            Message("  level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");

        // Finally, record all merges that are viable at this level:
        int end = start + mergeFactor;
        while (end <= 1 + upto)
        {
            bool anyTooLarge = false;
            for (int i = start; i < end; i++)
            {
                SegmentInfo info = infos.Info(i);
                anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
            }

            if (!anyTooLarge)
            {
                if (spec == null)
                    spec = new MergeSpecification();
                if (Verbose())
                    Message("    " + start + " to " + end + ": add this merge");
                spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
            }
            else if (Verbose())
                Message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");

            start = end;
            end = start + mergeFactor;
        }

        start = 1 + upto;
    }

    return spec;
}
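Since levels[i] = log(size) / log(mergeFactor), a segment's level is simply the logarithm of its size in base mergeFactor; with mergeFactor = 10, segments of the same decimal order of magnitude land on the same level. A quick standalone illustration (the sizes are hypothetical; LEVEL_LOG_SPAN is 0.75 in this merge policy):

// Hedged illustration of the level quantization in FindMerges.
// With mergeFactor = 10, level is just log10 of the segment size.
int mergeFactor = 10;
long[] sizes = { 1000000, 850000, 9000, 7500, 120 };
float norm = (float) System.Math.Log(mergeFactor);
for (int i = 0; i < sizes.Length; i++)
{
    float level = (float) System.Math.Log(sizes[i]) / norm;
    System.Console.WriteLine("segment " + i + ": size=" + sizes[i] + " level=" + level);
}
// Levels come out near 6.00, 5.93, 3.95, 3.88 and 2.08: the first two fall
// within LEVEL_LOG_SPAN of the max and form one level, the next two form
// the level below, and the tiny segment sits alone at the bottom.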