protected internal override object DoBody(string segmentFileName) {
    var sis = new SegmentInfos();
    sis.Read(directory, segmentFileName);
    var readers = new SegmentReader[sis.Size()];
    for (int i = sis.Size() - 1; i >= 0; i--) {
        System.IO.IOException prior = null;
        bool success = false;
        try {
            readers[i] = new SegmentReader(sis.Info(i), termInfosIndexDivisor, IOContext.READ);
            success = true;
        } catch (System.IO.IOException ex) {
            prior = ex;
        } finally {
            if (!success) {
                IOUtils.CloseWhileHandlingException(prior, readers);
            }
        }
    }
    return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
}
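Nearly every snippet in this listing follows the same read-then-inspect pattern: construct an empty SegmentInfos, Read() the latest segments_N from a Directory, then call Size() (and Info(i) for per-segment detail). Below is a minimal, self-contained sketch of that pattern; the index path and console output are illustrative assumptions, not taken from any snippet here.

// Hedged usage sketch (Lucene.NET 4.x): count the segments in an existing index.
using System;
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class SegmentCounter {
    public static void Main(string[] args) {
        // "index" is a placeholder path; point it at a real Lucene.NET index.
        var indexDir = new System.IO.DirectoryInfo(args.Length > 0 ? args[0] : "index");
        using (Directory dir = FSDirectory.Open(indexDir)) {
            var infos = new SegmentInfos();
            infos.Read(dir); // loads the most recent segments_N commit
            Console.WriteLine("segments: " + infos.Size());
            for (int i = 0; i < infos.Size(); i++) {
                SegmentCommitInfo info = infos.Info(i);
                Console.WriteLine("  " + info.Info.Name + " docs=" + info.Info.DocCount);
            }
        }
    }
}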
public virtual void ListSegments() {
    for (int x = 0; x < infos.Size(); x++) {
        SegmentCommitInfo info = infos.Info(x);
        string sizeStr = string.Format(CultureInfo.InvariantCulture, "{0:###,###.###}", info.SizeInBytes());
        Console.WriteLine(info.Info.Name + " " + sizeStr);
    }
}
public virtual void TestAllSegmentsLarge() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 2;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(3, sis.Size());
}
public virtual void TestMergeFactor() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 5);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 3;
    lmp.MergeFactor = 2;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    // Should only be 4 segments in the index, because of the merge factor and
    // max merge docs settings.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(4, sis.Size());
}
public virtual void TestNumDocsLimit() {
    // tests that the max merge docs constraint is applied during forceMerge.
    Directory dir = new RAMDirectory();

    // Prepare an index w/ several small segments and a large one.
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 5);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    AddDocs(writer, 3);
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 3;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    // Should only be 3 segments in the index, because one of them exceeds the size limit.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(3, sis.Size());
}
private int GetNumberOfSegments(Directory dir) {
    SegmentInfos infos = new SegmentInfos();
    infos.Read(dir);
    return infos.Size();
}
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos) {
    MergeSpecification mergeSpec = null;
    //System.out.println("MRMP: findMerges sis=" + segmentInfos);
    int numSegments = segmentInfos.Size();

    IList<SegmentCommitInfo> segments = new List<SegmentCommitInfo>();
    ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;

    foreach (SegmentCommitInfo sipc in segmentInfos.Segments) {
        if (!merging.Contains(sipc)) {
            segments.Add(sipc);
        }
    }

    numSegments = segments.Count;

    if (numSegments > 1 && (numSegments > 30 || Random.Next(5) == 3)) {
        segments = CollectionsHelper.Shuffle(segments);
        // TODO: sometimes make more than 1 merge?
        mergeSpec = new MergeSpecification();
        int segsToMerge = TestUtil.NextInt(Random, 1, numSegments);
        mergeSpec.Add(new OneMerge(segments.SubList(0, segsToMerge)));
    }

    return mergeSpec;
}
public virtual void TestSingleNonMergeableSegment() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 3, true);
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 3;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    // Verify that the last segment does not have deletions.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(1, sis.Size());
}
public virtual void TestOneLargeOneSmall() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 3);
    AddDocs(writer, 5);
    AddDocs(writer, 3);
    AddDocs(writer, 5);
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 3;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(4, sis.Size());
}
public virtual void TestSingleMergeableTooLargeSegment() {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    AddDocs(writer, 5, true);
    // delete the last document
    writer.DeleteDocuments(new Term("id", "4"));
    writer.Dispose();

    conf = NewWriterConfig();
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.MaxMergeDocs = 2;
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    // Verify that the last segment does not have deletions.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(1, sis.Size());
    Assert.IsTrue(sis.Info(0).HasDeletions());
}
internal ReaderCommit(SegmentInfos infos, Directory dir) {
    SegmentsFileName_Renamed = infos.SegmentsFileName;
    this.Dir = dir;
    UserData_Renamed = infos.UserData;
    Files = infos.Files(dir, true);
    Generation_Renamed = infos.Generation;
    SegmentCount_Renamed = infos.Size();
}
public CommitPoint(ICollection<CommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos) {
    this.Directory_Renamed = directory;
    this.CommitsToDelete = commitsToDelete;
    UserData_Renamed = segmentInfos.UserData;
    SegmentsFileName_Renamed = segmentInfos.SegmentsFileName;
    Generation_Renamed = segmentInfos.Generation;
    Files = segmentInfos.Files(directory, true);
    SegmentCount_Renamed = segmentInfos.Size();
}
public virtual void TestMaxNumSegments2([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) {
    Directory dir = NewDirectory();
    Document doc = new Document();
    doc.Add(NewStringField("content", "aaa", Field.Store.NO));

    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.MinMergeDocs = 1;
    ldmp.MergeFactor = 4;
    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMaxBufferedDocs(2)
        .SetMergePolicy(ldmp)
        .SetMergeScheduler(scheduler);
    IndexWriter writer = new IndexWriter(dir, config);

    for (int iter = 0; iter < 10; iter++) {
        for (int i = 0; i < 19; i++) {
            writer.AddDocument(doc);
        }

        writer.Commit();
        writer.WaitForMerges();
        writer.Commit();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Size();

        writer.ForceMerge(7);
        writer.Commit();
        writer.WaitForMerges();

        sis = new SegmentInfos();
        sis.Read(dir);
        int optSegCount = sis.Size();

        if (segCount < 7) {
            Assert.AreEqual(segCount, optSegCount);
        } else {
            Assert.AreEqual(7, optSegCount, "seg: " + segCount);
        }
    }
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestAddIndexes() {
    Directory dir1 = NewDirectory();
    Directory dir2 = NewDirectory();
    IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

    Document d1 = new Document();
    d1.Add(new TextField("f1", "first field", Field.Store.YES));
    d1.Add(new TextField("f2", "second field", Field.Store.YES));
    writer.AddDocument(d1);
    writer.Dispose();

    writer = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    Document d2 = new Document();
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.StoreTermVectors = true;
    d2.Add(new TextField("f2", "second field", Field.Store.YES));
    d2.Add(new Field("f1", "first field", customType2));
    d2.Add(new TextField("f3", "third field", Field.Store.YES));
    d2.Add(new TextField("f4", "fourth field", Field.Store.YES));
    writer.AddDocument(d2);
    writer.Dispose();

    writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    writer.AddIndexes(dir2);
    writer.Dispose();

    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir1);
    Assert.AreEqual(2, sis.Size());

    FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
    FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

    Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
    Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
    // make sure the ordering of the "external" segment is preserved
    Assert.AreEqual("f2", fis2.FieldInfo(0).Name);
    Assert.AreEqual("f1", fis2.FieldInfo(1).Name);
    Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
    Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

    dir1.Dispose();
    dir2.Dispose();
}
private int CheckAllSegmentsUpgraded(Directory dir) {
    SegmentInfos infos = new SegmentInfos();
    infos.Read(dir);
    if (VERBOSE) {
        Console.WriteLine("checkAllSegmentsUpgraded: " + infos);
    }
    foreach (SegmentCommitInfo si in infos.Segments) {
        Assert.AreEqual(Constants.LUCENE_MAIN_VERSION, si.Info.Version);
    }
    return infos.Size();
}
public virtual void TestPartialMerge() {
    Directory dir = NewDirectory();

    Document doc = new Document();
    doc.Add(NewStringField("content", "aaa", Field.Store.NO));
    int incrMin = TEST_NIGHTLY ? 15 : 40;
    for (int numDocs = 10; numDocs < 500; numDocs += TestUtil.NextInt(Random(), incrMin, 5 * incrMin)) {
        LogDocMergePolicy ldmp = new LogDocMergePolicy();
        ldmp.MinMergeDocs = 1;
        ldmp.MergeFactor = 5;
        IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(ldmp));
        for (int j = 0; j < numDocs; j++) {
            writer.AddDocument(doc);
        }
        writer.Dispose();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Size();

        ldmp = new LogDocMergePolicy();
        ldmp.MergeFactor = 5;
        writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(ldmp));
        writer.ForceMerge(3);
        writer.Dispose();

        sis = new SegmentInfos();
        sis.Read(dir);
        int optSegCount = sis.Size();

        if (segCount < 3) {
            Assert.AreEqual(segCount, optSegCount);
        } else {
            Assert.AreEqual(3, optSegCount);
        }
    }
    dir.Dispose();
}
public virtual void TestBackgroundForceMerge() {
    Directory dir = NewDirectory();
    for (int pass = 0; pass < 2; pass++) {
        IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.CREATE).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(51)));
        Document doc = new Document();
        doc.Add(NewStringField("field", "aaa", Field.Store.NO));
        for (int i = 0; i < 100; i++) {
            writer.AddDocument(doc);
        }
        writer.ForceMerge(1, false);

        if (0 == pass) {
            writer.Dispose();
            DirectoryReader reader = DirectoryReader.Open(dir);
            Assert.AreEqual(1, reader.Leaves.Count);
            reader.Dispose();
        } else {
            // Get another segment to flush so we can verify it is
            // NOT included in the merging
            writer.AddDocument(doc);
            writer.AddDocument(doc);
            writer.Dispose();

            DirectoryReader reader = DirectoryReader.Open(dir);
            Assert.IsTrue(reader.Leaves.Count > 1);
            reader.Dispose();

            SegmentInfos infos = new SegmentInfos();
            infos.Read(dir);
            Assert.AreEqual(2, infos.Size());
        }
    }
    dir.Dispose();
}
/// <summary>
/// Returns true if the number of segments eligible for
/// merging is less than or equal to the specified {@code
/// maxNumSegments}.
/// </summary>
protected internal virtual bool IsMerged(SegmentInfos infos, int maxNumSegments, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge) {
    int numSegments = infos.Size();
    int numToMerge = 0;
    SegmentCommitInfo mergeInfo = null;
    bool segmentIsOriginal = false;
    for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++) {
        SegmentCommitInfo info = infos.Info(i);
        bool? isOriginal;
        segmentsToMerge.TryGetValue(info, out isOriginal);
        if (isOriginal != null) {
            segmentIsOriginal = isOriginal.Value;
            numToMerge++;
            mergeInfo = info;
        }
    }

    return numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || IsMerged(infos, mergeInfo));
}
public virtual void TestByteSizeLimit() {
    // tests that the max merge size constraint is applied during forceMerge.
    Directory dir = new RAMDirectory();

    // Prepare an index w/ several small segments and a large one.
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    const int numSegments = 15;
    for (int i = 0; i < numSegments; i++) {
        int numDocs = i == 7 ? 30 : 1;
        AddDocs(writer, numDocs);
    }
    writer.Dispose();

    SegmentInfos sis = new SegmentInfos();
    sis.Read(dir);
    double min = sis.Info(0).SizeInBytes();

    conf = NewWriterConfig();
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
    conf.SetMergePolicy(lmp);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    // Should only be 3 segments in the index, because one of them exceeds the size limit.
    sis = new SegmentInfos();
    sis.Read(dir);
    Assert.AreEqual(3, sis.Size());
}
public virtual void TestSameFieldNumbersAcrossSegments() {
    for (int i = 0; i < 2; i++) {
        Directory dir = NewDirectory();
        IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

        Document d1 = new Document();
        d1.Add(new StringField("f1", "first field", Field.Store.YES));
        d1.Add(new StringField("f2", "second field", Field.Store.YES));
        writer.AddDocument(d1);

        if (i == 1) {
            writer.Dispose();
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
        } else {
            writer.Commit();
        }

        Document d2 = new Document();
        FieldType customType2 = new FieldType(TextField.TYPE_STORED);
        customType2.StoreTermVectors = true;
        d2.Add(new TextField("f2", "second field", Field.Store.NO));
        d2.Add(new Field("f1", "first field", customType2));
        d2.Add(new TextField("f3", "third field", Field.Store.NO));
        d2.Add(new TextField("f4", "fourth field", Field.Store.NO));
        writer.AddDocument(d2);
        writer.Dispose();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        Assert.AreEqual(2, sis.Size());

        FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
        FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));

        Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
        Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
        Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
        Assert.AreEqual("f2", fis2.FieldInfo(1).Name);
        Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
        Assert.AreEqual("f4", fis2.FieldInfo(3).Name);

        writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        writer.ForceMerge(1);
        writer.Dispose();

        sis = new SegmentInfos();
        sis.Read(dir);
        Assert.AreEqual(1, sis.Size());

        FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(0));

        Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
        Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
        Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
        Assert.AreEqual("f4", fis3.FieldInfo(3).Name);

        dir.Dispose();
    }
}
/// <summary>
/// this constructor is only used for <seealso cref="#doOpenIfChanged(SegmentInfos)"/>
/// </summary>
private static DirectoryReader Open(Directory directory, SegmentInfos infos, IList<AtomicReader> oldReaders, int termInfosIndexDivisor) {
    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    IDictionary<string, int?> segmentReaders = new Dictionary<string, int?>();

    if (oldReaders != null) {
        // create a Map SegmentName->SegmentReader
        for (int i = 0, c = oldReaders.Count; i < c; i++) {
            SegmentReader sr = (SegmentReader)oldReaders[i];
            segmentReaders[sr.SegmentName] = Convert.ToInt32(i);
        }
    }

    SegmentReader[] newReaders = new SegmentReader[infos.Size()];

    // remember which readers are shared between the old and the re-opened
    // DirectoryReader - we have to incRef those readers
    bool[] readerShared = new bool[infos.Size()];

    for (int i = infos.Size() - 1; i >= 0; i--) {
        // find SegmentReader for this segment
        int? oldReaderIndex;
        segmentReaders.TryGetValue(infos.Info(i).Info.Name, out oldReaderIndex);
        if (oldReaderIndex == null) {
            // this is a new segment, no old SegmentReader can be reused
            newReaders[i] = null;
        } else {
            // there is an old reader for this segment - we'll try to reopen it
            newReaders[i] = (SegmentReader)oldReaders[(int)oldReaderIndex];
        }

        bool success = false;
        Exception prior = null;
        try {
            SegmentReader newReader;
            if (newReaders[i] == null || infos.Info(i).Info.UseCompoundFile != newReaders[i].SegmentInfo.Info.UseCompoundFile) {
                // this is a new reader; in case we hit an exception we can close it safely
                newReader = new SegmentReader(infos.Info(i), termInfosIndexDivisor, IOContext.READ);
                readerShared[i] = false;
                newReaders[i] = newReader;
            } else {
                if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen && newReaders[i].SegmentInfo.FieldInfosGen == infos.Info(i).FieldInfosGen) {
                    // No change; this reader will be shared between
                    // the old and the new one, so we must incRef
                    // it:
                    readerShared[i] = true;
                    newReaders[i].IncRef();
                } else {
                    // there are changes to the reader, either liveDocs or DV updates
                    readerShared[i] = false;
                    // Steal the ref returned by SegmentReader ctor:
                    Debug.Assert(infos.Info(i).Info.Dir == newReaders[i].SegmentInfo.Info.Dir);
                    Debug.Assert(infos.Info(i).HasDeletions() || infos.Info(i).HasFieldUpdates());
                    if (newReaders[i].SegmentInfo.DelGen == infos.Info(i).DelGen) {
                        // only DV updates
                        newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i], newReaders[i].LiveDocs, newReaders[i].NumDocs);
                    } else {
                        // both DV and liveDocs have changed
                        newReaders[i] = new SegmentReader(infos.Info(i), newReaders[i]);
                    }
                }
            }
            success = true;
        } catch (Exception ex) {
            prior = ex;
        } finally {
            if (!success) {
                for (i++; i < infos.Size(); i++) {
                    if (newReaders[i] != null) {
                        try {
                            if (!readerShared[i]) {
                                // this is a new subReader that is not used by the old one,
                                // we can close it
                                newReaders[i].Dispose();
                            } else {
                                // this subReader is also used by the old reader, so instead
                                // closing we must decRef it
                                newReaders[i].DecRef();
                            }
                        } catch (Exception t) {
                            if (prior == null) {
                                prior = t;
                            }
                        }
                    }
                }
            }
            // throw the first exception
            IOUtils.ReThrow(prior);
        }
    }
    return new StandardDirectoryReader(directory, newReaders, null, infos, termInfosIndexDivisor, false);
}
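The private Open above backs DirectoryReader.OpenIfChanged, the public reopen path that reuses unchanged SegmentReaders. A hedged sketch of the caller-side pattern; dir is assumed to be a Directory whose index may change between calls:

// OpenIfChanged returns null when nothing changed; otherwise the new reader
// shares unchanged segments with the old one, which must then be disposed.
DirectoryReader reader = DirectoryReader.Open(dir);
// ... later, after the index may have been updated ...
DirectoryReader changed = DirectoryReader.OpenIfChanged(reader);
if (changed != null) {
    reader.Dispose(); // release the old reader's references
    reader = changed;
}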
public virtual void TestMaxNumSegments2() {
    Directory dir = NewDirectory();
    Document doc = new Document();
    doc.Add(NewStringField("content", "aaa", Field.Store.NO));

    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.MinMergeDocs = 1;
    ldmp.MergeFactor = 4;
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetMergePolicy(ldmp).SetMergeScheduler(new ConcurrentMergeScheduler()));

    for (int iter = 0; iter < 10; iter++) {
        for (int i = 0; i < 19; i++) {
            writer.AddDocument(doc);
        }

        writer.Commit();
        writer.WaitForMerges();
        writer.Commit();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Size();

        writer.ForceMerge(7);
        writer.Commit();
        writer.WaitForMerges();

        sis = new SegmentInfos();
        sis.Read(dir);
        int optSegCount = sis.Size();

        if (segCount < 7) {
            Assert.AreEqual(segCount, optSegCount);
        } else {
            Assert.AreEqual(7, optSegCount, "seg: " + segCount);
        }
    }
    writer.Dispose();
    dir.Dispose();
}
public virtual void TestFieldNumberGaps() {
    int numIters = AtLeast(13);
    for (int i = 0; i < numIters; i++) {
        Directory dir = NewDirectory();
        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES));
            Document d = new Document();
            d.Add(new TextField("f1", "d1 first field", Field.Store.YES));
            d.Add(new TextField("f2", "d1 second field", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(1, sis.Size());
            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
        }

        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
            Document d = new Document();
            d.Add(new TextField("f1", "d2 first field", Field.Store.YES));
            d.Add(new StoredField("f3", new byte[] { 1, 2, 3 }));
            writer.AddDocument(d);
            writer.Dispose();
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(2, sis.Size());
            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
            Assert.IsNull(fis2.FieldInfo(1));
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
        }

        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
            Document d = new Document();
            d.Add(new TextField("f1", "d3 first field", Field.Store.YES));
            d.Add(new TextField("f2", "d3 second field", Field.Store.YES));
            d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
            writer.AddDocument(d);
            writer.Dispose();
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
            FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2));
            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
            Assert.IsNull(fis2.FieldInfo(1));
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
        }

        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
            writer.DeleteDocuments(new Term("f1", "d1"));
            // nuke the first segment entirely so that the segment with gaps is
            // loaded first!
            writer.ForceMergeDeletes();
            writer.Dispose();
        }

        IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream()));
        writer_.ForceMerge(1);
        writer_.Dispose();

        SegmentInfos sis_ = new SegmentInfos();
        sis_.Read(dir);
        Assert.AreEqual(1, sis_.Size());
        FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0));
        Assert.AreEqual("f1", fis1_.FieldInfo(0).Name);
        Assert.AreEqual("f2", fis1_.FieldInfo(1).Name);
        Assert.AreEqual("f3", fis1_.FieldInfo(2).Name);
        dir.Dispose();
    }
}
/// <summary>
/// Used by near real-time search </summary>
internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes) {
    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    int numSegments = infos.Size();

    IList<SegmentReader> readers = new List<SegmentReader>();
    Directory dir = writer.Directory;
    SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
    int infosUpto = 0;
    bool success = false;
    try {
        for (int i = 0; i < numSegments; i++) {
            // NOTE: important that we use infos not
            // segmentInfos here, so that we are passing the
            // actual instance of SegmentInfoPerCommit in
            // IndexWriter's segmentInfos:
            SegmentCommitInfo info = infos.Info(i);
            Debug.Assert(info.Info.Dir == dir);
            ReadersAndUpdates rld = writer.readerPool.Get(info, true);
            try {
                SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments) {
                    // Steal the ref:
                    readers.Add(reader);
                    infosUpto++;
                } else {
                    reader.DecRef();
                    segmentInfos.Remove(infosUpto);
                }
            } finally {
                writer.readerPool.Release(rld);
            }
        }

        writer.IncRefDeleter(segmentInfos);

        StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
        success = true;
        return result;
    } finally {
        if (!success) {
            foreach (SegmentReader r in readers) {
                try {
                    r.DecRef();
                } catch (Exception) {
                    // ignore any exception that is thrown here to not mask any original
                    // exception.
                }
            }
        }
    }
}
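The public entry point for this near-real-time path is DirectoryReader.Open(IndexWriter, bool). A hedged usage sketch, assuming writer is an open IndexWriter with uncommitted changes:

// NRT reader: sees documents buffered in the writer before any commit.
DirectoryReader nrtReader = DirectoryReader.Open(writer, true); // true = apply all deletes
try {
    var searcher = new IndexSearcher(nrtReader);
    // ... search over not-yet-committed documents ...
} finally {
    nrtReader.Dispose();
}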
/// <summary>
/// Returns the merges necessary to merge the index down
/// to a specified number of segments.
/// this respects the <seealso cref="#maxMergeSizeForForcedMerge"/> setting.
/// By default, and assuming {@code maxNumSegments=1}, only
/// one segment will be left in the index, where that segment
/// has no deletions pending nor separate norms, and it is in
/// compound file format if the current useCompoundFile
/// setting is true. this method returns multiple merges
/// (mergeFactor at a time) so the <seealso cref="MergeScheduler"/>
/// in use may make use of concurrency.
/// </summary>
public override MergeSpecification FindForcedMerges(SegmentInfos infos, int maxNumSegments, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge) {
    Debug.Assert(maxNumSegments > 0);
    if (Verbose()) {
        Message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge=" + segmentsToMerge);
    }

    // If the segments are already merged (e.g. there's only 1 segment), or
    // there are < maxNumSegments:
    if (IsMerged(infos, maxNumSegments, segmentsToMerge)) {
        if (Verbose()) {
            Message("already merged; skip");
        }
        return null;
    }

    // Find the newest (rightmost) segment that needs to
    // be merged (other segments may have been flushed
    // since merging started):
    int last = infos.Size();
    while (last > 0) {
        SegmentCommitInfo info = infos.Info(--last);
        if (segmentsToMerge.ContainsKey(info)) {
            last++;
            break;
        }
    }

    if (last == 0) {
        if (Verbose()) {
            Message("last == 0; skip");
        }
        return null;
    }

    // There is only one segment already, and it is merged
    if (maxNumSegments == 1 && last == 1 && IsMerged(infos, infos.Info(0))) {
        if (Verbose()) {
            Message("already 1 seg; skip");
        }
        return null;
    }

    // Check if there are any segments above the threshold
    bool anyTooLarge = false;
    for (int i = 0; i < last; i++) {
        SegmentCommitInfo info = infos.Info(i);
        if (Size(info) > MaxMergeSizeForForcedMerge || SizeDocs(info) > MaxMergeDocs_Renamed) {
            anyTooLarge = true;
            break;
        }
    }

    if (anyTooLarge) {
        return FindForcedMergesSizeLimit(infos, maxNumSegments, last);
    } else {
        return FindForcedMergesMaxNumSegments(infos, maxNumSegments, last);
    }
}
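TestByteSizeLimit earlier in this listing drives the size-limited branch above by capping MaxMergeMBForForcedMerge; a condensed, hedged sketch of that configuration (dir and conf are assumed to already exist, and the 1 MB cap is illustrative):

// Segments larger than the cap are skipped by forced merges.
var lmp = new LogByteSizeMergePolicy();
lmp.MaxMergeMBForForcedMerge = 1.0; // assumed cap, illustrative only
conf.SetMergePolicy(lmp);
using (var writer = new IndexWriter(dir, conf)) {
    writer.ForceMerge(1); // oversized segments survive the merge
}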
/// <summary>
/// Checks if any merges are now necessary and returns a
/// <seealso cref="MergePolicy.MergeSpecification"/> if so. A merge
/// is necessary when there are more than {@link
/// #setMergeFactor} segments at a given level. When
/// multiple levels have too many segments, this method
/// will return multiple merges, allowing the {@link
/// MergeScheduler} to use concurrency.
/// </summary>
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos infos) {
    int numSegments = infos.Size();
    if (Verbose()) {
        Message("findMerges: " + numSegments + " segments");
    }

    // Compute levels, which is just log (base mergeFactor)
    // of the size of each segment
    IList<SegmentInfoAndLevel> levels = new List<SegmentInfoAndLevel>();
    var norm = (float)Math.Log(MergeFactor_Renamed);

    ICollection<SegmentCommitInfo> mergingSegments = Writer.Get().MergingSegments;

    for (int i = 0; i < numSegments; i++) {
        SegmentCommitInfo info = infos.Info(i);
        long size = Size(info);

        // Floor tiny segments
        if (size < 1) {
            size = 1;
        }

        SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float)Math.Log(size) / norm, i);
        levels.Add(infoLevel);

        if (Verbose()) {
            long segBytes = SizeBytes(info);
            string extra = mergingSegments.Contains(info) ? " [merging]" : "";
            if (size >= MaxMergeSize) {
                extra += " [skip: too large]";
            }
            Message("seg=" + Writer.Get().SegString(info) + " level=" + infoLevel.Level + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00} MB", segBytes / 1024 / 1024.0) + extra);
        }
    }

    float levelFloor;
    if (MinMergeSize <= 0) {
        levelFloor = (float)0.0;
    } else {
        levelFloor = (float)(Math.Log(MinMergeSize) / norm);
    }

    // Now, we quantize the log values into levels. The
    // first level is any segment whose log size is within
    // LEVEL_LOG_SPAN of the max size, or, who has such as
    // segment "to the right". Then, we find the max of all
    // other segments and use that to define the next level
    // segment, etc.

    MergeSpecification spec = null;

    int numMergeableSegments = levels.Count;

    int start = 0;
    while (start < numMergeableSegments) {
        // Find max level of all segments not already
        // quantized.
        float maxLevel = levels[start].Level;
        for (int i = 1 + start; i < numMergeableSegments; i++) {
            float level = levels[i].Level;
            if (level > maxLevel) {
                maxLevel = level;
            }
        }

        // Now search backwards for the rightmost segment that
        // falls into this level:
        float levelBottom;
        if (maxLevel <= levelFloor) {
            // All remaining segments fall into the min level
            levelBottom = -1.0F;
        } else {
            levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);

            // Force a boundary at the level floor
            if (levelBottom < levelFloor && maxLevel >= levelFloor) {
                levelBottom = levelFloor;
            }
        }

        int upto = numMergeableSegments - 1;
        while (upto >= start) {
            if (levels[upto].Level >= levelBottom) {
                break;
            }
            upto--;
        }
        if (Verbose()) {
            Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
        }

        // Finally, record all merges that are viable at this level:
        int end = start + MergeFactor_Renamed;
        while (end <= 1 + upto) {
            bool anyTooLarge = false;
            bool anyMerging = false;
            for (int i = start; i < end; i++) {
                SegmentCommitInfo info = levels[i].Info;
                anyTooLarge |= (Size(info) >= MaxMergeSize || SizeDocs(info) >= MaxMergeDocs_Renamed);
                if (mergingSegments.Contains(info)) {
                    anyMerging = true;
                    break;
                }
            }

            if (anyMerging) {
                // skip
            } else if (!anyTooLarge) {
                if (spec == null) {
                    spec = new MergeSpecification();
                }
                IList<SegmentCommitInfo> mergeInfos = new List<SegmentCommitInfo>();
                for (int i = start; i < end; i++) {
                    mergeInfos.Add(levels[i].Info);
                    Debug.Assert(infos.Contains(levels[i].Info));
                }
                if (Verbose()) {
                    Message(" add merge=" + Writer.Get().SegString(mergeInfos) + " start=" + start + " end=" + end);
                }
                spec.Add(new OneMerge(mergeInfos));
            } else if (Verbose()) {
                Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
            }

            start = end;
            end = start + MergeFactor_Renamed;
        }

        start = 1 + upto;
    }

    return spec;
}
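The level computed above is simply log base mergeFactor of the segment size. A hedged restatement of that arithmetic with an illustrative worked example:

// level(size) = ln(size) / ln(mergeFactor) = log_mergeFactor(size)
static double Level(long sizeBytes, int mergeFactor) {
    long size = Math.Max(1, sizeBytes); // tiny segments are floored to 1
    return Math.Log(size) / Math.Log(mergeFactor);
}
// With mergeFactor = 10, a ~1 KB segment sits near level 3.0 and a ~1 MB
// segment near level 6.0; ten level-3 segments merge into one level-4 segment.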
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos infos) {
    if (Verbose()) {
        Message("findMerges: " + infos.Size() + " segments");
    }
    if (infos.Size() == 0) {
        return null;
    }
    ICollection<SegmentCommitInfo> merging = Writer.Get().MergingSegments;
    ICollection<SegmentCommitInfo> toBeMerged = new HashSet<SegmentCommitInfo>();

    List<SegmentCommitInfo> infosSorted = new List<SegmentCommitInfo>(infos.AsList());
    infosSorted.Sort(new SegmentByteSizeDescending(this));

    // Compute total index bytes & print details about the index
    long totIndexBytes = 0;
    long minSegmentBytes = long.MaxValue;
    foreach (SegmentCommitInfo info in infosSorted) {
        long segBytes = Size(info);
        if (Verbose()) {
            string extra = merging.Contains(info) ? " [merging]" : "";
            if (segBytes >= MaxMergedSegmentBytes / 2.0) {
                extra += " [skip: too large]";
            } else if (segBytes < FloorSegmentBytes) {
                extra += " [floored]";
            }
            Message(" seg=" + Writer.Get().SegString(info) + " size=" + String.Format(CultureInfo.InvariantCulture, "{0:0.00}", segBytes / 1024 / 1024.0) + " MB" + extra);
        }

        minSegmentBytes = Math.Min(segBytes, minSegmentBytes);
        // Accum total byte size
        totIndexBytes += segBytes;
    }

    // If we have too-large segments, grace them out
    // of the maxSegmentCount:
    int tooBigCount = 0;
    while (tooBigCount < infosSorted.Count && Size(infosSorted[tooBigCount]) >= MaxMergedSegmentBytes / 2.0) {
        totIndexBytes -= Size(infosSorted[tooBigCount]);
        tooBigCount++;
    }

    minSegmentBytes = FloorSize(minSegmentBytes);

    // Compute max allowed segs in the index
    long levelSize = minSegmentBytes;
    long bytesLeft = totIndexBytes;
    double allowedSegCount = 0;
    while (true) {
        double segCountLevel = bytesLeft / (double)levelSize;
        if (segCountLevel < SegsPerTier) {
            allowedSegCount += Math.Ceiling(segCountLevel);
            break;
        }
        allowedSegCount += SegsPerTier;
        bytesLeft -= (long)(SegsPerTier * levelSize);
        levelSize *= MaxMergeAtOnce_Renamed;
    }
    int allowedSegCountInt = (int)allowedSegCount;

    MergeSpecification spec = null;

    // Cycle to possibly select more than one merge:
    while (true) {
        long mergingBytes = 0;

        // Gather eligible segments for merging, ie segments
        // not already being merged and not already picked (by
        // prior iteration of this loop) for merging:
        IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
        for (int idx = tooBigCount; idx < infosSorted.Count; idx++) {
            SegmentCommitInfo info = infosSorted[idx];
            if (merging.Contains(info)) {
                mergingBytes += info.SizeInBytes();
            } else if (!toBeMerged.Contains(info)) {
                eligible.Add(info);
            }
        }

        bool maxMergeIsRunning = mergingBytes >= MaxMergedSegmentBytes;

        if (Verbose()) {
            Message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.Count + " (eligible count=" + eligible.Count + ") tooBigCount=" + tooBigCount);
        }

        if (eligible.Count == 0) {
            return spec;
        }

        if (eligible.Count >= allowedSegCountInt) {
            // OK we are over budget -- find best merge!
            MergeScore bestScore = null;
            IList<SegmentCommitInfo> best = null;
            bool bestTooLarge = false;
            long bestMergeBytes = 0;

            // Consider all merge starts:
            for (int startIdx = 0; startIdx <= eligible.Count - MaxMergeAtOnce_Renamed; startIdx++) {
                long totAfterMergeBytes = 0;

                IList<SegmentCommitInfo> candidate = new List<SegmentCommitInfo>();
                bool hitTooLarge = false;
                for (int idx = startIdx; idx < eligible.Count && candidate.Count < MaxMergeAtOnce_Renamed; idx++) {
                    SegmentCommitInfo info = eligible[idx];
                    long segBytes = Size(info);

                    if (totAfterMergeBytes + segBytes > MaxMergedSegmentBytes) {
                        hitTooLarge = true;
                        // NOTE: we continue, so that we can try
                        // "packing" smaller segments into this merge
                        // to see if we can get closer to the max
                        // size; this in general is not perfect since
                        // this is really "bin packing" and we'd have
                        // to try different permutations.
                        continue;
                    }
                    candidate.Add(info);
                    totAfterMergeBytes += segBytes;
                }

                MergeScore score = Score(candidate, hitTooLarge, mergingBytes);

                if (Verbose()) {
                    // NOTE: "{0:0.000}" replaces the Java-style "%.3f" specifier,
                    // which string.Format would have emitted literally.
                    Message(" maybe=" + Writer.Get().SegString(candidate) + " score=" + score.Score + " " + score.Explanation + " tooLarge=" + hitTooLarge + " size=" + string.Format(CultureInfo.InvariantCulture, "{0:0.000} MB", totAfterMergeBytes / 1024.0 / 1024.0));
                }

                // If we are already running a max sized merge
                // (maxMergeIsRunning), don't allow another max
                // sized merge to kick off:
                if ((bestScore == null || score.Score < bestScore.Score) && (!hitTooLarge || !maxMergeIsRunning)) {
                    best = candidate;
                    bestScore = score;
                    bestTooLarge = hitTooLarge;
                    bestMergeBytes = totAfterMergeBytes;
                }
            }

            if (best != null) {
                if (spec == null) {
                    spec = new MergeSpecification();
                }
                OneMerge merge = new OneMerge(best);
                spec.Add(merge);
                foreach (SegmentCommitInfo info in merge.Segments) {
                    toBeMerged.Add(info);
                }

                if (Verbose()) {
                    Message(" add merge=" + Writer.Get().SegString(merge.Segments) + " size=" + string.Format(CultureInfo.InvariantCulture, "{0:0.000} MB", bestMergeBytes / 1024.0 / 1024.0) + " score=" + string.Format(CultureInfo.InvariantCulture, "{0:0.000}", bestScore.Score) + " " + bestScore.Explanation + (bestTooLarge ? " [max merge]" : ""));
                }
            } else {
                return spec;
            }
        } else {
            return spec;
        }
    }
}
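The budget loop above tiers the index by segment size: each tier may hold SegsPerTier segments, and each successive tier is MaxMergeAtOnce times larger. A hedged worked example of the same computation, with assumed values (2 MB floor, 10 segments per tier, merge factor 10, 200 MB index):

// Illustrative re-derivation of the allowed-segment budget; all constants
// here are assumptions, not the policy's actual configuration.
long floorSegmentBytes = 2L << 20;  // 2 MB floor (assumed)
double segsPerTier = 10.0;          // assumed
int maxMergeAtOnce = 10;            // assumed
long totIndexBytes = 200L << 20;    // 200 MB index (assumed)

long levelSize = floorSegmentBytes; // smallest (floored) segment size
long bytesLeft = totIndexBytes;
double allowedSegCount = 0;
while (true) {
    double segCountLevel = bytesLeft / (double)levelSize;
    if (segCountLevel < segsPerTier) {
        allowedSegCount += Math.Ceiling(segCountLevel);
        break;
    }
    allowedSegCount += segsPerTier;            // one full tier at this size
    bytesLeft -= (long)(segsPerTier * levelSize);
    levelSize *= maxMergeAtOnce;               // next tier is 10x larger
}
Console.WriteLine(allowedSegCount); // 19: ten ~2 MB segments plus nine ~20 MB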
/// <summary> /// Returns a <seealso cref="Status"/> instance detailing /// the state of the index. /// </summary> /// <param name="onlySegments"> list of specific segment names to check /// /// <p>As this method checks every byte in the specified /// segments, on a large index it can take quite a long /// time to run. /// /// <p><b>WARNING</b>: make sure /// you only call this when the index is not opened by any /// writer. </param> public virtual Status DoCheckIndex(IList<string> onlySegments) { NumberFormatInfo nf = CultureInfo.CurrentCulture.NumberFormat; SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.Dir = Dir; try { sis.Read(Dir); } catch (Exception t) { Msg(infoStream, "ERROR: could not read any segments file in directory"); result.MissingSegments = true; if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } return result; } // find the oldest and newest segment versions string oldest = Convert.ToString(int.MaxValue), newest = Convert.ToString(int.MinValue); string oldSegs = null; bool foundNonNullVersion = false; IComparer<string> versionComparator = StringHelper.VersionComparator; foreach (SegmentCommitInfo si in sis.Segments) { string version = si.Info.Version; if (version == null) { // pre-3.1 segment oldSegs = "pre-3.1"; } else { foundNonNullVersion = true; if (versionComparator.Compare(version, oldest) < 0) { oldest = version; } if (versionComparator.Compare(version, newest) > 0) { newest = version; } } } int numSegments = sis.Size(); string segmentsFileName = sis.SegmentsFileName; // note: we only read the format byte (required preamble) here! IndexInput input = null; try { input = Dir.OpenInput(segmentsFileName, IOContext.READONCE); } catch (Exception t) { Msg(infoStream, "ERROR: could not open segments file in directory"); if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } result.CantOpenSegments = true; return result; } int format = 0; try { format = input.ReadInt(); } catch (Exception t) { Msg(infoStream, "ERROR: could not read segment file version in directory"); if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } result.MissingSegmentVersion = true; return result; } finally { if (input != null) { input.Dispose(); } } string sFormat = ""; bool skip = false; result.SegmentsFileName = segmentsFileName; result.NumSegments = numSegments; result.UserData = sis.UserData; string userDataString; if (sis.UserData.Count > 0) { userDataString = " userData=" + sis.UserData; } else { userDataString = ""; } string versionString = null; if (oldSegs != null) { if (foundNonNullVersion) { versionString = "versions=[" + oldSegs + " .. " + newest + "]"; } else { versionString = "version=" + oldSegs; } } else { versionString = oldest.Equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. 
" + newest + "]"); } Msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString); if (onlySegments != null) { result.Partial = true; if (infoStream != null) { infoStream.Write("\nChecking only these segments:"); foreach (string s in onlySegments) { infoStream.Write(" " + s); } } result.SegmentsChecked.AddRange(onlySegments); Msg(infoStream, ":"); } if (skip) { Msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.ToolOutOfDate = true; return result; } result.NewSegments = (SegmentInfos)sis.Clone(); result.NewSegments.Clear(); result.MaxSegmentName = -1; for (int i = 0; i < numSegments; i++) { SegmentCommitInfo info = sis.Info(i); int segmentName = 0; try { segmentName = int.Parse /*Convert.ToInt32*/(info.Info.Name.Substring(1)); } catch { } if (segmentName > result.MaxSegmentName) { result.MaxSegmentName = segmentName; } if (onlySegments != null && !onlySegments.Contains(info.Info.Name)) { continue; } Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.SegmentInfos.Add(segInfoStat); Msg(infoStream, " " + (1 + i) + " of " + numSegments + ": name=" + info.Info.Name + " docCount=" + info.Info.DocCount); segInfoStat.Name = info.Info.Name; segInfoStat.DocCount = info.Info.DocCount; string version = info.Info.Version; if (info.Info.DocCount <= 0 && version != null && versionComparator.Compare(version, "4.5") >= 0) { throw new Exception("illegal number of documents: maxDoc=" + info.Info.DocCount); } int toLoseDocCount = info.Info.DocCount; AtomicReader reader = null; try { Codec codec = info.Info.Codec; Msg(infoStream, " codec=" + codec); segInfoStat.Codec = codec; Msg(infoStream, " compound=" + info.Info.UseCompoundFile); segInfoStat.Compound = info.Info.UseCompoundFile; Msg(infoStream, " numFiles=" + info.Files().Count); segInfoStat.NumFiles = info.Files().Count; segInfoStat.SizeMB = info.SizeInBytes() / (1024.0 * 1024.0); if (info.Info.GetAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null) { // don't print size in bytes if its a 3.0 segment with shared docstores Msg(infoStream, " size (MB)=" + segInfoStat.SizeMB.ToString(nf)); } IDictionary<string, string> diagnostics = info.Info.Diagnostics; segInfoStat.Diagnostics = diagnostics; if (diagnostics.Count > 0) { Msg(infoStream, " diagnostics = " + diagnostics); } if (!info.HasDeletions()) { Msg(infoStream, " no deletions"); segInfoStat.HasDeletions = false; } else { Msg(infoStream, " has deletions [delGen=" + info.DelGen + "]"); segInfoStat.HasDeletions = true; segInfoStat.DeletionsGen = info.DelGen; } if (infoStream != null) { infoStream.Write(" test: open reader........."); } reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT); Msg(infoStream, "OK"); segInfoStat.OpenReaderPassed = true; if (infoStream != null) { infoStream.Write(" test: check integrity....."); } reader.CheckIntegrity(); Msg(infoStream, "OK"); if (infoStream != null) { infoStream.Write(" test: check live docs....."); } int numDocs = reader.NumDocs; toLoseDocCount = numDocs; if (reader.HasDeletions) { if (reader.NumDocs != info.Info.DocCount - info.DelCount) { throw new Exception("delete count mismatch: info=" + (info.Info.DocCount - info.DelCount) + " vs reader=" + reader.NumDocs); } if ((info.Info.DocCount - reader.NumDocs) > reader.MaxDoc) { throw new Exception("too 
many deleted docs: maxDoc()=" + reader.MaxDoc + " vs del count=" + (info.Info.DocCount - reader.NumDocs)); } if (info.Info.DocCount - numDocs != info.DelCount) { throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs)); } Bits liveDocs = reader.LiveDocs; if (liveDocs == null) { throw new Exception("segment should have deletions, but liveDocs is null"); } else { int numLive = 0; for (int j = 0; j < liveDocs.Length(); j++) { if (liveDocs.Get(j)) { numLive++; } } if (numLive != numDocs) { throw new Exception("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive); } } segInfoStat.NumDeleted = info.Info.DocCount - numDocs; Msg(infoStream, "OK [" + (segInfoStat.NumDeleted) + " deleted docs]"); } else { if (info.DelCount != 0) { throw new Exception("delete count mismatch: info=" + info.DelCount + " vs reader=" + (info.Info.DocCount - numDocs)); } Bits liveDocs = reader.LiveDocs; if (liveDocs != null) { // its ok for it to be non-null here, as long as none are set right? for (int j = 0; j < liveDocs.Length(); j++) { if (!liveDocs.Get(j)) { throw new Exception("liveDocs mismatch: info says no deletions but doc " + j + " is deleted."); } } } Msg(infoStream, "OK"); } if (reader.MaxDoc != info.Info.DocCount) { throw new Exception("SegmentReader.maxDoc() " + reader.MaxDoc + " != SegmentInfos.docCount " + info.Info.DocCount); } // Test getFieldInfos() if (infoStream != null) { infoStream.Write(" test: fields.............."); } FieldInfos fieldInfos = reader.FieldInfos; Msg(infoStream, "OK [" + fieldInfos.Size() + " fields]"); segInfoStat.NumFields = fieldInfos.Size(); // Test Field Norms segInfoStat.FieldNormStatus = TestFieldNorms(reader, infoStream); // Test the Term Index segInfoStat.TermIndexStatus = TestPostings(reader, infoStream, Verbose); // Test Stored Fields segInfoStat.StoredFieldStatus = TestStoredFields(reader, infoStream); // Test Term Vectors segInfoStat.TermVectorStatus = TestTermVectors(reader, infoStream, Verbose, CrossCheckTermVectors_Renamed); segInfoStat.DocValuesStatus = TestDocValues(reader, infoStream); // Rethrow the first exception we encountered // this will cause stats for failed segments to be incremented properly if (segInfoStat.FieldNormStatus.Error != null) { throw new Exception("Field Norm test failed"); } else if (segInfoStat.TermIndexStatus.Error != null) { throw new Exception("Term Index test failed"); } else if (segInfoStat.StoredFieldStatus.Error != null) { throw new Exception("Stored Field test failed"); } else if (segInfoStat.TermVectorStatus.Error != null) { throw new Exception("Term Vector test failed"); } else if (segInfoStat.DocValuesStatus.Error != null) { throw new Exception("DocValues test failed"); } Msg(infoStream, ""); } catch (Exception t) { Msg(infoStream, "FAILED"); string comment; comment = "fixIndex() would remove reference to this segment"; Msg(infoStream, " WARNING: " + comment + "; full exception:"); if (infoStream != null) { // LUCENENET NOTE: Some tests rely on the error type being in // the message. We can't get the error type with StackTrace, we // need ToString() for that. 
infoStream.WriteLine(t.ToString()); //infoStream.WriteLine(t.StackTrace); } Msg(infoStream, ""); result.TotLoseDocCount += toLoseDocCount; result.NumBadSegments++; continue; } finally { if (reader != null) { reader.Dispose(); } } // Keeper result.NewSegments.Add((SegmentCommitInfo)info.Clone()); } if (0 == result.NumBadSegments) { result.Clean = true; } else { Msg(infoStream, "WARNING: " + result.NumBadSegments + " broken segments (containing " + result.TotLoseDocCount + " documents) detected"); } if (!(result.ValidCounter = (result.MaxSegmentName < sis.Counter))) { result.Clean = false; result.NewSegments.Counter = result.MaxSegmentName + 1; Msg(infoStream, "ERROR: Next segment name counter " + sis.Counter + " is not greater than max segment name " + result.MaxSegmentName); } if (result.Clean) { Msg(infoStream, "No problems were detected with this index.\n"); } return result; }
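A hedged sketch of invoking this checker from user code, assuming the enclosing class is CheckIndex with its usual Directory-taking constructor:

// Illustrative only; names follow Lucene.NET 4.x's CheckIndex.
using (Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"))) {
    var checker = new CheckIndex(dir);
    CheckIndex.Status status = checker.DoCheckIndex(null); // null = check every segment
    Console.WriteLine(status.Clean ? "index is clean" : status.NumBadSegments + " broken segments");
}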
/// <summary> /// For definition of "check point" see IndexWriter comments: /// "Clarification: Check Points (and commits)". /// /// Writer calls this when it has made a "consistent /// change" to the index, meaning new files are written to /// the index and the in-memory SegmentInfos have been /// modified to point to those files. /// /// this may or may not be a commit (segments_N may or may /// not have been written). /// /// We simply incref the files referenced by the new /// SegmentInfos and decref the files we had previously /// seen (if any). /// /// If this is a commit, we also call the policy to give it /// a chance to remove other commits. If any commits are /// removed, we decref their files as well. /// </summary> public void Checkpoint(SegmentInfos segmentInfos, bool isCommit) { Debug.Assert(Locked()); //Debug.Assert(Thread.holdsLock(Writer)); long t0 = 0; if (InfoStream.IsEnabled("IFD")) { t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; InfoStream.Message("IFD", "now checkpoint \"" + Writer.SegString(Writer.ToLiveInfos(segmentInfos).Segments) + "\" [" + segmentInfos.Size() + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable // files (because they were in use, on Windows): DeletePendingFiles(); // Incref the files: IncRef(segmentInfos, isCommit); if (isCommit) { // Append to our commits list: Commits.Add(new CommitPoint(CommitsToDelete, Directory, segmentInfos)); // Tell policy so it can remove commits: Policy.OnCommit(Commits); // Decref files for commits that were deleted by the policy: DeleteCommits(); } else { // DecRef old files from the last checkpoint, if any: DecRef(LastFiles); LastFiles.Clear(); // Save files so we can decr on next checkpoint/commit: LastFiles.AddRange(segmentInfos.Files(Directory, false)); } if (InfoStream.IsEnabled("IFD")) { long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; InfoStream.Message("IFD", ((t1 - t0) / 1000000) + " msec to checkpoint"); } }