public virtual void Test() { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostings")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(new ConcurrentMergeScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.OmitNorms = true; ft.IndexOptions = IndexOptions.DOCS_ONLY; Field field = new Field("field", new MyTokenStream(), ft); doc.Add(field); int numDocs = (int.MaxValue / 26) + 1; for (int i = 0; i < numDocs; i++) { w.AddDocument(doc); if (Verbose && i % 100000 == 0) { Console.WriteLine(i + " of " + numDocs + "..."); } } w.ForceMerge(1); w.Dispose(); dir.Dispose(); }
public virtual void TestUpgradeOldSingleSegmentIndexWithAdditions() { foreach (string name in OldSingleSegmentNames) { if (VERBOSE) { Console.WriteLine("testUpgradeOldSingleSegmentIndexWithAdditions: index=" + name); } Directory dir = NewDirectory(OldIndexDirs[name]); Assert.AreEqual(1, GetNumberOfSegments(dir), "Original index must be single segment"); // create a bunch of dummy segments int id = 40; RAMDirectory ramDir = new RAMDirectory(); for (int i = 0; i < 3; i++) { // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge: MergePolicy mp = Random().NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy(); IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(mp); IndexWriter w = new IndexWriter(ramDir, iwc); // add few more docs: for (int j = 0; j < RANDOM_MULTIPLIER * Random().Next(30); j++) { AddDoc(w, id++); } w.Dispose(false); } // add dummy segments (which are all in current // version) to single segment index MergePolicy mp_ = Random().NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy(); IndexWriterConfig iwc_ = (new IndexWriterConfig(TEST_VERSION_CURRENT, null)).SetMergePolicy(mp_); IndexWriter iw = new IndexWriter(dir, iwc_); iw.AddIndexes(ramDir); iw.Dispose(false); // determine count of segments in modified index int origSegCount = GetNumberOfSegments(dir); NewIndexUpgrader(dir).Upgrade(); int segCount = CheckAllSegmentsUpgraded(dir); Assert.AreEqual(origSegCount, segCount, "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged"); dir.Dispose(); } }
internal int docShift; // total # deleted docs that were compacted by this merge public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) { this.docMaps = docMaps; SegmentInfo firstSegment = merge.segments.Info(0); int i = 0; while (true) { SegmentInfo info = infos.Info(i); if (info.Equals(firstSegment)) break; minDocID += info.docCount; i++; } int numDocs = 0; for (int j = 0; j < docMaps.Length; i++, j++) { numDocs += infos.Info(i).docCount; System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); } maxDocID = minDocID + numDocs; starts = new int[docMaps.Length]; newStarts = new int[docMaps.Length]; starts[0] = minDocID; newStarts[0] = minDocID; for (i = 1; i < docMaps.Length; i++) { int lastDocCount = merge.segments.Info(i - 1).docCount; starts[i] = starts[i - 1] + lastDocCount; newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; } docShift = numDocs - mergedDocCount; // There are rare cases when docShift is 0. It happens // if you try to delete a docID that's out of bounds, // because the SegmentReader still allocates deletedDocs // and pretends it has deletions ... so we can't make // this assert here // assert docShift > 0; // Make sure it all adds up: System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1])); }
public virtual void TestLiveChangeToCFS() { Directory dir = NewDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); iwc.SetMergePolicy(NewLogMergePolicy(true)); // Start false: iwc.SetUseCompoundFile(false); iwc.MergePolicy.NoCFSRatio = 0.0d; IndexWriter w = new IndexWriter(dir, iwc); // Change to true: w.Config.SetUseCompoundFile(true); Document doc = new Document(); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit"); doc.Add(NewStringField("field", "foo", Store.NO)); w.AddDocument(doc); w.Commit(); w.ForceMerge(1); w.Commit(); // no compound files after merge Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge"); MergePolicy lmp = w.Config.MergePolicy; lmp.NoCFSRatio = 1.0; lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; w.AddDocument(doc); w.ForceMerge(1); w.Commit(); Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge"); w.Dispose(); dir.Dispose(); }
public virtual void TestMergeStability() { Directory dir = NewDirectory(); // do not use newMergePolicy that might return a MockMergePolicy that ignores the no-CFS ratio MergePolicy mp = NewTieredMergePolicy(); mp.NoCFSRatio = 0; var cfg = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(false).SetMergePolicy(mp); using (var w = new RandomIndexWriter(Random(), dir, cfg)) { var numDocs = AtLeast(500); for (var i = 0; i < numDocs; ++i) { var d = new Document(); AddRandomFields(d); w.AddDocument(d); } w.ForceMerge(1); w.Commit(); } IndexReader reader = DirectoryReader.Open(dir); Directory dir2 = NewDirectory(); mp = NewTieredMergePolicy(); mp.NoCFSRatio = 0; cfg = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(false).SetMergePolicy(mp); using (var w = new RandomIndexWriter(Random(), dir2, cfg)) { w.AddIndexes(reader); w.Commit(); } assertEquals(BytesUsedByExtension(dir), BytesUsedByExtension(dir2)); reader.Dispose(); dir.Dispose(); dir2.Dispose(); }
public virtual void TestKeepNoneOnInitDeletionPolicy() { for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy(this)).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int i = 0; i < 107; i++) { AddDoc(writer); } writer.Dispose(); conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy); mp = conf.MergePolicy; mp.NoCFSRatio = 1.0; writer = new IndexWriter(dir, conf); policy = (KeepNoneOnInitDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.ForceMerge(1); writer.Dispose(); Assert.AreEqual(2, policy.NumOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(2, policy.NumOnCommit); // Simplistic check: just verify the index is in fact // readable: IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); dir.Dispose(); } }
public virtual void TestClone() { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriterConfig clone = (IndexWriterConfig)conf.Clone(); // Make sure parameters that can't be reused are cloned IndexDeletionPolicy delPolicy = conf.IndexDeletionPolicy; IndexDeletionPolicy delPolicyClone = clone.IndexDeletionPolicy; Assert.IsTrue(delPolicy.GetType() == delPolicyClone.GetType() && (delPolicy != delPolicyClone || delPolicy.Clone() == delPolicyClone.Clone())); FlushPolicy flushPolicy = conf.FlushPolicy; FlushPolicy flushPolicyClone = clone.FlushPolicy; Assert.IsTrue(flushPolicy.GetType() == flushPolicyClone.GetType() && (flushPolicy != flushPolicyClone || flushPolicy.Clone() == flushPolicyClone.Clone())); DocumentsWriterPerThreadPool pool = conf.IndexerThreadPool; DocumentsWriterPerThreadPool poolClone = clone.IndexerThreadPool; Assert.IsTrue(pool.GetType() == poolClone.GetType() && (pool != poolClone || pool.Clone() == poolClone.Clone())); MergePolicy mergePolicy = conf.MergePolicy; MergePolicy mergePolicyClone = clone.MergePolicy; Assert.IsTrue(mergePolicy.GetType() == mergePolicyClone.GetType() && (mergePolicy != mergePolicyClone || mergePolicy.Clone() == mergePolicyClone.Clone())); IMergeScheduler mergeSched = conf.MergeScheduler; IMergeScheduler mergeSchedClone = clone.MergeScheduler; Assert.IsTrue(mergeSched.GetType() == mergeSchedClone.GetType() && (mergeSched != mergeSchedClone || mergeSched.Clone() == mergeSchedClone.Clone())); conf.SetMergeScheduler(new SerialMergeScheduler()); #if !FEATURE_CONCURRENTMERGESCHEDULER Assert.AreEqual(typeof(TaskMergeScheduler), clone.MergeScheduler.GetType()); #else Assert.AreEqual(typeof(ConcurrentMergeScheduler), clone.MergeScheduler.GetType()); #endif }
private void AssertSetters(MergePolicy lmp) { lmp.MaxCFSSegmentSizeMB = 2.0; Assert.AreEqual(2.0, lmp.MaxCFSSegmentSizeMB, EPSILON); lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue); lmp.MaxCFSSegmentSizeMB = long.MaxValue / 1024 / 1024.0; Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue); try { lmp.MaxCFSSegmentSizeMB = -2.0; Assert.Fail("Didn't throw IllegalArgumentException"); } catch (System.ArgumentException iae) { // pass } // TODO: Add more checks for other non-double setters! }
private void AssertSetters(MergePolicy lmp) { lmp.MaxCFSSegmentSizeMB = 2.0; Assert.AreEqual(2.0, lmp.MaxCFSSegmentSizeMB, EPSILON); lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue); lmp.MaxCFSSegmentSizeMB = long.MaxValue / 1024 / 1024.0; Assert.AreEqual(long.MaxValue / 1024 / 1024.0, lmp.MaxCFSSegmentSizeMB, EPSILON * long.MaxValue); try { lmp.MaxCFSSegmentSizeMB = -2.0; Assert.Fail("Didn't throw IllegalArgumentException"); } catch (ArgumentOutOfRangeException) // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention) { // pass } // TODO: Add more checks for other non-double setters! }
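// Hedged usage sketch (not taken from the test above): the setters exercised by
// AssertSetters control compound-file (.cfs) behaviour on any MergePolicy.
// NoCFSRatio decides how eagerly merged segments are written as compound files,
// and MaxCFSSegmentSizeMB caps the segment size that may still use CFS.
// "dir" and "analyzer" are placeholder arguments; the API shape assumes Lucene.NET 4.8.
private void ConfigureCompoundFileLimits(Directory dir, Analyzer analyzer)
{
    MergePolicy policy = new TieredMergePolicy();
    policy.NoCFSRatio = 1.0;             // always prefer compound files for merged segments...
    policy.MaxCFSSegmentSizeMB = 512.0;  // ...but only while the merged segment stays under ~512 MB

    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
        .SetMergePolicy(policy);

    using (IndexWriter writer = new IndexWriter(dir, conf))
    {
        // segments produced by merges now follow the CFS rules configured above
    }
}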
public virtual void TestMergeStability() { using Directory dir = NewDirectory(); // do not use newMergePolicy that might return a MockMergePolicy that ignores the no-CFS ratio MergePolicy mp = NewTieredMergePolicy(); mp.NoCFSRatio = 0; var cfg = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetUseCompoundFile(false).SetMergePolicy(mp); using (var w = new RandomIndexWriter(Random, dir, cfg)) { var numDocs = AtLeast(500); for (var i = 0; i < numDocs; ++i) { var d = new Document(); AddRandomFields(d); w.AddDocument(d); } w.ForceMerge(1); w.Commit(); } using IndexReader reader = DirectoryReader.Open(dir); using Directory dir2 = NewDirectory(); mp = NewTieredMergePolicy(); mp.NoCFSRatio = 0; cfg = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetUseCompoundFile(false).SetMergePolicy(mp); using (var w = new RandomIndexWriter(Random, dir2, cfg)) { w.AddIndexes(reader); w.Commit(); } // LUCENENET: We need to explicitly call Equals() and use HashMap in order to ensure our // equality check is done correctly. Calling Assert.AreEqual doesn't guarantee this is done. Assert.True(BytesUsedByExtension(dir).Equals(BytesUsedByExtension(dir2))); }
public virtual void Test2BTerms_Mem() { if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) { throw RuntimeException.Create("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000); IList <BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER; } dir.CheckIndexOnDispose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(new ConcurrentMergeScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random, TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " msec"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } savedTerms = ts.savedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms is null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList <BytesRef> bigOrdTerms = new JCG.List <BytesRef>(savedTerms.GetView(numSavedTerms - 10, 10)); // LUCENENET: Converted end index to length Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
private void HandleMergeException(Exception t, MergePolicy.OneMerge merge) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "handleMergeException: merge=" + SegString(merge.Segments) + " exc=" + t); } // Set the exception on the merge, so if // forceMerge is waiting on us it sees the root // cause exception: merge.Exception = t; AddMergeException(merge); if ((t as MergePolicy.MergeAbortedException) != null) { // We can ignore this exception (it happens when // close(false) or rollback is called), unless the // merge involves segments from external directories, // in which case we must throw it so, for example, the // rollbackTransaction code in addIndexes* is // executed. if (merge.IsExternal) { throw t; } } else { IOUtils.ReThrow(t); } }
internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge) { InitBlock(); directory = writer.GetDirectory(); segment = name; if (merge != null) { checkAbort = new CheckAbort(merge, directory); } else { checkAbort = new AnonymousClassCheckAbort1(this, null, null); } termIndexInterval = writer.GetTermIndexInterval(); }
internal virtual void AddMergeException(MergePolicy.OneMerge merge) { lock (this) { System.Diagnostics.Debug.Assert(merge.GetException() != null); if (!mergeExceptions.Contains(merge) && mergeGen == merge.mergeGen) mergeExceptions.Add(merge); } }
private void EnsureValidMerge(MergePolicy.OneMerge merge) { lock (this) { foreach (SegmentCommitInfo info in merge.Segments) { if (!segmentInfos.Contains(info)) { throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.Info.Name + ") that is not in the current index " + SegString(), directory); } } } }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { const int N = 10; for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepLastNDeletionPolicy(this, N)).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.Dispose(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10); mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int j = 0; j < 17; j++) { AddDocWithID(writer, i * (N + 1) + j); } // this is a commit writer.Dispose(); conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetIndexDeletionPolicy(policy).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.DeleteDocuments(new Term("id", "" + (i * (N + 1) + 3))); // this is a commit writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(16, hits.Length); reader.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy)); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; // this will not commit: there are no changes // pending because we opened for "create": writer.Dispose(); } Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnInit); Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnCommit); IndexReader rwReader = DirectoryReader.Open(dir); IndexSearcher searcher_ = NewSearcher(rwReader); ScoreDoc[] hits_ = searcher_.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits_.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetLastCommitGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; rwReader.Dispose(); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.Open(dir); // Work backwards in commits on what the expected // count should be. 
searcher_ = NewSearcher(reader); hits_ = searcher_.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(expectedCount, hits_.Length); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } reader.Dispose(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (IOException /*e*/) { if (i != N) { throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Dispose(); } }
/// <summary>Called whenever a merge has completed and the merged segments had deletions </summary> internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) { lock (this) { if (docMaps == null) // The merged segments had no deletes so docIDs did not change and we have nothing to do return ; MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount); deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); flushedDocCount -= mapper.docShift; } }
public virtual void TestExpirationTimeDeletionPolicy() { const double SECONDS = 2.0; Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(this, dir, SECONDS)); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = 1.0; IndexWriter writer = new IndexWriter(dir, conf); ExpirationTimeDeletionPolicy policy = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy; IDictionary <string, string> commitData = new Dictionary <string, string>(); commitData["commitTime"] = Convert.ToString(Environment.TickCount); writer.SetCommitData(commitData); writer.Commit(); writer.Dispose(); long lastDeleteTime = 0; int targetNumDelete = TestUtil.NextInt32(Random, 1, 5); while (policy.NumDelete < targetNumDelete) { // Record last time when writer performed deletes of // past commits lastDeleteTime = Environment.TickCount; conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy); mp = conf.MergePolicy; mp.NoCFSRatio = 1.0; writer = new IndexWriter(dir, conf); policy = (ExpirationTimeDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int j = 0; j < 17; j++) { AddDoc(writer); } commitData = new Dictionary <string, string>(); commitData["commitTime"] = Convert.ToString(Environment.TickCount); writer.SetCommitData(commitData); writer.Commit(); writer.Dispose(); Thread.Sleep((int)(1000.0 * (SECONDS / 5.0))); } // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.GetLastCommitGeneration(dir); string fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); bool oneSecondResolution = true; while (gen > 0) { try { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); // if we are on a filesystem that seems to have only // 1 second resolution, allow +1 second in commit // age tolerance: SegmentInfos sis = new SegmentInfos(); sis.Read(dir, fileName); long modTime = Convert.ToInt64(sis.UserData["commitTime"]); oneSecondResolution &= (modTime % 1000) == 0; long leeway = (long)((SECONDS + (oneSecondResolution ? 1.0 : 0.0)) * 1000); Assert.IsTrue(lastDeleteTime - modTime <= leeway, "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted "); } #pragma warning disable 168 catch (IOException e) #pragma warning restore 168 { // OK break; } dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.Dispose(); }
/// <summary>Does the actual merge, by calling <see cref="IndexWriter.Merge(MergePolicy.OneMerge)"/>. </summary> protected internal virtual void DoMerge(MergePolicy.OneMerge merge) { writer.Merge(merge); }
public MergeThread(ConcurrentMergeScheduler enclosingInstance, IndexWriter writer, MergePolicy.OneMerge startMerge) { InitBlock(enclosingInstance); this.writer = writer; this.startMerge = startMerge; }
public virtual void RunTest(string testName) { m_failed.Value = (false); m_addCount.Value = 0; m_delCount.Value = 0; m_packCount.Value = 0; long t0 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results Random random = new J2N.Randomizer(Random.NextInt64()); using LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues); DirectoryInfo tempDir = CreateTempDir(testName); m_dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (m_dir is BaseDirectoryWrapper baseDirectoryWrapper) { baseDirectoryWrapper.CheckIndexOnDispose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random); analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TestNightly) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy tieredMergePolicy) { //tieredMergePolicy.MaxMergedSegmentMB = 5000.0; tieredMergePolicy.MaxMergedSegmentMB = 2500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour } else if (mp is LogByteSizeMergePolicy logByteSizeMergePolicy) { //logByteSizeMergePolicy.MaxMergeMB = 1000.0; logByteSizeMergePolicy.MaxMergeMB = 500.0; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour } else if (mp is LogMergePolicy logMergePolicy) { //logMergePolicy.MaxMergeDocs = 100000; logMergePolicy.MaxMergeDocs = 50000; // LUCENENET specific - reduced each number by 50% to keep testing time under 1 hour } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousClass(this)); if (Verbose) { conf.SetInfoStream(new PrintStreamInfoStreamAnonymousClass(Console.Out)); } m_writer = new IndexWriter(m_dir, conf); TestUtil.ReduceOpenFiles(m_writer); TaskScheduler es = LuceneTestCase.Random.NextBoolean() ? null : TaskScheduler.Default; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt32(LuceneTestCase.Random, 2, 4); //int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 300 : RandomMultiplier; // LUCENENET specific - lowered from 300 to 150 to reduce total time on Nightly // build to less than 1 hour. int RUN_TIME_SEC = LuceneTestCase.TestNightly ? 
150 : RandomMultiplier; ISet <string> delIDs = new ConcurrentHashSet <string>(); ISet <string> delPackIDs = new ConcurrentHashSet <string>(); ConcurrentQueue <SubDocs> allSubDocs = new ConcurrentQueue <SubDocs>(); long stopTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + (RUN_TIME_SEC * 1000); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results ThreadJob[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (Verbose) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (Verbose) { Console.WriteLine("TEST: all searching done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (Verbose) { Console.WriteLine("TEST: done join indexing threads [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]; addCount=" + m_addCount + " delCount=" + m_delCount); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } IndexSearcher s = GetFinalSearcher(); if (Verbose) { Console.WriteLine("TEST: finalSearcher=" + s); } assertFalse(m_failed); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { assertEquals(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); assertEquals(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); assertEquals(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { assertEquals(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { assertEquals(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid"), CultureInfo.InvariantCulture); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = id.ToString(CultureInfo.InvariantCulture); if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + Collections.ToString(delIDs)); doFail = true; } } } assertFalse(doFail); assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, s.IndexReader.NumDocs); ReleaseSearcher(s); m_writer.Commit(); assertEquals("index=" + m_writer.SegString() + " addCount=" + m_addCount + " delCount=" + m_delCount, m_addCount - m_delCount, m_writer.NumDocs); DoClose(); m_writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) * { * es.shutdown(); * es.awaitTermination(1, TimeUnit.SECONDS); * }*/ TestUtil.CheckIndex(m_dir); m_dir.Dispose(); //System.IO.Directory.Delete(tempDir.FullName, true); TestUtil.Rm(tempDir); if (Verbose) { Console.WriteLine("TEST: done [" + ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t0) + " ms]"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results } }
private bool CommitMerge(MergePolicy.OneMerge merge, MergeState mergeState) { lock (this) { Debug.Assert(TestPoint("startCommitMerge")); if (HitOOM) { throw new InvalidOperationException("this writer hit an OutOfMemoryError; cannot complete merge"); } if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "commitMerge: " + SegString(merge.Segments) + " index=" + SegString()); } Debug.Assert(merge.RegisterDone); // If merge was explicitly aborted, or, if rollback() or // rollbackTransaction() had been called since our merge // started (which results in an unqualified // deleter.refresh() call that will remove any index // file that current segments does not reference), we // abort this merge if (merge.Aborted) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "commitMerge: skip: it was aborted"); } // In case we opened and pooled a reader for this // segment, drop it now. this ensures that we close // the reader before trying to delete any of its // files. this is not a very big deal, since this // reader will never be used by any NRT reader, and // another thread is currently running close(false) // so it will be dropped shortly anyway, but not // doing this makes MockDirWrapper angry in // TestNRTThreads (LUCENE-5434): readerPool.Drop(merge.Info_Renamed); Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); return false; } ReadersAndUpdates mergedUpdates = merge.Info_Renamed.Info.DocCount == 0 ? null : CommitMergedDeletesAndUpdates(merge, mergeState); // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMerge: mergedDeletes=" + mergedDeletes); // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound // format as well: Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed)); bool allDeleted = merge.Segments.Count == 0 || merge.Info_Renamed.Info.DocCount == 0 || (mergedUpdates != null && mergedUpdates.PendingDeleteCount == merge.Info_Renamed.Info.DocCount); if (infoStream.IsEnabled("IW")) { if (allDeleted) { infoStream.Message("IW", "merged segment " + merge.Info_Renamed + " is 100% deleted" + (KeepFullyDeletedSegments_Renamed ? "" : "; skipping insert")); } } bool dropSegment = allDeleted && !KeepFullyDeletedSegments_Renamed; // If we merged no segments then we better be dropping // the new segment: Debug.Assert(merge.Segments.Count > 0 || dropSegment); Debug.Assert(merge.Info_Renamed.Info.DocCount != 0 || KeepFullyDeletedSegments_Renamed || dropSegment); if (mergedUpdates != null) { bool success = false; try { if (dropSegment) { mergedUpdates.DropChanges(); } // Pass false for assertInfoLive because the merged // segment is not yet live (only below do we commit it // to the segmentInfos): readerPool.Release(mergedUpdates, false); success = true; } finally { if (!success) { mergedUpdates.DropChanges(); readerPool.Drop(merge.Info_Renamed); } } } // Must do this after readerPool.release, in case an // exception is hit e.g. 
writing the live docs for the // merge segment, in which case we need to abort the // merge: segmentInfos.ApplyMergeChanges(merge, dropSegment); if (dropSegment) { Debug.Assert(!segmentInfos.Contains(merge.Info_Renamed)); readerPool.Drop(merge.Info_Renamed); Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } bool success_ = false; try { // Must close before checkpoint, otherwise IFD won't be // able to delete the held-open files from the merge // readers: CloseMergeReaders(merge, false); success_ = true; } finally { // Must note the change to segmentInfos so any commits // in-flight don't lose it (IFD will incRef/protect the // new files we created): if (success_) { Checkpoint(); } else { try { Checkpoint(); } catch (Exception) { // Ignore so we keep throwing original exception. } } } Deleter.DeletePendingFiles(); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "after commitMerge: " + SegString()); } if (merge.MaxNumSegments != -1 && !dropSegment) { // cascade the forceMerge: if (!SegmentsToMerge.ContainsKey(merge.Info_Renamed)) { SegmentsToMerge[merge.Info_Renamed] = false; } } return true; } }
/// <summary> /// Carefully merges deletes and updates for the segments we just merged. this /// is tricky because, although merging will clear all deletes (compacts the /// documents) and compact all the updates, new deletes and updates may have /// been flushed to the segments since the merge was started. this method /// "carries over" such new deletes and updates onto the newly merged segment, /// and saves the resulting deletes and updates files (incrementing the delete /// and DV generations for merge.info). If no deletes were flushed, no new /// deletes file is saved. /// </summary> private ReadersAndUpdates CommitMergedDeletesAndUpdates(MergePolicy.OneMerge merge, MergeState mergeState) { lock (this) { Debug.Assert(TestPoint("startCommitMergeDeletes")); IList<SegmentCommitInfo> sourceSegments = merge.Segments; if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "commitMergeDeletes " + SegString(merge.Segments)); } // Carefully merge deletes that occurred after we // started merging: int docUpto = 0; long minGen = long.MaxValue; // Lazy init (only when we find a delete to carry over): MergedDeletesAndUpdates holder = new MergedDeletesAndUpdates(); DocValuesFieldUpdates.Container mergedDVUpdates = new DocValuesFieldUpdates.Container(); for (int i = 0; i < sourceSegments.Count; i++) { SegmentCommitInfo info = sourceSegments[i]; minGen = Math.Min(info.BufferedDeletesGen, minGen); int docCount = info.Info.DocCount; Bits prevLiveDocs = merge.Readers[i].LiveDocs; ReadersAndUpdates rld = readerPool.Get(info, false); // We hold a ref so it should still be in the pool: Debug.Assert(rld != null, "seg=" + info.Info.Name); Bits currentLiveDocs = rld.LiveDocs; IDictionary<string, DocValuesFieldUpdates> mergingFieldUpdates = rld.MergingFieldUpdates; string[] mergingFields; DocValuesFieldUpdates[] dvFieldUpdates; DocValuesFieldUpdates.Iterator[] updatesIters; if (mergingFieldUpdates.Count == 0) { mergingFields = null; updatesIters = null; dvFieldUpdates = null; } else { mergingFields = new string[mergingFieldUpdates.Count]; dvFieldUpdates = new DocValuesFieldUpdates[mergingFieldUpdates.Count]; updatesIters = new DocValuesFieldUpdates.Iterator[mergingFieldUpdates.Count]; int idx = 0; foreach (KeyValuePair<string, DocValuesFieldUpdates> e in mergingFieldUpdates) { string field = e.Key; DocValuesFieldUpdates updates = e.Value; mergingFields[idx] = field; dvFieldUpdates[idx] = mergedDVUpdates.GetUpdates(field, updates.Type); if (dvFieldUpdates[idx] == null) { dvFieldUpdates[idx] = mergedDVUpdates.NewUpdates(field, updates.Type, mergeState.SegmentInfo.DocCount); } updatesIters[idx] = updates.GetIterator(); updatesIters[idx].NextDoc(); // advance to first update doc ++idx; } } // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: info=" + info + ", mergingUpdates=" + mergingUpdates); if (prevLiveDocs != null) { // If we had deletions on starting the merge we must // still have deletions now: Debug.Assert(currentLiveDocs != null); Debug.Assert(prevLiveDocs.Length() == docCount); Debug.Assert(currentLiveDocs.Length() == docCount); // There were deletes on this segment when the merge // started. The merge has collapsed away those // deletes, but, if new deletes were flushed since // the merge started, we must now carefully keep any // newly flushed deletes but mapping them to the new // docIDs. // Since we copy-on-write, if any new deletes were // applied after merging has started, we can just // check if the before/after liveDocs have changed. 
// If so, we must carefully merge the liveDocs one // doc at a time: if (currentLiveDocs != prevLiveDocs) { // this means this segment received new deletes // since we started the merge, so we // must merge them: for (int j = 0; j < docCount; j++) { if (!prevLiveDocs.Get(j)) { Debug.Assert(!currentLiveDocs.Get(j)); } else { if (!currentLiveDocs.Get(j)) { if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs) { holder.Init(readerPool, merge, mergeState, true); } holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto)); if (mergingFields != null) // advance all iters beyond the deleted document { SkipDeletedDoc(updatesIters, j); } } else if (mergingFields != null) { MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); } docUpto++; } } } else if (mergingFields != null) { // need to check each non-deleted document if it has any updates for (int j = 0; j < docCount; j++) { if (prevLiveDocs.Get(j)) { // document isn't deleted, check if any of the fields have an update to it MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); // advance docUpto for every non-deleted document docUpto++; } else { // advance all iters beyond the deleted document SkipDeletedDoc(updatesIters, j); } } } else { docUpto += info.Info.DocCount - info.DelCount - rld.PendingDeleteCount; } } else if (currentLiveDocs != null) { Debug.Assert(currentLiveDocs.Length() == docCount); // this segment had no deletes before but now it // does: for (int j = 0; j < docCount; j++) { if (!currentLiveDocs.Get(j)) { if (holder.MergedDeletesAndUpdates_Renamed == null || !holder.InitializedWritableLiveDocs) { holder.Init(readerPool, merge, mergeState, true); } holder.MergedDeletesAndUpdates_Renamed.Delete(holder.DocMap.Map(docUpto)); if (mergingFields != null) // advance all iters beyond the deleted document { SkipDeletedDoc(updatesIters, j); } } else if (mergingFields != null) { MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); } docUpto++; } } else if (mergingFields != null) { // no deletions before or after, but there were updates for (int j = 0; j < docCount; j++) { MaybeApplyMergedDVUpdates(merge, mergeState, docUpto, holder, mergingFields, dvFieldUpdates, updatesIters, j); // advance docUpto for every non-deleted document docUpto++; } } else { // No deletes or updates before or after docUpto += info.Info.DocCount; } } Debug.Assert(docUpto == merge.Info_Renamed.Info.DocCount); if (mergedDVUpdates.Any()) { // System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates); bool success = false; try { // if any error occurs while writing the field updates we should release // the info, otherwise it stays in the pool but is considered not "live" // which later causes false exceptions in pool.dropAll(). // NOTE: currently this is the only place which throws a true // IOException. If this ever changes, we need to extend that try/finally // block to the rest of the method too. 
holder.MergedDeletesAndUpdates_Renamed.WriteFieldUpdates(directory, mergedDVUpdates); success = true; } finally { if (!success) { holder.MergedDeletesAndUpdates_Renamed.DropChanges(); readerPool.Drop(merge.Info_Renamed); } } } if (infoStream.IsEnabled("IW")) { if (holder.MergedDeletesAndUpdates_Renamed == null) { infoStream.Message("IW", "no new deletes or field updates since merge started"); } else { string msg = holder.MergedDeletesAndUpdates_Renamed.PendingDeleteCount + " new deletes"; if (mergedDVUpdates.Any()) { msg += " and " + mergedDVUpdates.Size() + " new field updates"; } msg += " since merge started"; infoStream.Message("IW", msg); } } merge.Info_Renamed.BufferedDeletesGen = minGen; return holder.MergedDeletesAndUpdates_Renamed; } }
private void MaybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, MergedDeletesAndUpdates holder, string[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc) { int newDoc = -1; for (int idx = 0; idx < mergingFields.Length; idx++) { DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx]; if (updatesIter.Doc() == curDoc) // document has an update { if (holder.MergedDeletesAndUpdates_Renamed == null) { holder.Init(readerPool, merge, mergeState, false); } if (newDoc == -1) // map once per all field updates, but only if there are any updates { newDoc = holder.DocMap.Map(docUpto); } DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx]; dvUpdates.Add(newDoc, updatesIter.Value()); updatesIter.NextDoc(); // advance to next document } else { Debug.Assert(updatesIter.Doc() > curDoc, "field=" + mergingFields[idx] + " updateDoc=" + updatesIter.Doc() + " curDoc=" + curDoc); } } }
internal void Init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, bool initWritableLiveDocs) { if (MergedDeletesAndUpdates_Renamed == null) { MergedDeletesAndUpdates_Renamed = readerPool.Get(merge.Info_Renamed, true); DocMap = merge.GetDocMap(mergeState); Debug.Assert(DocMap.IsConsistent(merge.Info_Renamed.Info.DocCount)); } if (initWritableLiveDocs && !InitializedWritableLiveDocs) { MergedDeletesAndUpdates_Renamed.InitWritableLiveDocs(); this.InitializedWritableLiveDocs = true; } }
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { MockDirectoryWrapper dir = new MockDirectoryWrapper(Random(), new MMapDirectory(CreateTempDir("4GBStoredFields"))); dir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER; var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(); ft.IsIndexed = false; ft.IsStored = true; ft.Freeze(); int valueLength = RandomInts.NextIntBetween(Random(), 1 << 13, 1 << 20); var value = new byte[valueLength]; for (int i = 0; i < valueLength; ++i) { // random so that even compressing codecs can't compress it value[i] = (byte)Random().Next(256); } Field f = new Field("fld", value, ft); doc.Add(f); int numDocs = (int)((1L << 32) / valueLength + 100); for (int i = 0; i < numDocs; ++i) { w.AddDocument(doc); if (VERBOSE && i % (numDocs / 10) == 0) { Console.WriteLine(i + " of " + numDocs + "..."); } } w.ForceMerge(1); w.Dispose(); if (VERBOSE) { bool found = false; foreach (string file in dir.ListAll()) { if (file.EndsWith(".fdt", StringComparison.Ordinal)) { long fileLength = dir.FileLength(file); if (fileLength >= 1L << 32) { found = true; } Console.WriteLine("File length of " + file + " : " + fileLength); } } if (!found) { Console.WriteLine("No .fdt file larger than 4GB, test bug?"); } } DirectoryReader rd = DirectoryReader.Open(dir); Document sd = rd.Document(numDocs - 1); Assert.IsNotNull(sd); Assert.AreEqual(1, sd.Fields.Count); BytesRef valueRef = sd.GetBinaryValue("fld"); Assert.IsNotNull(valueRef); Assert.AreEqual(new BytesRef(value), valueRef); rd.Dispose(); dir.Dispose(); }
/// <summary> /// Merges the indicated segments, replacing them in the stack with a /// single segment. /// /// @lucene.experimental /// </summary> public virtual void Merge(MergePolicy.OneMerge merge) { bool success = false; long t0 = Environment.TickCount; try { try { try { MergeInit(merge); //if (merge.info != null) { //System.out.println("MERGE: " + merge.info.info.name); //} if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "now merge\n merge=" + SegString(merge.Segments) + "\n index=" + SegString()); } MergeMiddle(merge); MergeSuccess(merge); success = true; } catch (Exception t) { HandleMergeException(t, merge); } } finally { lock (this) { MergeFinish(merge); if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "hit exception during merge"); } if (merge.Info_Renamed != null && !segmentInfos.Contains(merge.Info_Renamed)) { Deleter.Refresh(merge.Info_Renamed.Info.Name); } } // this merge (and, generally, any change to the // segments) may now enable new merges, so we call // merge policy & update pending merges. if (success && !merge.Aborted && (merge.MaxNumSegments != -1 || (!closed && !Closing))) { UpdatePendingMerges(MergeTrigger.MERGE_FINISHED, merge.MaxNumSegments); } } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "merge"); } if (merge.Info_Renamed != null && !merge.Aborted) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "merge time " + (Environment.TickCount - t0) + " msec for " + merge.Info_Renamed.Info.DocCount + " docs"); } } }
/// <summary>Create and return a new MergeThread </summary> protected internal virtual MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) { lock (this) { MergeThread thread = new MergeThread(this, writer, merge); thread.SetThreadPriority(mergeThreadPriority); thread.IsBackground = true; thread.Name = "Lucene Merge Thread #" + mergeThreadCount++; return thread; } }
/// <summary> /// Hook that's called when the specified merge is complete. </summary> internal virtual void MergeSuccess(MergePolicy.OneMerge merge) { }
public virtual void SetRunningMerge(MergePolicy.OneMerge merge) { lock (this) { runningMerge = merge; } }
/// <summary> /// Checks whether this merge involves any segments /// already participating in a merge. If not, this merge /// is "registered", meaning we record that its segments /// are now participating in a merge, and true is /// returned. Else (the merge conflicts) false is /// returned. /// </summary> internal bool RegisterMerge(MergePolicy.OneMerge merge) { lock (this) { if (merge.RegisterDone) { return true; } Debug.Assert(merge.Segments.Count > 0); if (StopMerges) { merge.Abort(); throw new MergePolicy.MergeAbortedException("merge is aborted: " + SegString(merge.Segments)); } bool isExternal = false; foreach (SegmentCommitInfo info in merge.Segments) { if (mergingSegments.Contains(info)) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "reject merge " + SegString(merge.Segments) + ": segment " + SegString(info) + " is already marked for merge"); } return false; } if (!segmentInfos.Contains(info)) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "reject merge " + SegString(merge.Segments) + ": segment " + SegString(info) + " does not exist in live infos"); } return false; } if (info.Info.Dir != directory) { isExternal = true; } if (SegmentsToMerge.ContainsKey(info)) { merge.MaxNumSegments = MergeMaxNumSegments; } } EnsureValidMerge(merge); PendingMerges.AddLast(merge); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "add merge to pendingMerges: " + SegString(merge.Segments) + " [total " + PendingMerges.Count + " pending]"); } merge.MergeGen = MergeGen; merge.IsExternal = isExternal; // OK it does not conflict; now record that this merge // is running (while synchronized) to avoid race // condition where two conflicting merges from different // threads, start if (infoStream.IsEnabled("IW")) { StringBuilder builder = new StringBuilder("registerMerge merging= ["); foreach (SegmentCommitInfo info in mergingSegments) { builder.Append(info.Info.Name).Append(", "); } builder.Append("]"); // don't call mergingSegments.toString() could lead to ConcurrentModException // since merge updates the segments FieldInfos if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", builder.ToString()); } } foreach (SegmentCommitInfo info in merge.Segments) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "registerMerge info=" + SegString(info)); } mergingSegments.Add(info); } Debug.Assert(merge.EstimatedMergeBytes == 0); Debug.Assert(merge.TotalMergeBytes == 0); foreach (SegmentCommitInfo info in merge.Segments) { if (info.Info.DocCount > 0) { int delCount = NumDeletedDocs(info); Debug.Assert(delCount <= info.Info.DocCount); double delRatio = ((double)delCount) / info.Info.DocCount; merge.EstimatedMergeBytes += (long)(info.SizeInBytes() * (1.0 - delRatio)); merge.TotalMergeBytes += info.SizeInBytes(); } } // Merge is now registered merge.RegisterDone = true; return true; } }
/// <summary> /// Expert: <seealso cref="MergePolicy"/> is invoked whenever there are changes to the /// segments in the index. Its role is to select which merges to do, if any, /// and return a <seealso cref="MergePolicy.MergeSpecification"/> describing the merges. /// It also selects merges to do for forceMerge. /// /// <p>Only takes effect when IndexWriter is first created. /// </summary> public IndexWriterConfig SetMergePolicy(MergePolicy mergePolicy) { if (mergePolicy == null) { throw new System.ArgumentException("mergePolicy must not be null"); } this.mergePolicy = mergePolicy; return this; }
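// Hedged sketch of the setter in use (assumed names, not from the original source):
// the merge policy must be configured on the IndexWriterConfig before the writer is
// constructed, because, as the summary above notes, it only takes effect when the
// IndexWriter is first created. "dir" and "analyzer" are caller-supplied placeholders.
private IndexWriter CreateWriterWithCustomMergePolicy(Directory dir, Analyzer analyzer)
{
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.MergeFactor = 10;     // merge roughly ten segments per level
    mergePolicy.MaxMergeMB = 512.0;   // leave very large segments alone during normal merging

    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
        .SetMergePolicy(mergePolicy)
        .SetOpenMode(OpenMode.CREATE_OR_APPEND);

    // Per the summary above, replacing conf's merge policy after this point would not affect the returned writer.
    return new IndexWriter(dir, conf);
}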
/// <summary> /// Does initial setup for a merge, which is fast but holds /// the synchronized lock on IndexWriter instance. /// </summary> internal void MergeInit(MergePolicy.OneMerge merge) { lock (this) { bool success = false; try { _mergeInit(merge); success = true; } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "hit exception in mergeInit"); } MergeFinish(merge); } } } }
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { Console.WriteLine("TEST: cycle pass="******"TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.ForceMerge(1); writer.Dispose(); } Assert.AreEqual(needsMerging ? 2 : 1, policy.NumOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.NumOnCommit); // Test listCommits ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); // 2 from closing writer Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count); // Make sure we can open a reader on each commit: foreach (IndexCommit commit in commits) { IndexReader r = DirectoryReader.Open(commit); r.Dispose(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.ListAll().Length; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy)); writer.Dispose(); int postCount = dir.ListAll().Length; Assert.IsTrue(postCount < preCount); } } dir.Dispose(); } }
private void _mergeInit(MergePolicy.OneMerge merge) { lock (this) { Debug.Assert(TestPoint("startMergeInit")); Debug.Assert(merge.RegisterDone); Debug.Assert(merge.MaxNumSegments == -1 || merge.MaxNumSegments > 0); if (HitOOM) { throw new InvalidOperationException("this writer hit an OutOfMemoryError; cannot merge"); } if (merge.Info_Renamed != null) { // mergeInit already done return; } if (merge.Aborted) { return; } // TODO: in the non-pool'd case this is somewhat // wasteful, because we open these readers, close them, // and then open them again for merging. Maybe we // could pre-pool them somehow in that case... // Lock order: IW -> BD BufferedUpdatesStream.ApplyDeletesResult result = BufferedUpdatesStream.ApplyDeletesAndUpdates(readerPool, merge.Segments); if (result.AnyDeletes) { Checkpoint(); } if (!KeepFullyDeletedSegments_Renamed && result.AllDeleted != null) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "drop 100% deleted segments: " + result.AllDeleted); } foreach (SegmentCommitInfo info in result.AllDeleted) { segmentInfos.Remove(info); if (merge.Segments.Contains(info)) { mergingSegments.Remove(info); merge.Segments.Remove(info); } readerPool.Drop(info); } Checkpoint(); } // Bind a new segment name here so even with // ConcurrentMergePolicy we keep deterministic segment // names. string mergeSegmentName = NewSegmentName(); SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, Codec, null); IDictionary<string, string> details = new Dictionary<string, string>(); details["mergeMaxNumSegments"] = "" + merge.MaxNumSegments; details["mergeFactor"] = Convert.ToString(merge.Segments.Count); SetDiagnostics(si, SOURCE_MERGE, details); merge.Info = new SegmentCommitInfo(si, 0, -1L, -1L); // System.out.println("[" + Thread.currentThread().getName() + "] IW._mergeInit: " + segString(merge.segments) + " into " + si); // Lock order: IW -> BD BufferedUpdatesStream.Prune(segmentInfos); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "merge seg=" + merge.Info_Renamed.Info.Name + " " + SegString(merge.Segments)); } } }
public virtual void TestKeepLastNDeletionPolicy() { const int N = 5; for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); for (int j = 0; j < N + 1; j++) { IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int i = 0; i < 17; i++) { AddDoc(writer); } writer.ForceMerge(1); writer.Dispose(); } Assert.IsTrue(policy.NumDelete > 0); Assert.AreEqual(N + 1, policy.NumOnInit); Assert.AreEqual(N + 1, policy.NumOnCommit); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetLastCommitGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); if (i == N) { Assert.Fail("should have failed on commits prior to last " + N); } } catch (IOException /*e*/) { if (i != N) { throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Dispose(); } }
/// <summary> /// Does the finishing work for a merge, which is fast but holds /// the synchronized lock on the IndexWriter instance. /// </summary> public void MergeFinish(MergePolicy.OneMerge merge) { lock (this) { // forceMerge, addIndexes or finishMerges may be waiting // on merges to finish. Monitor.PulseAll(this); // It's possible we are called twice, e.g. if there was an // exception inside mergeInit if (merge.RegisterDone) { IList<SegmentCommitInfo> sourceSegments = merge.Segments; foreach (SegmentCommitInfo info in sourceSegments) { mergingSegments.Remove(info); } merge.RegisterDone = false; } RunningMerges.Remove(merge); } }
/// <summary> /// Wrap the given <seealso cref="MergePolicy"/> and intercept forceMerge requests to /// only upgrade segments written with previous Lucene versions. /// </summary> public UpgradeIndexMergePolicy(MergePolicy @base) { this.@base = @base; }
public virtual void Test([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BPostingsBytes1")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE); IndexWriter w = new IndexWriter(dir, config); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; MyTokenStream tokenStream = new MyTokenStream(); Field field = new Field("field", tokenStream, ft); doc.Add(field); const int numDocs = 1000; for (int i = 0; i < numDocs; i++) { if (i % 2 == 1) // trick blockPF's little optimization { tokenStream.n = 65536; } else { tokenStream.n = 65537; } w.AddDocument(doc); } w.ForceMerge(1); w.Dispose(); DirectoryReader oneThousand = DirectoryReader.Open(dir); IndexReader[] subReaders = new IndexReader[1000]; Arrays.Fill(subReaders, oneThousand); MultiReader mr = new MultiReader(subReaders); BaseDirectoryWrapper dir2 = NewFSDirectory(CreateTempDir("2BPostingsBytes2")); if (dir2 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir2).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w2.AddIndexes(mr); w2.ForceMerge(1); w2.Dispose(); oneThousand.Dispose(); DirectoryReader oneMillion = DirectoryReader.Open(dir2); subReaders = new IndexReader[2000]; Arrays.Fill(subReaders, oneMillion); mr = new MultiReader(subReaders); BaseDirectoryWrapper dir3 = NewFSDirectory(CreateTempDir("2BPostingsBytes3")); if (dir3 is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir3).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); w3.AddIndexes(mr); w3.ForceMerge(1); w3.Dispose(); oneMillion.Dispose(); dir.Dispose(); dir2.Dispose(); dir3.Dispose(); }
public CheckAbort(MergePolicy.OneMerge merge, Directory dir) { this.merge = merge; this.dir = dir; }
protected override void DoMerge(MergePolicy.OneMerge merge) { TotMergedBytes += merge.TotalBytesSize(); base.DoMerge(merge); }
private void CloseMergeReaders(MergePolicy.OneMerge merge, bool suppressExceptions) { lock (this) { int numSegments = merge.Readers.Count; Exception th = null; bool drop = !suppressExceptions; for (int i = 0; i < numSegments; i++) { SegmentReader sr = merge.Readers[i]; if (sr != null) { try { ReadersAndUpdates rld = readerPool.Get(sr.SegmentInfo, false); // We still hold a ref so it should not have been removed: Debug.Assert(rld != null); if (drop) { rld.DropChanges(); } else { rld.DropMergingUpdates(); } rld.Release(sr); readerPool.Release(rld); if (drop) { readerPool.Drop(rld.Info); } } catch (Exception t) { if (th == null) { th = t; } } merge.Readers[i] = null; } } // If any error occurred, throw it. if (!suppressExceptions) { IOUtils.ReThrow(th); } } }
private static void ConfigureRandom(Random r, MergePolicy mergePolicy) { if (r.NextBoolean()) { mergePolicy.NoCFSRatio = 0.1 + r.NextDouble() * 0.8; } else { mergePolicy.NoCFSRatio = r.NextBoolean() ? 1.0 : 0.0; } if (Rarely()) { mergePolicy.MaxCFSSegmentSizeMB = 0.2 + r.NextDouble() * 2.0; } else { mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity; } }
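// Illustrative sketch, not part of the original source: it sets the same two merge-policy knobs that
// ConfigureRandom randomizes above, but with explicit values, to make their meaning concrete.
// NoCFSRatio = 1.0 means every merged segment is written as a compound file (.cfs), 0.0 disables compound
// files, and MaxCFSSegmentSizeMB caps how large a segment may be and still be written as CFS.
MergePolicy explicitPolicy = NewLogMergePolicy();
explicitPolicy.NoCFSRatio = 1.0; // always use compound files, as the CFS-oriented tests in this file do
explicitPolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity; // never fall back to non-CFS because of segment size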
protected override void DoMerge(MergePolicy.OneMerge merge) { try { // Stall all incoming merges until we see // maxMergeCount: int count = RunningMergeCount.IncrementAndGet(); try { Assert.IsTrue(count <= MaxMergeCount, "count=" + count + " vs maxMergeCount=" + MaxMergeCount); EnoughMergesWaiting.Signal(); // Stall this merge until we see exactly // maxMergeCount merges waiting while (true) { // wait for 10 milliseconds if (EnoughMergesWaiting.Wait(new TimeSpan(0, 0, 0, 0, 10)) || Failed.Get()) { break; } } // Then sleep a bit to give a chance for the bug // (too many pending merges) to appear: Thread.Sleep(20); base.DoMerge(merge); } finally { RunningMergeCount.DecrementAndGet(); } } catch (Exception t) { Failed.Set(true); Writer.MergeFinish(merge); throw new Exception(t.Message, t); } }
/// <summary> /// Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.Info_Renamed.Info.Name; IList<SegmentCommitInfo> sourceSegments = merge.Segments; IOContext context = new IOContext(merge.MergeInfo); MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory); TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "merging " + SegString(merge.Segments)); } merge.Readers = new List<SegmentReader>(); // this is try/finally to make sure merger's readers are // closed: bool success = false; try { int segUpto = 0; while (segUpto < sourceSegments.Count) { SegmentCommitInfo info = sourceSegments[segUpto]; // Hold onto the "live" reader; we will use this to // commit merged deletes ReadersAndUpdates rld = readerPool.Get(info, true); // Carefully pull the most recent live docs and reader SegmentReader reader; Bits liveDocs; int delCount; lock (this) { // Must sync to ensure BufferedDeletesStream cannot change liveDocs, // pendingDeleteCount and field updates while we pull a copy: reader = rld.GetReaderForMerge(context); liveDocs = rld.ReadOnlyLiveDocs; delCount = rld.PendingDeleteCount + info.DelCount; Debug.Assert(reader != null); Debug.Assert(rld.VerifyDocCounts()); if (infoStream.IsEnabled("IW")) { if (rld.PendingDeleteCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount); } else if (info.DelCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount); } else { infoStream.Message("IW", "seg=" + SegString(info) + " no deletes"); } } } // Deletes might have happened after we pulled the merge reader and // before we got a read-only copy of the segment's actual live docs // (taking pending deletes into account). In that case we need to // make a new reader with updated live docs and del count. if (reader.NumDeletedDocs() != delCount) { // fix the reader's live docs and del count Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount); bool released = false; try { rld.Release(reader); released = true; } finally { if (!released) { newReader.DecRef(); } } reader = newReader; } merge.Readers.Add(reader); Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount); segUpto++; } // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); // we pass merge.getMergeReaders() instead of merge.readers to allow the // OneMerge to return a view over the actual segments to merge SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); merge.CheckAborted(directory); // this is where all the work happens: MergeState mergeState; bool success3 = false; try { if (!merger.ShouldMerge()) { // would result in a 0 document segment: nothing to merge! 
mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort); } else { mergeState = merger.Merge(); } success3 = true; } finally { if (!success3) { lock (this) { Deleter.Refresh(merge.Info_Renamed.Info.Name); } } } Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info); merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles); // Record which codec was used to write the segment if (infoStream.IsEnabled("IW")) { if (merge.Info_Renamed.Info.DocCount == 0) { infoStream.Message("IW", "merge away fully deleted segments"); } else { infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasFreq() ? "freqs" : "no freqs")); } } // Very important to do this before opening the reader // because codec must know if prox was written for // this segment: //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); bool useCompoundFile; lock (this) // Guard segmentInfos { useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed); } if (useCompoundFile) { success = false; ICollection<string> filesToRemove = merge.Info_Renamed.Files(); try { filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.Aborted) { // this can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: } else { HandleMergeException(ioe, merge); } } } catch (Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "hit exception creating compound file during merge"); } lock (this) { Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // So that, if we hit exc in deleteNewFiles (next) // or in commitMerge (later), we close the // per-segment readers in the finally clause below: success = false; lock (this) { // delete new non cfs files directly: they were never // registered with IFD Deleter.DeleteNewFiles(filesToRemove); if (merge.Aborted) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "abort merge after building CFS"); } Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); return 0; } } merge.Info_Renamed.Info.UseCompoundFile = true; } else { // So that, if we hit exc in commitMerge (later), // we close the per-segment readers in the finally // clause below: success = false; } // Have codec write SegmentInfo.
// Must do this after creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: bool success2 = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context); success2 = true; } finally { if (!success2) { lock (this) { Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // TODO: ideally we would freeze merge.info here!! // because any changes after writing the .si will be // lost... if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size={0:0.000} MB vs estimate={1:0.000} MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0)); } IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer; if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0) { ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true); SegmentReader sr = rld.GetReader(IOContext.READ); try { mergedSegmentWarmer.Warm(sr); } finally { lock (this) { rld.Release(sr); readerPool.Release(rld); } } } // Force READ context because we merge deletes onto // this reader: if (!CommitMerge(merge, mergeState)) { // commitMerge will return false if this merge was // aborted return 0; } success = true; } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: if (!success) { CloseMergeReaders(merge, true); } } return merge.Info_Renamed.Info.DocCount; }
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")] IConcurrentMergeScheduler scheduler) { if ("Lucene3x".Equals(Codec.Default.Name)) { throw new Exception("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000); IList <BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } dir.CheckIndexOnClose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(scheduler) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = Environment.TickCount; w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec"); } savedTerms = ts.SavedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms == null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms)); Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
internal virtual void AddMergeException(MergePolicy.OneMerge merge) { lock (this) { Debug.Assert(merge.Exception != null); if (!MergeExceptions.Contains(merge) && MergeGen == merge.MergeGen) { MergeExceptions.Add(merge); } } }
// Collections.synchronizedMap(new WeakHashMap<SegmentCoreReaders, bool?>()); public virtual void RunTest(string testName) { Failed.Set(false); AddCount.Set(0); DelCount.Set(0); PackCount.Set(0); DateTime t0 = DateTime.UtcNow; Random random = new Random(Random().Next()); LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues()); DirectoryInfo tempDir = CreateTempDir(testName); Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW if (Dir is BaseDirectoryWrapper) { ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves. } MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream()); if (LuceneTestCase.TEST_NIGHTLY) { // newIWConfig makes smallish max seg size, which // results in tons and tons of segments for this test // when run nightly: MergePolicy mp = conf.MergePolicy; if (mp is TieredMergePolicy) { ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0; } else if (mp is LogByteSizeMergePolicy) { ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0; } else if (mp is LogMergePolicy) { ((LogMergePolicy)mp).MaxMergeDocs = 100000; } } conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this)); if (VERBOSE) { conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out); } Writer = new IndexWriter(Dir, conf); TestUtil.ReduceOpenFiles(Writer); //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName)); TaskScheduler es = null; DoAfterWriter(es); int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4); int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 
300 : RANDOM_MULTIPLIER; ISet <string> delIDs = new ConcurrentHashSet <string>(new HashSet <string>()); ISet <string> delPackIDs = new ConcurrentHashSet <string>(new HashSet <string>()); IList <SubDocs> allSubDocs = new SynchronizedCollection <SubDocs>(); DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC); ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs); if (VERBOSE) { Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } // Let index build up a bit Thread.Sleep(100); DoSearching(es, stopTime); if (VERBOSE) { Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } for (int thread = 0; thread < indexThreads.Length; thread++) { indexThreads[thread].Join(); } if (VERBOSE) { Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount); } IndexSearcher s = FinalSearcher; if (VERBOSE) { Console.WriteLine("TEST: finalSearcher=" + s); } Assert.IsFalse(Failed.Get()); bool doFail = false; // Verify: make sure delIDs are in fact deleted: foreach (string id in delIDs) { TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc); doFail = true; } } // Verify: make sure delPackIDs are in fact deleted: foreach (string id in delPackIDs) { TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1); if (hits.TotalHits != 0) { Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches"); doFail = true; } } // Verify: make sure each group of sub-docs are still in docID order: foreach (SubDocs subDocs in allSubDocs.ToList()) { TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20); if (!subDocs.Deleted) { // We sort by relevance but the scores should be identical so sort falls back to by docID: if (hits.TotalHits != subDocs.SubIDs.Count) { Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits); doFail = true; } else { int lastDocID = -1; int startDocID = -1; foreach (ScoreDoc scoreDoc in hits.ScoreDocs) { int docID = scoreDoc.Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } else { startDocID = docID; } lastDocID = docID; Document doc = s.Doc(docID); Assert.AreEqual(subDocs.PackID, doc.Get("packID")); } lastDocID = startDocID - 1; foreach (string subID in subDocs.SubIDs) { hits = s.Search(new TermQuery(new Term("docid", subID)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; if (lastDocID != -1) { Assert.AreEqual(1 + lastDocID, docID); } lastDocID = docID; } } } else { // Pack was deleted -- make sure its docs are // deleted. 
// We can't verify packID is deleted // because we can re-use packID for update: foreach (string subID in subDocs.SubIDs) { Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits); } } } // Verify: make sure all not-deleted docs are in fact // not deleted: int endID = Convert.ToInt32(docs.NextDoc().Get("docid")); docs.Dispose(); for (int id = 0; id < endID; id++) { string stringID = "" + id; if (!delIDs.Contains(stringID)) { TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1); if (hits.TotalHits != 1) { Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + delIDs); doFail = true; } } } Assert.IsFalse(doFail); Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); ReleaseSearcher(s); Writer.Commit(); Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount); DoClose(); Writer.Dispose(false); // Cannot shutdown until after writer is closed because // writer has merged segment warmer that uses IS to run // searches, and that IS may be using this es! /*if (es != null) * { * es.shutdown(); * es.awaitTermination(1, TimeUnit.SECONDS); * }*/ TestUtil.CheckIndex(Dir); Dir.Dispose(); System.IO.Directory.Delete(tempDir.FullName, true); if (VERBOSE) { Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]"); } }
internal readonly Codec Codec; // for writing new segments /// <summary> /// Constructs a new IndexWriter per the settings given in <code>conf</code>. /// If you want to make "live" changes to this writer instance, use /// <seealso cref="#getConfig()"/>. /// /// <p> /// <b>NOTE:</b> after this writer is created, the given configuration instance /// cannot be passed to another writer. If you intend to do so, you should /// <seealso cref="IndexWriterConfig#clone() clone"/> it beforehand. /// </summary> /// <param name="d"> /// the index directory. The index is either created or appended /// according to <code>conf.getOpenMode()</code>. </param> /// <param name="conf"> /// the configuration settings according to which IndexWriter should /// be initialized. </param> /// <exception cref="IOException"> /// if the directory cannot be read/written to, or if it does not /// exist and <code>conf.getOpenMode()</code> is /// <code>OpenMode.APPEND</code> or if there is any other low-level /// IO error </exception> public IndexWriter(Directory d, IndexWriterConfig conf) { /*if (!InstanceFieldsInitialized) { InitializeInstanceFields(); InstanceFieldsInitialized = true; }*/ readerPool = new ReaderPool(this); conf.SetIndexWriter(this); // prevent reuse by other instances Config_Renamed = new LiveIndexWriterConfig(conf); directory = d; analyzer = Config_Renamed.Analyzer; infoStream = Config_Renamed.InfoStream; mergePolicy = Config_Renamed.MergePolicy; mergePolicy.IndexWriter = this; mergeScheduler = Config_Renamed.MergeScheduler; Codec = Config_Renamed.Codec; BufferedUpdatesStream = new BufferedUpdatesStream(infoStream); PoolReaders = Config_Renamed.ReaderPooling; WriteLock = directory.MakeLock(WRITE_LOCK_NAME); if (!WriteLock.Obtain(Config_Renamed.WriteLockTimeout)) // obtain write lock { throw new LockObtainFailedException("Index locked for write: " + WriteLock); } bool success = false; try { OpenMode_e? mode = Config_Renamed.OpenMode; bool create; if (mode == OpenMode_e.CREATE) { create = true; } else if (mode == OpenMode_e.APPEND) { create = false; } else { // CREATE_OR_APPEND - create only if an index does not exist create = !DirectoryReader.IndexExists(directory); } // If index is too old, reading the segments will throw // IndexFormatTooOldException. segmentInfos = new SegmentInfos(); bool initialIndexExists = true; if (create) { // Try to read first. This is to allow create // against an index that's currently open for // searching. In this case we write the next // segments_N file with no segments: try { segmentInfos.Read(directory); segmentInfos.Clear(); } catch (IOException) { // Likely this means it's a fresh directory initialIndexExists = false; } // Record that we have a change (zero out all // segments) pending: Changed(); } else { segmentInfos.Read(directory); IndexCommit commit = Config_Renamed.IndexCommit; if (commit != null) { // Swap out all segments, but, keep metadata in // SegmentInfos, like version & generation, to // preserve write-once. This is important if // readers are open against the future commit // points.
if (commit.Directory != directory) { throw new System.ArgumentException("IndexCommit's directory doesn't match my directory"); } SegmentInfos oldInfos = new SegmentInfos(); oldInfos.Read(directory, commit.SegmentsFileName); segmentInfos.Replace(oldInfos); Changed(); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: loaded commit \"" + commit.SegmentsFileName + "\""); } } } RollbackSegments = segmentInfos.CreateBackupSegmentInfos(); // start with previous field numbers, but new FieldInfos GlobalFieldNumberMap = FieldNumberMap; Config_Renamed.FlushPolicy.Init(Config_Renamed); DocWriter = new DocumentsWriter(this, Config_Renamed, directory); eventQueue = DocWriter.EventQueue(); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: lock (this) { Deleter = new IndexFileDeleter(directory, Config_Renamed.DelPolicy, segmentInfos, infoStream, this, initialIndexExists); } if (Deleter.StartingCommitDeleted) { // Deletion policy deleted the "head" commit point. // We have to mark ourself as changed so that if we // are closed w/o any further changes we write a new // segments_N file. Changed(); } if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: create=" + create); MessageState(); } success = true; } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "init: hit exception on init; releasing write lock"); } WriteLock.Release(); IOUtils.CloseWhileHandlingException(WriteLock); WriteLock = null; } } }
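// A minimal usage sketch (not from the original source), following the conventions of the tests in this
// file: build an IndexWriterConfig, hand it to exactly one IndexWriter (the constructor above marks the
// config as used so it cannot be reused), add a document, then dispose the writer to commit and release
// the write lock. The helpers NewDirectory, NewIndexWriterConfig, MockAnalyzer and Random come from the test framework.
Directory exampleDir = NewDirectory();
IndexWriterConfig exampleConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE);
IndexWriter exampleWriter = new IndexWriter(exampleDir, exampleConf);
Document exampleDoc = new Document();
exampleDoc.Add(new TextField("field", "some text", Field.Store.NO));
exampleWriter.AddDocument(exampleDoc);
exampleWriter.Dispose(); // commits pending changes and releases the write lock
exampleDir.Dispose();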
public virtual void TestDeleteLeftoverFiles() { Directory dir = NewDirectory(); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } MergePolicy mergePolicy = NewLogMergePolicy(true, 10); // this test expects all of its segments to be in CFS mergePolicy.NoCFSRatio = 1.0; mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity; IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10).SetMergePolicy(mergePolicy).SetUseCompoundFile(true)); int i; for (i = 0; i < 35; i++) { AddDoc(writer, i); } writer.Config.MergePolicy.NoCFSRatio = 0.0; writer.Config.SetUseCompoundFile(false); for (; i < 45; i++) { AddDoc(writer, i); } writer.Dispose(); // Delete one doc so we get a .del file: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).SetUseCompoundFile(true)); Term searchTerm = new Term("id", "7"); writer.DeleteDocuments(searchTerm); writer.Dispose(); // Now, artificially create an extra .del file & extra // .s0 file: string[] files = dir.ListAll(); /* * for(int j=0;j<files.Length;j++) { * System.out.println(j + ": " + files[j]); * } */ // TODO: fix this test better string ext = Codec.Default.Name.Equals("SimpleText", StringComparison.Ordinal) ? ".liv" : ".del"; // Create a bogus separate del file for a // segment that already has a separate del file: CopyFile(dir, "_0_1" + ext, "_0_2" + ext); // Create a bogus separate del file for a // segment that does not yet have a separate del file: CopyFile(dir, "_0_1" + ext, "_1_1" + ext); // Create a bogus separate del file for a // non-existent segment: CopyFile(dir, "_0_1" + ext, "_188_1" + ext); // Create a bogus segment file: CopyFile(dir, "_0.cfs", "_188.cfs"); // Create a bogus fnm file when the CFS already exists: CopyFile(dir, "_0.cfs", "_0.fnm"); // Create some old segments file: CopyFile(dir, "segments_2", "segments"); CopyFile(dir, "segments_2", "segments_1"); // Create a bogus cfs file shadowing a non-cfs segment: // TODO: assert is bogus (relies upon codec-specific filenames) Assert.IsTrue(SlowFileExists(dir, "_3.fdt") || SlowFileExists(dir, "_3.fld")); Assert.IsTrue(!SlowFileExists(dir, "_3.cfs")); CopyFile(dir, "_1.cfs", "_3.cfs"); string[] filesPre = dir.ListAll(); // Open & close a writer: it should delete the above 4 // files and nothing more: writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND)); writer.Dispose(); string[] files2 = dir.ListAll(); dir.Dispose(); Array.Sort(files); Array.Sort(files2); ISet <string> dif = DifFiles(files, files2); if (!Arrays.Equals(files, files2)) { Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n " + AsString(files) + "\n actual files:\n " + AsString(files2) + "\ndiff: " + dif); } }
/// <summary> /// Wrap the given <see cref="MergePolicy"/> and intercept <see cref="IndexWriter.ForceMerge(int)"/> requests to /// only upgrade segments written with previous Lucene versions. /// </summary> public UpgradeIndexMergePolicy(MergePolicy @base) { this.m_base = @base; }
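// A minimal usage sketch (not from the original source): wrap whatever merge policy is already configured
// with UpgradeIndexMergePolicy so that ForceMerge(1) only rewrites segments written by older Lucene
// versions and leaves already-current segments alone; this mirrors the pattern IndexUpgrader uses.
// The variable oldIndexDir is a hypothetical Directory containing an index with old-format segments.
IndexWriterConfig upgradeConf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
upgradeConf.SetMergePolicy(new UpgradeIndexMergePolicy(upgradeConf.MergePolicy));
IndexWriter upgradeWriter = new IndexWriter(oldIndexDir, upgradeConf);
upgradeWriter.ForceMerge(1); // intercepted: only old-format segments are selected for merging
upgradeWriter.Dispose();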