/// <summary>
/// Builds the index used by the tests: records 1 through 100, one document
/// per record, with a commit issued after every tenth record. Each commit is
/// tagged with user data of the form "records 1-N".
/// </summary>
public override void SetUp()
{
    base.SetUp();
    Dir = NewDirectory();

    IndexDeletionPolicy deletionPolicy = new KeepAllDeletionPolicy(this);
    IndexWriter indexWriter = new IndexWriter(
        Dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(deletionPolicy));

    const int batchSize = 10;
    const int lastRecordId = 100;

    // Outer loop walks the batch boundaries (10, 20, ..., 100); the inner loop
    // adds the ten records that end at that boundary.
    for (int batchEnd = batchSize; batchEnd <= lastRecordId; batchEnd += batchSize)
    {
        for (int recordId = batchEnd - batchSize + 1; recordId <= batchEnd; recordId++)
        {
            Document record = new Document();
            string recordIdText = recordId.ToString(CultureInfo.InvariantCulture);
            record.Add(NewTextField(FIELD_RECORD_ID, recordIdText, Field.Store.YES));
            indexWriter.AddDocument(record);
        }

        // Tag the commit with the range of records it contains.
        IDictionary<string, string> commitData = new Dictionary<string, string>
        {
            { "index", "records 1-" + batchEnd.ToString(CultureInfo.InvariantCulture) }
        };
        indexWriter.SetCommitData(commitData);
        indexWriter.Commit();
    }

    indexWriter.Dispose();
}
/// <summary>
/// Builds the index used by the tests in a fresh <c>MockRAMDirectory</c>:
/// records 1 through 100, one document per record, committing after every
/// tenth record with commit data of the form "records 1-N".
/// </summary>
public override void SetUp()
{
    base.SetUp();
    dir = new MockRAMDirectory();

    IndexDeletionPolicy deletionPolicy = new KeepAllDeletionPolicy(this);
    IndexWriter indexWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), deletionPolicy, MaxFieldLength.UNLIMITED);

    const int batchSize = 10;
    const int lastRecordId = 100;

    // Outer loop walks the batch boundaries (10, 20, ..., 100); the inner loop
    // adds the ten records that end at that boundary.
    for (int batchEnd = batchSize; batchEnd <= lastRecordId; batchEnd += batchSize)
    {
        for (int recordId = batchEnd - batchSize + 1; recordId <= batchEnd; recordId++)
        {
            Document record = new Document();
            record.Add(new Field(FIELD_RECORD_ID, recordId.ToString(), Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.AddDocument(record);
        }

        // Tag the commit with the range of records it contains.
        System.Collections.Generic.IDictionary<string, string> commitData =
            new System.Collections.Generic.Dictionary<string, string>();
        commitData["index"] = "records 1-" + batchEnd;
        indexWriter.Commit(commitData);
    }

    indexWriter.Close();
}
/// <summary>
/// Exercises opening an <c>IndexWriter</c> on an earlier commit point:
/// rolling such a writer back must leave the index untouched, while
/// disposing it must publish the earlier state as a new commit.
/// </summary>
public virtual void TestOpenPriorSnapshot()
{
    Directory dir = NewDirectory();

    IndexWriter writer = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir))
            .SetMaxBufferedDocs(2)
            .SetMergePolicy(NewLogMergePolicy(10)));
    KeepAllDeletionPolicy keepAll = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;

    // Ten documents, committing after every second one.
    for (int docNumber = 1; docNumber <= 10; docNumber++)
    {
        AddDoc(writer);
        if (docNumber % 2 == 0)
        {
            writer.Commit();
        }
    }
    writer.Dispose();

    ICollection<IndexCommit> initialCommits = DirectoryReader.ListCommits(dir);
    Assert.AreEqual(5, initialCommits.Count);

    // Remember the commit with the highest generation; later writers are
    // opened on it.
    IndexCommit lastCommit = null;
    foreach (IndexCommit candidate in initialCommits)
    {
        if (lastCommit != null && candidate.Generation <= lastCommit.Generation)
        {
            continue;
        }
        lastCommit = candidate;
    }
    Assert.IsTrue(lastCommit != null);

    // Add an eleventh document and force-merge down to one segment.
    IndexWriter mergingWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(keepAll));
    AddDoc(mergingWriter);
    Assert.AreEqual(11, mergingWriter.NumDocs);
    mergingWriter.ForceMerge(1);
    mergingWriter.Dispose();

    Assert.AreEqual(6, DirectoryReader.ListCommits(dir).Count);

    // Open a writer on the pre-merge commit, then abandon it with Rollback().
    IndexWriter abandonedWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetIndexDeletionPolicy(keepAll)
            .SetIndexCommit(lastCommit));
    Assert.AreEqual(10, abandonedWriter.NumDocs);
    abandonedWriter.Rollback();

    // The reader must still see the merged, 11-document index.
    DirectoryReader reader = DirectoryReader.Open(dir);
    Assert.AreEqual(1, reader.Leaves.Count);
    Assert.AreEqual(11, reader.NumDocs);
    reader.Dispose();

    // Same again, but this time dispose the writer so the earlier state is
    // committed.
    IndexWriter revertingWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetIndexDeletionPolicy(keepAll)
            .SetIndexCommit(lastCommit));
    Assert.AreEqual(10, revertingWriter.NumDocs);
    revertingWriter.Dispose();

    // One more commit than before: 7 in total.
    Assert.AreEqual(7, DirectoryReader.ListCommits(dir).Count);

    // The index is back to 10 documents spread over more than one segment.
    reader = DirectoryReader.Open(dir);
    Assert.IsTrue(reader.Leaves.Count > 1);
    Assert.AreEqual(10, reader.NumDocs);
    reader.Dispose();

    // Force-merge the reverted index.
    IndexWriter remergeWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(keepAll));
    remergeWriter.ForceMerge(1);
    remergeWriter.Dispose();

    reader = DirectoryReader.Open(dir);
    Assert.AreEqual(1, reader.Leaves.Count);
    Assert.AreEqual(10, reader.NumDocs);
    reader.Dispose();

    // Open on the pre-merge commit once more, this time without setting a
    // deletion policy on the config (the original note describes this as
    // keeping only the last commit).
    IndexWriter defaultPolicyWriter = new IndexWriter(
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexCommit(lastCommit));
    Assert.AreEqual(10, defaultPolicyWriter.NumDocs);

    // While that writer is still open and uncommitted, a reader keeps seeing
    // the single-segment index.
    reader = DirectoryReader.Open(dir);
    Assert.AreEqual(1, reader.Leaves.Count);
    Assert.AreEqual(10, reader.NumDocs);
    reader.Dispose();

    defaultPolicyWriter.Dispose();

    // After the writer is disposed the reader sees the multi-segment index.
    reader = DirectoryReader.Open(dir);
    Assert.IsTrue(reader.Leaves.Count > 1);
    Assert.AreEqual(10, reader.NumDocs);
    reader.Dispose();

    dir.Dispose();
}
// TestKeepAllDeletionPolicy: runs two passes (pass = 0, 1).
// NOTE(review): the text between "TEST: cycle pass=" and "TEST: open writer for forceMerge"
// is missing from this copy (it appears as ******). The declarations of dir, conf, writer,
// policy and needsMerging are therefore not visible here; restore them from the upstream
// source before relying on this method.
// What the visible remainder does:
//   - asserts policy.NumOnInit is 2 when needsMerging is true and 1 otherwise;
//   - asserts policy.NumOnCommit and the number of listed commits both equal
//     1 + (needsMerging ? 1 : 0). The inline "exactly 2 commits" remark only matches
//     the needsMerging == true case;
//   - opens and disposes a reader on every listed commit;
//   - deletes IndexFileNames.SEGMENTS_GEN, then walks generations downward from
//     SegmentInfos.GetLastCommitGeneration(dir): opens a reader on dir, deletes that
//     generation's segments file, and, while a lower generation remains, opens and disposes
//     an APPEND-mode writer using the same policy and asserts that dir.ListAll() shrank.
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 2; pass++) { if (VERBOSE) { Console.WriteLine("TEST: cycle pass="******"TEST: open writer for forceMerge"); } writer = new IndexWriter(dir, conf); policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.ForceMerge(1); writer.Dispose(); } Assert.AreEqual(needsMerging ? 2 : 1, policy.NumOnInit); // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.NumOnCommit); // Test listCommits ICollection <IndexCommit> commits = DirectoryReader.ListCommits(dir); // 2 from closing writer Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count); // Make sure we can open a reader on each commit: foreach (IndexCommit commit in commits) { IndexReader r = DirectoryReader.Open(commit); r.Dispose(); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetLastCommitGeneration(dir); while (gen > 0) { IndexReader reader = DirectoryReader.Open(dir); reader.Dispose(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.ListAll().Length; writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy)); writer.Dispose(); int postCount = dir.ListAll().Length; Assert.IsTrue(postCount < preCount); } } dir.Dispose(); } }
/// <summary>
/// Fills a new <c>MockRAMDirectory</c> with records 1 through 100 (one
/// document each) and commits whenever the record id is a multiple of ten,
/// attaching commit data of the form "records 1-N".
/// </summary>
public override void SetUp()
{
    base.SetUp();
    dir = new MockRAMDirectory();

    IndexDeletionPolicy retainEverything = new KeepAllDeletionPolicy(this);
    IndexWriter recordWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), retainEverything, MaxFieldLength.UNLIMITED);

    int recordId = 0;
    while (recordId < 100)
    {
        recordId++;

        Document recordDoc = new Document();
        Field idField = new Field(FIELD_RECORD_ID, recordId.ToString(), Field.Store.YES, Field.Index.ANALYZED);
        recordDoc.Add(idField);
        recordWriter.AddDocument(recordDoc);

        // Only every tenth record closes a batch and triggers a commit.
        bool batchComplete = (recordId % 10) == 0;
        if (!batchComplete)
        {
            continue;
        }

        System.Collections.Generic.IDictionary<string, string> userData =
            new System.Collections.Generic.Dictionary<string, string>();
        userData["index"] = "records 1-" + recordId;
        recordWriter.Commit(userData);
    }

    recordWriter.Close();
}
/// <summary>
/// Exercises opening an <c>IndexWriter</c> on an earlier commit point:
/// rolling such a writer back must leave the index untouched, while closing
/// it must publish the earlier state as a new commit.
/// </summary>
public virtual void TestOpenPriorSnapshot()
{
    // Never deletes a commit
    KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

    Directory dir = new MockRAMDirectory();
    policy.dir = dir;

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2);

    // Ten documents, committing after every second one.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
        if ((1 + i) % 2 == 0)
        {
            writer.Commit();
        }
    }
    writer.Close();

    System.Collections.ICollection commits = IndexReader.ListCommits(dir);
    Assert.AreEqual(6, commits.Count);

    // Remember the commit with the highest generation. foreach performs the
    // element cast and also disposes the enumerator if it is disposable.
    IndexCommit lastCommit = null;
    foreach (IndexCommit commit in commits)
    {
        if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration())
        {
            lastCommit = commit;
        }
    }
    Assert.IsNotNull(lastCommit);

    // Now add 1 doc and optimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc(writer);
    Assert.AreEqual(11, writer.NumDocs());
    writer.Optimize();
    writer.Close();

    Assert.AreEqual(7, IndexReader.ListCommits(dir).Count);

    // Now open writer on the commit just before optimize:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());

    // Abandon that writer; the index on disk must be unchanged.
    writer.Rollback();

    IndexReader r = IndexReader.Open(dir);
    // Still optimized, still 11 docs
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(11, r.NumDocs());
    r.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());
    // Closing the writer commits the earlier state:
    writer.Close();

    // Now 8 because we made another commit
    Assert.AreEqual(8, IndexReader.ListCommits(dir).Count);

    r = IndexReader.Open(dir);
    // Not optimized because we rolled it back, and now only
    // 10 docs
    Assert.IsFalse(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    // Reoptimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    writer.Optimize();
    writer.Close();

    r = IndexReader.Open(dir);
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    // Now open writer on the commit just before optimize,
    // but this time keeping only the last commit:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());

    // Reader still sees optimized index, because writer
    // opened on the prior commit has not yet committed:
    r = IndexReader.Open(dir);
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    writer.Close();

    // Now reader sees unoptimized index:
    r = IndexReader.Open(dir);
    Assert.IsFalse(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    dir.Close();
}
/// <summary>
/// Runs four passes (autoCommit on/off crossed with compound file on/off)
/// against a deletion policy that never deletes a commit. Checks the
/// policy's init/commit counters and the number of listed commits, that a
/// reader can be opened on every commit, and that removing a segments_N
/// file lets a subsequently opened writer delete at least one file.
/// </summary>
public virtual void TestKeepAllDeletionPolicy()
{
    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        // Never deletes a commit
        KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

        Directory dir = new RAMDirectory();
        policy.dir = dir;

        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        for (int i = 0; i < 107; i++)
        {
            AddDoc(writer);
            // In the autoCommit passes, commit explicitly at i = 0, 10, ..., 100.
            if (autoCommit && i % 10 == 0)
            {
                writer.Commit();
            }
        }
        writer.Close();

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Optimize();
        writer.Close();

        Assert.AreEqual(2, policy.numOnInit);
        if (!autoCommit)
        {
            // If we are not auto committing then there should
            // be exactly 2 commits (one per close above):
            Assert.AreEqual(2, policy.numOnCommit);
        }

        // Test listCommits
        System.Collections.ICollection commits = IndexReader.ListCommits(dir);
        if (!autoCommit)
        {
            // 1 from opening writer + 2 from closing writer
            Assert.AreEqual(3, commits.Count);
        }
        else
        {
            // 1 from opening writer + 2 from closing writer +
            // 11 from calling writer.commit() explicitly above
            Assert.AreEqual(14, commits.Count);
        }

        // Make sure we can open a reader on each commit. foreach performs the
        // element cast and disposes the enumerator if it is disposable.
        foreach (IndexCommit commit in commits)
        {
            IndexReader r = IndexReader.Open(commit, null);
            r.Close();
        }

        // Simplistic check: just verify all segments_N's still
        // exist, and, I can open a reader on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
        while (gen > 0)
        {
            IndexReader reader = IndexReader.Open(dir);
            reader.Close();
            dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            gen--;

            if (gen > 0)
            {
                // Now that we've removed a commit point, which
                // should have orphan'd at least one index file.
                // Open & close a writer and assert that it
                // actually removed something:
                int preCount = dir.ListAll().Length;
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED);
                writer.Close();
                int postCount = dir.ListAll().Length;
                Assert.IsTrue(postCount < preCount);
            }
        }

        dir.Close();
    }
}
/// <summary>
/// Runs four passes (autoCommit on/off crossed with compound file on/off)
/// with a <c>KeepAllDeletionPolicy</c>. Checks the policy's init/commit
/// counters, then deletes segments_N files from the newest generation
/// downward and asserts that reopening a writer shrinks the directory
/// listing each time.
/// </summary>
public virtual void TestKeepAllDeletionPolicy()
{
    for (int pass = 0; pass < 4; pass++)
    {
        // Passes 0-1 run with autoCommit, passes 1 and 3 with compound files.
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        KeepAllDeletionPolicy keepAll = new KeepAllDeletionPolicy(this);
        Directory dir = new RAMDirectory();

        // First writer: create the index and add 107 documents.
        IndexWriter indexWriter = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, keepAll);
        indexWriter.SetMaxBufferedDocs(10);
        indexWriter.SetUseCompoundFile(useCompoundFile);
        int remainingDocs = 107;
        while (remainingDocs > 0)
        {
            AddDoc(indexWriter);
            remainingDocs--;
        }
        indexWriter.Close();

        // Second writer: reopen the existing index and optimize it.
        indexWriter = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, keepAll);
        indexWriter.SetUseCompoundFile(useCompoundFile);
        indexWriter.Optimize();
        indexWriter.Close();

        Assert.AreEqual(2, keepAll.numOnInit);
        if (!autoCommit)
        {
            // Without autoCommit there is exactly one commit per Close() above.
            Assert.AreEqual(2, keepAll.numOnCommit);
        }
        else
        {
            Assert.IsTrue(keepAll.numOnCommit > 2);
        }

        // Walk the segments_N files from newest to oldest, checking that a
        // reader can still be opened before each one is removed.
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long generation = SegmentInfos.GetCurrentSegmentGeneration(dir);
        while (generation > 0)
        {
            IndexReader probe = IndexReader.Open(dir);
            probe.Close();

            dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation));
            generation--;

            if (generation <= 0)
            {
                continue;
            }

            // Removing a commit point should have orphaned at least one
            // file; opening and closing a writer is expected to delete it.
            int filesBefore = dir.List().Length;
            indexWriter = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, keepAll);
            indexWriter.Close();
            int filesAfter = dir.List().Length;
            Assert.IsTrue(filesAfter < filesBefore);
        }

        dir.Close();
    }
}
/// <summary>
/// Exercises opening an <c>IndexWriter</c> on an earlier commit point:
/// rolling such a writer back must leave the index untouched, while closing
/// it must publish the earlier state as a new commit.
/// </summary>
public virtual void TestOpenPriorSnapshot()
{
    // Never deletes a commit
    KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

    Directory dir = new MockRAMDirectory();
    policy.dir = dir;

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2);

    // Ten documents, committing after every second one.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
        if ((1 + i) % 2 == 0)
        {
            writer.Commit();
        }
    }
    writer.Close();

    System.Collections.ICollection commits = IndexReader.ListCommits(dir);
    Assert.AreEqual(6, commits.Count);

    // Remember the commit with the highest generation. foreach performs the
    // element cast and also disposes the enumerator if it is disposable.
    IndexCommit lastCommit = null;
    foreach (IndexCommit commit in commits)
    {
        if (lastCommit == null || commit.GetGeneration() > lastCommit.GetGeneration())
        {
            lastCommit = commit;
        }
    }
    Assert.IsNotNull(lastCommit);

    // Now add 1 doc and optimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc(writer);
    Assert.AreEqual(11, writer.NumDocs());
    writer.Optimize();
    writer.Close();

    Assert.AreEqual(7, IndexReader.ListCommits(dir).Count);

    // Now open writer on the commit just before optimize:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());

    // Abandon that writer; the index on disk must be unchanged.
    writer.Rollback();

    IndexReader r = IndexReader.Open(dir);
    // Still optimized, still 11 docs
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(11, r.NumDocs());
    r.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());
    // Closing the writer commits the earlier state:
    writer.Close();

    // Now 8 because we made another commit
    Assert.AreEqual(8, IndexReader.ListCommits(dir).Count);

    r = IndexReader.Open(dir);
    // Not optimized because we rolled it back, and now only
    // 10 docs
    Assert.IsFalse(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    // Reoptimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED);
    writer.Optimize();
    writer.Close();

    r = IndexReader.Open(dir);
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    // Now open writer on the commit just before optimize,
    // but this time keeping only the last commit:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit);
    Assert.AreEqual(10, writer.NumDocs());

    // Reader still sees optimized index, because writer
    // opened on the prior commit has not yet committed:
    r = IndexReader.Open(dir);
    Assert.IsTrue(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    writer.Close();

    // Now reader sees unoptimized index:
    r = IndexReader.Open(dir);
    Assert.IsFalse(r.IsOptimized());
    Assert.AreEqual(10, r.NumDocs());
    r.Close();

    dir.Close();
}
/// <summary>
/// Runs four passes (autoCommit on/off crossed with compound file on/off)
/// against a deletion policy that never deletes a commit. Checks the
/// policy's init/commit counters and the number of listed commits, that a
/// reader can be opened on every commit, and that removing a segments_N
/// file lets a subsequently opened writer delete at least one file.
/// </summary>
public virtual void TestKeepAllDeletionPolicy()
{
    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        // Never deletes a commit
        KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

        Directory dir = new RAMDirectory();
        policy.dir = dir;

        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        for (int i = 0; i < 107; i++)
        {
            AddDoc(writer);
            // In the autoCommit passes, commit explicitly at i = 0, 10, ..., 100.
            if (autoCommit && i % 10 == 0)
            {
                writer.Commit();
            }
        }
        writer.Close();

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Optimize();
        writer.Close();

        Assert.AreEqual(2, policy.numOnInit);
        if (!autoCommit)
        {
            // If we are not auto committing then there should
            // be exactly 2 commits (one per close above):
            Assert.AreEqual(2, policy.numOnCommit);
        }

        // Test listCommits
        System.Collections.ICollection commits = IndexReader.ListCommits(dir);
        if (!autoCommit)
        {
            // 1 from opening writer + 2 from closing writer
            Assert.AreEqual(3, commits.Count);
        }
        else
        {
            // 1 from opening writer + 2 from closing writer +
            // 11 from calling writer.commit() explicitly above
            Assert.AreEqual(14, commits.Count);
        }

        // Make sure we can open a reader on each commit. foreach performs the
        // element cast and disposes the enumerator if it is disposable.
        foreach (IndexCommit commit in commits)
        {
            IndexReader r = IndexReader.Open(commit, null);
            r.Close();
        }

        // Simplistic check: just verify all segments_N's still
        // exist, and, I can open a reader on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
        while (gen > 0)
        {
            IndexReader reader = IndexReader.Open(dir);
            reader.Close();
            dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            gen--;

            if (gen > 0)
            {
                // Now that we've removed a commit point, which
                // should have orphan'd at least one index file.
                // Open & close a writer and assert that it
                // actually removed something:
                int preCount = dir.ListAll().Length;
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED);
                writer.Close();
                int postCount = dir.ListAll().Length;
                Assert.IsTrue(postCount < preCount);
            }
        }

        dir.Close();
    }
}