public virtual void TestForceFlush()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(10);
    LogDocMergePolicy mp = new LogDocMergePolicy(writer);
    mp.SetMinMergeDocs(100);
    writer.SetMergePolicy(mp);

    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
        writer.Close();

        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
        writer.SetMaxBufferedDocs(10);
        writer.SetMergePolicy(mp);
        mp.SetMinMergeDocs(100);
        writer.SetMergeFactor(10);
        CheckInvariants(writer);
    }

    writer.Close();
}
public virtual void TestNoOverMerge()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(10);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    bool noOverMerge = false;
    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
        if (writer.GetNumBufferedDocuments() + writer.GetSegmentCount() >= 18)
        {
            noOverMerge = true;
        }
    }
    Assert.IsTrue(noOverMerge);

    writer.Close();
}
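// AddDoc is a fixture helper that is not part of this excerpt. Below is a
// minimal sketch, assuming (as in Lucene's merge-policy tests) that each call
// indexes one tiny single-field document, so segment doc counts track the
// number of AddDoc calls exactly. Treat it as a reconstruction, not the
// verbatim original.
private void AddDoc(IndexWriter writer)
{
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
}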
private IndexWriter NewWriter(Directory dir, bool create)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    return writer;
}
private IndexWriter NewWriter(Directory dir, bool create)
{
    IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), create);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    return writer;
}
public virtual void TestMergeFactorChange()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(100);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    for (int i = 0; i < 250; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }

    writer.SetMergeFactor(5);

    // The merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
    }
    CheckInvariants(writer);

    writer.Close();
}
public static void ApplyTo(IndexWriter writer)
{
    writer.MergeFactor = MergeFactor;
    writer.MaxMergeDocs = MaxMergeDocs;

    var mergePolicy = new LogByteSizeMergePolicy(writer)
    {
        MaxMergeDocs = MaxMergeDocs,
        MergeFactor = MergeFactor,
        MinMergeMB = MinMergeMB,
        MaxMergeMB = MaxMergeMB
    };
    writer.SetMergePolicy(mergePolicy);
}
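// A usage sketch for ApplyTo. The MergeFactor, MaxMergeDocs, MinMergeMB and
// MaxMergeMB values it copies are static members of the enclosing settings
// class (not shown in this excerpt), and CreateTunedWriter is a hypothetical
// caller added for illustration. Note that ApplyTo deliberately sets
// MergeFactor and MaxMergeDocs twice: the writer's convenience properties
// forward to whichever merge policy is currently installed, so the values are
// also applied directly to the new LogByteSizeMergePolicy before SetMergePolicy
// swaps it in.
public static IndexWriter CreateTunedWriter(Directory dir)
{
    var writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                 IndexWriter.MaxFieldLength.UNLIMITED);
    ApplyTo(writer);  // installs the configured LogByteSizeMergePolicy
    return writer;
}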
public virtual void TestMaxBufferedDocsChange()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);
    writer.SetMaxBufferedDocs(101);
    writer.MergeFactor = 101;
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    // leftmost* segment has 1 doc
    // rightmost* segment has 100 docs
    for (int i = 1; i <= 100; i++)
    {
        for (int j = 0; j < i; j++)
        {
            AddDoc(writer);
            CheckInvariants(writer);
        }
        writer.Close();

        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED, null);
        writer.SetMaxBufferedDocs(101);
        writer.MergeFactor = 101;
        writer.SetMergePolicy(new LogDocMergePolicy(writer));
    }

    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 10;

    // The merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds.
    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
    }
    CheckInvariants(writer);

    for (int i = 100; i < 1000; i++)
    {
        AddDoc(writer);
    }
    writer.Commit(null);
    ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync();
    writer.Commit(null);
    CheckInvariants(writer);

    writer.Close();
}
public virtual void TestDeleteMerging()
{
    RAMDirectory directory = new MockRAMDirectory();

    IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    writer.SetMergeScheduler(cms);

    LogDocMergePolicy mp = new LogDocMergePolicy(writer);
    writer.SetMergePolicy(mp);

    // Force degenerate merging so we can get a mix of
    // merging of segments with and without deletes at the
    // start:
    mp.MinMergeDocs = 1000;

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);
    for (int i = 0; i < 10; i++)
    {
        for (int j = 0; j < 100; j++)
        {
            idField.SetValue(System.Convert.ToString(i * 100 + j));
            writer.AddDocument(doc);
        }

        int delID = i;
        while (delID < 100 * (1 + i))
        {
            writer.DeleteDocuments(new Term("id", "" + delID));
            delID += 10;
        }

        writer.Commit();
    }

    writer.Close();

    IndexReader reader = IndexReader.Open(directory, true);
    // Verify that we did not lose any deletes...
    Assert.AreEqual(450, reader.NumDocs());
    reader.Close();
    directory.Close();
}
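// The ANALYZER field referenced above is defined elsewhere in the fixture; in
// Lucene's concurrent-merge tests it is a SimpleAnalyzer, but treat the
// concrete choice here as an assumption rather than the original declaration.
private static readonly Analyzer ANALYZER = new SimpleAnalyzer();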
public virtual void TestNormalCase()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 10;
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }

    writer.Close();
}
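// CheckInvariants is the other helper these tests depend on but which is not
// part of this excerpt. The sketch below follows the shape of the helper in
// Lucene's TestIndexWriterMergePolicy and should be read as a reconstruction,
// not the verbatim original: walking segments from newest to oldest, it groups
// them into doc-count levels (maxBufferedDocs, maxBufferedDocs * mergeFactor,
// ...) and asserts that no fully mergeable level has accumulated mergeFactor
// or more segments. With a ConcurrentMergeScheduler, pending merges should be
// synced before calling it.
private void CheckInvariants(IndexWriter writer)
{
    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    // In-memory (not yet flushed) docs must stay below the flush trigger.
    Assert.IsTrue(writer.GetNumBufferedDocuments() < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    int segmentCount = writer.GetSegmentCount();
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        // Older segments must be strictly above the current level's floor.
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            // Crossed into the next level: the level just finished must not
            // hold a full merge's worth of segments, unless merging there is
            // forbidden by maxMergeDocs.
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            numSegments = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
}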
public virtual void TestHangOnClose()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
    writer.SetMaxBufferedDocs(5);
    writer.SetUseCompoundFile(false);
    writer.SetMergeFactor(100);

    Document doc = new Document();
    doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int i = 0; i < 60; i++)
    {
        writer.AddDocument(doc);
    }

    writer.SetMaxBufferedDocs(200);
    Document doc2 = new Document();
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    for (int i = 0; i < 10; i++)
    {
        writer.AddDocument(doc2);
    }
    writer.Close();

    Directory dir2 = new MockRAMDirectory();
    writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);
    lmp.SetMinMergeMB(0.0001);
    writer.SetMergePolicy(lmp);
    writer.SetMergeFactor(4);
    writer.SetUseCompoundFile(false);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { dir });
    writer.Close();

    dir.Close();
    dir2.Close();
}
public virtual void TestNormalCase()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 10;
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }

    writer.Close();
}
/// <summary>
/// Creates a test index of 100 documents in dir1, tagged with indexName.
/// If multiSegment is false, the index is optimized down to a single segment.
/// </summary>
public static void CreateIndex(Directory dir1, System.String indexName, bool multiSegment)
{
    IndexWriter w = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    w.SetMergePolicy(new LogDocMergePolicy(w));
    for (int i = 0; i < 100; i++)
    {
        w.AddDocument(CreateDocument(i, indexName, 4));
        if (multiSegment && (i % 10) == 0)
        {
            // Left empty in the original; the parallel CreateIndex variants
            // below call w.Flush() here to force a segment boundary every
            // ten documents.
        }
    }
    if (!multiSegment)
    {
        w.Optimize();
    }
    w.Close();
}
public virtual void TestMergeDocCount0()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 100;

    for (int i = 0; i < 250; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, false, null);
    reader.DeleteDocuments(new Term("content", "aaa"), null);
    reader.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED, null);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 5;

    // The merge factor is changed, so check invariants after all adds.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
    }
    writer.Commit(null);
    ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync();
    writer.Commit(null);
    CheckInvariants(writer);
    Assert.AreEqual(10, writer.MaxDoc());

    writer.Close();
}
public virtual void TestNoOverMerge()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 10;
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    bool noOverMerge = false;
    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
        if (writer.GetNumBufferedDocuments() + writer.GetSegmentCount() >= 18)
        {
            noOverMerge = true;
        }
    }
    Assert.IsTrue(noOverMerge);

    writer.Close();
}
public virtual void TestMergeDocCount0()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(100);

    for (int i = 0; i < 250; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    reader.DeleteDocuments(new Term("content", "aaa"));
    reader.Close();

    writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(5);

    // The merge factor is changed, so check invariants after all adds.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
    }
    CheckInvariants(writer);
    Assert.AreEqual(10, writer.DocCount());

    writer.Close();
}
private IndexWriter NewWriter(Directory dir, bool create)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    return writer;
}
public virtual void TestMergeDocCount0()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 100;

    for (int i = 0; i < 250; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, false);
    reader.DeleteDocuments(new Term("content", "aaa"));
    reader.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetMergePolicy(new LogDocMergePolicy(writer));
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 5;

    // The merge factor is changed, so check invariants after all adds.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
    }
    writer.Commit();
    ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync();
    writer.Commit();
    CheckInvariants(writer);
    Assert.AreEqual(10, writer.MaxDoc());

    writer.Close();
}
public void TestHangOnClose()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
    writer.SetMergePolicy(new LogByteSizeMergePolicy());
    writer.SetMaxBufferedDocs(5);
    writer.SetUseCompoundFile(false);
    writer.SetMergeFactor(100);

    Document doc = new Document();
    doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    for (int i = 0; i < 60; i++)
        writer.AddDocument(doc);

    writer.SetMaxBufferedDocs(200);
    Document doc2 = new Document();
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    for (int i = 0; i < 10; i++)
        writer.AddDocument(doc2);
    writer.Close();

    Directory dir2 = new MockRAMDirectory();
    writer = new IndexWriter(dir2, false, new WhitespaceAnalyzer(), true);
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.SetMinMergeMB(0.0001);
    writer.SetMergePolicy(lmp);
    writer.SetMergeFactor(4);
    writer.SetUseCompoundFile(false);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { dir });
    writer.Close();

    dir.Close();
    dir2.Close();
}
public virtual void TestMergeFactorChange()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 100;
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    for (int i = 0; i < 250; i++)
    {
        AddDoc(writer);
        CheckInvariants(writer);
    }

    writer.MergeFactor = 5;

    // The merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds.
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer);
    }
    CheckInvariants(writer);

    writer.Close();
}
public virtual void TestMaxBufferedDocsChange()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetMaxBufferedDocs(101);
    writer.MergeFactor = 101;
    writer.SetMergePolicy(new LogDocMergePolicy(writer));

    // leftmost* segment has 1 doc
    // rightmost* segment has 100 docs
    for (int i = 1; i <= 100; i++)
    {
        for (int j = 0; j < i; j++)
        {
            AddDoc(writer);
            CheckInvariants(writer);
        }
        writer.Close();

        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.SetMaxBufferedDocs(101);
        writer.MergeFactor = 101;
        writer.SetMergePolicy(new LogDocMergePolicy(writer));
    }

    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 10;

    // The merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds.
    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
    }
    CheckInvariants(writer);

    for (int i = 100; i < 1000; i++)
    {
        AddDoc(writer);
    }
    writer.Commit();
    ((ConcurrentMergeScheduler) writer.MergeScheduler).Sync();
    writer.Commit();
    CheckInvariants(writer);

    writer.Close();
}
public virtual void TestTermVectorCorruption()
{
    Directory dir = new MockRAMDirectory();
    for (int iter = 0; iter < 4; iter++)
    {
        bool autoCommit = 1 == iter / 2;
        IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
        writer.SetMaxBufferedDocs(2);
        writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        writer.SetMergePolicy(new LogDocMergePolicy(writer));

        Document document = new Document();
        Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
        document.Add(storedField);
        writer.AddDocument(document);
        writer.AddDocument(document);

        document = new Document();
        document.Add(storedField);
        Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        document.Add(termVectorField);
        writer.AddDocument(document);
        writer.Optimize();
        writer.Close();

        IndexReader reader = IndexReader.Open(dir);
        for (int i = 0; i < reader.NumDocs(); i++)
        {
            reader.Document(i);
            reader.GetTermFreqVectors(i);
        }
        reader.Close();

        writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
        writer.SetMaxBufferedDocs(2);
        writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        writer.SetMergePolicy(new LogDocMergePolicy(writer));

        Directory[] indexDirs = new Directory[] { new MockRAMDirectory(dir) };
        writer.AddIndexes(indexDirs);
        writer.Close();
    }
    dir.Close();
}
private static void CreateIndex(Directory dir, bool multiSegment)
{
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer());
    w.SetMergePolicy(new LogDocMergePolicy());
    for (int i = 0; i < 100; i++)
    {
        w.AddDocument(CreateDocument(i, 4));
        if (multiSegment && (i % 10) == 0)
        {
            w.Flush();
        }
    }
    if (!multiSegment)
    {
        w.Optimize();
    }
    w.Close();

    IndexReader r = IndexReader.Open(dir);
    if (multiSegment)
    {
        Assert.IsTrue(r is MultiSegmentReader);
    }
    else
    {
        Assert.IsTrue(r is SegmentReader);
    }
    r.Close();
}
public virtual void TestHangOnClose()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
    writer.SetMaxBufferedDocs(5);
    writer.UseCompoundFile = false;
    writer.MergeFactor = 100;

    Document doc = new Document();
    doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int i = 0; i < 60; i++)
        writer.AddDocument(doc);

    writer.SetMaxBufferedDocs(200);
    Document doc2 = new Document();
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    for (int i = 0; i < 10; i++)
        writer.AddDocument(doc2);
    writer.Close();

    Directory dir2 = new MockRAMDirectory();
    writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);
    lmp.MinMergeMB = 0.0001;
    writer.SetMergePolicy(lmp);
    writer.MergeFactor = 4;
    writer.UseCompoundFile = false;
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { dir });
    writer.Close();

    dir.Close();
    dir2.Close();
}
public virtual void TestDeleteMerging()
{
    RAMDirectory directory = new MockRAMDirectory();

    IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    writer.SetMergeScheduler(cms);

    LogDocMergePolicy mp = new LogDocMergePolicy(writer);
    writer.SetMergePolicy(mp);

    // Force degenerate merging so we can get a mix of
    // merging of segments with and without deletes at the
    // start:
    mp.SetMinMergeDocs(1000);

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);
    for (int i = 0; i < 10; i++)
    {
        for (int j = 0; j < 100; j++)
        {
            idField.SetValue(System.Convert.ToString(i * 100 + j));
            writer.AddDocument(doc);
        }

        int delID = i;
        while (delID < 100 * (1 + i))
        {
            writer.DeleteDocuments(new Term("id", "" + delID));
            delID += 10;
        }

        writer.Flush();
    }

    writer.Close();

    IndexReader reader = IndexReader.Open(directory);
    // Verify that we did not lose any deletes...
    Assert.AreEqual(450, reader.NumDocs());
    reader.Close();
    directory.Close();
}
public virtual void TestOptimizeMaxNumSegments2()
{
    MockRAMDirectory dir = new MockRAMDirectory();

    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.SetMinMergeDocs(1);
    writer.SetMergePolicy(ldmp);
    writer.SetMergeFactor(4);
    writer.SetMaxBufferedDocs(2);

    for (int iter = 0; iter < 10; iter++)
    {
        for (int i = 0; i < 19; i++)
            writer.AddDocument(doc);
        writer.Flush();

        SegmentInfos sis = new SegmentInfos();
        ((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync();
        sis.Read(dir);

        int segCount = sis.Count;

        writer.Optimize(7);

        sis = new SegmentInfos();
        ((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync();
        sis.Read(dir);
        int optSegCount = sis.Count;

        if (segCount < 7)
            Assert.AreEqual(segCount, optSegCount);
        else
            Assert.AreEqual(7, optSegCount);
    }
}
public virtual void TestOptimizeMaxNumSegments()
{
    MockRAMDirectory dir = new MockRAMDirectory();

    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));

    for (int numDocs = 38; numDocs < 500; numDocs += 38)
    {
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
        LogDocMergePolicy ldmp = new LogDocMergePolicy();
        ldmp.SetMinMergeDocs(1);
        writer.SetMergePolicy(ldmp);
        writer.SetMergeFactor(5);
        writer.SetMaxBufferedDocs(2);
        for (int j = 0; j < numDocs; j++)
            writer.AddDocument(doc);
        writer.Close();

        SegmentInfos sis = new SegmentInfos();
        sis.Read(dir);
        int segCount = sis.Count;

        writer = new IndexWriter(dir, new WhitespaceAnalyzer());
        writer.SetMergePolicy(ldmp);
        writer.SetMergeFactor(5);
        writer.Optimize(3);
        writer.Close();

        sis = new SegmentInfos();
        sis.Read(dir);
        int optSegCount = sis.Count;

        if (segCount < 3)
            Assert.AreEqual(segCount, optSegCount);
        else
            Assert.AreEqual(3, optSegCount);
    }
}
public virtual void TestTermVectorCorruption3()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
    writer.SetMaxBufferedDocs(2);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.SetMergePolicy(new LogDocMergePolicy());

    Document document = new Document();
    Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
    document.Add(storedField);
    Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    document.Add(termVectorField);
    for (int i = 0; i < 10; i++)
        writer.AddDocument(document);
    writer.Close();

    writer = new IndexWriter(dir, false, new StandardAnalyzer());
    writer.SetMaxBufferedDocs(2);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.SetMergePolicy(new LogDocMergePolicy());
    for (int i = 0; i < 6; i++)
        writer.AddDocument(document);

    writer.Optimize();
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    for (int i = 0; i < 10; i++)
    {
        reader.GetTermFreqVectors(i);
        reader.Document(i);
    }
    reader.Close();
    dir.Close();
}
private IndexWriter NewWriter(Directory dir, bool create)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), create);
    writer.SetMergePolicy(new LogDocMergePolicy());
    return writer;
}
public static void CreateIndex(Directory dir, bool multiSegment)
{
    IndexWriter.Unlock(dir);
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    w.SetMergePolicy(new LogDocMergePolicy(w));
    for (int i = 0; i < 100; i++)
    {
        w.AddDocument(CreateDocument(i, 4));
        if (multiSegment && (i % 10) == 0)
        {
            w.Flush();
        }
    }
    if (!multiSegment)
    {
        w.Optimize();
    }
    w.Close();

    IndexReader r = IndexReader.Open(dir);
    if (multiSegment)
    {
        Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);
    }
    else
    {
        Assert.IsTrue(r.GetSequentialSubReaders().Length == 1);
    }
    r.Close();
}
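// CreateDocument is not shown in this excerpt either. A minimal sketch,
// assuming it only needs to produce distinguishable documents with numFields
// indexed fields; the field names and the "index1" default below are
// placeholders, not the original helper's values.
public static Document CreateDocument(int n, System.String indexName, int numFields)
{
    Document doc = new Document();
    doc.Add(new Field("id", n.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("indexname", indexName, Field.Store.YES, Field.Index.NOT_ANALYZED));
    for (int i = 0; i < numFields; i++)
    {
        doc.Add(new Field("field" + (i + 1), "aaa " + n, Field.Store.YES, Field.Index.ANALYZED));
    }
    return doc;
}

// Two-argument overload used by the CreateIndex variants above.
public static Document CreateDocument(int n, int numFields)
{
    return CreateDocument(n, "index1", numFields);
}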
public virtual void TestTermVectorCorruption2()
{
    Directory dir = new MockRAMDirectory();
    for (int iter = 0; iter < 4; iter++)
    {
        bool autoCommit = 1 == iter / 2;
        IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
        writer.SetMaxBufferedDocs(2);
        writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        writer.SetMergePolicy(new LogDocMergePolicy());

        Document document = new Document();
        Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
        document.Add(storedField);
        writer.AddDocument(document);
        writer.AddDocument(document);

        document = new Document();
        document.Add(storedField);
        Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        document.Add(termVectorField);
        writer.AddDocument(document);
        writer.Optimize();
        writer.Close();

        IndexReader reader = IndexReader.Open(dir);
        Assert.IsTrue(reader.GetTermFreqVectors(0) == null);
        Assert.IsTrue(reader.GetTermFreqVectors(1) == null);
        Assert.IsTrue(reader.GetTermFreqVectors(2) != null);
        reader.Close();
    }
    dir.Close();
}