private void SetUpDirs(Directory dir, Directory aux) { IndexWriter writer = null; writer = NewWriter(dir, true); writer.SetMaxBufferedDocs(1000); // add 1000 documents in 1 segment AddDocs(writer, 1000); Assert.AreEqual(1000, writer.DocCount()); Assert.AreEqual(1, writer.GetSegmentCount()); writer.Close(); writer = NewWriter(aux, true); writer.SetUseCompoundFile(false); // use one without a compound file writer.SetMaxBufferedDocs(100); writer.SetMergeFactor(10); // add 30 documents in 3 segments for (int i = 0; i < 3; i++) { AddDocs(writer, 10); writer.Close(); writer = NewWriter(aux, false); writer.SetUseCompoundFile(false); // use one without a compound file writer.SetMaxBufferedDocs(100); writer.SetMergeFactor(10); } Assert.AreEqual(30, writer.DocCount()); Assert.AreEqual(3, writer.GetSegmentCount()); writer.Close(); }
public virtual void TestMergeFactorChange() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(100); writer.SetMergePolicy(new LogDocMergePolicy(writer)); for (int i = 0; i < 250; i++) { AddDoc(writer); CheckInvariants(writer); } writer.SetMergeFactor(5); // merge policy only fixes segments on levels where merges // have been triggered, so check invariants after all adds for (int i = 0; i < 10; i++) { AddDoc(writer); } CheckInvariants(writer); writer.Close(); }
public virtual void TestNoWaitClose() { RAMDirectory directory = new MockRAMDirectory(); Document doc = new Document(); Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(idField); for (int pass = 0; pass < 2; pass++) { bool autoCommit = pass == 0; IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true); for (int iter = 0; iter < 10; iter++) { ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(100); for (int j = 0; j < 201; j++) { idField.SetValue(System.Convert.ToString(iter * 201 + j)); writer.AddDocument(doc); } int delID = iter * 201; for (int j = 0; j < 20; j++) { writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID))); delID += 5; } // Force a bunch of merge threads to kick off so we // stress out aborting them on close: writer.SetMergeFactor(3); writer.AddDocument(doc); writer.Flush(); writer.Close(false); IndexReader reader = IndexReader.Open(directory); Assert.AreEqual((1 + iter) * 182, reader.NumDocs()); reader.Close(); // Reopen writer = new IndexWriter(directory, autoCommit, ANALYZER, false); } writer.Close(); } directory.Close(); }
public virtual void TestMoreMerges() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); Directory aux2 = new RAMDirectory(); SetUpDirs(dir, aux); IndexWriter writer = NewWriter(aux2, true); writer.SetMaxBufferedDocs(100); writer.SetMergeFactor(10); writer.AddIndexesNoOptimize(new Directory[] { aux }); Assert.AreEqual(30, writer.DocCount()); Assert.AreEqual(3, writer.GetSegmentCount()); writer.Close(); IndexReader reader = IndexReader.Open(aux); for (int i = 0; i < 27; i++) { reader.DeleteDocument(i); } Assert.AreEqual(3, reader.NumDocs()); reader.Close(); reader = IndexReader.Open(aux2); for (int i = 0; i < 8; i++) { reader.DeleteDocument(i); } Assert.AreEqual(22, reader.NumDocs()); reader.Close(); writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(6); writer.SetMergeFactor(4); writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 }); Assert.AreEqual(1025, writer.DocCount()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1025); }
public void ApplyToWriter(IndexWriter writer) { try { if (MergeFactor != null) { writer.SetMergeFactor((int) MergeFactor); } if (MaxMergeDocs != null) { writer.SetMaxMergeDocs((int) MaxMergeDocs); } if (MaxBufferedDocs != null) { writer.SetMaxBufferedDocs((int) MaxBufferedDocs); } if (RamBufferSizeMb != null) { writer.SetRAMBufferSizeMB((double) RamBufferSizeMb); } if (TermIndexInterval != null) { writer.SetTermIndexInterval((int) TermIndexInterval); } } catch (ArgumentOutOfRangeException) { // TODO: Log it } }
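A minimal sketch of how a nullable-settings holder like the one above might be used; the IndexWriterSettings type name, its property types, and the object-initializer syntax are assumptions for illustration, only ApplyToWriter's apply-only-what-was-set behavior comes from the snippet.

// Hypothetical settings holder; properties left null fall back to the writer's defaults.
var settings = new IndexWriterSettings     // illustrative name, not from the source
{
    MergeFactor = 20,                      // applied via SetMergeFactor
    RamBufferSizeMb = 48.0                 // applied via SetRAMBufferSizeMB
    // MaxMergeDocs, MaxBufferedDocs, TermIndexInterval stay null and are skipped
};
settings.ApplyToWriter(writer);            // out-of-range values are swallowed (see TODO above)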
public virtual void TestMergeAfterCopy() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); SetUpDirs(dir, aux); IndexReader reader = IndexReader.Open(aux); for (int i = 0; i < 20; i++) { reader.DeleteDocument(i); } Assert.AreEqual(10, reader.NumDocs()); reader.Close(); IndexWriter writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(4); writer.SetMergeFactor(4); writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); Assert.AreEqual(1020, writer.DocCount()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1020); }
public virtual void TestSorting() { Directory directory = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(1000); writer.AddDocument(Adoc(new System.String[]{"id", "a", "title", "ipod", "str_s", "a"})); writer.AddDocument(Adoc(new System.String[]{"id", "b", "title", "ipod ipod", "str_s", "b"})); writer.AddDocument(Adoc(new System.String[]{"id", "c", "title", "ipod ipod ipod", "str_s", "c"})); writer.AddDocument(Adoc(new System.String[]{"id", "x", "title", "boosted", "str_s", "x"})); writer.AddDocument(Adoc(new System.String[]{"id", "y", "title", "boosted boosted", "str_s", "y"})); writer.AddDocument(Adoc(new System.String[]{"id", "z", "title", "boosted boosted boosted", "str_s", "z"})); IndexReader r = writer.GetReader(); writer.Close(); IndexSearcher searcher = new IndexSearcher(r); RunTest(searcher, true); RunTest(searcher, false); searcher.Close(); r.Close(); directory.Close(); }
public virtual void TestLucene() { int num = 100; Directory indexA = new MockRAMDirectory(); Directory indexB = new MockRAMDirectory(); FillIndex(indexA, 0, num); bool fail = VerifyIndex(indexA, 0); if (fail) { Assert.Fail("Index a is invalid"); } FillIndex(indexB, num, num); fail = VerifyIndex(indexB, num); if (fail) { Assert.Fail("Index b is invalid"); } Directory merged = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(merged, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergeFactor(2); writer.AddIndexes(new Directory[]{indexA, indexB}); writer.Close(); fail = VerifyIndex(merged, 0); merged.Close(); Assert.IsFalse(fail, "The merged index is invalid"); }
public virtual void TestNoPrxFile() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(3); writer.SetMergeFactor(2); writer.SetUseCompoundFile(false); Document d = new Document(); Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); f1.SetOmitTermFreqAndPositions(true); d.Add(f1); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } writer.Commit(); AssertNoPrx(ram); // force merge writer.Optimize(); // flush writer.Close(); AssertNoPrx(ram); _TestUtil.CheckIndex(ram); ram.Close(); }
public virtual void TestForceFlush() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(10); LogDocMergePolicy mp = new LogDocMergePolicy(writer); mp.SetMinMergeDocs(100); writer.SetMergePolicy(mp); for (int i = 0; i < 100; i++) { AddDoc(writer); writer.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.SetMergePolicy(mp); mp.SetMinMergeDocs(100); writer.SetMergeFactor(10); CheckInvariants(writer); } writer.Close(); }
public virtual void TestNoOverMerge() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(10); writer.SetMergePolicy(new LogDocMergePolicy(writer)); bool noOverMerge = false; for (int i = 0; i < 100; i++) { AddDoc(writer); CheckInvariants(writer); if (writer.GetNumBufferedDocuments() + writer.GetSegmentCount() >= 18) { noOverMerge = true; } } Assert.IsTrue(noOverMerge); writer.Close(); }
public virtual void TestMaxBufferedDocsChange() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(101); writer.SetMergeFactor(101); writer.SetMergePolicy(new LogDocMergePolicy(writer)); // leftmost* segment has 1 doc // rightmost* segment has 100 docs for (int i = 1; i <= 100; i++) { for (int j = 0; j < i; j++) { AddDoc(writer); CheckInvariants(writer); } writer.Close(); writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false); writer.SetMaxBufferedDocs(101); writer.SetMergeFactor(101); writer.SetMergePolicy(new LogDocMergePolicy(writer)); } writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(10); // merge policy only fixes segments on levels where merges // have been triggered, so check invariants after all adds for (int i = 0; i < 100; i++) { AddDoc(writer); } CheckInvariants(writer); for (int i = 100; i < 1000; i++) { AddDoc(writer); } CheckInvariants(writer); writer.Close(); }
public virtual void TestMixedMerge() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(3); writer.SetMergeFactor(2); Document d = new Document(); // this field will have Tf Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.Add(f1); // this field will NOT have Tf Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); f2.SetOmitTermFreqAndPositions(true); d.Add(f2); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // now we add another document which has term freqs for field f2 and not for f1, and verify that the SegmentMerger keeps things consistent d = new Document(); // Reverse the omitTermFreqAndPositions settings f1.SetOmitTermFreqAndPositions(true); d.Add(f1); f2.SetOmitTermFreqAndPositions(false); d.Add(f2); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } // force merge writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(ram); SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram); FieldInfos fi = reader.FieldInfos(); Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set."); reader.Close(); ram.Close(); }
public override void DoWork() { IndexWriter writer1 = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer1.SetMaxBufferedDocs(3); writer1.SetMergeFactor(2); ((ConcurrentMergeScheduler)writer1.GetMergeScheduler()).SetSuppressExceptions(); IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); // Intentionally use different params so flush/merge // happen @ different times writer2.SetMaxBufferedDocs(2); writer2.SetMergeFactor(3); ((ConcurrentMergeScheduler)writer2.GetMergeScheduler()).SetSuppressExceptions(); Update(writer1); Update(writer2); TestTransactions.doFail = true; try { lock (lock_Renamed) { try { writer1.PrepareCommit(); } catch (System.Exception t) { writer1.Rollback(); writer2.Rollback(); return; } try { writer2.PrepareCommit(); } catch (System.Exception t) { writer1.Rollback(); writer2.Rollback(); return; } writer1.Commit(); writer2.Commit(); } } finally { TestTransactions.doFail = false; } writer1.Close(); writer2.Close(); }
public virtual void TestDuringAddDelete() { Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); writer.SetMergeFactor(2); // create the index CreateIndexNoClose(false, "test", writer); writer.Commit(); IndexReader r = writer.GetReader(); int NUM_THREAD = 5; float SECONDS = 3; long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS); System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList())); SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREAD]; for (int i = 0; i < NUM_THREAD; i++) { threads[i] = new AnonymousClassThread1(endTime, writer, excs, this); threads[i].IsBackground = true; threads[i].Start(); } int sum = 0; while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime) { IndexReader r2 = r.Reopen(); if (r2 != r) { r.Close(); r = r2; } Query q = new TermQuery(new Term("indexname", "test")); sum += new IndexSearcher(r).Search(q, 10).totalHits; } for (int i = 0; i < NUM_THREAD; i++) { threads[i].Join(); } Assert.IsTrue(sum > 0); Assert.AreEqual(0, excs.Count); writer.Close(); _TestUtil.CheckIndex(dir1); r.Close(); dir1.Close(); }
private void CreateIndex(Directory dir) { IndexWriter iw = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.SetSimilarity(similarityOne); iw.SetUseCompoundFile(true); iw.Close(); }
public static IndexWriter GetAzureIndexWriter(this LuceneIndexer indexer) { indexer.EnsureIndex(false); var writer = new IndexWriter(indexer.GetLuceneDirectory(), indexer.IndexingAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetRAMBufferSizeMB(10.0); writer.SetUseCompoundFile(false); writer.SetMaxMergeDocs(10000); writer.SetMergeFactor(100); return writer; }
/// <summary> Determines how often segment indices are merged by addDocument(). With /// smaller values, less RAM is used while indexing, and searches on /// unoptimized indices are faster, but indexing speed is slower. With larger /// values, more RAM is used during indexing, and while searches on unoptimized /// indices are slower, indexing is faster. Thus larger values (> 10) are best /// for batch index creation, and smaller values (< 10) for indices that are /// interactively maintained. /// <para>This must never be less than 2. The default value is 10.</para> /// </summary> /// <seealso cref="IndexWriter.SetMergeFactor(int)"> /// </seealso> /// <throws> IllegalStateException if the index is closed </throws> public virtual void SetMergeFactor(int mergeFactor) { lock (directory) { AssureOpen(); if (indexWriter != null) { indexWriter.SetMergeFactor(mergeFactor); } this.mergeFactor = mergeFactor; } }
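The doc comment above describes the usual trade-off; below is a short hedged sketch of picking the merge factor by workload, using only setters that already appear in this file (the batchDir, liveDir, and analyzer variables are placeholders):

// Batch rebuild: favor indexing throughput with a high merge factor and a large flush buffer.
IndexWriter batchWriter = new IndexWriter(batchDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
batchWriter.SetMergeFactor(30);            // fewer, larger merges while bulk loading
batchWriter.SetMaxBufferedDocs(1000);

// Interactively maintained index: keep the segment count low so unoptimized searches stay fast.
IndexWriter liveWriter = new IndexWriter(liveDir, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
liveWriter.SetMergeFactor(2);              // must never be less than 2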
private void AddDocs(Directory dir, int ndocs, bool compound) { IndexWriter iw = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.SetSimilarity(similarityOne); iw.SetUseCompoundFile(compound); for (int i = 0; i < ndocs; i++) { iw.AddDocument(NewDoc()); } iw.Close(); }
private void FillIndex(Directory dir, int start, int numDocs) { IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergeFactor(2); writer.SetMaxBufferedDocs(2); for (int i = start; i < (start + numDocs); i++) { Document temp = new Document(); temp.Add(new Field("count", ("" + i), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(temp); } writer.Close(); }
public virtual void TestNormalCase() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(10); writer.SetMergePolicy(new LogDocMergePolicy(writer)); for (int i = 0; i < 100; i++) { AddDoc(writer); CheckInvariants(writer); } writer.Close(); }
/// <summary> Close the IndexReader and open an IndexWriter.</summary> /// <throws> IOException </throws> protected internal virtual void CreateIndexWriter() { if (indexWriter == null) { if (indexReader != null) { indexReader.Close(); indexReader = null; } indexWriter = new IndexWriter(directory, analyzer, false); indexWriter.SetInfoStream(infoStream); indexWriter.SetUseCompoundFile(useCompoundFile); indexWriter.SetMaxBufferedDocs(maxBufferedDocs); indexWriter.SetMaxFieldLength(maxFieldLength); indexWriter.SetMergeFactor(mergeFactor); } }
public virtual void TestHangOnClose() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergePolicy(new LogByteSizeMergePolicy(writer)); writer.SetMaxBufferedDocs(5); writer.SetUseCompoundFile(false); writer.SetMergeFactor(100); Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 60; i++) { writer.AddDocument(doc); } writer.SetMaxBufferedDocs(200); Document doc2 = new Document(); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); for (int i = 0; i < 10; i++) { writer.AddDocument(doc2); } writer.Close(); Directory dir2 = new MockRAMDirectory(); writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer); lmp.SetMinMergeMB(0.0001); writer.SetMergePolicy(lmp); writer.SetMergeFactor(4); writer.SetUseCompoundFile(false); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.AddIndexesNoOptimize(new Directory[] { dir }); writer.Close(); dir.Close(); dir2.Close(); }
public virtual void TestIndexing() { Directory mainDir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); IndexReader reader = writer.GetReader(); // start pooling readers reader.Close(); writer.SetMergeFactor(2); writer.SetMaxBufferedDocs(10); RunThread[] indexThreads = new RunThread[4]; for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x] = new RunThread(this, x % 2, writer); indexThreads[x].Name = "Thread " + x; indexThreads[x].Start(); } long startTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); long duration = 5 * 1000; while (((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - startTime) < duration) { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 100)); } int delCount = 0; int addCount = 0; for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x].run_Renamed_Field = false; Assert.IsTrue(indexThreads[x].ex == null); addCount += indexThreads[x].addCount; delCount += indexThreads[x].delCount; } for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x].Join(); } //System.out.println("addCount:"+addCount); //System.out.println("delCount:"+delCount); writer.Close(); mainDir.Close(); }
public virtual void TestNoMergeAfterCopy() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); SetUpDirs(dir, aux); IndexWriter writer = NewWriter(dir, false); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(4); writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); Assert.AreEqual(1060, writer.DocCount()); Assert.AreEqual(1000, writer.GetDocCount(0)); writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 1060); }
/// <summary> Close the IndexReader and open an IndexWriter.</summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer has this index open /// (<code>write.lock</code> could not be obtained) </throws> /// <throws> IOException if there is a low-level IO error </throws> protected internal virtual void CreateIndexWriter() { if (indexWriter == null) { if (indexReader != null) { indexReader.Close(); indexReader = null; } indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength)); // IndexModifier cannot use ConcurrentMergeScheduler // because it synchronizes on the directory which can // cause deadlock indexWriter.SetMergeScheduler(new SerialMergeScheduler()); indexWriter.SetInfoStream(infoStream); indexWriter.SetUseCompoundFile(useCompoundFile); if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH) { indexWriter.SetMaxBufferedDocs(maxBufferedDocs); } indexWriter.SetMergeFactor(mergeFactor); } }
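The writer is created lazily and only after any open reader is released; the complementary reader-side method presumably mirrors this. A hedged sketch of that mirror, assuming the same indexReader, indexWriter, and directory fields used above:

// Sketch only: close the writer (flushing buffered docs) before reopening a reader.
protected internal virtual void CreateIndexReader()
{
    if (indexReader == null)
    {
        if (indexWriter != null)
        {
            indexWriter.Close();                    // commits pending documents and releases write.lock
            indexWriter = null;
        }
        indexReader = IndexReader.Open(directory);  // view of the committed index
    }
}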
internal virtual void AddDocs(Directory dir, int ndocs, System.String field, System.String val, int maxTF, float percentDocs) { System.Random random = NewRandom(); RepeatingTokenStream ts = new RepeatingTokenStream(val); Analyzer analyzer = new AnonymousClassAnalyzer(random, percentDocs, ts, maxTF, this); Document doc = new Document(); doc.Add(new Field(field, val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(100); writer.SetMergeFactor(100); for (int i = 0; i < ndocs; i++) { writer.AddDocument(doc); } writer.Optimize(); writer.Close(); }
public virtual void TestMergeWarmer() { Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); // create the index CreateIndexNoClose(false, "test", writer); // get a reader to put writer into near real-time mode IndexReader r1 = writer.GetReader(); // Enroll warmer MyWarmer warmer = new MyWarmer(); writer.SetMergedSegmentWarmer(warmer); writer.SetMergeFactor(2); writer.SetMaxBufferedDocs(2); for (int i = 0; i < 10; i++) { writer.AddDocument(CreateDocument(i, "test", 4)); } ((ConcurrentMergeScheduler)writer.GetMergeScheduler()).Sync(); Assert.IsTrue(warmer.warmCount > 0); int count = warmer.warmCount; writer.AddDocument(CreateDocument(17, "test", 4)); writer.Optimize(); Assert.IsTrue(warmer.warmCount > count); writer.Close(); r1.Close(); dir1.Close(); }
public virtual void TestMergeDocCount0() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true); writer.SetMergePolicy(new LogDocMergePolicy(writer)); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(100); for (int i = 0; i < 250; i++) { AddDoc(writer); CheckInvariants(writer); } writer.Close(); IndexReader reader = IndexReader.Open(dir); reader.DeleteDocuments(new Term("content", "aaa")); reader.Close(); writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false); writer.SetMergePolicy(new LogDocMergePolicy(writer)); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(5); // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { AddDoc(writer); } CheckInvariants(writer); Assert.AreEqual(10, writer.DocCount()); writer.Close(); }
public virtual void TestOptimizeMaxNumSegments2() { MockRAMDirectory dir = new MockRAMDirectory(); Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED)); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.SetMinMergeDocs(1); writer.SetMergePolicy(ldmp); writer.SetMergeFactor(4); writer.SetMaxBufferedDocs(2); for (int iter = 0; iter < 10; iter++) { for (int i = 0; i < 19; i++) writer.AddDocument(doc); writer.Flush(); SegmentInfos sis = new SegmentInfos(); ((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync(); sis.Read(dir); int segCount = sis.Count; writer.Optimize(7); sis = new SegmentInfos(); ((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync(); sis.Read(dir); int optSegCount = sis.Count; if (segCount < 7) Assert.AreEqual(segCount, optSegCount); else Assert.AreEqual(7, optSegCount); } }
public virtual void TestDuringAddIndexes_LuceneNet() { MockRAMDirectory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetInfoStream(infoStream); writer.SetMergeFactor(2); // create the index CreateIndexNoClose(false, "test", writer); writer.Commit(); Directory[] dirs = new Directory[10]; for (int i = 0; i < 10; i++) { dirs[i] = new MockRAMDirectory(dir1); } IndexReader r = writer.GetReader(); int NUM_THREAD = 5; float SECONDS = 3; long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS); System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList())); SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREAD]; for (int i = 0; i < NUM_THREAD; i++) { threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this); threads[i].IsBackground = true; threads[i].Start(); } int lastCount = 0; while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime) { using (IndexReader r2 = writer.GetReader()) { Query q = new TermQuery(new Term("indexname", "test")); int count = new IndexSearcher(r2).Search(q, 10).TotalHits; Assert.IsTrue(count >= lastCount); lastCount = count; } } for (int i = 0; i < NUM_THREAD; i++) { threads[i].Join(); } Assert.AreEqual(0, excs.Count); r.Close(); Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count); writer.Close(); _TestUtil.CheckIndex(dir1); dir1.Close(); }
public virtual void TestNorms() { // tmp dir System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) { throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); } // test with a single index: index1 System.IO.FileInfo indexDir1 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1")); Directory dir1 = FSDirectory.Open(indexDir1); IndexWriter.Unlock(dir1); norms = new System.Collections.ArrayList(); modifiedNorms = new System.Collections.ArrayList(); CreateIndex(dir1); DoTestNorms(dir1); // test with a single index: index2 System.Collections.ArrayList norms1 = norms; System.Collections.ArrayList modifiedNorms1 = modifiedNorms; int numDocNorms1 = numDocNorms; norms = new System.Collections.ArrayList(); modifiedNorms = new System.Collections.ArrayList(); numDocNorms = 0; System.IO.FileInfo indexDir2 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2")); Directory dir2 = FSDirectory.Open(indexDir2); CreateIndex(dir2); DoTestNorms(dir2); // add index1 and index2 to a third index: index3 System.IO.FileInfo indexDir3 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3")); Directory dir3 = FSDirectory.Open(indexDir3); CreateIndex(dir3); IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.AddIndexes(new Directory[] { dir1, dir2 }); iw.Close(); norms1.AddRange(norms); norms = norms1; modifiedNorms1.AddRange(modifiedNorms); modifiedNorms = modifiedNorms1; numDocNorms += numDocNorms1; // test with index3 VerifyIndex(dir3); DoTestNorms(dir3); // now with optimize iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.Optimize(); iw.Close(); VerifyIndex(dir3); dir1.Close(); dir2.Close(); dir3.Close(); }
public static bool Index(Analyzer analyzer, FileIndexSet fileIndex,IndexerSet indexer, bool create) { try { IndexWriter writer = new IndexWriter(fileIndex.Path, analyzer, create); writer.SetMaxFieldLength(indexer.MaxFieldLength); writer.SetRAMBufferSizeMB(indexer.RamBufferSize); writer.SetMergeFactor(indexer.MergeFactor); writer.SetMaxBufferedDocs(indexer.MaxBufferedDocs); foreach (string dir in fileIndex.BaseDirs) { IndexDir(writer, dir); } return true; } catch (Exception ) { return false; } }
public virtual void TestSetMaxMergeDocs() { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); iw.SetMergeScheduler(new MyMergeScheduler(this)); iw.SetMaxMergeDocs(20); iw.SetMaxBufferedDocs(2); iw.SetMergeFactor(2); Document document = new Document(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); for (int i = 0; i < 177; i++) iw.AddDocument(document); iw.Close(); }
public virtual void TestVariableSchema() { MockRAMDirectory dir = new MockRAMDirectory(); int delID = 0; for (int i = 0; i < 20; i++) { IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(2); writer.SetUseCompoundFile(false); Document doc = new Document(); System.String contents = "aa bb cc dd ee ff gg hh ii jj kk"; if (i == 7) { // Add empty docs here doc.Add(new Field("content3", "", Field.Store.NO, Field.Index.TOKENIZED)); } else { Field.Store storeVal; if (i % 2 == 0) { doc.Add(new Field("content4", contents, Field.Store.YES, Field.Index.TOKENIZED)); storeVal = Field.Store.YES; } else storeVal = Field.Store.NO; doc.Add(new Field("content1", contents, storeVal, Field.Index.TOKENIZED)); doc.Add(new Field("content3", "", Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("content5", "", storeVal, Field.Index.TOKENIZED)); } for (int j = 0; j < 4; j++) writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); reader.DeleteDocument(delID++); reader.Close(); if (0 == i % 4) { writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); writer.SetUseCompoundFile(false); writer.Optimize(); writer.Close(); } } }
public virtual void TestCloseWithThreads() { int NUM_THREADS = 3; for (int iter = 0; iter < 50; iter++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(10); writer.SetMergeFactor(4); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; //bool diskFull = false; for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(this, writer, false); for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 50)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } writer.Close(false); // Make sure threads that are adding docs are not hung: for (int i = 0; i < NUM_THREADS; i++) { while (true) { try { // Without fix for LUCENE-1130: one of the // threads will hang threads[i].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } if (threads[i].IsAlive) Assert.Fail("thread seems to be hung"); } // Quick test to make sure index is not corrupt: IndexReader reader = IndexReader.Open(dir); TermDocs tdocs = reader.TermDocs(new Term("field", "aaa")); int count = 0; while (tdocs.Next()) { count++; } Assert.IsTrue(count > 0); reader.Close(); dir.Close(); } }
public virtual void TestImmediateDiskFullWithThreads() { int NUM_THREADS = 3; for (int iter = 0; iter < 10; iter++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); // We expect disk full exceptions in the merge threads cms.SetSuppressExceptions_ForNUnitTest(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(4); dir.SetMaxSizeInBytes(4 * 1024 + 20 * iter); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; //bool diskFull = false; for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(this, writer, true); for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); for (int i = 0; i < NUM_THREADS; i++) { while (true) { try { // Without fix for LUCENE-1130: one of the // threads will hang threads[i].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } if (threads[i].IsAlive) Assert.Fail("thread seems to be hung"); else Assert.IsTrue(threads[i].error == null, "hit unexpected Throwable"); } try { writer.Close(false); } catch (System.IO.IOException) { } dir.Close(); } }
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void _testMultipleThreadsFailure(MockRAMDirectory.Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 5; iter++) { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer()); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); // We expect disk full exceptions in the merge threads cms.SetSuppressExceptions_ForNUnitTest(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(4); IndexerThread[] threads = new IndexerThread[NUM_THREADS]; //bool diskFull = false; for (int i = 0; i < NUM_THREADS; i++) threads[i] = new IndexerThread(this, writer, true); for (int i = 0; i < NUM_THREADS; i++) threads[i].Start(); try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 10)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { while (true) { try { threads[i].Join(); break; } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } if (threads[i].IsAlive) Assert.Fail("thread seems to be hung"); else Assert.IsTrue(threads[i].error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Close(false); success = true; } catch (System.IO.IOException) { } if (success) { IndexReader reader = IndexReader.Open(dir); for (int j = 0; j < reader.MaxDoc(); j++) { if (!reader.IsDeleted(j)) { reader.Document(j); reader.GetTermFreqVectors(j); } } reader.Close(); } dir.Close(); } }
/// <summary> /// Creates the index for the bz2 file on a separate thread. /// </summary> private void CreateIndexAsync() { bool failed = false; try { // Close any searchers if (searcher != null) { searcher.Close(); searcher = null; } indexExists = false; // Create the index writer indexer = new IndexWriter(indexPath, textAnalyzer, true); memoryIndexer = new IndexWriter(new RAMDirectory(), textAnalyzer, true); memoryIndexer.SetMaxBufferedDocs(1000); memoryIndexer.SetMergeFactor(100); indexer.SetMaxBufferedDocs(1000); indexer.SetMergeFactor(100); // Locate the bzip2 blocks in the file LocateBlocks(); // Two times more than the first block but not less than 100 bytes long bufSize = ((ends[0] - beginnings[0]) / 8) * 2 + 100; // Buffers for the current and next block blockBuf = new byte[bufSize]; charBuf = new char[bufSize]; // Whether there was a Wiki topic carryover from current block to the next one char[] charCarryOver = new char[0]; // The length of the currently loaded data long loadedLength = 0; StringBuilder sb = new StringBuilder(); // Starting indexing ReportProgress(0, IndexingProgress.State.Running, "Indexing"); for (long i = 0; i < totalBlocks && !abortIndexing; i++) { ReportProgress((int)((double)(i * 100) / (double)totalBlocks), IndexingProgress.State.Running, String.Empty); #region Indexing logic loadedLength = LoadBlock(beginnings[i], ends[i], ref blockBuf); if (charBuf.Length < blockBuf.Length) { charBuf = new char[blockBuf.Length]; } int bytesUsed = 0; int charsUsed = 0; bool completed = false; // Convert the text to UTF8 utf8.Convert(blockBuf, 0, (int)loadedLength, charBuf, 0, charBuf.Length, i == totalBlocks - 1, out bytesUsed, out charsUsed, out completed); if (!completed) { throw new Exception("UTF8 decoder could not complete the conversion"); } // Construct a current string sb.Length = 0; if (charCarryOver.Length > 0) { sb.Append(charCarryOver); } sb.Append(charBuf, 0, charsUsed); int carryOverLength = charCarryOver.Length; int charsMatched = IndexString(sb.ToString(), beginnings[i], ends[i], carryOverLength, i == totalBlocks - 1); // There's a Wiki topic carryover, let's store the characters which need to be carried over if (charsMatched > 0) { charCarryOver = new char[charsMatched]; sb.CopyTo(charsUsed + carryOverLength - charsMatched, charCarryOver, 0, charsMatched); } else { charCarryOver = new char[0]; } #endregion } // Wait till all the threads finish while (activeThreads != 0) { ReportProgress(0, IndexingProgress.State.Running, "Waiting for tokenizer threads to finish"); Thread.Sleep(TimeSpan.FromSeconds(5)); } ReportProgress(0, IndexingProgress.State.Running, "Flushing documents to disk"); Lucene.Net.Store.Directory dir = memoryIndexer.GetDirectory(); memoryIndexer.Close(); indexer.AddIndexes(new Lucene.Net.Store.Directory[] { dir }); memoryIndexer = null; ReportProgress(0, IndexingProgress.State.Running, "Optimizing index"); indexer.Optimize(); indexExists = true; } catch (Exception ex) { ReportProgress(0, IndexingProgress.State.Failure, ex.ToString()); failed = true; } // Try to release some memory if (indexer != null) { indexer.Close(); indexer = null; } if (failed || abortIndexing) { Directory.Delete(indexPath, true); indexExists = false; } else { if (indexExists) { searcher = new IndexSearcher(indexPath); } } ReportProgress(0, IndexingProgress.State.Finished, String.Empty); }
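The method above stages all documents in an in-memory writer and folds them into the on-disk index once at the end. A stripped-down sketch of that RAMDirectory-staging pattern, using only APIs that already appear in the snippet (indexPath, analyzer, and the docs collection are placeholders):

// Stage documents in RAM, then merge the RAM segments into the disk index in one pass.
IndexWriter diskWriter = new IndexWriter(indexPath, analyzer, true);
IndexWriter ramWriter = new IndexWriter(new RAMDirectory(), analyzer, true);
ramWriter.SetMaxBufferedDocs(1000);
ramWriter.SetMergeFactor(100);

foreach (Document doc in docs)                 // docs: whatever produces the documents
{
    ramWriter.AddDocument(doc);
}

Lucene.Net.Store.Directory ramDir = ramWriter.GetDirectory();
ramWriter.Close();                             // flush the RAM segments before copying them
diskWriter.AddIndexes(new Lucene.Net.Store.Directory[] { ramDir });
diskWriter.Optimize();                         // collapse the result into fewer segments
diskWriter.Close();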
/// <summary> Close the IndexReader and open an IndexWriter.</summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer has this index open /// (<c>write.lock</c> could not be obtained) </throws> /// <throws> IOException if there is a low-level IO error </throws> protected internal virtual void CreateIndexWriter() { if (indexWriter == null) { if (indexReader != null) { indexReader.Close(); indexReader = null; } indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength)); // IndexModifier cannot use ConcurrentMergeScheduler // because it synchronizes on the directory which can // cause deadlock indexWriter.SetMergeScheduler(new SerialMergeScheduler()); indexWriter.SetInfoStream(infoStream); indexWriter.SetUseCompoundFile(useCompoundFile); if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH) indexWriter.SetMaxBufferedDocs(maxBufferedDocs); indexWriter.SetMergeFactor(mergeFactor); } }
public virtual void TestMaxThreadPriority() { int pri = (System.Int32) SupportClass.ThreadClass.Current().Priority; try { MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true); Document document = new Document(); document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES)); iw.SetMaxBufferedDocs(2); iw.SetMergeFactor(2); SupportClass.ThreadClass.Current().Priority = (System.Threading.ThreadPriority) System.Threading.ThreadPriority.Highest; for (int i = 0; i < 4; i++) iw.AddDocument(document); iw.Close(); } finally { SupportClass.ThreadClass.Current().Priority = (System.Threading.ThreadPriority) pri; } }
public virtual void TestBackgroundOptimize() { Directory dir = new MockRAMDirectory(); for (int pass = 0; pass < 2; pass++) { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMergeScheduler(new ConcurrentMergeScheduler()); Document doc = new Document(); doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(101); for (int i = 0; i < 200; i++) writer.AddDocument(doc); writer.Optimize(false); if (0 == pass) { writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.IsTrue(reader.IsOptimized()); reader.Close(); } else { // Get another segment to flush so we can verify it is // NOT included in the optimization writer.AddDocument(doc); writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); Assert.IsTrue(!reader.IsOptimized()); reader.Close(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); Assert.AreEqual(2, infos.Count); } } dir.Close(); }
public virtual void TestNorms_Renamed() { // tmp dir System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) { throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); } // test with a single index: index1 System.IO.FileInfo indexDir1 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1")); Directory dir1 = FSDirectory.Open(indexDir1); norms = new System.Collections.ArrayList(); modifiedNorms = new System.Collections.ArrayList(); CreateIndex(dir1); DoTestNorms(dir1); // test with a single index: index2 System.Collections.ArrayList norms1 = norms; System.Collections.ArrayList modifiedNorms1 = modifiedNorms; int numDocNorms1 = numDocNorms; norms = new System.Collections.ArrayList(); modifiedNorms = new System.Collections.ArrayList(); numDocNorms = 0; System.IO.FileInfo indexDir2 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2")); Directory dir2 = FSDirectory.Open(indexDir2); CreateIndex(dir2); DoTestNorms(dir2); // add index1 and index2 to a third index: index3 System.IO.FileInfo indexDir3 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3")); Directory dir3 = FSDirectory.Open(indexDir3); CreateIndex(dir3); IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.AddIndexes(new Directory[]{dir1, dir2}); iw.Close(); norms1.AddRange(norms); norms = norms1; modifiedNorms1.AddRange(modifiedNorms); modifiedNorms = modifiedNorms1; numDocNorms += numDocNorms1; // test with index3 VerifyIndex(dir3); DoTestNorms(dir3); // now with optimize iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.Optimize(); iw.Close(); VerifyIndex(dir3); dir1.Close(); dir2.Close(); dir3.Close(); }
public static bool Index(Analyzer analyzer, FileIndexSet set, int maxFieldLength, double ramBufferSize, int mergeFactor, int maxBufferedDocs, bool create) { try { IndexWriter writer = new IndexWriter(set.Path, analyzer, create); writer.SetMaxFieldLength(maxFieldLength); writer.SetRAMBufferSizeMB(ramBufferSize); writer.SetMergeFactor(mergeFactor); writer.SetMaxBufferedDocs(maxBufferedDocs); foreach (string dir in set.BaseDirs) { IndexDir(writer, dir); } return true; } catch (Exception ) { return false; } }
public virtual void TestBasic() { Directory dir = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergeFactor(2); writer.SetMaxBufferedDocs(2); writer.SetSimilarity(new SimpleSimilarity()); System.Text.StringBuilder sb = new System.Text.StringBuilder(265); System.String term = "term"; for (int i = 0; i < 30; i++) { Document d = new Document(); sb.Append(term).Append(" "); System.String content = sb.ToString(); Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED); noTf.SetOmitTermFreqAndPositions(true); d.Add(noTf); Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED); d.Add(tf); writer.AddDocument(d); //System.out.println(d); } writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(dir); /* * Verify the index */ Searcher searcher = new IndexSearcher(dir); searcher.SetSimilarity(new SimpleSimilarity()); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d2 = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d2); searcher.Search(q1, new AnonymousClassCountingHitCollector(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q2, new AnonymousClassCountingHitCollector1(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q3, new AnonymousClassCountingHitCollector2(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q4, new AnonymousClassCountingHitCollector3(this)); //System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.Add(q1, Occur.MUST); bq.Add(q4, Occur.MUST); searcher.Search(bq, new AnonymousClassCountingHitCollector4(this)); Assert.IsTrue(15 == CountingHitCollector.GetCount()); searcher.Close(); dir.Close(); }
/// <summary> /// Retrieve a read/write <see cref="IndexWriter" /> /// </summary> /// <param name="provider"></param> /// <param name="entity"></param> /// <param name="modificationOperation"></param> /// <returns></returns> public IndexWriter GetIndexWriter(IDirectoryProvider provider, System.Type entity, bool modificationOperation) { // Have to close the reader before the writer is accessed. IndexReader reader; readers.TryGetValue(provider, out reader); if (reader != null) { try { reader.Close(); } catch (IOException ex) { throw new SearchException("Exception while closing IndexReader", ex); } finally { readers.Remove(provider); // PH - Moved the exit lock out of the try otherwise it won't take place when we have an error closing the reader. // Exit Lock added by Kailuo Wang, because the lock needs to be obtained immediately afterwards object syncLock = searchFactoryImplementor.GetLockableDirectoryProviders()[provider]; Monitor.Exit(syncLock); } } if (writers.ContainsKey(provider)) { return writers[provider]; } LockProvider(provider); if (modificationOperation) dpStatistics[provider].Operations++; try { Analyzer analyzer = entity != null ? searchFactoryImplementor.DocumentBuilders[entity].Analyzer : new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(provider.Directory, analyzer, false); LuceneIndexingParameters indexingParams = searchFactoryImplementor.GetIndexingParameters(provider); if (IsBatch) { writer.SetMergeFactor(indexingParams.BatchMergeFactor); writer.SetMaxMergeDocs(indexingParams.BatchMaxMergeDocs); writer.SetMaxBufferedDocs(indexingParams.BatchMaxBufferedDocs); } else { writer.SetMergeFactor(indexingParams.TransactionMergeFactor); writer.SetMaxMergeDocs(indexingParams.TransactionMaxMergeDocs); writer.SetMaxBufferedDocs(indexingParams.TransactionMaxBufferedDocs); } writers.Add(provider, writer); return writer; } catch (IOException ex) { CleanUp(new SearchException("Unable to open IndexWriter" + (entity != null ? " for " + entity : ""), ex)); } return null; }
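A hedged sketch of how a caller might use GetIndexWriter as defined above; the workspace variable, the Book entity, and the pre-built document are illustrative only:

// Illustrative call site: ask for the per-provider writer, flagging a modification
// so the provider's operation counter is incremented.
IndexWriter writer = workspace.GetIndexWriter(provider, typeof(Book), true);
if (writer != null)                 // null means the writer could not be opened
{
    writer.AddDocument(document);   // document built elsewhere for the Book entity
}
// The writer stays cached in 'writers' per provider; closing it and releasing the
// directory lock is presumably handled when the overall unit of work completes.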
public virtual void runTest(Directory directory, bool autoCommit, MergeScheduler merger) { IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true); writer.SetMaxBufferedDocs(2); if (merger != null) { writer.SetMergeScheduler(merger); } for (int iter = 0; iter < NUM_ITER; iter++) { int iterFinal = iter; writer.SetMergeFactor(1000); for (int i = 0; i < 200; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.SetMergeFactor(4); //writer.setInfoStream(System.out); int docCount = writer.DocCount(); SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { int iFinal = i; IndexWriter writerFinal = writer; threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); } Assert.IsTrue(!failed); int expectedDocCount = (int)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS))); // System.out.println("TEST: now index=" + writer.segString()); Assert.AreEqual(expectedDocCount, writer.DocCount()); if (!autoCommit) { writer.Close(); writer = new IndexWriter(directory, autoCommit, ANALYZER, false); writer.SetMaxBufferedDocs(2); } IndexReader reader = IndexReader.Open(directory); Assert.IsTrue(reader.IsOptimized()); Assert.AreEqual(expectedDocCount, reader.NumDocs()); reader.Close(); } writer.Close(); }
public virtual void TestOptimizeOverMerge() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(100); writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); Document document = new Document(); Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO); document.Add(storedField); Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); document.Add(termVectorField); for (int i = 0; i < 170; i++) writer.AddDocument(document); writer.Close(); MyIndexWriter myWriter = new MyIndexWriter(this, dir); myWriter.Optimize(); Assert.AreEqual(10, myWriter.mergeCount); }
public virtual void TestOptimizeMaxNumSegments() { MockRAMDirectory dir = new MockRAMDirectory(); Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED)); for (int numDocs = 38; numDocs < 500; numDocs += 38) { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.SetMinMergeDocs(1); writer.SetMergePolicy(ldmp); writer.SetMergeFactor(5); writer.SetMaxBufferedDocs(2); for (int j = 0; j < numDocs; j++) writer.AddDocument(doc); writer.Close(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); int segCount = sis.Count; writer = new IndexWriter(dir, new WhitespaceAnalyzer()); writer.SetMergePolicy(ldmp); writer.SetMergeFactor(5); writer.Optimize(3); writer.Close(); sis = new SegmentInfos(); sis.Read(dir); int optSegCount = sis.Count; if (segCount < 3) Assert.AreEqual(segCount, optSegCount); else Assert.AreEqual(3, optSegCount); } }