public virtual void TestMergeAfterCopy()
{
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    SetUpDirs(dir, aux);

    // Delete docs 0..19 from the auxiliary index, leaving 10 live docs.
    IndexReader reader = IndexReader.Open(aux);
    for (int docId = 0; docId < 20; docId++)
    {
        reader.DeleteDocument(docId);
    }
    Assert.AreEqual(10, reader.NumDocs());
    reader.Close();

    IndexWriter writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(4);
    writer.SetMergeFactor(4);

    // Add the auxiliary index twice (once via a RAM copy).
    writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
    Assert.AreEqual(1020, writer.DocCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1020);
}
// Verifies the log-merge invariants of the writer's segment structure:
// buffered docs stay below the flush threshold, segment doc counts grow
// geometrically by mergeFactor, no level holds mergeFactor or more
// segments, and every segment is in compound-file (.cfs) format.
private void CheckInvariants(IndexWriter writer)
{
    _TestUtil.SyncConcurrentMerges(writer);

    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    int bufferedDocCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(bufferedDocCount < maxBufferedDocs);

    // Walk segments newest-to-oldest; each level spans (lowerBound, upperBound].
    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int segmentsAtLevel = 0;
    int segmentCount = writer.GetSegmentCount();
    for (int seg = segmentCount - 1; seg >= 0; seg--)
    {
        int docCount = writer.GetDocCount(seg);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            segmentsAtLevel++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(segmentsAtLevel < mergeFactor);
            }
            // Escalate level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            segmentsAtLevel = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(segmentsAtLevel < mergeFactor);
    }

    // One .cfs file per segment means every segment uses the compound format.
    int cfsFileCount = 0;
    foreach (System.String file in writer.GetDirectory().ListAll())
    {
        if (file.EndsWith(".cfs"))
        {
            cfsFileCount++;
        }
    }
    Assert.AreEqual(segmentCount, cfsFileCount);
}
public virtual void TestMoreMerges()
{
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directories
    Directory aux = new RAMDirectory();
    Directory aux2 = new RAMDirectory();

    SetUpDirs(dir, aux);

    // Build aux2 by copying aux: 30 docs spread over 3 segments.
    IndexWriter writer = NewWriter(aux2, true);
    writer.SetMaxBufferedDocs(100);
    writer.SetMergeFactor(10);
    writer.AddIndexesNoOptimize(new Directory[] { aux });
    Assert.AreEqual(30, writer.DocCount());
    Assert.AreEqual(3, writer.GetSegmentCount());
    writer.Close();

    // Delete docs 0..26 from aux, leaving 3 live docs.
    IndexReader reader = IndexReader.Open(aux);
    for (int docId = 0; docId < 27; docId++)
    {
        reader.DeleteDocument(docId);
    }
    Assert.AreEqual(3, reader.NumDocs());
    reader.Close();

    // Delete docs 0..7 from aux2, leaving 22 live docs.
    reader = IndexReader.Open(aux2);
    for (int docId = 0; docId < 8; docId++)
    {
        reader.DeleteDocument(docId);
    }
    Assert.AreEqual(22, reader.NumDocs());
    reader.Close();

    // Add both auxiliary indexes (with deletions) into the main index.
    writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(6);
    writer.SetMergeFactor(4);
    writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
    Assert.AreEqual(1025, writer.DocCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1025);
}
// Verifies the log-merge invariants of the writer's segment structure,
// reading the limits from the writer's IndexWriterConfig / LogMergePolicy.
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();

    int maxBufferedDocs = writer.Config.MaxBufferedDocs;
    int mergeFactor = ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor;
    int maxMergeDocs = ((LogMergePolicy)writer.Config.MergePolicy).MaxMergeDocs;

    int bufferedDocCount = writer.NumBufferedDocuments;
    Assert.IsTrue(bufferedDocCount < maxBufferedDocs);

    // Walk segments newest-to-oldest; each level spans (lowerBound, upperBound].
    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int segmentsAtLevel = 0;
    int segmentCount = writer.SegmentCount;
    for (int seg = segmentCount - 1; seg >= 0; seg--)
    {
        int docCount = writer.GetDocCount(seg);
        Assert.IsTrue(docCount > lowerBound,
            "docCount=" + docCount
            + " lowerBound=" + lowerBound
            + " upperBound=" + upperBound
            + " i=" + seg
            + " segmentCount=" + segmentCount
            + " index=" + writer.SegString()
            + " config=" + writer.Config);

        if (docCount <= upperBound)
        {
            segmentsAtLevel++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(segmentsAtLevel < mergeFactor,
                    "maxMergeDocs=" + maxMergeDocs
                    + "; numSegments=" + segmentsAtLevel
                    + "; upperBound=" + upperBound
                    + "; mergeFactor=" + mergeFactor
                    + "; segs=" + writer.SegString()
                    + " config=" + writer.Config);
            }
            // Escalate level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            segmentsAtLevel = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(segmentsAtLevel < mergeFactor);
    }
}
// Verifies the log-merge invariants of the writer's segment structure:
// buffered docs stay below the flush threshold, segment doc counts grow
// geometrically by mergeFactor, and no level holds mergeFactor or more
// segments (for levels still subject to merging, i.e. below maxMergeDocs).
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();

    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    int bufferedDocCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(bufferedDocCount < maxBufferedDocs);

    // Walk segments newest-to-oldest; each level spans (lowerBound, upperBound].
    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int segmentsAtLevel = 0;
    int segmentCount = writer.GetSegmentCount();
    for (int seg = segmentCount - 1; seg >= 0; seg--)
    {
        int docCount = writer.GetDocCount(seg);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            segmentsAtLevel++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(segmentsAtLevel < mergeFactor);
            }
            // Escalate level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            segmentsAtLevel = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(segmentsAtLevel < mergeFactor);
    }
}
public virtual void TestNoMergeAfterCopy()
{
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    SetUpDirs(dir, aux);

    IndexWriter writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(10);
    writer.SetMergeFactor(4);

    // Add the auxiliary index twice (once via a RAM copy).
    writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
    Assert.AreEqual(1060, writer.DocCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1060);
}
/// <summary>
/// This method indexes the content that is sent across to it. Each piece of content (or "document")
/// that is indexed has to have a unique identifier (so that the caller can take action based on the
/// document id). Therefore, this method accepts key-value pairs in the form of a dictionary. The key
/// is a long which uniquely identifies the string to be indexed. The string itself is the value
/// within the dictionary for that key. Be aware that stop words (like the, this, at, etc.) are _not_
/// indexed.
/// </summary>
/// <param name="txtIdPairToBeIndexed">A dictionary of key-value pairs that are sent by the caller
/// to uniquely identify each string that is to be indexed.</param>
/// <returns>The number of documents indexed.</returns>
public int Index(Dictionary<long, string> txtIdPairToBeIndexed)
{
    using (Directory directory = FSDirectory.Open(_indexDir))
    using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    using (IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        //writer.DeleteAll();
        int numIndexed = 0;
        char[] delimiter = { ';' };
        // Iterate the pairs directly instead of looking each key up again.
        foreach (KeyValuePair<long, string> pair in txtIdPairToBeIndexed)
        {
            // NOTE(review): assumes the value is "title;type" — a value without
            // a ';' would make text[1] throw IndexOutOfRangeException; verify callers.
            string[] text = pair.Value.Split(delimiter);

            Document document = new Document();
            Field title = new Field("title", text[0], Field.Store.YES, Field.Index.NO);
            Field type = new Field("type", text[1], Field.Store.YES, Field.Index.NO);
            Field idField = new Field("date", pair.Key.ToString(), Field.Store.YES, Field.Index.ANALYZED);
            document.Add(title);
            document.Add(type);
            document.Add(idField);
            writer.AddDocument(document);
            numIndexed++;
        }

        // Fixed: the original returned writer.GetDocCount(0), which is the doc
        // count of segment 0 only (its own TODO flagged this). We now count the
        // documents actually added in this call. The unused NRT reader from
        // writer.GetReader() has also been removed.
        writer.Optimize();
        writer.Commit();
        return numIndexed;
    }
}
public virtual void TestNoCopySegments()
{
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    SetUpDirs(dir, aux);

    IndexWriter writer = NewWriter(dir, false);
    writer.SetMaxBufferedDocs(9);
    writer.MergeFactor = 4;

    // Buffer two docs before adding the auxiliary index.
    AddDocs(writer, 2);
    writer.AddIndexesNoOptimize(new Directory[] { aux });
    Assert.AreEqual(1032, writer.MaxDoc());
    Assert.AreEqual(2, writer.GetSegmentCount());
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Close();

    // make sure the index is correct
    VerifyNumDocs(dir, 1032);
}
// Verifies the log-merge invariants of the writer's segment structure,
// reading mergeFactor/maxMergeDocs via the writer's properties.
private void CheckInvariants(IndexWriter writer)
{
    writer.WaitForMerges();

    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.MergeFactor;
    int maxMergeDocs = writer.MaxMergeDocs;

    int bufferedDocCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(bufferedDocCount < maxBufferedDocs);

    // Walk segments newest-to-oldest; each level spans (lowerBound, upperBound].
    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int segmentsAtLevel = 0;
    int segmentCount = writer.GetSegmentCount();
    for (int seg = segmentCount - 1; seg >= 0; seg--)
    {
        int docCount = writer.GetDocCount(seg);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            segmentsAtLevel++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(segmentsAtLevel < mergeFactor,
                    "maxMergeDocs=" + maxMergeDocs
                    + "; numSegments=" + segmentsAtLevel
                    + "; upperBound=" + upperBound
                    + "; mergeFactor=" + mergeFactor);
            }
            // Escalate level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            segmentsAtLevel = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(segmentsAtLevel < mergeFactor);
    }
}
public virtual void TestMoreMerges()
{
    // main directory
    Directory dir = NewDirectory();
    // auxiliary directories
    Directory aux = NewDirectory();
    Directory aux2 = NewDirectory();

    SetUpDirs(dir, aux, true);

    // Build aux2 by copying aux: 30 docs spread over 3 segments.
    IndexWriter writer = NewWriter(aux2,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetOpenMode(OpenMode_e.CREATE)
            .SetMaxBufferedDocs(100)
            .SetMergePolicy(NewLogMergePolicy(10)));
    writer.AddIndexes(aux);
    Assert.AreEqual(30, writer.MaxDoc);
    Assert.AreEqual(3, writer.SegmentCount);
    writer.Dispose();

    // Delete docs 0..26 from aux with merging disabled, leaving 3 live docs.
    IndexWriterConfig dontMergeConfig =
        (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))
            .SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
    writer = new IndexWriter(aux, dontMergeConfig);
    for (int docId = 0; docId < 27; docId++)
    {
        writer.DeleteDocuments(new Term("id", "" + docId));
    }
    writer.Dispose();
    IndexReader reader = DirectoryReader.Open(aux);
    Assert.AreEqual(3, reader.NumDocs);
    reader.Dispose();

    // Delete docs 0..7 from aux2 with merging disabled, leaving 22 live docs.
    dontMergeConfig =
        (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))
            .SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
    writer = new IndexWriter(aux2, dontMergeConfig);
    for (int docId = 0; docId < 8; docId++)
    {
        writer.DeleteDocuments(new Term("id", "" + docId));
    }
    writer.Dispose();
    reader = DirectoryReader.Open(aux2);
    Assert.AreEqual(22, reader.NumDocs);
    reader.Dispose();

    // Add both auxiliary indexes (with deletions) into the main index.
    writer = NewWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetOpenMode(OpenMode_e.APPEND)
            .SetMaxBufferedDocs(6)
            .SetMergePolicy(NewLogMergePolicy(4)));
    writer.AddIndexes(aux, aux2);
    Assert.AreEqual(1040, writer.MaxDoc);
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Dispose();

    dir.Dispose();
    aux.Dispose();
    aux2.Dispose();
}
public virtual void TestMergeAfterCopy()
{
    // main directory
    Directory dir = NewDirectory();
    // auxiliary directory
    Directory aux = NewDirectory();

    SetUpDirs(dir, aux, true);

    // Delete docs 0..19 from aux with merging disabled, leaving 10 live docs.
    IndexWriterConfig dontMergeConfig =
        (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))
            .SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
    IndexWriter writer = new IndexWriter(aux, dontMergeConfig);
    for (int docId = 0; docId < 20; docId++)
    {
        writer.DeleteDocuments(new Term("id", "" + docId));
    }
    writer.Dispose();
    IndexReader reader = DirectoryReader.Open(aux);
    Assert.AreEqual(10, reader.NumDocs);
    reader.Dispose();

    writer = NewWriter(dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetOpenMode(OpenMode_e.APPEND)
            .SetMaxBufferedDocs(4)
            .SetMergePolicy(NewLogMergePolicy(4)));

    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: now addIndexes");
    }

    // Add the auxiliary index twice (once via a wrapped RAM copy).
    writer.AddIndexes(aux, new MockDirectoryWrapper(Random(), new RAMDirectory(aux, NewIOContext(Random()))));
    Assert.AreEqual(1020, writer.MaxDoc);
    Assert.AreEqual(1000, writer.GetDocCount(0));
    writer.Dispose();

    dir.Dispose();
    aux.Dispose();
}
// Verifies the log-merge invariants of the writer's segment structure and
// that every segment is stored in compound-file (.cfs) format.
private void CheckInvariants(IndexWriter writer)
{
    _TestUtil.SyncConcurrentMerges(writer);

    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();

    int bufferedDocCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(bufferedDocCount < maxBufferedDocs);

    // Walk segments newest-to-oldest; each level spans (lowerBound, upperBound].
    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int segmentsAtLevel = 0;
    int segmentCount = writer.GetSegmentCount();
    for (int seg = segmentCount - 1; seg >= 0; seg--)
    {
        int docCount = writer.GetDocCount(seg);
        Assert.IsTrue(docCount > lowerBound);

        if (docCount <= upperBound)
        {
            segmentsAtLevel++;
        }
        else
        {
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(segmentsAtLevel < mergeFactor);
            }
            // Escalate level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            segmentsAtLevel = 1;
        }
    }
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(segmentsAtLevel < mergeFactor);
    }

    // One .cfs file per segment means every segment uses the compound format.
    int cfsFileCount = 0;
    foreach (System.String file in writer.GetDirectory().ListAll())
    {
        if (file.EndsWith(".cfs"))
        {
            cfsFileCount++;
        }
    }
    Assert.AreEqual(segmentCount, cfsFileCount);
}
// Debug helper: dumps the total segment count and each segment's doc count
// to standard output.
private void PrintSegmentDocCounts(IndexWriter writer)
{
    int totalSegments = writer.GetSegmentCount();
    System.Console.Out.WriteLine("" + totalSegments + " segments total");
    for (int seg = 0; seg < totalSegments; seg++)
    {
        System.Console.Out.WriteLine("  segment " + seg + " has " + writer.GetDocCount(seg) + " docs");
    }
}