public virtual void TestByteSizeLimit()
{
    // Verifies that LogByteSizeMergePolicy.MaxMergeMBForForcedMerge is honored
    // during ForceMerge: a segment above the cap must be left unmerged.
    Directory dir = new RAMDirectory();

    // Build an index with several tiny segments plus one large one (segment 7).
    IndexWriterConfig conf = NewWriterConfig();
    IndexWriter writer = new IndexWriter(dir, conf);
    const int segmentCount = 15;
    for (int seg = 0; seg < segmentCount; seg++)
    {
        AddDocs(writer, seg == 7 ? 30 : 1);
    }
    writer.Dispose();

    SegmentInfos infos = new SegmentInfos();
    infos.Read(dir);
    double smallestSize = infos.Info(0).SizeInBytes();

    // Cap the forced-merge segment size just above the smallest segment (in MB).
    conf = NewWriterConfig();
    LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy();
    policy.MaxMergeMBForForcedMerge = (smallestSize + 1) / (1 << 20);
    conf.SetMergePolicy(policy);

    writer = new IndexWriter(dir, conf);
    writer.ForceMerge(1);
    writer.Dispose();

    // Should only be 3 segments in the index, because one of them exceeds the size limit
    infos = new SegmentInfos();
    infos.Read(dir);
    Assert.AreEqual(3, infos.Size());
}
private static IndexWriter GetWriter(Directory directory)
{
    // Builds a writer over the given directory using a byte-size based
    // log merge policy, opening an existing index or creating a new one.
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(new LogByteSizeMergePolicy());
    config.SetOpenMode(OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(directory, config);
}
public static void ApplyTo(IndexWriter writer)
{
    // Push the configured limits onto the writer itself...
    writer.MergeFactor = MergeFactor;
    writer.MaxMergeDocs = MaxMergeDocs;

    // ...and install a byte-size merge policy carrying the same settings.
    LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy(writer);
    policy.MaxMergeDocs = MaxMergeDocs;
    policy.MergeFactor = MergeFactor;
    policy.MinMergeMB = MinMergeMB;
    policy.MaxMergeMB = MaxMergeMB;
    writer.SetMergePolicy(policy);
}
/// <summary>
/// Builds a small test index in a fixed on-disk location and returns its directory.
/// When <paramref name="doCFS"/> is true, compound files are used for every segment;
/// when <paramref name="fullyMerged"/> is true the index is force-merged to a single
/// segment, otherwise an extra no-positions document is added and the doc with
/// id "7" is deleted.
/// </summary>
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
{
    // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
    DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName));
    TestUtil.Rm(indexDir); // start from an empty directory
    Directory dir = NewFSDirectory(indexDir);
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    // NoCFSRatio of 1.0 forces compound files; 0.0 disables them entirely.
    mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
    mp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
    // TODO: remove randomness
    IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
    IndexWriter writer = new IndexWriter(dir, conf);
    for (int i = 0; i < 35; i++)
    {
        AddDoc(writer, i);
    }
    Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");
    if (fullyMerged)
    {
        writer.ForceMerge(1);
    }
    writer.Dispose();
    if (!fullyMerged)
    {
        // open fresh writer so we get no prx file in the added segment
        mp = new LogByteSizeMergePolicy();
        mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
        // TODO: remove randomness
        conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
        writer = new IndexWriter(dir, conf);
        AddNoProxDoc(writer);
        writer.Dispose();
        // Switch to a no-merge policy (matching the CFS choice) before applying the delete.
        conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
        writer = new IndexWriter(dir, conf);
        Term searchTerm = new Term("id", "7");
        writer.DeleteDocuments(searchTerm);
        writer.Dispose();
    }
    dir.Dispose();
    return(indexDir);
}
public virtual void TestHangOnClose()
{
    // Exercises closing a writer that merges many tiny segments copied in
    // via AddIndexesNoOptimize under a serial merge scheduler.
    const string Content = "aaa bbb ccc ddd eee fff ggg hhh iii";

    Directory sourceDir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(sourceDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
    writer.SetMaxBufferedDocs(5);
    writer.SetUseCompoundFile(false);
    writer.SetMergeFactor(100);

    // 60 analyzed docs with positions+offsets term vectors, flushed every 5 docs.
    Document analyzedDoc = new Document();
    analyzedDoc.Add(new Field("content", Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int n = 0; n < 60; n++)
    {
        writer.AddDocument(analyzedDoc);
    }

    // Then 10 stored-only docs (four identical fields each) in one large buffer.
    writer.SetMaxBufferedDocs(200);
    Document storedDoc = new Document();
    for (int f = 0; f < 4; f++)
    {
        storedDoc.Add(new Field("content", Content, Field.Store.YES, Field.Index.NO));
    }
    for (int n = 0; n < 10; n++)
    {
        writer.AddDocument(storedDoc);
    }
    writer.Close();

    // Copy everything into a second index with an aggressive merge setup;
    // Close() must complete without hanging.
    Directory targetDir = new MockRAMDirectory();
    writer = new IndexWriter(targetDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy(writer);
    mergePolicy.SetMinMergeMB(0.0001);
    writer.SetMergePolicy(mergePolicy);
    writer.SetMergeFactor(4);
    writer.SetUseCompoundFile(false);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { sourceDir });
    writer.Close();

    sourceDir.Close();
    targetDir.Close();
}
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
{
    // Deliberately writes under a fixed path that is never cleaned up: the
    // output is a backwards-compatibility index meant to be kept, not a
    // throwaway test directory.
    DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName));
    TestUtil.Rm(indexDir);
    Directory dir = NewFSDirectory(indexDir);

    LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy();
    policy.NoCFSRatio = doCFS ? 1.0 : 0.0; // 1.0 = always compound, 0.0 = never
    policy.MaxCFSSegmentSizeMB = double.PositiveInfinity;

    // TODO: remove randomness
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetUseCompoundFile(doCFS);
    config.SetMaxBufferedDocs(10);
    config.SetMergePolicy(policy);
    IndexWriter writer = new IndexWriter(dir, config);

    for (int docId = 0; docId < 35; docId++)
    {
        AddDoc(writer, docId);
    }
    Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");

    if (fullyMerged)
    {
        writer.ForceMerge(1);
    }
    writer.Dispose();

    if (!fullyMerged)
    {
        // open fresh writer so we get no prx file in the added segment
        policy = new LogByteSizeMergePolicy();
        policy.NoCFSRatio = doCFS ? 1.0 : 0.0;
        // TODO: remove randomness
        config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
        config.SetUseCompoundFile(doCFS);
        config.SetMaxBufferedDocs(10);
        config.SetMergePolicy(policy);
        writer = new IndexWriter(dir, config);
        AddNoProxDoc(writer);
        writer.Dispose();

        // Reopen with a no-merge policy (matching the CFS choice) and delete id "7".
        config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
        config.SetUseCompoundFile(doCFS);
        config.SetMaxBufferedDocs(10);
        config.SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
        writer = new IndexWriter(dir, config);
        writer.DeleteDocuments(new Term("id", "7"));
        writer.Dispose();
    }

    dir.Dispose();
    return indexDir;
}
/// <summary>
/// End-to-end test of IndexSplitter: builds a three-segment index, splits the
/// middle segment out into a destination directory (via the API and via the
/// command line), then removes that segment from the source index.
/// </summary>
public void Test()
{
    DirectoryInfo dir = CreateTempDir(GetType().Name);
    DirectoryInfo destDir = CreateTempDir(GetType().Name);
    Store.Directory fsDir = NewFSDirectory(dir);
    // IndexSplitter.split makes its own commit directly with SIPC/SegmentInfos,
    // so the unreferenced files are expected.
    if (fsDir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)fsDir).AssertNoUnrefencedFilesOnClose = (false);
    }
    MergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.NoCFSRatio = 1.0;
    mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;
    IndexWriter iw = new IndexWriter(
        fsDir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).
        SetOpenMode(OpenMode.CREATE).
        SetMergePolicy(mergePolicy)
    );
    // Three batches with a commit after each -> three segments ("index", "index2", "index3").
    for (int x = 0; x < 100; x++)
    {
        Document doc = DocHelper.CreateDocument(x, "index", 5);
        iw.AddDocument(doc);
    }
    iw.Commit();
    for (int x = 100; x < 150; x++)
    {
        Document doc = DocHelper.CreateDocument(x, "index2", 5);
        iw.AddDocument(doc);
    }
    iw.Commit();
    for (int x = 150; x < 200; x++)
    {
        Document doc = DocHelper.CreateDocument(x, "index3", 5);
        iw.AddDocument(doc);
    }
    iw.Commit();
    DirectoryReader iwReader = iw.GetReader();
    assertEquals(3, iwReader.Leaves.Count);
    iwReader.Dispose();
    iw.Dispose();
    // The index now holds 3 segments (asserted above); split the middle one out.
    IndexSplitter @is = new IndexSplitter(dir);
    string splitSegName = @is.Infos.Info(1).Info.Name;
    @is.Split(destDir, new string[] { splitSegName });
    // The split-out segment holds the 50 "index2" docs.
    Store.Directory fsDirDest = NewFSDirectory(destDir);
    DirectoryReader r = DirectoryReader.Open(fsDirDest);
    assertEquals(50, r.MaxDoc);
    r.Dispose();
    fsDirDest.Dispose();
    // now test cmdline
    DirectoryInfo destDir2 = CreateTempDir(GetType().Name);
    IndexSplitter.Main(new String[] { dir.FullName, destDir2.FullName, splitSegName });
    assertEquals(5, destDir2.GetFiles().Length);
    Store.Directory fsDirDest2 = NewFSDirectory(destDir2);
    r = DirectoryReader.Open(fsDirDest2);
    assertEquals(50, r.MaxDoc);
    r.Dispose();
    fsDirDest2.Dispose();
    // now remove the copied segment from src
    IndexSplitter.Main(new String[] { dir.FullName, "-d", splitSegName });
    r = DirectoryReader.Open(fsDir);
    assertEquals(2, r.Leaves.size());
    r.Dispose();
    fsDir.Dispose();
}
public virtual void TestHangOnClose()
{
    Directory sourceDir = NewDirectory();
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.NoCFSRatio = 0.0;
    mergePolicy.MergeFactor = 100;
    IndexWriter writer = new IndexWriter(sourceDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(5).SetMergePolicy(mergePolicy));

    // 60 docs carrying full term vectors, flushed every 5 docs.
    FieldType vectorType = new FieldType(TextField.TYPE_STORED);
    vectorType.StoreTermVectors = true;
    vectorType.StoreTermVectorPositions = true;
    vectorType.StoreTermVectorOffsets = true;
    Document vectorDoc = new Document();
    vectorDoc.Add(NewField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", vectorType));
    for (int n = 0; n < 60; n++)
    {
        writer.AddDocument(vectorDoc);
    }

    // 10 docs whose four identical fields are stored but not indexed.
    FieldType storedOnlyType = new FieldType();
    storedOnlyType.Stored = true;
    Document storedDoc = new Document();
    for (int f = 0; f < 4; f++)
    {
        storedDoc.Add(NewField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", storedOnlyType));
    }
    for (int n = 0; n < 10; n++)
    {
        writer.AddDocument(storedDoc);
    }
    writer.Dispose();

    // Merge-heavy copy into a second directory; disposing must complete.
    Directory targetDir = NewDirectory();
    mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.MinMergeMB = 0.0001;
    mergePolicy.NoCFSRatio = 0.0;
    mergePolicy.MergeFactor = 4;
    writer = new IndexWriter(targetDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(mergePolicy));
    writer.AddIndexes(sourceDir);
    writer.Dispose();
    sourceDir.Dispose();
    targetDir.Dispose();
}
private void InitBlock()
{
    // Sets this writer's default collaborators.
    similarity = Similarity.GetDefault();           // default scoring similarity
    mergePolicy = new LogByteSizeMergePolicy(this); // merges chosen by on-disk byte size
    readerPool = new ReaderPool(this);              // pool of segment readers for this writer
}
public virtual void TestHangOnClose()
{
    const string Content = "aaa bbb ccc ddd eee fff ggg hhh iii";

    Directory sourceDir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(sourceDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
    writer.SetMaxBufferedDocs(5);
    writer.UseCompoundFile = false;
    writer.MergeFactor = 100;

    // 60 analyzed docs with positions+offsets term vectors, small flush buffer.
    Document analyzedDoc = new Document();
    analyzedDoc.Add(new Field("content", Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int n = 0; n < 60; n++)
    {
        writer.AddDocument(analyzedDoc);
    }

    // 10 stored-only docs (four identical fields each) with a large flush buffer.
    writer.SetMaxBufferedDocs(200);
    Document storedDoc = new Document();
    for (int f = 0; f < 4; f++)
    {
        storedDoc.Add(new Field("content", Content, Field.Store.YES, Field.Index.NO));
    }
    for (int n = 0; n < 10; n++)
    {
        writer.AddDocument(storedDoc);
    }
    writer.Close();

    // Copy into a second index with aggressive merging; Close() must not hang.
    Directory targetDir = new MockRAMDirectory();
    writer = new IndexWriter(targetDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy(writer);
    mergePolicy.MinMergeMB = 0.0001;
    writer.SetMergePolicy(mergePolicy);
    writer.MergeFactor = 4;
    writer.UseCompoundFile = false;
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { sourceDir });
    writer.Close();
    sourceDir.Close();
    targetDir.Close();
}
/// <summary>
/// End-to-end test of IndexSplitter: builds a three-segment index, splits the
/// middle segment out into a destination directory (via the API and via the
/// command line), then removes that segment from the source index.
/// </summary>
public void Test()
{
    DirectoryInfo dir = CreateTempDir(GetType().Name);
    DirectoryInfo destDir = CreateTempDir(GetType().Name);
    Store.Directory fsDir = NewFSDirectory(dir);
    // IndexSplitter.split makes its own commit directly with SIPC/SegmentInfos,
    // so the unreferenced files are expected.
    if (fsDir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)fsDir).AssertNoUnrefencedFilesOnClose = (false);
    }
    MergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.NoCFSRatio = 1.0;
    mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;
    IndexWriter iw = new IndexWriter(
        fsDir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).
        SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).
        SetMergePolicy(mergePolicy)
    );
    // Three batches with a commit after each -> three segments ("index", "index2", "index3").
    for (int x = 0; x < 100; x++)
    {
        Document doc = DocHelper.CreateDocument(x, "index", 5);
        iw.AddDocument(doc);
    }
    iw.Commit();
    for (int x = 100; x < 150; x++)
    {
        Document doc = DocHelper.CreateDocument(x, "index2", 5);
        iw.AddDocument(doc);
    }
    iw.Commit();
    for (int x = 150; x < 200; x++)
    {
        Document doc = DocHelper.CreateDocument(x, "index3", 5);
        iw.AddDocument(doc);
    }
    iw.Commit();
    DirectoryReader iwReader = iw.Reader;
    assertEquals(3, iwReader.Leaves.Count);
    iwReader.Dispose();
    iw.Dispose();
    // The index now holds 3 segments (asserted above); split the middle one out.
    IndexSplitter @is = new IndexSplitter(dir);
    string splitSegName = @is.infos.Info(1).Info.Name;
    @is.Split(destDir, new string[] { splitSegName });
    // The split-out segment holds the 50 "index2" docs.
    Store.Directory fsDirDest = NewFSDirectory(destDir);
    DirectoryReader r = DirectoryReader.Open(fsDirDest);
    assertEquals(50, r.MaxDoc);
    r.Dispose();
    fsDirDest.Dispose();
    // now test cmdline
    DirectoryInfo destDir2 = CreateTempDir(GetType().Name);
    IndexSplitter.Main(new String[] { dir.FullName, destDir2.FullName, splitSegName });
    assertEquals(5, destDir2.GetFiles().Length);
    Store.Directory fsDirDest2 = NewFSDirectory(destDir2);
    r = DirectoryReader.Open(fsDirDest2);
    assertEquals(50, r.MaxDoc);
    r.Dispose();
    fsDirDest2.Dispose();
    // now remove the copied segment from src
    IndexSplitter.Main(new String[] { dir.FullName, "-d", splitSegName });
    r = DirectoryReader.Open(fsDir);
    assertEquals(2, r.Leaves.size());
    r.Dispose();
    fsDir.Dispose();
}
public void TestHangOnClose()
{
    const string Content = "aaa bbb ccc ddd eee fff ggg hhh iii";

    Directory sourceDir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(sourceDir, false, new WhitespaceAnalyzer(), true);
    writer.SetMergePolicy(new LogByteSizeMergePolicy());
    writer.SetMaxBufferedDocs(5);
    writer.SetUseCompoundFile(false);
    writer.SetMergeFactor(100);

    // 60 single-field stored-only docs, flushed every 5 docs.
    Document singleFieldDoc = new Document();
    singleFieldDoc.Add(new Field("content", Content, Field.Store.YES, Field.Index.NO));
    for (int n = 0; n < 60; n++)
    {
        writer.AddDocument(singleFieldDoc);
    }

    // 10 four-field stored-only docs with a large flush buffer.
    writer.SetMaxBufferedDocs(200);
    Document fourFieldDoc = new Document();
    for (int f = 0; f < 4; f++)
    {
        fourFieldDoc.Add(new Field("content", Content, Field.Store.YES, Field.Index.NO));
    }
    for (int n = 0; n < 10; n++)
    {
        writer.AddDocument(fourFieldDoc);
    }
    writer.Close();

    // Merge everything into a second index; Close() must complete.
    Directory targetDir = new MockRAMDirectory();
    writer = new IndexWriter(targetDir, false, new WhitespaceAnalyzer(), true);
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.SetMinMergeMB(0.0001);
    writer.SetMergePolicy(mergePolicy);
    writer.SetMergeFactor(4);
    writer.SetUseCompoundFile(false);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { sourceDir });
    writer.Close();
    sourceDir.Close();
    targetDir.Close();
}
private static IndexWriter GetWriter(Directory directory)
{
    // Builds a writer over the given directory using a byte-size based
    // log merge policy, opening an existing index or creating a new one.
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMergePolicy(new LogByteSizeMergePolicy());
    config.SetOpenMode(OpenMode_e.CREATE_OR_APPEND);
    return new IndexWriter(directory, config);
}