This is a LogMergePolicy that measures the size of a segment as the total byte size of the segment's files.
Inheritance: LogMergePolicy
        public virtual void TestByteSizeLimit()
        {
            // Verifies that LogByteSizeMergePolicy's forced-merge size cap
            // (MaxMergeMBForForcedMerge) is honored by ForceMerge.
            Directory dir = new RAMDirectory();

            // Build an index with several small segments and one large one:
            // segment #7 gets 30 docs, every other segment gets a single doc.
            IndexWriterConfig conf = NewWriterConfig();
            IndexWriter writer = new IndexWriter(dir, conf);
            const int segmentCount = 15;
            for (int seg = 0; seg < segmentCount; seg++)
            {
                AddDocs(writer, seg == 7 ? 30 : 1);
            }
            writer.Dispose();

            // Measure the first (one-doc) segment so the merge cap can be set
            // just above it.
            SegmentInfos infos = new SegmentInfos();
            infos.Read(dir);
            double smallest = infos.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy
            {
                // Cap (converted bytes -> MB) just above the smallest segment:
                // small segments can merge, the 30-doc segment exceeds the limit.
                MaxMergeMBForForcedMerge = (smallest + 1) / (1 << 20)
            };
            conf.SetMergePolicy(policy);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Only 3 segments should remain, because one of them exceeds the size limit.
            infos = new SegmentInfos();
            infos.Read(dir);
            Assert.AreEqual(3, infos.Size());
        }
Example #2
0
        private static IndexWriter GetWriter(Directory directory)
        {
            // Open (or create) an index writer over the given directory, using
            // a byte-size-based log merge policy.
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            config.SetMergePolicy(new LogByteSizeMergePolicy());
            config.SetOpenMode(OpenMode.CREATE_OR_APPEND);

            return new IndexWriter(directory, config);
        }
        public static void ApplyTo(IndexWriter writer)
        {
            // Push the configured merge settings onto the writer itself...
            writer.MergeFactor = MergeFactor;
            writer.MaxMergeDocs = MaxMergeDocs;

            // ...and install a byte-size log merge policy carrying the same
            // limits plus the min/max segment sizes in MB.
            LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy(writer);
            policy.MaxMergeDocs = MaxMergeDocs;
            policy.MergeFactor = MergeFactor;
            policy.MinMergeMB = MinMergeMB;
            policy.MaxMergeMB = MaxMergeMB;
            writer.SetMergePolicy(policy);
        }
        public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
        {
            // Builds a backwards-compatibility test index on disk and returns its
            // location. doCFS selects compound-file format; fullyMerged collapses
            // the index to a single segment, otherwise a no-prox segment and one
            // deletion are layered on top.
            // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
            DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName));

            // Remove leftovers from any previous run of this builder.
            TestUtil.Rm(indexDir);
            Directory dir             = NewFSDirectory(indexDir);
            LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();

            // Ratio 1.0 with no CFS size cap forces compound files everywhere
            // when doCFS is set; 0.0 disallows them entirely.
            mp.NoCFSRatio          = doCFS ? 1.0 : 0.0;
            mp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            // TODO: remove randomness
            IndexWriterConfig conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
            IndexWriter       writer = new IndexWriter(dir, conf);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");
            if (fullyMerged)
            {
                writer.ForceMerge(1);
            }
            writer.Dispose();

            if (!fullyMerged)
            {
                // open fresh writer so we get no prx file in the added segment
                mp            = new LogByteSizeMergePolicy();
                mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
                // TODO: remove randomness
                conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
                writer = new IndexWriter(dir, conf);
                AddNoProxDoc(writer);
                writer.Dispose();

                // Delete doc id=7 with NoMergePolicy so the delete is recorded
                // in the index rather than being merged away.
                conf   = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
                writer = new IndexWriter(dir, conf);
                Term searchTerm = new Term("id", "7");
                writer.DeleteDocuments(searchTerm);
                writer.Dispose();
            }

            dir.Dispose();

            return(indexDir);
        }
        public virtual void  TestHangOnClose()
        {
            // Regression test: AddIndexesNoOptimize into a writer with an
            // aggressive LogByteSizeMergePolicy and a serial merge scheduler
            // must complete (not hang) on Close.
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // Source index: flush every 5 docs, no compound files, merge factor
            // 100 so the many small segments mostly stay unmerged.
            writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
            writer.SetMaxBufferedDocs(5);
            writer.SetUseCompoundFile(false);
            writer.SetMergeFactor(100);

            Document doc = new Document();

            // 60 analyzed docs with full term vectors...
            doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            for (int i = 0; i < 60; i++)
            {
                writer.AddDocument(doc);
            }
            // ...then raise the flush threshold and add 10 docs carrying the
            // same stored-only field four times each.
            writer.SetMaxBufferedDocs(200);
            Document doc2 = new Document();

            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            for (int i = 0; i < 10; i++)
            {
                writer.AddDocument(doc2);
            }
            writer.Close();

            // Destination index: tiny MinMergeMB and merge factor 4 force many
            // merges, run synchronously on a SerialMergeScheduler.
            Directory dir2 = new MockRAMDirectory();

            writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);

            lmp.SetMinMergeMB(0.0001);
            writer.SetMergePolicy(lmp);
            writer.SetMergeFactor(4);
            writer.SetUseCompoundFile(false);
            writer.SetMergeScheduler(new SerialMergeScheduler());
            // The test passes if this import and the subsequent Close return.
            writer.AddIndexesNoOptimize(new Directory[] { dir });
            writer.Close();
            dir.Close();
            dir2.Close();
        }
Example #6
0
        public virtual void TestByteSizeLimit()
        {
            // tests that the max merge size constraint is applied during forceMerge.
            Directory dir = new RAMDirectory();

            // Prepare an index w/ several small segments and a large one.
            IndexWriterConfig conf        = NewWriterConfig();
            IndexWriter       writer      = new IndexWriter(dir, conf);
            const int         numSegments = 15;

            for (int i = 0; i < numSegments; i++)
            {
                // Segment #7 gets 30 docs; every other segment gets a single doc.
                int numDocs = i == 7 ? 30 : 1;
                AddDocs(writer, numDocs);
            }
            writer.Dispose();

            // Read back the segment list and size the merge cap off the first
            // (one-doc) segment.
            SegmentInfos sis = new SegmentInfos();

            sis.Read(dir);
            double min = sis.Info(0).SizeInBytes();

            conf = NewWriterConfig();
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();

            // Cap forced merges (bytes converted to MB) just above the smallest
            // segment, so the 30-doc segment exceeds the limit and is skipped.
            lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20);
            conf.SetMergePolicy(lmp);

            writer = new IndexWriter(dir, conf);
            writer.ForceMerge(1);
            writer.Dispose();

            // Should only be 3 segments in the index, because one of them exceeds the size limit
            sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Size());
        }
        public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
        {
            // Builds a backwards-compatibility test index under /tmp/idx/<dirName>
            // and returns its location. doCFS selects compound-file format;
            // fullyMerged collapses the index to one segment, otherwise a
            // no-prox segment and one deletion are layered on top.
            // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
            DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/idx/", dirName));
            TestUtil.Rm(indexDir);
            Directory dir = NewFSDirectory(indexDir);
            LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
            // Ratio 1.0 with no CFS size cap forces compound files everywhere
            // when doCFS is set; 0.0 disallows them entirely.
            mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
            mp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            // TODO: remove randomness
            IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
            IndexWriter writer = new IndexWriter(dir, conf);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");
            if (fullyMerged)
            {
                writer.ForceMerge(1);
            }
            writer.Dispose();

            if (!fullyMerged)
            {
                // open fresh writer so we get no prx file in the added segment
                mp = new LogByteSizeMergePolicy();
                mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
                // TODO: remove randomness
                conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(mp);
                writer = new IndexWriter(dir, conf);
                AddNoProxDoc(writer);
                writer.Dispose();

                // Delete doc id=7 with NoMergePolicy so the delete is recorded
                // in the index rather than being merged away.
                conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetUseCompoundFile(doCFS).SetMaxBufferedDocs(10).SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES);
                writer = new IndexWriter(dir, conf);
                Term searchTerm = new Term("id", "7");
                writer.DeleteDocuments(searchTerm);
                writer.Dispose();
            }

            dir.Dispose();

            return indexDir;
        }
Example #8
0
        public void Test()
        {
            // End-to-end test of IndexSplitter: build an index with three
            // commits, split one segment out via the API, then exercise the
            // command-line entry point in both copy and delete ("-d") modes.
            DirectoryInfo dir     = CreateTempDir(GetType().Name);
            DirectoryInfo destDir = CreateTempDir(GetType().Name);

            Store.Directory fsDir = NewFSDirectory(dir);
            // IndexSplitter.split makes its own commit directly with SIPC/SegmentInfos,
            // so the unreferenced files are expected.
            if (fsDir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)fsDir).AssertNoUnrefencedFilesOnClose = (false);
            }

            // Force compound files with no size cap so each commit yields a
            // compound segment.
            MergePolicy mergePolicy = new LogByteSizeMergePolicy();

            mergePolicy.NoCFSRatio          = 1.0;
            mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            IndexWriter iw = new IndexWriter(
                fsDir,
                new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).
                SetOpenMode(OpenMode.CREATE).
                SetMergePolicy(mergePolicy)
                );

            // Three commits: 100 docs, then 50, then 50 -> three segments.
            for (int x = 0; x < 100; x++)
            {
                Document doc = DocHelper.CreateDocument(x, "index", 5);
                iw.AddDocument(doc);
            }
            iw.Commit();
            for (int x = 100; x < 150; x++)
            {
                Document doc = DocHelper.CreateDocument(x, "index2", 5);
                iw.AddDocument(doc);
            }
            iw.Commit();
            for (int x = 150; x < 200; x++)
            {
                Document doc = DocHelper.CreateDocument(x, "index3", 5);
                iw.AddDocument(doc);
            }
            iw.Commit();
            DirectoryReader iwReader = iw.GetReader();

            assertEquals(3, iwReader.Leaves.Count);
            iwReader.Dispose();
            iw.Dispose();
            // we should have 2 segments now
            // Split out segment index 1 (the 50-doc second commit) into destDir.
            IndexSplitter @is          = new IndexSplitter(dir);
            string        splitSegName = @is.Infos.Info(1).Info.Name;

            @is.Split(destDir, new string[] { splitSegName });
            Store.Directory fsDirDest = NewFSDirectory(destDir);
            DirectoryReader r         = DirectoryReader.Open(fsDirDest);

            assertEquals(50, r.MaxDoc);
            r.Dispose();
            fsDirDest.Dispose();

            // now test cmdline
            DirectoryInfo destDir2 = CreateTempDir(GetType().Name);

            IndexSplitter.Main(new String[] { dir.FullName, destDir2.FullName, splitSegName });
            // Expect 5 files in the split destination — TODO confirm the exact
            // file set against IndexSplitter's output format.
            assertEquals(5, destDir2.GetFiles().Length);
            Store.Directory fsDirDest2 = NewFSDirectory(destDir2);
            r = DirectoryReader.Open(fsDirDest2);
            assertEquals(50, r.MaxDoc);
            r.Dispose();
            fsDirDest2.Dispose();

            // now remove the copied segment from src
            IndexSplitter.Main(new String[] { dir.FullName, "-d", splitSegName });
            r = DirectoryReader.Open(fsDir);
            assertEquals(2, r.Leaves.size());
            r.Dispose();
            fsDir.Dispose();
        }
        public virtual void TestHangOnClose()
        {
            // Regression test: AddIndexes into a writer with an aggressive
            // LogByteSizeMergePolicy on a serial merge scheduler must complete
            // (not hang) on Dispose.
            //
            // Source index: flush every 5 docs, no compound files, merge factor
            // 100 so the many small segments mostly stay unmerged.
            Directory sourceDir = NewDirectory();
            LogByteSizeMergePolicy sourcePolicy = new LogByteSizeMergePolicy();
            sourcePolicy.NoCFSRatio = 0.0;
            sourcePolicy.MergeFactor = 100;
            IndexWriter writer = new IndexWriter(sourceDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(5).SetMergePolicy(sourcePolicy));

            // 60 stored docs with full term vectors...
            FieldType vectorType = new FieldType(TextField.TYPE_STORED);
            vectorType.StoreTermVectors = true;
            vectorType.StoreTermVectorPositions = true;
            vectorType.StoreTermVectorOffsets = true;
            Document vectorDoc = new Document();
            vectorDoc.Add(NewField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", vectorType));
            for (int n = 0; n < 60; n++)
            {
                writer.AddDocument(vectorDoc);
            }

            // ...then 10 docs each carrying the same stored-only field 4 times.
            FieldType storedType = new FieldType();
            storedType.Stored = true;
            Document storedDoc = new Document();
            for (int n = 0; n < 4; n++)
            {
                storedDoc.Add(NewField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", storedType));
            }
            for (int n = 0; n < 10; n++)
            {
                writer.AddDocument(storedDoc);
            }
            writer.Dispose();

            // Destination index: tiny MinMergeMB and merge factor 4 force many
            // merges, run synchronously on a SerialMergeScheduler. The test
            // passes if the import and the final Dispose return.
            Directory destDir = NewDirectory();
            LogByteSizeMergePolicy destPolicy = new LogByteSizeMergePolicy();
            destPolicy.MinMergeMB = 0.0001;
            destPolicy.NoCFSRatio = 0.0;
            destPolicy.MergeFactor = 4;
            writer = new IndexWriter(destDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(destPolicy));
            writer.AddIndexes(sourceDir);
            writer.Dispose();
            sourceDir.Dispose();
            destDir.Dispose();
        }
Example #10
0
		private void  InitBlock()
		{
			// Initialize this writer's default collaborators: the process-wide
			// default Similarity, a byte-size-based log merge policy bound to
			// this writer, and a reader pool tied to this writer.
			similarity = Similarity.GetDefault();
			mergePolicy = new LogByteSizeMergePolicy(this);
			readerPool = new ReaderPool(this);
		}
		public virtual void  TestHangOnClose()
		{
			// Regression test: AddIndexesNoOptimize into a writer with an
			// aggressive LogByteSizeMergePolicy and a serial merge scheduler
			// must complete (not hang) on Close.
			//
			// Source index: flush every 5 docs, no compound files, merge factor
			// 100 so the many small segments mostly stay unmerged.
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
			writer.SetMaxBufferedDocs(5);
			writer.UseCompoundFile = false;
			writer.MergeFactor = 100;
			
			// 60 analyzed docs with full term vectors...
			Document doc = new Document();
			doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			for (int i = 0; i < 60; i++)
				writer.AddDocument(doc);
			// ...then raise the flush threshold and add 10 docs carrying the
			// same stored-only field four times each.
			writer.SetMaxBufferedDocs(200);
			Document doc2 = new Document();
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
			for (int i = 0; i < 10; i++)
				writer.AddDocument(doc2);
			writer.Close();
			
			// Destination index: tiny MinMergeMB and merge factor 4 force many
			// merges, run synchronously on a SerialMergeScheduler. The test
			// passes if the import and the final Close return.
			Directory dir2 = new MockRAMDirectory();
			writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);
			lmp.MinMergeMB = 0.0001;
			writer.SetMergePolicy(lmp);
			writer.MergeFactor = 4;
			writer.UseCompoundFile = false;
			writer.SetMergeScheduler(new SerialMergeScheduler());
			writer.AddIndexesNoOptimize(new Directory[]{dir});
			writer.Close();
			dir.Close();
			dir2.Close();
		}
        public void Test()
        {
            // End-to-end test of IndexSplitter: build an index with three
            // commits, split one segment out via the API, then exercise the
            // command-line entry point in both copy and delete ("-d") modes.
            DirectoryInfo dir = CreateTempDir(GetType().Name);
            DirectoryInfo destDir = CreateTempDir(GetType().Name);
            Store.Directory fsDir = NewFSDirectory(dir);
            // IndexSplitter.split makes its own commit directly with SIPC/SegmentInfos,
            // so the unreferenced files are expected.
            if (fsDir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)fsDir).AssertNoUnrefencedFilesOnClose = (false);
            }

            // Force compound files with no size cap so each commit yields a
            // compound segment.
            MergePolicy mergePolicy = new LogByteSizeMergePolicy();
            mergePolicy.NoCFSRatio = 1.0;
            mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;
            IndexWriter iw = new IndexWriter(
                fsDir,
                new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).
                    SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).
                    SetMergePolicy(mergePolicy)
            );
            // Three commits: 100 docs, then 50, then 50 -> three segments.
            for (int x = 0; x < 100; x++)
            {
                Document doc = DocHelper.CreateDocument(x, "index", 5);
                iw.AddDocument(doc);
            }
            iw.Commit();
            for (int x = 100; x < 150; x++)
            {
                Document doc = DocHelper.CreateDocument(x, "index2", 5);
                iw.AddDocument(doc);
            }
            iw.Commit();
            for (int x = 150; x < 200; x++)
            {
                Document doc = DocHelper.CreateDocument(x, "index3", 5);
                iw.AddDocument(doc);
            }
            iw.Commit();
            DirectoryReader iwReader = iw.Reader;
            assertEquals(3, iwReader.Leaves.Count);
            iwReader.Dispose();
            iw.Dispose();
            // we should have 2 segments now
            // Split out segment index 1 (the 50-doc second commit) into destDir.
            IndexSplitter @is = new IndexSplitter(dir);
            string splitSegName = @is.infos.Info(1).Info.Name;
            @is.Split(destDir, new string[] { splitSegName });
            Store.Directory fsDirDest = NewFSDirectory(destDir);
            DirectoryReader r = DirectoryReader.Open(fsDirDest);
            assertEquals(50, r.MaxDoc);
            r.Dispose();
            fsDirDest.Dispose();

            // now test cmdline
            DirectoryInfo destDir2 = CreateTempDir(GetType().Name);
            IndexSplitter.Main(new String[] { dir.FullName, destDir2.FullName, splitSegName });
            // Expect 5 files in the split destination — TODO confirm the exact
            // file set against IndexSplitter's output format.
            assertEquals(5, destDir2.GetFiles().Length);
            Store.Directory fsDirDest2 = NewFSDirectory(destDir2);
            r = DirectoryReader.Open(fsDirDest2);
            assertEquals(50, r.MaxDoc);
            r.Dispose();
            fsDirDest2.Dispose();

            // now remove the copied segment from src
            IndexSplitter.Main(new String[] { dir.FullName, "-d", splitSegName });
            r = DirectoryReader.Open(fsDir);
            assertEquals(2, r.Leaves.size());
            r.Dispose();
            fsDir.Dispose();
        }
        public void TestHangOnClose()
        {
            // Regression test: AddIndexesNoOptimize into a writer with an
            // aggressive LogByteSizeMergePolicy and a serial merge scheduler
            // must complete (not hang) on Close.
            //
            // Source index: flush every 5 docs, no compound files, merge factor
            // 100 so the many small segments mostly stay unmerged.
            Directory sourceDir = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(sourceDir, false, new WhitespaceAnalyzer(), true);
            writer.SetMergePolicy(new LogByteSizeMergePolicy());
            writer.SetMaxBufferedDocs(5);
            writer.SetUseCompoundFile(false);
            writer.SetMergeFactor(100);

            // 60 copies of a stored-only document...
            Document storedDoc = new Document();
            storedDoc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            for (int n = 0; n < 60; n++)
            {
                writer.AddDocument(storedDoc);
            }

            // ...then raise the flush threshold and add 10 docs each carrying
            // the same stored-only field four times.
            writer.SetMaxBufferedDocs(200);
            Document quadDoc = new Document();
            for (int n = 0; n < 4; n++)
            {
                quadDoc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            }
            for (int n = 0; n < 10; n++)
            {
                writer.AddDocument(quadDoc);
            }
            writer.Close();

            // Destination index: tiny MinMergeMB and merge factor 4 force many
            // merges, run synchronously on a SerialMergeScheduler. The test
            // passes if the import and the final Close return.
            Directory destDir = new MockRAMDirectory();
            writer = new IndexWriter(destDir, false, new WhitespaceAnalyzer(), true);
            LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy();
            policy.SetMinMergeMB(0.0001);
            writer.SetMergePolicy(policy);
            writer.SetMergeFactor(4);
            writer.SetUseCompoundFile(false);
            writer.SetMergeScheduler(new SerialMergeScheduler());
            writer.AddIndexesNoOptimize(new Directory[] { sourceDir });
            writer.Close();

            sourceDir.Close();
            destDir.Close();
        }
Example #14
0
        private static IndexWriter GetWriter(Directory directory)
        {
            // Open (or create) an index writer over the given directory, using
            // a byte-size-based log merge policy.
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetOpenMode(OpenMode_e.CREATE_OR_APPEND);
            config.SetMergePolicy(new LogByteSizeMergePolicy());

            return new IndexWriter(directory, config);
        }